This commit is contained in:
UrloMythus
2026-02-19 20:15:03 +01:00
parent 7785e8c604
commit cfc6bbabc9
181 changed files with 32141 additions and 4629 deletions

View File

@@ -9,56 +9,56 @@ logger = logging.getLogger(__name__)
def is_base64_url(url: str) -> bool:
    """
    Check if a URL appears to be base64 encoded.

    This is a heuristic, not a validation: it only inspects the prefix, the
    length, and the character set of the string. Both the standard base64
    alphabet (``+`` and ``/``) and the URL-safe alphabet (``-`` and ``_``)
    are accepted, so that values produced by URL-safe encoding (with or
    without ``=`` padding) are recognised as well.

    Args:
        url (str): The URL to check.

    Returns:
        bool: True if the URL appears to be base64 encoded, False otherwise.
    """
    # A string that already carries an explicit scheme is a plain URL.
    if url.startswith(("http://", "https://", "ftp://", "ftps://")):
        return False

    # Too short to be a meaningful base64 encoded URL (also rejects "").
    if len(url) < 10:
        return False

    # Standard alphabet (+, /), URL-safe alphabet (-, _) and padding (=).
    # Typical plain-URL characters such as ':', '.', '?' or '&' are absent,
    # so any string containing them is rejected.
    base64_chars = set(
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=-_"
    )
    return set(url).issubset(base64_chars)
def decode_base64_url(encoded_url: str) -> Optional[str]:
"""
Decode a base64 encoded URL.
Args:
encoded_url (str): The base64 encoded URL string.
Returns:
Optional[str]: The decoded URL if successful, None if decoding fails.
"""
try:
# Handle URL-safe base64 encoding (replace - with + and _ with /)
url_safe_encoded = encoded_url.replace('-', '+').replace('_', '/')
url_safe_encoded = encoded_url.replace("-", "+").replace("_", "/")
# Add padding if necessary
missing_padding = len(url_safe_encoded) % 4
if missing_padding:
url_safe_encoded += '=' * (4 - missing_padding)
url_safe_encoded += "=" * (4 - missing_padding)
# Decode the base64 string
decoded_bytes = base64.b64decode(url_safe_encoded)
decoded_url = decoded_bytes.decode('utf-8')
decoded_url = decoded_bytes.decode("utf-8")
# Validate that the decoded string is a valid URL
parsed = urlparse(decoded_url)
if parsed.scheme and parsed.netloc:
@@ -67,7 +67,7 @@ def decode_base64_url(encoded_url: str) -> Optional[str]:
else:
logger.warning(f"Decoded string is not a valid URL: {decoded_url}")
return None
except (base64.binascii.Error, UnicodeDecodeError, ValueError) as e:
logger.debug(f"Failed to decode base64 URL '{encoded_url[:50]}...': {e}")
return None
@@ -76,27 +76,27 @@ def decode_base64_url(encoded_url: str) -> Optional[str]:
def encode_url_to_base64(url: str, url_safe: bool = True) -> str:
    """
    Encode a URL to base64.

    The URL is encoded as UTF-8 bytes first. With ``url_safe`` enabled the
    URL-safe alphabet is used and trailing ``=`` padding is stripped; with
    it disabled, standard base64 is produced and padding is kept.

    Args:
        url (str): The URL to encode.
        url_safe (bool): Whether to use URL-safe base64 encoding (default: True).

    Returns:
        str: The base64 encoded URL.

    Raises:
        Exception: Any error raised while encoding is logged and re-raised
            unchanged.
    """
    try:
        raw = url.encode("utf-8")
        if not url_safe:
            # Standard alphabet, padding preserved.
            encoded = base64.b64encode(raw).decode("utf-8")
        else:
            # URL-safe alphabet (- and _ instead of + and /); padding is
            # dropped for cleaner URLs.
            encoded = base64.urlsafe_b64encode(raw).decode("utf-8").rstrip("=")
        logger.debug(f"Encoded URL to base64: {url} -> {encoded}")
        return encoded
    except Exception as e:
        logger.error(f"Failed to encode URL to base64: {e}")
        raise
@@ -106,10 +106,10 @@ def process_potential_base64_url(url: str) -> str:
"""
Process a URL that might be base64 encoded. If it's base64 encoded, decode it.
Otherwise, return the original URL.
Args:
url (str): The URL to process.
Returns:
str: The processed URL (decoded if it was base64, original otherwise).
"""
@@ -119,5 +119,5 @@ def process_potential_base64_url(url: str) -> str:
return decoded_url
else:
logger.warning(f"URL appears to be base64 but failed to decode: {url[:50]}...")
return url
return url