mirror of
https://github.com/UrloMythus/UnHided.git
synced 2026-04-11 03:40:54 +00:00
124 lines
3.9 KiB
Python
124 lines
3.9 KiB
Python
import base64
|
|
import logging
|
|
from typing import Optional
|
|
from urllib.parse import urlparse
|
|
|
|
# Module-level logger named after this module; the helpers below report
# decode/encode outcomes through it.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def is_base64_url(url: str) -> bool:
    """
    Check if a URL appears to be base64 encoded.

    This is a cheap heuristic based on the character set and length only; it
    does not attempt to decode the value, so plain tokens made of base64
    characters also pass. Both the standard alphabet (``+``, ``/``) and the
    URL-safe alphabet (``-``, ``_``) are accepted, because URL-safe payloads
    such as the encoding of ``https://example.com/?q=1``
    (``aHR0cHM6Ly9leGFtcGxlLmNvbS8_cT0x``) contain ``_`` or ``-``.

    Args:
        url (str): The URL to check.

    Returns:
        bool: True if the URL appears to be base64 encoded, False otherwise.
    """
    # A value that already carries a well-known scheme is a plain URL.
    if url.startswith(("http://", "https://", "ftp://", "ftps://")):
        return False

    # Too short to be a meaningful base64 encoded URL.
    if len(url) < 10:
        return False

    # Standard base64 alphabet plus padding, extended with the URL-safe
    # substitutes "-" and "_" so URL-safe payloads are recognised too.
    base64_chars = frozenset(
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=-_"
    )

    # Any character outside the alphabet (":", ".", "?", spaces, ...) means
    # the value is not base64.
    return base64_chars.issuperset(url)
|
|
|
|
|
|
def decode_base64_url(encoded_url: str) -> Optional[str]:
    """
    Turn a base64 encoded URL back into its plain-text form.

    Both the standard and the URL-safe alphabets are accepted, and missing
    "=" padding is restored before decoding.

    Args:
        encoded_url (str): The base64 encoded URL string.

    Returns:
        Optional[str]: The decoded URL when the payload decodes to UTF-8 text
        that parses with both a scheme and a network location; None when
        decoding fails or the decoded text does not look like a URL.
    """
    try:
        # Map the URL-safe alphabet ("-", "_") back onto the standard one.
        normalized = encoded_url.replace("-", "+").replace("_", "/")

        # Pad with "=" up to the next multiple of four characters.
        normalized += "=" * (-len(normalized) % 4)

        decoded_url = base64.b64decode(normalized).decode("utf-8")

        # Only accept results that parse as an absolute URL.
        parts = urlparse(decoded_url)
        if not (parts.scheme and parts.netloc):
            logger.warning(f"Decoded string is not a valid URL: {decoded_url}")
            return None

        logger.info(f"Successfully decoded base64 URL: {encoded_url[:50]}... -> {decoded_url}")
        return decoded_url

    except ValueError as e:
        # binascii.Error and UnicodeDecodeError are both ValueError
        # subclasses, so this single clause covers every decoding failure.
        logger.debug(f"Failed to decode base64 URL '{encoded_url[:50]}...': {e}")
        return None
|
|
|
|
|
|
def encode_url_to_base64(url: str, url_safe: bool = True) -> str:
    """
    Produce the base64 representation of a URL.

    Args:
        url (str): The URL to encode.
        url_safe (bool): When True (the default) the URL-safe alphabet is
            used and trailing "=" padding is stripped; when False the
            standard alphabet is used and padding is kept.

    Returns:
        str: The base64 encoded URL.

    Raises:
        Exception: Any error raised while encoding is logged and re-raised
        unchanged.
    """
    try:
        raw = url.encode("utf-8")

        if not url_safe:
            encoded = base64.b64encode(raw).decode("utf-8")
        else:
            # URL-safe alphabet ("-" and "_" instead of "+" and "/"), with
            # the padding dropped for cleaner URLs.
            encoded = base64.urlsafe_b64encode(raw).decode("utf-8").rstrip("=")

        logger.debug(f"Encoded URL to base64: {url} -> {encoded}")
        return encoded

    except Exception as e:
        logger.error(f"Failed to encode URL to base64: {e}")
        raise
|
|
|
|
|
|
def process_potential_base64_url(url: str) -> str:
    """
    Resolve a URL that may have been supplied in base64 form.

    Values that look like base64 are decoded; anything else, and anything
    that fails to decode, is handed back untouched.

    Args:
        url (str): The URL to process.

    Returns:
        str: The processed URL (decoded if it was base64, original otherwise).
    """
    # Plain URLs pass straight through.
    if not is_base64_url(url):
        return url

    decoded_url = decode_base64_url(url)
    if decoded_url:
        return decoded_url

    # Looked like base64 but did not decode to a usable URL: fall back to
    # the caller's original value.
    logger.warning(f"URL appears to be base64 but failed to decode: {url[:50]}...")
    return url
|