diff --git a/mediaflow_proxy/__pycache__/__init__.cpython-313.pyc b/mediaflow_proxy/__pycache__/__init__.cpython-313.pyc index c0093f8..6000a2a 100644 Binary files a/mediaflow_proxy/__pycache__/__init__.cpython-313.pyc and b/mediaflow_proxy/__pycache__/__init__.cpython-313.pyc differ diff --git a/mediaflow_proxy/__pycache__/configs.cpython-313.pyc b/mediaflow_proxy/__pycache__/configs.cpython-313.pyc index 1da72df..522ec1b 100644 Binary files a/mediaflow_proxy/__pycache__/configs.cpython-313.pyc and b/mediaflow_proxy/__pycache__/configs.cpython-313.pyc differ diff --git a/mediaflow_proxy/__pycache__/const.cpython-313.pyc b/mediaflow_proxy/__pycache__/const.cpython-313.pyc index 173fd13..82642d9 100644 Binary files a/mediaflow_proxy/__pycache__/const.cpython-313.pyc and b/mediaflow_proxy/__pycache__/const.cpython-313.pyc differ diff --git a/mediaflow_proxy/__pycache__/handlers.cpython-313.pyc b/mediaflow_proxy/__pycache__/handlers.cpython-313.pyc index d3857cf..948bfca 100644 Binary files a/mediaflow_proxy/__pycache__/handlers.cpython-313.pyc and b/mediaflow_proxy/__pycache__/handlers.cpython-313.pyc differ diff --git a/mediaflow_proxy/__pycache__/main.cpython-313.pyc b/mediaflow_proxy/__pycache__/main.cpython-313.pyc index 8e81be0..043afc9 100644 Binary files a/mediaflow_proxy/__pycache__/main.cpython-313.pyc and b/mediaflow_proxy/__pycache__/main.cpython-313.pyc differ diff --git a/mediaflow_proxy/__pycache__/middleware.cpython-313.pyc b/mediaflow_proxy/__pycache__/middleware.cpython-313.pyc index 5bb4a40..f6e56a4 100644 Binary files a/mediaflow_proxy/__pycache__/middleware.cpython-313.pyc and b/mediaflow_proxy/__pycache__/middleware.cpython-313.pyc differ diff --git a/mediaflow_proxy/__pycache__/mpd_processor.cpython-313.pyc b/mediaflow_proxy/__pycache__/mpd_processor.cpython-313.pyc index 925c50d..22904d1 100644 Binary files a/mediaflow_proxy/__pycache__/mpd_processor.cpython-313.pyc and b/mediaflow_proxy/__pycache__/mpd_processor.cpython-313.pyc differ diff --git a/mediaflow_proxy/__pycache__/schemas.cpython-313.pyc b/mediaflow_proxy/__pycache__/schemas.cpython-313.pyc index e15268d..076d8f3 100644 Binary files a/mediaflow_proxy/__pycache__/schemas.cpython-313.pyc and b/mediaflow_proxy/__pycache__/schemas.cpython-313.pyc differ diff --git a/mediaflow_proxy/configs.py b/mediaflow_proxy/configs.py index fbae17f..d80c3a1 100644 --- a/mediaflow_proxy/configs.py +++ b/mediaflow_proxy/configs.py @@ -64,14 +64,21 @@ class Settings(BaseSettings): dash_prebuffer_emergency_threshold: int = 90 # Emergency threshold percentage to trigger aggressive cache cleanup. dash_prebuffer_inactivity_timeout: int = 60 # Seconds of inactivity before cleaning up stream state. dash_segment_cache_ttl: int = 60 # TTL (seconds) for cached media segments; longer = better for slow playback. + dash_player_lock_timeout: float = 2.5 # Max wait (seconds) for player requests when a segment lock is busy. + dash_prebuffer_lock_timeout: float = 0.25 # Max wait (seconds) for background prebuffer lock acquisition. + dash_prefetch_max_concurrent: int = 1 # Max concurrent live DASH prefetch downloads to reduce lock contention. + dash_live_initial_media_prebuffer: bool = ( + False # Whether manifest-time prebuffer should fetch live media segments (init segments are still prewarmed). + ) mpd_live_init_cache_ttl: int = 60 # TTL (seconds) for live init segment cache; 0 disables caching. mpd_live_playlist_depth: int = 8 # Number of recent segments to expose per live playlist variant. remux_to_ts: bool = False # Remux fMP4 segments to MPEG-TS for ExoPlayer/VLC compatibility. processed_segment_cache_ttl: int = 60 # TTL (seconds) for caching processed (decrypted/remuxed) segments. - # FlareSolverr settings (for Cloudflare bypass) - flaresolverr_url: str | None = None # FlareSolverr service URL. Example: http://localhost:8191 - flaresolverr_timeout: int = 60 # Timeout (seconds) for FlareSolverr requests. + # Byparr settings — Firefox/Camoufox-based solver for Cloudflare bypass and chevy IP whitelist. + # https://github.com/ThePhaseless/Byparr (drop-in FlareSolverr-compatible API) + byparr_url: str | None = None # Byparr service URL. Example: http://localhost:8192 + byparr_timeout: int = 60 # Timeout (seconds) for Byparr requests. # Acestream settings enable_acestream: bool = False # Whether to enable Acestream proxy support. @@ -89,6 +96,8 @@ class Settings(BaseSettings): telegram_session_string: SecretStr | None = None # Persistent session string (avoids re-authentication). telegram_max_connections: int = 8 # Max parallel DC connections for downloads (max 20, careful of floods). telegram_request_timeout: int = 30 # Request timeout in seconds. + telegram_document_scan_limit: int = 500 # Max recent messages to scan when resolving chat_id+document_id. + telegram_document_cache_ttl: int = 3600 # TTL (seconds) for cached document_id->message_id mappings. # Transcode settings enable_transcode: bool = True # Whether to enable on-the-fly transcoding endpoints (MKV→fMP4, HLS VOD). @@ -105,6 +114,9 @@ class Settings(BaseSettings): upstream_retry_delay: float = 1.0 # Delay (seconds) between retry attempts. graceful_stream_end: bool = True # Return valid empty playlist instead of error when upstream fails. + # EPG proxy settings + epg_cache_ttl: int = 3600 # TTL (seconds) for cached EPG/XMLTV data. Default 1 hour. + # Redis settings redis_url: str | None = None # Redis URL for distributed locking and caching. None = disabled. cache_namespace: str | None = ( diff --git a/mediaflow_proxy/drm/__pycache__/__init__.cpython-313.pyc b/mediaflow_proxy/drm/__pycache__/__init__.cpython-313.pyc index ab17842..0a378aa 100644 Binary files a/mediaflow_proxy/drm/__pycache__/__init__.cpython-313.pyc and b/mediaflow_proxy/drm/__pycache__/__init__.cpython-313.pyc differ diff --git a/mediaflow_proxy/drm/__pycache__/decrypter.cpython-313.pyc b/mediaflow_proxy/drm/__pycache__/decrypter.cpython-313.pyc index f69ffd6..592010c 100644 Binary files a/mediaflow_proxy/drm/__pycache__/decrypter.cpython-313.pyc and b/mediaflow_proxy/drm/__pycache__/decrypter.cpython-313.pyc differ diff --git a/mediaflow_proxy/drm/decrypter.py b/mediaflow_proxy/drm/decrypter.py index 8b6a354..486a0e5 100644 --- a/mediaflow_proxy/drm/decrypter.py +++ b/mediaflow_proxy/drm/decrypter.py @@ -645,17 +645,23 @@ class MP4Decrypter: return sample_info - def _get_key_for_track(self, track_id: int) -> bytes: + def _get_key_for_track(self, track_id: int) -> Optional[bytes]: """ Retrieves the decryption key for a given track ID from the key map. Uses the KID extracted from the tenc box if available, otherwise falls back to using the first key if only one key is provided. + Returns None (rather than raising) when no matching key can be found — the + caller is expected to handle None by skipping decryption for that track + and passing the encrypted data through unchanged. This mirrors the Rust + implementation and avoids crashing the whole moof/mdat pipeline when + content uses slightly different KID byte-ordering than the URL parameters. + Args: track_id (int): The track ID. Returns: - bytes: The decryption key for the specified track ID. + Optional[bytes]: The decryption key, or None if no key found. """ # If we have an extracted KID for this track, use it to look up the key if track_id in self.extracted_kids: @@ -668,13 +674,31 @@ class MP4Decrypter: if len(self.key_map) == 1: return next(iter(self.key_map.values())) else: - # Use the extracted KID to look up the key + # Direct lookup: tenc KID matches a provided key_id byte-for-byte key = self.key_map.get(extracted_kid) if key: return key - # If KID doesn't match, try fallback - # Note: This is expected when KID in file doesn't match provided key_id - # The provided key_id should still work if it's the correct decryption key + + # PlayReady GUID fallback: some content packagers store the KID in + # the tenc box using little-endian byte order for the first three UUID + # components (the PlayReady GUID format), while the MPD advertises + # @cenc:default_KID in standard big-endian UUID order. + # + # UUID: AABBCCDD-EEFF-GGHH-II... + # LE GUID: DDCCBBAA-FFEE-HHGG-II... (first 4, next 2, next 2 swapped) + # + # Try both directions so that audio-only or video-only init segments + # whose tenc KID was written in the opposite format can still match. + if len(extracted_kid) == 16: + swapped = ( + extracted_kid[3::-1] # bytes 0-3 reversed + + extracted_kid[5:3:-1] # bytes 4-5 reversed + + extracted_kid[7:5:-1] # bytes 6-7 reversed + + extracted_kid[8:] # bytes 8-15 unchanged + ) + key = self.key_map.get(bytes(swapped)) + if key: + return key # Fallback: if only one key provided, use it (backward compatibility) if len(self.key_map) == 1: @@ -683,9 +707,12 @@ class MP4Decrypter: # Try using track_id as KID (for multi-key scenarios) track_id_bytes = track_id.to_bytes(4, "big") key = self.key_map.get(track_id_bytes) - if not key: - raise ValueError(f"No key found for track ID {track_id}") - return key + if key: + return key + + # No key found — return None so callers can pass encrypted data through + # rather than aborting the entire segment stream. + return None @staticmethod def _process_sample( diff --git a/mediaflow_proxy/extractors/__pycache__/F16Px.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/F16Px.cpython-313.pyc index 77bccc4..e396185 100644 Binary files a/mediaflow_proxy/extractors/__pycache__/F16Px.cpython-313.pyc and b/mediaflow_proxy/extractors/__pycache__/F16Px.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/__init__.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/__init__.cpython-313.pyc index 065ca49..e665ba8 100644 Binary files a/mediaflow_proxy/extractors/__pycache__/__init__.cpython-313.pyc and b/mediaflow_proxy/extractors/__pycache__/__init__.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/base.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/base.cpython-313.pyc index 5f22939..099f95a 100644 Binary files a/mediaflow_proxy/extractors/__pycache__/base.cpython-313.pyc and b/mediaflow_proxy/extractors/__pycache__/base.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/city.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/city.cpython-313.pyc new file mode 100644 index 0000000..baca599 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/city.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/dlhd.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/dlhd.cpython-313.pyc deleted file mode 100644 index ec4dd71..0000000 Binary files a/mediaflow_proxy/extractors/__pycache__/dlhd.cpython-313.pyc and /dev/null differ diff --git a/mediaflow_proxy/extractors/__pycache__/doodstream.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/doodstream.cpython-313.pyc index 81a2fae..deb0526 100644 Binary files a/mediaflow_proxy/extractors/__pycache__/doodstream.cpython-313.pyc and b/mediaflow_proxy/extractors/__pycache__/doodstream.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/factory.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/factory.cpython-313.pyc index 6d7ef51..6e0af0f 100644 Binary files a/mediaflow_proxy/extractors/__pycache__/factory.cpython-313.pyc and b/mediaflow_proxy/extractors/__pycache__/factory.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/fastream.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/fastream.cpython-313.pyc index 22dd5fb..b0d77db 100644 Binary files a/mediaflow_proxy/extractors/__pycache__/fastream.cpython-313.pyc and b/mediaflow_proxy/extractors/__pycache__/fastream.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/filelions.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/filelions.cpython-313.pyc index f8f0cd9..27c191c 100644 Binary files a/mediaflow_proxy/extractors/__pycache__/filelions.cpython-313.pyc and b/mediaflow_proxy/extractors/__pycache__/filelions.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/filemoon.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/filemoon.cpython-313.pyc index 682877e..c9523be 100644 Binary files a/mediaflow_proxy/extractors/__pycache__/filemoon.cpython-313.pyc and b/mediaflow_proxy/extractors/__pycache__/filemoon.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/gupload.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/gupload.cpython-313.pyc index afdbc12..0625cf2 100644 Binary files a/mediaflow_proxy/extractors/__pycache__/gupload.cpython-313.pyc and b/mediaflow_proxy/extractors/__pycache__/gupload.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/livetv.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/livetv.cpython-313.pyc index 511dfca..d6ce6d1 100644 Binary files a/mediaflow_proxy/extractors/__pycache__/livetv.cpython-313.pyc and b/mediaflow_proxy/extractors/__pycache__/livetv.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/lulustream.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/lulustream.cpython-313.pyc index 15f04bf..bc02460 100644 Binary files a/mediaflow_proxy/extractors/__pycache__/lulustream.cpython-313.pyc and b/mediaflow_proxy/extractors/__pycache__/lulustream.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/maxstream.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/maxstream.cpython-313.pyc index 39fd206..d5ff765 100644 Binary files a/mediaflow_proxy/extractors/__pycache__/maxstream.cpython-313.pyc and b/mediaflow_proxy/extractors/__pycache__/maxstream.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/mixdrop.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/mixdrop.cpython-313.pyc index 3d78afb..21ac707 100644 Binary files a/mediaflow_proxy/extractors/__pycache__/mixdrop.cpython-313.pyc and b/mediaflow_proxy/extractors/__pycache__/mixdrop.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/okru.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/okru.cpython-313.pyc index 2f8fd0e..2f80c85 100644 Binary files a/mediaflow_proxy/extractors/__pycache__/okru.cpython-313.pyc and b/mediaflow_proxy/extractors/__pycache__/okru.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/sportsonline.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/sportsonline.cpython-313.pyc index c9051e7..d93c64f 100644 Binary files a/mediaflow_proxy/extractors/__pycache__/sportsonline.cpython-313.pyc and b/mediaflow_proxy/extractors/__pycache__/sportsonline.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/streamtape.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/streamtape.cpython-313.pyc index 9889241..0141915 100644 Binary files a/mediaflow_proxy/extractors/__pycache__/streamtape.cpython-313.pyc and b/mediaflow_proxy/extractors/__pycache__/streamtape.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/streamwish.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/streamwish.cpython-313.pyc index 3e4e085..82e4139 100644 Binary files a/mediaflow_proxy/extractors/__pycache__/streamwish.cpython-313.pyc and b/mediaflow_proxy/extractors/__pycache__/streamwish.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/supervideo.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/supervideo.cpython-313.pyc index 0882358..8e4ba64 100644 Binary files a/mediaflow_proxy/extractors/__pycache__/supervideo.cpython-313.pyc and b/mediaflow_proxy/extractors/__pycache__/supervideo.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/turbovidplay.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/turbovidplay.cpython-313.pyc index 0055a02..b10b73c 100644 Binary files a/mediaflow_proxy/extractors/__pycache__/turbovidplay.cpython-313.pyc and b/mediaflow_proxy/extractors/__pycache__/turbovidplay.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/uqload.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/uqload.cpython-313.pyc index 4d802f6..e86aeae 100644 Binary files a/mediaflow_proxy/extractors/__pycache__/uqload.cpython-313.pyc and b/mediaflow_proxy/extractors/__pycache__/uqload.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/vavoo.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/vavoo.cpython-313.pyc index 7777896..65feec6 100644 Binary files a/mediaflow_proxy/extractors/__pycache__/vavoo.cpython-313.pyc and b/mediaflow_proxy/extractors/__pycache__/vavoo.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/vidfast.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/vidfast.cpython-313.pyc new file mode 100644 index 0000000..600ccac Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/vidfast.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/vidmoly.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/vidmoly.cpython-313.pyc index adafcde..a48481d 100644 Binary files a/mediaflow_proxy/extractors/__pycache__/vidmoly.cpython-313.pyc and b/mediaflow_proxy/extractors/__pycache__/vidmoly.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/vidoza.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/vidoza.cpython-313.pyc index 273f693..e17b1e2 100644 Binary files a/mediaflow_proxy/extractors/__pycache__/vidoza.cpython-313.pyc and b/mediaflow_proxy/extractors/__pycache__/vidoza.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/vixcloud.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/vixcloud.cpython-313.pyc index 99c4449..815a77c 100644 Binary files a/mediaflow_proxy/extractors/__pycache__/vixcloud.cpython-313.pyc and b/mediaflow_proxy/extractors/__pycache__/vixcloud.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/voe.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/voe.cpython-313.pyc index ceaeb54..f75c0d5 100644 Binary files a/mediaflow_proxy/extractors/__pycache__/voe.cpython-313.pyc and b/mediaflow_proxy/extractors/__pycache__/voe.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/base.py b/mediaflow_proxy/extractors/base.py index bd0b76e..d3a2ac0 100644 --- a/mediaflow_proxy/extractors/base.py +++ b/mediaflow_proxy/extractors/base.py @@ -9,7 +9,7 @@ import json import logging from mediaflow_proxy.configs import settings -from mediaflow_proxy.utils.http_client import create_aiohttp_session +from mediaflow_proxy.utils.http_client import create_aiohttp_session, _ensure_routing_initialized, get_routing_config from mediaflow_proxy.utils.http_utils import DownloadError logger = logging.getLogger(__name__) @@ -65,6 +65,16 @@ class BaseExtractor(ABC): # merge incoming headers (e.g. Accept-Language / Referer) with default base headers self.base_headers.update(request_headers or {}) + @staticmethod + def _get_proxy(url: str) -> str | None: + """Return the configured proxy URL for *url*, or None if no proxy applies.""" + try: + _ensure_routing_initialized() + route = get_routing_config().match_url(url) + return route.proxy_url + except Exception: + return None + async def _make_request( self, url: str, diff --git a/mediaflow_proxy/extractors/city.py b/mediaflow_proxy/extractors/city.py new file mode 100644 index 0000000..5394e8e --- /dev/null +++ b/mediaflow_proxy/extractors/city.py @@ -0,0 +1,158 @@ +import re +import json +import base64 +from typing import Dict, Any +from urllib.parse import urlparse, parse_qs +from bs4 import BeautifulSoup + +from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError + + +class CityExtractor(BaseExtractor): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.mediaflow_endpoint = "hls_manifest_proxy" + + def atob_fixed(self, data: str) -> str: + try: + return base64.b64decode(data).decode("utf-8", errors="ignore") + except Exception: + return "" + + def extract_json_array(self, decoded: str): + start = decoded.find("file:") + if start == -1: + start = decoded.find("sources:") + if start == -1: + return None + + start = decoded.find("[", start) + if start == -1: + return None + + depth = 0 + for i in range(start, len(decoded)): + if decoded[i] == "[": + depth += 1 + elif decoded[i] == "]": + depth -= 1 + if depth == 0: + return decoded[start : i + 1] + + return None + + def pick_stream(self, file_data, season: int = 1, episode: int = 1): + + if isinstance(file_data, str): + return file_data + + if isinstance(file_data, list): + if all(isinstance(x, dict) and "file" in x for x in file_data): + idx = max(0, episode - 1) + return file_data[idx]["file"] + + selected_season = None + for s in file_data: + if not isinstance(s, dict): + continue + folder = s.get("folder") + if not folder: + continue + title = (s.get("title") or "").lower() + if re.search(rf"(season|s)\s*0*{season}\b", title): + selected_season = folder + break + + if not selected_season: + for s in file_data: + folder = s.get("folder") + if folder: + selected_season = folder + break + + if not selected_season: + return None + + idx = max(0, episode - 1) + return selected_season[idx].get("file") if idx < len(selected_season) else selected_season[0].get("file") + + return None + + async def extract(self, url: str, season: int = 1, episode: int = 1, **kwargs) -> Dict[str, Any]: + """Main extraction entry point""" + + parsed = urlparse(url) + query = parse_qs(parsed.query) + if "s" in query: + try: + season = int(query["s"][0]) + except Exception: + pass + if "e" in query: + try: + episode = int(query["e"][0]) + except Exception: + pass + + clean_url = f"{parsed.scheme}://{parsed.netloc}{parsed.path}" + + cookie_b64 = "ZGxlX3VzZXJfaWQ9MzI3Mjk7IGRsZV9wYXNzd29yZD04OTQxNzFjNmE4ZGFiMThlZTU5NGQ1YzY1MjAwOWEzNTs=" + cookie = base64.b64decode(cookie_b64).decode() + + headers = { + "User-Agent": self.base_headers.get("user-agent"), + "Referer": clean_url, + "Cookie": cookie, + } + + response = await self._make_request(clean_url, headers=headers) + if response.status != 200: + raise ExtractorError("Failed to load City page") + + soup = BeautifulSoup(response.text, "lxml") + file_data = None + + for script in soup.find_all("script"): + if file_data: + break + + script_html = script.string or script.text or "" + if "atob" not in script_html: + continue + + matches = re.finditer(r'atob\(\s*[\'"](.*?)[\'"]\s*\)', script_html) + for match in matches: + encoded = match.group(1) + decoded = self.atob_fixed(encoded) + if not decoded: + continue + + raw_json = self.extract_json_array(decoded) + if raw_json: + try: + raw_json = re.sub(r"\\(.)", r"\1", raw_json) + file_data = json.loads(raw_json) + except Exception: + file_data = raw_json + break + + file_match = re.search(r'file\s*:\s*[\'"](.*?)[\'"]', decoded, re.S) + if file_match: + file_data = file_match.group(1) + break + + if not file_data: + raise ExtractorError("No stream found") + + stream_url = self.pick_stream(file_data, season=season, episode=episode) + if not stream_url: + raise ExtractorError("Stream extraction failed") + + return { + "destination_url": stream_url, + "request_headers": { + "Referer": clean_url, + "User-Agent": self.base_headers.get("user-agent"), + }, + "mediaflow_endpoint": self.mediaflow_endpoint, + } diff --git a/mediaflow_proxy/extractors/dlhd.py b/mediaflow_proxy/extractors/dlhd.py deleted file mode 100644 index 07a1d87..0000000 --- a/mediaflow_proxy/extractors/dlhd.py +++ /dev/null @@ -1,704 +0,0 @@ -import hashlib -import hmac -import re -import time -import logging - -from typing import Any, Dict, Optional -from urllib.parse import urlparse - -import aiohttp - -from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError, HttpResponse -from mediaflow_proxy.utils.http_client import create_aiohttp_session -from mediaflow_proxy.configs import settings - - -logger = logging.getLogger(__name__) - -# Silenzia l'errore ConnectionResetError su Windows -logging.getLogger("asyncio").setLevel(logging.CRITICAL) - -# Default fingerprint parameters -DEFAULT_DLHD_USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0" -DEFAULT_DLHD_SCREEN_RESOLUTION = "1920x1080" -DEFAULT_DLHD_TIMEZONE = "UTC" -DEFAULT_DLHD_LANGUAGE = "en" - - -def compute_fingerprint( - user_agent: str = DEFAULT_DLHD_USER_AGENT, - screen_resolution: str = DEFAULT_DLHD_SCREEN_RESOLUTION, - timezone: str = DEFAULT_DLHD_TIMEZONE, - language: str = DEFAULT_DLHD_LANGUAGE, -) -> str: - """ - Compute the X-Fingerprint header value. - - Algorithm: - fingerprint = SHA256(useragent + screen_resolution + timezone + language).hex()[:16] - - Args: - user_agent: The user agent string - screen_resolution: The screen resolution (e.g., "1920x1080") - timezone: The timezone (e.g., "UTC") - language: The language code (e.g., "en") - - Returns: - The 16-character fingerprint - """ - combined = f"{user_agent}{screen_resolution}{timezone}{language}" - return hashlib.sha256(combined.encode("utf-8")).hexdigest()[:16] - - -def compute_key_path(resource: str, number: str, timestamp: int, fingerprint: str, secret_key: str) -> str: - """ - Compute the X-Key-Path header value. - - Algorithm: - key_path = HMAC-SHA256("resource|number|timestamp|fingerprint", secret_key).hex()[:16] - - Args: - resource: The resource from the key URL - number: The number from the key URL - timestamp: The Unix timestamp - fingerprint: The fingerprint value - secret_key: The HMAC secret key (channel_salt) - - Returns: - The 16-character key path - """ - combined = f"{resource}|{number}|{timestamp}|{fingerprint}" - hmac_hash = hmac.new(secret_key.encode("utf-8"), combined.encode("utf-8"), hashlib.sha256).hexdigest() - return hmac_hash[:16] - - -def compute_key_headers(key_url: str, secret_key: str) -> tuple[int, int, str, str] | None: - """ - Compute X-Key-Timestamp, X-Key-Nonce, X-Key-Path, and X-Fingerprint for a /key/ URL. - - Algorithm: - 1. Extract resource and number from URL pattern /key/{resource}/{number} - 2. ts = Unix timestamp in seconds - 3. hmac_hash = HMAC-SHA256(resource, secret_key).hex() - 4. nonce = proof-of-work: find i where MD5(hmac+resource+number+ts+i)[:4] < 0x1000 - 5. fingerprint = compute_fingerprint() - 6. key_path = HMAC-SHA256("resource|number|ts|fingerprint", secret_key).hex()[:16] - - Args: - key_url: The key URL containing /key/{resource}/{number} - secret_key: The HMAC secret key (channel_salt) - - Returns: - Tuple of (timestamp, nonce, key_path, fingerprint) or None if URL doesn't match pattern - """ - # Extract resource and number from URL - pattern = r"/key/([^/]+)/(\d+)" - match = re.search(pattern, key_url) - - if not match: - return None - - resource = match.group(1) - number = match.group(2) - - ts = int(time.time()) - - # Compute HMAC-SHA256 - hmac_hash = hmac.new(secret_key.encode("utf-8"), resource.encode("utf-8"), hashlib.sha256).hexdigest() - - # Proof-of-work loop - nonce = 0 - for i in range(100000): - combined = f"{hmac_hash}{resource}{number}{ts}{i}" - md5_hash = hashlib.md5(combined.encode("utf-8")).hexdigest() - prefix_value = int(md5_hash[:4], 16) - - if prefix_value < 0x1000: # < 4096 - nonce = i - break - - fingerprint = compute_fingerprint() - key_path = compute_key_path(resource, number, ts, fingerprint, secret_key) - - return ts, nonce, key_path, fingerprint - - -class DLHDExtractor(BaseExtractor): - """DLHD (DaddyLive) URL extractor for M3U8 streams. - - Supports the new authentication flow with: - - EPlayerAuth extraction (auth_token, channel_key, channel_salt) - - Server lookup for dynamic server selection - - Dynamic key header computation for AES-128 encrypted streams - """ - - def __init__(self, request_headers: dict): - super().__init__(request_headers) - self.mediaflow_endpoint = "hls_key_proxy" - self._iframe_context: Optional[str] = None - self._flaresolverr_cookies: Optional[str] = None - self._flaresolverr_user_agent: Optional[str] = None - - async def _fetch_via_flaresolverr(self, url: str) -> HttpResponse: - """Fetch a URL using FlareSolverr to bypass Cloudflare protection.""" - if not settings.flaresolverr_url: - raise ExtractorError("FlareSolverr URL not configured. Set FLARESOLVERR_URL in environment.") - - flaresolverr_endpoint = f"{settings.flaresolverr_url.rstrip('/')}/v1" - payload = { - "cmd": "request.get", - "url": url, - "maxTimeout": settings.flaresolverr_timeout * 1000, - } - - logger.info(f"Using FlareSolverr to fetch: {url}") - - async with aiohttp.ClientSession() as session: - async with session.post( - flaresolverr_endpoint, - json=payload, - timeout=aiohttp.ClientTimeout(total=settings.flaresolverr_timeout + 10), - ) as response: - if response.status != 200: - raise ExtractorError(f"FlareSolverr returned status {response.status}") - - data = await response.json() - - if data.get("status") != "ok": - raise ExtractorError(f"FlareSolverr failed: {data.get('message', 'Unknown error')}") - - solution = data.get("solution", {}) - html_content = solution.get("response", "") - final_url = solution.get("url", url) - status = solution.get("status", 200) - - # Store cookies and user-agent for subsequent requests - cookies = solution.get("cookies", []) - if cookies: - cookie_str = "; ".join([f"{c['name']}={c['value']}" for c in cookies]) - self._flaresolverr_cookies = cookie_str - logger.info(f"FlareSolverr provided {len(cookies)} cookies") - - user_agent = solution.get("userAgent") - if user_agent: - self._flaresolverr_user_agent = user_agent - logger.info(f"FlareSolverr user-agent: {user_agent}") - - logger.info(f"FlareSolverr successfully bypassed Cloudflare for: {url}") - - return HttpResponse( - status=status, - headers={}, - text=html_content, - content=html_content.encode("utf-8", errors="replace"), - url=final_url, - ) - - async def _make_request( - self, url: str, method: str = "GET", headers: Optional[Dict] = None, use_flaresolverr: bool = False, **kwargs - ) -> HttpResponse: - """Override to disable SSL verification and optionally use FlareSolverr.""" - # Use FlareSolverr for Cloudflare-protected pages - if use_flaresolverr and settings.flaresolverr_url: - return await self._fetch_via_flaresolverr(url) - - timeout = kwargs.pop("timeout", 15) - kwargs.pop("retries", 3) # consumed but not used directly - kwargs.pop("backoff_factor", 0.5) # consumed but not used directly - - # Merge headers - request_headers = self.base_headers.copy() - if headers: - request_headers.update(headers) - - # Add FlareSolverr cookies if available - if self._flaresolverr_cookies: - existing_cookies = request_headers.get("Cookie", "") - if existing_cookies: - request_headers["Cookie"] = f"{existing_cookies}; {self._flaresolverr_cookies}" - else: - request_headers["Cookie"] = self._flaresolverr_cookies - - # Use FlareSolverr user-agent if available - if self._flaresolverr_user_agent: - request_headers["User-Agent"] = self._flaresolverr_user_agent - - # Use create_aiohttp_session with verify=False for SSL bypass - async with create_aiohttp_session(url, timeout=timeout, verify=False) as (session, proxy_url): - async with session.request(method, url, headers=request_headers, proxy=proxy_url, **kwargs) as response: - content = await response.read() - final_url = str(response.url) - status = response.status - resp_headers = dict(response.headers) - - if status >= 400: - raise ExtractorError(f"HTTP error {status} while requesting {url}") - - return HttpResponse( - status=status, - headers=resp_headers, - text=content.decode("utf-8", errors="replace"), - content=content, - url=final_url, - ) - - async def _extract_session_data(self, iframe_url: str, main_url: str) -> dict | None: - """ - Fetch the iframe URL and extract auth_token, channel_key, and channel_salt. - - Args: - iframe_url: The iframe URL to fetch - main_url: The main site domain for Referer header - - Returns: - Dict with auth_token, channel_key, channel_salt, or None if not found - """ - headers = { - "User-Agent": self._flaresolverr_user_agent or DEFAULT_DLHD_USER_AGENT, - "Referer": f"https://{main_url}/", - } - - try: - resp = await self._make_request(iframe_url, headers=headers, timeout=12) - html = resp.text - except Exception as e: - logger.warning(f"Error fetching iframe URL: {e}") - return None - - # Pattern to extract EPlayerAuth.init block with authToken, channelKey, channelSalt - # Matches: EPlayerAuth.init({ authToken: '...', channelKey: '...', ..., channelSalt: '...' }); - auth_pattern = r"EPlayerAuth\.init\s*\(\s*\{\s*authToken:\s*'([^']+)'" - channel_key_pattern = r"channelKey:\s*'([^']+)'" - channel_salt_pattern = r"channelSalt:\s*'([^']+)'" - - # Pattern to extract server lookup base URL from fetchWithRetry call - lookup_pattern = r"fetchWithRetry\s*\(\s*'([^']+server_lookup\?channel_id=)" - - auth_match = re.search(auth_pattern, html) - channel_key_match = re.search(channel_key_pattern, html) - channel_salt_match = re.search(channel_salt_pattern, html) - lookup_match = re.search(lookup_pattern, html) - - if auth_match and channel_key_match and channel_salt_match: - result = { - "auth_token": auth_match.group(1), - "channel_key": channel_key_match.group(1), - "channel_salt": channel_salt_match.group(1), - } - if lookup_match: - result["server_lookup_url"] = lookup_match.group(1) + result["channel_key"] - - return result - - return None - - async def _get_server_key(self, server_lookup_url: str, iframe_url: str) -> str | None: - """ - Fetch the server lookup URL and extract the server_key. - - Args: - server_lookup_url: The server lookup URL - iframe_url: The iframe URL for extracting the host for headers - - Returns: - The server_key or None if not found - """ - parsed = urlparse(iframe_url) - iframe_host = parsed.netloc - - headers = { - "User-Agent": self._flaresolverr_user_agent or DEFAULT_DLHD_USER_AGENT, - "Referer": f"https://{iframe_host}/", - "Origin": f"https://{iframe_host}", - } - - try: - resp = await self._make_request(server_lookup_url, headers=headers, timeout=10) - data = resp.json() - return data.get("server_key") - except Exception as e: - logger.warning(f"Error fetching server lookup: {e}") - return None - - def _build_m3u8_url(self, server_key: str, channel_key: str) -> str: - """ - Build the m3u8 URL based on the server_key. - - Args: - server_key: The server key from server lookup - channel_key: The channel key - - Returns: - The m3u8 URL (with .css extension as per the original implementation) - """ - if server_key == "top1/cdn": - return f"https://top1.dvalna.ru/top1/cdn/{channel_key}/mono.css" - else: - return f"https://{server_key}new.dvalna.ru/{server_key}/{channel_key}/mono.css" - - async def _extract_new_auth_flow(self, iframe_url: str, iframe_content: str, headers: dict) -> Dict[str, Any]: - """Handles the new authentication flow found in recent updates.""" - - def _extract_params(js: str) -> Dict[str, Optional[str]]: - params = {} - patterns = { - "channel_key": r'(?:const|var|let)\s+(?:CHANNEL_KEY|channelKey)\s*=\s*["\']([^"\']+)["\']', - "auth_token": r'(?:const|var|let)\s+AUTH_TOKEN\s*=\s*["\']([^"\']+)["\']', - "auth_country": r'(?:const|var|let)\s+AUTH_COUNTRY\s*=\s*["\']([^"\']+)["\']', - "auth_ts": r'(?:const|var|let)\s+AUTH_TS\s*=\s*["\']([^"\']+)["\']', - "auth_expiry": r'(?:const|var|let)\s+AUTH_EXPIRY\s*=\s*["\']([^"\']+)["\']', - } - for key, pattern in patterns.items(): - match = re.search(pattern, js) - params[key] = match.group(1) if match else None - return params - - params = _extract_params(iframe_content) - - missing_params = [k for k, v in params.items() if not v] - if missing_params: - # This is not an error, just means it's not the new flow - raise ExtractorError(f"Not the new auth flow: missing params {missing_params}") - - logger.info("New auth flow detected. Proceeding with POST auth.") - - # 1. Initial Auth POST - auth_url = "https://security.newkso.ru/auth2.php" - - iframe_origin = f"https://{urlparse(iframe_url).netloc}" - auth_headers = headers.copy() - auth_headers.update( - { - "Accept": "*/*", - "Accept-Language": "en-US,en;q=0.9", - "Origin": iframe_origin, - "Referer": iframe_url, - "Sec-Fetch-Dest": "empty", - "Sec-Fetch-Mode": "cors", - "Sec-Fetch-Site": "cross-site", - "Priority": "u=1, i", - } - ) - - # Build form data for multipart/form-data - form_data = aiohttp.FormData() - form_data.add_field("channelKey", params["channel_key"]) - form_data.add_field("country", params["auth_country"]) - form_data.add_field("timestamp", params["auth_ts"]) - form_data.add_field("expiry", params["auth_expiry"]) - form_data.add_field("token", params["auth_token"]) - - try: - async with create_aiohttp_session(auth_url, timeout=12, verify=False) as (session, proxy_url): - async with session.post( - auth_url, - headers=auth_headers, - data=form_data, - proxy=proxy_url, - ) as response: - content = await response.read() - response.raise_for_status() - import json - - auth_data = json.loads(content.decode("utf-8")) - if not (auth_data.get("valid") or auth_data.get("success")): - raise ExtractorError(f"Initial auth failed with response: {auth_data}") - logger.info("New auth flow: Initial auth successful.") - except ExtractorError: - raise - except Exception as e: - raise ExtractorError(f"New auth flow failed during initial auth POST: {e}") - - # 2. Server Lookup - server_lookup_url = f"https://{urlparse(iframe_url).netloc}/server_lookup.js?channel_id={params['channel_key']}" - try: - # Use _make_request as it handles retries - lookup_resp = await self._make_request(server_lookup_url, headers=headers, timeout=10) - server_data = lookup_resp.json() - server_key = server_data.get("server_key") - if not server_key: - raise ExtractorError(f"No server_key in lookup response: {server_data}") - logger.info(f"New auth flow: Server lookup successful - Server key: {server_key}") - except ExtractorError: - raise - except Exception as e: - raise ExtractorError(f"New auth flow failed during server lookup: {e}") - - # 3. Build final stream URL - channel_key = params["channel_key"] - auth_token = params["auth_token"] - # The JS logic uses .css, not .m3u8 - if server_key == "top1/cdn": - stream_url = f"https://top1.newkso.ru/top1/cdn/{channel_key}/mono.css" - else: - stream_url = f"https://{server_key}new.newkso.ru/{server_key}/{channel_key}/mono.css" - - logger.info(f"New auth flow: Constructed stream URL: {stream_url}") - - stream_headers = { - "User-Agent": headers["User-Agent"], - "Referer": iframe_url, - "Origin": iframe_origin, - "Authorization": f"Bearer {auth_token}", - "X-Channel-Key": channel_key, - } - - return { - "destination_url": stream_url, - "request_headers": stream_headers, - "mediaflow_endpoint": "hls_manifest_proxy", - } - - async def _extract_lovecdn_stream(self, iframe_url: str, iframe_content: str, headers: dict) -> Dict[str, Any]: - """ - Alternative extractor for lovecdn.ru iframe that uses a different format. - """ - try: - # Look for direct stream URL patterns - m3u8_patterns = [ - r'["\']([^"\']*\.m3u8[^"\']*)["\']', - r'source[:\s]+["\']([^"\']+)["\']', - r'file[:\s]+["\']([^"\']+\.m3u8[^"\']*)["\']', - r'hlsManifestUrl[:\s]*["\']([^"\']+)["\']', - ] - - stream_url = None - for pattern in m3u8_patterns: - matches = re.findall(pattern, iframe_content) - for match in matches: - if ".m3u8" in match and match.startswith("http"): - stream_url = match - logger.info(f"Found direct m3u8 URL: {stream_url}") - break - if stream_url: - break - - # Pattern 2: Look for dynamic URL construction - if not stream_url: - channel_match = re.search(r'(?:stream|channel)["\s:=]+["\']([^"\']+)["\']', iframe_content) - server_match = re.search(r'(?:server|domain|host)["\s:=]+["\']([^"\']+)["\']', iframe_content) - - if channel_match: - channel_name = channel_match.group(1) - server = server_match.group(1) if server_match else "newkso.ru" - stream_url = f"https://{server}/{channel_name}/mono.m3u8" - logger.info(f"Constructed stream URL: {stream_url}") - - if not stream_url: - # Fallback: look for any URL that looks like a stream - url_pattern = r'https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*' - matches = re.findall(url_pattern, iframe_content) - if matches: - stream_url = matches[0] - logger.info(f"Found fallback stream URL: {stream_url}") - - if not stream_url: - raise ExtractorError("Could not find stream URL in lovecdn.ru iframe") - - # Use iframe URL as referer - iframe_origin = f"https://{urlparse(iframe_url).netloc}" - stream_headers = {"User-Agent": headers["User-Agent"], "Referer": iframe_url, "Origin": iframe_origin} - - # Determine endpoint based on the stream domain - endpoint = "hls_key_proxy" - - logger.info(f"Using lovecdn.ru stream with endpoint: {endpoint}") - - return { - "destination_url": stream_url, - "request_headers": stream_headers, - "mediaflow_endpoint": endpoint, - } - - except Exception as e: - raise ExtractorError(f"Failed to extract lovecdn.ru stream: {e}") - - async def _extract_direct_stream(self, channel_id: str) -> Dict[str, Any]: - """ - Direct stream extraction using server lookup API with the new auth flow. - This extracts auth_token, channel_key, channel_salt and computes key headers. - """ - # Common iframe domains for DLHD - iframe_domains = ["lefttoplay.xyz"] - - for iframe_domain in iframe_domains: - try: - iframe_url = f"https://{iframe_domain}/premiumtv/daddyhd.php?id={channel_id}" - logger.info(f"Attempting extraction via {iframe_domain}") - - session_data = await self._extract_session_data(iframe_url, "dlhd.link") - - if not session_data: - logger.debug(f"No session data from {iframe_domain}") - continue - - logger.info(f"Got session data from {iframe_domain}: channel_key={session_data['channel_key']}") - - # Get server key - if "server_lookup_url" not in session_data: - logger.debug(f"No server lookup URL from {iframe_domain}") - continue - - server_key = await self._get_server_key(session_data["server_lookup_url"], iframe_url) - - if not server_key: - logger.debug(f"No server key from {iframe_domain}") - continue - - logger.info(f"Got server key: {server_key}") - - # Build m3u8 URL - m3u8_url = self._build_m3u8_url(server_key, session_data["channel_key"]) - logger.info(f"M3U8 URL: {m3u8_url}") - - # Build stream headers with auth - iframe_origin = f"https://{iframe_domain}" - stream_headers = { - "User-Agent": self._flaresolverr_user_agent or DEFAULT_DLHD_USER_AGENT, - "Referer": iframe_url, - "Origin": iframe_origin, - "Authorization": f"Bearer {session_data['auth_token']}", - } - - # Return the result with key header parameters - # These will be used to compute headers when fetching keys - return { - "destination_url": m3u8_url, - "request_headers": stream_headers, - "mediaflow_endpoint": "hls_key_proxy", - # Force playlist processing since DLHD uses .css extension for m3u8 - "force_playlist_proxy": True, - # Key header computation parameters - "dlhd_key_params": { - "channel_salt": session_data["channel_salt"], - "auth_token": session_data["auth_token"], - "iframe_url": iframe_url, - }, - } - - except Exception as e: - logger.warning(f"Failed extraction via {iframe_domain}: {e}") - continue - - raise ExtractorError(f"Failed to extract stream from all iframe domains for channel {channel_id}") - - async def extract(self, url: str, **kwargs) -> Dict[str, Any]: - """Main extraction flow - uses direct server lookup with new auth flow.""" - - def extract_channel_id(u: str) -> Optional[str]: - match_watch_id = re.search(r"watch\.php\?id=(\d+)", u) - if match_watch_id: - return match_watch_id.group(1) - # Also try stream-XXX pattern - match_stream = re.search(r"stream-(\d+)", u) - if match_stream: - return match_stream.group(1) - return None - - try: - channel_id = extract_channel_id(url) - if not channel_id: - raise ExtractorError(f"Unable to extract channel ID from {url}") - - logger.info(f"Extracting DLHD stream for channel ID: {channel_id}") - - # Try direct stream extraction with new auth flow - try: - return await self._extract_direct_stream(channel_id) - except ExtractorError as e: - logger.warning(f"Direct stream extraction failed: {e}") - - # Fallback to legacy iframe-based extraction if direct fails - logger.info("Falling back to iframe-based extraction...") - return await self._extract_via_iframe(url, channel_id) - - except Exception as e: - raise ExtractorError(f"Extraction failed: {str(e)}") - - async def _extract_via_iframe(self, url: str, channel_id: str) -> Dict[str, Any]: - """Legacy iframe-based extraction flow - used as fallback.""" - baseurl = "https://dlhd.dad/" - - daddy_origin = urlparse(baseurl).scheme + "://" + urlparse(baseurl).netloc - daddylive_headers = { - "User-Agent": self._flaresolverr_user_agent - or "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36", - "Referer": baseurl, - "Origin": daddy_origin, - } - - # 1. Request initial page - use FlareSolverr if available to bypass Cloudflare - use_flaresolverr = settings.flaresolverr_url is not None - resp1 = await self._make_request(url, headers=daddylive_headers, timeout=15, use_flaresolverr=use_flaresolverr) - resp1_text = resp1.text - - # Update headers with FlareSolverr user-agent after initial request - if self._flaresolverr_user_agent: - daddylive_headers["User-Agent"] = self._flaresolverr_user_agent - - player_links = re.findall(r']*data-url="([^"]+)"[^>]*>Player\s*\d+', resp1_text) - if not player_links: - raise ExtractorError("No player links found on the page.") - - # Try all players and collect all valid iframes - last_player_error = None - iframe_candidates = [] - - for player_url in player_links: - try: - if not player_url.startswith("http"): - player_url = baseurl + player_url.lstrip("/") - - daddylive_headers["Referer"] = player_url - daddylive_headers["Origin"] = player_url - resp2 = await self._make_request(player_url, headers=daddylive_headers, timeout=12) - resp2_text = resp2.text - iframes2 = re.findall(r' dict: + """ + Use Byparr to bypass Cloudflare protection on the DoodStream embed page. + + Strategy: fetch the embed page without any injected script. Byparr's + Firefox/Camoufox browser auto-passes Cloudflare's bot checks and often + bypasses the Turnstile CAPTCHA gate directly, returning the embed HTML + with pass_md5. If the response doesn't contain pass_md5, reuse the CF + cookies + UA from Byparr in a follow-up curl_cffi request (which avoids + re-triggering the bot check). + """ + endpoint = f"{settings.byparr_url.rstrip('/')}/v1" + embed_url = url if "/e/" in url else f"https://{urlparse(url).netloc}/e/{video_id}" + payload = { + "cmd": "request.get", + "url": embed_url, + "maxTimeout": settings.byparr_timeout * 1000, } - embed_url = f"{self.base_url}/e/{video_id}" - html = (await self._make_request(embed_url, headers=headers)).text + async with aiohttp.ClientSession() as session: + async with session.post( + endpoint, + json=payload, + timeout=aiohttp.ClientTimeout(total=settings.byparr_timeout + 15), + ) as resp: + if resp.status != 200: + raise ExtractorError(f"Byparr HTTP {resp.status}") + data = await resp.json() - match = re.search(r"(\/pass_md5\/[^']+)", html) - if not match: - raise ExtractorError("Dood: pass_md5 not found") + if data.get("status") != "ok": + raise ExtractorError(f"Byparr: {data.get('message', 'unknown error')}") - pass_url = urljoin(self.base_url, match.group(1)) + solution = data.get("solution", {}) + final_url = solution.get("url", embed_url) + if not final_url.startswith("http"): + final_url = embed_url + base_url = f"https://{urlparse(final_url).netloc}" + html = solution.get("response", "") - base_stream = (await self._make_request(pass_url, headers=headers)).text.strip() + if "pass_md5" not in html: + # Byparr may not have the pass_md5 in the initial response. + # Try two recovery strategies in order: + # + # 1. Cookie reuse — if Byparr collected CF clearance cookies before + # the page loaded fully, inject them into a curl_cffi request. + # 2. Plain curl_cffi — Chrome TLS impersonation without JS execution. + raw_cookies = solution.get("cookies", []) + cookies = {c["name"]: c["value"] for c in raw_cookies} + ua = solution.get("userAgent", _DOOD_UA) - token_match = re.search(r"token=([^&]+)", html) + if cookies: + cf_domain = ( + next( + (c.get("domain", "").lstrip(".") for c in raw_cookies if c.get("name") == "cf_clearance"), + None, + ) + or "playmogo.com" + ) + retry_url = f"https://{cf_domain}/e/{video_id}" + logger.debug( + "Byparr response lacked pass_md5 (final_url=%s); retrying %s with CF cookies via curl_cffi", + final_url, + retry_url, + ) + proxy = self._get_proxy(retry_url) + async with AsyncSession() as s: + r = await s.get( + retry_url, + impersonate="chrome", + cookies=cookies, + headers={"User-Agent": ua, "Referer": f"https://{cf_domain}/"}, + timeout=20, + **({"proxy": proxy} if proxy else {}), + ) + html = r.text + final_url = str(r.url) + base_url = f"https://{urlparse(final_url).netloc}" + + if "pass_md5" not in html: + logger.debug("Byparr cookie reuse also failed; falling back to curl_cffi for %s", embed_url) + return await self._extract_via_curl_cffi(embed_url, video_id) + + return await self._parse_embed_html(html, base_url) + + # ------------------------------------------------------------------ + # Path 2 – curl_cffi (bypasses CF bot protection; Turnstile may block) + # ------------------------------------------------------------------ + + async def _extract_via_curl_cffi(self, url: str, video_id: str) -> dict: + proxy = self._get_proxy(url) + async with AsyncSession() as s: + r = await s.get( + url, + impersonate="chrome", + headers={"Referer": f"https://{urlparse(url).netloc}/"}, + timeout=30, + allow_redirects=True, + **({"proxy": proxy} if proxy else {}), + ) + final_url = str(r.url) + html = r.text + base_url = f"https://{urlparse(final_url).netloc}" + + if "pass_md5" not in html: + if "turnstile" in html.lower() or "captcha_l" in html: + raise ExtractorError( + "DoodStream: site is serving a Turnstile CAPTCHA that requires " + "browser interaction — cannot be bypassed automatically from this " + "network location. Try a residential IP or a VPN/proxy." + ) + raise ExtractorError(f"DoodStream: pass_md5 not found in embed HTML ({final_url})") + + return await self._parse_embed_html(html, base_url) + + # ------------------------------------------------------------------ + # Common HTML parser + # ------------------------------------------------------------------ + + async def _parse_embed_html(self, html: str, base_url: str) -> dict: + pass_match = re.search(r"(/pass_md5/[^'\"<>\s]+)", html) + if not pass_match: + raise ExtractorError("DoodStream: pass_md5 path not found in embed HTML") + + pass_url = urljoin(base_url, pass_match.group(1)) + ua = self.base_headers.get("user-agent") or _DOOD_UA + headers = { + "user-agent": ua, + "referer": f"{base_url}/", + } + + proxy = self._get_proxy(pass_url) + async with AsyncSession() as s: + r = await s.get( + pass_url, + impersonate="chrome", + headers=headers, + timeout=20, + **({"proxy": proxy} if proxy else {}), + ) + + base_stream = r.text.strip() + if not base_stream or "RELOAD" in base_stream: + raise ExtractorError( + "DoodStream: pass_md5 endpoint returned no stream URL " + "(captcha session may have expired). " + "Ensure BYPARR_URL is set for reliable extraction." + ) + + token_match = re.search(r"token=([^&\s'\"]+)", html) if not token_match: - raise ExtractorError("Dood: token missing") + raise ExtractorError("DoodStream: token not found in embed HTML") token = token_match.group(1) - - final_url = f"{base_stream}123456789?token={token}&expiry={int(time.time())}" + expiry = int(time.time()) + final_url = f"{base_stream}123456789?token={token}&expiry={expiry}" return { "destination_url": final_url, diff --git a/mediaflow_proxy/extractors/factory.py b/mediaflow_proxy/extractors/factory.py index e3158bc..8f02737 100644 --- a/mediaflow_proxy/extractors/factory.py +++ b/mediaflow_proxy/extractors/factory.py @@ -1,8 +1,8 @@ from typing import Dict, Type from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError -from mediaflow_proxy.extractors.dlhd import DLHDExtractor from mediaflow_proxy.extractors.doodstream import DoodStreamExtractor +from mediaflow_proxy.extractors.city import CityExtractor from mediaflow_proxy.extractors.sportsonline import SportsonlineExtractor from mediaflow_proxy.extractors.filelions import FileLionsExtractor from mediaflow_proxy.extractors.filemoon import FileMoonExtractor @@ -24,12 +24,14 @@ from mediaflow_proxy.extractors.vidoza import VidozaExtractor from mediaflow_proxy.extractors.vixcloud import VixCloudExtractor from mediaflow_proxy.extractors.fastream import FastreamExtractor from mediaflow_proxy.extractors.voe import VoeExtractor +from mediaflow_proxy.extractors.vidfast import VidFastExtractor class ExtractorFactory: """Factory for creating URL extractors.""" _extractors: Dict[str, Type[BaseExtractor]] = { + "City": CityExtractor, "Doodstream": DoodStreamExtractor, "FileLions": FileLionsExtractor, "FileMoon": FileMoonExtractor, @@ -46,13 +48,13 @@ class ExtractorFactory: "Maxstream": MaxstreamExtractor, "LiveTV": LiveTVExtractor, "LuluStream": LuluStreamExtractor, - "DLHD": DLHDExtractor, "Vavoo": VavooExtractor, "Vidmoly": VidmolyExtractor, "Vidoza": VidozaExtractor, "Fastream": FastreamExtractor, "Voe": VoeExtractor, "Sportsonline": SportsonlineExtractor, + "VidFast": VidFastExtractor, } @classmethod diff --git a/mediaflow_proxy/extractors/lulustream.py b/mediaflow_proxy/extractors/lulustream.py index 63aaf7d..bdd18e9 100644 --- a/mediaflow_proxy/extractors/lulustream.py +++ b/mediaflow_proxy/extractors/lulustream.py @@ -1,23 +1,42 @@ import re from typing import Dict, Any +from curl_cffi.requests import AsyncSession + from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError class LuluStreamExtractor(BaseExtractor): + """LuluStream URL extractor. + + Uses curl_cffi + Chrome impersonation to bypass Cloudflare protection. + lulustream.com embeds are served via luluvdo.com. + """ + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.mediaflow_endpoint = "hls_manifest_proxy" async def extract(self, url: str, **kwargs) -> Dict[str, Any]: - response = await self._make_request(url) + proxy = self._get_proxy(url) + async with AsyncSession() as session: + response = await session.get( + url, + impersonate="chrome", + timeout=30, + allow_redirects=True, + **({"proxy": proxy} if proxy else {}), + ) + + if response.status_code >= 400: + raise ExtractorError(f"HTTP {response.status_code} while fetching {url}") # See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/lulustream.py pattern = r"""sources:\s*\[{file:\s*["'](?P[^"']+)""" match = re.search(pattern, response.text, re.DOTALL) if not match: - raise ExtractorError("Failed to extract source URL") - final_url = match.group(1) + raise ExtractorError("LuluStream: Failed to extract source URL") + final_url = match.group("url") self.base_headers["referer"] = url return { diff --git a/mediaflow_proxy/extractors/sportsonline.py b/mediaflow_proxy/extractors/sportsonline.py index a72a62b..fb747ff 100644 --- a/mediaflow_proxy/extractors/sportsonline.py +++ b/mediaflow_proxy/extractors/sportsonline.py @@ -1,7 +1,7 @@ import re import logging from typing import Any, Dict -from urllib.parse import urlparse +from urllib.parse import urljoin, urlparse from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError from mediaflow_proxy.utils.packed import unpack @@ -14,7 +14,7 @@ class SportsonlineExtractor(BaseExtractor): Strategy: 1. Fetch page -> find first