diff --git a/mediaflow_proxy/__pycache__/__init__.cpython-313.pyc b/mediaflow_proxy/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..0c4e097 Binary files /dev/null and b/mediaflow_proxy/__pycache__/__init__.cpython-313.pyc differ diff --git a/mediaflow_proxy/__pycache__/configs.cpython-313.pyc b/mediaflow_proxy/__pycache__/configs.cpython-313.pyc new file mode 100644 index 0000000..d9c5251 Binary files /dev/null and b/mediaflow_proxy/__pycache__/configs.cpython-313.pyc differ diff --git a/mediaflow_proxy/__pycache__/const.cpython-313.pyc b/mediaflow_proxy/__pycache__/const.cpython-313.pyc new file mode 100644 index 0000000..d1bbd34 Binary files /dev/null and b/mediaflow_proxy/__pycache__/const.cpython-313.pyc differ diff --git a/mediaflow_proxy/__pycache__/handlers.cpython-313.pyc b/mediaflow_proxy/__pycache__/handlers.cpython-313.pyc new file mode 100644 index 0000000..5de2e62 Binary files /dev/null and b/mediaflow_proxy/__pycache__/handlers.cpython-313.pyc differ diff --git a/mediaflow_proxy/__pycache__/main.cpython-313.pyc b/mediaflow_proxy/__pycache__/main.cpython-313.pyc new file mode 100644 index 0000000..87f467e Binary files /dev/null and b/mediaflow_proxy/__pycache__/main.cpython-313.pyc differ diff --git a/mediaflow_proxy/__pycache__/middleware.cpython-313.pyc b/mediaflow_proxy/__pycache__/middleware.cpython-313.pyc new file mode 100644 index 0000000..1dab5e5 Binary files /dev/null and b/mediaflow_proxy/__pycache__/middleware.cpython-313.pyc differ diff --git a/mediaflow_proxy/__pycache__/mpd_processor.cpython-313.pyc b/mediaflow_proxy/__pycache__/mpd_processor.cpython-313.pyc new file mode 100644 index 0000000..e348eeb Binary files /dev/null and b/mediaflow_proxy/__pycache__/mpd_processor.cpython-313.pyc differ diff --git a/mediaflow_proxy/__pycache__/schemas.cpython-313.pyc b/mediaflow_proxy/__pycache__/schemas.cpython-313.pyc new file mode 100644 index 0000000..0ba9e5e Binary files /dev/null and b/mediaflow_proxy/__pycache__/schemas.cpython-313.pyc differ diff --git a/mediaflow_proxy/configs.py b/mediaflow_proxy/configs.py index d80c3a1..fb1f722 100644 --- a/mediaflow_proxy/configs.py +++ b/mediaflow_proxy/configs.py @@ -114,6 +114,22 @@ class Settings(BaseSettings): upstream_retry_delay: float = 1.0 # Delay (seconds) between retry attempts. graceful_stream_end: bool = True # Return valid empty playlist instead of error when upstream fails. + # Generic HTTP forward endpoint (/proxy/forward) settings + forward_allowed_hosts: list[str] = [] # Allowlist of hostnames. Empty = allow all. + forward_denied_hosts: list[str] = [] # Extra denied hostnames (in addition to automatic private-IP guard). + forward_max_body_bytes: int = ( + 10 * 1024 * 1024 + ) # Deprecated: use forward_max_request_body_bytes / forward_max_response_body_bytes. + forward_max_request_body_bytes: int = ( + 50 * 1024 * 1024 + ) # Max incoming request body size (50 MB — allows NZB/torrent file uploads). + forward_max_response_body_bytes: int = ( + 10 * 1024 * 1024 + ) # Max upstream response body size (10 MB — API JSON responses). + public_ip: str | None = ( + None # MediaFlow's own public IP. Used to substitute {mediaflow_ip} in forwarded requests. Auto-detected if not set. + ) + # EPG proxy settings epg_cache_ttl: int = 3600 # TTL (seconds) for cached EPG/XMLTV data. Default 1 hour. diff --git a/mediaflow_proxy/drm/__pycache__/__init__.cpython-313.pyc b/mediaflow_proxy/drm/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..894a426 Binary files /dev/null and b/mediaflow_proxy/drm/__pycache__/__init__.cpython-313.pyc differ diff --git a/mediaflow_proxy/drm/__pycache__/decrypter.cpython-313.pyc b/mediaflow_proxy/drm/__pycache__/decrypter.cpython-313.pyc new file mode 100644 index 0000000..0f1bddb Binary files /dev/null and b/mediaflow_proxy/drm/__pycache__/decrypter.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/F16Px.py b/mediaflow_proxy/extractors/F16Px.py index ea244eb..dbc7e40 100644 --- a/mediaflow_proxy/extractors/F16Px.py +++ b/mediaflow_proxy/extractors/F16Px.py @@ -1,11 +1,13 @@ # https://github.com/Gujal00/ResolveURL/blob/55c7f66524ebd65bc1f88650614e627b00167fa0/script.module.resolveurl/lib/resolveurl/plugins/f16px.py - import base64 import json import re +import time +import hmac +import hashlib +import os from typing import Dict, Any from urllib.parse import urlparse - from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError from mediaflow_proxy.utils import python_aesgcm @@ -17,16 +19,91 @@ class F16PxExtractor(BaseExtractor): @staticmethod def _b64url_decode(value: str) -> bytes: - # base64url -> base64 value = value.replace("-", "+").replace("_", "/") padding = (-len(value)) % 4 if padding: value += "=" * padding return base64.b64decode(value) + @staticmethod + def _b64url_encode(data: bytes) -> str: + return base64.urlsafe_b64encode(data).rstrip(b"=").decode() + def _join_key_parts(self, parts) -> bytes: return b"".join(self._b64url_decode(p) for p in parts) + @staticmethod + def _pick_best(sources: list) -> str: + """Return URL of highest-quality source by numeric label.""" + + def label_key(s): + try: + return int(s.get("label", 0)) + except (ValueError, TypeError): + return 0 + + return sorted(sources, key=label_key, reverse=True)[0]["url"] + + def _make_fingerprint(self) -> dict: + viewer_id = self._b64url_encode(os.urandom(16)) + device_id = self._b64url_encode(os.urandom(16)) + now = int(time.time()) + + token_payload = { + "viewer_id": viewer_id, + "device_id": device_id, + "confidence": 0.93, + "iat": now, + "exp": now + 600, + } + payload_b64 = self._b64url_encode(json.dumps(token_payload, separators=(",", ":")).encode()) + sig = hmac.new(b"", payload_b64.encode(), hashlib.sha256).digest() + token = f"{payload_b64}.{self._b64url_encode(sig)}" + + return { + "fingerprint": { + "token": token, + "viewer_id": viewer_id, + "device_id": device_id, + "confidence": 0.93, + } + } + + def _decrypt_playback(self, pb: dict) -> list: + """Decrypt primary payload, fall back to payload2+decrypt_keys.""" + iv = self._b64url_decode(pb["iv"]) + key = self._join_key_parts(pb["key_parts"]) + payload = self._b64url_decode(pb["payload"]) + + cipher = python_aesgcm.new(key) + decrypted = cipher.open(iv, payload) + + if decrypted is not None: + sources = json.loads(decrypted.decode("utf-8", "ignore")).get("sources") or [] + if sources: + return sources + + # Fallback: payload2 + decrypt_keys + decrypt_keys = pb.get("decrypt_keys") or {} + iv2 = pb.get("iv2") + pay2 = pb.get("payload2") + if iv2 and pay2 and decrypt_keys: + iv2 = self._b64url_decode(iv2) + pay2 = self._b64url_decode(pay2) + for key_b64 in decrypt_keys.values(): + try: + key2 = self._b64url_decode(key_b64) + cipher2 = python_aesgcm.new(key2) + decrypted = cipher2.open(iv2, pay2) + if decrypted: + sources = json.loads(decrypted.decode("utf-8", "ignore")).get("sources") or [] + if sources: + return sources + except Exception: + continue + + return [] + async def extract(self, url: str) -> Dict[str, Any]: parsed = urlparse(url) host = parsed.netloc @@ -35,26 +112,32 @@ class F16PxExtractor(BaseExtractor): match = re.search(r"/e/([A-Za-z0-9]+)", parsed.path or "") if not match: raise ExtractorError("F16PX: Invalid embed URL") - media_id = match.group(1) + api_url = f"https://{host}/api/videos/{media_id}/embed/playback" headers = self.base_headers.copy() - headers["referer"] = f"https://{host}/" + headers["referer"] = f"https://{host}/e/{media_id}" + headers["origin"] = origin + headers["content-type"] = "application/json" + + resp = await self._make_request( + api_url, + headers=headers, + method="POST", + json=self._make_fingerprint(), + ) - resp = await self._make_request(api_url, headers=headers) try: data = resp.json() except Exception: raise ExtractorError("F16PX: Invalid JSON response") # Case 1: plain sources - if "sources" in data and data["sources"]: - src = data["sources"][0].get("url") - if not src: - raise ExtractorError("F16PX: Empty source URL") + if data.get("sources"): + best = self._pick_best(data["sources"]) return { - "destination_url": src, + "destination_url": best, "request_headers": headers, "mediaflow_endpoint": self.mediaflow_endpoint, } @@ -65,40 +148,24 @@ class F16PxExtractor(BaseExtractor): raise ExtractorError("F16PX: No playback data") try: - iv = self._b64url_decode(pb["iv"]) # nonce - key = self._join_key_parts(pb["key_parts"]) # AES key - payload = self._b64url_decode(pb["payload"]) # ciphertext + tag - - cipher = python_aesgcm.new(key) - decrypted = cipher.open(iv, payload) # AAD = '' like ResolveURL - - if decrypted is None: - raise ExtractorError("F16PX: GCM authentication failed") - - decrypted_json = json.loads(decrypted.decode("utf-8", "ignore")) - - except ExtractorError: - raise + sources = self._decrypt_playback(pb) except Exception as e: raise ExtractorError(f"F16PX: Decryption failed ({e})") - sources = decrypted_json.get("sources") or [] if not sources: raise ExtractorError("F16PX: No sources after decryption") - best = sources[0].get("url") - if not best: - raise ExtractorError("F16PX: Empty source URL after decryption") - - self.base_headers.clear() - self.base_headers["referer"] = f"{origin}/" - self.base_headers["origin"] = origin - self.base_headers["Accept-Language"] = "en-US,en;q=0.5" - self.base_headers["Accept"] = "*/*" - self.base_headers["user-agent"] = "Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0" + best = self._pick_best(sources) + out_headers = { + "referer": f"{origin}/", + "origin": origin, + "Accept-Language": "en-US,en;q=0.5", + "Accept": "*/*", + "user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0", + } return { "destination_url": best, - "request_headers": self.base_headers, + "request_headers": out_headers, "mediaflow_endpoint": self.mediaflow_endpoint, } diff --git a/mediaflow_proxy/extractors/__pycache__/F16Px.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/F16Px.cpython-313.pyc new file mode 100644 index 0000000..78043c5 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/F16Px.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/__init__.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..749cd67 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/__init__.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/base.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/base.cpython-313.pyc new file mode 100644 index 0000000..93ef8f8 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/base.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/city.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/city.cpython-313.pyc new file mode 100644 index 0000000..2f4a5f5 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/city.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/doodstream.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/doodstream.cpython-313.pyc new file mode 100644 index 0000000..8205e25 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/doodstream.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/factory.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/factory.cpython-313.pyc new file mode 100644 index 0000000..1a0a856 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/factory.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/fastream.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/fastream.cpython-313.pyc new file mode 100644 index 0000000..1df2c60 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/fastream.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/filelions.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/filelions.cpython-313.pyc new file mode 100644 index 0000000..ce5c70c Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/filelions.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/filemoon.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/filemoon.cpython-313.pyc new file mode 100644 index 0000000..9d619f8 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/filemoon.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/gupload.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/gupload.cpython-313.pyc new file mode 100644 index 0000000..3cfabd5 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/gupload.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/livetv.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/livetv.cpython-313.pyc new file mode 100644 index 0000000..66cc774 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/livetv.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/lulustream.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/lulustream.cpython-313.pyc new file mode 100644 index 0000000..84c4700 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/lulustream.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/maxstream.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/maxstream.cpython-313.pyc new file mode 100644 index 0000000..cf5bd45 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/maxstream.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/mixdrop.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/mixdrop.cpython-313.pyc new file mode 100644 index 0000000..fd7d554 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/mixdrop.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/okru.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/okru.cpython-313.pyc new file mode 100644 index 0000000..b1d46d4 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/okru.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/sportsonline.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/sportsonline.cpython-313.pyc new file mode 100644 index 0000000..157e39e Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/sportsonline.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/streamhg.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/streamhg.cpython-313.pyc new file mode 100644 index 0000000..8c9aaca Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/streamhg.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/streamtape.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/streamtape.cpython-313.pyc new file mode 100644 index 0000000..819f34b Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/streamtape.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/streamwish.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/streamwish.cpython-313.pyc new file mode 100644 index 0000000..9199897 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/streamwish.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/supervideo.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/supervideo.cpython-313.pyc new file mode 100644 index 0000000..b9bae5a Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/supervideo.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/turbovidplay.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/turbovidplay.cpython-313.pyc new file mode 100644 index 0000000..8508602 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/turbovidplay.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/uqload.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/uqload.cpython-313.pyc new file mode 100644 index 0000000..2804ac9 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/uqload.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/vavoo.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/vavoo.cpython-313.pyc new file mode 100644 index 0000000..7b89933 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/vavoo.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/vidfast.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/vidfast.cpython-313.pyc new file mode 100644 index 0000000..f27f8fe Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/vidfast.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/vidmoly.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/vidmoly.cpython-313.pyc new file mode 100644 index 0000000..90dff7a Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/vidmoly.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/vidoza.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/vidoza.cpython-313.pyc new file mode 100644 index 0000000..ac937b8 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/vidoza.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/vixcloud.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/vixcloud.cpython-313.pyc new file mode 100644 index 0000000..9618c2a Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/vixcloud.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/voe.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/voe.cpython-313.pyc new file mode 100644 index 0000000..eee49e7 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/voe.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/doodstream.py b/mediaflow_proxy/extractors/doodstream.py index b6ba592..2b840bc 100644 --- a/mediaflow_proxy/extractors/doodstream.py +++ b/mediaflow_proxy/extractors/doodstream.py @@ -152,6 +152,25 @@ class DoodStreamExtractor(BaseExtractor): html = r.text base_url = f"https://{urlparse(final_url).netloc}" + # Some pages embed a JS redirect instead of a real HTTP redirect. + js_redirect = re.search(r'(?:window\.location|location\.href)\s*=\s*[\'"]https?://([^/\'"]+)', html) + if js_redirect: + redirected_host = js_redirect.group(1) + redirect_url = f"https://{redirected_host}/e/{video_id}" + logger.debug("JS redirect detected → %s", redirect_url) + async with AsyncSession() as s2: + r2 = await s2.get( + redirect_url, + impersonate="chrome", + headers={"Referer": f"https://{redirected_host}/"}, + timeout=30, + allow_redirects=True, + **({"proxy": proxy} if proxy else {}), + ) + final_url = str(r2.url) + html = r2.text + base_url = f"https://{urlparse(final_url).netloc}" + if "pass_md5" not in html: if "turnstile" in html.lower() or "captcha_l" in html: raise ExtractorError( @@ -197,6 +216,14 @@ class DoodStreamExtractor(BaseExtractor): "Ensure BYPARR_URL is set for reliable extraction." ) + # CloudFlare R2 storage URLs are self-contained — no salt/token needed. + if "cloudflarestorage." in base_stream.lower(): + return { + "destination_url": base_stream, + "request_headers": headers, + "mediaflow_endpoint": "proxy_stream_endpoint", + } + token_match = re.search(r"token=([^&\s'\"]+)", html) if not token_match: raise ExtractorError("DoodStream: token not found in embed HTML") diff --git a/mediaflow_proxy/extractors/factory.py b/mediaflow_proxy/extractors/factory.py index 8f02737..8193658 100644 --- a/mediaflow_proxy/extractors/factory.py +++ b/mediaflow_proxy/extractors/factory.py @@ -25,6 +25,7 @@ from mediaflow_proxy.extractors.vixcloud import VixCloudExtractor from mediaflow_proxy.extractors.fastream import FastreamExtractor from mediaflow_proxy.extractors.voe import VoeExtractor from mediaflow_proxy.extractors.vidfast import VidFastExtractor +from mediaflow_proxy.extractors.streamhg import StreamHGExtractor class ExtractorFactory: @@ -55,6 +56,7 @@ class ExtractorFactory: "Voe": VoeExtractor, "Sportsonline": SportsonlineExtractor, "VidFast": VidFastExtractor, + "StreamHG": StreamHGExtractor, } @classmethod diff --git a/mediaflow_proxy/extractors/filemoon.py b/mediaflow_proxy/extractors/filemoon.py index e67d3f1..9dd7fd6 100644 --- a/mediaflow_proxy/extractors/filemoon.py +++ b/mediaflow_proxy/extractors/filemoon.py @@ -1,9 +1,45 @@ -import re +import base64 +import json from typing import Dict, Any -from urllib.parse import urlparse, urljoin +from urllib.parse import urlparse + +from cryptography.hazmat.primitives.ciphers.aead import AESGCM from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError -from mediaflow_proxy.utils.packed import eval_solver + + +def _base64url_decode(input_str: str) -> bytes: + """Decode a base64url-encoded string to bytes.""" + padded = input_str.replace("-", "+").replace("_", "/") + padding = 4 - len(padded) % 4 + if padding != 4: + padded += "=" * padding + return base64.b64decode(padded) + + +def _combine_key_parts(key_parts: list) -> bytes: + """Combine base64url-encoded key parts into a single key.""" + decoded = [_base64url_decode(part) for part in key_parts] + return b"".join(decoded) + + +def _decrypt_playback(playback: dict) -> dict: + """Decrypt AES-256-GCM encrypted playback payload.""" + key = _combine_key_parts(playback["key_parts"]) + iv = _base64url_decode(playback["iv"]) + payload = _base64url_decode(playback["payload"]) + + # GCM auth tag is the last 16 bytes of the payload + tag = payload[-16:] + ciphertext = payload[:-16] + + aesgcm = AESGCM(key) + try: + plaintext = aesgcm.decrypt(iv, ciphertext + tag, None) + except Exception as e: + raise ExtractorError(f"Decryption failed: {e}") + + return json.loads(plaintext.decode("utf-8")) class FileMoonExtractor(BaseExtractor): @@ -12,41 +48,49 @@ class FileMoonExtractor(BaseExtractor): self.mediaflow_endpoint = "hls_manifest_proxy" async def extract(self, url: str, **kwargs) -> Dict[str, Any]: - response = await self._make_request(url) + # URL format: https://filemoon.sx/e/{code} or https://filemoon.sx/d/{code} + parsed = urlparse(url) + path = parsed.path.rstrip("/") + code = path.split("/")[-1] if path else None - pattern = r'iframe.*?src=["\'](.*?)["\']' - match = re.search(pattern, response.text, re.DOTALL) - if not match: - raise ExtractorError("Failed to extract iframe URL") + if not code or code in ("e", "d"): + raise ExtractorError(f"Could not extract video code from URL: {url}") - iframe_url = match.group(1) - - parsed = urlparse(str(response.url)) - base_url = f"{parsed.scheme}://{parsed.netloc}" - - if iframe_url.startswith("//"): - iframe_url = f"{parsed.scheme}:{iframe_url}" - elif not urlparse(iframe_url).scheme: - iframe_url = urljoin(base_url, iframe_url) + api_url = f"{parsed.scheme}://{parsed.netloc}/api/videos/{code}" headers = {"Referer": url} - patterns = [r'file:"(.*?)"'] + response = await self._make_request(api_url, headers=headers) - final_url = await eval_solver( - self, - iframe_url, - headers, - patterns, - ) + try: + data = response.json() + except Exception as e: + raise ExtractorError(f"Failed to parse API response: {e}") - test_resp = await self._make_request(final_url, headers=headers) - if test_resp.status == 404: - raise ExtractorError("Stream not found (404)") + if "error" in data: + raise ExtractorError(f"FileMoon API error: {data['error']}") + + playback = data.get("playback") + if not playback or not playback.get("key_parts") or not playback.get("payload"): + raise ExtractorError("No playback data available") + + decrypted = _decrypt_playback(playback) + + sources = decrypted.get("sources", []) + hls_source = None + for source in sources: + if source.get("mime_type") == "application/vnd.apple.mpegurl": + hls_source = source + break + + if not hls_source: + raise ExtractorError("No HLS source found in decrypted playback") + + destination_url = hls_source["url"] self.base_headers["referer"] = url return { - "destination_url": final_url, + "destination_url": destination_url, "request_headers": self.base_headers, "mediaflow_endpoint": self.mediaflow_endpoint, } diff --git a/mediaflow_proxy/extractors/maxstream.py b/mediaflow_proxy/extractors/maxstream.py index 707368e..32afbed 100644 --- a/mediaflow_proxy/extractors/maxstream.py +++ b/mediaflow_proxy/extractors/maxstream.py @@ -1,67 +1,651 @@ +"""Maxstream URL extractor — full uprot bypass pipeline. + +Solves the problem of `uprot.net` redirects on `/msf/`, `/msfi/` and +`/msfld/` paths used by Italian aggregators (CB01, EuroStreaming, etc). + +Key features: + 1. TLS-fingerprint-resistant fetch via curl_cffi (chrome131 impersonation) + 2. 4-digit captcha solver with multi-engine OCR ensemble: + ddddocr (primary) → tesseract (fallback) → CF Workers AI (3rd, opt-in) + 3. Honeypot URL filtering on the post-captcha page + 4. uprots/uprotem → maxstream redirect chain follow with cookie continuity + 5. /msfld/ folder picker (season + episode kwargs from MFP route) + 6. Optional persistent URL cache (when paired with services/uprot_warmer.py) + +All advanced features are guarded by lazy imports — if `curl_cffi`, +`pytesseract`, `Pillow` or `ddddocr` are not installed the extractor +falls back to the previous behaviour for `/msf/` URLs and skips +`/msfld/` cleanly. + +Activation: + CF_WORKER_OCR_URL e.g. https://easyproxy-ocr.user.workers.dev + CF_WORKER_OCR_AUTH Worker AUTH_TOKEN + +Credits: pipeline ported from NelloStream +(https://github.com/vitouchiha/nello-stream) — `workers/cfworker.js` +functions `_uprotBypassWithCookies`, `_extractMaxstreamVideo`, +`_aiOcrDigits`, `_handleScheduledUprotRefresh`. All credit to Nello. +""" + +import asyncio +import logging +import os import re -from typing import Dict, Any +from typing import Any, Dict, Optional +from urllib.parse import urljoin, urlparse, urlencode from bs4 import BeautifulSoup from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError +logger = logging.getLogger(__name__) + class MaxstreamExtractor(BaseExtractor): - """Maxstream URL extractor.""" + """Maxstream URL extractor with full uprot bypass pipeline.""" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.mediaflow_endpoint = "hls_manifest_proxy" + # Persistent cookie jar across the uprot → maxstream redirect chain. + # PHPSESSID + captcha hash + uprot_session must travel together for + # the post-captcha redirect to be honoured by the maxstream WAF. + self.cookies: Dict[str, str] = {} + self._last_solve_text: Optional[str] = None - async def get_uprot(self, link: str): - """Extract MaxStream URL.""" - if "msf" in link: - link = link.replace("msf", "mse") - response = await self._make_request(link) - soup = BeautifulSoup(response.text, "lxml") - maxstream_url = soup.find("a") - maxstream_url = maxstream_url.get("href") - return maxstream_url + # ───────────────────────── HTTP layer ────────────────────────────── + + async def _curl_cffi_fetch( + self, + url: str, + method: str = "GET", + data: Optional[Any] = None, + headers: Optional[Dict[str, str]] = None, + allow_redirects: bool = True, + timeout: int = 30, + ) -> Optional[Dict[str, Any]]: + """Browser-impersonated fetch via curl_cffi. + + uprot.net inspects TLS fingerprints; aiohttp's JA3 is recognised as + a bot within a few requests and served captcha pages or 503 even + from clean residential IPs. curl_cffi with `impersonate="chrome131"` + replays a real Chrome JA3 + ALPN order, so uprot serves the real + redirect link or the (legitimately-protected) captcha page. + + Returns None if curl_cffi is not installed (caller falls back to + BaseExtractor._make_request for the simpler legacy /msf/ path). + """ + try: + from curl_cffi import requests as cffi_requests + except ImportError: + logger.debug("curl_cffi not installed — uprot bypass disabled") + return None + + merged_headers = dict(self.base_headers) + if headers: + merged_headers.update(headers) + if method.upper() == "POST" and isinstance(data, (str, bytes)): + merged_headers.setdefault("content-type", "application/x-www-form-urlencoded") + + proxy = self._get_proxy(url) + proxies_arg = {"http": proxy, "https": proxy} if proxy else None + + loop = asyncio.get_running_loop() + + def _do_request(): + try: + req_cookies = dict(self.cookies) if self.cookies else None + r = cffi_requests.request( + method, + url, + headers=merged_headers, + data=data, + cookies=req_cookies, + proxies=proxies_arg, + impersonate="chrome131", + timeout=timeout, + allow_redirects=allow_redirects, + ) + cookies = {} + try: + cookies = {c.name: c.value for c in r.cookies.jar} + except Exception: + cookies = dict(r.cookies) if r.cookies else {} + return { + "ok": r.status_code < 400, + "status": r.status_code, + "text": r.text, + "content": r.content, + "url": str(r.url), + "headers": dict(r.headers), + "cookies": cookies, + } + except Exception as e: + return { + "ok": False, + "status": 0, + "text": "", + "content": b"", + "url": url, + "headers": {}, + "cookies": {}, + "error": str(e), + } + + result = await loop.run_in_executor(None, _do_request) + if result.get("cookies"): + self.cookies.update(result["cookies"]) + return result + + # ─────────────────────── Honeypot filter ─────────────────────────── + + @staticmethod + def _strip_uprot_honeypots(html: str) -> str: + """Remove uprot's anti-bot honeypot blocks before URL extraction. + + The post-captcha success page intentionally hides decoy URLs in: + 1. HTML comments () + 2.
blocks containing fake + "Continue" buttons that point to placeholder URLs like + `maxstream.video/uprots/123456789012` (12 sequential digits). + + A naive regex grabs the FIRST match (the honeypot). Strip both + before parsing so the regex/BS4 see only the visible-to-user DOM. + """ + no_comments = re.sub(r"", "", html) + no_hidden = re.sub( + r"]*style=[\"'][^\"']*display\s*:\s*none[^\"']*[\"'][^>]*>[\s\S]*?", + "", + no_comments, + flags=re.IGNORECASE, + ) + return no_hidden + + # ─────────────────────── Redirect parser ─────────────────────────── + + def _parse_uprot_html(self, text: str) -> Optional[str]: + """Parse a uprot success page and return the next-hop URL. + + Strategy mirrored from NelloStream `_uprotBypassWithCookies`: + 1. Strip honeypot blocks first + 2. Prefer explicit `id="buttok"` CONTINUE button (uprot marker) + 3. Fallback: (case+spacing tolerant) + 4. Last resort: a `/uprots/` or `/uprotem/` URL appearing exactly + once in the cleaned HTML (uprot scatters multiple decoys) + 5. Generic stayonline.pro / maxstream.video regex with honeypot + literal filter + 6. window.location / meta refresh / BS4 button fallbacks + """ + cleaned = self._strip_uprot_honeypots(text).replace("\\/", "/") + + def _valid(c): + if not c: + return None + try: + p = urlparse(c) + if p.netloc and "maxstream.video" in p.netloc and p.path.startswith("/cdn-cgi/"): + return None + except Exception: + pass + return c + + # 1. id="buttok" CONTINUE button + m = re.search( + r'href=["\'](https?://[^"\']+)["\'][^>]*>\s*]*id=["\']buttok["\'][^>]*>\s*C\s*O\s*N\s*T\s*I\s*N\s*U\s*E', + cleaned, + re.IGNORECASE, + ) + if m and _valid(m.group(1)): + return m.group(1) + + # 2. Generic + m = re.search( + r'href=["\'](https?://[^"\']+)["\'][^>]*>\s*]*>\s*[Cc]\s*[Oo]\s*[Nn]\s*[Tt]\s*[Ii]\s*[Nn]\s*[Uu]\s*[Ee]', + cleaned, + ) + if m and _valid(m.group(1)): + return m.group(1) + + # 3. Unique uprots/uprotem URL + all_uprots = re.findall( + r'href=["\'](https?://[^"\']*uprot(?:s|em)/[^"\']+)["\']', + cleaned, + re.IGNORECASE, + ) + if all_uprots: + counts: Dict[str, int] = {} + for u in all_uprots: + counts[u] = counts.get(u, 0) + 1 + unique = [u for u, c in counts.items() if c == 1] + if unique and _valid(unique[0]): + return unique[0] + + # 4. Generic stayonline / maxstream regex + m = re.search( + r'https?://(?:www\.)?(?:stayonline\.pro|maxstream\.video)[^"\'\s<>\\ ]+', + cleaned, + ) + if m and "/uprots/123456789012" not in m.group(0) and _valid(m.group(0)): + return m.group(0) + + # 5. window.location / meta refresh + m = re.search(r'window\.location(?:\.href)?\s*=\s*["\']([^"\']+)["\']', cleaned) + if m and _valid(m.group(1)): + return m.group(1) + m = re.search(r'content=["\']0;\s*url=([^"\']+)["\']', cleaned, re.I) + if m and _valid(m.group(1)): + return m.group(1) + + # 6. BS4 buttons / forms (rare paths) + soup = BeautifulSoup(cleaned, "lxml") + for btn in soup.find_all(["a", "button"]): + t = btn.get_text().strip().lower() + if "continue" in t or "continua" in t or "vai al" in t: + href = btn.get("href") + if not href and btn.parent and btn.parent.name == "a": + href = btn.parent.get("href") + if href and "uprot.net" not in href and _valid(href): + return href + return None + + def _parse_uprot_folder(self, text: str, season, episode) -> Optional[str]: + """Parse a /msfld/ folder HTML and return the /msfi/ link for S{ss}E{ee}.""" + try: + s_int = int(season) + e_int = int(episode) + except (TypeError, ValueError): + return None + s_pad = f"{s_int:02d}" + e_pad = f"{e_int:02d}" + patterns = [ + rf"S{s_pad}E{e_pad}", + rf"\b0*{s_int}x0*{e_int}\b", + rf"\b0*{s_int}×0*{e_int}\b", + rf"\b0*{s_int}×0*{e_int}\b", + ] + for pat in patterns: + m = re.search( + rf"{pat}[\s\S]{{0,500}}?href=['\"]([^'\"]+/msfi/[^'\"]+)['\"]", + text, + re.I, + ) + if m: + return m.group(1) + return None + + # ─────────────────────── OCR backends ────────────────────────────── + + @staticmethod + def _preprocess_captcha_png(img_bytes: bytes) -> bytes: + """Binarize + denoise the captcha PNG to boost ddddocr accuracy.""" + try: + from PIL import Image, ImageFilter + import io + + img = Image.open(io.BytesIO(img_bytes)).convert("L") + img = img.point(lambda p: 255 if p >= 140 else 0, mode="L") + img = img.filter(ImageFilter.MaxFilter(3)) + img = img.filter(ImageFilter.MinFilter(3)) + out = io.BytesIO() + img.save(out, format="PNG") + return out.getvalue() + except Exception: + return img_bytes + + @staticmethod + def _tesseract_classify(img_bytes: bytes) -> str: + try: + import pytesseract + from PIL import Image, ImageFilter + import io + + img = Image.open(io.BytesIO(img_bytes)).convert("L") + img = img.point(lambda p: 255 if p >= 140 else 0, mode="L") + img = img.filter(ImageFilter.MaxFilter(3)) + img = img.filter(ImageFilter.MinFilter(3)) + return pytesseract.image_to_string(img, config="--psm 7 -c tessedit_char_whitelist=0123456789").strip() + except Exception: + return "" + + @staticmethod + async def _cf_worker_ocr(img_bytes: bytes, expected_digits: int = 4) -> str: + """Optional 3rd OCR backend: Cloudflare Workers AI vision LLM. + + ddddocr + tesseract top out at ~50-65% on uprot's noisy captcha. + A vision LLM (Llama 4 Scout / Gemma 3 / LLaVA) gets ~80-90%. + POSTs the captcha PNG to a user-deployed CF Worker (see + docs/MAXSTREAM_UPROT.md for setup). + + Activated only when both env vars are set: + CF_WORKER_OCR_URL + CF_WORKER_OCR_AUTH + Returns "" on any failure — caller falls through gracefully. + """ + base = (os.getenv("CF_WORKER_OCR_URL") or "").strip().rstrip("/") + if not base: + return "" + auth = (os.getenv("CF_WORKER_OCR_AUTH") or "").strip() + try: + import aiohttp + + headers = {"content-type": "image/png"} + if auth: + headers["x-worker-auth"] = auth + async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=20)) as s: + async with s.post( + f"{base}/?ocr=1&digits={expected_digits}", + data=img_bytes, + headers=headers, + ) as resp: + if resp.status != 200: + return "" + data = await resp.json() + return (data.get("digits") or "").strip() + except Exception as e: + logger.debug(f"CF Worker OCR failed: {e}") + return "" + + # ─────────────────── Captcha solver loop ─────────────────────────── + + async def _solve_uprot_captcha_once(self, text: str, original_url: str, preprocess: bool = False) -> Optional[str]: + try: + import ddddocr + except ImportError: + logger.debug("ddddocr not installed — skipping captcha solve") + return None + + soup = BeautifulSoup(text, "lxml") + img_tag = soup.find("img", src=re.compile(r"data:image/|/captcha|/image/|captcha\.php")) + img_url = img_tag.get("src") if img_tag else None + if not img_url: + m = re.search( + r']+src=["\']([^"\']*(?:data:image/|captcha|image)[^"\']*)["\']', + text, + ) + img_url = m.group(1) if m else None + if not img_url: + return None + + form = soup.find("form") + form_action = form.get("action") if form else "" + if not form_action or form_action == "#": + form_action = original_url + elif form_action.startswith("/"): + p = urlparse(original_url) + form_action = f"{p.scheme}://{p.netloc}{form_action}" + + # Download captcha image + if img_url.startswith("data:"): + try: + import base64 + + _, b64 = img_url.split(",", 1) + img_data = base64.b64decode(b64) + except Exception: + return None + else: + full_url = img_url + if full_url.startswith("/"): + p = urlparse(original_url) + full_url = f"{p.scheme}://{p.netloc}{full_url}" + res = await self._curl_cffi_fetch(full_url) + if not res or not res.get("ok"): + return None + img_data = res.get("content") or b"" + + ocr_input = self._preprocess_captcha_png(img_data) if preprocess else img_data + + if not hasattr(self, "_ocr_engine"): + self._ocr_engine = ddddocr.DdddOcr(show_ad=False) + res_str = self._ocr_engine.classification(ocr_input) + res_digits = "".join(c for c in str(res_str) if c.isdigit()) + + # Accept 3-or-4 digit answers (uprot uses 4 today; legacy 3 still seen) + def _ok(n): + return 3 <= n <= 4 + + if not _ok(len(res_digits)): + tess = self._tesseract_classify(ocr_input) + tess_digits = "".join(c for c in str(tess) if c.isdigit()) + if _ok(len(tess_digits)): + res_digits = tess_digits + else: + cf = await self._cf_worker_ocr(ocr_input, expected_digits=4) + cf_digits = "".join(c for c in str(cf) if c.isdigit()) + if _ok(len(cf_digits)): + res_digits = cf_digits + else: + return None + + # Prepare POST data + captcha_input = soup.find("input", {"name": re.compile(r"captcha|code|val", re.I)}) + if captcha_input and captcha_input.get("name"): + field_name = captcha_input["name"] + else: + m = re.search(r'name=["\'](captcha|code|val|captch5)[^"\']*["\']', text, re.I) + field_name = m.group(1) if m else "captcha" + + post_data = {field_name: res_digits} + if form: + for inp in form.find_all(["input", "button", "select"]): + n = inp.get("name") + v = inp.get("value", "") + if n and n not in post_data: + post_data[n] = v + + headers = {**self.base_headers, "referer": original_url} + result = await self._curl_cffi_fetch(form_action, method="POST", data=urlencode(post_data), headers=headers) + if not result: + return None + solved_text = result.get("text") or "" + self._last_solve_text = solved_text if isinstance(solved_text, str) else None + return self._parse_uprot_html(solved_text) + + async def _solve_uprot_captcha(self, text: str, original_url: str, max_attempts: int = 4) -> Optional[str]: + """Solve the captcha with retries on fresh images. + + Each wrong submit triggers uprot to serve a brand-new captcha + image; we feed that fresh page back into the next attempt instead + of OCRing the same image with different preprocessing. + """ + current = text + for attempt in range(1, max_attempts + 1): + preprocess = attempt % 2 == 0 + result = await self._solve_uprot_captcha_once(current, original_url, preprocess=preprocess) + if result: + return result + new_text = self._last_solve_text + if new_text and new_text != current: + current = new_text + return None + + # ──────────────────── Redirect chain ─────────────────────────────── + + async def _follow_uprots_chain(self, url: str, max_hops: int = 10) -> str: + """Walk the uprots/uprotem → maxstream redirect chain manually. + + After captcha, the URL we extract is usually + `maxstream.video/uprots/` whose WAF only honours the token + when reached via the proper redirect chain (Referer + cookie + continuity from uprot.net). Direct GET → Error 131. + + Walks hop-by-hop preserving cookies until landing on + `maxsun{N}.online/watchfree/...` or `maxstream.video/emvvv/`, + then converts watchfree → emvvv so the existing packer extraction + works. + """ + if "/uprots/" not in url and "/uprotem/" not in url: + return url + + current = url + for _ in range(max_hops): + res = await self._curl_cffi_fetch( + current, + headers={**self.base_headers, "referer": "https://uprot.net/"}, + allow_redirects=False, + timeout=15, + ) + if not res: + break + loc = (res.get("headers") or {}).get("location") or (res.get("headers") or {}).get("Location") + if not loc: + current = res.get("url") or current + break + current = urljoin(current, loc) + if "/uprots/" not in current and "/uprotem/" not in current: + break + + if "watchfree/" in current: + try: + tail = current.split("watchfree/", 1)[1] + segments = [s for s in tail.split("/") if s] + if len(segments) >= 2: + current = f"https://maxstream.video/emvvv/{segments[1]}" + except Exception: + pass + + return current + + # ─────────────────────── Public flow ─────────────────────────────── + + async def get_uprot(self, link: str, season=None, episode=None) -> str: + """Resolve a uprot URL to its maxstream destination. + + Supports: + - /msf/{id} single movie (legacy alias /mse/) + - /msfi/{id} single episode + - /msfld/{id} folder of episodes (requires season + episode) + """ + # Map only the modern /msf/ single-video path to its legacy /mse/ + # alias. A naive str.replace("msf", "mse") corrupts /msfld/ into + # /mseld/ (404) and /msfi/ into /msei/ (deprecated 500 on new IDs). + link = re.sub(r"/msf/", "/mse/", link) + + # Try curl_cffi first; fall back to BaseExtractor._make_request if + # curl_cffi isn't installed (legacy /msf/ path may still work). + cffi = await self._curl_cffi_fetch(link) + if cffi and cffi.get("ok"): + text = cffi["text"] + else: + response = await self._make_request(link) + text = response.text + + if "/msfld/" in link: + if season is None or episode is None: + raise ExtractorError("msfld folder URL requires 'season' and 'episode' parameters") + episode_link = self._parse_uprot_folder(text, season, episode) + if not episode_link: + raise ExtractorError(f"Episode S{season}E{episode} not found in msfld folder") + link = episode_link + cffi = await self._curl_cffi_fetch(link) + if cffi and cffi.get("ok"): + text = cffi["text"] + else: + response = await self._make_request(link) + text = response.text + + # 1. Direct parse — works on legacy uprot pages without captcha + res = self._parse_uprot_html(text) + if res: + return res + + # 2. Captcha solver + res = await self._solve_uprot_captcha(text, link) + if res: + return res + + raise ExtractorError("Redirect link not found in uprot page") async def extract(self, url: str, **kwargs) -> Dict[str, Any]: - """Extract Maxstream URL.""" - maxstream_url = await self.get_uprot(url) - response = await self._make_request(maxstream_url, headers={"accept-language": "en-US,en;q=0.5"}) + """Extract Maxstream URL. - # Extract and decode URL - match = re.search(r"\}\('(.+)',.+,'(.+)'\.split", response.text) - if not match: + For /msfld/ folder URLs, callers must pass season=N&episode=M as + query parameters (forwarded by MFP routes as kwargs). + + Optional persistent cache: if `mediaflow_proxy.services.uprot_url_cache` + is importable, cache hits skip captcha+chain entirely (<100ms). + """ + season = kwargs.get("season") + episode = kwargs.get("episode") + + cached = None + try: + from mediaflow_proxy.services import uprot_url_cache # type: ignore + + cached = uprot_url_cache.get(url, season=season, episode=episode) + except Exception: + pass + + if cached: + logger.debug(f"uprot cache HIT: {url[:80]}") + maxstream_url = cached + else: + maxstream_url = await self.get_uprot(url, season=season, episode=episode) + maxstream_url = await self._follow_uprots_chain(maxstream_url) + + # Fetch the maxstream embed page + cffi = await self._curl_cffi_fetch( + maxstream_url, + headers={**self.base_headers, "referer": "https://uprot.net/", "accept-language": "en-US,en;q=0.5"}, + ) + if cffi and cffi.get("ok"): + text = cffi["text"] + else: + response = await self._make_request(maxstream_url, headers={"accept-language": "en-US,en;q=0.5"}) + text = response.text + + if not cached: + try: + from mediaflow_proxy.services import uprot_url_cache # type: ignore + + uprot_url_cache.put(url, maxstream_url, season=season, episode=episode) + except Exception: + pass + + # Direct sources check + m = re.search(r'sources:\s*\[\{src:\s*"([^"]+)"', text) + if m: + return { + "destination_url": m.group(1), + "request_headers": {**self.base_headers, "referer": maxstream_url}, + "mediaflow_endpoint": self.mediaflow_endpoint, + } + + # Packer fallback + m = re.search(r"\}\('(.+)',.+,'(.+)'\.split", text) + if not m: + m = re.search(r"eval\(function\(p,a,c,k,e,d\).+?\}\('(.+?)',.+?,'(.+?)'\.split", text, re.S) + if not m: raise ExtractorError("Failed to extract URL components") - s1 = match.group(2) - # Extract Terms - terms = s1.split("|") - urlset_index = terms.index("urlset") - hls_index = terms.index("hls") - sources_index = terms.index("sources") - result = terms[urlset_index + 1 : hls_index] - reversed_elements = result[::-1] - first_part = terms[hls_index + 1 : sources_index] - reversed_first_part = first_part[::-1] - first_url_part = "" - for first_part in reversed_first_part: - if "0" in first_part: - first_url_part += first_part - else: - first_url_part += first_part + "-" + terms = m.group(2).split("|") + try: + urlset_index = terms.index("urlset") + hls_index = terms.index("hls") + sources_index = terms.index("sources") + except ValueError as e: + raise ExtractorError(f"Missing components in packer: {e}") - base_url = f"https://{first_url_part}.host-cdn.net/hls/" + result_parts = terms[urlset_index + 1 : hls_index] + reversed_elements = result_parts[::-1] + first_part_terms = terms[hls_index + 1 : sources_index] + reversed_first_part = first_part_terms[::-1] + + first_url_part = "" + for fp in reversed_first_part: + if "0" in fp: + first_url_part += fp + else: + first_url_part += fp + "-" + + base_url = f"https://{first_url_part.rstrip('-')}.host-cdn.net/hls/" if len(reversed_elements) == 1: final_url = base_url + "," + reversed_elements[0] + ".urlset/master.m3u8" - lenght = len(reversed_elements) - i = 1 - for element in reversed_elements: - base_url += element + "," - if lenght == i: - base_url += ".urlset/master.m3u8" - else: - i += 1 - final_url = base_url + else: + final_url = base_url + for element in reversed_elements: + final_url += element + "," + final_url = final_url.rstrip(",") + ".urlset/master.m3u8" self.base_headers["referer"] = url return { diff --git a/mediaflow_proxy/extractors/streamhg.py b/mediaflow_proxy/extractors/streamhg.py new file mode 100644 index 0000000..b82de22 --- /dev/null +++ b/mediaflow_proxy/extractors/streamhg.py @@ -0,0 +1,25 @@ +from typing import Dict, Any + +from mediaflow_proxy.extractors.base import BaseExtractor +from mediaflow_proxy.utils.packed import eval_solver + + +class StreamHGExtractor(BaseExtractor): + """StreamHG URL extractor.""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.mediaflow_endpoint = "hls_manifest_proxy" + + async def extract(self, url: str, **kwargs) -> Dict[str, Any]: + """Extract StreamHG URL.""" + headers = {} + patterns = [r'"hls2":"([^"]+)"'] + + final_url = await eval_solver(self, url, headers, patterns) + + return { + "destination_url": final_url, + "request_headers": self.base_headers, + "mediaflow_endpoint": self.mediaflow_endpoint, + } diff --git a/mediaflow_proxy/extractors/vidmoly.py b/mediaflow_proxy/extractors/vidmoly.py index 161f813..bdca4be 100644 --- a/mediaflow_proxy/extractors/vidmoly.py +++ b/mediaflow_proxy/extractors/vidmoly.py @@ -1,7 +1,6 @@ import re from typing import Dict, Any from urllib.parse import urljoin, urlparse - from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError @@ -15,12 +14,23 @@ class VidmolyExtractor(BaseExtractor): if not parsed.hostname or "vidmoly" not in parsed.hostname: raise ExtractorError("VIDMOLY: Invalid domain") + embed_id_match = re.search(r"/embed-([a-zA-Z0-9]+)\.html", parsed.path) + if not embed_id_match: + raise ExtractorError("VIDMOLY: Could not extract embed ID from URL") + embed_id = embed_id_match.group(1) + headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " "AppleWebKit/537.36 (KHTML, like Gecko) " "Chrome/120 Safari/537.36", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept-Language": "en-US,en;q=0.9", + "Connection": "keep-alive", + "Cookie": f"cf_turnstile_demo_pass_{embed_id}=1", "Referer": url, - "Sec-Fetch-Dest": "iframe", + "Sec-Fetch-Dest": "document", + "Sec-Fetch-Mode": "navigate", + "Sec-Fetch-Site": "same-origin", } # --- Fetch embed page --- @@ -33,11 +43,10 @@ class VidmolyExtractor(BaseExtractor): raise ExtractorError("VIDMOLY: Stream URL not found") master_url = match.group(1) - if not master_url.startswith("http"): master_url = urljoin(url, master_url) - # --- Validate stream (prevents Stremio timeout) --- + # --- Validate stream --- try: test = await self._make_request(master_url, headers=headers) except Exception as e: @@ -48,8 +57,6 @@ class VidmolyExtractor(BaseExtractor): if test.status >= 400: raise ExtractorError(f"VIDMOLY: Stream unavailable ({test.status})") - # Return MASTER playlist, not variant - # Let MediaFlow Proxy handle variants return { "destination_url": master_url, "request_headers": headers, diff --git a/mediaflow_proxy/extractors/vixcloud.py b/mediaflow_proxy/extractors/vixcloud.py index 3f990ae..a969698 100644 --- a/mediaflow_proxy/extractors/vixcloud.py +++ b/mediaflow_proxy/extractors/vixcloud.py @@ -54,9 +54,9 @@ class VixCloudExtractor(BaseExtractor): "Origin": f"{site_url}", } - response = await self._make_request(site_url + '/api' + parts[1]) + response = await self._make_request(site_url + "/api" + parts[1]) - response = await self._make_request(site_url + '/' + response.json()['src'],headers=headers) + response = await self._make_request(site_url + "/" + response.json()["src"], headers=headers) if response.status != 200: raise ExtractorError("Failed to extract URL components, Invalid Request") diff --git a/mediaflow_proxy/handlers.py b/mediaflow_proxy/handlers.py index dfb2546..4fe5755 100644 --- a/mediaflow_proxy/handlers.py +++ b/mediaflow_proxy/handlers.py @@ -136,7 +136,7 @@ async def handle_hls_stream_proxy( Returns: Union[Response, EnhancedStreamingResponse]: Either a processed m3u8 playlist or a streaming response. """ - streamer = await create_streamer() + streamer = await create_streamer(hls_params.destination) # Handle range requests content_range = proxy_headers.request.get("range", "bytes=0-") if "nan" in content_range.casefold(): diff --git a/mediaflow_proxy/mpd_processor.py b/mediaflow_proxy/mpd_processor.py index 1bf742e..ab5d2a9 100644 --- a/mediaflow_proxy/mpd_processor.py +++ b/mediaflow_proxy/mpd_processor.py @@ -664,60 +664,83 @@ def build_hls_playlist( init_proxy_url = request.url_for("init_endpoint") init_proxy_url = str(init_proxy_url.replace(scheme=get_original_scheme(request))) - for index, profile in enumerate(profiles): - segments = profile["segments"] - if not segments: + # Merge segments from all periods into a single ordered list. + # Multi-period MPDs can produce multiple profiles with the same unique_id (one per period); + # depth trimming must be applied to the combined segment list, not per-period. + merged: list[tuple[dict, dict]] = [] + for profile in profiles: + if not profile["segments"]: logger.warning(f"No segments found for profile {profile['id']}") continue + for seg in profile["segments"]: + merged.append((seg, profile)) - if is_live: - extinf_values_for_depth = [s["extinf"] for s in segments if "extinf" in s] - depth = _compute_live_playlist_depth(is_ts_mode, effective_start_offset, extinf_values_for_depth) - trimmed_segments = segments[-depth:] - else: - trimmed_segments = segments + if not merged: + return "\n".join(hls) - # Add headers for only the first profile - if index == 0: - first_segment = trimmed_segments[0] - extinf_values = [f["extinf"] for f in trimmed_segments if "extinf" in f] + if is_live: + extinf_values_for_depth = [e[0]["extinf"] for e in merged if "extinf" in e[0]] + depth = _compute_live_playlist_depth(is_ts_mode, effective_start_offset, extinf_values_for_depth) + merged = merged[-depth:] - # TS mode uses int(max)+1 to reduce buffer underruns in ExoPlayer - if is_ts_mode: - target_duration = int(max(extinf_values)) + 1 if extinf_values else 10 - else: - target_duration = math.ceil(max(extinf_values)) if extinf_values else 3 + # Emit playlist headers using the first trimmed segment's profile + first_segment, first_profile = merged[0] + extinf_values = [e[0]["extinf"] for e in merged if "extinf" in e[0]] - if is_live: - sequence = _compute_live_media_sequence(first_segment, profile, trimmed_segments) - else: - mpd_start_number = profile.get("segment_template_start_number") - sequence = first_segment.get("number") - if sequence is None: - sequence = mpd_start_number if mpd_start_number is not None else 1 + if is_ts_mode: + target_duration = int(max(extinf_values)) + 1 if extinf_values else 10 + else: + target_duration = math.ceil(max(extinf_values)) if extinf_values else 3 - hls.extend( - [ - f"#EXT-X-TARGETDURATION:{target_duration}", - f"#EXT-X-MEDIA-SEQUENCE:{sequence}", - ] - ) - # For live streams, don't set PLAYLIST-TYPE to allow sliding window - if not is_live: - hls.append("#EXT-X-PLAYLIST-TYPE:VOD") + if is_live: + sequence = _compute_live_media_sequence(first_segment, first_profile, [e[0] for e in merged]) + else: + mpd_start_number = first_profile.get("segment_template_start_number") + sequence = first_segment.get("number") + if sequence is None: + sequence = mpd_start_number if mpd_start_number is not None else 1 + hls.extend( + [ + f"#EXT-X-TARGETDURATION:{target_duration}", + f"#EXT-X-MEDIA-SEQUENCE:{sequence}", + ] + ) + # For live streams, don't set PLAYLIST-TYPE to allow sliding window + if not is_live: + hls.append("#EXT-X-PLAYLIST-TYPE:VOD") + + query_params = dict(request.query_params) + query_params.pop("profile_id", None) + query_params.pop("d", None) + query_params.pop("remux_to_ts", None) # per-request override; already resolved into endpoint choice + has_encrypted = query_params.pop("has_encrypted", False) + + current_init_url = None # track to detect period boundaries and re-emit EXT-X-MAP + need_discontinuity = False + + for segment, profile in merged: + duration = segment["extinf"] init_url = profile["initUrl"] # For SegmentBase profiles, we may have byte range for initialization segment init_range = profile.get("initRange") - query_params = dict(request.query_params) - query_params.pop("profile_id", None) - query_params.pop("d", None) - query_params.pop("remux_to_ts", None) # per-request override; already resolved into endpoint choice - has_encrypted = query_params.pop("has_encrypted", False) + # Check if this segment should be skipped + if skip_filter: + if skip_filter.should_skip_segment(duration): + skip_filter.advance_time(duration) + skipped_segments += 1 + need_discontinuity = True + continue + skip_filter.advance_time(duration) - # Add EXT-X-MAP for init segment (for live streams or when beneficial) - if use_map: + # Emit EXT-X-MAP when init URL changes (first segment or period boundary in fMP4 mode) + if use_map and init_url != current_init_url: + if current_init_url is not None: + # Period boundary: insert discontinuity before new init + hls.append("#EXT-X-DISCONTINUITY") + need_discontinuity = False + current_init_url = init_url init_query_params = { "init_url": init_url, "mime_type": profile["mimeType"], @@ -745,63 +768,50 @@ def build_hls_playlist( ) hls.append(f'#EXT-X-MAP:URI="{init_map_url}"') - need_discontinuity = False - for segment in trimmed_segments: - duration = segment["extinf"] + # Add discontinuity marker after skipped segments + if need_discontinuity: + hls.append("#EXT-X-DISCONTINUITY") + need_discontinuity = False - # Check if this segment should be skipped - if skip_filter: - if skip_filter.should_skip_segment(duration): - skip_filter.advance_time(duration) - skipped_segments += 1 - need_discontinuity = True - continue - skip_filter.advance_time(duration) + # Emit EXT-X-PROGRAM-DATE-TIME only for fMP4 (not TS) + program_date_time = segment.get("program_date_time") + if program_date_time and not is_ts_mode: + hls.append(f"#EXT-X-PROGRAM-DATE-TIME:{program_date_time}") + hls.append(f"#EXTINF:{duration:.3f},") - # Add discontinuity marker after skipped segments - if need_discontinuity: - hls.append("#EXT-X-DISCONTINUITY") - need_discontinuity = False + segment_query_params = { + "init_url": init_url, + "segment_url": segment["media"], + "mime_type": profile["mimeType"], + "is_live": "true" if is_live else "false", + } - # Emit EXT-X-PROGRAM-DATE-TIME only for fMP4 (not TS) - program_date_time = segment.get("program_date_time") - if program_date_time and not is_ts_mode: - hls.append(f"#EXT-X-PROGRAM-DATE-TIME:{program_date_time}") - hls.append(f"#EXTINF:{duration:.3f},") + # Add use_map flag so segment endpoint knows not to include init + if use_map and not is_ts_mode: + segment_query_params["use_map"] = "true" + elif is_ts_mode: + # TS segments are self-contained; init is always embedded by remuxer + segment_query_params["use_map"] = "false" - segment_query_params = { - "init_url": init_url, - "segment_url": segment["media"], - "mime_type": profile["mimeType"], - "is_live": "true" if is_live else "false", - } + # Add byte range parameters for SegmentBase + if init_range: + segment_query_params["init_range"] = init_range + # Segment may also have its own range (for SegmentBase) + if "initRange" in segment: + segment_query_params["init_range"] = segment["initRange"] + # Media byte range: bytes after the init segment (SegmentBase only) + if segment.get("mediaRange"): + segment_query_params["segment_range"] = segment["mediaRange"] - # Add use_map flag so segment endpoint knows not to include init - if use_map and not is_ts_mode: - segment_query_params["use_map"] = "true" - elif is_ts_mode: - # TS segments are self-contained; init is always embedded by remuxer - segment_query_params["use_map"] = "false" - - # Add byte range parameters for SegmentBase - if init_range: - segment_query_params["init_range"] = init_range - # Segment may also have its own range (for SegmentBase) - if "initRange" in segment: - segment_query_params["init_range"] = segment["initRange"] - # Media byte range: bytes after the init segment (SegmentBase only) - if segment.get("mediaRange"): - segment_query_params["segment_range"] = segment["mediaRange"] - - query_params.update(segment_query_params) - hls.append( - encode_mediaflow_proxy_url( - proxy_url, - query_params=query_params, - encryption_handler=encryption_handler if has_encrypted else None, - ) + query_params.update(segment_query_params) + hls.append( + encode_mediaflow_proxy_url( + proxy_url, + query_params=query_params, + encryption_handler=encryption_handler if has_encrypted else None, ) - added_segments += 1 + ) + added_segments += 1 if not mpd_dict["isLive"]: hls.append("#EXT-X-ENDLIST") diff --git a/mediaflow_proxy/remuxer/__pycache__/__init__.cpython-313.pyc b/mediaflow_proxy/remuxer/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..a281ab2 Binary files /dev/null and b/mediaflow_proxy/remuxer/__pycache__/__init__.cpython-313.pyc differ diff --git a/mediaflow_proxy/remuxer/__pycache__/ts_muxer.cpython-313.pyc b/mediaflow_proxy/remuxer/__pycache__/ts_muxer.cpython-313.pyc new file mode 100644 index 0000000..c62c5bf Binary files /dev/null and b/mediaflow_proxy/remuxer/__pycache__/ts_muxer.cpython-313.pyc differ diff --git a/mediaflow_proxy/remuxer/media_source.py b/mediaflow_proxy/remuxer/media_source.py index c3cbd5a..9e63502 100644 --- a/mediaflow_proxy/remuxer/media_source.py +++ b/mediaflow_proxy/remuxer/media_source.py @@ -198,7 +198,7 @@ class HTTPMediaSource: proxy=proxy_url, allow_redirects=True, ) as resp: - cl = resp.headers.get("content-length") + cl = resp.headers.get("content-length") if resp.status < 400 else None if cl: self._file_size = int(cl) else: diff --git a/mediaflow_proxy/routes/__pycache__/__init__.cpython-313.pyc b/mediaflow_proxy/routes/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..ee63950 Binary files /dev/null and b/mediaflow_proxy/routes/__pycache__/__init__.cpython-313.pyc differ diff --git a/mediaflow_proxy/routes/__pycache__/epg.cpython-313.pyc b/mediaflow_proxy/routes/__pycache__/epg.cpython-313.pyc new file mode 100644 index 0000000..e45b100 Binary files /dev/null and b/mediaflow_proxy/routes/__pycache__/epg.cpython-313.pyc differ diff --git a/mediaflow_proxy/routes/__pycache__/extractor.cpython-313.pyc b/mediaflow_proxy/routes/__pycache__/extractor.cpython-313.pyc new file mode 100644 index 0000000..09e4ea7 Binary files /dev/null and b/mediaflow_proxy/routes/__pycache__/extractor.cpython-313.pyc differ diff --git a/mediaflow_proxy/routes/__pycache__/playlist_builder.cpython-313.pyc b/mediaflow_proxy/routes/__pycache__/playlist_builder.cpython-313.pyc new file mode 100644 index 0000000..95522c4 Binary files /dev/null and b/mediaflow_proxy/routes/__pycache__/playlist_builder.cpython-313.pyc differ diff --git a/mediaflow_proxy/routes/__pycache__/proxy.cpython-313.pyc b/mediaflow_proxy/routes/__pycache__/proxy.cpython-313.pyc new file mode 100644 index 0000000..c55aaa4 Binary files /dev/null and b/mediaflow_proxy/routes/__pycache__/proxy.cpython-313.pyc differ diff --git a/mediaflow_proxy/routes/__pycache__/speedtest.cpython-313.pyc b/mediaflow_proxy/routes/__pycache__/speedtest.cpython-313.pyc new file mode 100644 index 0000000..1511ea4 Binary files /dev/null and b/mediaflow_proxy/routes/__pycache__/speedtest.cpython-313.pyc differ diff --git a/mediaflow_proxy/routes/__pycache__/xtream.cpython-313.pyc b/mediaflow_proxy/routes/__pycache__/xtream.cpython-313.pyc new file mode 100644 index 0000000..2a76d98 Binary files /dev/null and b/mediaflow_proxy/routes/__pycache__/xtream.cpython-313.pyc differ diff --git a/mediaflow_proxy/routes/playlist_builder.py b/mediaflow_proxy/routes/playlist_builder.py index 6951be5..0690c9c 100644 --- a/mediaflow_proxy/routes/playlist_builder.py +++ b/mediaflow_proxy/routes/playlist_builder.py @@ -16,7 +16,12 @@ playlist_builder_router = APIRouter() def rewrite_m3u_links_streaming( - m3u_lines_iterator: Iterator[str], base_url: str, api_password: Optional[str] + m3u_lines_iterator: Iterator[str], + base_url: str, + api_password: Optional[str], + max_res: bool = False, + redirect_stream: bool = True, + no_proxy: bool = True, ) -> Iterator[str]: """ Rewrites links from an M3U line iterator according to the specified rules, @@ -99,7 +104,13 @@ def rewrite_m3u_links_streaming( processed_url_content = f"{base_url}/proxy/hls/manifest.m3u8?d={encoded_url}" elif "vixsrc.to" in logical_line: encoded_url = urllib.parse.quote(logical_line, safe="") - processed_url_content = f"{base_url}/extractor/video?host=VixCloud&redirect_stream=true&d={encoded_url}&max_res=true&no_proxy=true" + processed_url_content = ( + f"{base_url}/extractor/video?host=VixCloud" + f"&redirect_stream={str(redirect_stream).lower()}" + f"&d={encoded_url}" + f"&max_res={str(max_res).lower()}" + f"&no_proxy={str(no_proxy).lower()}" + ) elif ".m3u8" in logical_line: encoded_url = urllib.parse.quote(logical_line, safe="") processed_url_content = f"{base_url}/proxy/hls/manifest.m3u8?d={encoded_url}" @@ -232,7 +243,14 @@ def parse_channel_entries(lines: list[str]) -> list[list[str]]: return entries -async def async_generate_combined_playlist(playlist_definitions: list[str], base_url: str, api_password: Optional[str]): +async def async_generate_combined_playlist( + playlist_definitions: list[str], + base_url: str, + api_password: Optional[str], + max_res: bool = False, + redirect_stream: bool = True, + no_proxy: bool = True, +): """Genera una playlist combinata da multiple definizioni, scaricando in parallelo.""" # Prepara i task di download download_tasks = [] @@ -318,7 +336,9 @@ async def async_generate_combined_playlist(playlist_definitions: list[str], base if should_proxy: # Usa un iteratore fittizio per processare una sola linea - rewritten_url_iter = rewrite_m3u_links_streaming(iter([url]), base_url, api_password) + rewritten_url_iter = rewrite_m3u_links_streaming( + iter([url]), base_url, api_password, max_res, redirect_stream, no_proxy + ) yield next(rewritten_url_iter, url) # Prende l'URL riscritto, con fallback all'originale else: yield url # Lascia l'URL invariato @@ -327,7 +347,9 @@ async def async_generate_combined_playlist(playlist_definitions: list[str], base for playlist_data in unsorted_playlists_data: lines_iterator = iter(playlist_data["lines"]) if playlist_data["proxy"]: - lines_iterator = rewrite_m3u_links_streaming(lines_iterator, base_url, api_password) + lines_iterator = rewrite_m3u_links_streaming( + lines_iterator, base_url, api_password, max_res, redirect_stream, no_proxy + ) for line in yield_header_once(lines_iterator): yield line @@ -336,14 +358,26 @@ async def async_generate_combined_playlist(playlist_definitions: list[str], base @playlist_builder_router.get("/playlist") async def proxy_handler( request: Request, - d: str = Query(..., description="Query string con le definizioni delle playlist", alias="d"), - api_password: Optional[str] = Query(None, description="Password API per MFP"), + d: str = Query(..., description="Semicolon-separated playlist URL definitions"), + api_password: Optional[str] = Query(None, description="MFP API password"), + max_res: bool = Query(False, description="Redirect extractor streams to the highest available resolution"), + redirect_stream: bool = Query(True, description="Redirect extractor to the direct stream URL"), + no_proxy: bool = Query(True, description="Skip proxying internal segment URLs for extractor streams"), ): """ - Endpoint per il proxy delle playlist M3U con supporto MFP. + Proxy and merge one or more M3U playlists, rewriting stream URLs through MFP. - Formato query string: playlist1&url1;playlist2&url2 - Esempio: https://mfp.com:pass123&http://provider.com/playlist.m3u + **`d` format** — semicolon-separated playlist definitions: + - Plain URL: `http://provider.com/playlist.m3u` + - With sort prefix: `sort:http://provider.com/playlist.m3u` + - Without rewriting: `no_proxy:http://provider.com/playlist.m3u` + - Combinable: `sort:no_proxy:http://provider.com/playlist.m3u` + + Stream URLs in each playlist are automatically rewritten to route through the + appropriate MFP proxy endpoint (`/proxy/hls`, `/proxy/mpd`, `/extractor/video`). + + **Extractor parameters** (`max_res`, `redirect_stream`, `no_proxy`) apply to + streams that go through `/extractor/video` (e.g. vixsrc.to links). """ try: if not d: @@ -371,7 +405,9 @@ async def proxy_handler( base_url = base_url_part async def generate_response(): - async for line in async_generate_combined_playlist(playlist_definitions, base_url, api_password): + async for line in async_generate_combined_playlist( + playlist_definitions, base_url, api_password, max_res, redirect_stream, no_proxy + ): yield line return StreamingResponse( diff --git a/mediaflow_proxy/routes/proxy.py b/mediaflow_proxy/routes/proxy.py index 09e44cb..ffbb9d9 100644 --- a/mediaflow_proxy/routes/proxy.py +++ b/mediaflow_proxy/routes/proxy.py @@ -1,9 +1,13 @@ +import asyncio +import ipaddress import logging import re from functools import lru_cache from typing import Annotated -from urllib.parse import quote, unquote +from urllib.parse import quote, unquote, urlparse +import aiohttp +from aiohttp import ClientTimeout from fastapi import Request, Depends, APIRouter, Query, HTTPException, Response from fastapi.datastructures import QueryParams @@ -30,6 +34,7 @@ from mediaflow_proxy.utils.extractor_helpers import ( check_and_extract_sportsonline_stream, ) from mediaflow_proxy.utils.hls_prebuffer import hls_prebuffer +from mediaflow_proxy.utils.http_client import create_aiohttp_session from mediaflow_proxy.utils.http_utils import ( get_proxy_headers, ProxyRequestHeaders, @@ -452,6 +457,203 @@ def _build_hls_query_params(request: Request, destination: str) -> str: return "&".join(params) +MEDIAFLOW_IP_PLACEHOLDER = "{mediaflow_ip}" +_IP_DETECT_URLS = ["https://api.ipify.org", "https://checkip.amazonaws.com"] +_cached_public_ip: str | None = None +_public_ip_lock: asyncio.Lock | None = None + + +async def _resolve_public_ip() -> str | None: + """Return MediaFlow's public IP: configured value, cached detection, or None.""" + global _cached_public_ip, _public_ip_lock + + if settings.public_ip: + return settings.public_ip + if _cached_public_ip: + return _cached_public_ip + + if _public_ip_lock is None: + _public_ip_lock = asyncio.Lock() + + async with _public_ip_lock: + if _cached_public_ip: + return _cached_public_ip + for url in _IP_DETECT_URLS: + try: + async with aiohttp.ClientSession() as sess: + async with sess.get(url, timeout=ClientTimeout(total=5)) as resp: + ip = (await resp.text()).strip() + if ip: + _cached_public_ip = ip + return ip + except Exception: + continue + return None + + +_IP_DISCLOSURE_HEADERS = frozenset( + { + "x-forwarded-for", + "x-real-ip", + "x-client-ip", + "true-client-ip", + "forwarded", + "cf-connecting-ip", + "x-original-forwarded-for", + "x-cluster-client-ip", + } +) + +_HOP_BY_HOP_HEADERS = frozenset( + { + "connection", + "keep-alive", + "proxy-authenticate", + "proxy-authorization", + "te", + "trailers", + "transfer-encoding", + "upgrade", + } +) + +# Headers that callers must not inject via h_* params — they enable host-header +# injection, HTTP request smuggling, or break the session's own framing logic. +_BLOCKED_REQUEST_HEADERS = frozenset( + { + "host", + "content-length", + "transfer-encoding", + "content-encoding", + } +) + + +def _check_forward_destination(destination: str) -> None: + """SSRF guard and allowlist/denylist check for /proxy/forward.""" + parsed = urlparse(destination) + + # Only allow http(s) — blocks file://, ftp://, gopher://, data:, javascript:, etc. + scheme = (parsed.scheme or "").lower() + if scheme not in ("http", "https"): + raise HTTPException( + status_code=400, + detail=f"Invalid URL scheme '{scheme}'. Only http and https are allowed.", + ) + + hostname = (parsed.hostname or "").lower() + + if not hostname: + raise HTTPException(status_code=400, detail="Invalid destination URL: no hostname") + + # Allowlist check (if configured) + allowed = settings.forward_allowed_hosts + if allowed and hostname not in {h.lower() for h in allowed}: + raise HTTPException(status_code=403, detail=f"Host '{hostname}' is not in forward_allowed_hosts") + + # Explicit denylist + denied = {h.lower() for h in settings.forward_denied_hosts} + if hostname in denied: + raise HTTPException(status_code=403, detail=f"Host '{hostname}' is denied") + + # Always block loopback literals + if hostname in ("localhost", "ip6-localhost", "ip6-loopback"): + raise HTTPException(status_code=403, detail="Forwarding to localhost is not allowed") + + # Block private/loopback/link-local IPs given as literals + try: + addr = ipaddress.ip_address(hostname) + if addr.is_private or addr.is_loopback or addr.is_link_local or addr.is_unspecified: + raise HTTPException(status_code=403, detail="Forwarding to private/loopback addresses is not allowed") + except ValueError: + pass # Not a numeric IP — hostname-based SSRF is the caller's responsibility + + +@proxy_router.api_route( + "/forward", + methods=["GET", "HEAD", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"], +) +async def proxy_forward_endpoint( + request: Request, + proxy_headers: Annotated[ProxyRequestHeaders, Depends(get_proxy_headers)], + destination: str = Query(..., description="The destination URL to forward to.", alias="d"), +): + """ + Generic transparent HTTP forwarding endpoint. + + Forwards any HTTP method (including POST with body) to the given destination URL + using MediaFlow's outbound IP. Useful for IP-bound API calls (e.g. debrid service + APIs, extractor POST requests) where the request must appear to originate from + MediaFlow rather than the addon server. + + Pass outbound headers via ``h_=`` query params. The upstream response + (status code, headers, body) is returned verbatim. IP-disclosure headers are + stripped before forwarding so the caller's IP is not leaked. + """ + destination = sanitize_url(destination) + _check_forward_destination(destination) + + # Strip IP-disclosure headers — the whole point is hiding the origin IP + for h in _IP_DISCLOSURE_HEADERS: + proxy_headers.request.pop(h, None) + + # Strip headers that could enable host-header injection or HTTP smuggling + for h in _BLOCKED_REQUEST_HEADERS: + proxy_headers.request.pop(h, None) + + body = await request.body() + if len(body) > settings.forward_max_request_body_bytes: + raise HTTPException(status_code=413, detail="Request body too large") + max_response_bytes = settings.forward_max_response_body_bytes + + # Substitute {mediaflow_ip} placeholder with MediaFlow's actual public IP so + # debrid services receive a consistent ip= parameter that matches the TCP source. + if MEDIAFLOW_IP_PLACEHOLDER in destination or MEDIAFLOW_IP_PLACEHOLDER.encode() in body: + public_ip = await _resolve_public_ip() + if public_ip: + destination = destination.replace(MEDIAFLOW_IP_PLACEHOLDER, public_ip) + body = body.replace(MEDIAFLOW_IP_PLACEHOLDER.encode(), public_ip.encode()) + + async with create_aiohttp_session(destination) as (session, proxy_url): + try: + async with session.request( + method=request.method, + url=destination, + headers=proxy_headers.request, + data=body if body else None, + proxy=proxy_url, + timeout=ClientTimeout(total=settings.transport_config.timeout), + allow_redirects=True, + ) as upstream_resp: + resp_body = await upstream_resp.content.read(max_response_bytes + 1) + if len(resp_body) > max_response_bytes: + raise HTTPException(status_code=502, detail="Upstream response too large") + + resp_headers = {k: v for k, v in upstream_resp.headers.items() if k.lower() not in _HOP_BY_HOP_HEADERS} + resp_headers.update(proxy_headers.response) + + return Response( + content=resp_body, + status_code=upstream_resp.status, + headers=resp_headers, + ) + except aiohttp.ClientResponseError as e: + raise HTTPException(status_code=e.status, detail=f"Upstream error: {e.message}") + except asyncio.TimeoutError: + raise HTTPException(status_code=504, detail="Upstream timeout") + except aiohttp.ClientError as e: + raise HTTPException(status_code=502, detail=f"Upstream connection error: {e}") + + +@proxy_router.get("/ip") +async def get_public_ip_endpoint(): + """Return MediaFlow's public IP address.""" + ip = await _resolve_public_ip() + if ip is None: + raise HTTPException(status_code=503, detail="Could not determine public IP") + return {"ip": ip} + + @proxy_router.head("/stream") @proxy_router.get("/stream") @proxy_router.head("/stream/{filename:path}") diff --git a/mediaflow_proxy/schemas.py b/mediaflow_proxy/schemas.py index bd3682c..a7751fb 100644 --- a/mediaflow_proxy/schemas.py +++ b/mediaflow_proxy/schemas.py @@ -308,6 +308,7 @@ class ExtractorURLParams(GenericParams): "Sportsonline", "Vavoo", "VidFast", + "StreamHG", ] = Field(..., description="The host to extract the URL from.") destination: Annotated[str, Field(description="The URL of the stream.", alias="d")] redirect_stream: bool = Field(False, description="Whether to redirect to the stream endpoint automatically.") diff --git a/mediaflow_proxy/speedtest/__pycache__/__init__.cpython-313.pyc b/mediaflow_proxy/speedtest/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..ee30852 Binary files /dev/null and b/mediaflow_proxy/speedtest/__pycache__/__init__.cpython-313.pyc differ diff --git a/mediaflow_proxy/speedtest/__pycache__/models.cpython-313.pyc b/mediaflow_proxy/speedtest/__pycache__/models.cpython-313.pyc new file mode 100644 index 0000000..c071492 Binary files /dev/null and b/mediaflow_proxy/speedtest/__pycache__/models.cpython-313.pyc differ diff --git a/mediaflow_proxy/speedtest/__pycache__/service.cpython-313.pyc b/mediaflow_proxy/speedtest/__pycache__/service.cpython-313.pyc new file mode 100644 index 0000000..2ed6130 Binary files /dev/null and b/mediaflow_proxy/speedtest/__pycache__/service.cpython-313.pyc differ diff --git a/mediaflow_proxy/speedtest/providers/__pycache__/all_debrid.cpython-313.pyc b/mediaflow_proxy/speedtest/providers/__pycache__/all_debrid.cpython-313.pyc index 89bfd1d..dc460d0 100644 Binary files a/mediaflow_proxy/speedtest/providers/__pycache__/all_debrid.cpython-313.pyc and b/mediaflow_proxy/speedtest/providers/__pycache__/all_debrid.cpython-313.pyc differ diff --git a/mediaflow_proxy/speedtest/providers/__pycache__/base.cpython-313.pyc b/mediaflow_proxy/speedtest/providers/__pycache__/base.cpython-313.pyc index c440c9e..1fe50c3 100644 Binary files a/mediaflow_proxy/speedtest/providers/__pycache__/base.cpython-313.pyc and b/mediaflow_proxy/speedtest/providers/__pycache__/base.cpython-313.pyc differ diff --git a/mediaflow_proxy/speedtest/providers/__pycache__/real_debrid.cpython-313.pyc b/mediaflow_proxy/speedtest/providers/__pycache__/real_debrid.cpython-313.pyc index 060f3a9..9afb968 100644 Binary files a/mediaflow_proxy/speedtest/providers/__pycache__/real_debrid.cpython-313.pyc and b/mediaflow_proxy/speedtest/providers/__pycache__/real_debrid.cpython-313.pyc differ diff --git a/mediaflow_proxy/static/speedtest.html b/mediaflow_proxy/static/speedtest.html index e161c5d..02f97c6 100644 --- a/mediaflow_proxy/static/speedtest.html +++ b/mediaflow_proxy/static/speedtest.html @@ -234,7 +234,7 @@
-