diff --git a/mediaflow_proxy/__pycache__/__init__.cpython-313.pyc b/mediaflow_proxy/__pycache__/__init__.cpython-313.pyc
new file mode 100644
index 0000000..0c4e097
Binary files /dev/null and b/mediaflow_proxy/__pycache__/__init__.cpython-313.pyc differ
diff --git a/mediaflow_proxy/__pycache__/configs.cpython-313.pyc b/mediaflow_proxy/__pycache__/configs.cpython-313.pyc
new file mode 100644
index 0000000..d9c5251
Binary files /dev/null and b/mediaflow_proxy/__pycache__/configs.cpython-313.pyc differ
diff --git a/mediaflow_proxy/__pycache__/const.cpython-313.pyc b/mediaflow_proxy/__pycache__/const.cpython-313.pyc
new file mode 100644
index 0000000..d1bbd34
Binary files /dev/null and b/mediaflow_proxy/__pycache__/const.cpython-313.pyc differ
diff --git a/mediaflow_proxy/__pycache__/handlers.cpython-313.pyc b/mediaflow_proxy/__pycache__/handlers.cpython-313.pyc
new file mode 100644
index 0000000..5de2e62
Binary files /dev/null and b/mediaflow_proxy/__pycache__/handlers.cpython-313.pyc differ
diff --git a/mediaflow_proxy/__pycache__/main.cpython-313.pyc b/mediaflow_proxy/__pycache__/main.cpython-313.pyc
new file mode 100644
index 0000000..87f467e
Binary files /dev/null and b/mediaflow_proxy/__pycache__/main.cpython-313.pyc differ
diff --git a/mediaflow_proxy/__pycache__/middleware.cpython-313.pyc b/mediaflow_proxy/__pycache__/middleware.cpython-313.pyc
new file mode 100644
index 0000000..1dab5e5
Binary files /dev/null and b/mediaflow_proxy/__pycache__/middleware.cpython-313.pyc differ
diff --git a/mediaflow_proxy/__pycache__/mpd_processor.cpython-313.pyc b/mediaflow_proxy/__pycache__/mpd_processor.cpython-313.pyc
new file mode 100644
index 0000000..e348eeb
Binary files /dev/null and b/mediaflow_proxy/__pycache__/mpd_processor.cpython-313.pyc differ
diff --git a/mediaflow_proxy/__pycache__/schemas.cpython-313.pyc b/mediaflow_proxy/__pycache__/schemas.cpython-313.pyc
new file mode 100644
index 0000000..0ba9e5e
Binary files /dev/null and b/mediaflow_proxy/__pycache__/schemas.cpython-313.pyc differ
diff --git a/mediaflow_proxy/configs.py b/mediaflow_proxy/configs.py
index d80c3a1..fb1f722 100644
--- a/mediaflow_proxy/configs.py
+++ b/mediaflow_proxy/configs.py
@@ -114,6 +114,22 @@ class Settings(BaseSettings):
upstream_retry_delay: float = 1.0 # Delay (seconds) between retry attempts.
graceful_stream_end: bool = True # Return valid empty playlist instead of error when upstream fails.
+ # Generic HTTP forward endpoint (/proxy/forward) settings
+ forward_allowed_hosts: list[str] = [] # Allowlist of hostnames. Empty = allow all.
+ forward_denied_hosts: list[str] = [] # Extra denied hostnames (in addition to automatic private-IP guard).
+ forward_max_body_bytes: int = (
+ 10 * 1024 * 1024
+ ) # Deprecated: use forward_max_request_body_bytes / forward_max_response_body_bytes.
+ forward_max_request_body_bytes: int = (
+ 50 * 1024 * 1024
+ ) # Max incoming request body size (50 MB — allows NZB/torrent file uploads).
+ forward_max_response_body_bytes: int = (
+ 10 * 1024 * 1024
+ ) # Max upstream response body size (10 MB — API JSON responses).
+ public_ip: str | None = (
+ None # MediaFlow's own public IP. Used to substitute {mediaflow_ip} in forwarded requests. Auto-detected if not set.
+ )
+
# EPG proxy settings
epg_cache_ttl: int = 3600 # TTL (seconds) for cached EPG/XMLTV data. Default 1 hour.
diff --git a/mediaflow_proxy/drm/__pycache__/__init__.cpython-313.pyc b/mediaflow_proxy/drm/__pycache__/__init__.cpython-313.pyc
new file mode 100644
index 0000000..894a426
Binary files /dev/null and b/mediaflow_proxy/drm/__pycache__/__init__.cpython-313.pyc differ
diff --git a/mediaflow_proxy/drm/__pycache__/decrypter.cpython-313.pyc b/mediaflow_proxy/drm/__pycache__/decrypter.cpython-313.pyc
new file mode 100644
index 0000000..0f1bddb
Binary files /dev/null and b/mediaflow_proxy/drm/__pycache__/decrypter.cpython-313.pyc differ
diff --git a/mediaflow_proxy/extractors/F16Px.py b/mediaflow_proxy/extractors/F16Px.py
index ea244eb..dbc7e40 100644
--- a/mediaflow_proxy/extractors/F16Px.py
+++ b/mediaflow_proxy/extractors/F16Px.py
@@ -1,11 +1,13 @@
# https://github.com/Gujal00/ResolveURL/blob/55c7f66524ebd65bc1f88650614e627b00167fa0/script.module.resolveurl/lib/resolveurl/plugins/f16px.py
-
import base64
import json
import re
+import time
+import hmac
+import hashlib
+import os
from typing import Dict, Any
from urllib.parse import urlparse
-
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
from mediaflow_proxy.utils import python_aesgcm
@@ -17,16 +19,91 @@ class F16PxExtractor(BaseExtractor):
@staticmethod
def _b64url_decode(value: str) -> bytes:
- # base64url -> base64
value = value.replace("-", "+").replace("_", "/")
padding = (-len(value)) % 4
if padding:
value += "=" * padding
return base64.b64decode(value)
+    @staticmethod
+    def _b64url_encode(data: bytes) -> str:
+        """Encode *data* as base64url text with the trailing "=" padding removed."""
+        encoded = base64.urlsafe_b64encode(data).decode("ascii")
+        return encoded.rstrip("=")
+
def _join_key_parts(self, parts) -> bytes:
return b"".join(self._b64url_decode(p) for p in parts)
+    @staticmethod
+    def _pick_best(sources: list) -> str:
+        """Return the URL of the highest-quality source.
+
+        Sources are ranked by their ``label`` read as an integer; a label that
+        is missing or is not an integer ranks as 0. Among equally ranked
+        sources the earliest one wins.
+
+        Raises:
+            ExtractorError: if no source carries a non-empty ``url``.
+        """
+
+        def label_key(s):
+            try:
+                return int(s.get("label", 0))
+            except (ValueError, TypeError):
+                return 0
+
+        # Ignore entries without a usable URL so that an empty or malformed
+        # list is reported as an extractor error, not a bare KeyError/IndexError.
+        usable = [s for s in sources if isinstance(s, dict) and s.get("url")]
+        if not usable:
+            raise ExtractorError("F16PX: Empty source URL")
+
+        # max() returns the first maximal item, like a stable descending sort.
+        return max(usable, key=label_key)["url"]
+
+    def _make_fingerprint(self) -> dict:
+        """Build a ``{"fingerprint": ...}`` request body with fresh random ids.
+
+        The token has the form ``<claims>.<signature>``: the claims are compact
+        JSON, base64url-encoded, with an expiry 600 seconds after issue.
+        """
+        ids = {
+            "viewer_id": self._b64url_encode(os.urandom(16)),
+            "device_id": self._b64url_encode(os.urandom(16)),
+        }
+        issued_at = int(time.time())
+
+        # Key order matters: the compact JSON built here is what gets signed.
+        claims = {**ids, "confidence": 0.93, "iat": issued_at, "exp": issued_at + 600}
+        encoded_claims = self._b64url_encode(json.dumps(claims, separators=(",", ":")).encode())
+
+        # HMAC-SHA256 over the encoded claims, keyed with an empty key.
+        digest = hmac.new(b"", encoded_claims.encode(), hashlib.sha256).digest()
+
+        return {
+            "fingerprint": {
+                "token": f"{encoded_claims}.{self._b64url_encode(digest)}",
+                **ids,
+                "confidence": 0.93,
+            }
+        }
+
+    def _decrypt_playback(self, pb: dict) -> list:
+        """Return the decrypted ``sources`` list of a playback object.
+
+        The primary ``iv`` / ``key_parts`` / ``payload`` triple is tried first.
+        When it yields no sources, or cannot be decoded at all, each key in
+        ``decrypt_keys`` is tried against ``iv2`` / ``payload2``.
+
+        Returns:
+            The first non-empty ``sources`` list found, otherwise ``[]``.
+
+        Raises:
+            Exception: the error hit while decoding the primary payload, but
+                only after the fallback has failed to produce sources too.
+        """
+
+        def open_sources(key: bytes, iv: bytes, payload: bytes) -> list:
+            plain = python_aesgcm.new(key).open(iv, payload)
+            if not plain:
+                # Nothing was decrypted (open() may hand back None).
+                return []
+            return json.loads(plain.decode("utf-8", "ignore")).get("sources") or []
+
+        primary_error = None
+        try:
+            iv = self._b64url_decode(pb["iv"])
+            key = self._join_key_parts(pb["key_parts"])
+            payload = self._b64url_decode(pb["payload"])
+            sources = open_sources(key, iv, payload)
+            if sources:
+                return sources
+        except Exception as exc:
+            # Remember the failure instead of raising right away: a broken
+            # primary payload must not prevent the fallback from being tried.
+            primary_error = exc
+
+        # Fallback: payload2 + decrypt_keys
+        decrypt_keys = pb.get("decrypt_keys") or {}
+        iv2 = pb.get("iv2")
+        pay2 = pb.get("payload2")
+        if iv2 and pay2 and decrypt_keys:
+            iv2 = self._b64url_decode(iv2)
+            pay2 = self._b64url_decode(pay2)
+            for key_b64 in decrypt_keys.values():
+                try:
+                    sources = open_sources(self._b64url_decode(key_b64), iv2, pay2)
+                except Exception:
+                    # Best effort: a key that does not fit is skipped.
+                    continue
+                if sources:
+                    return sources
+
+        if primary_error is not None:
+            # Keep the caller's "Decryption failed (...)" detail for this case.
+            raise primary_error
+        return []
+
async def extract(self, url: str) -> Dict[str, Any]:
parsed = urlparse(url)
host = parsed.netloc
@@ -35,26 +112,32 @@ class F16PxExtractor(BaseExtractor):
match = re.search(r"/e/([A-Za-z0-9]+)", parsed.path or "")
if not match:
raise ExtractorError("F16PX: Invalid embed URL")
-
media_id = match.group(1)
+
api_url = f"https://{host}/api/videos/{media_id}/embed/playback"
headers = self.base_headers.copy()
- headers["referer"] = f"https://{host}/"
+ headers["referer"] = f"https://{host}/e/{media_id}"
+ headers["origin"] = origin
+ headers["content-type"] = "application/json"
+
+ resp = await self._make_request(
+ api_url,
+ headers=headers,
+ method="POST",
+ json=self._make_fingerprint(),
+ )
- resp = await self._make_request(api_url, headers=headers)
try:
data = resp.json()
except Exception:
raise ExtractorError("F16PX: Invalid JSON response")
# Case 1: plain sources
- if "sources" in data and data["sources"]:
- src = data["sources"][0].get("url")
- if not src:
- raise ExtractorError("F16PX: Empty source URL")
+ if data.get("sources"):
+ best = self._pick_best(data["sources"])
return {
- "destination_url": src,
+ "destination_url": best,
"request_headers": headers,
"mediaflow_endpoint": self.mediaflow_endpoint,
}
@@ -65,40 +148,24 @@ class F16PxExtractor(BaseExtractor):
raise ExtractorError("F16PX: No playback data")
try:
- iv = self._b64url_decode(pb["iv"]) # nonce
- key = self._join_key_parts(pb["key_parts"]) # AES key
- payload = self._b64url_decode(pb["payload"]) # ciphertext + tag
-
- cipher = python_aesgcm.new(key)
- decrypted = cipher.open(iv, payload) # AAD = '' like ResolveURL
-
- if decrypted is None:
- raise ExtractorError("F16PX: GCM authentication failed")
-
- decrypted_json = json.loads(decrypted.decode("utf-8", "ignore"))
-
- except ExtractorError:
- raise
+ sources = self._decrypt_playback(pb)
except Exception as e:
raise ExtractorError(f"F16PX: Decryption failed ({e})")
- sources = decrypted_json.get("sources") or []
if not sources:
raise ExtractorError("F16PX: No sources after decryption")
- best = sources[0].get("url")
- if not best:
- raise ExtractorError("F16PX: Empty source URL after decryption")
-
- self.base_headers.clear()
- self.base_headers["referer"] = f"{origin}/"
- self.base_headers["origin"] = origin
- self.base_headers["Accept-Language"] = "en-US,en;q=0.5"
- self.base_headers["Accept"] = "*/*"
- self.base_headers["user-agent"] = "Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0"
+ best = self._pick_best(sources)
+ out_headers = {
+ "referer": f"{origin}/",
+ "origin": origin,
+ "Accept-Language": "en-US,en;q=0.5",
+ "Accept": "*/*",
+ "user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0",
+ }
return {
"destination_url": best,
- "request_headers": self.base_headers,
+ "request_headers": out_headers,
"mediaflow_endpoint": self.mediaflow_endpoint,
}
diff --git a/mediaflow_proxy/extractors/__pycache__/F16Px.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/F16Px.cpython-313.pyc
new file mode 100644
index 0000000..78043c5
Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/F16Px.cpython-313.pyc differ
diff --git a/mediaflow_proxy/extractors/__pycache__/__init__.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/__init__.cpython-313.pyc
new file mode 100644
index 0000000..749cd67
Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/__init__.cpython-313.pyc differ
diff --git a/mediaflow_proxy/extractors/__pycache__/base.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/base.cpython-313.pyc
new file mode 100644
index 0000000..93ef8f8
Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/base.cpython-313.pyc differ
diff --git a/mediaflow_proxy/extractors/__pycache__/city.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/city.cpython-313.pyc
new file mode 100644
index 0000000..2f4a5f5
Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/city.cpython-313.pyc differ
diff --git a/mediaflow_proxy/extractors/__pycache__/doodstream.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/doodstream.cpython-313.pyc
new file mode 100644
index 0000000..8205e25
Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/doodstream.cpython-313.pyc differ
diff --git a/mediaflow_proxy/extractors/__pycache__/factory.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/factory.cpython-313.pyc
new file mode 100644
index 0000000..1a0a856
Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/factory.cpython-313.pyc differ
diff --git a/mediaflow_proxy/extractors/__pycache__/fastream.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/fastream.cpython-313.pyc
new file mode 100644
index 0000000..1df2c60
Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/fastream.cpython-313.pyc differ
diff --git a/mediaflow_proxy/extractors/__pycache__/filelions.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/filelions.cpython-313.pyc
new file mode 100644
index 0000000..ce5c70c
Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/filelions.cpython-313.pyc differ
diff --git a/mediaflow_proxy/extractors/__pycache__/filemoon.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/filemoon.cpython-313.pyc
new file mode 100644
index 0000000..9d619f8
Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/filemoon.cpython-313.pyc differ
diff --git a/mediaflow_proxy/extractors/__pycache__/gupload.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/gupload.cpython-313.pyc
new file mode 100644
index 0000000..3cfabd5
Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/gupload.cpython-313.pyc differ
diff --git a/mediaflow_proxy/extractors/__pycache__/livetv.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/livetv.cpython-313.pyc
new file mode 100644
index 0000000..66cc774
Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/livetv.cpython-313.pyc differ
diff --git a/mediaflow_proxy/extractors/__pycache__/lulustream.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/lulustream.cpython-313.pyc
new file mode 100644
index 0000000..84c4700
Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/lulustream.cpython-313.pyc differ
diff --git a/mediaflow_proxy/extractors/__pycache__/maxstream.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/maxstream.cpython-313.pyc
new file mode 100644
index 0000000..cf5bd45
Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/maxstream.cpython-313.pyc differ
diff --git a/mediaflow_proxy/extractors/__pycache__/mixdrop.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/mixdrop.cpython-313.pyc
new file mode 100644
index 0000000..fd7d554
Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/mixdrop.cpython-313.pyc differ
diff --git a/mediaflow_proxy/extractors/__pycache__/okru.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/okru.cpython-313.pyc
new file mode 100644
index 0000000..b1d46d4
Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/okru.cpython-313.pyc differ
diff --git a/mediaflow_proxy/extractors/__pycache__/sportsonline.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/sportsonline.cpython-313.pyc
new file mode 100644
index 0000000..157e39e
Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/sportsonline.cpython-313.pyc differ
diff --git a/mediaflow_proxy/extractors/__pycache__/streamhg.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/streamhg.cpython-313.pyc
new file mode 100644
index 0000000..8c9aaca
Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/streamhg.cpython-313.pyc differ
diff --git a/mediaflow_proxy/extractors/__pycache__/streamtape.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/streamtape.cpython-313.pyc
new file mode 100644
index 0000000..819f34b
Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/streamtape.cpython-313.pyc differ
diff --git a/mediaflow_proxy/extractors/__pycache__/streamwish.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/streamwish.cpython-313.pyc
new file mode 100644
index 0000000..9199897
Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/streamwish.cpython-313.pyc differ
diff --git a/mediaflow_proxy/extractors/__pycache__/supervideo.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/supervideo.cpython-313.pyc
new file mode 100644
index 0000000..b9bae5a
Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/supervideo.cpython-313.pyc differ
diff --git a/mediaflow_proxy/extractors/__pycache__/turbovidplay.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/turbovidplay.cpython-313.pyc
new file mode 100644
index 0000000..8508602
Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/turbovidplay.cpython-313.pyc differ
diff --git a/mediaflow_proxy/extractors/__pycache__/uqload.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/uqload.cpython-313.pyc
new file mode 100644
index 0000000..2804ac9
Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/uqload.cpython-313.pyc differ
diff --git a/mediaflow_proxy/extractors/__pycache__/vavoo.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/vavoo.cpython-313.pyc
new file mode 100644
index 0000000..7b89933
Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/vavoo.cpython-313.pyc differ
diff --git a/mediaflow_proxy/extractors/__pycache__/vidfast.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/vidfast.cpython-313.pyc
new file mode 100644
index 0000000..f27f8fe
Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/vidfast.cpython-313.pyc differ
diff --git a/mediaflow_proxy/extractors/__pycache__/vidmoly.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/vidmoly.cpython-313.pyc
new file mode 100644
index 0000000..90dff7a
Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/vidmoly.cpython-313.pyc differ
diff --git a/mediaflow_proxy/extractors/__pycache__/vidoza.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/vidoza.cpython-313.pyc
new file mode 100644
index 0000000..ac937b8
Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/vidoza.cpython-313.pyc differ
diff --git a/mediaflow_proxy/extractors/__pycache__/vixcloud.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/vixcloud.cpython-313.pyc
new file mode 100644
index 0000000..9618c2a
Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/vixcloud.cpython-313.pyc differ
diff --git a/mediaflow_proxy/extractors/__pycache__/voe.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/voe.cpython-313.pyc
new file mode 100644
index 0000000..eee49e7
Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/voe.cpython-313.pyc differ
diff --git a/mediaflow_proxy/extractors/doodstream.py b/mediaflow_proxy/extractors/doodstream.py
index b6ba592..2b840bc 100644
--- a/mediaflow_proxy/extractors/doodstream.py
+++ b/mediaflow_proxy/extractors/doodstream.py
@@ -152,6 +152,25 @@ class DoodStreamExtractor(BaseExtractor):
html = r.text
base_url = f"https://{urlparse(final_url).netloc}"
+ # Some pages embed a JS redirect instead of a real HTTP redirect.
+ js_redirect = re.search(r'(?:window\.location|location\.href)\s*=\s*[\'"]https?://([^/\'"]+)', html)
+ if js_redirect:
+ redirected_host = js_redirect.group(1)
+ redirect_url = f"https://{redirected_host}/e/{video_id}"
+ logger.debug("JS redirect detected → %s", redirect_url)
+ async with AsyncSession() as s2:
+ r2 = await s2.get(
+ redirect_url,
+ impersonate="chrome",
+ headers={"Referer": f"https://{redirected_host}/"},
+ timeout=30,
+ allow_redirects=True,
+ **({"proxy": proxy} if proxy else {}),
+ )
+ final_url = str(r2.url)
+ html = r2.text
+ base_url = f"https://{urlparse(final_url).netloc}"
+
if "pass_md5" not in html:
if "turnstile" in html.lower() or "captcha_l" in html:
raise ExtractorError(
@@ -197,6 +216,14 @@ class DoodStreamExtractor(BaseExtractor):
"Ensure BYPARR_URL is set for reliable extraction."
)
+ # CloudFlare R2 storage URLs are self-contained — no salt/token needed.
+ if "cloudflarestorage." in base_stream.lower():
+ return {
+ "destination_url": base_stream,
+ "request_headers": headers,
+ "mediaflow_endpoint": "proxy_stream_endpoint",
+ }
+
token_match = re.search(r"token=([^&\s'\"]+)", html)
if not token_match:
raise ExtractorError("DoodStream: token not found in embed HTML")
diff --git a/mediaflow_proxy/extractors/factory.py b/mediaflow_proxy/extractors/factory.py
index 8f02737..8193658 100644
--- a/mediaflow_proxy/extractors/factory.py
+++ b/mediaflow_proxy/extractors/factory.py
@@ -25,6 +25,7 @@ from mediaflow_proxy.extractors.vixcloud import VixCloudExtractor
from mediaflow_proxy.extractors.fastream import FastreamExtractor
from mediaflow_proxy.extractors.voe import VoeExtractor
from mediaflow_proxy.extractors.vidfast import VidFastExtractor
+from mediaflow_proxy.extractors.streamhg import StreamHGExtractor
class ExtractorFactory:
@@ -55,6 +56,7 @@ class ExtractorFactory:
"Voe": VoeExtractor,
"Sportsonline": SportsonlineExtractor,
"VidFast": VidFastExtractor,
+ "StreamHG": StreamHGExtractor,
}
@classmethod
diff --git a/mediaflow_proxy/extractors/filemoon.py b/mediaflow_proxy/extractors/filemoon.py
index e67d3f1..9dd7fd6 100644
--- a/mediaflow_proxy/extractors/filemoon.py
+++ b/mediaflow_proxy/extractors/filemoon.py
@@ -1,9 +1,45 @@
-import re
+import base64
+import json
from typing import Dict, Any
-from urllib.parse import urlparse, urljoin
+from urllib.parse import urlparse
+
+from cryptography.hazmat.primitives.ciphers.aead import AESGCM
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
-from mediaflow_proxy.utils.packed import eval_solver
+
+
+def _base64url_decode(input_str: str) -> bytes:
+    """Return the bytes encoded by a base64url string, padded or not."""
+    # Map the URL-safe alphabet back to the standard one in a single pass.
+    standard = input_str.translate(str.maketrans("-_", "+/"))
+    # Restore the "=" padding needed to reach a multiple of four characters.
+    standard += "=" * (-len(standard) % 4)
+    return base64.b64decode(standard)
+
+
+def _combine_key_parts(key_parts: list) -> bytes:
+    """Decode every base64url key part and concatenate the results in order."""
+    return b"".join(map(_base64url_decode, key_parts))
+
+
+def _decrypt_playback(playback: dict) -> dict:
+    """Decrypt the AES-GCM encrypted playback payload and parse it as JSON.
+
+    Args:
+        playback: mapping with base64url ``key_parts`` (a list), ``iv`` and
+            ``payload``; the payload is the ciphertext followed by the
+            16-byte GCM tag.
+
+    Returns:
+        The decoded JSON document.
+
+    Raises:
+        ExtractorError: if a field is missing or malformed, decryption fails,
+            or the plaintext is not valid UTF-8 JSON.
+    """
+    try:
+        key = _combine_key_parts(playback["key_parts"])
+        iv = _base64url_decode(playback["iv"])
+        payload = _base64url_decode(playback["payload"])
+    except (KeyError, TypeError, ValueError) as e:
+        # binascii.Error is a ValueError, so malformed base64 lands here too.
+        raise ExtractorError(f"Invalid playback data: {e}") from e
+
+    try:
+        # decrypt() takes the ciphertext with the tag appended, which is how
+        # the payload arrives, so it is passed through without splitting.
+        plaintext = AESGCM(key).decrypt(iv, payload, None)
+    except Exception as e:
+        raise ExtractorError(f"Decryption failed: {e}") from e
+
+    try:
+        return json.loads(plaintext.decode("utf-8"))
+    except ValueError as e:
+        # Covers both UnicodeDecodeError and json.JSONDecodeError.
+        raise ExtractorError(f"Decrypted playback is not valid JSON: {e}") from e
class FileMoonExtractor(BaseExtractor):
@@ -12,41 +48,49 @@ class FileMoonExtractor(BaseExtractor):
self.mediaflow_endpoint = "hls_manifest_proxy"
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
- response = await self._make_request(url)
+ # URL format: https://filemoon.sx/e/{code} or https://filemoon.sx/d/{code}
+ parsed = urlparse(url)
+ path = parsed.path.rstrip("/")
+ code = path.split("/")[-1] if path else None
- pattern = r'iframe.*?src=["\'](.*?)["\']'
- match = re.search(pattern, response.text, re.DOTALL)
- if not match:
- raise ExtractorError("Failed to extract iframe URL")
+ if not code or code in ("e", "d"):
+ raise ExtractorError(f"Could not extract video code from URL: {url}")
- iframe_url = match.group(1)
-
- parsed = urlparse(str(response.url))
- base_url = f"{parsed.scheme}://{parsed.netloc}"
-
- if iframe_url.startswith("//"):
- iframe_url = f"{parsed.scheme}:{iframe_url}"
- elif not urlparse(iframe_url).scheme:
- iframe_url = urljoin(base_url, iframe_url)
+ api_url = f"{parsed.scheme}://{parsed.netloc}/api/videos/{code}"
headers = {"Referer": url}
- patterns = [r'file:"(.*?)"']
+ response = await self._make_request(api_url, headers=headers)
- final_url = await eval_solver(
- self,
- iframe_url,
- headers,
- patterns,
- )
+ try:
+ data = response.json()
+ except Exception as e:
+ raise ExtractorError(f"Failed to parse API response: {e}")
- test_resp = await self._make_request(final_url, headers=headers)
- if test_resp.status == 404:
- raise ExtractorError("Stream not found (404)")
+ if "error" in data:
+ raise ExtractorError(f"FileMoon API error: {data['error']}")
+
+ playback = data.get("playback")
+ if not playback or not playback.get("key_parts") or not playback.get("payload"):
+ raise ExtractorError("No playback data available")
+
+ decrypted = _decrypt_playback(playback)
+
+ sources = decrypted.get("sources", [])
+ hls_source = None
+ for source in sources:
+ if source.get("mime_type") == "application/vnd.apple.mpegurl":
+ hls_source = source
+ break
+
+ if not hls_source:
+ raise ExtractorError("No HLS source found in decrypted playback")
+
+ destination_url = hls_source["url"]
self.base_headers["referer"] = url
return {
- "destination_url": final_url,
+ "destination_url": destination_url,
"request_headers": self.base_headers,
"mediaflow_endpoint": self.mediaflow_endpoint,
}
diff --git a/mediaflow_proxy/extractors/maxstream.py b/mediaflow_proxy/extractors/maxstream.py
index 707368e..32afbed 100644
--- a/mediaflow_proxy/extractors/maxstream.py
+++ b/mediaflow_proxy/extractors/maxstream.py
@@ -1,67 +1,651 @@
+"""Maxstream URL extractor — full uprot bypass pipeline.
+
+Solves the problem of `uprot.net` redirects on `/msf/`, `/msfi/` and
+`/msfld/` paths used by Italian aggregators (CB01, EuroStreaming, etc).
+
+Key features:
+ 1. TLS-fingerprint-resistant fetch via curl_cffi (chrome131 impersonation)
+ 2. 4-digit captcha solver with multi-engine OCR ensemble:
+ ddddocr (primary) → tesseract (fallback) → CF Workers AI (3rd, opt-in)
+ 3. Honeypot URL filtering on the post-captcha page
+ 4. uprots/uprotem → maxstream redirect chain follow with cookie continuity
+ 5. /msfld/ folder picker (season + episode kwargs from MFP route)
+ 6. Optional persistent URL cache (when paired with services/uprot_warmer.py)
+
+All advanced features are guarded by lazy imports — if `curl_cffi`,
+`pytesseract`, `Pillow` or `ddddocr` are not installed the extractor
+falls back to the previous behaviour for `/msf/` URLs and skips
+`/msfld/` cleanly.
+
+Activation:
+ CF_WORKER_OCR_URL e.g. https://easyproxy-ocr.user.workers.dev
+ CF_WORKER_OCR_AUTH Worker AUTH_TOKEN
+
+Credits: pipeline ported from NelloStream
+(https://github.com/vitouchiha/nello-stream) — `workers/cfworker.js`
+functions `_uprotBypassWithCookies`, `_extractMaxstreamVideo`,
+`_aiOcrDigits`, `_handleScheduledUprotRefresh`. All credit to Nello.
+"""
+
+import asyncio
+import logging
+import os
import re
-from typing import Dict, Any
+from typing import Any, Dict, Optional
+from urllib.parse import urljoin, urlparse, urlencode
from bs4 import BeautifulSoup
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
+logger = logging.getLogger(__name__)
+
class MaxstreamExtractor(BaseExtractor):
- """Maxstream URL extractor."""
+ """Maxstream URL extractor with full uprot bypass pipeline."""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.mediaflow_endpoint = "hls_manifest_proxy"
+ # Persistent cookie jar across the uprot → maxstream redirect chain.
+ # PHPSESSID + captcha hash + uprot_session must travel together for
+ # the post-captcha redirect to be honoured by the maxstream WAF.
+ self.cookies: Dict[str, str] = {}
+ self._last_solve_text: Optional[str] = None
- async def get_uprot(self, link: str):
- """Extract MaxStream URL."""
- if "msf" in link:
- link = link.replace("msf", "mse")
- response = await self._make_request(link)
- soup = BeautifulSoup(response.text, "lxml")
- maxstream_url = soup.find("a")
- maxstream_url = maxstream_url.get("href")
- return maxstream_url
+ # ───────────────────────── HTTP layer ──────────────────────────────
+
+    async def _curl_cffi_fetch(
+        self,
+        url: str,
+        method: str = "GET",
+        data: Optional[Any] = None,
+        headers: Optional[Dict[str, str]] = None,
+        allow_redirects: bool = True,
+        timeout: int = 30,
+    ) -> Optional[Dict[str, Any]]:
+        """Fetch *url* through curl_cffi while impersonating Chrome 131.
+
+        Browser impersonation is used because uprot.net is reported to inspect
+        TLS fingerprints and to serve captcha pages or 503s to clients it
+        recognises as bots. The blocking curl_cffi call runs in the event
+        loop's default executor. Cookies stored on the instance are sent with
+        the request, and cookies found on the response are merged back in.
+
+        Returns:
+            ``None`` when curl_cffi cannot be imported, so the caller can fall
+            back to another fetch path. Otherwise a dict with ``ok`` (status
+            below 400), ``status``, ``text``, ``content``, ``url``, ``headers``
+            and ``cookies``. A request that raises is reported as ``ok=False``
+            and ``status=0`` with an extra ``error`` string, not propagated.
+        """
+        try:
+            from curl_cffi import requests as cffi_requests
+        except ImportError:
+            logger.debug("curl_cffi not installed — uprot bypass disabled")
+            return None
+
+        request_headers = dict(self.base_headers)
+        request_headers.update(headers or {})
+        sends_raw_body = method.upper() == "POST" and isinstance(data, (str, bytes))
+        if sends_raw_body:
+            # Only applied when the caller did not pick a content type itself.
+            request_headers.setdefault("content-type", "application/x-www-form-urlencoded")
+
+        proxy = self._get_proxy(url)
+        proxies_arg = {"http": proxy, "https": proxy} if proxy else None
+
+        def _blocking_fetch() -> Dict[str, Any]:
+            try:
+                resp = cffi_requests.request(
+                    method,
+                    url,
+                    headers=request_headers,
+                    data=data,
+                    cookies=dict(self.cookies) if self.cookies else None,
+                    proxies=proxies_arg,
+                    impersonate="chrome131",
+                    timeout=timeout,
+                    allow_redirects=allow_redirects,
+                )
+                try:
+                    received = {c.name: c.value for c in resp.cookies.jar}
+                except Exception:
+                    # Fall back to a plain mapping when the jar is not usable.
+                    received = dict(resp.cookies) if resp.cookies else {}
+                return {
+                    "ok": resp.status_code < 400,
+                    "status": resp.status_code,
+                    "text": resp.text,
+                    "content": resp.content,
+                    "url": str(resp.url),
+                    "headers": dict(resp.headers),
+                    "cookies": received,
+                }
+            except Exception as e:
+                # Failures are returned as data, not raised.
+                return {
+                    "ok": False,
+                    "status": 0,
+                    "text": "",
+                    "content": b"",
+                    "url": url,
+                    "headers": {},
+                    "cookies": {},
+                    "error": str(e),
+                }
+
+        result = await asyncio.get_running_loop().run_in_executor(None, _blocking_fetch)
+        if result.get("cookies"):
+            self.cookies.update(result["cookies"])
+        return result
+
+ # ─────────────────────── Honeypot filter ───────────────────────────
+
+ @staticmethod
+ def _strip_uprot_honeypots(html: str) -> str:
+ """Remove uprot's anti-bot honeypot blocks before URL extraction.
+
+ The post-captcha success page intentionally hides decoy URLs in:
+          1. HTML comments (<!-- ... -->)
+          2. Hidden <div>…</div> blocks containing fake
+ "Continue" buttons that point to placeholder URLs like
+ `maxstream.video/uprots/123456789012` (12 sequential digits).
+
+ A naive regex grabs the FIRST match (the honeypot). Strip both
+ before parsing so the regex/BS4 see only the visible-to-user DOM.
+ """
+ no_comments = re.sub(r"", "", html)
+ no_hidden = re.sub(
+ r"