mirror of
https://github.com/UrloMythus/UnHided.git
synced 2026-06-10 09:10:23 +00:00
new version
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -114,6 +114,22 @@ class Settings(BaseSettings):
|
||||
upstream_retry_delay: float = 1.0 # Delay (seconds) between retry attempts.
|
||||
graceful_stream_end: bool = True # Return valid empty playlist instead of error when upstream fails.
|
||||
|
||||
# Generic HTTP forward endpoint (/proxy/forward) settings
|
||||
forward_allowed_hosts: list[str] = [] # Allowlist of hostnames. Empty = allow all.
|
||||
forward_denied_hosts: list[str] = [] # Extra denied hostnames (in addition to automatic private-IP guard).
|
||||
forward_max_body_bytes: int = (
|
||||
10 * 1024 * 1024
|
||||
) # Deprecated: use forward_max_request_body_bytes / forward_max_response_body_bytes.
|
||||
forward_max_request_body_bytes: int = (
|
||||
50 * 1024 * 1024
|
||||
) # Max incoming request body size (50 MB — allows NZB/torrent file uploads).
|
||||
forward_max_response_body_bytes: int = (
|
||||
10 * 1024 * 1024
|
||||
) # Max upstream response body size (10 MB — API JSON responses).
|
||||
public_ip: str | None = (
|
||||
None # MediaFlow's own public IP. Used to substitute {mediaflow_ip} in forwarded requests. Auto-detected if not set.
|
||||
)
|
||||
|
||||
# EPG proxy settings
|
||||
epg_cache_ttl: int = 3600 # TTL (seconds) for cached EPG/XMLTV data. Default 1 hour.
|
||||
|
||||
|
||||
Binary file not shown.
Binary file not shown.
@@ -1,11 +1,13 @@
|
||||
# https://github.com/Gujal00/ResolveURL/blob/55c7f66524ebd65bc1f88650614e627b00167fa0/script.module.resolveurl/lib/resolveurl/plugins/f16px.py
|
||||
|
||||
import base64
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import hmac
|
||||
import hashlib
|
||||
import os
|
||||
from typing import Dict, Any
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
from mediaflow_proxy.utils import python_aesgcm
|
||||
|
||||
@@ -17,16 +19,91 @@ class F16PxExtractor(BaseExtractor):
|
||||
|
||||
@staticmethod
|
||||
def _b64url_decode(value: str) -> bytes:
|
||||
# base64url -> base64
|
||||
value = value.replace("-", "+").replace("_", "/")
|
||||
padding = (-len(value)) % 4
|
||||
if padding:
|
||||
value += "=" * padding
|
||||
return base64.b64decode(value)
|
||||
|
||||
@staticmethod
|
||||
def _b64url_encode(data: bytes) -> str:
|
||||
return base64.urlsafe_b64encode(data).rstrip(b"=").decode()
|
||||
|
||||
def _join_key_parts(self, parts) -> bytes:
|
||||
return b"".join(self._b64url_decode(p) for p in parts)
|
||||
|
||||
@staticmethod
|
||||
def _pick_best(sources: list) -> str:
|
||||
"""Return URL of highest-quality source by numeric label."""
|
||||
|
||||
def label_key(s):
|
||||
try:
|
||||
return int(s.get("label", 0))
|
||||
except (ValueError, TypeError):
|
||||
return 0
|
||||
|
||||
return sorted(sources, key=label_key, reverse=True)[0]["url"]
|
||||
|
||||
def _make_fingerprint(self) -> dict:
|
||||
viewer_id = self._b64url_encode(os.urandom(16))
|
||||
device_id = self._b64url_encode(os.urandom(16))
|
||||
now = int(time.time())
|
||||
|
||||
token_payload = {
|
||||
"viewer_id": viewer_id,
|
||||
"device_id": device_id,
|
||||
"confidence": 0.93,
|
||||
"iat": now,
|
||||
"exp": now + 600,
|
||||
}
|
||||
payload_b64 = self._b64url_encode(json.dumps(token_payload, separators=(",", ":")).encode())
|
||||
sig = hmac.new(b"", payload_b64.encode(), hashlib.sha256).digest()
|
||||
token = f"{payload_b64}.{self._b64url_encode(sig)}"
|
||||
|
||||
return {
|
||||
"fingerprint": {
|
||||
"token": token,
|
||||
"viewer_id": viewer_id,
|
||||
"device_id": device_id,
|
||||
"confidence": 0.93,
|
||||
}
|
||||
}
|
||||
|
||||
def _decrypt_playback(self, pb: dict) -> list:
|
||||
"""Decrypt primary payload, fall back to payload2+decrypt_keys."""
|
||||
iv = self._b64url_decode(pb["iv"])
|
||||
key = self._join_key_parts(pb["key_parts"])
|
||||
payload = self._b64url_decode(pb["payload"])
|
||||
|
||||
cipher = python_aesgcm.new(key)
|
||||
decrypted = cipher.open(iv, payload)
|
||||
|
||||
if decrypted is not None:
|
||||
sources = json.loads(decrypted.decode("utf-8", "ignore")).get("sources") or []
|
||||
if sources:
|
||||
return sources
|
||||
|
||||
# Fallback: payload2 + decrypt_keys
|
||||
decrypt_keys = pb.get("decrypt_keys") or {}
|
||||
iv2 = pb.get("iv2")
|
||||
pay2 = pb.get("payload2")
|
||||
if iv2 and pay2 and decrypt_keys:
|
||||
iv2 = self._b64url_decode(iv2)
|
||||
pay2 = self._b64url_decode(pay2)
|
||||
for key_b64 in decrypt_keys.values():
|
||||
try:
|
||||
key2 = self._b64url_decode(key_b64)
|
||||
cipher2 = python_aesgcm.new(key2)
|
||||
decrypted = cipher2.open(iv2, pay2)
|
||||
if decrypted:
|
||||
sources = json.loads(decrypted.decode("utf-8", "ignore")).get("sources") or []
|
||||
if sources:
|
||||
return sources
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
return []
|
||||
|
||||
async def extract(self, url: str) -> Dict[str, Any]:
|
||||
parsed = urlparse(url)
|
||||
host = parsed.netloc
|
||||
@@ -35,26 +112,32 @@ class F16PxExtractor(BaseExtractor):
|
||||
match = re.search(r"/e/([A-Za-z0-9]+)", parsed.path or "")
|
||||
if not match:
|
||||
raise ExtractorError("F16PX: Invalid embed URL")
|
||||
|
||||
media_id = match.group(1)
|
||||
|
||||
api_url = f"https://{host}/api/videos/{media_id}/embed/playback"
|
||||
|
||||
headers = self.base_headers.copy()
|
||||
headers["referer"] = f"https://{host}/"
|
||||
headers["referer"] = f"https://{host}/e/{media_id}"
|
||||
headers["origin"] = origin
|
||||
headers["content-type"] = "application/json"
|
||||
|
||||
resp = await self._make_request(
|
||||
api_url,
|
||||
headers=headers,
|
||||
method="POST",
|
||||
json=self._make_fingerprint(),
|
||||
)
|
||||
|
||||
resp = await self._make_request(api_url, headers=headers)
|
||||
try:
|
||||
data = resp.json()
|
||||
except Exception:
|
||||
raise ExtractorError("F16PX: Invalid JSON response")
|
||||
|
||||
# Case 1: plain sources
|
||||
if "sources" in data and data["sources"]:
|
||||
src = data["sources"][0].get("url")
|
||||
if not src:
|
||||
raise ExtractorError("F16PX: Empty source URL")
|
||||
if data.get("sources"):
|
||||
best = self._pick_best(data["sources"])
|
||||
return {
|
||||
"destination_url": src,
|
||||
"destination_url": best,
|
||||
"request_headers": headers,
|
||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||
}
|
||||
@@ -65,40 +148,24 @@ class F16PxExtractor(BaseExtractor):
|
||||
raise ExtractorError("F16PX: No playback data")
|
||||
|
||||
try:
|
||||
iv = self._b64url_decode(pb["iv"]) # nonce
|
||||
key = self._join_key_parts(pb["key_parts"]) # AES key
|
||||
payload = self._b64url_decode(pb["payload"]) # ciphertext + tag
|
||||
|
||||
cipher = python_aesgcm.new(key)
|
||||
decrypted = cipher.open(iv, payload) # AAD = '' like ResolveURL
|
||||
|
||||
if decrypted is None:
|
||||
raise ExtractorError("F16PX: GCM authentication failed")
|
||||
|
||||
decrypted_json = json.loads(decrypted.decode("utf-8", "ignore"))
|
||||
|
||||
except ExtractorError:
|
||||
raise
|
||||
sources = self._decrypt_playback(pb)
|
||||
except Exception as e:
|
||||
raise ExtractorError(f"F16PX: Decryption failed ({e})")
|
||||
|
||||
sources = decrypted_json.get("sources") or []
|
||||
if not sources:
|
||||
raise ExtractorError("F16PX: No sources after decryption")
|
||||
|
||||
best = sources[0].get("url")
|
||||
if not best:
|
||||
raise ExtractorError("F16PX: Empty source URL after decryption")
|
||||
|
||||
self.base_headers.clear()
|
||||
self.base_headers["referer"] = f"{origin}/"
|
||||
self.base_headers["origin"] = origin
|
||||
self.base_headers["Accept-Language"] = "en-US,en;q=0.5"
|
||||
self.base_headers["Accept"] = "*/*"
|
||||
self.base_headers["user-agent"] = "Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0"
|
||||
best = self._pick_best(sources)
|
||||
|
||||
out_headers = {
|
||||
"referer": f"{origin}/",
|
||||
"origin": origin,
|
||||
"Accept-Language": "en-US,en;q=0.5",
|
||||
"Accept": "*/*",
|
||||
"user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0",
|
||||
}
|
||||
return {
|
||||
"destination_url": best,
|
||||
"request_headers": self.base_headers,
|
||||
"request_headers": out_headers,
|
||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||
}
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -152,6 +152,25 @@ class DoodStreamExtractor(BaseExtractor):
|
||||
html = r.text
|
||||
base_url = f"https://{urlparse(final_url).netloc}"
|
||||
|
||||
# Some pages embed a JS redirect instead of a real HTTP redirect.
|
||||
js_redirect = re.search(r'(?:window\.location|location\.href)\s*=\s*[\'"]https?://([^/\'"]+)', html)
|
||||
if js_redirect:
|
||||
redirected_host = js_redirect.group(1)
|
||||
redirect_url = f"https://{redirected_host}/e/{video_id}"
|
||||
logger.debug("JS redirect detected → %s", redirect_url)
|
||||
async with AsyncSession() as s2:
|
||||
r2 = await s2.get(
|
||||
redirect_url,
|
||||
impersonate="chrome",
|
||||
headers={"Referer": f"https://{redirected_host}/"},
|
||||
timeout=30,
|
||||
allow_redirects=True,
|
||||
**({"proxy": proxy} if proxy else {}),
|
||||
)
|
||||
final_url = str(r2.url)
|
||||
html = r2.text
|
||||
base_url = f"https://{urlparse(final_url).netloc}"
|
||||
|
||||
if "pass_md5" not in html:
|
||||
if "turnstile" in html.lower() or "captcha_l" in html:
|
||||
raise ExtractorError(
|
||||
@@ -197,6 +216,14 @@ class DoodStreamExtractor(BaseExtractor):
|
||||
"Ensure BYPARR_URL is set for reliable extraction."
|
||||
)
|
||||
|
||||
# CloudFlare R2 storage URLs are self-contained — no salt/token needed.
|
||||
if "cloudflarestorage." in base_stream.lower():
|
||||
return {
|
||||
"destination_url": base_stream,
|
||||
"request_headers": headers,
|
||||
"mediaflow_endpoint": "proxy_stream_endpoint",
|
||||
}
|
||||
|
||||
token_match = re.search(r"token=([^&\s'\"]+)", html)
|
||||
if not token_match:
|
||||
raise ExtractorError("DoodStream: token not found in embed HTML")
|
||||
|
||||
@@ -25,6 +25,7 @@ from mediaflow_proxy.extractors.vixcloud import VixCloudExtractor
|
||||
from mediaflow_proxy.extractors.fastream import FastreamExtractor
|
||||
from mediaflow_proxy.extractors.voe import VoeExtractor
|
||||
from mediaflow_proxy.extractors.vidfast import VidFastExtractor
|
||||
from mediaflow_proxy.extractors.streamhg import StreamHGExtractor
|
||||
|
||||
|
||||
class ExtractorFactory:
|
||||
@@ -55,6 +56,7 @@ class ExtractorFactory:
|
||||
"Voe": VoeExtractor,
|
||||
"Sportsonline": SportsonlineExtractor,
|
||||
"VidFast": VidFastExtractor,
|
||||
"StreamHG": StreamHGExtractor,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -1,9 +1,45 @@
|
||||
import re
|
||||
import base64
|
||||
import json
|
||||
from typing import Dict, Any
|
||||
from urllib.parse import urlparse, urljoin
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
from mediaflow_proxy.utils.packed import eval_solver
|
||||
|
||||
|
||||
def _base64url_decode(input_str: str) -> bytes:
|
||||
"""Decode a base64url-encoded string to bytes."""
|
||||
padded = input_str.replace("-", "+").replace("_", "/")
|
||||
padding = 4 - len(padded) % 4
|
||||
if padding != 4:
|
||||
padded += "=" * padding
|
||||
return base64.b64decode(padded)
|
||||
|
||||
|
||||
def _combine_key_parts(key_parts: list) -> bytes:
|
||||
"""Combine base64url-encoded key parts into a single key."""
|
||||
decoded = [_base64url_decode(part) for part in key_parts]
|
||||
return b"".join(decoded)
|
||||
|
||||
|
||||
def _decrypt_playback(playback: dict) -> dict:
|
||||
"""Decrypt AES-256-GCM encrypted playback payload."""
|
||||
key = _combine_key_parts(playback["key_parts"])
|
||||
iv = _base64url_decode(playback["iv"])
|
||||
payload = _base64url_decode(playback["payload"])
|
||||
|
||||
# GCM auth tag is the last 16 bytes of the payload
|
||||
tag = payload[-16:]
|
||||
ciphertext = payload[:-16]
|
||||
|
||||
aesgcm = AESGCM(key)
|
||||
try:
|
||||
plaintext = aesgcm.decrypt(iv, ciphertext + tag, None)
|
||||
except Exception as e:
|
||||
raise ExtractorError(f"Decryption failed: {e}")
|
||||
|
||||
return json.loads(plaintext.decode("utf-8"))
|
||||
|
||||
|
||||
class FileMoonExtractor(BaseExtractor):
|
||||
@@ -12,41 +48,49 @@ class FileMoonExtractor(BaseExtractor):
|
||||
self.mediaflow_endpoint = "hls_manifest_proxy"
|
||||
|
||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||
response = await self._make_request(url)
|
||||
# URL format: https://filemoon.sx/e/{code} or https://filemoon.sx/d/{code}
|
||||
parsed = urlparse(url)
|
||||
path = parsed.path.rstrip("/")
|
||||
code = path.split("/")[-1] if path else None
|
||||
|
||||
pattern = r'iframe.*?src=["\'](.*?)["\']'
|
||||
match = re.search(pattern, response.text, re.DOTALL)
|
||||
if not match:
|
||||
raise ExtractorError("Failed to extract iframe URL")
|
||||
if not code or code in ("e", "d"):
|
||||
raise ExtractorError(f"Could not extract video code from URL: {url}")
|
||||
|
||||
iframe_url = match.group(1)
|
||||
|
||||
parsed = urlparse(str(response.url))
|
||||
base_url = f"{parsed.scheme}://{parsed.netloc}"
|
||||
|
||||
if iframe_url.startswith("//"):
|
||||
iframe_url = f"{parsed.scheme}:{iframe_url}"
|
||||
elif not urlparse(iframe_url).scheme:
|
||||
iframe_url = urljoin(base_url, iframe_url)
|
||||
api_url = f"{parsed.scheme}://{parsed.netloc}/api/videos/{code}"
|
||||
|
||||
headers = {"Referer": url}
|
||||
patterns = [r'file:"(.*?)"']
|
||||
response = await self._make_request(api_url, headers=headers)
|
||||
|
||||
final_url = await eval_solver(
|
||||
self,
|
||||
iframe_url,
|
||||
headers,
|
||||
patterns,
|
||||
)
|
||||
try:
|
||||
data = response.json()
|
||||
except Exception as e:
|
||||
raise ExtractorError(f"Failed to parse API response: {e}")
|
||||
|
||||
test_resp = await self._make_request(final_url, headers=headers)
|
||||
if test_resp.status == 404:
|
||||
raise ExtractorError("Stream not found (404)")
|
||||
if "error" in data:
|
||||
raise ExtractorError(f"FileMoon API error: {data['error']}")
|
||||
|
||||
playback = data.get("playback")
|
||||
if not playback or not playback.get("key_parts") or not playback.get("payload"):
|
||||
raise ExtractorError("No playback data available")
|
||||
|
||||
decrypted = _decrypt_playback(playback)
|
||||
|
||||
sources = decrypted.get("sources", [])
|
||||
hls_source = None
|
||||
for source in sources:
|
||||
if source.get("mime_type") == "application/vnd.apple.mpegurl":
|
||||
hls_source = source
|
||||
break
|
||||
|
||||
if not hls_source:
|
||||
raise ExtractorError("No HLS source found in decrypted playback")
|
||||
|
||||
destination_url = hls_source["url"]
|
||||
|
||||
self.base_headers["referer"] = url
|
||||
|
||||
return {
|
||||
"destination_url": final_url,
|
||||
"destination_url": destination_url,
|
||||
"request_headers": self.base_headers,
|
||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||
}
|
||||
|
||||
@@ -1,67 +1,651 @@
|
||||
"""Maxstream URL extractor — full uprot bypass pipeline.
|
||||
|
||||
Solves the problem of `uprot.net` redirects on `/msf/`, `/msfi/` and
|
||||
`/msfld/` paths used by Italian aggregators (CB01, EuroStreaming, etc).
|
||||
|
||||
Key features:
|
||||
1. TLS-fingerprint-resistant fetch via curl_cffi (chrome131 impersonation)
|
||||
2. 4-digit captcha solver with multi-engine OCR ensemble:
|
||||
ddddocr (primary) → tesseract (fallback) → CF Workers AI (3rd, opt-in)
|
||||
3. Honeypot URL filtering on the post-captcha page
|
||||
4. uprots/uprotem → maxstream redirect chain follow with cookie continuity
|
||||
5. /msfld/ folder picker (season + episode kwargs from MFP route)
|
||||
6. Optional persistent URL cache (when paired with services/uprot_warmer.py)
|
||||
|
||||
All advanced features are guarded by lazy imports — if `curl_cffi`,
|
||||
`pytesseract`, `Pillow` or `ddddocr` are not installed the extractor
|
||||
falls back to the previous behaviour for `/msf/` URLs and skips
|
||||
`/msfld/` cleanly.
|
||||
|
||||
Activation:
|
||||
CF_WORKER_OCR_URL e.g. https://easyproxy-ocr.user.workers.dev
|
||||
CF_WORKER_OCR_AUTH Worker AUTH_TOKEN
|
||||
|
||||
Credits: pipeline ported from NelloStream
|
||||
(https://github.com/vitouchiha/nello-stream) — `workers/cfworker.js`
|
||||
functions `_uprotBypassWithCookies`, `_extractMaxstreamVideo`,
|
||||
`_aiOcrDigits`, `_handleScheduledUprotRefresh`. All credit to Nello.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from typing import Dict, Any
|
||||
from typing import Any, Dict, Optional
|
||||
from urllib.parse import urljoin, urlparse, urlencode
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class MaxstreamExtractor(BaseExtractor):
|
||||
"""Maxstream URL extractor."""
|
||||
"""Maxstream URL extractor with full uprot bypass pipeline."""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.mediaflow_endpoint = "hls_manifest_proxy"
|
||||
# Persistent cookie jar across the uprot → maxstream redirect chain.
|
||||
# PHPSESSID + captcha hash + uprot_session must travel together for
|
||||
# the post-captcha redirect to be honoured by the maxstream WAF.
|
||||
self.cookies: Dict[str, str] = {}
|
||||
self._last_solve_text: Optional[str] = None
|
||||
|
||||
async def get_uprot(self, link: str):
|
||||
"""Extract MaxStream URL."""
|
||||
if "msf" in link:
|
||||
link = link.replace("msf", "mse")
|
||||
# ───────────────────────── HTTP layer ──────────────────────────────
|
||||
|
||||
async def _curl_cffi_fetch(
|
||||
self,
|
||||
url: str,
|
||||
method: str = "GET",
|
||||
data: Optional[Any] = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
allow_redirects: bool = True,
|
||||
timeout: int = 30,
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Browser-impersonated fetch via curl_cffi.
|
||||
|
||||
uprot.net inspects TLS fingerprints; aiohttp's JA3 is recognised as
|
||||
a bot within a few requests and served captcha pages or 503 even
|
||||
from clean residential IPs. curl_cffi with `impersonate="chrome131"`
|
||||
replays a real Chrome JA3 + ALPN order, so uprot serves the real
|
||||
redirect link or the (legitimately-protected) captcha page.
|
||||
|
||||
Returns None if curl_cffi is not installed (caller falls back to
|
||||
BaseExtractor._make_request for the simpler legacy /msf/ path).
|
||||
"""
|
||||
try:
|
||||
from curl_cffi import requests as cffi_requests
|
||||
except ImportError:
|
||||
logger.debug("curl_cffi not installed — uprot bypass disabled")
|
||||
return None
|
||||
|
||||
merged_headers = dict(self.base_headers)
|
||||
if headers:
|
||||
merged_headers.update(headers)
|
||||
if method.upper() == "POST" and isinstance(data, (str, bytes)):
|
||||
merged_headers.setdefault("content-type", "application/x-www-form-urlencoded")
|
||||
|
||||
proxy = self._get_proxy(url)
|
||||
proxies_arg = {"http": proxy, "https": proxy} if proxy else None
|
||||
|
||||
loop = asyncio.get_running_loop()
|
||||
|
||||
def _do_request():
|
||||
try:
|
||||
req_cookies = dict(self.cookies) if self.cookies else None
|
||||
r = cffi_requests.request(
|
||||
method,
|
||||
url,
|
||||
headers=merged_headers,
|
||||
data=data,
|
||||
cookies=req_cookies,
|
||||
proxies=proxies_arg,
|
||||
impersonate="chrome131",
|
||||
timeout=timeout,
|
||||
allow_redirects=allow_redirects,
|
||||
)
|
||||
cookies = {}
|
||||
try:
|
||||
cookies = {c.name: c.value for c in r.cookies.jar}
|
||||
except Exception:
|
||||
cookies = dict(r.cookies) if r.cookies else {}
|
||||
return {
|
||||
"ok": r.status_code < 400,
|
||||
"status": r.status_code,
|
||||
"text": r.text,
|
||||
"content": r.content,
|
||||
"url": str(r.url),
|
||||
"headers": dict(r.headers),
|
||||
"cookies": cookies,
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
"ok": False,
|
||||
"status": 0,
|
||||
"text": "",
|
||||
"content": b"",
|
||||
"url": url,
|
||||
"headers": {},
|
||||
"cookies": {},
|
||||
"error": str(e),
|
||||
}
|
||||
|
||||
result = await loop.run_in_executor(None, _do_request)
|
||||
if result.get("cookies"):
|
||||
self.cookies.update(result["cookies"])
|
||||
return result
|
||||
|
||||
# ─────────────────────── Honeypot filter ───────────────────────────
|
||||
|
||||
@staticmethod
|
||||
def _strip_uprot_honeypots(html: str) -> str:
|
||||
"""Remove uprot's anti-bot honeypot blocks before URL extraction.
|
||||
|
||||
The post-captcha success page intentionally hides decoy URLs in:
|
||||
1. HTML comments (<!-- … -->)
|
||||
2. <div style="display:none">…</div> blocks containing fake
|
||||
"Continue" buttons that point to placeholder URLs like
|
||||
`maxstream.video/uprots/123456789012` (12 sequential digits).
|
||||
|
||||
A naive regex grabs the FIRST match (the honeypot). Strip both
|
||||
before parsing so the regex/BS4 see only the visible-to-user DOM.
|
||||
"""
|
||||
no_comments = re.sub(r"<!--[\s\S]*?-->", "", html)
|
||||
no_hidden = re.sub(
|
||||
r"<div[^>]*style=[\"'][^\"']*display\s*:\s*none[^\"']*[\"'][^>]*>[\s\S]*?</div>",
|
||||
"",
|
||||
no_comments,
|
||||
flags=re.IGNORECASE,
|
||||
)
|
||||
return no_hidden
|
||||
|
||||
# ─────────────────────── Redirect parser ───────────────────────────
|
||||
|
||||
def _parse_uprot_html(self, text: str) -> Optional[str]:
|
||||
"""Parse a uprot success page and return the next-hop URL.
|
||||
|
||||
Strategy mirrored from NelloStream `_uprotBypassWithCookies`:
|
||||
1. Strip honeypot blocks first
|
||||
2. Prefer explicit `id="buttok"` CONTINUE button (uprot marker)
|
||||
3. Fallback: <a><button>Continue</button></a> (case+spacing tolerant)
|
||||
4. Last resort: a `/uprots/` or `/uprotem/` URL appearing exactly
|
||||
once in the cleaned HTML (uprot scatters multiple decoys)
|
||||
5. Generic stayonline.pro / maxstream.video regex with honeypot
|
||||
literal filter
|
||||
6. window.location / meta refresh / BS4 button fallbacks
|
||||
"""
|
||||
cleaned = self._strip_uprot_honeypots(text).replace("\\/", "/")
|
||||
|
||||
def _valid(c):
|
||||
if not c:
|
||||
return None
|
||||
try:
|
||||
p = urlparse(c)
|
||||
if p.netloc and "maxstream.video" in p.netloc and p.path.startswith("/cdn-cgi/"):
|
||||
return None
|
||||
except Exception:
|
||||
pass
|
||||
return c
|
||||
|
||||
# 1. id="buttok" CONTINUE button
|
||||
m = re.search(
|
||||
r'href=["\'](https?://[^"\']+)["\'][^>]*>\s*<button[^>]*id=["\']buttok["\'][^>]*>\s*C\s*O\s*N\s*T\s*I\s*N\s*U\s*E',
|
||||
cleaned,
|
||||
re.IGNORECASE,
|
||||
)
|
||||
if m and _valid(m.group(1)):
|
||||
return m.group(1)
|
||||
|
||||
# 2. Generic <a><button>Continue</button></a>
|
||||
m = re.search(
|
||||
r'href=["\'](https?://[^"\']+)["\'][^>]*>\s*<button[^>]*>\s*[Cc]\s*[Oo]\s*[Nn]\s*[Tt]\s*[Ii]\s*[Nn]\s*[Uu]\s*[Ee]',
|
||||
cleaned,
|
||||
)
|
||||
if m and _valid(m.group(1)):
|
||||
return m.group(1)
|
||||
|
||||
# 3. Unique uprots/uprotem URL
|
||||
all_uprots = re.findall(
|
||||
r'href=["\'](https?://[^"\']*uprot(?:s|em)/[^"\']+)["\']',
|
||||
cleaned,
|
||||
re.IGNORECASE,
|
||||
)
|
||||
if all_uprots:
|
||||
counts: Dict[str, int] = {}
|
||||
for u in all_uprots:
|
||||
counts[u] = counts.get(u, 0) + 1
|
||||
unique = [u for u, c in counts.items() if c == 1]
|
||||
if unique and _valid(unique[0]):
|
||||
return unique[0]
|
||||
|
||||
# 4. Generic stayonline / maxstream regex
|
||||
m = re.search(
|
||||
r'https?://(?:www\.)?(?:stayonline\.pro|maxstream\.video)[^"\'\s<>\\ ]+',
|
||||
cleaned,
|
||||
)
|
||||
if m and "/uprots/123456789012" not in m.group(0) and _valid(m.group(0)):
|
||||
return m.group(0)
|
||||
|
||||
# 5. window.location / meta refresh
|
||||
m = re.search(r'window\.location(?:\.href)?\s*=\s*["\']([^"\']+)["\']', cleaned)
|
||||
if m and _valid(m.group(1)):
|
||||
return m.group(1)
|
||||
m = re.search(r'content=["\']0;\s*url=([^"\']+)["\']', cleaned, re.I)
|
||||
if m and _valid(m.group(1)):
|
||||
return m.group(1)
|
||||
|
||||
# 6. BS4 buttons / forms (rare paths)
|
||||
soup = BeautifulSoup(cleaned, "lxml")
|
||||
for btn in soup.find_all(["a", "button"]):
|
||||
t = btn.get_text().strip().lower()
|
||||
if "continue" in t or "continua" in t or "vai al" in t:
|
||||
href = btn.get("href")
|
||||
if not href and btn.parent and btn.parent.name == "a":
|
||||
href = btn.parent.get("href")
|
||||
if href and "uprot.net" not in href and _valid(href):
|
||||
return href
|
||||
return None
|
||||
|
||||
def _parse_uprot_folder(self, text: str, season, episode) -> Optional[str]:
|
||||
"""Parse a /msfld/ folder HTML and return the /msfi/ link for S{ss}E{ee}."""
|
||||
try:
|
||||
s_int = int(season)
|
||||
e_int = int(episode)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
s_pad = f"{s_int:02d}"
|
||||
e_pad = f"{e_int:02d}"
|
||||
patterns = [
|
||||
rf"S{s_pad}E{e_pad}",
|
||||
rf"\b0*{s_int}x0*{e_int}\b",
|
||||
rf"\b0*{s_int}×0*{e_int}\b",
|
||||
rf"\b0*{s_int}×0*{e_int}\b",
|
||||
]
|
||||
for pat in patterns:
|
||||
m = re.search(
|
||||
rf"{pat}[\s\S]{{0,500}}?href=['\"]([^'\"]+/msfi/[^'\"]+)['\"]",
|
||||
text,
|
||||
re.I,
|
||||
)
|
||||
if m:
|
||||
return m.group(1)
|
||||
return None
|
||||
|
||||
# ─────────────────────── OCR backends ──────────────────────────────
|
||||
|
||||
@staticmethod
|
||||
def _preprocess_captcha_png(img_bytes: bytes) -> bytes:
|
||||
"""Binarize + denoise the captcha PNG to boost ddddocr accuracy."""
|
||||
try:
|
||||
from PIL import Image, ImageFilter
|
||||
import io
|
||||
|
||||
img = Image.open(io.BytesIO(img_bytes)).convert("L")
|
||||
img = img.point(lambda p: 255 if p >= 140 else 0, mode="L")
|
||||
img = img.filter(ImageFilter.MaxFilter(3))
|
||||
img = img.filter(ImageFilter.MinFilter(3))
|
||||
out = io.BytesIO()
|
||||
img.save(out, format="PNG")
|
||||
return out.getvalue()
|
||||
except Exception:
|
||||
return img_bytes
|
||||
|
||||
@staticmethod
|
||||
def _tesseract_classify(img_bytes: bytes) -> str:
|
||||
try:
|
||||
import pytesseract
|
||||
from PIL import Image, ImageFilter
|
||||
import io
|
||||
|
||||
img = Image.open(io.BytesIO(img_bytes)).convert("L")
|
||||
img = img.point(lambda p: 255 if p >= 140 else 0, mode="L")
|
||||
img = img.filter(ImageFilter.MaxFilter(3))
|
||||
img = img.filter(ImageFilter.MinFilter(3))
|
||||
return pytesseract.image_to_string(img, config="--psm 7 -c tessedit_char_whitelist=0123456789").strip()
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
@staticmethod
|
||||
async def _cf_worker_ocr(img_bytes: bytes, expected_digits: int = 4) -> str:
|
||||
"""Optional 3rd OCR backend: Cloudflare Workers AI vision LLM.
|
||||
|
||||
ddddocr + tesseract top out at ~50-65% on uprot's noisy captcha.
|
||||
A vision LLM (Llama 4 Scout / Gemma 3 / LLaVA) gets ~80-90%.
|
||||
POSTs the captcha PNG to a user-deployed CF Worker (see
|
||||
docs/MAXSTREAM_UPROT.md for setup).
|
||||
|
||||
Activated only when both env vars are set:
|
||||
CF_WORKER_OCR_URL
|
||||
CF_WORKER_OCR_AUTH
|
||||
Returns "" on any failure — caller falls through gracefully.
|
||||
"""
|
||||
base = (os.getenv("CF_WORKER_OCR_URL") or "").strip().rstrip("/")
|
||||
if not base:
|
||||
return ""
|
||||
auth = (os.getenv("CF_WORKER_OCR_AUTH") or "").strip()
|
||||
try:
|
||||
import aiohttp
|
||||
|
||||
headers = {"content-type": "image/png"}
|
||||
if auth:
|
||||
headers["x-worker-auth"] = auth
|
||||
async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=20)) as s:
|
||||
async with s.post(
|
||||
f"{base}/?ocr=1&digits={expected_digits}",
|
||||
data=img_bytes,
|
||||
headers=headers,
|
||||
) as resp:
|
||||
if resp.status != 200:
|
||||
return ""
|
||||
data = await resp.json()
|
||||
return (data.get("digits") or "").strip()
|
||||
except Exception as e:
|
||||
logger.debug(f"CF Worker OCR failed: {e}")
|
||||
return ""
|
||||
|
||||
# ─────────────────── Captcha solver loop ───────────────────────────
|
||||
|
||||
async def _solve_uprot_captcha_once(self, text: str, original_url: str, preprocess: bool = False) -> Optional[str]:
|
||||
try:
|
||||
import ddddocr
|
||||
except ImportError:
|
||||
logger.debug("ddddocr not installed — skipping captcha solve")
|
||||
return None
|
||||
|
||||
soup = BeautifulSoup(text, "lxml")
|
||||
img_tag = soup.find("img", src=re.compile(r"data:image/|/captcha|/image/|captcha\.php"))
|
||||
img_url = img_tag.get("src") if img_tag else None
|
||||
if not img_url:
|
||||
m = re.search(
|
||||
r'<img[^>]+src=["\']([^"\']*(?:data:image/|captcha|image)[^"\']*)["\']',
|
||||
text,
|
||||
)
|
||||
img_url = m.group(1) if m else None
|
||||
if not img_url:
|
||||
return None
|
||||
|
||||
form = soup.find("form")
|
||||
form_action = form.get("action") if form else ""
|
||||
if not form_action or form_action == "#":
|
||||
form_action = original_url
|
||||
elif form_action.startswith("/"):
|
||||
p = urlparse(original_url)
|
||||
form_action = f"{p.scheme}://{p.netloc}{form_action}"
|
||||
|
||||
# Download captcha image
|
||||
if img_url.startswith("data:"):
|
||||
try:
|
||||
import base64
|
||||
|
||||
_, b64 = img_url.split(",", 1)
|
||||
img_data = base64.b64decode(b64)
|
||||
except Exception:
|
||||
return None
|
||||
else:
|
||||
full_url = img_url
|
||||
if full_url.startswith("/"):
|
||||
p = urlparse(original_url)
|
||||
full_url = f"{p.scheme}://{p.netloc}{full_url}"
|
||||
res = await self._curl_cffi_fetch(full_url)
|
||||
if not res or not res.get("ok"):
|
||||
return None
|
||||
img_data = res.get("content") or b""
|
||||
|
||||
ocr_input = self._preprocess_captcha_png(img_data) if preprocess else img_data
|
||||
|
||||
if not hasattr(self, "_ocr_engine"):
|
||||
self._ocr_engine = ddddocr.DdddOcr(show_ad=False)
|
||||
res_str = self._ocr_engine.classification(ocr_input)
|
||||
res_digits = "".join(c for c in str(res_str) if c.isdigit())
|
||||
|
||||
# Accept 3-or-4 digit answers (uprot uses 4 today; legacy 3 still seen)
|
||||
def _ok(n):
|
||||
return 3 <= n <= 4
|
||||
|
||||
if not _ok(len(res_digits)):
|
||||
tess = self._tesseract_classify(ocr_input)
|
||||
tess_digits = "".join(c for c in str(tess) if c.isdigit())
|
||||
if _ok(len(tess_digits)):
|
||||
res_digits = tess_digits
|
||||
else:
|
||||
cf = await self._cf_worker_ocr(ocr_input, expected_digits=4)
|
||||
cf_digits = "".join(c for c in str(cf) if c.isdigit())
|
||||
if _ok(len(cf_digits)):
|
||||
res_digits = cf_digits
|
||||
else:
|
||||
return None
|
||||
|
||||
# Prepare POST data
|
||||
captcha_input = soup.find("input", {"name": re.compile(r"captcha|code|val", re.I)})
|
||||
if captcha_input and captcha_input.get("name"):
|
||||
field_name = captcha_input["name"]
|
||||
else:
|
||||
m = re.search(r'name=["\'](captcha|code|val|captch5)[^"\']*["\']', text, re.I)
|
||||
field_name = m.group(1) if m else "captcha"
|
||||
|
||||
post_data = {field_name: res_digits}
|
||||
if form:
|
||||
for inp in form.find_all(["input", "button", "select"]):
|
||||
n = inp.get("name")
|
||||
v = inp.get("value", "")
|
||||
if n and n not in post_data:
|
||||
post_data[n] = v
|
||||
|
||||
headers = {**self.base_headers, "referer": original_url}
|
||||
result = await self._curl_cffi_fetch(form_action, method="POST", data=urlencode(post_data), headers=headers)
|
||||
if not result:
|
||||
return None
|
||||
solved_text = result.get("text") or ""
|
||||
self._last_solve_text = solved_text if isinstance(solved_text, str) else None
|
||||
return self._parse_uprot_html(solved_text)
|
||||
|
||||
async def _solve_uprot_captcha(self, text: str, original_url: str, max_attempts: int = 4) -> Optional[str]:
|
||||
"""Solve the captcha with retries on fresh images.
|
||||
|
||||
Each wrong submit triggers uprot to serve a brand-new captcha
|
||||
image; we feed that fresh page back into the next attempt instead
|
||||
of OCRing the same image with different preprocessing.
|
||||
"""
|
||||
current = text
|
||||
for attempt in range(1, max_attempts + 1):
|
||||
preprocess = attempt % 2 == 0
|
||||
result = await self._solve_uprot_captcha_once(current, original_url, preprocess=preprocess)
|
||||
if result:
|
||||
return result
|
||||
new_text = self._last_solve_text
|
||||
if new_text and new_text != current:
|
||||
current = new_text
|
||||
return None
|
||||
|
||||
# ──────────────────── Redirect chain ───────────────────────────────
|
||||
|
||||
async def _follow_uprots_chain(self, url: str, max_hops: int = 10) -> str:
|
||||
"""Walk the uprots/uprotem → maxstream redirect chain manually.
|
||||
|
||||
After captcha, the URL we extract is usually
|
||||
`maxstream.video/uprots/<token>` whose WAF only honours the token
|
||||
when reached via the proper redirect chain (Referer + cookie
|
||||
continuity from uprot.net). Direct GET → Error 131.
|
||||
|
||||
Walks hop-by-hop preserving cookies until landing on
|
||||
`maxsun{N}.online/watchfree/...` or `maxstream.video/emvvv/<id>`,
|
||||
then converts watchfree → emvvv so the existing packer extraction
|
||||
works.
|
||||
"""
|
||||
if "/uprots/" not in url and "/uprotem/" not in url:
|
||||
return url
|
||||
|
||||
current = url
|
||||
for _ in range(max_hops):
|
||||
res = await self._curl_cffi_fetch(
|
||||
current,
|
||||
headers={**self.base_headers, "referer": "https://uprot.net/"},
|
||||
allow_redirects=False,
|
||||
timeout=15,
|
||||
)
|
||||
if not res:
|
||||
break
|
||||
loc = (res.get("headers") or {}).get("location") or (res.get("headers") or {}).get("Location")
|
||||
if not loc:
|
||||
current = res.get("url") or current
|
||||
break
|
||||
current = urljoin(current, loc)
|
||||
if "/uprots/" not in current and "/uprotem/" not in current:
|
||||
break
|
||||
|
||||
if "watchfree/" in current:
|
||||
try:
|
||||
tail = current.split("watchfree/", 1)[1]
|
||||
segments = [s for s in tail.split("/") if s]
|
||||
if len(segments) >= 2:
|
||||
current = f"https://maxstream.video/emvvv/{segments[1]}"
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return current
|
||||
|
||||
# ─────────────────────── Public flow ───────────────────────────────
|
||||
|
||||
async def get_uprot(self, link: str, season=None, episode=None) -> str:
|
||||
"""Resolve a uprot URL to its maxstream destination.
|
||||
|
||||
Supports:
|
||||
- /msf/{id} single movie (legacy alias /mse/)
|
||||
- /msfi/{id} single episode
|
||||
- /msfld/{id} folder of episodes (requires season + episode)
|
||||
"""
|
||||
# Map only the modern /msf/ single-video path to its legacy /mse/
|
||||
# alias. A naive str.replace("msf", "mse") corrupts /msfld/ into
|
||||
# /mseld/ (404) and /msfi/ into /msei/ (deprecated 500 on new IDs).
|
||||
link = re.sub(r"/msf/", "/mse/", link)
|
||||
|
||||
# Try curl_cffi first; fall back to BaseExtractor._make_request if
|
||||
# curl_cffi isn't installed (legacy /msf/ path may still work).
|
||||
cffi = await self._curl_cffi_fetch(link)
|
||||
if cffi and cffi.get("ok"):
|
||||
text = cffi["text"]
|
||||
else:
|
||||
response = await self._make_request(link)
|
||||
soup = BeautifulSoup(response.text, "lxml")
|
||||
maxstream_url = soup.find("a")
|
||||
maxstream_url = maxstream_url.get("href")
|
||||
return maxstream_url
|
||||
text = response.text
|
||||
|
||||
if "/msfld/" in link:
|
||||
if season is None or episode is None:
|
||||
raise ExtractorError("msfld folder URL requires 'season' and 'episode' parameters")
|
||||
episode_link = self._parse_uprot_folder(text, season, episode)
|
||||
if not episode_link:
|
||||
raise ExtractorError(f"Episode S{season}E{episode} not found in msfld folder")
|
||||
link = episode_link
|
||||
cffi = await self._curl_cffi_fetch(link)
|
||||
if cffi and cffi.get("ok"):
|
||||
text = cffi["text"]
|
||||
else:
|
||||
response = await self._make_request(link)
|
||||
text = response.text
|
||||
|
||||
# 1. Direct parse — works on legacy uprot pages without captcha
|
||||
res = self._parse_uprot_html(text)
|
||||
if res:
|
||||
return res
|
||||
|
||||
# 2. Captcha solver
|
||||
res = await self._solve_uprot_captcha(text, link)
|
||||
if res:
|
||||
return res
|
||||
|
||||
raise ExtractorError("Redirect link not found in uprot page")
|
||||
|
||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||
"""Extract Maxstream URL."""
|
||||
maxstream_url = await self.get_uprot(url)
|
||||
response = await self._make_request(maxstream_url, headers={"accept-language": "en-US,en;q=0.5"})
|
||||
"""Extract Maxstream URL.
|
||||
|
||||
# Extract and decode URL
|
||||
match = re.search(r"\}\('(.+)',.+,'(.+)'\.split", response.text)
|
||||
if not match:
|
||||
For /msfld/ folder URLs, callers must pass season=N&episode=M as
|
||||
query parameters (forwarded by MFP routes as kwargs).
|
||||
|
||||
Optional persistent cache: if `mediaflow_proxy.services.uprot_url_cache`
|
||||
is importable, cache hits skip captcha+chain entirely (<100ms).
|
||||
"""
|
||||
season = kwargs.get("season")
|
||||
episode = kwargs.get("episode")
|
||||
|
||||
cached = None
|
||||
try:
|
||||
from mediaflow_proxy.services import uprot_url_cache # type: ignore
|
||||
|
||||
cached = uprot_url_cache.get(url, season=season, episode=episode)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if cached:
|
||||
logger.debug(f"uprot cache HIT: {url[:80]}")
|
||||
maxstream_url = cached
|
||||
else:
|
||||
maxstream_url = await self.get_uprot(url, season=season, episode=episode)
|
||||
maxstream_url = await self._follow_uprots_chain(maxstream_url)
|
||||
|
||||
# Fetch the maxstream embed page
|
||||
cffi = await self._curl_cffi_fetch(
|
||||
maxstream_url,
|
||||
headers={**self.base_headers, "referer": "https://uprot.net/", "accept-language": "en-US,en;q=0.5"},
|
||||
)
|
||||
if cffi and cffi.get("ok"):
|
||||
text = cffi["text"]
|
||||
else:
|
||||
response = await self._make_request(maxstream_url, headers={"accept-language": "en-US,en;q=0.5"})
|
||||
text = response.text
|
||||
|
||||
if not cached:
|
||||
try:
|
||||
from mediaflow_proxy.services import uprot_url_cache # type: ignore
|
||||
|
||||
uprot_url_cache.put(url, maxstream_url, season=season, episode=episode)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Direct sources check
|
||||
m = re.search(r'sources:\s*\[\{src:\s*"([^"]+)"', text)
|
||||
if m:
|
||||
return {
|
||||
"destination_url": m.group(1),
|
||||
"request_headers": {**self.base_headers, "referer": maxstream_url},
|
||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||
}
|
||||
|
||||
# Packer fallback
|
||||
m = re.search(r"\}\('(.+)',.+,'(.+)'\.split", text)
|
||||
if not m:
|
||||
m = re.search(r"eval\(function\(p,a,c,k,e,d\).+?\}\('(.+?)',.+?,'(.+?)'\.split", text, re.S)
|
||||
if not m:
|
||||
raise ExtractorError("Failed to extract URL components")
|
||||
|
||||
s1 = match.group(2)
|
||||
# Extract Terms
|
||||
terms = s1.split("|")
|
||||
terms = m.group(2).split("|")
|
||||
try:
|
||||
urlset_index = terms.index("urlset")
|
||||
hls_index = terms.index("hls")
|
||||
sources_index = terms.index("sources")
|
||||
result = terms[urlset_index + 1 : hls_index]
|
||||
reversed_elements = result[::-1]
|
||||
first_part = terms[hls_index + 1 : sources_index]
|
||||
reversed_first_part = first_part[::-1]
|
||||
first_url_part = ""
|
||||
for first_part in reversed_first_part:
|
||||
if "0" in first_part:
|
||||
first_url_part += first_part
|
||||
else:
|
||||
first_url_part += first_part + "-"
|
||||
except ValueError as e:
|
||||
raise ExtractorError(f"Missing components in packer: {e}")
|
||||
|
||||
base_url = f"https://{first_url_part}.host-cdn.net/hls/"
|
||||
result_parts = terms[urlset_index + 1 : hls_index]
|
||||
reversed_elements = result_parts[::-1]
|
||||
first_part_terms = terms[hls_index + 1 : sources_index]
|
||||
reversed_first_part = first_part_terms[::-1]
|
||||
|
||||
first_url_part = ""
|
||||
for fp in reversed_first_part:
|
||||
if "0" in fp:
|
||||
first_url_part += fp
|
||||
else:
|
||||
first_url_part += fp + "-"
|
||||
|
||||
base_url = f"https://{first_url_part.rstrip('-')}.host-cdn.net/hls/"
|
||||
if len(reversed_elements) == 1:
|
||||
final_url = base_url + "," + reversed_elements[0] + ".urlset/master.m3u8"
|
||||
lenght = len(reversed_elements)
|
||||
i = 1
|
||||
for element in reversed_elements:
|
||||
base_url += element + ","
|
||||
if lenght == i:
|
||||
base_url += ".urlset/master.m3u8"
|
||||
else:
|
||||
i += 1
|
||||
final_url = base_url
|
||||
for element in reversed_elements:
|
||||
final_url += element + ","
|
||||
final_url = final_url.rstrip(",") + ".urlset/master.m3u8"
|
||||
|
||||
self.base_headers["referer"] = url
|
||||
return {
|
||||
|
||||
@@ -0,0 +1,25 @@
|
||||
from typing import Dict, Any
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor
|
||||
from mediaflow_proxy.utils.packed import eval_solver
|
||||
|
||||
|
||||
class StreamHGExtractor(BaseExtractor):
|
||||
"""StreamHG URL extractor."""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.mediaflow_endpoint = "hls_manifest_proxy"
|
||||
|
||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||
"""Extract StreamHG URL."""
|
||||
headers = {}
|
||||
patterns = [r'"hls2":"([^"]+)"']
|
||||
|
||||
final_url = await eval_solver(self, url, headers, patterns)
|
||||
|
||||
return {
|
||||
"destination_url": final_url,
|
||||
"request_headers": self.base_headers,
|
||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||
}
|
||||
@@ -1,7 +1,6 @@
|
||||
import re
|
||||
from typing import Dict, Any
|
||||
from urllib.parse import urljoin, urlparse
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
|
||||
|
||||
@@ -15,12 +14,23 @@ class VidmolyExtractor(BaseExtractor):
|
||||
if not parsed.hostname or "vidmoly" not in parsed.hostname:
|
||||
raise ExtractorError("VIDMOLY: Invalid domain")
|
||||
|
||||
embed_id_match = re.search(r"/embed-([a-zA-Z0-9]+)\.html", parsed.path)
|
||||
if not embed_id_match:
|
||||
raise ExtractorError("VIDMOLY: Could not extract embed ID from URL")
|
||||
embed_id = embed_id_match.group(1)
|
||||
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||
"Chrome/120 Safari/537.36",
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.9",
|
||||
"Connection": "keep-alive",
|
||||
"Cookie": f"cf_turnstile_demo_pass_{embed_id}=1",
|
||||
"Referer": url,
|
||||
"Sec-Fetch-Dest": "iframe",
|
||||
"Sec-Fetch-Dest": "document",
|
||||
"Sec-Fetch-Mode": "navigate",
|
||||
"Sec-Fetch-Site": "same-origin",
|
||||
}
|
||||
|
||||
# --- Fetch embed page ---
|
||||
@@ -33,11 +43,10 @@ class VidmolyExtractor(BaseExtractor):
|
||||
raise ExtractorError("VIDMOLY: Stream URL not found")
|
||||
|
||||
master_url = match.group(1)
|
||||
|
||||
if not master_url.startswith("http"):
|
||||
master_url = urljoin(url, master_url)
|
||||
|
||||
# --- Validate stream (prevents Stremio timeout) ---
|
||||
# --- Validate stream ---
|
||||
try:
|
||||
test = await self._make_request(master_url, headers=headers)
|
||||
except Exception as e:
|
||||
@@ -48,8 +57,6 @@ class VidmolyExtractor(BaseExtractor):
|
||||
if test.status >= 400:
|
||||
raise ExtractorError(f"VIDMOLY: Stream unavailable ({test.status})")
|
||||
|
||||
# Return MASTER playlist, not variant
|
||||
# Let MediaFlow Proxy handle variants
|
||||
return {
|
||||
"destination_url": master_url,
|
||||
"request_headers": headers,
|
||||
|
||||
@@ -54,9 +54,9 @@ class VixCloudExtractor(BaseExtractor):
|
||||
"Origin": f"{site_url}",
|
||||
}
|
||||
|
||||
response = await self._make_request(site_url + '/api' + parts[1])
|
||||
response = await self._make_request(site_url + "/api" + parts[1])
|
||||
|
||||
response = await self._make_request(site_url + '/' + response.json()['src'],headers=headers)
|
||||
response = await self._make_request(site_url + "/" + response.json()["src"], headers=headers)
|
||||
|
||||
if response.status != 200:
|
||||
raise ExtractorError("Failed to extract URL components, Invalid Request")
|
||||
|
||||
@@ -136,7 +136,7 @@ async def handle_hls_stream_proxy(
|
||||
Returns:
|
||||
Union[Response, EnhancedStreamingResponse]: Either a processed m3u8 playlist or a streaming response.
|
||||
"""
|
||||
streamer = await create_streamer()
|
||||
streamer = await create_streamer(hls_params.destination)
|
||||
# Handle range requests
|
||||
content_range = proxy_headers.request.get("range", "bytes=0-")
|
||||
if "nan" in content_range.casefold():
|
||||
|
||||
@@ -664,34 +664,38 @@ def build_hls_playlist(
|
||||
init_proxy_url = request.url_for("init_endpoint")
|
||||
init_proxy_url = str(init_proxy_url.replace(scheme=get_original_scheme(request)))
|
||||
|
||||
for index, profile in enumerate(profiles):
|
||||
segments = profile["segments"]
|
||||
if not segments:
|
||||
# Merge segments from all periods into a single ordered list.
|
||||
# Multi-period MPDs can produce multiple profiles with the same unique_id (one per period);
|
||||
# depth trimming must be applied to the combined segment list, not per-period.
|
||||
merged: list[tuple[dict, dict]] = []
|
||||
for profile in profiles:
|
||||
if not profile["segments"]:
|
||||
logger.warning(f"No segments found for profile {profile['id']}")
|
||||
continue
|
||||
for seg in profile["segments"]:
|
||||
merged.append((seg, profile))
|
||||
|
||||
if not merged:
|
||||
return "\n".join(hls)
|
||||
|
||||
if is_live:
|
||||
extinf_values_for_depth = [s["extinf"] for s in segments if "extinf" in s]
|
||||
extinf_values_for_depth = [e[0]["extinf"] for e in merged if "extinf" in e[0]]
|
||||
depth = _compute_live_playlist_depth(is_ts_mode, effective_start_offset, extinf_values_for_depth)
|
||||
trimmed_segments = segments[-depth:]
|
||||
else:
|
||||
trimmed_segments = segments
|
||||
merged = merged[-depth:]
|
||||
|
||||
# Add headers for only the first profile
|
||||
if index == 0:
|
||||
first_segment = trimmed_segments[0]
|
||||
extinf_values = [f["extinf"] for f in trimmed_segments if "extinf" in f]
|
||||
# Emit playlist headers using the first trimmed segment's profile
|
||||
first_segment, first_profile = merged[0]
|
||||
extinf_values = [e[0]["extinf"] for e in merged if "extinf" in e[0]]
|
||||
|
||||
# TS mode uses int(max)+1 to reduce buffer underruns in ExoPlayer
|
||||
if is_ts_mode:
|
||||
target_duration = int(max(extinf_values)) + 1 if extinf_values else 10
|
||||
else:
|
||||
target_duration = math.ceil(max(extinf_values)) if extinf_values else 3
|
||||
|
||||
if is_live:
|
||||
sequence = _compute_live_media_sequence(first_segment, profile, trimmed_segments)
|
||||
sequence = _compute_live_media_sequence(first_segment, first_profile, [e[0] for e in merged])
|
||||
else:
|
||||
mpd_start_number = profile.get("segment_template_start_number")
|
||||
mpd_start_number = first_profile.get("segment_template_start_number")
|
||||
sequence = first_segment.get("number")
|
||||
if sequence is None:
|
||||
sequence = mpd_start_number if mpd_start_number is not None else 1
|
||||
@@ -706,18 +710,37 @@ def build_hls_playlist(
|
||||
if not is_live:
|
||||
hls.append("#EXT-X-PLAYLIST-TYPE:VOD")
|
||||
|
||||
init_url = profile["initUrl"]
|
||||
# For SegmentBase profiles, we may have byte range for initialization segment
|
||||
init_range = profile.get("initRange")
|
||||
|
||||
query_params = dict(request.query_params)
|
||||
query_params.pop("profile_id", None)
|
||||
query_params.pop("d", None)
|
||||
query_params.pop("remux_to_ts", None) # per-request override; already resolved into endpoint choice
|
||||
has_encrypted = query_params.pop("has_encrypted", False)
|
||||
|
||||
# Add EXT-X-MAP for init segment (for live streams or when beneficial)
|
||||
if use_map:
|
||||
current_init_url = None # track to detect period boundaries and re-emit EXT-X-MAP
|
||||
need_discontinuity = False
|
||||
|
||||
for segment, profile in merged:
|
||||
duration = segment["extinf"]
|
||||
init_url = profile["initUrl"]
|
||||
# For SegmentBase profiles, we may have byte range for initialization segment
|
||||
init_range = profile.get("initRange")
|
||||
|
||||
# Check if this segment should be skipped
|
||||
if skip_filter:
|
||||
if skip_filter.should_skip_segment(duration):
|
||||
skip_filter.advance_time(duration)
|
||||
skipped_segments += 1
|
||||
need_discontinuity = True
|
||||
continue
|
||||
skip_filter.advance_time(duration)
|
||||
|
||||
# Emit EXT-X-MAP when init URL changes (first segment or period boundary in fMP4 mode)
|
||||
if use_map and init_url != current_init_url:
|
||||
if current_init_url is not None:
|
||||
# Period boundary: insert discontinuity before new init
|
||||
hls.append("#EXT-X-DISCONTINUITY")
|
||||
need_discontinuity = False
|
||||
current_init_url = init_url
|
||||
init_query_params = {
|
||||
"init_url": init_url,
|
||||
"mime_type": profile["mimeType"],
|
||||
@@ -745,19 +768,6 @@ def build_hls_playlist(
|
||||
)
|
||||
hls.append(f'#EXT-X-MAP:URI="{init_map_url}"')
|
||||
|
||||
need_discontinuity = False
|
||||
for segment in trimmed_segments:
|
||||
duration = segment["extinf"]
|
||||
|
||||
# Check if this segment should be skipped
|
||||
if skip_filter:
|
||||
if skip_filter.should_skip_segment(duration):
|
||||
skip_filter.advance_time(duration)
|
||||
skipped_segments += 1
|
||||
need_discontinuity = True
|
||||
continue
|
||||
skip_filter.advance_time(duration)
|
||||
|
||||
# Add discontinuity marker after skipped segments
|
||||
if need_discontinuity:
|
||||
hls.append("#EXT-X-DISCONTINUITY")
|
||||
|
||||
Binary file not shown.
Binary file not shown.
@@ -198,7 +198,7 @@ class HTTPMediaSource:
|
||||
proxy=proxy_url,
|
||||
allow_redirects=True,
|
||||
) as resp:
|
||||
cl = resp.headers.get("content-length")
|
||||
cl = resp.headers.get("content-length") if resp.status < 400 else None
|
||||
if cl:
|
||||
self._file_size = int(cl)
|
||||
else:
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -16,7 +16,12 @@ playlist_builder_router = APIRouter()
|
||||
|
||||
|
||||
def rewrite_m3u_links_streaming(
|
||||
m3u_lines_iterator: Iterator[str], base_url: str, api_password: Optional[str]
|
||||
m3u_lines_iterator: Iterator[str],
|
||||
base_url: str,
|
||||
api_password: Optional[str],
|
||||
max_res: bool = False,
|
||||
redirect_stream: bool = True,
|
||||
no_proxy: bool = True,
|
||||
) -> Iterator[str]:
|
||||
"""
|
||||
Rewrites links from an M3U line iterator according to the specified rules,
|
||||
@@ -99,7 +104,13 @@ def rewrite_m3u_links_streaming(
|
||||
processed_url_content = f"{base_url}/proxy/hls/manifest.m3u8?d={encoded_url}"
|
||||
elif "vixsrc.to" in logical_line:
|
||||
encoded_url = urllib.parse.quote(logical_line, safe="")
|
||||
processed_url_content = f"{base_url}/extractor/video?host=VixCloud&redirect_stream=true&d={encoded_url}&max_res=true&no_proxy=true"
|
||||
processed_url_content = (
|
||||
f"{base_url}/extractor/video?host=VixCloud"
|
||||
f"&redirect_stream={str(redirect_stream).lower()}"
|
||||
f"&d={encoded_url}"
|
||||
f"&max_res={str(max_res).lower()}"
|
||||
f"&no_proxy={str(no_proxy).lower()}"
|
||||
)
|
||||
elif ".m3u8" in logical_line:
|
||||
encoded_url = urllib.parse.quote(logical_line, safe="")
|
||||
processed_url_content = f"{base_url}/proxy/hls/manifest.m3u8?d={encoded_url}"
|
||||
@@ -232,7 +243,14 @@ def parse_channel_entries(lines: list[str]) -> list[list[str]]:
|
||||
return entries
|
||||
|
||||
|
||||
async def async_generate_combined_playlist(playlist_definitions: list[str], base_url: str, api_password: Optional[str]):
|
||||
async def async_generate_combined_playlist(
|
||||
playlist_definitions: list[str],
|
||||
base_url: str,
|
||||
api_password: Optional[str],
|
||||
max_res: bool = False,
|
||||
redirect_stream: bool = True,
|
||||
no_proxy: bool = True,
|
||||
):
|
||||
"""Genera una playlist combinata da multiple definizioni, scaricando in parallelo."""
|
||||
# Prepara i task di download
|
||||
download_tasks = []
|
||||
@@ -318,7 +336,9 @@ async def async_generate_combined_playlist(playlist_definitions: list[str], base
|
||||
|
||||
if should_proxy:
|
||||
# Usa un iteratore fittizio per processare una sola linea
|
||||
rewritten_url_iter = rewrite_m3u_links_streaming(iter([url]), base_url, api_password)
|
||||
rewritten_url_iter = rewrite_m3u_links_streaming(
|
||||
iter([url]), base_url, api_password, max_res, redirect_stream, no_proxy
|
||||
)
|
||||
yield next(rewritten_url_iter, url) # Prende l'URL riscritto, con fallback all'originale
|
||||
else:
|
||||
yield url # Lascia l'URL invariato
|
||||
@@ -327,7 +347,9 @@ async def async_generate_combined_playlist(playlist_definitions: list[str], base
|
||||
for playlist_data in unsorted_playlists_data:
|
||||
lines_iterator = iter(playlist_data["lines"])
|
||||
if playlist_data["proxy"]:
|
||||
lines_iterator = rewrite_m3u_links_streaming(lines_iterator, base_url, api_password)
|
||||
lines_iterator = rewrite_m3u_links_streaming(
|
||||
lines_iterator, base_url, api_password, max_res, redirect_stream, no_proxy
|
||||
)
|
||||
|
||||
for line in yield_header_once(lines_iterator):
|
||||
yield line
|
||||
@@ -336,14 +358,26 @@ async def async_generate_combined_playlist(playlist_definitions: list[str], base
|
||||
@playlist_builder_router.get("/playlist")
|
||||
async def proxy_handler(
|
||||
request: Request,
|
||||
d: str = Query(..., description="Query string con le definizioni delle playlist", alias="d"),
|
||||
api_password: Optional[str] = Query(None, description="Password API per MFP"),
|
||||
d: str = Query(..., description="Semicolon-separated playlist URL definitions"),
|
||||
api_password: Optional[str] = Query(None, description="MFP API password"),
|
||||
max_res: bool = Query(False, description="Redirect extractor streams to the highest available resolution"),
|
||||
redirect_stream: bool = Query(True, description="Redirect extractor to the direct stream URL"),
|
||||
no_proxy: bool = Query(True, description="Skip proxying internal segment URLs for extractor streams"),
|
||||
):
|
||||
"""
|
||||
Endpoint per il proxy delle playlist M3U con supporto MFP.
|
||||
Proxy and merge one or more M3U playlists, rewriting stream URLs through MFP.
|
||||
|
||||
Formato query string: playlist1&url1;playlist2&url2
|
||||
Esempio: https://mfp.com:pass123&http://provider.com/playlist.m3u
|
||||
**`d` format** — semicolon-separated playlist definitions:
|
||||
- Plain URL: `http://provider.com/playlist.m3u`
|
||||
- With sort prefix: `sort:http://provider.com/playlist.m3u`
|
||||
- Without rewriting: `no_proxy:http://provider.com/playlist.m3u`
|
||||
- Combinable: `sort:no_proxy:http://provider.com/playlist.m3u`
|
||||
|
||||
Stream URLs in each playlist are automatically rewritten to route through the
|
||||
appropriate MFP proxy endpoint (`/proxy/hls`, `/proxy/mpd`, `/extractor/video`).
|
||||
|
||||
**Extractor parameters** (`max_res`, `redirect_stream`, `no_proxy`) apply to
|
||||
streams that go through `/extractor/video` (e.g. vixsrc.to links).
|
||||
"""
|
||||
try:
|
||||
if not d:
|
||||
@@ -371,7 +405,9 @@ async def proxy_handler(
|
||||
base_url = base_url_part
|
||||
|
||||
async def generate_response():
|
||||
async for line in async_generate_combined_playlist(playlist_definitions, base_url, api_password):
|
||||
async for line in async_generate_combined_playlist(
|
||||
playlist_definitions, base_url, api_password, max_res, redirect_stream, no_proxy
|
||||
):
|
||||
yield line
|
||||
|
||||
return StreamingResponse(
|
||||
|
||||
@@ -1,9 +1,13 @@
|
||||
import asyncio
|
||||
import ipaddress
|
||||
import logging
|
||||
import re
|
||||
from functools import lru_cache
|
||||
from typing import Annotated
|
||||
from urllib.parse import quote, unquote
|
||||
from urllib.parse import quote, unquote, urlparse
|
||||
|
||||
import aiohttp
|
||||
from aiohttp import ClientTimeout
|
||||
from fastapi import Request, Depends, APIRouter, Query, HTTPException, Response
|
||||
from fastapi.datastructures import QueryParams
|
||||
|
||||
@@ -30,6 +34,7 @@ from mediaflow_proxy.utils.extractor_helpers import (
|
||||
check_and_extract_sportsonline_stream,
|
||||
)
|
||||
from mediaflow_proxy.utils.hls_prebuffer import hls_prebuffer
|
||||
from mediaflow_proxy.utils.http_client import create_aiohttp_session
|
||||
from mediaflow_proxy.utils.http_utils import (
|
||||
get_proxy_headers,
|
||||
ProxyRequestHeaders,
|
||||
@@ -452,6 +457,203 @@ def _build_hls_query_params(request: Request, destination: str) -> str:
|
||||
return "&".join(params)
|
||||
|
||||
|
||||
MEDIAFLOW_IP_PLACEHOLDER = "{mediaflow_ip}"
|
||||
_IP_DETECT_URLS = ["https://api.ipify.org", "https://checkip.amazonaws.com"]
|
||||
_cached_public_ip: str | None = None
|
||||
_public_ip_lock: asyncio.Lock | None = None
|
||||
|
||||
|
||||
async def _resolve_public_ip() -> str | None:
|
||||
"""Return MediaFlow's public IP: configured value, cached detection, or None."""
|
||||
global _cached_public_ip, _public_ip_lock
|
||||
|
||||
if settings.public_ip:
|
||||
return settings.public_ip
|
||||
if _cached_public_ip:
|
||||
return _cached_public_ip
|
||||
|
||||
if _public_ip_lock is None:
|
||||
_public_ip_lock = asyncio.Lock()
|
||||
|
||||
async with _public_ip_lock:
|
||||
if _cached_public_ip:
|
||||
return _cached_public_ip
|
||||
for url in _IP_DETECT_URLS:
|
||||
try:
|
||||
async with aiohttp.ClientSession() as sess:
|
||||
async with sess.get(url, timeout=ClientTimeout(total=5)) as resp:
|
||||
ip = (await resp.text()).strip()
|
||||
if ip:
|
||||
_cached_public_ip = ip
|
||||
return ip
|
||||
except Exception:
|
||||
continue
|
||||
return None
|
||||
|
||||
|
||||
_IP_DISCLOSURE_HEADERS = frozenset(
|
||||
{
|
||||
"x-forwarded-for",
|
||||
"x-real-ip",
|
||||
"x-client-ip",
|
||||
"true-client-ip",
|
||||
"forwarded",
|
||||
"cf-connecting-ip",
|
||||
"x-original-forwarded-for",
|
||||
"x-cluster-client-ip",
|
||||
}
|
||||
)
|
||||
|
||||
_HOP_BY_HOP_HEADERS = frozenset(
|
||||
{
|
||||
"connection",
|
||||
"keep-alive",
|
||||
"proxy-authenticate",
|
||||
"proxy-authorization",
|
||||
"te",
|
||||
"trailers",
|
||||
"transfer-encoding",
|
||||
"upgrade",
|
||||
}
|
||||
)
|
||||
|
||||
# Headers that callers must not inject via h_* params — they enable host-header
|
||||
# injection, HTTP request smuggling, or break the session's own framing logic.
|
||||
_BLOCKED_REQUEST_HEADERS = frozenset(
|
||||
{
|
||||
"host",
|
||||
"content-length",
|
||||
"transfer-encoding",
|
||||
"content-encoding",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def _check_forward_destination(destination: str) -> None:
|
||||
"""SSRF guard and allowlist/denylist check for /proxy/forward."""
|
||||
parsed = urlparse(destination)
|
||||
|
||||
# Only allow http(s) — blocks file://, ftp://, gopher://, data:, javascript:, etc.
|
||||
scheme = (parsed.scheme or "").lower()
|
||||
if scheme not in ("http", "https"):
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Invalid URL scheme '{scheme}'. Only http and https are allowed.",
|
||||
)
|
||||
|
||||
hostname = (parsed.hostname or "").lower()
|
||||
|
||||
if not hostname:
|
||||
raise HTTPException(status_code=400, detail="Invalid destination URL: no hostname")
|
||||
|
||||
# Allowlist check (if configured)
|
||||
allowed = settings.forward_allowed_hosts
|
||||
if allowed and hostname not in {h.lower() for h in allowed}:
|
||||
raise HTTPException(status_code=403, detail=f"Host '{hostname}' is not in forward_allowed_hosts")
|
||||
|
||||
# Explicit denylist
|
||||
denied = {h.lower() for h in settings.forward_denied_hosts}
|
||||
if hostname in denied:
|
||||
raise HTTPException(status_code=403, detail=f"Host '{hostname}' is denied")
|
||||
|
||||
# Always block loopback literals
|
||||
if hostname in ("localhost", "ip6-localhost", "ip6-loopback"):
|
||||
raise HTTPException(status_code=403, detail="Forwarding to localhost is not allowed")
|
||||
|
||||
# Block private/loopback/link-local IPs given as literals
|
||||
try:
|
||||
addr = ipaddress.ip_address(hostname)
|
||||
if addr.is_private or addr.is_loopback or addr.is_link_local or addr.is_unspecified:
|
||||
raise HTTPException(status_code=403, detail="Forwarding to private/loopback addresses is not allowed")
|
||||
except ValueError:
|
||||
pass # Not a numeric IP — hostname-based SSRF is the caller's responsibility
|
||||
|
||||
|
||||
@proxy_router.api_route(
|
||||
"/forward",
|
||||
methods=["GET", "HEAD", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"],
|
||||
)
|
||||
async def proxy_forward_endpoint(
|
||||
request: Request,
|
||||
proxy_headers: Annotated[ProxyRequestHeaders, Depends(get_proxy_headers)],
|
||||
destination: str = Query(..., description="The destination URL to forward to.", alias="d"),
|
||||
):
|
||||
"""
|
||||
Generic transparent HTTP forwarding endpoint.
|
||||
|
||||
Forwards any HTTP method (including POST with body) to the given destination URL
|
||||
using MediaFlow's outbound IP. Useful for IP-bound API calls (e.g. debrid service
|
||||
APIs, extractor POST requests) where the request must appear to originate from
|
||||
MediaFlow rather than the addon server.
|
||||
|
||||
Pass outbound headers via ``h_<name>=<value>`` query params. The upstream response
|
||||
(status code, headers, body) is returned verbatim. IP-disclosure headers are
|
||||
stripped before forwarding so the caller's IP is not leaked.
|
||||
"""
|
||||
destination = sanitize_url(destination)
|
||||
_check_forward_destination(destination)
|
||||
|
||||
# Strip IP-disclosure headers — the whole point is hiding the origin IP
|
||||
for h in _IP_DISCLOSURE_HEADERS:
|
||||
proxy_headers.request.pop(h, None)
|
||||
|
||||
# Strip headers that could enable host-header injection or HTTP smuggling
|
||||
for h in _BLOCKED_REQUEST_HEADERS:
|
||||
proxy_headers.request.pop(h, None)
|
||||
|
||||
body = await request.body()
|
||||
if len(body) > settings.forward_max_request_body_bytes:
|
||||
raise HTTPException(status_code=413, detail="Request body too large")
|
||||
max_response_bytes = settings.forward_max_response_body_bytes
|
||||
|
||||
# Substitute {mediaflow_ip} placeholder with MediaFlow's actual public IP so
|
||||
# debrid services receive a consistent ip= parameter that matches the TCP source.
|
||||
if MEDIAFLOW_IP_PLACEHOLDER in destination or MEDIAFLOW_IP_PLACEHOLDER.encode() in body:
|
||||
public_ip = await _resolve_public_ip()
|
||||
if public_ip:
|
||||
destination = destination.replace(MEDIAFLOW_IP_PLACEHOLDER, public_ip)
|
||||
body = body.replace(MEDIAFLOW_IP_PLACEHOLDER.encode(), public_ip.encode())
|
||||
|
||||
async with create_aiohttp_session(destination) as (session, proxy_url):
|
||||
try:
|
||||
async with session.request(
|
||||
method=request.method,
|
||||
url=destination,
|
||||
headers=proxy_headers.request,
|
||||
data=body if body else None,
|
||||
proxy=proxy_url,
|
||||
timeout=ClientTimeout(total=settings.transport_config.timeout),
|
||||
allow_redirects=True,
|
||||
) as upstream_resp:
|
||||
resp_body = await upstream_resp.content.read(max_response_bytes + 1)
|
||||
if len(resp_body) > max_response_bytes:
|
||||
raise HTTPException(status_code=502, detail="Upstream response too large")
|
||||
|
||||
resp_headers = {k: v for k, v in upstream_resp.headers.items() if k.lower() not in _HOP_BY_HOP_HEADERS}
|
||||
resp_headers.update(proxy_headers.response)
|
||||
|
||||
return Response(
|
||||
content=resp_body,
|
||||
status_code=upstream_resp.status,
|
||||
headers=resp_headers,
|
||||
)
|
||||
except aiohttp.ClientResponseError as e:
|
||||
raise HTTPException(status_code=e.status, detail=f"Upstream error: {e.message}")
|
||||
except asyncio.TimeoutError:
|
||||
raise HTTPException(status_code=504, detail="Upstream timeout")
|
||||
except aiohttp.ClientError as e:
|
||||
raise HTTPException(status_code=502, detail=f"Upstream connection error: {e}")
|
||||
|
||||
|
||||
@proxy_router.get("/ip")
|
||||
async def get_public_ip_endpoint():
|
||||
"""Return MediaFlow's public IP address."""
|
||||
ip = await _resolve_public_ip()
|
||||
if ip is None:
|
||||
raise HTTPException(status_code=503, detail="Could not determine public IP")
|
||||
return {"ip": ip}
|
||||
|
||||
|
||||
@proxy_router.head("/stream")
|
||||
@proxy_router.get("/stream")
|
||||
@proxy_router.head("/stream/{filename:path}")
|
||||
|
||||
@@ -308,6 +308,7 @@ class ExtractorURLParams(GenericParams):
|
||||
"Sportsonline",
|
||||
"Vavoo",
|
||||
"VidFast",
|
||||
"StreamHG",
|
||||
] = Field(..., description="The host to extract the URL from.")
|
||||
destination: Annotated[str, Field(description="The URL of the stream.", alias="d")]
|
||||
redirect_stream: bool = Field(False, description="Whether to redirect to the stream endpoint automatically.")
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -234,7 +234,7 @@
|
||||
<input type="text" placeholder="Server Name (optional)"
|
||||
class="server-name input-field w-full px-4 py-2.5 rounded-xl border border-gray-300 dark:border-gray-600 bg-white dark:bg-gray-800 text-gray-900 dark:text-white focus:outline-none focus:border-blue-500 text-sm">
|
||||
<div class="flex gap-2">
|
||||
<input type="password" placeholder="API Password (optional)"
|
||||
<input type="password" placeholder="Override password (blank = use main)"
|
||||
class="server-password input-field flex-1 px-4 py-2.5 rounded-xl border border-gray-300 dark:border-gray-600 bg-white dark:bg-gray-800 text-gray-900 dark:text-white focus:outline-none focus:border-blue-500 text-sm">
|
||||
<button type="button" class="remove-server p-2.5 text-red-500 hover:bg-red-50 dark:hover:bg-red-900/20 rounded-xl transition-colors hidden">
|
||||
<svg class="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
|
||||
@@ -81,6 +81,27 @@ class MediaFlowSpeedTest {
|
||||
firstServerUrl.value = baseUrl;
|
||||
firstServerUrl.placeholder = `${baseUrl} (Current Instance)`;
|
||||
|
||||
// The top "MediaFlow API Password" (#currentApiPassword) is the
|
||||
// source of truth; hydrate from localStorage (shared with the URL
|
||||
// generator) and persist edits back. Per-server fields are treated
|
||||
// as optional overrides (see collectServerConfigurations).
|
||||
const topPw = document.getElementById('currentApiPassword');
|
||||
const saved = localStorage.getItem('mediaflow_api_password');
|
||||
if (topPw) {
|
||||
if (saved && !topPw.value) topPw.value = saved;
|
||||
topPw.addEventListener('input', () => {
|
||||
const v = topPw.value.trim();
|
||||
if (v) localStorage.setItem('mediaflow_api_password', v);
|
||||
else localStorage.removeItem('mediaflow_api_password');
|
||||
});
|
||||
}
|
||||
|
||||
// Relabel the per-server field so users realise it's an override
|
||||
// rather than a second required field.
|
||||
document.querySelectorAll('.server-password').forEach(el => {
|
||||
el.placeholder = 'Override password (blank = use main)';
|
||||
});
|
||||
|
||||
// Show placeholder CDN selection initially
|
||||
this.showPlaceholderCdnSelection();
|
||||
this.updateCdnButtonStates();
|
||||
@@ -133,7 +154,7 @@ class MediaFlowSpeedTest {
|
||||
<div class="flex gap-2">
|
||||
<input
|
||||
type="password"
|
||||
placeholder="API Password (optional)"
|
||||
placeholder="Override password (blank = use main)"
|
||||
class="server-password flex-1 px-3 py-2 rounded-md border border-gray-300 dark:border-gray-600 bg-white dark:bg-gray-800 text-gray-900 dark:text-white focus:outline-none focus:ring-2 focus:ring-blue-500 text-sm"
|
||||
>
|
||||
<button
|
||||
@@ -216,11 +237,16 @@ class MediaFlowSpeedTest {
|
||||
// Collect server configurations
|
||||
this.servers = [];
|
||||
const serverInputs = document.querySelectorAll('.server-input');
|
||||
// Fall back to the top #currentApiPassword when a server's per-row
|
||||
// override is blank — saves single-server users from typing the
|
||||
// password twice.
|
||||
const mainPassword = (document.getElementById('currentApiPassword')?.value || '').trim();
|
||||
|
||||
serverInputs.forEach(input => {
|
||||
const url = input.querySelector('.server-url').value.trim();
|
||||
const name = input.querySelector('.server-name').value.trim();
|
||||
const password = input.querySelector('.server-password').value.trim();
|
||||
const override = input.querySelector('.server-password').value.trim();
|
||||
const password = override || mainPassword;
|
||||
|
||||
if (url) {
|
||||
this.servers.push({
|
||||
|
||||
@@ -1831,6 +1831,21 @@
|
||||
localStorage.setItem('theme', newTheme);
|
||||
});
|
||||
|
||||
// Persist the global API password across pages (speedtest, playlist
|
||||
// builder) so the user only has to type it once — otherwise speedtest
|
||||
// calls /proxy/stream without the password and gets 401.
|
||||
(function restoreApiPassword() {
|
||||
const el = document.getElementById('globalApiPassword');
|
||||
if (!el) return;
|
||||
const saved = localStorage.getItem('mediaflow_api_password');
|
||||
if (saved) el.value = saved;
|
||||
el.addEventListener('input', () => {
|
||||
const v = el.value.trim();
|
||||
if (v) localStorage.setItem('mediaflow_api_password', v);
|
||||
else localStorage.removeItem('mediaflow_api_password');
|
||||
});
|
||||
})();
|
||||
|
||||
|
||||
// Tab switching
|
||||
function switchTab(tabName) {
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user