mirror of
https://github.com/UrloMythus/UnHided.git
synced 2026-06-10 09:10:23 +00:00
new version
This commit is contained in:
@@ -1,11 +1,13 @@
|
||||
# https://github.com/Gujal00/ResolveURL/blob/55c7f66524ebd65bc1f88650614e627b00167fa0/script.module.resolveurl/lib/resolveurl/plugins/f16px.py
|
||||
|
||||
import base64
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import hmac
|
||||
import hashlib
|
||||
import os
|
||||
from typing import Dict, Any
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
from mediaflow_proxy.utils import python_aesgcm
|
||||
|
||||
@@ -17,16 +19,91 @@ class F16PxExtractor(BaseExtractor):
|
||||
|
||||
@staticmethod
def _b64url_decode(value: str) -> bytes:
    """Decode a base64url string, tolerating missing ``=`` padding.

    Args:
        value: Text using the URL-safe alphabet (``-`` and ``_``), with or
            without trailing padding.

    Returns:
        The decoded bytes.
    """
    # base64url -> base64: map the URL-safe characters back to the standard alphabet.
    value = value.replace("-", "+").replace("_", "/")
    # Encoders commonly strip "="; restore it so the length is a multiple of 4.
    value += "=" * (-len(value) % 4)
    return base64.b64decode(value)
|
||||
|
||||
@staticmethod
def _b64url_encode(data: bytes) -> str:
    """Encode *data* as base64url text with the trailing ``=`` padding removed."""
    return base64.urlsafe_b64encode(data).rstrip(b"=").decode()
|
||||
|
||||
def _join_key_parts(self, parts) -> bytes:
    """Decode each base64url key fragment and concatenate them in order.

    Args:
        parts: Iterable of base64url-encoded strings.

    Returns:
        The decoded fragments joined into a single byte string (empty when
        *parts* is empty).
    """
    return b"".join(self._b64url_decode(p) for p in parts)
|
||||
|
||||
@staticmethod
def _pick_best(sources: list) -> str:
    """Return the URL of the highest-quality source.

    Sources are ranked by their ``label``. A label that is an integer (or an
    integer-like string) is used as-is; otherwise the first run of digits in
    it is used, so ``"1080p"`` ranks as 1080 instead of collapsing to 0.
    Labels without any digits, or missing labels, rank as 0.

    Args:
        sources: Non-empty list of dicts carrying ``"url"`` and, optionally,
            ``"label"``.

    Returns:
        The ``"url"`` of the best-ranked source; ties keep the first-listed one.

    Raises:
        IndexError: If *sources* is empty.
        KeyError: If the chosen source has no ``"url"``.
    """

    def label_key(source) -> int:
        label = source.get("label", 0)
        try:
            return int(label)
        except (ValueError, TypeError):
            pass
        # Not a plain integer (e.g. "1080p", "HD 720"): rank by the first digit run.
        digits = re.search(r"\d+", str(label))
        return int(digits.group()) if digits else 0

    # sorted() stays stable with reverse=True, so equal ranks keep input order.
    return sorted(sources, key=label_key, reverse=True)[0]["url"]
|
||||
|
||||
def _make_fingerprint(self) -> dict:
    """Build the ``fingerprint`` JSON body sent with the playback request.

    Random viewer and device ids are generated on every call. They are
    embedded in a ``<payload>.<signature>`` token whose payload is compact
    JSON (base64url) valid for ten minutes, and whose signature is the
    HMAC-SHA256 of the encoded payload.

    Returns:
        ``{"fingerprint": {"token", "viewer_id", "device_id", "confidence"}}``.
    """
    confidence = 0.93
    lifetime_seconds = 600

    viewer_id = self._b64url_encode(os.urandom(16))
    device_id = self._b64url_encode(os.urandom(16))
    issued_at = int(time.time())

    token_payload = {
        "viewer_id": viewer_id,
        "device_id": device_id,
        "confidence": confidence,
        "iat": issued_at,
        "exp": issued_at + lifetime_seconds,
    }
    # Compact separators keep the signed bytes free of incidental whitespace.
    payload_b64 = self._b64url_encode(json.dumps(token_payload, separators=(",", ":")).encode())
    # NOTE(review): the HMAC key is empty — presumably this mirrors what the
    # site's own client does; confirm before changing.
    sig = hmac.new(b"", payload_b64.encode(), hashlib.sha256).digest()
    token = f"{payload_b64}.{self._b64url_encode(sig)}"

    return {
        "fingerprint": {
            "token": token,
            "viewer_id": viewer_id,
            "device_id": device_id,
            "confidence": confidence,
        }
    }
|
||||
|
||||
def _decrypt_playback(self, pb: dict) -> list:
    """Decrypt the playback payload and return its ``sources`` list.

    The primary ``iv`` / ``key_parts`` / ``payload`` triple is tried first.
    If it yields no sources — or fails outright — each key in
    ``decrypt_keys`` is tried against ``iv2`` / ``payload2``.

    Args:
        pb: The ``playback`` object from the API response.

    Returns:
        The decrypted source dicts, or ``[]`` when nothing could be decrypted
        and the primary attempt did not raise.

    Raises:
        Exception: The error from the primary attempt, re-raised only when the
            fallback produced no sources either, so callers still see the
            original failure reason.
    """

    def open_sources(key: bytes, iv: bytes, payload: bytes) -> list:
        # NOTE(review): open() appears to return None when GCM authentication
        # fails (the previous extract() treated None that way) — confirm.
        plaintext = python_aesgcm.new(key).open(iv, payload)
        if not plaintext:
            return []
        return json.loads(plaintext.decode("utf-8", "ignore")).get("sources") or []

    primary_error = None
    try:
        iv = self._b64url_decode(pb["iv"])
        key = self._join_key_parts(pb["key_parts"])
        payload = self._b64url_decode(pb["payload"])
        sources = open_sources(key, iv, payload)
        if sources:
            return sources
    except Exception as exc:
        # Hold the error instead of raising: the secondary payload may still work.
        primary_error = exc

    # Fallback: payload2 + decrypt_keys
    decrypt_keys = pb.get("decrypt_keys") or {}
    iv2 = pb.get("iv2")
    pay2 = pb.get("payload2")
    if iv2 and pay2 and decrypt_keys:
        iv2 = self._b64url_decode(iv2)
        pay2 = self._b64url_decode(pay2)
        for key_b64 in decrypt_keys.values():
            try:
                sources = open_sources(self._b64url_decode(key_b64), iv2, pay2)
            except Exception:
                # Best effort: a wrong candidate key is expected, try the next one.
                continue
            if sources:
                return sources

    if primary_error is not None:
        raise primary_error
    return []
|
||||
|
||||
async def extract(self, url: str) -> Dict[str, Any]:
|
||||
parsed = urlparse(url)
|
||||
host = parsed.netloc
|
||||
@@ -35,26 +112,32 @@ class F16PxExtractor(BaseExtractor):
|
||||
match = re.search(r"/e/([A-Za-z0-9]+)", parsed.path or "")
|
||||
if not match:
|
||||
raise ExtractorError("F16PX: Invalid embed URL")
|
||||
|
||||
media_id = match.group(1)
|
||||
|
||||
api_url = f"https://{host}/api/videos/{media_id}/embed/playback"
|
||||
|
||||
headers = self.base_headers.copy()
|
||||
headers["referer"] = f"https://{host}/"
|
||||
headers["referer"] = f"https://{host}/e/{media_id}"
|
||||
headers["origin"] = origin
|
||||
headers["content-type"] = "application/json"
|
||||
|
||||
resp = await self._make_request(
|
||||
api_url,
|
||||
headers=headers,
|
||||
method="POST",
|
||||
json=self._make_fingerprint(),
|
||||
)
|
||||
|
||||
resp = await self._make_request(api_url, headers=headers)
|
||||
try:
|
||||
data = resp.json()
|
||||
except Exception:
|
||||
raise ExtractorError("F16PX: Invalid JSON response")
|
||||
|
||||
# Case 1: plain sources
|
||||
if "sources" in data and data["sources"]:
|
||||
src = data["sources"][0].get("url")
|
||||
if not src:
|
||||
raise ExtractorError("F16PX: Empty source URL")
|
||||
if data.get("sources"):
|
||||
best = self._pick_best(data["sources"])
|
||||
return {
|
||||
"destination_url": src,
|
||||
"destination_url": best,
|
||||
"request_headers": headers,
|
||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||
}
|
||||
@@ -65,40 +148,24 @@ class F16PxExtractor(BaseExtractor):
|
||||
raise ExtractorError("F16PX: No playback data")
|
||||
|
||||
try:
|
||||
iv = self._b64url_decode(pb["iv"]) # nonce
|
||||
key = self._join_key_parts(pb["key_parts"]) # AES key
|
||||
payload = self._b64url_decode(pb["payload"]) # ciphertext + tag
|
||||
|
||||
cipher = python_aesgcm.new(key)
|
||||
decrypted = cipher.open(iv, payload) # AAD = '' like ResolveURL
|
||||
|
||||
if decrypted is None:
|
||||
raise ExtractorError("F16PX: GCM authentication failed")
|
||||
|
||||
decrypted_json = json.loads(decrypted.decode("utf-8", "ignore"))
|
||||
|
||||
except ExtractorError:
|
||||
raise
|
||||
sources = self._decrypt_playback(pb)
|
||||
except Exception as e:
|
||||
raise ExtractorError(f"F16PX: Decryption failed ({e})")
|
||||
|
||||
sources = decrypted_json.get("sources") or []
|
||||
if not sources:
|
||||
raise ExtractorError("F16PX: No sources after decryption")
|
||||
|
||||
best = sources[0].get("url")
|
||||
if not best:
|
||||
raise ExtractorError("F16PX: Empty source URL after decryption")
|
||||
|
||||
self.base_headers.clear()
|
||||
self.base_headers["referer"] = f"{origin}/"
|
||||
self.base_headers["origin"] = origin
|
||||
self.base_headers["Accept-Language"] = "en-US,en;q=0.5"
|
||||
self.base_headers["Accept"] = "*/*"
|
||||
self.base_headers["user-agent"] = "Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0"
|
||||
best = self._pick_best(sources)
|
||||
|
||||
out_headers = {
|
||||
"referer": f"{origin}/",
|
||||
"origin": origin,
|
||||
"Accept-Language": "en-US,en;q=0.5",
|
||||
"Accept": "*/*",
|
||||
"user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0",
|
||||
}
|
||||
return {
|
||||
"destination_url": best,
|
||||
"request_headers": self.base_headers,
|
||||
"request_headers": out_headers,
|
||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||
}
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -152,6 +152,25 @@ class DoodStreamExtractor(BaseExtractor):
|
||||
html = r.text
|
||||
base_url = f"https://{urlparse(final_url).netloc}"
|
||||
|
||||
# Some pages embed a JS redirect instead of a real HTTP redirect.
|
||||
js_redirect = re.search(r'(?:window\.location|location\.href)\s*=\s*[\'"]https?://([^/\'"]+)', html)
|
||||
if js_redirect:
|
||||
redirected_host = js_redirect.group(1)
|
||||
redirect_url = f"https://{redirected_host}/e/{video_id}"
|
||||
logger.debug("JS redirect detected → %s", redirect_url)
|
||||
async with AsyncSession() as s2:
|
||||
r2 = await s2.get(
|
||||
redirect_url,
|
||||
impersonate="chrome",
|
||||
headers={"Referer": f"https://{redirected_host}/"},
|
||||
timeout=30,
|
||||
allow_redirects=True,
|
||||
**({"proxy": proxy} if proxy else {}),
|
||||
)
|
||||
final_url = str(r2.url)
|
||||
html = r2.text
|
||||
base_url = f"https://{urlparse(final_url).netloc}"
|
||||
|
||||
if "pass_md5" not in html:
|
||||
if "turnstile" in html.lower() or "captcha_l" in html:
|
||||
raise ExtractorError(
|
||||
@@ -197,6 +216,14 @@ class DoodStreamExtractor(BaseExtractor):
|
||||
"Ensure BYPARR_URL is set for reliable extraction."
|
||||
)
|
||||
|
||||
# CloudFlare R2 storage URLs are self-contained — no salt/token needed.
|
||||
if "cloudflarestorage." in base_stream.lower():
|
||||
return {
|
||||
"destination_url": base_stream,
|
||||
"request_headers": headers,
|
||||
"mediaflow_endpoint": "proxy_stream_endpoint",
|
||||
}
|
||||
|
||||
token_match = re.search(r"token=([^&\s'\"]+)", html)
|
||||
if not token_match:
|
||||
raise ExtractorError("DoodStream: token not found in embed HTML")
|
||||
|
||||
@@ -25,6 +25,7 @@ from mediaflow_proxy.extractors.vixcloud import VixCloudExtractor
|
||||
from mediaflow_proxy.extractors.fastream import FastreamExtractor
|
||||
from mediaflow_proxy.extractors.voe import VoeExtractor
|
||||
from mediaflow_proxy.extractors.vidfast import VidFastExtractor
|
||||
from mediaflow_proxy.extractors.streamhg import StreamHGExtractor
|
||||
|
||||
|
||||
class ExtractorFactory:
|
||||
@@ -55,6 +56,7 @@ class ExtractorFactory:
|
||||
"Voe": VoeExtractor,
|
||||
"Sportsonline": SportsonlineExtractor,
|
||||
"VidFast": VidFastExtractor,
|
||||
"StreamHG": StreamHGExtractor,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -1,9 +1,45 @@
|
||||
import re
|
||||
import base64
|
||||
import json
|
||||
from typing import Dict, Any
|
||||
from urllib.parse import urlparse, urljoin
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
from mediaflow_proxy.utils.packed import eval_solver
|
||||
|
||||
|
||||
def _base64url_decode(input_str: str) -> bytes:
    """Decode a base64url-encoded string to bytes.

    Missing ``=`` padding is restored before decoding, so both padded and
    unpadded input are accepted.
    """
    padded = input_str.replace("-", "+").replace("_", "/")
    # (-len) % 4 is the number of "=" needed to reach a multiple of 4 (0..3).
    padded += "=" * (-len(padded) % 4)
    return base64.b64decode(padded)
|
||||
|
||||
|
||||
def _combine_key_parts(key_parts: list) -> bytes:
    """Combine base64url-encoded key parts into a single key.

    Each part is decoded independently and the results are concatenated in
    list order.
    """
    return b"".join(_base64url_decode(part) for part in key_parts)
|
||||
|
||||
|
||||
def _decrypt_playback(playback: dict) -> dict:
    """Decrypt the AES-GCM encrypted playback payload and parse it as JSON.

    Args:
        playback: Dict with base64url ``key_parts`` (list), ``iv`` and
            ``payload``. The payload is the ciphertext with the 16-byte GCM
            tag appended, which is the layout ``AESGCM.decrypt`` expects.

    Returns:
        The decrypted JSON document.

    Raises:
        ExtractorError: If a field is missing or malformed, authentication or
            decryption fails, or the plaintext is not valid JSON.
    """
    try:
        key = _combine_key_parts(playback["key_parts"])
        iv = _base64url_decode(playback["iv"])
        payload = _base64url_decode(playback["payload"])
    except (KeyError, TypeError, ValueError) as e:
        raise ExtractorError(f"Malformed playback data: {e}") from e

    try:
        # The tag already trails the ciphertext, so the payload is passed whole.
        plaintext = AESGCM(key).decrypt(iv, payload, None)
    except Exception as e:
        raise ExtractorError(f"Decryption failed: {e}") from e

    try:
        return json.loads(plaintext.decode("utf-8"))
    except ValueError as e:
        # Covers both UnicodeDecodeError and json.JSONDecodeError.
        raise ExtractorError(f"Decrypted playback is not valid JSON: {e}") from e
|
||||
|
||||
|
||||
class FileMoonExtractor(BaseExtractor):
|
||||
@@ -12,41 +48,49 @@ class FileMoonExtractor(BaseExtractor):
|
||||
self.mediaflow_endpoint = "hls_manifest_proxy"
|
||||
|
||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||
response = await self._make_request(url)
|
||||
# URL format: https://filemoon.sx/e/{code} or https://filemoon.sx/d/{code}
|
||||
parsed = urlparse(url)
|
||||
path = parsed.path.rstrip("/")
|
||||
code = path.split("/")[-1] if path else None
|
||||
|
||||
pattern = r'iframe.*?src=["\'](.*?)["\']'
|
||||
match = re.search(pattern, response.text, re.DOTALL)
|
||||
if not match:
|
||||
raise ExtractorError("Failed to extract iframe URL")
|
||||
if not code or code in ("e", "d"):
|
||||
raise ExtractorError(f"Could not extract video code from URL: {url}")
|
||||
|
||||
iframe_url = match.group(1)
|
||||
|
||||
parsed = urlparse(str(response.url))
|
||||
base_url = f"{parsed.scheme}://{parsed.netloc}"
|
||||
|
||||
if iframe_url.startswith("//"):
|
||||
iframe_url = f"{parsed.scheme}:{iframe_url}"
|
||||
elif not urlparse(iframe_url).scheme:
|
||||
iframe_url = urljoin(base_url, iframe_url)
|
||||
api_url = f"{parsed.scheme}://{parsed.netloc}/api/videos/{code}"
|
||||
|
||||
headers = {"Referer": url}
|
||||
patterns = [r'file:"(.*?)"']
|
||||
response = await self._make_request(api_url, headers=headers)
|
||||
|
||||
final_url = await eval_solver(
|
||||
self,
|
||||
iframe_url,
|
||||
headers,
|
||||
patterns,
|
||||
)
|
||||
try:
|
||||
data = response.json()
|
||||
except Exception as e:
|
||||
raise ExtractorError(f"Failed to parse API response: {e}")
|
||||
|
||||
test_resp = await self._make_request(final_url, headers=headers)
|
||||
if test_resp.status == 404:
|
||||
raise ExtractorError("Stream not found (404)")
|
||||
if "error" in data:
|
||||
raise ExtractorError(f"FileMoon API error: {data['error']}")
|
||||
|
||||
playback = data.get("playback")
|
||||
if not playback or not playback.get("key_parts") or not playback.get("payload"):
|
||||
raise ExtractorError("No playback data available")
|
||||
|
||||
decrypted = _decrypt_playback(playback)
|
||||
|
||||
sources = decrypted.get("sources", [])
|
||||
hls_source = None
|
||||
for source in sources:
|
||||
if source.get("mime_type") == "application/vnd.apple.mpegurl":
|
||||
hls_source = source
|
||||
break
|
||||
|
||||
if not hls_source:
|
||||
raise ExtractorError("No HLS source found in decrypted playback")
|
||||
|
||||
destination_url = hls_source["url"]
|
||||
|
||||
self.base_headers["referer"] = url
|
||||
|
||||
return {
|
||||
"destination_url": final_url,
|
||||
"destination_url": destination_url,
|
||||
"request_headers": self.base_headers,
|
||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||
}
|
||||
|
||||
@@ -1,67 +1,651 @@
|
||||
"""Maxstream URL extractor — full uprot bypass pipeline.
|
||||
|
||||
Solves the problem of `uprot.net` redirects on `/msf/`, `/msfi/` and
|
||||
`/msfld/` paths used by Italian aggregators (CB01, EuroStreaming, etc).
|
||||
|
||||
Key features:
|
||||
1. TLS-fingerprint-resistant fetch via curl_cffi (chrome131 impersonation)
|
||||
2. 4-digit captcha solver with multi-engine OCR ensemble:
|
||||
ddddocr (primary) → tesseract (fallback) → CF Workers AI (3rd, opt-in)
|
||||
3. Honeypot URL filtering on the post-captcha page
|
||||
4. uprots/uprotem → maxstream redirect chain follow with cookie continuity
|
||||
5. /msfld/ folder picker (season + episode kwargs from MFP route)
|
||||
6. Optional persistent URL cache (when paired with services/uprot_warmer.py)
|
||||
|
||||
All advanced features are guarded by lazy imports — if `curl_cffi`,
|
||||
`pytesseract`, `Pillow` or `ddddocr` are not installed the extractor
|
||||
falls back to the previous behaviour for `/msf/` URLs and skips
|
||||
`/msfld/` cleanly.
|
||||
|
||||
Activation:
|
||||
CF_WORKER_OCR_URL e.g. https://easyproxy-ocr.user.workers.dev
|
||||
CF_WORKER_OCR_AUTH Worker AUTH_TOKEN
|
||||
|
||||
Credits: pipeline ported from NelloStream
|
||||
(https://github.com/vitouchiha/nello-stream) — `workers/cfworker.js`
|
||||
functions `_uprotBypassWithCookies`, `_extractMaxstreamVideo`,
|
||||
`_aiOcrDigits`, `_handleScheduledUprotRefresh`. All credit to Nello.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from typing import Dict, Any
|
||||
from typing import Any, Dict, Optional
|
||||
from urllib.parse import urljoin, urlparse, urlencode
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class MaxstreamExtractor(BaseExtractor):
|
||||
"""Maxstream URL extractor."""
|
||||
"""Maxstream URL extractor with full uprot bypass pipeline."""
|
||||
|
||||
def __init__(self, *args, **kwargs):
    """Initialise the extractor and its per-instance uprot session state."""
    super().__init__(*args, **kwargs)
    self.mediaflow_endpoint = "hls_manifest_proxy"
    # Persistent cookie jar across the uprot → maxstream redirect chain.
    # PHPSESSID + captcha hash + uprot_session must travel together for
    # the post-captcha redirect to be honoured by the maxstream WAF.
    self.cookies: Dict[str, str] = {}
    # HTML returned by the most recent captcha submission; the retry loop
    # reads it to pick up the fresh captcha page after a wrong answer.
    self._last_solve_text: Optional[str] = None
|
||||
|
||||
async def get_uprot(self, link: str):
|
||||
"""Extract MaxStream URL."""
|
||||
if "msf" in link:
|
||||
link = link.replace("msf", "mse")
|
||||
response = await self._make_request(link)
|
||||
soup = BeautifulSoup(response.text, "lxml")
|
||||
maxstream_url = soup.find("a")
|
||||
maxstream_url = maxstream_url.get("href")
|
||||
return maxstream_url
|
||||
# ───────────────────────── HTTP layer ──────────────────────────────
|
||||
|
||||
async def _curl_cffi_fetch(
    self,
    url: str,
    method: str = "GET",
    data: Optional[Any] = None,
    headers: Optional[Dict[str, str]] = None,
    allow_redirects: bool = True,
    timeout: int = 30,
) -> Optional[Dict[str, Any]]:
    """Browser-impersonated fetch via curl_cffi.

    uprot.net inspects TLS fingerprints; aiohttp's JA3 is recognised as
    a bot within a few requests and served captcha pages or 503 even
    from clean residential IPs. curl_cffi with `impersonate="chrome131"`
    replays a real Chrome JA3 + ALPN order, so uprot serves the real
    redirect link or the (legitimately-protected) captcha page.

    The blocking curl_cffi call runs in the default executor. Cookies set by
    the response are merged into ``self.cookies`` so later calls reuse them.

    Args:
        url: Target URL.
        method: HTTP method.
        data: Request body; ``str``/``bytes`` POST bodies default to a
            form-urlencoded content type when none is set.
        headers: Extra headers layered over ``self.base_headers``.
        allow_redirects: Whether curl_cffi follows redirects itself.
        timeout: Request timeout in seconds.

    Returns:
        A dict with ``ok``, ``status``, ``text``, ``content``, ``url``,
        ``headers`` and ``cookies`` (plus ``error`` and ``status == 0`` when
        the request itself failed), or ``None`` if curl_cffi is not installed
        (caller falls back to BaseExtractor._make_request for the simpler
        legacy /msf/ path).
    """
    try:
        from curl_cffi import requests as cffi_requests
    except ImportError:
        logger.debug("curl_cffi not installed — uprot bypass disabled")
        return None

    merged_headers = dict(self.base_headers)
    if headers:
        merged_headers.update(headers)
    if method.upper() == "POST" and isinstance(data, (str, bytes)):
        # Header names are case-insensitive: do not add a second content type
        # when one is already present under a different casing.
        if not any(name.lower() == "content-type" for name in merged_headers):
            merged_headers["content-type"] = "application/x-www-form-urlencoded"

    proxy = self._get_proxy(url)
    proxies_arg = {"http": proxy, "https": proxy} if proxy else None
    # Snapshot the jar here so the worker thread never reads self.cookies
    # while the event-loop thread may be updating it.
    req_cookies = dict(self.cookies) if self.cookies else None

    def _do_request():
        try:
            r = cffi_requests.request(
                method,
                url,
                headers=merged_headers,
                data=data,
                cookies=req_cookies,
                proxies=proxies_arg,
                impersonate="chrome131",
                timeout=timeout,
                allow_redirects=allow_redirects,
            )
            try:
                cookies = {c.name: c.value for c in r.cookies.jar}
            except Exception:
                # Cookie container without a .jar attribute: fall back to a plain mapping.
                cookies = dict(r.cookies) if r.cookies else {}
            return {
                "ok": r.status_code < 400,
                "status": r.status_code,
                "text": r.text,
                "content": r.content,
                "url": str(r.url),
                "headers": dict(r.headers),
                "cookies": cookies,
            }
        except Exception as e:
            # Report transport failures in-band so callers can keep going.
            return {
                "ok": False,
                "status": 0,
                "text": "",
                "content": b"",
                "url": url,
                "headers": {},
                "cookies": {},
                "error": str(e),
            }

    loop = asyncio.get_running_loop()
    result = await loop.run_in_executor(None, _do_request)
    if result.get("error"):
        logger.debug("curl_cffi %s %s failed: %s", method, url, result["error"])
    if result.get("cookies"):
        self.cookies.update(result["cookies"])
    return result
|
||||
|
||||
# ─────────────────────── Honeypot filter ───────────────────────────
|
||||
|
||||
@staticmethod
def _strip_uprot_honeypots(html: str) -> str:
    """Remove uprot's anti-bot honeypot blocks before URL extraction.

    The post-captcha success page intentionally hides decoy URLs in:
      1. HTML comments (<!-- … -->)
      2. <div style="display:none">…</div> blocks containing fake
         "Continue" buttons that point to placeholder URLs like
         `maxstream.video/uprots/123456789012` (12 sequential digits).

    A naive regex grabs the FIRST match (the honeypot). Strip both
    before parsing so the regex/BS4 see only the visible-to-user DOM.

    Hidden blocks are removed up to their *matching* ``</div>``, so divs
    nested inside a hidden block go with it. If the markup is unbalanced,
    the block is cut at the first ``</div>`` after the opening tag.
    """
    visible = re.sub(r"<!--[\s\S]*?-->", "", html)

    hidden_open = re.compile(
        r"<div[^>]*style=[\"'][^\"']*display\s*:\s*none[^\"']*[\"'][^>]*>",
        re.IGNORECASE,
    )
    div_tag = re.compile(r"<(/?)div\b[^>]*>", re.IGNORECASE)

    kept = []
    pos = 0
    while True:
        opener = hidden_open.search(visible, pos)
        if opener is None:
            break

        # Walk the following div tags, tracking nesting, to find the close
        # that balances the hidden opener.
        depth = 1
        first_close = None
        block_end = None
        for tag in div_tag.finditer(visible, opener.end()):
            if tag.group(1):
                depth -= 1
                if first_close is None:
                    first_close = tag.end()
                if depth == 0:
                    block_end = tag.end()
                    break
            else:
                depth += 1

        if block_end is None:
            block_end = first_close
        if block_end is None:
            # No closing tag at all after this opener: nothing can be removed.
            break

        kept.append(visible[pos:opener.start()])
        pos = block_end

    kept.append(visible[pos:])
    return "".join(kept)
|
||||
|
||||
# ─────────────────────── Redirect parser ───────────────────────────
|
||||
|
||||
def _parse_uprot_html(self, text: str) -> Optional[str]:
    """Parse a uprot success page and return the next-hop URL.

    Strategy mirrored from NelloStream `_uprotBypassWithCookies`:
      1. Strip honeypot blocks first, then prefer the explicit
         `id="buttok"` CONTINUE button (uprot marker)
      2. Fallback: <a><button>Continue</button></a> (case+spacing tolerant)
      3. A `/uprots/` or `/uprotem/` URL appearing exactly once in the
         cleaned HTML (uprot scatters multiple decoys)
      4. Generic stayonline.pro / maxstream.video regex with honeypot
         literal filter
      5. window.location / meta refresh
      6. BS4 button fallbacks

    In steps 3 and 4 every candidate is examined in document order, so a
    decoy or invalid first match does not hide a usable later one.

    Returns:
        The first acceptable URL, or ``None`` if no step finds one.
    """
    cleaned = self._strip_uprot_honeypots(text).replace("\\/", "/")

    def _valid(candidate):
        """Return *candidate* unless it is empty or a maxstream /cdn-cgi/ URL."""
        if not candidate:
            return None
        try:
            parts = urlparse(candidate)
        except ValueError:
            # Unparseable: keep it and let the caller try it.
            return candidate
        if parts.netloc and "maxstream.video" in parts.netloc and parts.path.startswith("/cdn-cgi/"):
            return None
        return candidate

    # 1. id="buttok" CONTINUE button
    m = re.search(
        r'href=["\'](https?://[^"\']+)["\'][^>]*>\s*<button[^>]*id=["\']buttok["\'][^>]*>\s*C\s*O\s*N\s*T\s*I\s*N\s*U\s*E',
        cleaned,
        re.IGNORECASE,
    )
    if m and _valid(m.group(1)):
        return m.group(1)

    # 2. Generic <a><button>Continue</button></a>
    m = re.search(
        r'href=["\'](https?://[^"\']+)["\'][^>]*>\s*<button[^>]*>\s*[Cc]\s*[Oo]\s*[Nn]\s*[Tt]\s*[Ii]\s*[Nn]\s*[Uu]\s*[Ee]',
        cleaned,
    )
    if m and _valid(m.group(1)):
        return m.group(1)

    # 3. Unique uprots/uprotem URL
    all_uprots = re.findall(
        r'href=["\'](https?://[^"\']*uprot(?:s|em)/[^"\']+)["\']',
        cleaned,
        re.IGNORECASE,
    )
    counts: Dict[str, int] = {}
    for u in all_uprots:
        counts[u] = counts.get(u, 0) + 1
    for u, seen in counts.items():
        if seen == 1 and _valid(u):
            return u

    # 4. Generic stayonline / maxstream regex
    for m in re.finditer(
        r'https?://(?:www\.)?(?:stayonline\.pro|maxstream\.video)[^"\'\s<>\\ ]+',
        cleaned,
    ):
        hit = m.group(0)
        if "/uprots/123456789012" not in hit and _valid(hit):
            return hit

    # 5. window.location / meta refresh
    m = re.search(r'window\.location(?:\.href)?\s*=\s*["\']([^"\']+)["\']', cleaned)
    if m and _valid(m.group(1)):
        return m.group(1)
    m = re.search(r'content=["\']0;\s*url=([^"\']+)["\']', cleaned, re.I)
    if m and _valid(m.group(1)):
        return m.group(1)

    # 6. BS4 buttons / forms (rare paths)
    soup = BeautifulSoup(cleaned, "lxml")
    for btn in soup.find_all(["a", "button"]):
        t = btn.get_text().strip().lower()
        if "continue" in t or "continua" in t or "vai al" in t:
            href = btn.get("href")
            # A <button> carries no href itself; use the wrapping <a>.
            if not href and btn.parent and btn.parent.name == "a":
                href = btn.parent.get("href")
            if href and "uprot.net" not in href and _valid(href):
                return href
    return None
|
||||
|
||||
def _parse_uprot_folder(self, text: str, season, episode) -> Optional[str]:
    """Parse a /msfld/ folder HTML and return the /msfi/ link for S{ss}E{ee}.

    The episode marker is looked up in three notations — ``SxxEyy`` (any
    zero padding), ``SxEE`` written as ``1x05`` and as ``1×05`` — and the
    nearest ``/msfi/`` href within 500 characters after the marker wins.
    Markers must not be followed by another digit, so episode 10 does not
    match the text for episode 100.

    Args:
        text: Folder page HTML.
        season: Season number (anything ``int()`` accepts).
        episode: Episode number (anything ``int()`` accepts).

    Returns:
        The matching href, or ``None`` if the numbers are invalid or no
        marker is followed by an ``/msfi/`` link.
    """
    try:
        s_int = int(season)
        e_int = int(episode)
    except (TypeError, ValueError):
        return None

    markers = [
        rf"S0*{s_int}E0*{e_int}(?!\d)",
        rf"\b0*{s_int}x0*{e_int}\b",
        rf"\b0*{s_int}×0*{e_int}\b",
    ]
    for marker in markers:
        m = re.search(
            rf"{marker}[\s\S]{{0,500}}?href=['\"]([^'\"]+/msfi/[^'\"]+)['\"]",
            text,
            re.I,
        )
        if m:
            return m.group(1)
    return None
|
||||
|
||||
# ─────────────────────── OCR backends ──────────────────────────────
|
||||
|
||||
@staticmethod
def _preprocess_captcha_png(img_bytes: bytes, threshold: int = 140) -> bytes:
    """Binarize + denoise the captcha PNG to boost ddddocr accuracy.

    The image is converted to greyscale, thresholded to pure black/white and
    passed through a 3x3 max filter followed by a 3x3 min filter.

    Args:
        img_bytes: Encoded image bytes.
        threshold: Grey level (0-255) at or above which a pixel becomes white.

    Returns:
        The processed image re-encoded as PNG, or *img_bytes* unchanged when
        Pillow is unavailable or the image cannot be processed.
    """
    try:
        from PIL import Image, ImageFilter
        import io

        img = Image.open(io.BytesIO(img_bytes)).convert("L")
        img = img.point(lambda p: 255 if p >= threshold else 0, mode="L")
        img = img.filter(ImageFilter.MaxFilter(3))
        img = img.filter(ImageFilter.MinFilter(3))
        out = io.BytesIO()
        img.save(out, format="PNG")
        return out.getvalue()
    except Exception:
        # Best effort: OCR can still be attempted on the untouched image.
        return img_bytes
|
||||
|
||||
@staticmethod
def _tesseract_classify(img_bytes: bytes) -> str:
    """Read the captcha digits with tesseract (fallback OCR engine).

    The image is binarized and denoised, then recognised as a single text
    line restricted to the characters 0-9.

    Returns:
        The stripped tesseract output, or ``""`` when pytesseract/Pillow are
        missing or recognition fails for any reason.
    """
    try:
        import pytesseract
        from PIL import Image, ImageFilter
        import io

        img = Image.open(io.BytesIO(img_bytes)).convert("L")
        img = img.point(lambda p: 255 if p >= 140 else 0, mode="L")
        img = img.filter(ImageFilter.MaxFilter(3))
        img = img.filter(ImageFilter.MinFilter(3))
        # --psm 7: treat the image as one text line; the whitelist keeps digits only.
        return pytesseract.image_to_string(img, config="--psm 7 -c tessedit_char_whitelist=0123456789").strip()
    except Exception:
        # Best effort: an empty answer lets the caller move on to the next engine.
        return ""
|
||||
|
||||
@staticmethod
async def _cf_worker_ocr(img_bytes: bytes, expected_digits: int = 4) -> str:
    """Optional 3rd OCR backend: Cloudflare Workers AI vision LLM.

    POSTs the captcha PNG to a user-deployed CF Worker at
    ``{CF_WORKER_OCR_URL}/?ocr=1&digits=<expected_digits>`` and reads the
    ``digits`` field of the JSON reply.

    Environment:
        CF_WORKER_OCR_URL   required; the backend is skipped when unset.
        CF_WORKER_OCR_AUTH  optional; sent as ``x-worker-auth`` when set.

    Args:
        img_bytes: Captcha image bytes, sent as ``image/png``.
        expected_digits: Digit count hint forwarded to the worker.

    Returns:
        The worker's answer as stripped text, or "" on any failure — caller
        falls through gracefully.
    """
    base = (os.getenv("CF_WORKER_OCR_URL") or "").strip().rstrip("/")
    if not base:
        return ""
    auth = (os.getenv("CF_WORKER_OCR_AUTH") or "").strip()
    try:
        import aiohttp

        headers = {"content-type": "image/png"}
        if auth:
            headers["x-worker-auth"] = auth
        async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=20)) as s:
            async with s.post(
                f"{base}/?ocr=1&digits={expected_digits}",
                data=img_bytes,
                headers=headers,
            ) as resp:
                if resp.status != 200:
                    return ""
                data = await resp.json()
                # str(): the worker may answer with a JSON number instead of a string.
                return str(data.get("digits") or "").strip()
    except Exception as e:
        logger.debug("CF Worker OCR failed: %s", e)
        return ""
|
||||
|
||||
# ─────────────────── Captcha solver loop ───────────────────────────
|
||||
|
||||
async def _solve_uprot_captcha_once(self, text: str, original_url: str, preprocess: bool = False) -> Optional[str]:
    """Make one attempt at solving the captcha on a uprot page.

    Steps: locate the captcha image and form, fetch/decode the image, OCR it
    (ddddocr, then tesseract, then the CF Worker), POST the answer together
    with the form's other fields, and parse the response for the next-hop URL.

    Args:
        text: HTML of the captcha page.
        original_url: URL the page was served from; used as referer and as
            base for relative image/form URLs.
        preprocess: Binarize/denoise the image before OCR.

    Returns:
        The next-hop URL on success; ``None`` if ddddocr is missing, no image
        is found, no engine yields 3-4 digits, the POST fails, or the response
        contains no usable link. After a POST, the response HTML is stored in
        ``self._last_solve_text``.
    """
    try:
        import ddddocr
    except ImportError:
        logger.debug("ddddocr not installed — skipping captcha solve")
        return None

    soup = BeautifulSoup(text, "lxml")
    img_tag = soup.find("img", src=re.compile(r"data:image/|/captcha|/image/|captcha\.php"))
    img_url = img_tag.get("src") if img_tag else None
    if not img_url:
        m = re.search(
            r'<img[^>]+src=["\']([^"\']*(?:data:image/|captcha|image)[^"\']*)["\']',
            text,
        )
        img_url = m.group(1) if m else None
    if not img_url:
        return None

    form = soup.find("form")
    form_action = form.get("action") if form else ""
    if not form_action or form_action == "#":
        form_action = original_url
    else:
        # urljoin resolves "/path", "path", "//host/path" and leaves absolute URLs alone.
        form_action = urljoin(original_url, form_action)

    # Download captcha image
    if img_url.startswith("data:"):
        try:
            import base64

            _, b64 = img_url.split(",", 1)
            img_data = base64.b64decode(b64)
        except ValueError:
            # No comma in the data URI, or invalid base64.
            return None
    else:
        res = await self._curl_cffi_fetch(urljoin(original_url, img_url))
        if not res or not res.get("ok"):
            return None
        img_data = res.get("content") or b""
    if not img_data:
        return None

    ocr_input = self._preprocess_captcha_png(img_data) if preprocess else img_data

    def _digits(raw) -> str:
        return "".join(c for c in str(raw) if c.isdigit())

    # Accept 3-or-4 digit answers (uprot uses 4 today; legacy 3 still seen)
    def _ok(n):
        return 3 <= n <= 4

    # The engine is created once per instance and reused across attempts.
    if not hasattr(self, "_ocr_engine"):
        self._ocr_engine = ddddocr.DdddOcr(show_ad=False)
    res_digits = _digits(self._ocr_engine.classification(ocr_input))

    if not _ok(len(res_digits)):
        tess_digits = _digits(self._tesseract_classify(ocr_input))
        if _ok(len(tess_digits)):
            res_digits = tess_digits
        else:
            cf_digits = _digits(await self._cf_worker_ocr(ocr_input, expected_digits=4))
            if not _ok(len(cf_digits)):
                return None
            res_digits = cf_digits

    # Prepare POST data
    captcha_input = soup.find("input", {"name": re.compile(r"captcha|code|val", re.I)})
    if captcha_input and captcha_input.get("name"):
        field_name = captcha_input["name"]
    else:
        # Capture the whole attribute value, not just the recognised prefix.
        m = re.search(r'name=["\']((?:captcha|code|val|captch5)[^"\']*)["\']', text, re.I)
        field_name = m.group(1) if m else "captcha"

    post_data = {field_name: res_digits}
    if form:
        # Carry over the form's other named fields without overriding the answer.
        for inp in form.find_all(["input", "button", "select"]):
            n = inp.get("name")
            v = inp.get("value", "")
            if n and n not in post_data:
                post_data[n] = v

    headers = {**self.base_headers, "referer": original_url}
    result = await self._curl_cffi_fetch(form_action, method="POST", data=urlencode(post_data), headers=headers)
    if not result:
        return None
    solved_text = result.get("text") or ""
    self._last_solve_text = solved_text if isinstance(solved_text, str) else None
    return self._parse_uprot_html(solved_text)
|
||||
|
||||
async def _solve_uprot_captcha(self, text: str, original_url: str, max_attempts: int = 4) -> Optional[str]:
    """Solve the captcha with retries on fresh images.

    Each wrong submit triggers uprot to serve a brand-new captcha
    image; we feed that fresh page back into the next attempt instead
    of OCRing the same image with different preprocessing. Image
    preprocessing is enabled on even-numbered attempts.

    Args:
        text: HTML of the initial captcha page.
        original_url: URL of the captcha page.
        max_attempts: Maximum number of solve attempts.

    Returns:
        The next-hop URL from the first successful attempt, or ``None``.
    """
    current = text
    for attempt in range(1, max_attempts + 1):
        # Forget the previous response first: an attempt that stops before
        # submitting must not leave an older page (possibly from another
        # link) to be mistaken for a fresh captcha.
        self._last_solve_text = None
        result = await self._solve_uprot_captcha_once(current, original_url, preprocess=attempt % 2 == 0)
        if result:
            return result
        new_text = self._last_solve_text
        if new_text and new_text != current:
            current = new_text
    return None
|
||||
|
||||
# ──────────────────── Redirect chain ───────────────────────────────
|
||||
|
||||
async def _follow_uprots_chain(self, url: str, max_hops: int = 10) -> str:
    """Walk the uprots/uprotem -> maxstream redirect chain manually.

    After the captcha, the extracted URL is usually
    `maxstream.video/uprots/<token>`, whose WAF only honours the token
    when reached via the proper redirect chain (Referer + cookie
    continuity from uprot.net); a direct GET yields Error 131.

    Redirects are therefore followed one hop at a time, with automatic
    redirects disabled and a uprot.net referer on every request, until
    the URL leaves the `/uprots/` / `/uprotem/` namespace. A final
    `.../watchfree/<a>/<id>/...` URL is rewritten to
    `maxstream.video/emvvv/<id>` so the existing packer extraction works.
    Cookie handling is assumed to live inside `_curl_cffi_fetch`.

    Args:
        url: URL to resolve; returned untouched when it contains neither
            `/uprots/` nor `/uprotem/`.
        max_hops: Upper bound on the number of requests issued.

    Returns:
        The last URL reached (possibly rewritten to the emvvv form).
    """
    chain_markers = ("/uprots/", "/uprotem/")
    if not any(marker in url for marker in chain_markers):
        return url

    current = url
    for _ in range(max_hops):
        res = await self._curl_cffi_fetch(
            current,
            headers={**self.base_headers, "referer": "https://uprot.net/"},
            allow_redirects=False,
            timeout=15,
        )
        if not res:
            break

        # HTTP field names are case-insensitive, so accept any casing of
        # "Location" rather than only the two most common spellings.
        headers = res.get("headers") or {}
        loc = next(
            (value for name, value in headers.items() if value and str(name).lower() == "location"),
            None,
        )
        if not loc:
            # No further redirect: keep the URL the fetch reports, if any.
            current = res.get("url") or current
            break

        # Location may be relative; resolve it against the current hop.
        current = urljoin(current, loc)
        if not any(marker in current for marker in chain_markers):
            break

    if "watchfree/" in current:
        # watchfree/<first>/<id>/... -> emvvv/<id>
        segments = [part for part in current.split("watchfree/", 1)[1].split("/") if part]
        if len(segments) >= 2:
            current = f"https://maxstream.video/emvvv/{segments[1]}"

    return current
|
||||
|
||||
# ─────────────────────── Public flow ───────────────────────────────
|
||||
|
||||
async def get_uprot(self, link: str, season=None, episode=None) -> str:
    """Resolve a uprot URL to its maxstream destination.

    Supports:
      - /msf/{id}    single movie (rewritten to its legacy alias /mse/)
      - /msfi/{id}   single episode
      - /msfld/{id}  folder of episodes (requires season + episode)

    Args:
        link: The uprot URL to resolve.
        season: Season selector, needed only for /msfld/ folder URLs.
        episode: Episode selector, needed only for /msfld/ folder URLs.

    Returns:
        The redirect target found on the page, either directly or after
        the captcha has been solved.

    Raises:
        ExtractorError: If a folder URL lacks season/episode, the episode
            is not listed in the folder, or no redirect link is found.
    """

    async def fetch_text(target: str) -> str:
        # Prefer curl_cffi; fall back to BaseExtractor._make_request when
        # it yields nothing usable (the legacy /msf/ path may still work).
        fetched = await self._curl_cffi_fetch(target)
        if fetched and fetched.get("ok"):
            return fetched["text"]
        fallback = await self._make_request(target)
        return fallback.text

    # Only the exact /msf/ path segment maps to /mse/. A blanket
    # "msf" -> "mse" replacement would turn /msfld/ into /mseld/ (404)
    # and /msfi/ into /msei/ (deprecated, 500 on new IDs).
    link = re.sub(r"/msf/", "/mse/", link)
    text = await fetch_text(link)

    if "/msfld/" in link:
        if season is None or episode is None:
            raise ExtractorError("msfld folder URL requires 'season' and 'episode' parameters")
        episode_link = self._parse_uprot_folder(text, season, episode)
        if not episode_link:
            raise ExtractorError(f"Episode S{season}E{episode} not found in msfld folder")
        # Continue with the episode page picked out of the folder listing.
        link = episode_link
        text = await fetch_text(link)

    # Direct parse first (legacy pages without captcha); the captcha
    # solver only runs when that finds nothing.
    resolved = self._parse_uprot_html(text) or await self._solve_uprot_captcha(text, link)
    if not resolved:
        raise ExtractorError("Redirect link not found in uprot page")
    return resolved
|
||||
|
||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||
"""Extract Maxstream URL."""
|
||||
maxstream_url = await self.get_uprot(url)
|
||||
response = await self._make_request(maxstream_url, headers={"accept-language": "en-US,en;q=0.5"})
|
||||
"""Extract Maxstream URL.
|
||||
|
||||
# Extract and decode URL
|
||||
match = re.search(r"\}\('(.+)',.+,'(.+)'\.split", response.text)
|
||||
if not match:
|
||||
For /msfld/ folder URLs, callers must pass season=N&episode=M as
|
||||
query parameters (forwarded by MFP routes as kwargs).
|
||||
|
||||
Optional persistent cache: if `mediaflow_proxy.services.uprot_url_cache`
|
||||
is importable, cache hits skip captcha+chain entirely (<100ms).
|
||||
"""
|
||||
season = kwargs.get("season")
|
||||
episode = kwargs.get("episode")
|
||||
|
||||
cached = None
|
||||
try:
|
||||
from mediaflow_proxy.services import uprot_url_cache # type: ignore
|
||||
|
||||
cached = uprot_url_cache.get(url, season=season, episode=episode)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if cached:
|
||||
logger.debug(f"uprot cache HIT: {url[:80]}")
|
||||
maxstream_url = cached
|
||||
else:
|
||||
maxstream_url = await self.get_uprot(url, season=season, episode=episode)
|
||||
maxstream_url = await self._follow_uprots_chain(maxstream_url)
|
||||
|
||||
# Fetch the maxstream embed page
|
||||
cffi = await self._curl_cffi_fetch(
|
||||
maxstream_url,
|
||||
headers={**self.base_headers, "referer": "https://uprot.net/", "accept-language": "en-US,en;q=0.5"},
|
||||
)
|
||||
if cffi and cffi.get("ok"):
|
||||
text = cffi["text"]
|
||||
else:
|
||||
response = await self._make_request(maxstream_url, headers={"accept-language": "en-US,en;q=0.5"})
|
||||
text = response.text
|
||||
|
||||
if not cached:
|
||||
try:
|
||||
from mediaflow_proxy.services import uprot_url_cache # type: ignore
|
||||
|
||||
uprot_url_cache.put(url, maxstream_url, season=season, episode=episode)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Direct sources check
|
||||
m = re.search(r'sources:\s*\[\{src:\s*"([^"]+)"', text)
|
||||
if m:
|
||||
return {
|
||||
"destination_url": m.group(1),
|
||||
"request_headers": {**self.base_headers, "referer": maxstream_url},
|
||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||
}
|
||||
|
||||
# Packer fallback
|
||||
m = re.search(r"\}\('(.+)',.+,'(.+)'\.split", text)
|
||||
if not m:
|
||||
m = re.search(r"eval\(function\(p,a,c,k,e,d\).+?\}\('(.+?)',.+?,'(.+?)'\.split", text, re.S)
|
||||
if not m:
|
||||
raise ExtractorError("Failed to extract URL components")
|
||||
|
||||
s1 = match.group(2)
|
||||
# Extract Terms
|
||||
terms = s1.split("|")
|
||||
urlset_index = terms.index("urlset")
|
||||
hls_index = terms.index("hls")
|
||||
sources_index = terms.index("sources")
|
||||
result = terms[urlset_index + 1 : hls_index]
|
||||
reversed_elements = result[::-1]
|
||||
first_part = terms[hls_index + 1 : sources_index]
|
||||
reversed_first_part = first_part[::-1]
|
||||
first_url_part = ""
|
||||
for first_part in reversed_first_part:
|
||||
if "0" in first_part:
|
||||
first_url_part += first_part
|
||||
else:
|
||||
first_url_part += first_part + "-"
|
||||
terms = m.group(2).split("|")
|
||||
try:
|
||||
urlset_index = terms.index("urlset")
|
||||
hls_index = terms.index("hls")
|
||||
sources_index = terms.index("sources")
|
||||
except ValueError as e:
|
||||
raise ExtractorError(f"Missing components in packer: {e}")
|
||||
|
||||
base_url = f"https://{first_url_part}.host-cdn.net/hls/"
|
||||
result_parts = terms[urlset_index + 1 : hls_index]
|
||||
reversed_elements = result_parts[::-1]
|
||||
first_part_terms = terms[hls_index + 1 : sources_index]
|
||||
reversed_first_part = first_part_terms[::-1]
|
||||
|
||||
first_url_part = ""
|
||||
for fp in reversed_first_part:
|
||||
if "0" in fp:
|
||||
first_url_part += fp
|
||||
else:
|
||||
first_url_part += fp + "-"
|
||||
|
||||
base_url = f"https://{first_url_part.rstrip('-')}.host-cdn.net/hls/"
|
||||
if len(reversed_elements) == 1:
|
||||
final_url = base_url + "," + reversed_elements[0] + ".urlset/master.m3u8"
|
||||
lenght = len(reversed_elements)
|
||||
i = 1
|
||||
for element in reversed_elements:
|
||||
base_url += element + ","
|
||||
if lenght == i:
|
||||
base_url += ".urlset/master.m3u8"
|
||||
else:
|
||||
i += 1
|
||||
final_url = base_url
|
||||
else:
|
||||
final_url = base_url
|
||||
for element in reversed_elements:
|
||||
final_url += element + ","
|
||||
final_url = final_url.rstrip(",") + ".urlset/master.m3u8"
|
||||
|
||||
self.base_headers["referer"] = url
|
||||
return {
|
||||
|
||||
@@ -0,0 +1,25 @@
|
||||
from typing import Dict, Any
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor
|
||||
from mediaflow_proxy.utils.packed import eval_solver
|
||||
|
||||
|
||||
class StreamHGExtractor(BaseExtractor):
    """Extractor that resolves a StreamHG page URL to its stream URL."""

    def __init__(self, *args, **kwargs):
        """Initialise the base extractor and select the HLS manifest proxy endpoint."""
        super().__init__(*args, **kwargs)
        self.mediaflow_endpoint = "hls_manifest_proxy"

    async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
        """Resolve ``url`` through ``eval_solver`` and describe the stream.

        Returns:
            A dict with the resolved ``destination_url``, the extractor's
            ``base_headers`` as ``request_headers`` and the configured
            ``mediaflow_endpoint``.
        """
        # The stream URL is captured from the "hls2" entry.
        hls_patterns = [r'"hls2":"([^"]+)"']
        request_headers = {}

        stream_url = await eval_solver(self, url, request_headers, hls_patterns)

        extraction = {"destination_url": stream_url}
        extraction["request_headers"] = self.base_headers
        extraction["mediaflow_endpoint"] = self.mediaflow_endpoint
        return extraction
|
||||
@@ -1,7 +1,6 @@
|
||||
import re
|
||||
from typing import Dict, Any
|
||||
from urllib.parse import urljoin, urlparse
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
|
||||
|
||||
@@ -15,12 +14,23 @@ class VidmolyExtractor(BaseExtractor):
|
||||
if not parsed.hostname or "vidmoly" not in parsed.hostname:
|
||||
raise ExtractorError("VIDMOLY: Invalid domain")
|
||||
|
||||
embed_id_match = re.search(r"/embed-([a-zA-Z0-9]+)\.html", parsed.path)
|
||||
if not embed_id_match:
|
||||
raise ExtractorError("VIDMOLY: Could not extract embed ID from URL")
|
||||
embed_id = embed_id_match.group(1)
|
||||
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||
"Chrome/120 Safari/537.36",
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.9",
|
||||
"Connection": "keep-alive",
|
||||
"Cookie": f"cf_turnstile_demo_pass_{embed_id}=1",
|
||||
"Referer": url,
|
||||
"Sec-Fetch-Dest": "iframe",
|
||||
"Sec-Fetch-Dest": "document",
|
||||
"Sec-Fetch-Mode": "navigate",
|
||||
"Sec-Fetch-Site": "same-origin",
|
||||
}
|
||||
|
||||
# --- Fetch embed page ---
|
||||
@@ -33,11 +43,10 @@ class VidmolyExtractor(BaseExtractor):
|
||||
raise ExtractorError("VIDMOLY: Stream URL not found")
|
||||
|
||||
master_url = match.group(1)
|
||||
|
||||
if not master_url.startswith("http"):
|
||||
master_url = urljoin(url, master_url)
|
||||
|
||||
# --- Validate stream (prevents Stremio timeout) ---
|
||||
# --- Validate stream ---
|
||||
try:
|
||||
test = await self._make_request(master_url, headers=headers)
|
||||
except Exception as e:
|
||||
@@ -48,8 +57,6 @@ class VidmolyExtractor(BaseExtractor):
|
||||
if test.status >= 400:
|
||||
raise ExtractorError(f"VIDMOLY: Stream unavailable ({test.status})")
|
||||
|
||||
# Return MASTER playlist, not variant
|
||||
# Let MediaFlow Proxy handle variants
|
||||
return {
|
||||
"destination_url": master_url,
|
||||
"request_headers": headers,
|
||||
|
||||
@@ -54,9 +54,9 @@ class VixCloudExtractor(BaseExtractor):
|
||||
"Origin": f"{site_url}",
|
||||
}
|
||||
|
||||
response = await self._make_request(site_url + '/api' + parts[1])
|
||||
response = await self._make_request(site_url + "/api" + parts[1])
|
||||
|
||||
response = await self._make_request(site_url + '/' + response.json()['src'],headers=headers)
|
||||
response = await self._make_request(site_url + "/" + response.json()["src"], headers=headers)
|
||||
|
||||
if response.status != 200:
|
||||
raise ExtractorError("Failed to extract URL components, Invalid Request")
|
||||
|
||||
Reference in New Issue
Block a user