new version

This commit is contained in:
UrloMythus
2026-04-15 19:23:14 +02:00
parent 5120b19d0b
commit 8134936d59
135 changed files with 3013 additions and 1589 deletions
+11 -1
View File
@@ -9,7 +9,7 @@ import json
import logging
from mediaflow_proxy.configs import settings
from mediaflow_proxy.utils.http_client import create_aiohttp_session
from mediaflow_proxy.utils.http_client import create_aiohttp_session, _ensure_routing_initialized, get_routing_config
from mediaflow_proxy.utils.http_utils import DownloadError
logger = logging.getLogger(__name__)
@@ -65,6 +65,16 @@ class BaseExtractor(ABC):
# merge incoming headers (e.g. Accept-Language / Referer) with default base headers
self.base_headers.update(request_headers or {})
@staticmethod
def _get_proxy(url: str) -> str | None:
"""Return the configured proxy URL for *url*, or None if no proxy applies."""
try:
_ensure_routing_initialized()
route = get_routing_config().match_url(url)
return route.proxy_url
except Exception:
return None
async def _make_request(
self,
url: str,
+158
View File
@@ -0,0 +1,158 @@
import re
import json
import base64
from typing import Dict, Any
from urllib.parse import urlparse, parse_qs
from bs4 import BeautifulSoup
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
class CityExtractor(BaseExtractor):
def __init__(self, *args, **kwargs):
    """Forward every argument to the base extractor and select the HLS manifest endpoint."""
    endpoint_name = "hls_manifest_proxy"
    super().__init__(*args, **kwargs)
    # Returned to the caller under the "mediaflow_endpoint" key by extract().
    self.mediaflow_endpoint = endpoint_name
def atob_fixed(self, data: str) -> str:
    """Decode a Base64 string leniently, mirroring a browser ``atob`` call.

    Browsers accept Base64 without its trailing ``=`` padding, whereas
    ``base64.b64decode`` rejects it. The input is therefore stripped of
    surrounding whitespace and padded to a multiple of four characters
    before decoding.

    Args:
        data: The Base64 text captured from the page.

    Returns:
        The decoded text (undecodable UTF-8 bytes are dropped), or ``""``
        when *data* cannot be decoded at all.
    """
    try:
        if isinstance(data, str):
            data = data.strip()
            # -len % 4 is the number of "=" needed to reach a multiple of four.
            data += "=" * (-len(data) % 4)
        return base64.b64decode(data).decode("utf-8", errors="ignore")
    except (TypeError, ValueError):
        # binascii.Error (bad Base64) is a ValueError; TypeError covers non-string input.
        return ""
def extract_json_array(self, decoded: str):
    """Return the first balanced ``[...]`` span that follows a ``file:`` or ``sources:`` key.

    ``file:`` takes precedence; ``sources:`` is consulted only when ``file:``
    does not occur. Bracket matching is purely textual: every ``[`` and ``]``
    counts, including any that appear inside quoted strings.

    Args:
        decoded: The decoded script text to scan.

    Returns:
        The substring from the opening ``[`` through its matching ``]``, or
        ``None`` when no key, no ``[`` or no matching ``]`` is found.
    """
    marker_at = decoded.find("file:")
    if marker_at < 0:
        marker_at = decoded.find("sources:")
    if marker_at < 0:
        return None

    opening = decoded.find("[", marker_at)
    if opening < 0:
        return None

    nesting = 0
    for offset, char in enumerate(decoded[opening:]):
        if char == "[":
            nesting += 1
        elif char == "]":
            nesting -= 1
            if nesting == 0:
                return decoded[opening : opening + offset + 1]

    # Ran off the end of the text with brackets still open.
    return None
def pick_stream(self, file_data, season: int = 1, episode: int = 1):
    """Select the stream URL for a season/episode from decoded player data.

    Three shapes of *file_data* are handled:

    * a string, returned unchanged;
    * a flat list of ``{"file": ...}`` dicts, indexed by *episode*;
    * a list of season dicts carrying a ``"folder"`` list of episodes. The
      season whose lower-cased ``"title"`` matches ``season N`` / ``sN`` is
      used, falling back to the first season that has a folder.

    In both list shapes an out-of-range episode falls back to the first
    entry instead of raising.

    Args:
        file_data: Decoded player payload (string or list).
        season: 1-based season number.
        episode: 1-based episode number; values below 1 are treated as 1.

    Returns:
        The selected ``"file"`` value, or ``None`` when nothing can be selected.
    """
    if isinstance(file_data, str):
        return file_data
    if not isinstance(file_data, list) or not file_data:
        # Covers non-list payloads and the empty list (which has nothing to pick).
        return None

    idx = max(0, episode - 1)

    # Flat playlist: every item is directly an episode.
    if all(isinstance(item, dict) and "file" in item for item in file_data):
        chosen = file_data[idx] if idx < len(file_data) else file_data[0]
        return chosen["file"]

    # Season playlist: keep only well-formed entries that carry episodes.
    seasons = [
        ((item.get("title") or "").lower(), item["folder"])
        for item in file_data
        if isinstance(item, dict) and item.get("folder")
    ]
    if not seasons:
        return None

    season_pattern = re.compile(rf"(season|s)\s*0*{season}\b")
    episodes = next(
        (folder for title, folder in seasons if season_pattern.search(title)),
        seasons[0][1],  # no title matched: use the first season with a folder
    )

    chosen = episodes[idx] if idx < len(episodes) else episodes[0]
    return chosen.get("file") if isinstance(chosen, dict) else None
def _load_file_data(self, decoded: str):
    """Pull the playlist payload out of one decoded ``atob`` blob.

    Returns the parsed JSON array when the blob carries a bracketed
    ``file:``/``sources:`` value, the quoted ``file:`` string when it carries
    no such array, or ``None`` when nothing usable is found.
    """
    raw_json = self.extract_json_array(decoded)
    if raw_json:
        # First try with backslash escapes stripped (the array may sit inside an
        # escaped JS string literal), then the untouched text, since stripping
        # can itself break JSON that was valid to begin with.
        for candidate in (re.sub(r"\\(.)", r"\1", raw_json), raw_json):
            try:
                return json.loads(candidate) or None
            except ValueError:
                continue
        # Unparseable array text is not a playable URL; report "nothing found"
        # so the caller can try the next payload.
        return None

    file_match = re.search(r'file\s*:\s*[\'"](.*?)[\'"]', decoded, re.S)
    return file_match.group(1) if file_match else None

async def extract(self, url: str, season: int = 1, episode: int = 1, **kwargs) -> Dict[str, Any]:
    """Resolve a City page URL to an HLS stream descriptor.

    The page is fetched with a fixed login cookie, every ``atob("...")``
    payload in its ``<script>`` tags is Base64-decoded, and the first payload
    that yields playlist data is handed to ``pick_stream``.

    Args:
        url: Page URL. Its query string is dropped before fetching; ``s`` and
            ``e`` query parameters override *season* / *episode* when they
            parse as integers.
        season: 1-based season number used when the playlist has seasons.
        episode: 1-based episode number.
        **kwargs: Accepted for interface compatibility; unused here.

    Returns:
        A dict with ``destination_url``, ``request_headers`` (Referer and
        User-Agent) and ``mediaflow_endpoint``.

    Raises:
        ExtractorError: If the page does not answer with status 200, no
            playlist data is found, or no stream can be picked from it.
    """
    parsed = urlparse(url)
    query = parse_qs(parsed.query)

    if "s" in query:
        try:
            season = int(query["s"][0])
        except ValueError:
            pass  # not a number: keep the season passed by the caller
    if "e" in query:
        try:
            episode = int(query["e"][0])
        except ValueError:
            pass  # not a number: keep the episode passed by the caller

    clean_url = f"{parsed.scheme}://{parsed.netloc}{parsed.path}"
    user_agent = self.base_headers.get("user-agent")

    # NOTE(review): this Base64 blob decodes to a fixed dle_user_id/dle_password
    # login cookie, i.e. account credentials are hard-coded in source. Consider
    # moving them to configuration.
    cookie_b64 = "ZGxlX3VzZXJfaWQ9MzI3Mjk7IGRsZV9wYXNzd29yZD04OTQxNzFjNmE4ZGFiMThlZTU5NGQ1YzY1MjAwOWEzNTs="
    cookie = base64.b64decode(cookie_b64).decode()

    headers = {
        "User-Agent": user_agent,
        "Referer": clean_url,
        "Cookie": cookie,
    }
    response = await self._make_request(clean_url, headers=headers)
    if response.status != 200:
        raise ExtractorError("Failed to load City page")

    soup = BeautifulSoup(response.text, "lxml")
    file_data = None
    for script in soup.find_all("script"):
        script_html = script.string or script.text or ""
        if "atob" not in script_html:
            continue
        for match in re.finditer(r'atob\(\s*[\'"](.*?)[\'"]\s*\)', script_html):
            decoded = self.atob_fixed(match.group(1))
            if not decoded:
                continue
            file_data = self._load_file_data(decoded)
            if file_data:
                break
        if file_data:
            break

    if not file_data:
        raise ExtractorError("No stream found")

    stream_url = self.pick_stream(file_data, season=season, episode=episode)
    if not stream_url:
        raise ExtractorError("Stream extraction failed")

    return {
        "destination_url": stream_url,
        "request_headers": {
            "Referer": clean_url,
            "User-Agent": user_agent,
        },
        "mediaflow_endpoint": self.mediaflow_endpoint,
    }
-704
View File
@@ -1,704 +0,0 @@
import hashlib
import hmac
import re
import time
import logging
from typing import Any, Dict, Optional
from urllib.parse import urlparse
import aiohttp
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError, HttpResponse
from mediaflow_proxy.utils.http_client import create_aiohttp_session
from mediaflow_proxy.configs import settings
logger = logging.getLogger(__name__)
# Silence the ConnectionResetError error on Windows
logging.getLogger("asyncio").setLevel(logging.CRITICAL)
# Default values fed into the X-Fingerprint computation
DEFAULT_DLHD_USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0"
DEFAULT_DLHD_SCREEN_RESOLUTION = "1920x1080"
DEFAULT_DLHD_TIMEZONE = "UTC"
DEFAULT_DLHD_LANGUAGE = "en"


def compute_fingerprint(
    user_agent: str = DEFAULT_DLHD_USER_AGENT,
    screen_resolution: str = DEFAULT_DLHD_SCREEN_RESOLUTION,
    timezone: str = DEFAULT_DLHD_TIMEZONE,
    language: str = DEFAULT_DLHD_LANGUAGE,
) -> str:
    """Derive the value sent in the X-Fingerprint header.

    The four inputs are concatenated without separators, hashed with
    SHA-256, and the first 16 hex characters of the digest are kept.

    Args:
        user_agent: The user agent string.
        screen_resolution: The screen resolution (e.g., "1920x1080").
        timezone: The timezone (e.g., "UTC").
        language: The language code (e.g., "en").

    Returns:
        The 16-character hexadecimal fingerprint.
    """
    pieces = (user_agent, screen_resolution, timezone, language)
    material = "".join(map(str, pieces)).encode("utf-8")
    # 8 raw digest bytes render as the first 16 hex characters.
    return hashlib.sha256(material).digest()[:8].hex()
def compute_key_path(resource: str, number: str, timestamp: int, fingerprint: str, secret_key: str) -> str:
    """Derive the value sent in the X-Key-Path header.

    The message ``resource|number|timestamp|fingerprint`` is signed with
    HMAC-SHA256 using *secret_key*, and the first 16 hex characters of the
    signature are kept.

    Args:
        resource: The resource from the key URL.
        number: The number from the key URL.
        timestamp: The Unix timestamp.
        fingerprint: The fingerprint value.
        secret_key: The HMAC secret key (channel_salt).

    Returns:
        The 16-character hexadecimal key path.
    """
    message = "|".join(str(part) for part in (resource, number, timestamp, fingerprint))
    signer = hmac.new(secret_key.encode("utf-8"), message.encode("utf-8"), hashlib.sha256)
    return signer.hexdigest()[:16]
def compute_key_headers(key_url: str, secret_key: str) -> tuple[int, int, str, str] | None:
"""
Compute X-Key-Timestamp, X-Key-Nonce, X-Key-Path, and X-Fingerprint for a /key/ URL.
Algorithm:
1. Extract resource and number from URL pattern /key/{resource}/{number}
2. ts = Unix timestamp in seconds
3. hmac_hash = HMAC-SHA256(resource, secret_key).hex()
4. nonce = proof-of-work: find i where MD5(hmac+resource+number+ts+i)[:4] < 0x1000
5. fingerprint = compute_fingerprint()
6. key_path = HMAC-SHA256("resource|number|ts|fingerprint", secret_key).hex()[:16]
Args:
key_url: The key URL containing /key/{resource}/{number}
secret_key: The HMAC secret key (channel_salt)
Returns:
Tuple of (timestamp, nonce, key_path, fingerprint) or None if URL doesn't match pattern
"""
# Extract resource and number from URL
pattern = r"/key/([^/]+)/(\d+)"
match = re.search(pattern, key_url)
if not match:
return None
resource = match.group(1)
number = match.group(2)
ts = int(time.time())
# Compute HMAC-SHA256
hmac_hash = hmac.new(secret_key.encode("utf-8"), resource.encode("utf-8"), hashlib.sha256).hexdigest()
# Proof-of-work loop
nonce = 0
for i in range(100000):
combined = f"{hmac_hash}{resource}{number}{ts}{i}"
md5_hash = hashlib.md5(combined.encode("utf-8")).hexdigest()
prefix_value = int(md5_hash[:4], 16)
if prefix_value < 0x1000: # < 4096
nonce = i
break
fingerprint = compute_fingerprint()
key_path = compute_key_path(resource, number, ts, fingerprint, secret_key)
return ts, nonce, key_path, fingerprint
class DLHDExtractor(BaseExtractor):
"""DLHD (DaddyLive) URL extractor for M3U8 streams.
Supports the new authentication flow with:
- EPlayerAuth extraction (auth_token, channel_key, channel_salt)
- Server lookup for dynamic server selection
- Dynamic key header computation for AES-128 encrypted streams
"""
def __init__(self, request_headers: dict):
    """Initialise the extractor with the caller's request headers.

    Args:
        request_headers: Headers passed through to ``BaseExtractor``.
    """
    super().__init__(request_headers)
    # Cookie string and user agent captured from a FlareSolverr solution;
    # both stay None until such a solution has been received.
    self._flaresolverr_user_agent: Optional[str] = None
    self._flaresolverr_cookies: Optional[str] = None
    # URL of the iframe currently being tried by the legacy flow.
    self._iframe_context: Optional[str] = None
    self.mediaflow_endpoint = "hls_key_proxy"
async def _fetch_via_flaresolverr(self, url: str) -> HttpResponse:
    """Fetch *url* through FlareSolverr to get past Cloudflare protection.

    A ``request.get`` command is POSTed to ``{flaresolverr_url}/v1``. Cookies
    and the user agent reported in the solution are stored on the instance so
    later direct requests can reuse them.

    Args:
        url: The page to fetch.

    Returns:
        An ``HttpResponse`` built from the solution (status, final URL and
        body; response headers are left empty).

    Raises:
        ExtractorError: If FlareSolverr is not configured, answers with a
            non-200 HTTP status, or reports a status other than ``"ok"``.
    """
    if not settings.flaresolverr_url:
        raise ExtractorError("FlareSolverr URL not configured. Set FLARESOLVERR_URL in environment.")

    endpoint = f"{settings.flaresolverr_url.rstrip('/')}/v1"
    request_body = {
        "cmd": "request.get",
        "url": url,
        "maxTimeout": settings.flaresolverr_timeout * 1000,
    }

    logger.info(f"Using FlareSolverr to fetch: {url}")

    async with aiohttp.ClientSession() as session:
        # The HTTP timeout gets 10 extra seconds on top of the solver's own budget.
        http_timeout = aiohttp.ClientTimeout(total=settings.flaresolverr_timeout + 10)
        async with session.post(endpoint, json=request_body, timeout=http_timeout) as response:
            if response.status != 200:
                raise ExtractorError(f"FlareSolverr returned status {response.status}")
            data = await response.json()

    if data.get("status") != "ok":
        raise ExtractorError(f"FlareSolverr failed: {data.get('message', 'Unknown error')}")

    solution = data.get("solution", {})
    body = solution.get("response", "")
    landed_url = solution.get("url", url)
    solved_status = solution.get("status", 200)

    # Remember cookies and user agent for subsequent requests.
    cookies = solution.get("cookies", [])
    if cookies:
        self._flaresolverr_cookies = "; ".join(f"{c['name']}={c['value']}" for c in cookies)
        logger.info(f"FlareSolverr provided {len(cookies)} cookies")

    user_agent = solution.get("userAgent")
    if user_agent:
        self._flaresolverr_user_agent = user_agent
        logger.info(f"FlareSolverr user-agent: {user_agent}")

    logger.info(f"FlareSolverr successfully bypassed Cloudflare for: {url}")

    return HttpResponse(
        status=solved_status,
        headers={},
        text=body,
        content=body.encode("utf-8", errors="replace"),
        url=landed_url,
    )
async def _make_request(
    self, url: str, method: str = "GET", headers: Optional[Dict] = None, use_flaresolverr: bool = False, **kwargs
) -> HttpResponse:
    """Send a request with SSL verification disabled, optionally via FlareSolverr.

    Args:
        url: Target URL.
        method: HTTP method.
        headers: Extra headers merged over ``self.base_headers``.
        use_flaresolverr: When true and a FlareSolverr URL is configured, the
            fetch is delegated to ``_fetch_via_flaresolverr``.
        **kwargs: ``timeout`` (default 15) is honoured; ``retries`` and
            ``backoff_factor`` are accepted and discarded; anything else is
            forwarded to ``session.request``.

    Returns:
        An ``HttpResponse`` carrying status, headers, decoded text, raw bytes
        and the final URL.

    Raises:
        ExtractorError: If the response status is 400 or above.
    """
    if use_flaresolverr and settings.flaresolverr_url:
        return await self._fetch_via_flaresolverr(url)

    timeout = kwargs.pop("timeout", 15)
    # Accepted for signature compatibility but not acted on here.
    for unused_option in ("retries", "backoff_factor"):
        kwargs.pop(unused_option, None)

    outgoing = self.base_headers.copy()
    if headers:
        outgoing.update(headers)

    # Append cookies captured from FlareSolverr to any cookie already present.
    solver_cookies = self._flaresolverr_cookies
    if solver_cookies:
        current = outgoing.get("Cookie", "")
        outgoing["Cookie"] = f"{current}; {solver_cookies}" if current else solver_cookies

    # The FlareSolverr user agent, once known, overrides whatever was supplied.
    if self._flaresolverr_user_agent:
        outgoing["User-Agent"] = self._flaresolverr_user_agent

    # verify=False: SSL certificate checks are bypassed for these hosts.
    async with create_aiohttp_session(url, timeout=timeout, verify=False) as (session, proxy_url):
        async with session.request(method, url, headers=outgoing, proxy=proxy_url, **kwargs) as response:
            content = await response.read()
            if response.status >= 400:
                raise ExtractorError(f"HTTP error {response.status} while requesting {url}")
            return HttpResponse(
                status=response.status,
                headers=dict(response.headers),
                text=content.decode("utf-8", errors="replace"),
                content=content,
                url=str(response.url),
            )
async def _extract_session_data(self, iframe_url: str, main_url: str) -> dict | None:
    """Fetch the iframe page and read the EPlayerAuth session values from it.

    Args:
        iframe_url: The iframe URL to fetch.
        main_url: The main site domain, used to build the Referer header.

    Returns:
        A dict with ``auth_token``, ``channel_key`` and ``channel_salt`` (plus
        ``server_lookup_url`` when the page exposes the lookup endpoint), or
        ``None`` if the fetch fails or any of the three values is missing.
    """
    request_headers = {
        "User-Agent": self._flaresolverr_user_agent or DEFAULT_DLHD_USER_AGENT,
        "Referer": f"https://{main_url}/",
    }

    try:
        html = (await self._make_request(iframe_url, headers=request_headers, timeout=12)).text
    except Exception as e:
        logger.warning(f"Error fetching iframe URL: {e}")
        return None

    # Values come from: EPlayerAuth.init({ authToken: '...', channelKey: '...', ..., channelSalt: '...' });
    field_patterns = {
        "auth_token": r"EPlayerAuth\.init\s*\(\s*\{\s*authToken:\s*'([^']+)'",
        "channel_key": r"channelKey:\s*'([^']+)'",
        "channel_salt": r"channelSalt:\s*'([^']+)'",
    }
    session = {}
    for field, pattern in field_patterns.items():
        hit = re.search(pattern, html)
        if hit is None:
            # All three values are required; one missing means no session data.
            return None
        session[field] = hit.group(1)

    # Base of the server lookup URL, taken from the page's fetchWithRetry call.
    lookup_hit = re.search(r"fetchWithRetry\s*\(\s*'([^']+server_lookup\?channel_id=)", html)
    if lookup_hit:
        session["server_lookup_url"] = lookup_hit.group(1) + session["channel_key"]

    return session
async def _get_server_key(self, server_lookup_url: str, iframe_url: str) -> str | None:
    """Query the server lookup endpoint and return its ``server_key``.

    Args:
        server_lookup_url: The server lookup URL.
        iframe_url: The iframe URL; its host supplies the Referer and Origin headers.

    Returns:
        The ``server_key`` from the lookup response, or ``None`` if the
        request or the parsing of its body fails, or the key is absent.
    """
    origin = f"https://{urlparse(iframe_url).netloc}"
    request_headers = {
        "User-Agent": self._flaresolverr_user_agent or DEFAULT_DLHD_USER_AGENT,
        "Referer": f"{origin}/",
        "Origin": origin,
    }

    try:
        lookup_response = await self._make_request(server_lookup_url, headers=request_headers, timeout=10)
        return lookup_response.json().get("server_key")
    except Exception as e:
        logger.warning(f"Error fetching server lookup: {e}")
        return None
def _build_m3u8_url(self, server_key: str, channel_key: str) -> str:
"""
Build the m3u8 URL based on the server_key.
Args:
server_key: The server key from server lookup
channel_key: The channel key
Returns:
The m3u8 URL (with .css extension as per the original implementation)
"""
if server_key == "top1/cdn":
return f"https://top1.dvalna.ru/top1/cdn/{channel_key}/mono.css"
else:
return f"https://{server_key}new.dvalna.ru/{server_key}/{channel_key}/mono.css"
async def _extract_new_auth_flow(self, iframe_url: str, iframe_content: str, headers: dict) -> Dict[str, Any]:
"""Handles the new authentication flow found in recent updates.

The flow has three steps: read the auth constants from the iframe's
JavaScript, POST them to the auth endpoint, then look up the server key
and build the stream URL from it.

Args:
    iframe_url: URL of the iframe; supplies Origin/Referer and the host
        for the server lookup.
    iframe_content: HTML/JS body of the iframe page.
    headers: Base headers; must contain "User-Agent".

Returns:
    A dict with "destination_url", "request_headers" (including the
    bearer token and channel key) and "mediaflow_endpoint".

Raises:
    ExtractorError: If any auth constant is missing from the page, the
        auth POST fails or is rejected, or the server lookup fails.
"""
def _extract_params(js: str) -> Dict[str, Optional[str]]:
# Each value is a const/var/let string assignment in the page script;
# a constant that is not found is recorded as None.
params = {}
patterns = {
"channel_key": r'(?:const|var|let)\s+(?:CHANNEL_KEY|channelKey)\s*=\s*["\']([^"\']+)["\']',
"auth_token": r'(?:const|var|let)\s+AUTH_TOKEN\s*=\s*["\']([^"\']+)["\']',
"auth_country": r'(?:const|var|let)\s+AUTH_COUNTRY\s*=\s*["\']([^"\']+)["\']',
"auth_ts": r'(?:const|var|let)\s+AUTH_TS\s*=\s*["\']([^"\']+)["\']',
"auth_expiry": r'(?:const|var|let)\s+AUTH_EXPIRY\s*=\s*["\']([^"\']+)["\']',
}
for key, pattern in patterns.items():
match = re.search(pattern, js)
params[key] = match.group(1) if match else None
return params
params = _extract_params(iframe_content)
missing_params = [k for k, v in params.items() if not v]
if missing_params:
# This is not an error, just means it's not the new flow
raise ExtractorError(f"Not the new auth flow: missing params {missing_params}")
logger.info("New auth flow detected. Proceeding with POST auth.")
# 1. Initial Auth POST
auth_url = "https://security.newkso.ru/auth2.php"
iframe_origin = f"https://{urlparse(iframe_url).netloc}"
# Work on a copy so the caller's header dict is left untouched.
auth_headers = headers.copy()
auth_headers.update(
{
"Accept": "*/*",
"Accept-Language": "en-US,en;q=0.9",
"Origin": iframe_origin,
"Referer": iframe_url,
"Sec-Fetch-Dest": "empty",
"Sec-Fetch-Mode": "cors",
"Sec-Fetch-Site": "cross-site",
"Priority": "u=1, i",
}
)
# Build form data for multipart/form-data
form_data = aiohttp.FormData()
form_data.add_field("channelKey", params["channel_key"])
form_data.add_field("country", params["auth_country"])
form_data.add_field("timestamp", params["auth_ts"])
form_data.add_field("expiry", params["auth_expiry"])
form_data.add_field("token", params["auth_token"])
try:
# verify=False: SSL certificate verification is disabled for this request.
async with create_aiohttp_session(auth_url, timeout=12, verify=False) as (session, proxy_url):
async with session.post(
auth_url,
headers=auth_headers,
data=form_data,
proxy=proxy_url,
) as response:
content = await response.read()
response.raise_for_status()
# Function-local import; json is only needed on this path.
import json
auth_data = json.loads(content.decode("utf-8"))
# The endpoint signals acceptance through either "valid" or "success".
if not (auth_data.get("valid") or auth_data.get("success")):
raise ExtractorError(f"Initial auth failed with response: {auth_data}")
logger.info("New auth flow: Initial auth successful.")
except ExtractorError:
# Already descriptive; let it through without re-wrapping.
raise
except Exception as e:
raise ExtractorError(f"New auth flow failed during initial auth POST: {e}")
# 2. Server Lookup
server_lookup_url = f"https://{urlparse(iframe_url).netloc}/server_lookup.js?channel_id={params['channel_key']}"
try:
# Goes through this class's _make_request override.
# NOTE(review): that override discards "retries"/"backoff_factor", so no retry happens there - confirm.
lookup_resp = await self._make_request(server_lookup_url, headers=headers, timeout=10)
server_data = lookup_resp.json()
server_key = server_data.get("server_key")
if not server_key:
raise ExtractorError(f"No server_key in lookup response: {server_data}")
logger.info(f"New auth flow: Server lookup successful - Server key: {server_key}")
except ExtractorError:
raise
except Exception as e:
raise ExtractorError(f"New auth flow failed during server lookup: {e}")
# 3. Build final stream URL
channel_key = params["channel_key"]
auth_token = params["auth_token"]
# The JS logic uses .css, not .m3u8
if server_key == "top1/cdn":
stream_url = f"https://top1.newkso.ru/top1/cdn/{channel_key}/mono.css"
else:
stream_url = f"https://{server_key}new.newkso.ru/{server_key}/{channel_key}/mono.css"
logger.info(f"New auth flow: Constructed stream URL: {stream_url}")
stream_headers = {
"User-Agent": headers["User-Agent"],
"Referer": iframe_url,
"Origin": iframe_origin,
"Authorization": f"Bearer {auth_token}",
"X-Channel-Key": channel_key,
}
return {
"destination_url": stream_url,
"request_headers": stream_headers,
"mediaflow_endpoint": "hls_manifest_proxy",
}
async def _extract_lovecdn_stream(self, iframe_url: str, iframe_content: str, headers: dict) -> Dict[str, Any]:
    """Extract the stream from a lovecdn.ru iframe, which uses its own page format.

    Three strategies are tried in order: a quoted absolute ``.m3u8`` URL, a
    URL assembled from channel/server values in the page, and finally any
    bare ``.m3u8`` URL.

    Args:
        iframe_url: URL of the iframe; used as Referer and for the Origin.
        iframe_content: Body of the iframe page.
        headers: Base headers; must contain "User-Agent".

    Returns:
        A dict with "destination_url", "request_headers" and "mediaflow_endpoint".

    Raises:
        ExtractorError: If no stream URL can be found or any step fails.
    """
    try:
        quoted_url_patterns = [
            r'["\']([^"\']*\.m3u8[^"\']*)["\']',
            r'source[:\s]+["\']([^"\']+)["\']',
            r'file[:\s]+["\']([^"\']+\.m3u8[^"\']*)["\']',
            r'hlsManifestUrl[:\s]*["\']([^"\']+)["\']',
        ]

        # Strategy 1: first capture, in pattern order, that is an absolute .m3u8 URL.
        stream_url = next(
            (
                candidate
                for pattern in quoted_url_patterns
                for candidate in re.findall(pattern, iframe_content)
                if ".m3u8" in candidate and candidate.startswith("http")
            ),
            None,
        )
        if stream_url:
            logger.info(f"Found direct m3u8 URL: {stream_url}")

        # Strategy 2: assemble the URL from channel/server values found in the page.
        if not stream_url:
            channel_match = re.search(r'(?:stream|channel)["\s:=]+["\']([^"\']+)["\']', iframe_content)
            server_match = re.search(r'(?:server|domain|host)["\s:=]+["\']([^"\']+)["\']', iframe_content)
            if channel_match:
                server = server_match.group(1) if server_match else "newkso.ru"
                stream_url = f"https://{server}/{channel_match.group(1)}/mono.m3u8"
                logger.info(f"Constructed stream URL: {stream_url}")

        # Strategy 3: any bare URL that looks like a stream.
        if not stream_url:
            bare_urls = re.findall(r'https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*', iframe_content)
            if bare_urls:
                stream_url = bare_urls[0]
                logger.info(f"Found fallback stream URL: {stream_url}")

        if not stream_url:
            raise ExtractorError("Could not find stream URL in lovecdn.ru iframe")

        # The iframe URL doubles as the referer for the stream request.
        stream_headers = {
            "User-Agent": headers["User-Agent"],
            "Referer": iframe_url,
            "Origin": f"https://{urlparse(iframe_url).netloc}",
        }

        endpoint = "hls_key_proxy"
        logger.info(f"Using lovecdn.ru stream with endpoint: {endpoint}")

        return {
            "destination_url": stream_url,
            "request_headers": stream_headers,
            "mediaflow_endpoint": endpoint,
        }
    except Exception as e:
        raise ExtractorError(f"Failed to extract lovecdn.ru stream: {e}")
async def _extract_direct_stream(self, channel_id: str) -> Dict[str, Any]:
    """Extract the stream directly through the server lookup API.

    For each known iframe domain the session values (auth token, channel key,
    channel salt) are read from the iframe page, the server key is looked up,
    and the playlist URL is built. The first domain that succeeds wins.

    Args:
        channel_id: Numeric channel identifier.

    Returns:
        A dict with the playlist URL, request headers, endpoint name, the
        ``force_playlist_proxy`` flag and the ``dlhd_key_params`` needed to
        compute key headers later.

    Raises:
        ExtractorError: If no iframe domain yields a stream.
    """
    # Known iframe domains for DLHD
    candidate_domains = ["lefttoplay.xyz"]

    for iframe_domain in candidate_domains:
        try:
            iframe_url = f"https://{iframe_domain}/premiumtv/daddyhd.php?id={channel_id}"
            logger.info(f"Attempting extraction via {iframe_domain}")

            session_data = await self._extract_session_data(iframe_url, "dlhd.link")
            if not session_data:
                logger.debug(f"No session data from {iframe_domain}")
                continue
            logger.info(f"Got session data from {iframe_domain}: channel_key={session_data['channel_key']}")

            if "server_lookup_url" not in session_data:
                logger.debug(f"No server lookup URL from {iframe_domain}")
                continue

            server_key = await self._get_server_key(session_data["server_lookup_url"], iframe_url)
            if not server_key:
                logger.debug(f"No server key from {iframe_domain}")
                continue
            logger.info(f"Got server key: {server_key}")

            m3u8_url = self._build_m3u8_url(server_key, session_data["channel_key"])
            logger.info(f"M3U8 URL: {m3u8_url}")

            auth_token = session_data["auth_token"]
            stream_headers = {
                "User-Agent": self._flaresolverr_user_agent or DEFAULT_DLHD_USER_AGENT,
                "Referer": iframe_url,
                "Origin": f"https://{iframe_domain}",
                "Authorization": f"Bearer {auth_token}",
            }

            # dlhd_key_params travels with the result so key headers can be
            # computed when the keys are fetched.
            return {
                "destination_url": m3u8_url,
                "request_headers": stream_headers,
                "mediaflow_endpoint": "hls_key_proxy",
                # DLHD serves the playlist under a .css extension, so playlist
                # processing has to be forced.
                "force_playlist_proxy": True,
                "dlhd_key_params": {
                    "channel_salt": session_data["channel_salt"],
                    "auth_token": auth_token,
                    "iframe_url": iframe_url,
                },
            }
        except Exception as e:
            logger.warning(f"Failed extraction via {iframe_domain}: {e}")
            continue

    raise ExtractorError(f"Failed to extract stream from all iframe domains for channel {channel_id}")
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
    """Resolve a DLHD page URL to a stream descriptor.

    The direct server-lookup flow is tried first; if it raises
    ``ExtractorError`` the legacy iframe-based flow is used instead.

    Args:
        url: A URL containing ``watch.php?id=<digits>`` or ``stream-<digits>``.
        **kwargs: Accepted for interface compatibility; unused here.

    Returns:
        The stream descriptor dict produced by whichever flow succeeds.

    Raises:
        ExtractorError: If no channel ID can be read from *url* or both flows fail.
    """

    def channel_id_from(candidate_url: str) -> Optional[str]:
        # watch.php?id=NNN is checked first, then the stream-NNN form.
        for id_pattern in (r"watch\.php\?id=(\d+)", r"stream-(\d+)"):
            found = re.search(id_pattern, candidate_url)
            if found:
                return found.group(1)
        return None

    try:
        channel_id = channel_id_from(url)
        if not channel_id:
            raise ExtractorError(f"Unable to extract channel ID from {url}")

        logger.info(f"Extracting DLHD stream for channel ID: {channel_id}")

        try:
            return await self._extract_direct_stream(channel_id)
        except ExtractorError as e:
            logger.warning(f"Direct stream extraction failed: {e}")

        # Direct flow failed: fall back to the legacy iframe-based extraction.
        logger.info("Falling back to iframe-based extraction...")
        return await self._extract_via_iframe(url, channel_id)
    except Exception as e:
        raise ExtractorError(f"Extraction failed: {str(e)}")
async def _extract_via_iframe(self, url: str, channel_id: str) -> Dict[str, Any]:
"""Legacy iframe-based extraction flow - used as fallback.

The channel page is fetched (through FlareSolverr when configured), every
"Player N" button link on it is visited to collect iframe URLs, and each
iframe is then tried in turn until one yields a stream.

Args:
    url: The channel page URL.
    channel_id: The channel identifier (not read inside this method).

Returns:
    The stream descriptor from the first iframe that can be extracted.

Raises:
    ExtractorError: If the page has no player links, no player page
        yields an iframe, or every iframe candidate fails.
"""
baseurl = "https://dlhd.dad/"
daddy_origin = urlparse(baseurl).scheme + "://" + urlparse(baseurl).netloc
daddylive_headers = {
"User-Agent": self._flaresolverr_user_agent
or "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
"Referer": baseurl,
"Origin": daddy_origin,
}
# 1. Request initial page - use FlareSolverr if available to bypass Cloudflare
use_flaresolverr = settings.flaresolverr_url is not None
resp1 = await self._make_request(url, headers=daddylive_headers, timeout=15, use_flaresolverr=use_flaresolverr)
resp1_text = resp1.text
# Update headers with FlareSolverr user-agent after initial request
if self._flaresolverr_user_agent:
daddylive_headers["User-Agent"] = self._flaresolverr_user_agent
# Player links are the data-url attributes of the "Player N" buttons.
player_links = re.findall(r'<button[^>]*data-url="([^"]+)"[^>]*>Player\s*\d+</button>', resp1_text)
if not player_links:
raise ExtractorError("No player links found on the page.")
# Try all players and collect all valid iframes
last_player_error = None
iframe_candidates = []
for player_url in player_links:
try:
# Relative links are resolved against the site base URL.
if not player_url.startswith("http"):
player_url = baseurl + player_url.lstrip("/")
# The shared header dict is mutated here, so these values persist into later requests.
daddylive_headers["Referer"] = player_url
# NOTE(review): Origin is set to the full player URL rather than scheme://host - confirm this is intended.
daddylive_headers["Origin"] = player_url
resp2 = await self._make_request(player_url, headers=daddylive_headers, timeout=12)
resp2_text = resp2.text
iframes2 = re.findall(r'<iframe.*?src="([^"]*)"', resp2_text)
# Collect all found iframes
for iframe in iframes2:
if iframe not in iframe_candidates:
iframe_candidates.append(iframe)
logger.info(f"Found iframe candidate: {iframe}")
except Exception as e:
# A failing player page is not fatal; remember the error and move on.
last_player_error = e
logger.warning(f"Failed to process player link {player_url}: {e}")
continue
if not iframe_candidates:
if last_player_error:
raise ExtractorError(f"All player links failed. Last error: {last_player_error}")
raise ExtractorError("No valid iframe found in any player page")
# Try each iframe until one works
last_iframe_error = None
for iframe_candidate in iframe_candidates:
try:
logger.info(f"Trying iframe: {iframe_candidate}")
iframe_domain = urlparse(iframe_candidate).netloc
# An empty netloc means the src was not an absolute URL; skip it.
if not iframe_domain:
logger.warning(f"Invalid iframe URL format: {iframe_candidate}")
continue
self._iframe_context = iframe_candidate
resp3 = await self._make_request(iframe_candidate, headers=daddylive_headers, timeout=12)
iframe_content = resp3.text
logger.info(f"Successfully loaded iframe from: {iframe_domain}")
# lovecdn.ru pages use a different format and get their own extractor.
if "lovecdn.ru" in iframe_domain:
logger.info("Detected lovecdn.ru iframe - using alternative extraction")
return await self._extract_lovecdn_stream(iframe_candidate, iframe_content, daddylive_headers)
else:
logger.info("Attempting new auth flow extraction.")
return await self._extract_new_auth_flow(iframe_candidate, iframe_content, daddylive_headers)
except Exception as e:
logger.warning(f"Failed to process iframe {iframe_candidate}: {e}")
last_iframe_error = e
continue
raise ExtractorError(f"All iframe candidates failed. Last error: {last_iframe_error}")
+181 -21
View File
@@ -1,49 +1,209 @@
import logging
import re
import time
from urllib.parse import urlparse, urljoin
import aiohttp
from curl_cffi.requests import AsyncSession
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
from mediaflow_proxy.configs import settings
logger = logging.getLogger(__name__)
_DOOD_UA = (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
)
class DoodStreamExtractor(BaseExtractor):
"""
Dood / MyVidPlay extractor
Resolves to direct CDN MP4
"""
DoodStream / PlayMogo extractor.
# NOTE(review): in this diff view these lines sit between lines of the class
# docstring text and look like the removed side of the hunk - confirm whether
# this constructor still exists in the new file.
def __init__(self, request_headers: dict):
"""Pass the request headers to the base extractor and set the myvidplay.com base URL."""
super().__init__(request_headers)
self.base_url = "https://myvidplay.com"
All DoodStream mirror domains (dsvplay.com, myvidplay.com, dood.to, …) now
redirect to playmogo.com which sits behind Cloudflare and may require a
Turnstile CAPTCHA before serving the pass_md5 URL.
Extraction order:
1. Byparr — set BYPARR_URL (Firefox/Camoufox → Turnstile auto-validates,
not blocked by DisableDevtool.js)
2. curl_cffi — Chrome impersonation; works when Turnstile is not triggered,
raises a descriptive error if captcha is detected.
"""
async def extract(self, url: str, **kwargs):
"""Resolve a DoodStream URL, trying Byparr first when it is configured.

The video ID is the last path segment of *url*. When settings.byparr_url
is set the Byparr path is used; otherwise the curl_cffi path is used.
"""
# NOTE(review): this span interleaves removed and added diff lines (two
# consecutive raise statements, an unclosed "headers = {" dict). Confirm
# which lines belong to the current version before editing the logic.
parsed = urlparse(url)
video_id = parsed.path.rstrip("/").split("/")[-1]
if not video_id:
raise ExtractorError("Invalid Dood URL")
raise ExtractorError("Invalid DoodStream URL: no video ID found")
headers = {
"User-Agent": self.base_headers.get("User-Agent") or "Mozilla/5.0",
"Referer": f"{self.base_url}/",
if settings.byparr_url:
try:
return await self._extract_via_byparr(url, video_id)
# Catching and immediately re-raising leaves ExtractorError unchanged,
# so a Byparr failure is not followed by the curl_cffi call below.
except ExtractorError:
raise
return await self._extract_via_curl_cffi(url, video_id)
# ------------------------------------------------------------------
# Path 1 Byparr (Firefox/Camoufox → Turnstile auto-validates)
# ------------------------------------------------------------------
async def _extract_via_byparr(self, url: str, video_id: str) -> dict:
"""
Use Byparr to bypass Cloudflare protection on the DoodStream embed page.
Strategy: fetch the embed page without any injected script. Byparr's
Firefox/Camoufox browser auto-passes Cloudflare's bot checks and often
bypasses the Turnstile CAPTCHA gate directly, returning the embed HTML
with pass_md5. If the response doesn't contain pass_md5, reuse the CF
cookies + UA from Byparr in a follow-up curl_cffi request (which avoids
re-triggering the bot check).

Args:
    url: The DoodStream URL; used as-is when it already contains "/e/".
    video_id: The video ID, used to build the embed URL otherwise.

Raises:
    ExtractorError: If Byparr answers with a non-200 HTTP status or a
        status other than "ok".
"""
# NOTE(review): this span interleaves removed and added diff lines (for
# example the statements using self.base_url, headers and pass_url). Confirm
# which lines belong to the current version before editing the logic.
endpoint = f"{settings.byparr_url.rstrip('/')}/v1"
# URLs that are not already embed URLs are rewritten to /e/<video_id> on the same host.
embed_url = url if "/e/" in url else f"https://{urlparse(url).netloc}/e/{video_id}"
payload = {
"cmd": "request.get",
"url": embed_url,
"maxTimeout": settings.byparr_timeout * 1000,
}
embed_url = f"{self.base_url}/e/{video_id}"
html = (await self._make_request(embed_url, headers=headers)).text
async with aiohttp.ClientSession() as session:
async with session.post(
endpoint,
json=payload,
# The HTTP timeout gets 15 extra seconds on top of Byparr's own budget.
timeout=aiohttp.ClientTimeout(total=settings.byparr_timeout + 15),
) as resp:
if resp.status != 200:
raise ExtractorError(f"Byparr HTTP {resp.status}")
data = await resp.json()
match = re.search(r"(\/pass_md5\/[^']+)", html)
if not match:
raise ExtractorError("Dood: pass_md5 not found")
if data.get("status") != "ok":
raise ExtractorError(f"Byparr: {data.get('message', 'unknown error')}")
pass_url = urljoin(self.base_url, match.group(1))
solution = data.get("solution", {})
final_url = solution.get("url", embed_url)
# Fall back to the requested URL when the reported one is not an http(s) URL.
if not final_url.startswith("http"):
final_url = embed_url
base_url = f"https://{urlparse(final_url).netloc}"
html = solution.get("response", "")
base_stream = (await self._make_request(pass_url, headers=headers)).text.strip()
if "pass_md5" not in html:
# Byparr may not have the pass_md5 in the initial response.
# Try two recovery strategies in order:
#
# 1. Cookie reuse — if Byparr collected CF clearance cookies before
# the page loaded fully, inject them into a curl_cffi request.
# 2. Plain curl_cffi — Chrome TLS impersonation without JS execution.
raw_cookies = solution.get("cookies", [])
cookies = {c["name"]: c["value"] for c in raw_cookies}
ua = solution.get("userAgent", _DOOD_UA)
# NOTE(review): token_match is assigned but not read anywhere in the visible code.
token_match = re.search(r"token=([^&]+)", html)
if cookies:
# Retry against the domain that issued cf_clearance, defaulting to playmogo.com.
cf_domain = (
next(
(c.get("domain", "").lstrip(".") for c in raw_cookies if c.get("name") == "cf_clearance"),
None,
)
or "playmogo.com"
)
retry_url = f"https://{cf_domain}/e/{video_id}"
logger.debug(
"Byparr response lacked pass_md5 (final_url=%s); retrying %s with CF cookies via curl_cffi",
final_url,
retry_url,
)
proxy = self._get_proxy(retry_url)
async with AsyncSession() as s:
r = await s.get(
retry_url,
impersonate="chrome",
cookies=cookies,
headers={"User-Agent": ua, "Referer": f"https://{cf_domain}/"},
timeout=20,
# The proxy keyword is passed only when a proxy was resolved.
**({"proxy": proxy} if proxy else {}),
)
html = r.text
final_url = str(r.url)
base_url = f"https://{urlparse(final_url).netloc}"
if "pass_md5" not in html:
logger.debug("Byparr cookie reuse also failed; falling back to curl_cffi for %s", embed_url)
return await self._extract_via_curl_cffi(embed_url, video_id)
return await self._parse_embed_html(html, base_url)
# ------------------------------------------------------------------
# Path 2: curl_cffi (bypasses Cloudflare bot protection; a Turnstile challenge may still block)
# ------------------------------------------------------------------
async def _extract_via_curl_cffi(self, url: str, video_id: str) -> dict:
    """Fetch the embed page with curl_cffi Chrome impersonation and parse it.

    The request carries a same-host ``Referer``, follows redirects, and is
    routed through the proxy returned by ``self._get_proxy(url)`` when one is
    configured. ``video_id`` is accepted for signature parity with the caller
    but is not read on this path.

    Returns:
        Whatever ``self._parse_embed_html`` returns for the fetched page.

    Raises:
        ExtractorError: if the fetched HTML does not contain ``pass_md5``;
            the message distinguishes a Turnstile/captcha page from a plain
            miss.
    """
    upstream_proxy = self._get_proxy(url)
    request_kwargs = {
        "impersonate": "chrome",
        "headers": {"Referer": f"https://{urlparse(url).netloc}/"},
        "timeout": 30,
        "allow_redirects": True,
    }
    if upstream_proxy:
        # Only pass ``proxy`` when a route actually defines one.
        request_kwargs["proxy"] = upstream_proxy

    async with AsyncSession() as session:
        response = await session.get(url, **request_kwargs)
        landed_url = str(response.url)
        page = response.text

    if "pass_md5" in page:
        # Use the post-redirect host as the base for the pass_md5 request.
        return await self._parse_embed_html(page, f"https://{urlparse(landed_url).netloc}")

    challenge_served = "turnstile" in page.lower() or "captcha_l" in page
    if challenge_served:
        raise ExtractorError(
            "DoodStream: site is serving a Turnstile CAPTCHA that requires "
            "browser interaction — cannot be bypassed automatically from this "
            "network location. Try a residential IP or a VPN/proxy."
        )
    raise ExtractorError(f"DoodStream: pass_md5 not found in embed HTML ({landed_url})")
# ------------------------------------------------------------------
# Common HTML parser
# ------------------------------------------------------------------
async def _parse_embed_html(self, html: str, base_url: str) -> dict:
pass_match = re.search(r"(/pass_md5/[^'\"<>\s]+)", html)
if not pass_match:
raise ExtractorError("DoodStream: pass_md5 path not found in embed HTML")
pass_url = urljoin(base_url, pass_match.group(1))
ua = self.base_headers.get("user-agent") or _DOOD_UA
headers = {
"user-agent": ua,
"referer": f"{base_url}/",
}
proxy = self._get_proxy(pass_url)
async with AsyncSession() as s:
r = await s.get(
pass_url,
impersonate="chrome",
headers=headers,
timeout=20,
**({"proxy": proxy} if proxy else {}),
)
base_stream = r.text.strip()
if not base_stream or "RELOAD" in base_stream:
raise ExtractorError(
"DoodStream: pass_md5 endpoint returned no stream URL "
"(captcha session may have expired). "
"Ensure BYPARR_URL is set for reliable extraction."
)
token_match = re.search(r"token=([^&\s'\"]+)", html)
if not token_match:
raise ExtractorError("Dood: token missing")
raise ExtractorError("DoodStream: token not found in embed HTML")
token = token_match.group(1)
final_url = f"{base_stream}123456789?token={token}&expiry={int(time.time())}"
expiry = int(time.time())
final_url = f"{base_stream}123456789?token={token}&expiry={expiry}"
return {
"destination_url": final_url,
+4 -2
View File
@@ -1,8 +1,8 @@
from typing import Dict, Type
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
from mediaflow_proxy.extractors.dlhd import DLHDExtractor
from mediaflow_proxy.extractors.doodstream import DoodStreamExtractor
from mediaflow_proxy.extractors.city import CityExtractor
from mediaflow_proxy.extractors.sportsonline import SportsonlineExtractor
from mediaflow_proxy.extractors.filelions import FileLionsExtractor
from mediaflow_proxy.extractors.filemoon import FileMoonExtractor
@@ -24,12 +24,14 @@ from mediaflow_proxy.extractors.vidoza import VidozaExtractor
from mediaflow_proxy.extractors.vixcloud import VixCloudExtractor
from mediaflow_proxy.extractors.fastream import FastreamExtractor
from mediaflow_proxy.extractors.voe import VoeExtractor
from mediaflow_proxy.extractors.vidfast import VidFastExtractor
class ExtractorFactory:
"""Factory for creating URL extractors."""
_extractors: Dict[str, Type[BaseExtractor]] = {
"City": CityExtractor,
"Doodstream": DoodStreamExtractor,
"FileLions": FileLionsExtractor,
"FileMoon": FileMoonExtractor,
@@ -46,13 +48,13 @@ class ExtractorFactory:
"Maxstream": MaxstreamExtractor,
"LiveTV": LiveTVExtractor,
"LuluStream": LuluStreamExtractor,
"DLHD": DLHDExtractor,
"Vavoo": VavooExtractor,
"Vidmoly": VidmolyExtractor,
"Vidoza": VidozaExtractor,
"Fastream": FastreamExtractor,
"Voe": VoeExtractor,
"Sportsonline": SportsonlineExtractor,
"VidFast": VidFastExtractor,
}
@classmethod
+22 -3
View File
@@ -1,23 +1,42 @@
import re
from typing import Dict, Any
from curl_cffi.requests import AsyncSession
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
class LuluStreamExtractor(BaseExtractor):
"""LuluStream URL extractor.
Uses curl_cffi + Chrome impersonation to bypass Cloudflare protection.
lulustream.com embeds are served via luluvdo.com.
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.mediaflow_endpoint = "hls_manifest_proxy"
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
response = await self._make_request(url)
proxy = self._get_proxy(url)
async with AsyncSession() as session:
response = await session.get(
url,
impersonate="chrome",
timeout=30,
allow_redirects=True,
**({"proxy": proxy} if proxy else {}),
)
if response.status_code >= 400:
raise ExtractorError(f"HTTP {response.status_code} while fetching {url}")
# See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/lulustream.py
pattern = r"""sources:\s*\[{file:\s*["'](?P<url>[^"']+)"""
match = re.search(pattern, response.text, re.DOTALL)
if not match:
raise ExtractorError("Failed to extract source URL")
final_url = match.group(1)
raise ExtractorError("LuluStream: Failed to extract source URL")
final_url = match.group("url")
self.base_headers["referer"] = url
return {
+116 -70
View File
@@ -1,7 +1,7 @@
import re
import logging
from typing import Any, Dict
from urllib.parse import urlparse
from urllib.parse import urljoin, urlparse
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
from mediaflow_proxy.utils.packed import unpack
@@ -14,7 +14,7 @@ class SportsonlineExtractor(BaseExtractor):
Strategy:
1. Fetch page -> find first <iframe src="...">
2. Fetch iframe with Referer=https://sportzonline.st/
2. Fetch iframe with dynamic source-page Referer/Origin
3. Collect packed eval blocks; if >=2 use second (index 1) else first.
4. Unpack P.A.C.K.E.R. and search var src="...m3u8".
5. Return final m3u8 with referer header.
@@ -33,56 +33,125 @@ class SportsonlineExtractor(BaseExtractor):
"""
Detect and extract packed eval blocks from HTML.
"""
# Find all eval(function...) blocks - more greedy to capture full packed code
pattern = re.compile(r"eval\(function\(p,a,c,k,e,.*?\)\)(?:\s*;|\s*<)", re.DOTALL)
raw_matches = pattern.findall(html)
raw_matches: list[str] = []
strict_eval_pattern = re.compile(r"eval\(function\(p,a,c,k,e,.*?\}\(.*?\)\)", re.DOTALL)
relaxed_eval_pattern = re.compile(r"eval\(function\(p,a,c,k,e,[dr]\).*?\}\(.*?\)\)", re.DOTALL)
# Prefer script-body extraction first. This is more resilient when the packed
# code has nested parentheses/semicolons that are hard to capture with a
# single regex.
script_pattern = re.compile(r"<script[^>]*>(.*?)</script>", re.IGNORECASE | re.DOTALL)
for script_body in script_pattern.findall(html):
if "eval(function(p,a,c,k,e" in script_body:
strict_matches = strict_eval_pattern.findall(script_body)
if strict_matches:
raw_matches.extend(strict_matches)
continue
relaxed_matches = relaxed_eval_pattern.findall(script_body)
if relaxed_matches:
raw_matches.extend(relaxed_matches)
if raw_matches:
return raw_matches
# Fallback: direct eval(...) extraction from raw HTML.
raw_matches = strict_eval_pattern.findall(html)
# If no matches with the strict pattern, try a more relaxed one
if not raw_matches:
# Try to find eval(function and capture until we find the closing ))
pattern = re.compile(r"eval\(function\(p,a,c,k,e,[dr]\).*?\}\(.*?\)\)", re.DOTALL)
raw_matches = pattern.findall(html)
raw_matches = relaxed_eval_pattern.findall(html)
return raw_matches
@staticmethod
def _extract_m3u8_candidate(text: str) -> str | None:
patterns = [
r"var\s+src\s*=\s*[\"']([^\"']+\.m3u8[^\"']*)[\"']",
r"src\s*=\s*[\"']([^\"']+\.m3u8[^\"']*)[\"']",
r"file\s*:\s*[\"']([^\"']+\.m3u8[^\"']*)[\"']",
r"[\"']([^\"']*https?://[^\"']+\.m3u8[^\"']*)[\"']",
r"(https?://[^\s\"'>]+\.m3u8[^\s\"'>]*)",
r"(//[^\s\"'>]+\.m3u8[^\s\"'>]*)",
r"(/[^\s\"'>]+\.m3u8[^\s\"'>]*)",
]
for pattern in patterns:
match = re.search(pattern, text)
if match:
return match.group(1)
return None
@staticmethod
def _normalize_stream_url(stream_url: str, base_url: str) -> str:
    """Turn a scraped URL fragment into an absolute URL.

    Surrounding whitespace and quote characters are removed and JavaScript
    escaped slashes (``\\/``) are unescaped. Protocol-relative URLs take the
    scheme of *base_url* (``https`` when it has none), scheme-less URLs are
    joined onto *base_url*, and anything that already has a scheme is
    returned after cleaning.
    """
    candidate = stream_url.strip()
    candidate = candidate.strip("\"'")
    candidate = candidate.replace("\\/", "/")

    if candidate.startswith("//"):
        scheme = urlparse(base_url).scheme
        if not scheme:
            scheme = "https"
        return f"{scheme}:{candidate}"

    if urlparse(candidate).scheme:
        return candidate
    return urljoin(base_url, candidate)
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
"""Main extraction flow: fetch page, extract iframe, unpack and find m3u8."""
try:
parsed_source = urlparse(url)
source_origin = f"{parsed_source.scheme}://{parsed_source.netloc}"
source_referer = self.base_headers.get("Referer") or self.base_headers.get("referer") or f"{source_origin}/"
user_agent = self.base_headers.get("User-Agent") or self.base_headers.get("user-agent") or "Mozilla/5.0"
# Step 1: Fetch main page
logger.info(f"Fetching main page: {url}")
main_response = await self._make_request(url, timeout=15)
main_response = await self._make_request(
url,
headers={
"Referer": source_referer,
"Origin": source_origin,
"User-Agent": user_agent,
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.9,it;q=0.8",
"Cache-Control": "no-cache",
},
timeout=15,
)
main_html = main_response.text
parsed_main = urlparse(main_response.url)
main_origin = f"{parsed_main.scheme}://{parsed_main.netloc}"
# Extract first iframe
iframe_match = re.search(r'<iframe\s+src=["\']([^"\']+)["\']', main_html, re.IGNORECASE)
if not iframe_match:
raise ExtractorError("No iframe found on the page")
# Extract first iframe (src can appear in any attribute order)
iframe_match = re.search(r'<iframe[^>]+(?<!data-)src=["\']([^"\']+)["\']', main_html, re.IGNORECASE)
iframe_url = main_response.url
iframe_html = main_html
iframe_url = iframe_match.group(1)
if iframe_match:
iframe_url = self._normalize_stream_url(iframe_match.group(1), main_response.url)
logger.info(f"Found iframe URL: {iframe_url}")
# Normalize iframe URL
if iframe_url.startswith("//"):
iframe_url = "https:" + iframe_url
elif iframe_url.startswith("/"):
parsed_main = urlparse(url)
iframe_url = f"{parsed_main.scheme}://{parsed_main.netloc}{iframe_url}"
# Step 2: Fetch iframe with source page as referer
iframe_headers = {
"Referer": main_response.url,
"Origin": main_origin,
"User-Agent": user_agent,
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.9,it;q=0.8",
"Cache-Control": "no-cache",
}
logger.info(f"Found iframe URL: {iframe_url}")
iframe_response = await self._make_request(iframe_url, headers=iframe_headers, timeout=15)
iframe_html = iframe_response.text
iframe_url = iframe_response.url
logger.debug(f"Iframe HTML length: {len(iframe_html)}")
else:
logger.warning("No iframe found on page, attempting extraction from main HTML")
# Step 2: Fetch iframe with Referer
iframe_headers = {
"Referer": "https://sportzonline.st/",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.9,it;q=0.8",
"Cache-Control": "no-cache",
parsed_iframe = urlparse(iframe_url)
playback_headers = {
"Referer": iframe_url,
"Origin": f"{parsed_iframe.scheme}://{parsed_iframe.netloc}",
"User-Agent": user_agent,
}
iframe_response = await self._make_request(iframe_url, headers=iframe_headers, timeout=15)
iframe_html = iframe_response.text
logger.debug(f"Iframe HTML length: {len(iframe_html)}")
# Step 3: Detect packed blocks
packed_blocks = self._detect_packed_blocks(iframe_html)
@@ -91,21 +160,19 @@ class SportsonlineExtractor(BaseExtractor):
if not packed_blocks:
logger.warning("No packed blocks found, trying direct m3u8 search")
# Fallback: try direct m3u8 search
direct_match = re.search(r'(https?://[^\s"\'>]+\.m3u8[^\s"\'>]*)', iframe_html)
direct_match = self._extract_m3u8_candidate(iframe_html)
if direct_match:
m3u8_url = direct_match.group(1)
m3u8_url = self._normalize_stream_url(direct_match, iframe_url)
logger.info(f"Found direct m3u8 URL: {m3u8_url}")
return {
"destination_url": m3u8_url,
"request_headers": {"Referer": iframe_url, "User-Agent": iframe_headers["User-Agent"]},
"request_headers": playback_headers,
"mediaflow_endpoint": self.mediaflow_endpoint,
}
else:
raise ExtractorError("No packed blocks or direct m3u8 URL found")
logger.info(f"Found {len(packed_blocks)} packed blocks")
# Choose block: if >=2 use second (index 1), else first (index 0)
chosen_idx = 1 if len(packed_blocks) > 1 else 0
m3u8_url = None
@@ -123,22 +190,7 @@ class SportsonlineExtractor(BaseExtractor):
# Search for var src="...m3u8" with multiple patterns
if unpacked_code:
# Try multiple patterns as in the TypeScript version
patterns = [
r'var\s+src\s*=\s*["\']([^"\']+)["\']', # var src="..."
r'src\s*=\s*["\']([^"\']+\.m3u8[^"\']*)["\']', # src="...m3u8"
r'file\s*:\s*["\']([^"\']+\.m3u8[^"\']*)["\']', # file: "...m3u8"
r'["\']([^"\']*https?://[^"\']+\.m3u8[^"\']*)["\']', # any m3u8 URL
]
for pattern in patterns:
src_match = re.search(pattern, unpacked_code)
if src_match:
m3u8_url = src_match.group(1)
# Verify it looks like a valid m3u8 URL
if ".m3u8" in m3u8_url or "http" in m3u8_url:
break
m3u8_url = None
m3u8_url = self._extract_m3u8_candidate(unpacked_code)
# If not found, try all other blocks
if not m3u8_url:
@@ -148,36 +200,30 @@ class SportsonlineExtractor(BaseExtractor):
continue
try:
unpacked_code = unpack(block)
# Use the same patterns as above
for pattern in [
r'var\s+src\s*=\s*["\']([^"\']+)["\']',
r'src\s*=\s*["\']([^"\']+\.m3u8[^"\']*)["\']',
r'file\s*:\s*["\']([^"\']+\.m3u8[^"\']*)["\']',
r'["\']([^"\']*https?://[^"\']+\.m3u8[^"\']*)["\']',
]:
src_match = re.search(pattern, unpacked_code)
if src_match:
test_url = src_match.group(1)
if ".m3u8" in test_url or "http" in test_url:
m3u8_url = test_url
logger.info(f"Found m3u8 in block {i}")
break
m3u8_url = self._extract_m3u8_candidate(unpacked_code)
if m3u8_url:
logger.info(f"Found m3u8 in block {i}")
break
except Exception as e:
logger.debug(f"Failed to process block {i}: {e}")
continue
if not m3u8_url:
fallback_candidate = self._extract_m3u8_candidate(iframe_html)
if fallback_candidate:
m3u8_url = fallback_candidate
if not m3u8_url:
raise ExtractorError("Could not extract m3u8 URL from packed code")
m3u8_url = self._normalize_stream_url(m3u8_url, iframe_url)
logger.info(f"Successfully extracted m3u8 URL: {m3u8_url}")
# Return stream configuration
return {
"destination_url": m3u8_url,
"request_headers": {"Referer": iframe_url, "User-Agent": iframe_headers["User-Agent"]},
"request_headers": playback_headers,
"mediaflow_endpoint": self.mediaflow_endpoint,
}
+7 -2
View File
@@ -24,12 +24,17 @@ class SupervideoExtractor(BaseExtractor):
Uses curl_cffi with Chrome impersonation to bypass Cloudflare.
"""
patterns = [r'file:"(.*?)"']
proxy = self._get_proxy(url)
try:
async with AsyncSession() as session:
response = await session.get(url, impersonate="chrome")
response = await session.get(
url,
impersonate="chrome",
timeout=30,
**({"proxy": proxy} if proxy else {}),
)
if response.status_code != 200:
raise ExtractorError(f"HTTP {response.status_code} while fetching {url}")
+25 -8
View File
@@ -1,22 +1,39 @@
import re
from typing import Dict
from typing import Dict, Any
from urllib.parse import urljoin
from curl_cffi.requests import AsyncSession
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
class UqloadExtractor(BaseExtractor):
"""Uqload URL extractor."""
"""Uqload URL extractor.
async def extract(self, url: str, **kwargs) -> Dict[str, str]:
"""Extract Uqload URL."""
response = await self._make_request(url)
Uses curl_cffi + Chrome impersonation to handle Cloudflare protection.
Follows redirects automatically (uqload.bz/co/io all redirect to uqload.is).
"""
video_url_match = re.search(r'sources: \["(.*?)"]', response.text)
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
proxy = self._get_proxy(url)
async with AsyncSession() as session:
response = await session.get(
url,
impersonate="chrome",
timeout=30,
allow_redirects=True,
**({"proxy": proxy} if proxy else {}),
)
if response.status_code >= 400:
raise ExtractorError(f"HTTP {response.status_code} while fetching {url}")
video_url_match = re.search(r'sources:\s*\["(https?://[^"]+)"', response.text)
if not video_url_match:
raise ExtractorError("Failed to extract video URL")
raise ExtractorError("Uqload: video URL not found in page source")
self.base_headers["referer"] = urljoin(url, "/")
final_url = str(response.url)
self.base_headers["referer"] = urljoin(final_url, "/")
return {
"destination_url": video_url_match.group(1),
"request_headers": self.base_headers,
+244 -174
View File
@@ -1,5 +1,9 @@
import logging
import time
import re
import uuid
from typing import Any, Dict, Optional
from urllib.parse import quote, urlparse
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
@@ -7,71 +11,29 @@ logger = logging.getLogger(__name__)
class VavooExtractor(BaseExtractor):
"""Vavoo URL extractor for resolving vavoo.to links.
"""Vavoo URL extractor per risolvere link vavoo.to"""
Supports two URL formats:
1. Web-VOD API links: https://vavoo.to/web-vod/api/get?link=...
These redirect (302) to external video hosts (Doodstream, etc.)
2. Legacy mediahubmx format (currently broken on Vavoo's end)
Features:
- Uses BaseExtractor's retry/timeouts
- Improved headers to mimic Android okhttp client
- Robust JSON handling and logging
"""
API_UA = "okhttp/4.11.0"
RESOLVE_UA = "MediaHubMX/2"
TS_UA = "VAVOO/2.6"
def __init__(self, request_headers: dict):
super().__init__(request_headers)
# Endpoint is resolved dynamically per-extraction based on the stream URL type.
self.mediaflow_endpoint = "proxy_stream_endpoint"
async def _resolve_web_vod_link(self, url: str) -> str:
"""Resolve a web-vod API link by getting the redirect Location header."""
import aiohttp
try:
# Use aiohttp directly with allow_redirects=False to get the Location header
timeout = aiohttp.ClientTimeout(total=10)
async with aiohttp.ClientSession(timeout=timeout) as session:
async with session.get(
url,
headers={"Accept": "application/json"},
allow_redirects=False,
) as resp:
# Check for redirect
if resp.status in (301, 302, 303, 307, 308):
location = resp.headers.get("Location") or resp.headers.get("location")
if location:
logger.info(f"Vavoo web-vod redirected to: {location}")
return location
# If we got a 200, the response might contain the URL
if resp.status == 200:
text = await resp.text()
if text and text.startswith("http"):
logger.info(f"Vavoo web-vod resolved to: {text.strip()}")
return text.strip()
raise ExtractorError(f"Vavoo web-vod API returned unexpected status {resp.status}")
except ExtractorError:
raise
except Exception as e:
raise ExtractorError(f"Failed to resolve Vavoo web-vod link: {e}")
async def get_auth_signature(self) -> Optional[str]:
"""Get authentication signature for Vavoo API (async)."""
async def _get_auth_signature(self) -> Optional[str]:
"""Get authentication signature via lokke.app/api/app/ping (aligned with working plugin)."""
unique_id = uuid.uuid4().hex[:16]
now_ms = int(time.time() * 1000)
headers = {
"user-agent": "okhttp/4.11.0",
"user-agent": self.API_UA,
"accept": "application/json",
"content-type": "application/json; charset=utf-8",
"accept-encoding": "gzip",
}
import time
current_time = int(time.time() * 1000)
data = {
"token": "",
body = {
"token": "ldCvE092e7gER0rVIajfsXIvRhwlrAzP6_1oEJ4q6HH89QHt24v6NNL_jQJO219hiLOXF2hqEfsUuEWitEIGN4EaHHEHb7Cd7gojc5SQYRFzU3XWo_kMeryAUbcwWnQrnf0-",
"reason": "app-blur",
"locale": "de",
"theme": "dark",
@@ -79,174 +41,282 @@ class VavooExtractor(BaseExtractor):
"device": {
"type": "Handset",
"brand": "google",
"model": "Pixel",
"name": "sdk_gphone64_arm64",
"uniqueId": "d10e5d99ab665233",
"model": "Nexus",
"name": "21081111RG",
"uniqueId": unique_id,
},
"os": {"name": "android", "version": "7.1.2", "abis": ["arm64-v8a"], "host": "android"},
"app": {
"platform": "android",
"version": "1.1.0",
"buildId": "97215000",
"engine": "hbc85",
"signatures": ["6e8a975e3cbf07d5de823a760d4c2547f86c1403105020adee5de67ac510999e"],
"installer": "com.android.vending",
},
"version": {"package": "app.lokke.main", "binary": "1.1.0", "js": "1.1.0"},
"platform": {
"isAndroid": True,
"isIOS": False,
"isTV": False,
"isWeb": False,
"isMobile": True,
"isWebTV": False,
"isElectron": False,
},
"os": {"name": "android", "version": "13"},
"app": {"platform": "android", "version": "3.1.21"},
"version": {"package": "tv.vavoo.app", "binary": "3.1.21", "js": "3.1.21"},
},
"appFocusTime": 0,
"playerActive": False,
"playDuration": 0,
"devMode": False,
"devMode": True,
"hasAddon": True,
"castConnected": False,
"package": "tv.vavoo.app",
"version": "3.1.21",
"package": "app.lokke.main",
"version": "1.1.0",
"process": "app",
"firstAppStart": current_time,
"lastAppStart": current_time,
"ipLocation": "",
"adblockEnabled": True,
"firstAppStart": now_ms - 86400000,
"lastAppStart": now_ms,
"ipLocation": None,
"adblockEnabled": False,
"proxy": {
"supported": ["ss", "openvpn"],
"engine": "ss",
"engine": "openvpn",
"ssVersion": 1,
"enabled": True,
"enabled": False,
"autoServer": True,
"id": "de-fra",
"id": "fi-hel",
},
"iap": {"supported": False},
"iap": {"supported": True},
}
try:
resp = await self._make_request(
"https://www.vavoo.tv/api/app/ping",
"https://www.lokke.app/api/app/ping",
method="POST",
json=data,
json=body,
headers=headers,
timeout=10,
timeout=15,
retries=2,
)
try:
result = resp.json()
except Exception:
logger.warning("Vavoo ping returned non-json response (status=%s).", resp.status)
logger.warning("Lokke ping returned non-json response (status=%s).", resp.status)
return None
addon_sig = result.get("addonSig") if isinstance(result, dict) else None
if addon_sig:
logger.info("Successfully obtained Vavoo authentication signature")
logger.info("Successfully obtained auth signature from lokke.app")
return addon_sig
else:
logger.warning("No addonSig in Vavoo API response: %s", result)
return None
except ExtractorError as e:
logger.warning("Failed to get Vavoo auth signature: %s", e)
logger.warning("No addonSig in lokke API response: %s", result)
return None
except Exception as e:
logger.debug("_get_auth_signature error: %s", e)
return None
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
"""Extract Vavoo stream URL (async).
async def _get_ts_signature(self) -> Optional[str]:
"""Get TS-based signature via /api/box/ping2 (fallback).

POSTs the fixed ``vec`` payload (passed as ``data=``) to the ping2 endpoint
and returns the ``signed`` value found under the ``response`` key of the
JSON reply.

Returns:
    The ``signed`` value, or ``None`` when the reply is not JSON, when the
    value is missing, or when the request (or reading the reply) raises.
"""
# Fixed opaque payload sent as the ``vec`` field; keep it byte-for-byte.
# NOTE(review): presumably a pre-recorded client blob accepted by ping2 — confirm.
vec = "9frjpxPjxSNilxJPCJ0XGYs6scej3dW/h/VWlnKUiLSG8IP7mfyDU7NirOlld+VtCKGj03XjetfliDMhIev7wcARo+YTU8KPFuVQP9E2DVXzY2BFo1NhE6qEmPfNDnm74eyl/7iFJ0EETm6XbYyz8IKBkAqPN/Spp3PZ2ulKg3QBSDxcVN4R5zRn7OsgLJ2CNTuWkd/h451lDCp+TtTuvnAEhcQckdsydFhTZCK5IiWrrTIC/d4qDXEd+GtOP4hPdoIuCaNzYfX3lLCwFENC6RZoTBYLrcKVVgbqyQZ7DnLqfLqvf3z0FVUWx9H21liGFpByzdnoxyFkue3NzrFtkRL37xkx9ITucepSYKzUVEfyBh+/3mtzKY26VIRkJFkpf8KVcCRNrTRQn47Wuq4gC7sSwT7eHCAydKSACcUMMdpPSvbvfOmIqeBNA83osX8FPFYUMZsjvYNEE3arbFiGsQlggBKgg1V3oN+5ni3Vjc5InHg/xv476LHDFnNdAJx448ph3DoAiJjr2g4ZTNynfSxdzA68qSuJY8UjyzgDjG0RIMv2h7DlQNjkAXv4k1BrPpfOiOqH67yIarNmkPIwrIV+W9TTV/yRyE1LEgOr4DK8uW2AUtHOPA2gn6P5sgFyi68w55MZBPepddfYTQ+E1N6R/hWnMYPt/i0xSUeMPekX47iucfpFBEv9Uh9zdGiEB+0P3LVMP+q+pbBU4o1NkKyY1V8wH1Wilr0a+q87kEnQ1LWYMMBhaP9yFseGSbYwdeLsX9uR1uPaN+u4woO2g8sw9Y5ze5XMgOVpFCZaut02I5k0U4WPyN5adQjG8sAzxsI3KsV04DEVymj224iqg2Lzz53Xz9yEy+7/85ILQpJ6llCyqpHLFyHq/kJxYPhDUF755WaHJEaFRPxUqbparNX+mCE9Xzy7Q/KTgAPiRS41FHXXv+7XSPp4cy9jli0BVnYf13Xsp28OGs/D8Nl3NgEn3/eUcMN80JRdsOrV62fnBVMBNf36+LbISdvsFAFr0xyuPGmlIETcFyxJkrGZnhHAxwzsvZ+Uwf8lffBfZFPRrNv+tgeeLpatVcHLHZGeTgWWml6tIHwWUqv2TVJeMkAEL5PPS4Gtbscau5HM+FEjtGS+KClfX1CNKvgYJl7mLDEf5ZYQv5kHaoQ6RcPaR6vUNn02zpq5/X3EPIgUKF0r/0ctmoT84B2J1BKfCbctdFY9br7JSJ6DvUxyde68jB+Il6qNcQwTFj4cNErk4x719Y42NoAnnQYC2/qfL/gAhJl8TKMvBt3Bno+va8ve8E0z8yEuMLUqe8OXLce6nCa+L5LYK1aBdb60BYbMeWk1qmG6Nk9OnYLhzDyrd9iHDd7X95OM6X5wiMVZRn5ebw4askTTc50xmrg4eic2U1w1JpSEjdH/u/hXrWKSMWAxaj34uQnMuWxPZEXoVxzGyuUbroXRfkhzpqmqqqOcypjsWPdq5BOUGL/Riwjm6yMI0x9kbO8+VoQ6RYfjAbxNriZ1cQ+AW1fqEgnRWXmjt4Z1M0ygUBi8w71bDML1YG6UHeC2cJ2CCCxSrfycKQhpSdI1QIuwd2eyIpd4LgwrMiY3xNWreAF+qobNxvE7ypKTISNrz0iYIhU0aKNlcGwYd0FXIRfKVBzSBe4MRK2pGLDNO6ytoHxvJweZ8h1XG8RWc4aB5gTnB7Tjiqym4b64lRdj1DPHJnzD4aqRixpXhzYzWVDN2kONCR5i2quYbnVFN4sSfLiKeOwKX4JdmzpYixNZXjLkG14seS6KR0Wl8Itp5IMIWFpnNokjRH76RYRZAcx0jP0V5/GfNNTi5QsEU98en0SiXHQGXnROiHpRUDXTl8FmJORjwXc0AjrEMuQ2FDJDmAIlKUSLhjbIiKw3iaqp5TVyXuz0ZMYBhnqhcwqULqtFSuIKpaW8FgF8QJfP2frADf4kKZG1bQ99MrRrb2A="
try:
resp = await self._make_request(
"https://www.vavoo.tv/api/box/ping2",
method="POST",
data={"vec": vec},
timeout=15,
retries=2,
)
try:
result = resp.json()
except Exception:
# A non-JSON body is treated as "no signature" rather than an error.
return None
# ``or {}`` guards against a missing or null ``response`` entry.
return (result.get("response") or {}).get("signed")
except Exception as e:
# Best-effort helper: request failures are logged and reported as None.
logger.debug("_get_ts_signature error: %s", e)
return None
Supports:
- Direct play URLs: https://vavoo.to/play/{id}/index.m3u8 (Live TV)
- Web-VOD API links: https://vavoo.to/web-vod/api/get?link=...
- Legacy mediahubmx links (may not work due to Vavoo API changes)
"""
if "vavoo.to" not in url:
raise ExtractorError("Not a valid Vavoo URL")
# Check if this is a direct play URL (Live TV)
# These URLs are already m3u8 streams but need auth signature
if "/play/" in url and url.endswith(".m3u8"):
signature = await self.get_auth_signature()
if not signature:
raise ExtractorError("Failed to get Vavoo authentication signature for Live TV")
stream_headers = {
"user-agent": "okhttp/4.11.0",
"referer": "https://vavoo.to/",
"mediahubmx-signature": signature,
}
return {
"destination_url": url,
"request_headers": stream_headers,
"mediaflow_endpoint": "hls_manifest_proxy",
}
# Check if this is a web-vod API link (new format)
if "/web-vod/api/get" in url:
resolved_url = await self._resolve_web_vod_link(url)
stream_headers = {
"user-agent": self.base_headers.get("user-agent", "Mozilla/5.0"),
"referer": "https://vavoo.to/",
}
return {
"destination_url": resolved_url,
"request_headers": stream_headers,
"mediaflow_endpoint": self.mediaflow_endpoint,
}
# Legacy mediahubmx flow
signature = await self.get_auth_signature()
if not signature:
raise ExtractorError("Failed to get Vavoo authentication signature")
resolved_url = await self._resolve_vavoo_link(url, signature)
if not resolved_url:
raise ExtractorError("Failed to resolve Vavoo URL")
stream_headers = {
"user-agent": self.base_headers.get("user-agent", "okhttp/4.11.0"),
"referer": "https://vavoo.to/",
}
return {
"destination_url": resolved_url,
"request_headers": stream_headers,
"mediaflow_endpoint": self.mediaflow_endpoint,
}
async def _resolve_vavoo_link(self, link: str, signature: str) -> Optional[str]:
"""Resolve a Vavoo link using the MediaHubMX API (async)."""
async def _resolve_with_auth(self, url: str, signature: str) -> Optional[str]:
"""Resolve a Vavoo link using the MediaHubMX API with auth signature."""
headers = {
"user-agent": "okhttp/4.11.0",
"user-agent": self.RESOLVE_UA,
"accept": "application/json",
"content-type": "application/json; charset=utf-8",
"accept-encoding": "gzip",
"mediahubmx-signature": signature,
}
data = {"language": "de", "region": "AT", "url": link, "clientVersion": "3.1.21"}
payload = {"language": "de", "region": "AT", "url": url, "clientVersion": "3.0.2"}
try:
logger.info(f"Attempting to resolve Vavoo URL: {link}")
resp = await self._make_request(
"https://vavoo.to/mediahubmx-resolve.json",
method="POST",
json=data,
json=payload,
headers=headers,
timeout=12,
timeout=15,
retries=3,
backoff_factor=0.6,
)
try:
result = resp.json()
except Exception:
logger.warning(
"Vavoo resolve returned non-json response (status=%s). Body preview: %s",
resp.status,
getattr(resp, "text", "")[:500],
)
logger.warning("Vavoo resolve returned non-json (status=%s)", resp.status)
return None
logger.debug("Vavoo API response: %s", result)
# Accept either list or dict with 'url'
if isinstance(result, list) and result and isinstance(result[0], dict) and result[0].get("url"):
resolved_url = result[0]["url"]
logger.info("Successfully resolved Vavoo URL to: %s", resolved_url)
return resolved_url
elif isinstance(result, dict) and result.get("url"):
resolved_url = result["url"]
logger.info("Successfully resolved Vavoo URL to: %s", resolved_url)
return resolved_url
else:
logger.warning("No URL found in Vavoo API response: %s", result)
return None
except ExtractorError as e:
logger.error(f"Vavoo resolution failed for URL {link}: {e}")
raise ExtractorError(f"Vavoo resolution failed: {str(e)}") from e
return str(result[0]["url"])
if isinstance(result, dict):
if result.get("url"):
return str(result["url"])
if isinstance(result.get("data"), dict) and result["data"].get("url"):
return str(result["data"]["url"])
logger.warning("No URL found in Vavoo API response: %s", result)
return None
except Exception as e:
logger.error(f"Unexpected error while resolving Vavoo URL {link}: {e}")
raise ExtractorError(f"Vavoo resolution failed: {str(e)}") from e
logger.debug("_resolve_with_auth error: %s", e)
return None
async def _follow_stream_url(self, url: str) -> str:
    """Probe *url* and return the best final stream URL that can be found.

    A HEAD request is issued first and the URL reported by the response
    (falling back to *url*) becomes the default result. When the response is
    served as ``text/html``, *url* is fetched with GET and the body is
    scanned for a stream link: the first ``.m3u8`` URL wins, otherwise the
    first URL containing ``.ts``, ``/live/``, ``/stream/``, ``/playlist`` or
    ``/index``.

    This is a best-effort helper: any failure is logged at debug level and
    the original *url* is returned unchanged.

    Args:
        url: Candidate stream URL to probe.

    Returns:
        The extracted stream URL, the probed response URL, or *url* itself
        when probing fails.
    """
    stream_headers = {
        "User-Agent": self.API_UA,
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate",
        "Connection": "close",
    }
    # Tried in order; the first pattern that matches the HTML body wins.
    link_patterns = (
        r'(https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*)',
        r'(https?://[^\s"\'<>]+(?:\.ts|/live/|/stream/|/playlist|/index)[^\s"\'<>]*)',
    )
    try:
        resp = await self._make_request(url, method="HEAD", headers=stream_headers, timeout=15, retries=1)
        final_url = str(getattr(resp, "url", url))
        ctype = (getattr(resp, "headers", {}).get("Content-Type") or "").lower()
        if "text/html" not in ctype:
            return final_url

        # An HTML answer means a landing page rather than the stream itself.
        resp2 = await self._make_request(url, method="GET", headers=stream_headers, timeout=15, retries=1)
        text = getattr(resp2, "text", "") or ""
        for pattern in link_patterns:
            found = re.search(pattern, text)
            if found:
                return found.group(1)
        return final_url
    except Exception as e:
        # Keep the fallback, but leave a trace like the sibling helpers do.
        logger.debug("_follow_stream_url error: %s", e)
        return url
async def _build_ts_fallback(self, url: str) -> Optional[str]:
    """Build a ``.ts`` fallback URL for vavoo-iptv streams using the ping2 signature.

    Only URLs containing ``vavoo-iptv`` are eligible. The URL is rewritten by
    replacing ``vavoo-iptv`` with ``live2`` and dropping a trailing
    ``/index.m3u8`` (with any query string); the URL-encoded signature is
    appended as ``vavoo_auth``. The result is probed with a GET and returned
    only when the response status is below 400.

    Args:
        url: Original Vavoo stream URL.

    Returns:
        The signed ``.ts`` URL, or ``None`` when the URL is not eligible, no
        signature could be obtained, or the probe fails.
    """
    if "vavoo-iptv" not in url:
        return None
    ts_sig = await self._get_ts_signature()
    if not ts_sig:
        return None
    base = re.sub(r"/index\.m3u8(?:\?.*)?$", "", url.replace("vavoo-iptv", "live2")).rstrip("/")
    # safe='' so that "/" and "=" inside the signature are percent-encoded too.
    ts_url = f"{base}.ts?n=1&b=5&vavoo_auth={quote(ts_sig, safe='')}"
    try:
        resp = await self._make_request(
            ts_url, method="GET", headers={"User-Agent": self.TS_UA}, timeout=15, retries=1
        )
        # A response without a status attribute is treated as a failure (400).
        if getattr(resp, "status", 400) < 400:
            return ts_url
    except Exception as e:
        # Best-effort probe: record why the fallback was rejected instead of
        # discarding the error silently.
        logger.debug("_build_ts_fallback error: %s", e)
    return None
async def _resolve_web_vod_link(self, url: str) -> str:
    """Resolve a web-vod API link to the URL it points at.

    The link is requested with redirects disabled. A 3xx answer is resolved
    from its ``Location`` header; a 200 answer is accepted when its body,
    after trimming surrounding whitespace, starts with ``http``.

    Args:
        url: The ``/web-vod/api/get`` link to resolve.

    Returns:
        The redirect target or the URL found in the response body.

    Raises:
        ExtractorError: if the response is neither a usable redirect nor a
            URL body, or if the request itself fails (the original
            exception is chained as the cause).
    """
    try:
        resp = await self._make_request(
            url,
            method="GET",
            headers={"Accept": "application/json"},
            timeout=10,
            retries=2,
            allow_redirects=False,
        )
        status = getattr(resp, "status", 0)
        if status in (301, 302, 303, 307, 308):
            response_headers = getattr(resp, "headers", {})
            location = response_headers.get("Location") or response_headers.get("location")
            if location:
                logger.info("Vavoo web-vod redirected to: %s", location)
                return location
        if status == 200:
            # Trim before testing the prefix so a body with leading
            # whitespace or newlines is still recognised as a URL.
            body_url = (getattr(resp, "text", "") or "").strip()
            if body_url.startswith("http"):
                logger.info("Vavoo web-vod resolved to: %s", body_url)
                return body_url
        raise ExtractorError(f"Vavoo web-vod API returned unexpected status {status}")
    except ExtractorError:
        raise
    except Exception as e:
        raise ExtractorError(f"Failed to resolve Vavoo web-vod link: {e}") from e
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
    """Resolve a Vavoo link into a stream description for the proxy.

    Web-VOD API links are resolved through ``_resolve_web_vod_link`` and
    returned immediately. Any other link is tried against three modes, in
    order, until one yields a URL:
        1. Auth Resolve Mode: electron-mode signature → mediahubmx-resolve
        2. TS Fallback Mode: ping2 signature → live2 .ts URL
        3. Direct Fallback: the raw URL with the TS user agent

    Returns:
        A dict with ``destination_url``, ``request_headers`` and
        ``mediaflow_endpoint``.

    Raises:
        ExtractorError: If *url* does not contain ``vavoo.to``.
    """

    def endpoint_for(target: str) -> str:
        # Playlists go through the HLS manifest proxy so relative segment
        # URLs are rewritten before the player sees them; TS / raw stream
        # URLs keep the extractor's configured endpoint.
        if urlparse(target).path.lower().endswith((".m3u8", ".m3u", ".m3u_plus")):
            return "hls_manifest_proxy"
        return self.mediaflow_endpoint

    if "vavoo.to" not in url:
        raise ExtractorError("Not a valid Vavoo URL")

    # Web-VOD links (new format) bypass the signature-based modes entirely.
    if "/web-vod/api/get" in url:
        vod_target = await self._resolve_web_vod_link(url)
        vod_headers = {
            "user-agent": self.API_UA,
            "referer": "https://vavoo.to/",
        }
        return {
            "destination_url": vod_target,
            "request_headers": vod_headers,
            "mediaflow_endpoint": endpoint_for(vod_target),
        }

    target = None
    headers = None

    # Mode 1: Auth Resolve (electron signature + mediahubmx)
    signature = await self._get_auth_signature()
    if signature:
        found = await self._resolve_with_auth(url, signature)
        if found:
            target = await self._follow_stream_url(found)
            headers = {
                "user-agent": self.RESOLVE_UA,
                "referer": "https://vavoo.to/",
                "origin": "https://vavoo.to",
            }
            logger.info("Using Auth Resolve Mode: %s", target)

    # Mode 2: TS Fallback (ping2 + live2 .ts)
    if not target:
        ts_candidate = await self._build_ts_fallback(url)
        if ts_candidate:
            target = ts_candidate
            headers = {"user-agent": self.TS_UA}
            logger.info("Using TS Fallback Mode: %s", target)

    # Mode 3: Direct Fallback
    if not target:
        target = url
        headers = {
            "user-agent": self.TS_UA,
            "referer": "https://vavoo.to/",
        }
        logger.info("Using Direct Fallback Mode: %s", target)

    return {
        "destination_url": target,
        "request_headers": headers,
        "mediaflow_endpoint": endpoint_for(target),
    }
+110
View File
@@ -0,0 +1,110 @@
import re
from typing import Dict, Any
from urllib.parse import urlparse
import aiohttp
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
class VidFastExtractor(BaseExtractor):
    """
    Resolve vidfast.pro pages (movies and TV via ythd.org → cloudnestra.com)
    to an HLS playlist URL.

    Accepted URL formats:
        https://vidfast.pro/movie/{tmdb_id}
        https://vidfast.pro/tv/{tmdb_id}/{season}/{episode}

    Resolution steps:
        1. Take the TMDB ID from the second path segment of the URL.
        2. Load https://ythd.org/embed/{tmdb_id} and read the first data-hash.
        3. Load https://cloudnestra.com/rcp/{hash} (same cookie jar as the
           ythd request) and read the /prorcp/ hash from the inline iframe src.
        4. Load https://cloudnestra.com/prorcp/{prorcp_hash} and read the
           Playerjs `file:` value (HLS master playlist URL).
        5. Substitute the {v1} CDN placeholder with cloudnestra.com and hand
           the URL to MediaFlow's hls_manifest_proxy endpoint.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.mediaflow_endpoint = "hls_manifest_proxy"

    async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
        """Walk the ythd.org → cloudnestra.com chain and return the HLS URL.

        Returns:
            A dict with ``destination_url``, ``request_headers`` and
            ``mediaflow_endpoint``.

        Raises:
            ExtractorError: If the path has fewer than two segments, a hop
                answers with HTTP status >= 400, or an expected marker is
                missing from a fetched page.
        """

        def capture(pattern: str, haystack: str, failure: str) -> str:
            # Return the first capture group, or fail with the given message.
            found = re.search(pattern, haystack)
            if not found:
                raise ExtractorError(failure)
            return found.group(1)

        path = urlparse(url).path
        segments = path.strip("/").split("/")
        if len(segments) < 2:
            raise ExtractorError(f"VidFast: cannot parse TMDB ID from path: {path!r}")
        tmdb_id = segments[1]

        ua = self.base_headers.get(
            "user-agent",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        )

        async def fetch(session, page_url: str, origin: str, referer=None) -> str:
            # GET one page; any status >= 400 aborts the extraction.
            request_headers = {"User-Agent": ua}
            if referer is not None:
                request_headers["Referer"] = referer
            async with session.get(page_url, headers=request_headers) as resp:
                if resp.status >= 400:
                    raise ExtractorError(f"VidFast: {origin} returned HTTP {resp.status}")
                return await resp.text()

        ythd_url = f"https://ythd.org/embed/{tmdb_id}"

        # One session with one cookie jar, so cookies set by an earlier hop
        # are sent on the later ones.
        jar = aiohttp.CookieJar()
        limits = aiohttp.ClientTimeout(total=30)
        async with aiohttp.ClientSession(cookie_jar=jar, timeout=limits) as session:
            # Step 1: ythd.org embed page
            ythd_html = await fetch(session, ythd_url, "ythd.org")
            data_hash = capture(
                r'data-hash="([^"]+)"',
                ythd_html,
                "VidFast: no data-hash attribute on ythd.org page",
            )

            # Step 2: cloudnestra /rcp/ (needs ythd.org cookies)
            rcp_url = f"https://cloudnestra.com/rcp/{data_hash}"
            rcp_html = await fetch(session, rcp_url, "cloudnestra /rcp/", referer=ythd_url)
            prorcp_hash = capture(
                r"src:\s*'/prorcp/([^']+)'",
                rcp_html,
                "VidFast: /prorcp/ hash not found in cloudnestra page",
            )

            # Step 3: cloudnestra /prorcp/ (actual player page)
            prorcp_url = f"https://cloudnestra.com/prorcp/{prorcp_hash}"
            prorcp_html = await fetch(session, prorcp_url, "cloudnestra /prorcp/", referer=rcp_url)

        # Step 4: extract the HLS URL from Playerjs({…, file:"…"})
        file_value = capture(
            r'file:\s*"(https://[^"]+)"',
            prorcp_html,
            "VidFast: Playerjs file URL not found in /prorcp/ page",
        )

        # The value may list several fallback URLs joined by " or "; only the
        # first one is used, with its {v1} placeholder filled in.
        primary = file_value.split(" or ")[0].strip()
        stream_url = primary.replace("{v1}", "cloudnestra.com")
        if not stream_url.startswith("https://"):
            raise ExtractorError(f"VidFast: unexpected stream URL: {stream_url[:120]!r}")

        return {
            "destination_url": stream_url,
            "request_headers": {
                "user-agent": ua,
                "referer": "https://cloudnestra.com/",
            },
            "mediaflow_endpoint": self.mediaflow_endpoint,
        }
+11 -1
View File
@@ -46,7 +46,17 @@ class VixCloudExtractor(BaseExtractor):
iframe = soup.find("iframe").get("src")
response = await self._make_request(iframe, headers={"x-inertia": "true", "x-inertia-version": version})
elif "movie" in url or "tv" in url:
response = await self._make_request(url)
marker = "/movie" if "/movie" in url else "/tv"
site_url = (url.split(marker))[0]
parts = url.split(site_url)
headers = {
"Referer": f"{site_url}/",
"Origin": f"{site_url}",
}
response = await self._make_request(site_url + '/api' + parts[1])
response = await self._make_request(site_url + '/' + response.json()['src'],headers=headers)
if response.status != 200:
raise ExtractorError("Failed to extract URL components, Invalid Request")