This commit is contained in:
UrloMythus
2026-02-19 20:15:03 +01:00
parent 7785e8c604
commit cfc6bbabc9
181 changed files with 32141 additions and 4629 deletions

View File

@@ -65,9 +65,9 @@ class F16PxExtractor(BaseExtractor):
raise ExtractorError("F16PX: No playback data")
try:
iv = self._b64url_decode(pb["iv"]) # nonce
key = self._join_key_parts(pb["key_parts"]) # AES key
payload = self._b64url_decode(pb["payload"]) # ciphertext + tag
iv = self._b64url_decode(pb["iv"]) # nonce
key = self._join_key_parts(pb["key_parts"]) # AES key
payload = self._b64url_decode(pb["payload"]) # ciphertext + tag
cipher = python_aesgcm.new(key)
decrypted = cipher.open(iv, payload) # AAD = '' like ResolveURL
@@ -95,7 +95,7 @@ class F16PxExtractor(BaseExtractor):
self.base_headers["origin"] = origin
self.base_headers["Accept-Language"] = "en-US,en;q=0.5"
self.base_headers["Accept"] = "*/*"
self.base_headers['user-agent'] = 'Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0'
self.base_headers["user-agent"] = "Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0"
return {
"destination_url": best,

View File

@@ -1,21 +1,53 @@
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Dict, Optional, Any
from urllib.parse import urlparse
import asyncio
import httpx
import aiohttp
import json
import logging
from mediaflow_proxy.configs import settings
from mediaflow_proxy.utils.http_utils import create_httpx_client, DownloadError
from mediaflow_proxy.utils.http_client import create_aiohttp_session
from mediaflow_proxy.utils.http_utils import DownloadError
logger = logging.getLogger(__name__)
class ExtractorError(Exception):
    """Raised when a URL extractor fails to resolve a playable stream."""
@dataclass
class HttpResponse:
    """
    Lightweight container for the result of an extractor HTTP call.

    Field names follow aiohttp conventions:
    - ``status`` instead of ``status_code``
    - ``text`` holds the body already decoded to a string
    - ``content`` holds the raw body bytes
    """

    status: int
    headers: Dict[str, str]
    text: str
    content: bytes
    url: str

    def json(self) -> Any:
        """Deserialize the body text as JSON and return the result."""
        return json.loads(self.text)

    def get_origin(self) -> str:
        """Return ``scheme://host`` derived from the response URL."""
        parts = urlparse(self.url)
        return "{}://{}".format(parts.scheme, parts.netloc)
class BaseExtractor(ABC):
"""Base class for all URL extractors.
@@ -43,74 +75,99 @@ class BaseExtractor(ABC):
backoff_factor: float = 0.5,
raise_on_status: bool = True,
**kwargs,
) -> httpx.Response:
) -> HttpResponse:
"""
Make HTTP request with retry and timeout support.
Make HTTP request with retry and timeout support using aiohttp.
Parameters
----------
url : str
The URL to request.
method : str
HTTP method (GET, POST, etc.). Defaults to GET.
headers : dict | None
Additional headers to merge with base headers.
timeout : float | None
Seconds to wait for the request (applied to httpx.Timeout). Defaults to 15s.
Seconds to wait for the request. Defaults to 15s.
retries : int
Number of attempts for transient errors.
backoff_factor : float
Base for exponential backoff between retries.
raise_on_status : bool
If True, HTTP non-2xx raises DownloadError (preserves status code).
If True, HTTP non-2xx raises DownloadError.
**kwargs
Additional arguments passed to aiohttp request (e.g., data, json).
Returns
-------
HttpResponse
Response object with pre-loaded content.
"""
attempt = 0
last_exc = None
# build request headers merging base and per-request
# Build request headers merging base and per-request
request_headers = self.base_headers.copy()
if headers:
request_headers.update(headers)
timeout_cfg = httpx.Timeout(timeout or 15.0)
timeout_val = timeout or 15.0
while attempt < retries:
try:
async with create_httpx_client(timeout=timeout_cfg) as client:
response = await client.request(
async with create_aiohttp_session(url, timeout=timeout_val) as (session, proxy_url):
async with session.request(
method,
url,
headers=request_headers,
proxy=proxy_url,
**kwargs,
)
) as response:
# Read content while session is still open
content = await response.read()
text = content.decode("utf-8", errors="replace")
final_url = str(response.url)
status = response.status
resp_headers = dict(response.headers)
if raise_on_status:
try:
response.raise_for_status()
except httpx.HTTPStatusError as e:
# Provide a short body preview for debugging
body_preview = ""
try:
body_preview = e.response.text[:500]
except Exception:
body_preview = "<unreadable body>"
if raise_on_status and status >= 400:
body_preview = text[:500]
logger.debug(
"HTTPStatusError for %s (status=%s) -- body preview: %s",
"HTTP error for %s (status=%s) -- body preview: %s",
url,
e.response.status_code,
status,
body_preview,
)
raise DownloadError(e.response.status_code, f"HTTP error {e.response.status_code} while requesting {url}")
return response
raise DownloadError(status, f"HTTP error {status} while requesting {url}")
return HttpResponse(
status=status,
headers=resp_headers,
text=text,
content=content,
url=final_url,
)
except DownloadError:
# Do not retry on explicit HTTP status errors (they are intentional)
raise
except (httpx.ReadTimeout, httpx.ConnectTimeout, httpx.NetworkError, httpx.TransportError) as e:
# Transient network error retry with backoff
except (asyncio.TimeoutError, aiohttp.ClientError) as e:
# Transient network error - retry with backoff
last_exc = e
attempt += 1
sleep_for = backoff_factor * (2 ** (attempt - 1))
logger.warning("Transient network error (attempt %s/%s) for %s: %s — retrying in %.1fs",
attempt, retries, url, e, sleep_for)
logger.warning(
"Transient network error (attempt %s/%s) for %s: %s — retrying in %.1fs",
attempt,
retries,
url,
e,
sleep_for,
)
await asyncio.sleep(sleep_for)
continue
except Exception as e:
# Unexpected exception wrap as ExtractorError to keep interface consistent
# Unexpected exception - wrap as ExtractorError to keep interface consistent
logger.exception("Unhandled exception while requesting %s: %s", url, e)
raise ExtractorError(f"Request failed for URL {url}: {str(e)}")

View File

@@ -1,133 +1,345 @@
import hashlib
import hmac
import re
import base64
import time
import logging
from typing import Any, Dict, Optional, List
from urllib.parse import urlparse, quote_plus, urljoin
from typing import Any, Dict, Optional
from urllib.parse import urlparse
import aiohttp
import httpx
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError, HttpResponse
from mediaflow_proxy.utils.http_client import create_aiohttp_session
from mediaflow_proxy.configs import settings
logger = logging.getLogger(__name__)
# Silenzia l'errore ConnectionResetError su Windows
logging.getLogger('asyncio').setLevel(logging.CRITICAL)
logging.getLogger("asyncio").setLevel(logging.CRITICAL)
# Default fingerprint parameters
DEFAULT_DLHD_USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0"
DEFAULT_DLHD_SCREEN_RESOLUTION = "1920x1080"
DEFAULT_DLHD_TIMEZONE = "UTC"
DEFAULT_DLHD_LANGUAGE = "en"


def compute_fingerprint(
    user_agent: str = DEFAULT_DLHD_USER_AGENT,
    screen_resolution: str = DEFAULT_DLHD_SCREEN_RESOLUTION,
    timezone: str = DEFAULT_DLHD_TIMEZONE,
    language: str = DEFAULT_DLHD_LANGUAGE,
) -> str:
    """
    Build the value for the ``X-Fingerprint`` header.

    The fingerprint is the first 16 hex characters of
    SHA256(user_agent + screen_resolution + timezone + language).

    Args:
        user_agent: Browser user agent string.
        screen_resolution: Screen resolution such as "1920x1080".
        timezone: Timezone name such as "UTC".
        language: Two-letter language code such as "en".

    Returns:
        The 16-character hex fingerprint.
    """
    material = "".join((user_agent, screen_resolution, timezone, language))
    return hashlib.sha256(material.encode("utf-8")).hexdigest()[:16]
def compute_key_path(resource: str, number: str, timestamp: int, fingerprint: str, secret_key: str) -> str:
    """
    Build the value for the ``X-Key-Path`` header.

    The key path is the first 16 hex characters of
    HMAC-SHA256("resource|number|timestamp|fingerprint", secret_key).

    Args:
        resource: Resource segment from the key URL.
        number: Number segment from the key URL.
        timestamp: Unix timestamp in seconds.
        fingerprint: Fingerprint value (see ``compute_fingerprint``).
        secret_key: HMAC secret key (the channel salt).

    Returns:
        The 16-character hex key path.
    """
    message = "|".join((resource, str(number), str(timestamp), fingerprint))
    digest = hmac.new(secret_key.encode("utf-8"), message.encode("utf-8"), hashlib.sha256)
    return digest.hexdigest()[:16]
def compute_key_headers(key_url: str, secret_key: str) -> tuple[int, int, str, str] | None:
    """
    Derive X-Key-Timestamp, X-Key-Nonce, X-Key-Path and X-Fingerprint for a /key/ URL.

    Steps:
      1. Pull ``resource`` and ``number`` out of ``/key/{resource}/{number}``.
      2. Take the current Unix timestamp in seconds.
      3. Compute HMAC-SHA256(resource, secret_key) as hex.
      4. Proof-of-work: find the first ``i`` for which
         MD5(hmac + resource + number + ts + i) starts below 0x1000.
      5. Derive the fingerprint and key path from the pieces above.

    Args:
        key_url: URL expected to contain ``/key/{resource}/{number}``.
        secret_key: HMAC secret key (the channel salt).

    Returns:
        ``(timestamp, nonce, key_path, fingerprint)``, or ``None`` when the
        URL does not contain the expected ``/key/`` pattern.
    """
    match = re.search(r"/key/([^/]+)/(\d+)", key_url)
    if match is None:
        return None

    resource, number = match.group(1), match.group(2)
    ts = int(time.time())
    hmac_hex = hmac.new(secret_key.encode("utf-8"), resource.encode("utf-8"), hashlib.sha256).hexdigest()

    # Proof-of-work: first i below 100000 whose MD5 prefix is < 0x1000 (4096);
    # nonce stays 0 if the search space is exhausted.
    nonce = 0
    for candidate in range(100000):
        probe = f"{hmac_hex}{resource}{number}{ts}{candidate}"
        if int(hashlib.md5(probe.encode("utf-8")).hexdigest()[:4], 16) < 0x1000:
            nonce = candidate
            break

    fingerprint = compute_fingerprint()
    key_path = compute_key_path(resource, number, ts, fingerprint, secret_key)
    return ts, nonce, key_path, fingerprint
class DLHDExtractor(BaseExtractor):
"""DLHD (DaddyLive) URL extractor for M3U8 streams.
Notes:
- Multi-domain support for daddylive.sx / dlhd.dad
- Robust extraction of auth parameters and server lookup
- Uses retries/timeouts via BaseExtractor where possible
- Multi-iframe fallback for resilience
Supports the new authentication flow with:
- EPlayerAuth extraction (auth_token, channel_key, channel_salt)
- Server lookup for dynamic server selection
- Dynamic key header computation for AES-128 encrypted streams
"""
def __init__(self, request_headers: dict):
super().__init__(request_headers)
self.mediaflow_endpoint = "hls_manifest_proxy"
self.mediaflow_endpoint = "hls_key_proxy"
self._iframe_context: Optional[str] = None
self._flaresolverr_cookies: Optional[str] = None
self._flaresolverr_user_agent: Optional[str] = None
async def _fetch_via_flaresolverr(self, url: str) -> HttpResponse:
"""Fetch a URL using FlareSolverr to bypass Cloudflare protection."""
if not settings.flaresolverr_url:
raise ExtractorError("FlareSolverr URL not configured. Set FLARESOLVERR_URL in environment.")
flaresolverr_endpoint = f"{settings.flaresolverr_url.rstrip('/')}/v1"
payload = {
"cmd": "request.get",
"url": url,
"maxTimeout": settings.flaresolverr_timeout * 1000,
}
async def _make_request(self, url: str, method: str = "GET", headers: Optional[Dict] = None, **kwargs) -> Any:
"""Override to disable SSL verification for this extractor and use fetch_with_retry if available."""
from mediaflow_proxy.utils.http_utils import create_httpx_client, fetch_with_retry
logger.info(f"Using FlareSolverr to fetch: {url}")
async with aiohttp.ClientSession() as session:
async with session.post(
flaresolverr_endpoint,
json=payload,
timeout=aiohttp.ClientTimeout(total=settings.flaresolverr_timeout + 10),
) as response:
if response.status != 200:
raise ExtractorError(f"FlareSolverr returned status {response.status}")
data = await response.json()
if data.get("status") != "ok":
raise ExtractorError(f"FlareSolverr failed: {data.get('message', 'Unknown error')}")
solution = data.get("solution", {})
html_content = solution.get("response", "")
final_url = solution.get("url", url)
status = solution.get("status", 200)
# Store cookies and user-agent for subsequent requests
cookies = solution.get("cookies", [])
if cookies:
cookie_str = "; ".join([f"{c['name']}={c['value']}" for c in cookies])
self._flaresolverr_cookies = cookie_str
logger.info(f"FlareSolverr provided {len(cookies)} cookies")
user_agent = solution.get("userAgent")
if user_agent:
self._flaresolverr_user_agent = user_agent
logger.info(f"FlareSolverr user-agent: {user_agent}")
logger.info(f"FlareSolverr successfully bypassed Cloudflare for: {url}")
return HttpResponse(
status=status,
headers={},
text=html_content,
content=html_content.encode("utf-8", errors="replace"),
url=final_url,
)
async def _make_request(
self, url: str, method: str = "GET", headers: Optional[Dict] = None, use_flaresolverr: bool = False, **kwargs
) -> HttpResponse:
"""Override to disable SSL verification and optionally use FlareSolverr."""
# Use FlareSolverr for Cloudflare-protected pages
if use_flaresolverr and settings.flaresolverr_url:
return await self._fetch_via_flaresolverr(url)
timeout = kwargs.pop("timeout", 15)
retries = kwargs.pop("retries", 3)
backoff_factor = kwargs.pop("backoff_factor", 0.5)
kwargs.pop("retries", 3) # consumed but not used directly
kwargs.pop("backoff_factor", 0.5) # consumed but not used directly
# Merge headers
request_headers = self.base_headers.copy()
if headers:
request_headers.update(headers)
async with create_httpx_client(verify=False, timeout=httpx.Timeout(timeout)) as client:
try:
return await fetch_with_retry(client, method, url, headers or {}, timeout=timeout)
except Exception:
logger.debug("fetch_with_retry failed or unavailable; falling back to direct request for %s", url)
response = await client.request(method, url, headers=headers or {}, timeout=timeout)
response.raise_for_status()
return response
# Add FlareSolverr cookies if available
if self._flaresolverr_cookies:
existing_cookies = request_headers.get("Cookie", "")
if existing_cookies:
request_headers["Cookie"] = f"{existing_cookies}; {self._flaresolverr_cookies}"
else:
request_headers["Cookie"] = self._flaresolverr_cookies
# Use FlareSolverr user-agent if available
if self._flaresolverr_user_agent:
request_headers["User-Agent"] = self._flaresolverr_user_agent
async def _extract_lovecdn_stream(self, iframe_url: str, iframe_content: str, headers: dict) -> Dict[str, Any]:
# Use create_aiohttp_session with verify=False for SSL bypass
async with create_aiohttp_session(url, timeout=timeout, verify=False) as (session, proxy_url):
async with session.request(method, url, headers=request_headers, proxy=proxy_url, **kwargs) as response:
content = await response.read()
final_url = str(response.url)
status = response.status
resp_headers = dict(response.headers)
if status >= 400:
raise ExtractorError(f"HTTP error {status} while requesting {url}")
return HttpResponse(
status=status,
headers=resp_headers,
text=content.decode("utf-8", errors="replace"),
content=content,
url=final_url,
)
async def _extract_session_data(self, iframe_url: str, main_url: str) -> dict | None:
"""
Estrattore alternativo per iframe lovecdn.ru che usa un formato diverso.
Fetch the iframe URL and extract auth_token, channel_key, and channel_salt.
Args:
iframe_url: The iframe URL to fetch
main_url: The main site domain for Referer header
Returns:
Dict with auth_token, channel_key, channel_salt, or None if not found
"""
headers = {
"User-Agent": self._flaresolverr_user_agent or DEFAULT_DLHD_USER_AGENT,
"Referer": f"https://{main_url}/",
}
try:
# Cerca pattern di stream URL diretto
m3u8_patterns = [
r'["\']([^"\']*\.m3u8[^"\']*)["\']',
r'source[:\s]+["\']([^"\']+)["\']',
r'file[:\s]+["\']([^"\']+\.m3u8[^"\']*)["\']',
r'hlsManifestUrl[:\s]*["\']([^"\']+)["\']',
]
stream_url = None
for pattern in m3u8_patterns:
matches = re.findall(pattern, iframe_content)
for match in matches:
if '.m3u8' in match and match.startswith('http'):
stream_url = match
logger.info(f"Found direct m3u8 URL: {stream_url}")
break
if stream_url:
break
# Pattern 2: Cerca costruzione dinamica URL
if not stream_url:
channel_match = re.search(r'(?:stream|channel)["\s:=]+["\']([^"\']+)["\']', iframe_content)
server_match = re.search(r'(?:server|domain|host)["\s:=]+["\']([^"\']+)["\']', iframe_content)
if channel_match:
channel_name = channel_match.group(1)
server = server_match.group(1) if server_match else 'newkso.ru'
stream_url = f"https://{server}/{channel_name}/mono.m3u8"
logger.info(f"Constructed stream URL: {stream_url}")
if not stream_url:
# Fallback: cerca qualsiasi URL che sembri uno stream
url_pattern = r'https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*'
matches = re.findall(url_pattern, iframe_content)
if matches:
stream_url = matches[0]
logger.info(f"Found fallback stream URL: {stream_url}")
if not stream_url:
raise ExtractorError(f"Could not find stream URL in lovecdn.ru iframe")
# Usa iframe URL come referer
iframe_origin = f"https://{urlparse(iframe_url).netloc}"
stream_headers = {
'User-Agent': headers['User-Agent'],
'Referer': iframe_url,
'Origin': iframe_origin
}
# Determina endpoint in base al dominio dello stream
endpoint = "hls_key_proxy"
logger.info(f"Using lovecdn.ru stream with endpoint: {endpoint}")
return {
"destination_url": stream_url,
"request_headers": stream_headers,
"mediaflow_endpoint": endpoint,
}
resp = await self._make_request(iframe_url, headers=headers, timeout=12)
html = resp.text
except Exception as e:
raise ExtractorError(f"Failed to extract lovecdn.ru stream: {e}")
logger.warning(f"Error fetching iframe URL: {e}")
return None
# Pattern to extract EPlayerAuth.init block with authToken, channelKey, channelSalt
# Matches: EPlayerAuth.init({ authToken: '...', channelKey: '...', ..., channelSalt: '...' });
auth_pattern = r"EPlayerAuth\.init\s*\(\s*\{\s*authToken:\s*'([^']+)'"
channel_key_pattern = r"channelKey:\s*'([^']+)'"
channel_salt_pattern = r"channelSalt:\s*'([^']+)'"
# Pattern to extract server lookup base URL from fetchWithRetry call
lookup_pattern = r"fetchWithRetry\s*\(\s*'([^']+server_lookup\?channel_id=)"
auth_match = re.search(auth_pattern, html)
channel_key_match = re.search(channel_key_pattern, html)
channel_salt_match = re.search(channel_salt_pattern, html)
lookup_match = re.search(lookup_pattern, html)
if auth_match and channel_key_match and channel_salt_match:
result = {
"auth_token": auth_match.group(1),
"channel_key": channel_key_match.group(1),
"channel_salt": channel_salt_match.group(1),
}
if lookup_match:
result["server_lookup_url"] = lookup_match.group(1) + result["channel_key"]
return result
return None
async def _get_server_key(self, server_lookup_url: str, iframe_url: str) -> str | None:
    """
    Query the server lookup endpoint and return its ``server_key``.

    Args:
        server_lookup_url: Full server lookup URL to request.
        iframe_url: Iframe URL; its host supplies the Referer/Origin headers.

    Returns:
        The ``server_key`` from the JSON response, or ``None`` on any failure.
    """
    iframe_host = urlparse(iframe_url).netloc
    headers = {
        "User-Agent": self._flaresolverr_user_agent or DEFAULT_DLHD_USER_AGENT,
        "Referer": f"https://{iframe_host}/",
        "Origin": f"https://{iframe_host}",
    }
    try:
        payload = (await self._make_request(server_lookup_url, headers=headers, timeout=10)).json()
        return payload.get("server_key")
    except Exception as e:
        logger.warning(f"Error fetching server lookup: {e}")
        return None
def _build_m3u8_url(self, server_key: str, channel_key: str) -> str:
    """
    Compose the playlist URL for a given server/channel pair.

    The CDN serves the playlist under a ``.css`` extension (mirroring the
    original implementation), so the returned URL ends in ``mono.css``.

    Args:
        server_key: Server identifier from the server lookup step.
        channel_key: Channel identifier.

    Returns:
        The playlist URL.
    """
    if server_key != "top1/cdn":
        return f"https://{server_key}new.dvalna.ru/{server_key}/{channel_key}/mono.css"
    return f"https://top1.dvalna.ru/top1/cdn/{channel_key}/mono.css"
async def _extract_new_auth_flow(self, iframe_url: str, iframe_content: str, headers: dict) -> Dict[str, Any]:
"""Handles the new authentication flow found in recent updates."""
def _extract_params(js: str) -> Dict[str, Optional[str]]:
params = {}
patterns = {
@@ -143,82 +355,93 @@ class DLHDExtractor(BaseExtractor):
return params
params = _extract_params(iframe_content)
missing_params = [k for k, v in params.items() if not v]
if missing_params:
# This is not an error, just means it's not the new flow
raise ExtractorError(f"Not the new auth flow: missing params {missing_params}")
logger.info("New auth flow detected. Proceeding with POST auth.")
# 1. Initial Auth POST
auth_url = 'https://security.newkso.ru/auth2.php'
# Use files parameter to force multipart/form-data which is required by the server
# (None, value) tells httpx to send it as a form field, not a file upload
multipart_data = {
'channelKey': (None, params["channel_key"]),
'country': (None, params["auth_country"]),
'timestamp': (None, params["auth_ts"]),
'expiry': (None, params["auth_expiry"]),
'token': (None, params["auth_token"]),
}
auth_url = "https://security.newkso.ru/auth2.php"
iframe_origin = f"https://{urlparse(iframe_url).netloc}"
auth_headers = headers.copy()
auth_headers.update({
'Accept': '*/*',
'Accept-Language': 'en-US,en;q=0.9',
'Origin': iframe_origin,
'Referer': iframe_url,
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'cross-site',
'Priority': 'u=1, i',
})
from mediaflow_proxy.utils.http_utils import create_httpx_client
auth_headers.update(
{
"Accept": "*/*",
"Accept-Language": "en-US,en;q=0.9",
"Origin": iframe_origin,
"Referer": iframe_url,
"Sec-Fetch-Dest": "empty",
"Sec-Fetch-Mode": "cors",
"Sec-Fetch-Site": "cross-site",
"Priority": "u=1, i",
}
)
# Build form data for multipart/form-data
form_data = aiohttp.FormData()
form_data.add_field("channelKey", params["channel_key"])
form_data.add_field("country", params["auth_country"])
form_data.add_field("timestamp", params["auth_ts"])
form_data.add_field("expiry", params["auth_expiry"])
form_data.add_field("token", params["auth_token"])
try:
async with create_httpx_client(verify=False) as client:
# Note: using 'files' instead of 'data' to ensure multipart/form-data Content-Type
auth_resp = await client.post(auth_url, files=multipart_data, headers=auth_headers, timeout=12)
auth_resp.raise_for_status()
auth_data = auth_resp.json()
if not (auth_data.get("valid") or auth_data.get("success")):
raise ExtractorError(f"Initial auth failed with response: {auth_data}")
async with create_aiohttp_session(auth_url, timeout=12, verify=False) as (session, proxy_url):
async with session.post(
auth_url,
headers=auth_headers,
data=form_data,
proxy=proxy_url,
) as response:
content = await response.read()
response.raise_for_status()
import json
auth_data = json.loads(content.decode("utf-8"))
if not (auth_data.get("valid") or auth_data.get("success")):
raise ExtractorError(f"Initial auth failed with response: {auth_data}")
logger.info("New auth flow: Initial auth successful.")
except ExtractorError:
raise
except Exception as e:
raise ExtractorError(f"New auth flow failed during initial auth POST: {e}")
# 2. Server Lookup
server_lookup_url = f"https://{urlparse(iframe_url).netloc}/server_lookup.js?channel_id={params['channel_key']}"
try:
# Use _make_request as it handles retries and expects JSON
# Use _make_request as it handles retries
lookup_resp = await self._make_request(server_lookup_url, headers=headers, timeout=10)
server_data = lookup_resp.json()
server_key = server_data.get('server_key')
server_key = server_data.get("server_key")
if not server_key:
raise ExtractorError(f"No server_key in lookup response: {server_data}")
logger.info(f"New auth flow: Server lookup successful - Server key: {server_key}")
except ExtractorError:
raise
except Exception as e:
raise ExtractorError(f"New auth flow failed during server lookup: {e}")
# 3. Build final stream URL
channel_key = params['channel_key']
auth_token = params['auth_token']
channel_key = params["channel_key"]
auth_token = params["auth_token"]
# The JS logic uses .css, not .m3u8
if server_key == 'top1/cdn':
stream_url = f'https://top1.newkso.ru/top1/cdn/{channel_key}/mono.css'
if server_key == "top1/cdn":
stream_url = f"https://top1.newkso.ru/top1/cdn/{channel_key}/mono.css"
else:
stream_url = f'https://{server_key}new.newkso.ru/{server_key}/{channel_key}/mono.css'
logger.info(f'New auth flow: Constructed stream URL: {stream_url}')
stream_url = f"https://{server_key}new.newkso.ru/{server_key}/{channel_key}/mono.css"
logger.info(f"New auth flow: Constructed stream URL: {stream_url}")
stream_headers = {
'User-Agent': headers['User-Agent'],
'Referer': iframe_url,
'Origin': iframe_origin,
'Authorization': f'Bearer {auth_token}',
'X-Channel-Key': channel_key
"User-Agent": headers["User-Agent"],
"Referer": iframe_url,
"Origin": iframe_origin,
"Authorization": f"Bearer {auth_token}",
"X-Channel-Key": channel_key,
}
return {
@@ -227,106 +450,255 @@ class DLHDExtractor(BaseExtractor):
"mediaflow_endpoint": "hls_manifest_proxy",
}
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
"""Main extraction flow: resolve base, fetch players, extract iframe, auth and final m3u8."""
baseurl = "https://dlhd.dad/"
async def _extract_lovecdn_stream(self, iframe_url: str, iframe_content: str, headers: dict) -> Dict[str, Any]:
"""
Alternative extractor for lovecdn.ru iframe that uses a different format.
"""
try:
# Look for direct stream URL patterns
m3u8_patterns = [
r'["\']([^"\']*\.m3u8[^"\']*)["\']',
r'source[:\s]+["\']([^"\']+)["\']',
r'file[:\s]+["\']([^"\']+\.m3u8[^"\']*)["\']',
r'hlsManifestUrl[:\s]*["\']([^"\']+)["\']',
]
def extract_channel_id(u: str) -> Optional[str]:
match_watch_id = re.search(r'watch\.php\?id=(\d+)', u)
if match_watch_id:
return match_watch_id.group(1)
return None
stream_url = None
for pattern in m3u8_patterns:
matches = re.findall(pattern, iframe_content)
for match in matches:
if ".m3u8" in match and match.startswith("http"):
stream_url = match
logger.info(f"Found direct m3u8 URL: {stream_url}")
break
if stream_url:
break
# Pattern 2: Look for dynamic URL construction
if not stream_url:
channel_match = re.search(r'(?:stream|channel)["\s:=]+["\']([^"\']+)["\']', iframe_content)
server_match = re.search(r'(?:server|domain|host)["\s:=]+["\']([^"\']+)["\']', iframe_content)
async def get_stream_data(initial_url: str):
daddy_origin = urlparse(baseurl).scheme + "://" + urlparse(baseurl).netloc
daddylive_headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36',
'Referer': baseurl,
'Origin': daddy_origin
if channel_match:
channel_name = channel_match.group(1)
server = server_match.group(1) if server_match else "newkso.ru"
stream_url = f"https://{server}/{channel_name}/mono.m3u8"
logger.info(f"Constructed stream URL: {stream_url}")
if not stream_url:
# Fallback: look for any URL that looks like a stream
url_pattern = r'https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*'
matches = re.findall(url_pattern, iframe_content)
if matches:
stream_url = matches[0]
logger.info(f"Found fallback stream URL: {stream_url}")
if not stream_url:
raise ExtractorError("Could not find stream URL in lovecdn.ru iframe")
# Use iframe URL as referer
iframe_origin = f"https://{urlparse(iframe_url).netloc}"
stream_headers = {"User-Agent": headers["User-Agent"], "Referer": iframe_url, "Origin": iframe_origin}
# Determine endpoint based on the stream domain
endpoint = "hls_key_proxy"
logger.info(f"Using lovecdn.ru stream with endpoint: {endpoint}")
return {
"destination_url": stream_url,
"request_headers": stream_headers,
"mediaflow_endpoint": endpoint,
}
except Exception as e:
raise ExtractorError(f"Failed to extract lovecdn.ru stream: {e}")
# 1. Request initial page
resp1 = await self._make_request(initial_url, headers=daddylive_headers, timeout=15)
player_links = re.findall(r'<button[^>]*data-url="([^"]+)"[^>]*>Player\s*\d+</button>', resp1.text)
if not player_links:
raise ExtractorError("No player links found on the page.")
async def _extract_direct_stream(self, channel_id: str) -> Dict[str, Any]:
"""
Direct stream extraction using server lookup API with the new auth flow.
This extracts auth_token, channel_key, channel_salt and computes key headers.
"""
# Common iframe domains for DLHD
iframe_domains = ["lefttoplay.xyz"]
for iframe_domain in iframe_domains:
try:
iframe_url = f"https://{iframe_domain}/premiumtv/daddyhd.php?id={channel_id}"
logger.info(f"Attempting extraction via {iframe_domain}")
# Prova tutti i player e raccogli tutti gli iframe validi
last_player_error = None
iframe_candidates = []
session_data = await self._extract_session_data(iframe_url, "dlhd.link")
for player_url in player_links:
try:
if not player_url.startswith('http'):
player_url = baseurl + player_url.lstrip('/')
daddylive_headers['Referer'] = player_url
daddylive_headers['Origin'] = player_url
resp2 = await self._make_request(player_url, headers=daddylive_headers, timeout=12)
iframes2 = re.findall(r'<iframe.*?src="([^"]*)"', resp2.text)
# Raccogli tutti gli iframe trovati
for iframe in iframes2:
if iframe not in iframe_candidates:
iframe_candidates.append(iframe)
logger.info(f"Found iframe candidate: {iframe}")
except Exception as e:
last_player_error = e
logger.warning(f"Failed to process player link {player_url}: {e}")
if not session_data:
logger.debug(f"No session data from {iframe_domain}")
continue
logger.info(f"Got session data from {iframe_domain}: channel_key={session_data['channel_key']}")
if not iframe_candidates:
if last_player_error:
raise ExtractorError(f"All player links failed. Last error: {last_player_error}")
raise ExtractorError("No valid iframe found in any player page")
# Prova ogni iframe finché uno non funziona
last_iframe_error = None
for iframe_candidate in iframe_candidates:
try:
logger.info(f"Trying iframe: {iframe_candidate}")
iframe_domain = urlparse(iframe_candidate).netloc
if not iframe_domain:
logger.warning(f"Invalid iframe URL format: {iframe_candidate}")
continue
self._iframe_context = iframe_candidate
resp3 = await self._make_request(iframe_candidate, headers=daddylive_headers, timeout=12)
iframe_content = resp3.text
logger.info(f"Successfully loaded iframe from: {iframe_domain}")
if 'lovecdn.ru' in iframe_domain:
logger.info("Detected lovecdn.ru iframe - using alternative extraction")
return await self._extract_lovecdn_stream(iframe_candidate, iframe_content, daddylive_headers)
else:
logger.info("Attempting new auth flow extraction.")
return await self._extract_new_auth_flow(iframe_candidate, iframe_content, daddylive_headers)
except Exception as e:
logger.warning(f"Failed to process iframe {iframe_candidate}: {e}")
last_iframe_error = e
# Get server key
if "server_lookup_url" not in session_data:
logger.debug(f"No server lookup URL from {iframe_domain}")
continue
raise ExtractorError(f"All iframe candidates failed. Last error: {last_iframe_error}")
server_key = await self._get_server_key(session_data["server_lookup_url"], iframe_url)
if not server_key:
logger.debug(f"No server key from {iframe_domain}")
continue
logger.info(f"Got server key: {server_key}")
# Build m3u8 URL
m3u8_url = self._build_m3u8_url(server_key, session_data["channel_key"])
logger.info(f"M3U8 URL: {m3u8_url}")
# Build stream headers with auth
iframe_origin = f"https://{iframe_domain}"
stream_headers = {
"User-Agent": self._flaresolverr_user_agent or DEFAULT_DLHD_USER_AGENT,
"Referer": iframe_url,
"Origin": iframe_origin,
"Authorization": f"Bearer {session_data['auth_token']}",
}
# Return the result with key header parameters
# These will be used to compute headers when fetching keys
return {
"destination_url": m3u8_url,
"request_headers": stream_headers,
"mediaflow_endpoint": "hls_key_proxy",
# Force playlist processing since DLHD uses .css extension for m3u8
"force_playlist_proxy": True,
# Key header computation parameters
"dlhd_key_params": {
"channel_salt": session_data["channel_salt"],
"auth_token": session_data["auth_token"],
"iframe_url": iframe_url,
},
}
except Exception as e:
logger.warning(f"Failed extraction via {iframe_domain}: {e}")
continue
raise ExtractorError(f"Failed to extract stream from all iframe domains for channel {channel_id}")
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
    """Main extraction flow - uses direct server lookup with new auth flow.

    Tries the direct server-lookup auth flow first and falls back to the
    legacy iframe-based flow when that fails.

    Args:
        url: Channel page URL (``watch.php?id=NNN`` or ``stream-NNN`` style).
        **kwargs: Unused; accepted for extractor-interface compatibility.

    Returns:
        Stream definition dict (destination_url, request_headers,
        mediaflow_endpoint, plus flow-specific extras) from the chosen flow.

    Raises:
        ExtractorError: If no channel ID can be parsed or both flows fail.
    """

    def extract_channel_id(u: str) -> Optional[str]:
        # watch.php?id=NNN style links
        match_watch_id = re.search(r"watch\.php\?id=(\d+)", u)
        if match_watch_id:
            return match_watch_id.group(1)
        # Also try stream-XXX pattern
        match_stream = re.search(r"stream-(\d+)", u)
        if match_stream:
            return match_stream.group(1)
        return None

    try:
        channel_id = extract_channel_id(url)
        if not channel_id:
            raise ExtractorError(f"Unable to extract channel ID from {url}")
        logger.info(f"Extracting DLHD stream for channel ID: {channel_id}")
        # Try direct stream extraction with new auth flow
        try:
            return await self._extract_direct_stream(channel_id)
        except ExtractorError as e:
            logger.warning(f"Direct stream extraction failed: {e}")
            # Fallback to legacy iframe-based extraction if direct fails
            logger.info("Falling back to iframe-based extraction...")
            return await self._extract_via_iframe(url, channel_id)
    except Exception as e:
        # Chain the cause so the original failure stays in the traceback.
        raise ExtractorError(f"Extraction failed: {str(e)}") from e
async def _extract_via_iframe(self, url: str, channel_id: str) -> Dict[str, Any]:
    """Legacy iframe-based extraction flow - used as fallback.

    Scrapes the channel page for "Player N" buttons, follows each player
    link to collect candidate iframe URLs, then tries every iframe until
    one yields a stream via the lovecdn or new-auth-flow handlers.

    Args:
        url: Channel page URL to scrape.
        channel_id: Numeric channel ID (used by callers/logging here).

    Returns:
        Dict[str, Any]: Stream definition produced by
        ``_extract_lovecdn_stream`` or ``_extract_new_auth_flow``.

    Raises:
        ExtractorError: If no player links are found, no iframe candidates
            exist, or every iframe candidate fails.
    """
    baseurl = "https://dlhd.dad/"
    daddy_origin = urlparse(baseurl).scheme + "://" + urlparse(baseurl).netloc
    daddylive_headers = {
        # Prefer the FlareSolverr-reported UA so follow-up requests match
        # the Cloudflare clearance session (when one was established).
        "User-Agent": self._flaresolverr_user_agent
        or "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
        "Referer": baseurl,
        "Origin": daddy_origin,
    }
    # 1. Request initial page - use FlareSolverr if available to bypass Cloudflare
    use_flaresolverr = settings.flaresolverr_url is not None
    resp1 = await self._make_request(url, headers=daddylive_headers, timeout=15, use_flaresolverr=use_flaresolverr)
    resp1_text = resp1.text
    # Update headers with FlareSolverr user-agent after initial request
    if self._flaresolverr_user_agent:
        daddylive_headers["User-Agent"] = self._flaresolverr_user_agent
    # "Player 1", "Player 2", ... buttons carry the player page URL.
    player_links = re.findall(r'<button[^>]*data-url="([^"]+)"[^>]*>Player\s*\d+</button>', resp1_text)
    if not player_links:
        raise ExtractorError("No player links found on the page.")
    # Try all players and collect all valid iframes
    last_player_error = None
    iframe_candidates = []
    for player_url in player_links:
        try:
            if not player_url.startswith("http"):
                # Relative link; baseurl already ends with "/".
                player_url = baseurl + player_url.lstrip("/")
            daddylive_headers["Referer"] = player_url
            # NOTE(review): Origin is set to the full player URL rather than
            # its scheme://host origin - confirm upstream expects this.
            daddylive_headers["Origin"] = player_url
            resp2 = await self._make_request(player_url, headers=daddylive_headers, timeout=12)
            resp2_text = resp2.text
            iframes2 = re.findall(r'<iframe.*?src="([^"]*)"', resp2_text)
            # Collect all found iframes (deduplicated, order preserved)
            for iframe in iframes2:
                if iframe not in iframe_candidates:
                    iframe_candidates.append(iframe)
                    logger.info(f"Found iframe candidate: {iframe}")
        except Exception as e:
            # A failing player page is not fatal; remember the last error
            # and move on to the next player link.
            last_player_error = e
            logger.warning(f"Failed to process player link {player_url}: {e}")
            continue
    if not iframe_candidates:
        if last_player_error:
            raise ExtractorError(f"All player links failed. Last error: {last_player_error}")
        raise ExtractorError("No valid iframe found in any player page")
    # Try each iframe until one works
    last_iframe_error = None
    for iframe_candidate in iframe_candidates:
        try:
            logger.info(f"Trying iframe: {iframe_candidate}")
            iframe_domain = urlparse(iframe_candidate).netloc
            if not iframe_domain:
                logger.warning(f"Invalid iframe URL format: {iframe_candidate}")
                continue
            # Remember the current iframe for helpers that need it as
            # request context.
            self._iframe_context = iframe_candidate
            resp3 = await self._make_request(iframe_candidate, headers=daddylive_headers, timeout=12)
            iframe_content = resp3.text
            logger.info(f"Successfully loaded iframe from: {iframe_domain}")
            if "lovecdn.ru" in iframe_domain:
                logger.info("Detected lovecdn.ru iframe - using alternative extraction")
                return await self._extract_lovecdn_stream(iframe_candidate, iframe_content, daddylive_headers)
            else:
                logger.info("Attempting new auth flow extraction.")
                return await self._extract_new_auth_flow(iframe_candidate, iframe_content, daddylive_headers)
        except Exception as e:
            logger.warning(f"Failed to process iframe {iframe_candidate}: {e}")
            last_iframe_error = e
            continue
    raise ExtractorError(f"All iframe candidates failed. Last error: {last_iframe_error}")

View File

@@ -1,39 +1,52 @@
import re
import time
from typing import Dict
from urllib.parse import urlparse, urljoin
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
class DoodStreamExtractor(BaseExtractor):
"""DoodStream URL extractor."""
"""
Dood / MyVidPlay extractor
Resolves to direct CDN MP4
"""
def __init__(self, request_headers: dict):
super().__init__(request_headers)
self.base_url = "https://d000d.com"
self.base_url = "https://myvidplay.com"
async def extract(self, url: str, **kwargs) -> Dict[str, str]:
"""Extract DoodStream URL."""
response = await self._make_request(url)
async def extract(self, url: str, **kwargs):
parsed = urlparse(url)
video_id = parsed.path.rstrip("/").split("/")[-1]
if not video_id:
raise ExtractorError("Invalid Dood URL")
# Extract URL pattern
pattern = r"(\/pass_md5\/.*?)'.*(\?token=.*?expiry=)"
match = re.search(pattern, response.text, re.DOTALL)
headers = {
"User-Agent": self.base_headers.get("User-Agent") or "Mozilla/5.0",
"Referer": f"{self.base_url}/",
}
embed_url = f"{self.base_url}/e/{video_id}"
html = (await self._make_request(embed_url, headers=headers)).text
match = re.search(r"(\/pass_md5\/[^']+)", html)
if not match:
raise ExtractorError("Failed to extract URL pattern")
raise ExtractorError("Dood: pass_md5 not found")
# Build final URL
pass_url = f"{self.base_url}{match[1]}"
referer = f"{self.base_url}/"
headers = {"range": "bytes=0-", "referer": referer}
pass_url = urljoin(self.base_url, match.group(1))
response = await self._make_request(pass_url, headers=headers)
timestamp = str(int(time.time()))
final_url = f"{response.text}123456789{match[2]}{timestamp}"
base_stream = (await self._make_request(pass_url, headers=headers)).text.strip()
token_match = re.search(r"token=([^&]+)", html)
if not token_match:
raise ExtractorError("Dood: token missing")
token = token_match.group(1)
final_url = f"{base_stream}123456789?token={token}&expiry={int(time.time())}"
self.base_headers["referer"] = referer
return {
"destination_url": final_url,
"request_headers": self.base_headers,
"mediaflow_endpoint": self.mediaflow_endpoint,
"request_headers": headers,
"mediaflow_endpoint": "proxy_stream_endpoint",
}

View File

@@ -7,6 +7,7 @@ from mediaflow_proxy.extractors.sportsonline import SportsonlineExtractor
from mediaflow_proxy.extractors.filelions import FileLionsExtractor
from mediaflow_proxy.extractors.filemoon import FileMoonExtractor
from mediaflow_proxy.extractors.F16Px import F16PxExtractor
from mediaflow_proxy.extractors.gupload import GuploadExtractor
from mediaflow_proxy.extractors.livetv import LiveTVExtractor
from mediaflow_proxy.extractors.lulustream import LuluStreamExtractor
from mediaflow_proxy.extractors.maxstream import MaxstreamExtractor
@@ -33,6 +34,7 @@ class ExtractorFactory:
"FileLions": FileLionsExtractor,
"FileMoon": FileMoonExtractor,
"F16Px": F16PxExtractor,
"Gupload": GuploadExtractor,
"Uqload": UqloadExtractor,
"Mixdrop": MixdropExtractor,
"Streamtape": StreamtapeExtractor,

View File

@@ -4,25 +4,29 @@ from mediaflow_proxy.extractors.base import BaseExtractor
from mediaflow_proxy.utils.packed import eval_solver
class FastreamExtractor(BaseExtractor):
"""Fastream URL extractor."""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.mediaflow_endpoint = "hls_manifest_proxy"
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
headers = {'Accept': '*/*', 'Connection': 'keep-alive','Accept-Language': 'en-US,en;q=0.5','Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0'}
headers = {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Connection": "keep-alive",
"Accept-Language": "en-US,en;q=0.5",
"user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0",
}
patterns = [r'file:"(.*?)"']
final_url = await eval_solver(self, url, headers, patterns)
self.base_headers["referer"] = f'https://{url.replace("https://","").split("/")[0]}/'
self.base_headers["origin"] = f'https://{url.replace("https://","").split("/")[0]}'
self.base_headers['Accept-Language'] = 'en-US,en;q=0.5'
self.base_headers['Accept'] = '*/*'
self.base_headers['user-agent'] = 'Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0'
self.base_headers["referer"] = f"https://{url.replace('https://', '').split('/')[0]}/"
self.base_headers["origin"] = f"https://{url.replace('https://', '').split('/')[0]}"
self.base_headers["Accept-Language"] = "en-US,en;q=0.5"
self.base_headers["Accept"] = "*/*"
self.base_headers["user-agent"] = "Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0"
return {
"destination_url": final_url,

View File

@@ -3,17 +3,18 @@ from typing import Dict, Any
from mediaflow_proxy.extractors.base import BaseExtractor
from mediaflow_proxy.utils.packed import eval_solver
class FileLionsExtractor(BaseExtractor):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.mediaflow_endpoint = "hls_manifest_proxy"
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
headers = {}
patterns = [ # See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/filelions.py
r'''sources:\s*\[{file:\s*["'](?P<url>[^"']+)''',
r'''["']hls4["']:\s*["'](?P<url>[^"']+)''',
r'''["']hls2["']:\s*["'](?P<url>[^"']+)'''
headers = {}
patterns = [ # See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/filelions.py
r"""sources:\s*\[{file:\s*["'](?P<url>[^"']+)""",
r"""["']hls4["']:\s*["'](?P<url>[^"']+)""",
r"""["']hls2["']:\s*["'](?P<url>[^"']+)""",
]
final_url = await eval_solver(self, url, headers, patterns)
@@ -23,4 +24,5 @@ class FileLionsExtractor(BaseExtractor):
"destination_url": final_url,
"request_headers": self.base_headers,
"mediaflow_endpoint": self.mediaflow_endpoint,
"stream_transformer": "ts_stream",
}

View File

@@ -40,7 +40,7 @@ class FileMoonExtractor(BaseExtractor):
)
test_resp = await self._make_request(final_url, headers=headers)
if test_resp.status_code == 404:
if test_resp.status == 404:
raise ExtractorError("Stream not found (404)")
self.base_headers["referer"] = url

View File

@@ -0,0 +1,65 @@
import re
import base64
import json
from typing import Dict, Any
from urllib.parse import urlparse
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
class GuploadExtractor(BaseExtractor):
    """Gupload.xyz extractor.

    Fetches the embed page, decodes the base64 ``decodePayload('...')``
    blob (format: ``<junk>|{json}``) and returns the HLS master playlist
    URL for proxying.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Streams resolve to HLS master playlists, so route through the
        # manifest proxy endpoint.
        self.mediaflow_endpoint = "hls_manifest_proxy"

    async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
        """Extract the HLS stream URL from a gupload.xyz embed page.

        Args:
            url: Embed page URL on gupload.xyz (or one of its subdomains).
            **kwargs: Unused; accepted for extractor-interface compatibility.

        Returns:
            Dict with destination_url, request_headers and mediaflow_endpoint.

        Raises:
            ExtractorError: On wrong domain, missing/undecodable payload,
                missing ``videoUrl``, or an unavailable stream.
        """
        parsed = urlparse(url)
        hostname = parsed.hostname or ""
        # Exact-or-subdomain match; a plain substring check would also
        # accept look-alike hosts such as "gupload.xyz.evil.com".
        if not (hostname == "gupload.xyz" or hostname.endswith(".gupload.xyz")):
            raise ExtractorError("GUPLOAD: Invalid domain")

        headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/144 Safari/537.36"
            ),
            "Referer": "https://gupload.xyz/",
            "Origin": "https://gupload.xyz",
        }

        # --- Fetch embed page ---
        response = await self._make_request(url, headers=headers)
        html = response.text

        # --- Extract base64 payload ---
        match = re.search(r"decodePayload\('([^']+)'\)", html)
        if not match:
            raise ExtractorError("GUPLOAD: Payload not found")
        encoded = match.group(1).strip()

        # --- Decode payload (format: <junk>|{json}) ---
        try:
            decoded = base64.b64decode(encoded).decode("utf-8", "ignore")
            json_part = decoded.split("|", 1)[1]
            payload = json.loads(json_part)
        except Exception as exc:
            # Covers bad base64, a missing "|" separator (IndexError) and
            # invalid JSON; chain the cause for debuggability.
            raise ExtractorError("GUPLOAD: Payload decode failed") from exc

        # --- Extract HLS URL ---
        hls_url = payload.get("videoUrl")
        if not hls_url:
            raise ExtractorError("GUPLOAD: videoUrl missing")

        # --- Validate stream up front (prevents client timeout) ---
        test = await self._make_request(hls_url, headers=headers, raise_on_status=False)
        if test.status >= 400:
            raise ExtractorError(f"GUPLOAD: Stream unavailable ({test.status})")

        # Return the MASTER playlist; MediaFlow handles variant selection.
        return {
            "destination_url": hls_url,
            "request_headers": headers,
            "mediaflow_endpoint": self.mediaflow_endpoint,
        }

View File

@@ -2,9 +2,9 @@ import re
from typing import Dict, Tuple, Optional
from urllib.parse import urljoin, urlparse, unquote
from httpx import Response
import aiohttp
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError, HttpResponse
class LiveTVExtractor(BaseExtractor):
@@ -33,20 +33,21 @@ class LiveTVExtractor(BaseExtractor):
stream_title: Optional stream title to filter specific stream
Returns:
Tuple[str, Dict[str, str]]: Stream URL and required headers
Dict containing destination_url, request_headers, and mediaflow_endpoint
"""
try:
# Get the channel page
response = await self._make_request(url)
response_text = response.text
self.base_headers["referer"] = urljoin(url, "/")
# Extract player API details
player_api_base, method = await self._extract_player_api_base(response.text)
player_api_base, method = await self._extract_player_api_base(response_text)
if not player_api_base:
raise ExtractorError("Failed to extract player API URL")
# Get player options
options_data = await self._get_player_options(response.text)
options_data = await self._get_player_options(response_text)
if not options_data:
raise ExtractorError("No player options found")
@@ -66,7 +67,7 @@ class LiveTVExtractor(BaseExtractor):
if not stream_url:
continue
response = {
result = {
"destination_url": stream_url,
"request_headers": self.base_headers,
"mediaflow_endpoint": self.mediaflow_endpoint,
@@ -75,7 +76,7 @@ class LiveTVExtractor(BaseExtractor):
# Set endpoint based on stream type
if stream_data.get("type") == "mpd":
if stream_data.get("drm_key_id") and stream_data.get("drm_key"):
response.update(
result.update(
{
"query_params": {
"key_id": stream_data["drm_key_id"],
@@ -85,7 +86,7 @@ class LiveTVExtractor(BaseExtractor):
}
)
return response
return result
raise ExtractorError("No valid stream found")
@@ -120,7 +121,12 @@ class LiveTVExtractor(BaseExtractor):
api_url = f"{api_base}{post}/{type_}/{nume}"
response = await self._make_request(api_url)
else:
form_data = {"action": "doo_player_ajax", "post": post, "nume": nume, "type": type_}
# Use aiohttp FormData for POST requests
form_data = aiohttp.FormData()
form_data.add_field("action", "doo_player_ajax")
form_data.add_field("post", post)
form_data.add_field("nume", nume)
form_data.add_field("type", type_)
response = await self._make_request(api_base, method="POST", data=form_data)
# Get iframe URL from API response
@@ -136,7 +142,7 @@ class LiveTVExtractor(BaseExtractor):
except Exception as e:
raise ExtractorError(f"Failed to process player option: {str(e)}")
async def _extract_stream_url(self, iframe_response: Response, iframe_url: str) -> Dict:
async def _extract_stream_url(self, iframe_response: HttpResponse, iframe_url: str) -> Dict:
"""
Extract final stream URL from iframe content.
"""
@@ -147,8 +153,9 @@ class LiveTVExtractor(BaseExtractor):
# Check if content is already a direct M3U8 stream
content_types = ["application/x-mpegurl", "application/vnd.apple.mpegurl"]
content_type = iframe_response.headers.get("content-type", "")
if any(ext in iframe_response.headers["content-type"] for ext in content_types):
if any(ext in content_type for ext in content_types):
return {"url": iframe_url, "type": "m3u8"}
stream_data = {}

View File

@@ -13,7 +13,7 @@ class LuluStreamExtractor(BaseExtractor):
response = await self._make_request(url)
# See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/lulustream.py
pattern = r'''sources:\s*\[{file:\s*["'](?P<url>[^"']+)'''
pattern = r"""sources:\s*\[{file:\s*["'](?P<url>[^"']+)"""
match = re.search(pattern, response.text, re.DOTALL)
if not match:
raise ExtractorError("Failed to extract source URL")

View File

@@ -1,6 +1,6 @@
from typing import Dict, Any
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
from mediaflow_proxy.extractors.base import BaseExtractor
from mediaflow_proxy.utils.packed import eval_solver

View File

@@ -22,7 +22,9 @@ class OkruExtractor(BaseExtractor):
data_options = div.get("data-options")
data = json.loads(data_options)
metadata = json.loads(data["flashvars"]["metadata"])
final_url = metadata.get("hlsMasterPlaylistUrl") or metadata.get("hlsManifestUrl")
final_url = (
metadata.get("hlsMasterPlaylistUrl") or metadata.get("hlsManifestUrl") or metadata.get("ondemandHls")
)
self.base_headers["referer"] = url
return {
"destination_url": final_url,

View File

@@ -1,10 +1,10 @@
import re
import logging
from typing import Any, Dict, Optional
from typing import Any, Dict
from urllib.parse import urlparse
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
from mediaflow_proxy.utils.packed import detect, unpack
from mediaflow_proxy.utils.packed import unpack
logger = logging.getLogger(__name__)
@@ -32,18 +32,17 @@ class SportsonlineExtractor(BaseExtractor):
def _detect_packed_blocks(self, html: str) -> list[str]:
"""
Detect and extract packed eval blocks from HTML.
Replicates the TypeScript logic: /eval\(function(.+?.+)/g
"""
# Find all eval(function...) blocks - more greedy to capture full packed code
pattern = re.compile(r"eval\(function\(p,a,c,k,e,.*?\)\)(?:\s*;|\s*<)", re.DOTALL)
raw_matches = pattern.findall(html)
# If no matches with the strict pattern, try a more relaxed one
if not raw_matches:
# Try to find eval(function and capture until we find the closing ))
pattern = re.compile(r"eval\(function\(p,a,c,k,e,[dr]\).*?\}\(.*?\)\)", re.DOTALL)
raw_matches = pattern.findall(html)
return raw_matches
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
@@ -60,25 +59,25 @@ class SportsonlineExtractor(BaseExtractor):
raise ExtractorError("No iframe found on the page")
iframe_url = iframe_match.group(1)
# Normalize iframe URL
if iframe_url.startswith('//'):
iframe_url = 'https:' + iframe_url
elif iframe_url.startswith('/'):
if iframe_url.startswith("//"):
iframe_url = "https:" + iframe_url
elif iframe_url.startswith("/"):
parsed_main = urlparse(url)
iframe_url = f"{parsed_main.scheme}://{parsed_main.netloc}{iframe_url}"
logger.info(f"Found iframe URL: {iframe_url}")
# Step 2: Fetch iframe with Referer
iframe_headers = {
'Referer': 'https://sportzonline.st/',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.9,it;q=0.8',
'Cache-Control': 'no-cache'
"Referer": "https://sportzonline.st/",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.9,it;q=0.8",
"Cache-Control": "no-cache",
}
iframe_response = await self._make_request(iframe_url, headers=iframe_headers, timeout=15)
iframe_html = iframe_response.text
@@ -86,9 +85,9 @@ class SportsonlineExtractor(BaseExtractor):
# Step 3: Detect packed blocks
packed_blocks = self._detect_packed_blocks(iframe_html)
logger.info(f"Found {len(packed_blocks)} packed blocks")
if not packed_blocks:
logger.warning("No packed blocks found, trying direct m3u8 search")
# Fallback: try direct m3u8 search
@@ -96,13 +95,10 @@ class SportsonlineExtractor(BaseExtractor):
if direct_match:
m3u8_url = direct_match.group(1)
logger.info(f"Found direct m3u8 URL: {m3u8_url}")
return {
"destination_url": m3u8_url,
"request_headers": {
'Referer': iframe_url,
'User-Agent': iframe_headers['User-Agent']
},
"request_headers": {"Referer": iframe_url, "User-Agent": iframe_headers["User-Agent"]},
"mediaflow_endpoint": self.mediaflow_endpoint,
}
else:
@@ -134,13 +130,13 @@ class SportsonlineExtractor(BaseExtractor):
r'file\s*:\s*["\']([^"\']+\.m3u8[^"\']*)["\']', # file: "...m3u8"
r'["\']([^"\']*https?://[^"\']+\.m3u8[^"\']*)["\']', # any m3u8 URL
]
for pattern in patterns:
src_match = re.search(pattern, unpacked_code)
if src_match:
m3u8_url = src_match.group(1)
# Verify it looks like a valid m3u8 URL
if '.m3u8' in m3u8_url or 'http' in m3u8_url:
if ".m3u8" in m3u8_url or "http" in m3u8_url:
break
m3u8_url = None
@@ -162,11 +158,11 @@ class SportsonlineExtractor(BaseExtractor):
src_match = re.search(pattern, unpacked_code)
if src_match:
test_url = src_match.group(1)
if '.m3u8' in test_url or 'http' in test_url:
if ".m3u8" in test_url or "http" in test_url:
m3u8_url = test_url
logger.info(f"Found m3u8 in block {i}")
break
if m3u8_url:
break
except Exception as e:
@@ -181,10 +177,7 @@ class SportsonlineExtractor(BaseExtractor):
# Return stream configuration
return {
"destination_url": m3u8_url,
"request_headers": {
'Referer': iframe_url,
'User-Agent': iframe_headers['User-Agent']
},
"request_headers": {"Referer": iframe_url, "User-Agent": iframe_headers["User-Agent"]},
"mediaflow_endpoint": self.mediaflow_endpoint,
}

View File

@@ -15,8 +15,8 @@ class StreamtapeExtractor(BaseExtractor):
if not matches:
raise ExtractorError("Failed to extract URL components")
i = 0
for i in range(len(matches)):
if matches[i-1] == matches[i] and "ip=" in matches[i]:
for i in range(len(matches)):
if matches[i - 1] == matches[i] and "ip=" in matches[i]:
final_url = f"https://streamtape.com/get_video?{matches[i]}"
self.base_headers["referer"] = url

View File

@@ -19,18 +19,11 @@ class StreamWishExtractor(BaseExtractor):
headers = {"Referer": referer}
response = await self._make_request(url, headers=headers)
iframe_match = re.search(
r'<iframe[^>]+src=["\']([^"\']+)["\']',
response.text,
re.DOTALL
)
iframe_match = re.search(r'<iframe[^>]+src=["\']([^"\']+)["\']', response.text, re.DOTALL)
iframe_url = urljoin(url, iframe_match.group(1)) if iframe_match else url
iframe_response = await self._make_request(
iframe_url,
headers=headers
)
iframe_response = await self._make_request(iframe_url, headers=headers)
html = iframe_response.text
final_url = self._extract_m3u8(html)
@@ -58,15 +51,18 @@ class StreamWishExtractor(BaseExtractor):
final_url = urljoin(iframe_url, final_url)
origin = f"{urlparse(referer).scheme}://{urlparse(referer).netloc}"
self.base_headers.update({
"Referer": referer,
"Origin": origin,
})
self.base_headers.update(
{
"Referer": referer,
"Origin": origin,
}
)
return {
"destination_url": final_url,
"request_headers": self.base_headers,
"mediaflow_endpoint": self.mediaflow_endpoint,
"stream_transformer": "ts_stream",
}
@staticmethod
@@ -74,8 +70,5 @@ class StreamWishExtractor(BaseExtractor):
"""
Extract first absolute m3u8 URL from text
"""
match = re.search(
r'https?://[^"\']+\.m3u8[^"\']*',
text
)
match = re.search(r'https?://[^"\']+\.m3u8[^"\']*', text)
return match.group(0) if match else None

View File

@@ -1,27 +1,64 @@
import re
from typing import Dict, Any
from urllib.parse import urljoin, urlparse
from mediaflow_proxy.extractors.base import BaseExtractor
from mediaflow_proxy.utils.packed import eval_solver
from bs4 import BeautifulSoup, SoupStrainer
from curl_cffi.requests import AsyncSession
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
from mediaflow_proxy.utils.packed import unpack, detect, UnpackingError
class SupervideoExtractor(BaseExtractor):
"""Supervideo URL extractor."""
"""Supervideo URL extractor.
Uses curl_cffi to bypass Cloudflare protection.
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.mediaflow_endpoint = "hls_manifest_proxy"
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
headers = {'Accept': '*/*', 'Connection': 'keep-alive', 'User-Agent': 'Mozilla/5.0 (Linux; Android 12) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.71 Mobile Safari/537.36', 'user-agent': 'Mozilla/5.0 (Linux; Android 12) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.71 Mobile Safari/537.36'}
"""Extract video URL from Supervideo.
Uses curl_cffi with Chrome impersonation to bypass Cloudflare.
"""
patterns = [r'file:"(.*?)"']
final_url = await eval_solver(self, url, headers, patterns)
try:
async with AsyncSession() as session:
response = await session.get(url, impersonate="chrome")
self.base_headers["referer"] = url
return {
"destination_url": final_url,
"request_headers": self.base_headers,
"mediaflow_endpoint": self.mediaflow_endpoint,
}
if response.status_code != 200:
raise ExtractorError(f"HTTP {response.status_code} while fetching {url}")
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("script"))
script_all = soup.find_all("script")
for script in script_all:
if script.text and detect(script.text):
unpacked_code = unpack(script.text)
for pattern in patterns:
match = re.search(pattern, unpacked_code)
if match:
extracted_url = match.group(1)
if not urlparse(extracted_url).scheme:
extracted_url = urljoin(url, extracted_url)
self.base_headers["referer"] = url
return {
"destination_url": extracted_url,
"request_headers": self.base_headers,
"mediaflow_endpoint": self.mediaflow_endpoint,
}
raise ExtractorError("No packed JS found or no file URL pattern matched")
except UnpackingError as e:
raise ExtractorError(f"Failed to unpack Supervideo JS: {e}")
except Exception as e:
if isinstance(e, ExtractorError):
raise
raise ExtractorError(f"Supervideo extraction failed: {e}")

View File

@@ -1,5 +1,4 @@
import re
from typing import Dict, Any
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
@@ -36,7 +35,7 @@ class TurboVidPlayExtractor(BaseExtractor):
if media_url.startswith("//"):
media_url = "https:" + media_url
elif media_url.startswith("/"):
media_url = response.url.origin + media_url
media_url = response.get_origin() + media_url
#
# 3. Fetch the intermediate playlist
@@ -53,16 +52,11 @@ class TurboVidPlayExtractor(BaseExtractor):
real_m3u8 = m2.group(0)
#
# 5. Final headers
#
self.base_headers["referer"] = url
#
# 6. Always return master proxy (your MediaFlow only supports this)
#
return {
"destination_url": real_m3u8,
"request_headers": self.base_headers,
"request_headers": {"origin": response.get_origin()},
"propagate_response_headers": {"content-type": "video/mp2t"},
"remove_response_headers": ["content-length", "content-range"],
"mediaflow_endpoint": "hls_manifest_proxy",
"stream_transformer": "ts_stream", # Use TS transformer for PNG/padding stripping
}

View File

@@ -1,5 +1,6 @@
import logging
from typing import Any, Dict, Optional
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
logger = logging.getLogger(__name__)
@@ -8,6 +9,11 @@ logger = logging.getLogger(__name__)
class VavooExtractor(BaseExtractor):
"""Vavoo URL extractor for resolving vavoo.to links.
Supports two URL formats:
1. Web-VOD API links: https://vavoo.to/web-vod/api/get?link=...
These redirect (302) to external video hosts (Doodstream, etc.)
2. Legacy mediahubmx format (currently broken on Vavoo's end)
Features:
- Uses BaseExtractor's retry/timeouts
- Improved headers to mimic Android okhttp client
@@ -18,6 +24,40 @@ class VavooExtractor(BaseExtractor):
super().__init__(request_headers)
self.mediaflow_endpoint = "proxy_stream_endpoint"
async def _resolve_web_vod_link(self, url: str) -> str:
    """Resolve a web-vod API link by getting the redirect Location header.

    Performs a single non-following GET against the Vavoo web-vod API and
    returns either the redirect target (3xx + Location) or, on a 200, the
    response body when it looks like a URL.

    Args:
        url: A ``https://vavoo.to/web-vod/api/get?link=...`` URL.

    Returns:
        The resolved external video-host URL.

    Raises:
        ExtractorError: On an unexpected status, a redirect without a
            Location header, or any transport failure.
    """
    import aiohttp

    try:
        # Use aiohttp directly with allow_redirects=False to get the Location header
        timeout = aiohttp.ClientTimeout(total=10)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(
                url,
                headers={"Accept": "application/json"},
                allow_redirects=False,
            ) as resp:
                # Check for redirect
                if resp.status in (301, 302, 303, 307, 308):
                    # NOTE: aiohttp headers are case-insensitive, so the
                    # second lookup is a belt-and-braces fallback.
                    location = resp.headers.get("Location") or resp.headers.get("location")
                    if location:
                        logger.info(f"Vavoo web-vod redirected to: {location}")
                        return location
                # If we got a 200, the response might contain the URL
                if resp.status == 200:
                    text = await resp.text()
                    if text and text.startswith("http"):
                        logger.info(f"Vavoo web-vod resolved to: {text.strip()}")
                        return text.strip()
                # Fall through: 3xx without Location, or any other status.
                raise ExtractorError(f"Vavoo web-vod API returned unexpected status {resp.status}")
    except ExtractorError:
        # Re-raise our own errors untouched.
        raise
    except Exception as e:
        # Wrap transport-level failures (timeouts, DNS, connection reset).
        raise ExtractorError(f"Failed to resolve Vavoo web-vod link: {e}")
async def get_auth_signature(self) -> Optional[str]:
"""Get authentication signature for Vavoo API (async)."""
headers = {
@@ -27,10 +67,11 @@ class VavooExtractor(BaseExtractor):
"accept-encoding": "gzip",
}
import time
current_time = int(time.time() * 1000)
data = {
"token": "tosFwQCJMS8qrW_AjLoHPQ41646J5dRNha6ZWHnijoYQQQoADQoXYSo7ki7O5-CsgN4CH0uRk6EEoJ0728ar9scCRQW3ZkbfrPfeCXW2VgopSW2FWDqPOoVYIuVPAOnXCZ5g",
"token": "",
"reason": "app-blur",
"locale": "de",
"theme": "dark",
@@ -40,21 +81,11 @@ class VavooExtractor(BaseExtractor):
"brand": "google",
"model": "Pixel",
"name": "sdk_gphone64_arm64",
"uniqueId": "d10e5d99ab665233"
},
"os": {
"name": "android",
"version": "13"
},
"app": {
"platform": "android",
"version": "3.1.21"
},
"version": {
"package": "tv.vavoo.app",
"binary": "3.1.21",
"js": "3.1.21"
"uniqueId": "d10e5d99ab665233",
},
"os": {"name": "android", "version": "13"},
"app": {"platform": "android", "version": "3.1.21"},
"version": {"package": "tv.vavoo.app", "binary": "3.1.21", "js": "3.1.21"},
},
"appFocusTime": 0,
"playerActive": False,
@@ -75,11 +106,9 @@ class VavooExtractor(BaseExtractor):
"ssVersion": 1,
"enabled": True,
"autoServer": True,
"id": "de-fra"
"id": "de-fra",
},
"iap": {
"supported": False
}
"iap": {"supported": False},
}
try:
@@ -94,7 +123,7 @@ class VavooExtractor(BaseExtractor):
try:
result = resp.json()
except Exception:
logger.warning("Vavoo ping returned non-json response (status=%s).", resp.status_code)
logger.warning("Vavoo ping returned non-json response (status=%s).", resp.status)
return None
addon_sig = result.get("addonSig") if isinstance(result, dict) else None
@@ -109,10 +138,48 @@ class VavooExtractor(BaseExtractor):
return None
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
"""Extract Vavoo stream URL (async)."""
"""Extract Vavoo stream URL (async).
Supports:
- Direct play URLs: https://vavoo.to/play/{id}/index.m3u8 (Live TV)
- Web-VOD API links: https://vavoo.to/web-vod/api/get?link=...
- Legacy mediahubmx links (may not work due to Vavoo API changes)
"""
if "vavoo.to" not in url:
raise ExtractorError("Not a valid Vavoo URL")
# Check if this is a direct play URL (Live TV)
# These URLs are already m3u8 streams but need auth signature
if "/play/" in url and url.endswith(".m3u8"):
signature = await self.get_auth_signature()
if not signature:
raise ExtractorError("Failed to get Vavoo authentication signature for Live TV")
stream_headers = {
"user-agent": "okhttp/4.11.0",
"referer": "https://vavoo.to/",
"mediahubmx-signature": signature,
}
return {
"destination_url": url,
"request_headers": stream_headers,
"mediaflow_endpoint": "hls_manifest_proxy",
}
# Check if this is a web-vod API link (new format)
if "/web-vod/api/get" in url:
resolved_url = await self._resolve_web_vod_link(url)
stream_headers = {
"user-agent": self.base_headers.get("user-agent", "Mozilla/5.0"),
"referer": "https://vavoo.to/",
}
return {
"destination_url": resolved_url,
"request_headers": stream_headers,
"mediaflow_endpoint": self.mediaflow_endpoint,
}
# Legacy mediahubmx flow
signature = await self.get_auth_signature()
if not signature:
raise ExtractorError("Failed to get Vavoo authentication signature")
@@ -139,14 +206,9 @@ class VavooExtractor(BaseExtractor):
"accept": "application/json",
"content-type": "application/json; charset=utf-8",
"accept-encoding": "gzip",
"mediahubmx-signature": signature
}
data = {
"language": "de",
"region": "AT",
"url": link,
"clientVersion": "3.1.21"
"mediahubmx-signature": signature,
}
data = {"language": "de", "region": "AT", "url": link, "clientVersion": "3.1.21"}
try:
logger.info(f"Attempting to resolve Vavoo URL: {link}")
resp = await self._make_request(
@@ -161,7 +223,11 @@ class VavooExtractor(BaseExtractor):
try:
result = resp.json()
except Exception:
logger.warning("Vavoo resolve returned non-json response (status=%s). Body preview: %s", resp.status_code, getattr(resp, "text", "")[:500])
logger.warning(
"Vavoo resolve returned non-json response (status=%s). Body preview: %s",
resp.status,
getattr(resp, "text", "")[:500],
)
return None
logger.debug("Vavoo API response: %s", result)

View File

@@ -16,10 +16,9 @@ class VidmolyExtractor(BaseExtractor):
raise ExtractorError("VIDMOLY: Invalid domain")
headers = {
"User-Agent":
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/120 Safari/537.36",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/120 Safari/537.36",
"Referer": url,
"Sec-Fetch-Dest": "iframe",
}
@@ -29,10 +28,7 @@ class VidmolyExtractor(BaseExtractor):
html = response.text
# --- Extract master m3u8 ---
match = re.search(
r'sources:\s*\[\{file:"([^"]+)',
html
)
match = re.search(r'sources\s*:\s*\[\s*\{\s*file\s*:\s*[\'"]([^\'"]+)', html)
if not match:
raise ExtractorError("VIDMOLY: Stream URL not found")
@@ -49,10 +45,8 @@ class VidmolyExtractor(BaseExtractor):
raise ExtractorError("VIDMOLY: Request timed out")
raise
if test.status_code >= 400:
raise ExtractorError(
f"VIDMOLY: Stream unavailable ({test.status_code})"
)
if test.status >= 400:
raise ExtractorError(f"VIDMOLY: Stream unavailable ({test.status})")
# Return MASTER playlist, not variant
# Let MediaFlow Proxy handle variants

View File

@@ -8,23 +8,23 @@ from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
class VidozaExtractor(BaseExtractor):
def __init__(self, request_headers: dict):
super().__init__(request_headers)
# if your base doesnt set this, keep it; otherwise you can remove:
self.mediaflow_endpoint = "proxy_stream_endpoint"
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
parsed = urlparse(url)
# Accept vidoza + videzz
if not parsed.hostname or not (
parsed.hostname.endswith("vidoza.net")
or parsed.hostname.endswith("videzz.net")
parsed.hostname.endswith("vidoza.net") or parsed.hostname.endswith("videzz.net")
):
raise ExtractorError("VIDOZA: Invalid domain")
# Use the correct referer for clones
referer = f"https://{parsed.hostname}/"
headers = self.base_headers.copy()
headers.update(
{
"referer": "https://vidoza.net/",
"referer": referer,
"user-agent": (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
@@ -35,16 +35,14 @@ class VidozaExtractor(BaseExtractor):
}
)
# 1) Fetch the embed page (or whatever URL you pass in)
# 1) Fetch embed page
response = await self._make_request(url, headers=headers)
html = response.text or ""
if not html:
raise ExtractorError("VIDOZA: Empty HTML from Vidoza")
raise ExtractorError("VIDOZA: Empty HTML")
cookies = response.cookies or {}
# 2) Extract final link with REGEX
# 2) Extract video URL
pattern = re.compile(
r"""["']?\s*(?:file|src)\s*["']?\s*[:=,]?\s*["'](?P<url>[^"']+)"""
r"""(?:[^}>\]]+)["']?\s*res\s*["']?\s*[:=]\s*["']?(?P<label>[^"',]+)""",
@@ -53,21 +51,15 @@ class VidozaExtractor(BaseExtractor):
match = pattern.search(html)
if not match:
raise ExtractorError("VIDOZA: Unable to extract video + label from JS")
raise ExtractorError("VIDOZA: Video URL not found")
mp4_url = match.group("url")
label = match.group("label").strip()
video_url = match.group("url")
# Fix URLs like //str38.vidoza.net/...
if mp4_url.startswith("//"):
mp4_url = "https:" + mp4_url
# 3) Attach cookies (token may depend on these)
if cookies:
headers["cookie"] = "; ".join(f"{k}={v}" for k, v in cookies.items())
if video_url.startswith("//"):
video_url = "https:" + video_url
return {
"destination_url": mp4_url,
"destination_url": video_url,
"request_headers": headers,
"mediaflow_endpoint": self.mediaflow_endpoint,
}

View File

@@ -1,7 +1,6 @@
import json
import re
from typing import Dict, Any
from urllib.parse import urlparse, parse_qs
from bs4 import BeautifulSoup, SoupStrainer
@@ -25,7 +24,7 @@ class VixCloudExtractor(BaseExtractor):
"Origin": f"{site_url}",
},
)
if response.status_code != 200:
if response.status != 200:
raise ExtractorError("Outdated Url")
# Soup the response
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("div", {"id": "app"}))
@@ -48,8 +47,8 @@ class VixCloudExtractor(BaseExtractor):
response = await self._make_request(iframe, headers={"x-inertia": "true", "x-inertia-version": version})
elif "movie" in url or "tv" in url:
response = await self._make_request(url)
if response.status_code != 200:
if response.status != 200:
raise ExtractorError("Failed to extract URL components, Invalid Request")
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("body"))
if soup:
@@ -58,7 +57,7 @@ class VixCloudExtractor(BaseExtractor):
expires = re.search(r"'expires':\s*'(\d+)'", script).group(1)
server_url = re.search(r"url:\s*'([^']+)'", script).group(1)
if "?b=1" in server_url:
final_url = f'{server_url}&token={token}&expires={expires}'
final_url = f"{server_url}&token={token}&expires={expires}"
else:
final_url = f"{server_url}?token={token}&expires={expires}"
if "window.canPlayFHD = true" in script:

View File

@@ -15,7 +15,7 @@ class VoeExtractor(BaseExtractor):
response = await self._make_request(url)
# See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/voesx.py
redirect_pattern = r'''window\.location\.href\s*=\s*'([^']+)'''
redirect_pattern = r"""window\.location\.href\s*=\s*'([^']+)"""
redirect_match = re.search(redirect_pattern, response.text, re.DOTALL)
if redirect_match:
if redirected:
@@ -37,7 +37,7 @@ class VoeExtractor(BaseExtractor):
data = self.voe_decode(code_and_script_match.group(1), luts_match.group(1))
final_url = data.get('source')
final_url = data.get("source")
if not final_url:
raise ExtractorError("VOE: failed to extract video URL")
@@ -51,8 +51,9 @@ class VoeExtractor(BaseExtractor):
@staticmethod
def voe_decode(ct: str, luts: str) -> Dict[str, Any]:
import json
lut = [''.join([('\\' + x) if x in '.*+?^${}()|[]\\' else x for x in i]) for i in luts[2:-2].split("','")]
txt = ''
lut = ["".join([("\\" + x) if x in ".*+?^${}()|[]\\" else x for x in i]) for i in luts[2:-2].split("','")]
txt = ""
for i in ct:
x = ord(i)
if 64 < x < 91:
@@ -61,8 +62,8 @@ class VoeExtractor(BaseExtractor):
x = (x - 84) % 26 + 97
txt += chr(x)
for i in lut:
txt = re.sub(i, '', txt)
ct = base64.b64decode(txt).decode('utf-8')
txt = ''.join([chr(ord(i) - 3) for i in ct])
txt = base64.b64decode(txt[::-1]).decode('utf-8')
txt = re.sub(i, "", txt)
ct = base64.b64decode(txt).decode("utf-8")
txt = "".join([chr(ord(i) - 3) for i in ct])
txt = base64.b64decode(txt[::-1]).decode("utf-8")
return json.loads(txt)