mirror of
https://github.com/UrloMythus/UnHided.git
synced 2026-04-11 11:50:51 +00:00
new version
This commit is contained in:
104
mediaflow_proxy/extractors/F16Px.py
Normal file
104
mediaflow_proxy/extractors/F16Px.py
Normal file
@@ -0,0 +1,104 @@
|
||||
# https://github.com/Gujal00/ResolveURL/blob/55c7f66524ebd65bc1f88650614e627b00167fa0/script.module.resolveurl/lib/resolveurl/plugins/f16px.py
|
||||
|
||||
import base64
|
||||
import json
|
||||
import re
|
||||
from typing import Dict, Any
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
from mediaflow_proxy.utils import python_aesgcm
|
||||
|
||||
|
||||
class F16PxExtractor(BaseExtractor):
    """Extractor for F16PX embed pages.

    Resolves ``/e/<id>`` embed URLs through the site's playback API. The API
    either returns plain sources directly, or an AES-GCM encrypted payload
    that is decrypted with a key assembled from base64url-encoded fragments
    (same scheme as the ResolveURL f16px plugin this is ported from).
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Final URLs are HLS manifests, so route them through the HLS proxy.
        self.mediaflow_endpoint = "hls_manifest_proxy"

    @staticmethod
    def _b64url_decode(value: str) -> bytes:
        """Decode a base64url string, restoring any stripped '=' padding."""
        padding = (-len(value)) % 4
        return base64.urlsafe_b64decode(value + "=" * padding)

    def _join_key_parts(self, parts) -> bytes:
        """Concatenate base64url-encoded key fragments into the raw AES key."""
        return b"".join(self._b64url_decode(p) for p in parts)

    async def extract(self, url: str) -> Dict[str, Any]:
        """Resolve an embed URL to a playable stream URL plus request headers.

        Raises:
            ExtractorError: on invalid embed URLs, bad API responses, or
                decryption/authentication failures.
        """
        parsed = urlparse(url)
        host = parsed.netloc
        origin = f"{parsed.scheme}://{parsed.netloc}"

        match = re.search(r"/e/([A-Za-z0-9]+)", parsed.path or "")
        if not match:
            raise ExtractorError("F16PX: Invalid embed URL")

        media_id = match.group(1)
        api_url = f"https://{host}/api/videos/{media_id}/embed/playback"

        headers = self.base_headers.copy()
        headers["referer"] = f"https://{host}/"

        resp = await self._make_request(api_url, headers=headers)
        try:
            data = resp.json()
        except Exception:
            raise ExtractorError("F16PX: Invalid JSON response")

        # Case 1: plain (unencrypted) sources.
        if "sources" in data and data["sources"]:
            src = data["sources"][0].get("url")
            if not src:
                raise ExtractorError("F16PX: Empty source URL")
            return {
                "destination_url": src,
                "request_headers": headers,
                "mediaflow_endpoint": self.mediaflow_endpoint,
            }

        # Case 2: encrypted playback blob.
        pb = data.get("playback")
        if not pb:
            raise ExtractorError("F16PX: No playback data")

        try:
            iv = self._b64url_decode(pb["iv"])  # GCM nonce
            key = self._join_key_parts(pb["key_parts"])  # AES key
            payload = self._b64url_decode(pb["payload"])  # ciphertext + tag

            cipher = python_aesgcm.new(key)
            decrypted = cipher.open(iv, payload)  # AAD = '' like ResolveURL

            if decrypted is None:
                raise ExtractorError("F16PX: GCM authentication failed")

            decrypted_json = json.loads(decrypted.decode("utf-8", "ignore"))
        except ExtractorError:
            raise
        except Exception as e:
            raise ExtractorError(f"F16PX: Decryption failed ({e})")

        sources = decrypted_json.get("sources") or []
        if not sources:
            raise ExtractorError("F16PX: No sources after decryption")

        best = sources[0].get("url")
        if not best:
            raise ExtractorError("F16PX: Empty source URL after decryption")

        # BUGFIX: build a dedicated header dict instead of clearing and
        # mutating self.base_headers — the original clobbered shared extractor
        # state (dropping the configured user-agent), which broke any later
        # request made through the same instance.
        stream_headers = {
            "referer": f"{origin}/",
            "origin": origin,
            "Accept-Language": "en-US,en;q=0.5",
            "Accept": "*/*",
            "user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0",
        }
        return {
            "destination_url": best,
            "request_headers": stream_headers,
            "mediaflow_endpoint": self.mediaflow_endpoint,
        }
|
||||
@@ -1,48 +1,121 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, Optional, Any
|
||||
|
||||
import asyncio
|
||||
import httpx
|
||||
import logging
|
||||
|
||||
from mediaflow_proxy.configs import settings
|
||||
from mediaflow_proxy.utils.http_utils import create_httpx_client
|
||||
from mediaflow_proxy.utils.http_utils import create_httpx_client, DownloadError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ExtractorError(Exception):
    """Common base exception raised by every extractor on failure."""
||||
|
||||
class BaseExtractor(ABC):
    """Base class for all URL extractors.

    Improvements:
    - Built-in retry/backoff for transient network errors
    - Configurable timeouts and per-request overrides
    - Logging of non-2xx responses with a body preview for debugging
    """

    def __init__(self, request_headers: dict):
        # Defaults first, then merge caller-supplied headers
        # (e.g. Accept-Language / Referer) on top.
        self.base_headers = {
            "user-agent": settings.user_agent,
        }
        self.mediaflow_endpoint = "proxy_stream_endpoint"
        self.base_headers.update(request_headers or {})

    async def _make_request(
        self,
        url: str,
        method: str = "GET",
        headers: Optional[Dict] = None,
        timeout: Optional[float] = None,
        retries: int = 3,
        backoff_factor: float = 0.5,
        raise_on_status: bool = True,
        **kwargs,
    ) -> httpx.Response:
        """
        Make HTTP request with retry and timeout support.

        Parameters
        ----------
        timeout : float | None
            Seconds to wait for the request (applied to httpx.Timeout). Defaults to 15s.
        retries : int
            Number of attempts for transient errors (values < 1 are treated as 1).
        backoff_factor : float
            Base for exponential backoff between retries.
        raise_on_status : bool
            If True, HTTP non-2xx raises DownloadError (preserves status code).

        Raises
        ------
        DownloadError
            On HTTP non-2xx responses when ``raise_on_status`` is True.
        ExtractorError
            On exhausted retries or any unexpected failure.
        """
        # Guard: retries <= 0 would skip the loop entirely and raise
        # "Request failed ... None" without ever attempting the request.
        attempts_allowed = max(1, retries)
        attempt = 0
        last_exc = None

        # Build request headers merging base and per-request overrides.
        request_headers = self.base_headers.copy()
        if headers:
            request_headers.update(headers)

        timeout_cfg = httpx.Timeout(timeout or 15.0)

        while attempt < attempts_allowed:
            try:
                async with create_httpx_client(timeout=timeout_cfg) as client:
                    response = await client.request(
                        method,
                        url,
                        headers=request_headers,
                        **kwargs,
                    )

                    if raise_on_status:
                        try:
                            response.raise_for_status()
                        except httpx.HTTPStatusError as e:
                            # Provide a short body preview for debugging.
                            try:
                                body_preview = e.response.text[:500]
                            except Exception:
                                body_preview = "<unreadable body>"
                            logger.debug(
                                "HTTPStatusError for %s (status=%s) -- body preview: %s",
                                url,
                                e.response.status_code,
                                body_preview,
                            )
                            raise DownloadError(
                                e.response.status_code,
                                f"HTTP error {e.response.status_code} while requesting {url}",
                            )
                    return response

            except DownloadError:
                # Do not retry on explicit HTTP status errors (they are intentional)
                raise
            except (httpx.ReadTimeout, httpx.ConnectTimeout, httpx.NetworkError, httpx.TransportError) as e:
                # Transient network error — retry with exponential backoff.
                last_exc = e
                attempt += 1
                sleep_for = backoff_factor * (2 ** (attempt - 1))
                logger.warning(
                    "Transient network error (attempt %s/%s) for %s: %s — retrying in %.1fs",
                    attempt,
                    attempts_allowed,
                    url,
                    e,
                    sleep_for,
                )
                await asyncio.sleep(sleep_for)
                continue
            except Exception as e:
                # Unexpected exception — wrap as ExtractorError to keep interface consistent
                logger.exception("Unhandled exception while requesting %s: %s", url, e)
                raise ExtractorError(f"Request failed for URL {url}: {str(e)}")

        logger.error("All retries failed for %s: %s", url, last_exc)
        raise ExtractorError(f"Request failed for URL {url}: {str(last_exc)}")

    @abstractmethod
    async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
        """Extract the destination stream URL and required request headers."""
||||
@@ -1,543 +1,332 @@
|
||||
import re
|
||||
import base64
|
||||
import logging
|
||||
from typing import Any, Dict, Optional
|
||||
from urllib.parse import urlparse, quote, urlunparse
|
||||
|
||||
from typing import Any, Dict, Optional, List
|
||||
from urllib.parse import urlparse, quote_plus, urljoin
|
||||
|
||||
|
||||
import httpx
|
||||
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Silenzia l'errore ConnectionResetError su Windows
|
||||
logging.getLogger('asyncio').setLevel(logging.CRITICAL)
|
||||
|
||||
|
||||
class DLHDExtractor(BaseExtractor):
|
||||
"""DLHD (DaddyLive) URL extractor for M3U8 streams."""
|
||||
"""DLHD (DaddyLive) URL extractor for M3U8 streams.
|
||||
|
||||
|
||||
Notes:
|
||||
- Multi-domain support for daddylive.sx / dlhd.dad
|
||||
- Robust extraction of auth parameters and server lookup
|
||||
- Uses retries/timeouts via BaseExtractor where possible
|
||||
- Multi-iframe fallback for resilience
|
||||
"""
|
||||
|
||||
|
||||
def __init__(self, request_headers: dict):
|
||||
super().__init__(request_headers)
|
||||
# Default to HLS proxy endpoint
|
||||
self.mediaflow_endpoint = "hls_manifest_proxy"
|
||||
# Cache for the resolved base URL to avoid repeated network calls
|
||||
self._cached_base_url = None
|
||||
# Store iframe context for newkso.ru requests
|
||||
self._iframe_context = None
|
||||
self._iframe_context: Optional[str] = None
|
||||
|
||||
def _get_headers_for_url(self, url: str, base_headers: dict) -> dict:
|
||||
"""Get appropriate headers for the given URL, applying newkso.ru specific headers if needed."""
|
||||
headers = base_headers.copy()
|
||||
|
||||
# Check if URL contains newkso.ru domain
|
||||
parsed_url = urlparse(url)
|
||||
if "newkso.ru" in parsed_url.netloc:
|
||||
# Use iframe URL as referer if available, otherwise use the newkso domain itself
|
||||
if self._iframe_context:
|
||||
iframe_origin = f"https://{urlparse(self._iframe_context).netloc}"
|
||||
newkso_headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36',
|
||||
'Referer': self._iframe_context,
|
||||
'Origin': iframe_origin
|
||||
}
|
||||
logger.info(f"Applied newkso.ru specific headers with iframe context for URL: {url}")
|
||||
logger.debug(f"Headers applied: {newkso_headers}")
|
||||
else:
|
||||
# Fallback to newkso domain itself
|
||||
newkso_origin = f"{parsed_url.scheme}://{parsed_url.netloc}"
|
||||
newkso_headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36',
|
||||
'Referer': newkso_origin,
|
||||
'Origin': newkso_origin
|
||||
}
|
||||
logger.info(f"Applied newkso.ru specific headers (fallback) for URL: {url}")
|
||||
logger.debug(f"Headers applied: {newkso_headers}")
|
||||
|
||||
|
||||
async def _make_request(self, url: str, method: str = "GET", headers: Optional[Dict] = None, **kwargs) -> Any:
|
||||
"""Override to disable SSL verification for this extractor and use fetch_with_retry if available."""
|
||||
from mediaflow_proxy.utils.http_utils import create_httpx_client, fetch_with_retry
|
||||
|
||||
|
||||
timeout = kwargs.pop("timeout", 15)
|
||||
retries = kwargs.pop("retries", 3)
|
||||
backoff_factor = kwargs.pop("backoff_factor", 0.5)
|
||||
|
||||
|
||||
async with create_httpx_client(verify=False, timeout=httpx.Timeout(timeout)) as client:
|
||||
try:
|
||||
return await fetch_with_retry(client, method, url, headers or {}, timeout=timeout)
|
||||
except Exception:
|
||||
logger.debug("fetch_with_retry failed or unavailable; falling back to direct request for %s", url)
|
||||
response = await client.request(method, url, headers=headers or {}, timeout=timeout)
|
||||
response.raise_for_status()
|
||||
return response
|
||||
|
||||
|
||||
async def _extract_lovecdn_stream(self, iframe_url: str, iframe_content: str, headers: dict) -> Dict[str, Any]:
|
||||
"""
|
||||
Estrattore alternativo per iframe lovecdn.ru che usa un formato diverso.
|
||||
"""
|
||||
try:
|
||||
# Cerca pattern di stream URL diretto
|
||||
m3u8_patterns = [
|
||||
r'["\']([^"\']*\.m3u8[^"\']*)["\']',
|
||||
r'source[:\s]+["\']([^"\']+)["\']',
|
||||
r'file[:\s]+["\']([^"\']+\.m3u8[^"\']*)["\']',
|
||||
r'hlsManifestUrl[:\s]*["\']([^"\']+)["\']',
|
||||
]
|
||||
|
||||
headers.update(newkso_headers)
|
||||
|
||||
return headers
|
||||
stream_url = None
|
||||
for pattern in m3u8_patterns:
|
||||
matches = re.findall(pattern, iframe_content)
|
||||
for match in matches:
|
||||
if '.m3u8' in match and match.startswith('http'):
|
||||
stream_url = match
|
||||
logger.info(f"Found direct m3u8 URL: {stream_url}")
|
||||
break
|
||||
if stream_url:
|
||||
break
|
||||
|
||||
# Pattern 2: Cerca costruzione dinamica URL
|
||||
if not stream_url:
|
||||
channel_match = re.search(r'(?:stream|channel)["\s:=]+["\']([^"\']+)["\']', iframe_content)
|
||||
server_match = re.search(r'(?:server|domain|host)["\s:=]+["\']([^"\']+)["\']', iframe_content)
|
||||
|
||||
if channel_match:
|
||||
channel_name = channel_match.group(1)
|
||||
server = server_match.group(1) if server_match else 'newkso.ru'
|
||||
stream_url = f"https://{server}/{channel_name}/mono.m3u8"
|
||||
logger.info(f"Constructed stream URL: {stream_url}")
|
||||
|
||||
if not stream_url:
|
||||
# Fallback: cerca qualsiasi URL che sembri uno stream
|
||||
url_pattern = r'https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*'
|
||||
matches = re.findall(url_pattern, iframe_content)
|
||||
if matches:
|
||||
stream_url = matches[0]
|
||||
logger.info(f"Found fallback stream URL: {stream_url}")
|
||||
|
||||
if not stream_url:
|
||||
raise ExtractorError(f"Could not find stream URL in lovecdn.ru iframe")
|
||||
|
||||
# Usa iframe URL come referer
|
||||
iframe_origin = f"https://{urlparse(iframe_url).netloc}"
|
||||
stream_headers = {
|
||||
'User-Agent': headers['User-Agent'],
|
||||
'Referer': iframe_url,
|
||||
'Origin': iframe_origin
|
||||
}
|
||||
|
||||
# Determina endpoint in base al dominio dello stream
|
||||
endpoint = "hls_key_proxy"
|
||||
|
||||
async def _make_request(self, url: str, method: str = "GET", headers: dict = None, **kwargs):
|
||||
"""Override _make_request to apply newkso.ru specific headers when needed."""
|
||||
request_headers = headers or {}
|
||||
logger.info(f"Using lovecdn.ru stream with endpoint: {endpoint}")
|
||||
|
||||
return {
|
||||
"destination_url": stream_url,
|
||||
"request_headers": stream_headers,
|
||||
"mediaflow_endpoint": endpoint,
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
raise ExtractorError(f"Failed to extract lovecdn.ru stream: {e}")
|
||||
|
||||
async def _extract_new_auth_flow(self, iframe_url: str, iframe_content: str, headers: dict) -> Dict[str, Any]:
|
||||
"""Handles the new authentication flow found in recent updates."""
|
||||
|
||||
# Apply newkso.ru specific headers if the URL contains newkso.ru
|
||||
final_headers = self._get_headers_for_url(url, request_headers)
|
||||
def _extract_params(js: str) -> Dict[str, Optional[str]]:
|
||||
params = {}
|
||||
patterns = {
|
||||
"channel_key": r'(?:const|var|let)\s+(?:CHANNEL_KEY|channelKey)\s*=\s*["\']([^"\']+)["\']',
|
||||
"auth_token": r'(?:const|var|let)\s+AUTH_TOKEN\s*=\s*["\']([^"\']+)["\']',
|
||||
"auth_country": r'(?:const|var|let)\s+AUTH_COUNTRY\s*=\s*["\']([^"\']+)["\']',
|
||||
"auth_ts": r'(?:const|var|let)\s+AUTH_TS\s*=\s*["\']([^"\']+)["\']',
|
||||
"auth_expiry": r'(?:const|var|let)\s+AUTH_EXPIRY\s*=\s*["\']([^"\']+)["\']',
|
||||
}
|
||||
for key, pattern in patterns.items():
|
||||
match = re.search(pattern, js)
|
||||
params[key] = match.group(1) if match else None
|
||||
return params
|
||||
|
||||
params = _extract_params(iframe_content)
|
||||
|
||||
return await super()._make_request(url, method, final_headers, **kwargs)
|
||||
missing_params = [k for k, v in params.items() if not v]
|
||||
if missing_params:
|
||||
# This is not an error, just means it's not the new flow
|
||||
raise ExtractorError(f"Not the new auth flow: missing params {missing_params}")
|
||||
|
||||
logger.info("New auth flow detected. Proceeding with POST auth.")
|
||||
|
||||
# 1. Initial Auth POST
|
||||
auth_url = 'https://security.newkso.ru/auth2.php'
|
||||
# Use files parameter to force multipart/form-data which is required by the server
|
||||
# (None, value) tells httpx to send it as a form field, not a file upload
|
||||
multipart_data = {
|
||||
'channelKey': (None, params["channel_key"]),
|
||||
'country': (None, params["auth_country"]),
|
||||
'timestamp': (None, params["auth_ts"]),
|
||||
'expiry': (None, params["auth_expiry"]),
|
||||
'token': (None, params["auth_token"]),
|
||||
}
|
||||
|
||||
iframe_origin = f"https://{urlparse(iframe_url).netloc}"
|
||||
auth_headers = headers.copy()
|
||||
auth_headers.update({
|
||||
'Accept': '*/*',
|
||||
'Accept-Language': 'en-US,en;q=0.9',
|
||||
'Origin': iframe_origin,
|
||||
'Referer': iframe_url,
|
||||
'Sec-Fetch-Dest': 'empty',
|
||||
'Sec-Fetch-Mode': 'cors',
|
||||
'Sec-Fetch-Site': 'cross-site',
|
||||
'Priority': 'u=1, i',
|
||||
})
|
||||
|
||||
from mediaflow_proxy.utils.http_utils import create_httpx_client
|
||||
try:
|
||||
async with create_httpx_client(verify=False) as client:
|
||||
# Note: using 'files' instead of 'data' to ensure multipart/form-data Content-Type
|
||||
auth_resp = await client.post(auth_url, files=multipart_data, headers=auth_headers, timeout=12)
|
||||
auth_resp.raise_for_status()
|
||||
auth_data = auth_resp.json()
|
||||
if not (auth_data.get("valid") or auth_data.get("success")):
|
||||
raise ExtractorError(f"Initial auth failed with response: {auth_data}")
|
||||
logger.info("New auth flow: Initial auth successful.")
|
||||
except Exception as e:
|
||||
raise ExtractorError(f"New auth flow failed during initial auth POST: {e}")
|
||||
|
||||
# 2. Server Lookup
|
||||
server_lookup_url = f"https://{urlparse(iframe_url).netloc}/server_lookup.js?channel_id={params['channel_key']}"
|
||||
try:
|
||||
# Use _make_request as it handles retries and expects JSON
|
||||
lookup_resp = await self._make_request(server_lookup_url, headers=headers, timeout=10)
|
||||
server_data = lookup_resp.json()
|
||||
server_key = server_data.get('server_key')
|
||||
if not server_key:
|
||||
raise ExtractorError(f"No server_key in lookup response: {server_data}")
|
||||
logger.info(f"New auth flow: Server lookup successful - Server key: {server_key}")
|
||||
except Exception as e:
|
||||
raise ExtractorError(f"New auth flow failed during server lookup: {e}")
|
||||
|
||||
# 3. Build final stream URL
|
||||
channel_key = params['channel_key']
|
||||
auth_token = params['auth_token']
|
||||
# The JS logic uses .css, not .m3u8
|
||||
if server_key == 'top1/cdn':
|
||||
stream_url = f'https://top1.newkso.ru/top1/cdn/{channel_key}/mono.css'
|
||||
else:
|
||||
stream_url = f'https://{server_key}new.newkso.ru/{server_key}/{channel_key}/mono.css'
|
||||
|
||||
logger.info(f'New auth flow: Constructed stream URL: {stream_url}')
|
||||
|
||||
stream_headers = {
|
||||
'User-Agent': headers['User-Agent'],
|
||||
'Referer': iframe_url,
|
||||
'Origin': iframe_origin,
|
||||
'Authorization': f'Bearer {auth_token}',
|
||||
'X-Channel-Key': channel_key
|
||||
}
|
||||
|
||||
return {
|
||||
"destination_url": stream_url,
|
||||
"request_headers": stream_headers,
|
||||
"mediaflow_endpoint": "hls_manifest_proxy",
|
||||
}
|
||||
|
||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||
"""Extract DLHD stream URL and required headers (logica tvproxy adattata async, con fallback su endpoint alternativi)."""
|
||||
from urllib.parse import urlparse, quote_plus
|
||||
"""Main extraction flow: resolve base, fetch players, extract iframe, auth and final m3u8."""
|
||||
baseurl = "https://dlhd.dad/"
|
||||
|
||||
async def get_daddylive_base_url():
|
||||
if self._cached_base_url:
|
||||
return self._cached_base_url
|
||||
try:
|
||||
resp = await self._make_request("https://daddylive.sx/")
|
||||
# resp.url is the final URL after redirects
|
||||
base_url = str(resp.url)
|
||||
if not base_url.endswith('/'):
|
||||
base_url += '/'
|
||||
self._cached_base_url = base_url
|
||||
return base_url
|
||||
except Exception:
|
||||
# Fallback to default if request fails
|
||||
return "https://daddylive.sx/"
|
||||
|
||||
def extract_channel_id(url):
|
||||
match_premium = re.search(r'/premium(\d+)/mono\.m3u8$', url)
|
||||
if match_premium:
|
||||
return match_premium.group(1)
|
||||
# Handle both normal and URL-encoded patterns
|
||||
match_player = re.search(r'/(?:watch|stream|cast|player)/stream-(\d+)\.php', url)
|
||||
if match_player:
|
||||
return match_player.group(1)
|
||||
# Handle URL-encoded patterns like %2Fstream%2Fstream-123.php or just stream-123.php
|
||||
match_encoded = re.search(r'(?:%2F|/)stream-(\d+)\.php', url, re.IGNORECASE)
|
||||
if match_encoded:
|
||||
return match_encoded.group(1)
|
||||
# Handle direct stream- pattern without path
|
||||
match_direct = re.search(r'stream-(\d+)\.php', url)
|
||||
if match_direct:
|
||||
return match_direct.group(1)
|
||||
def extract_channel_id(u: str) -> Optional[str]:
|
||||
match_watch_id = re.search(r'watch\.php\?id=(\d+)', u)
|
||||
if match_watch_id:
|
||||
return match_watch_id.group(1)
|
||||
return None
|
||||
|
||||
async def try_endpoint(baseurl, endpoint, channel_id):
|
||||
stream_url = f"{baseurl}{endpoint}stream-{channel_id}.php"
|
||||
|
||||
async def get_stream_data(initial_url: str):
|
||||
daddy_origin = urlparse(baseurl).scheme + "://" + urlparse(baseurl).netloc
|
||||
daddylive_headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36',
|
||||
'Referer': baseurl,
|
||||
'Origin': daddy_origin
|
||||
}
|
||||
# 1. Richiesta alla pagina stream/cast/player/watch
|
||||
resp1 = await self._make_request(stream_url, headers=daddylive_headers)
|
||||
# 2. Estrai link Player 2
|
||||
iframes = re.findall(r'<a[^>]*href="([^"]+)"[^>]*>\s*<button[^>]*>\s*Player\s*2\s*</button>', resp1.text)
|
||||
if not iframes:
|
||||
raise ExtractorError("No Player 2 link found")
|
||||
url2 = iframes[0]
|
||||
url2 = baseurl + url2
|
||||
url2 = url2.replace('//cast', '/cast')
|
||||
daddylive_headers['Referer'] = url2
|
||||
daddylive_headers['Origin'] = url2
|
||||
# 3. Richiesta alla pagina Player 2
|
||||
resp2 = await self._make_request(url2, headers=daddylive_headers)
|
||||
# 4. Estrai iframe
|
||||
iframes2 = re.findall(r'iframe src="([^"]*)', resp2.text)
|
||||
if not iframes2:
|
||||
raise ExtractorError("No iframe found in Player 2 page")
|
||||
iframe_url = iframes2[0]
|
||||
# Store iframe context for newkso.ru requests
|
||||
self._iframe_context = iframe_url
|
||||
resp3 = await self._make_request(iframe_url, headers=daddylive_headers)
|
||||
iframe_content = resp3.text
|
||||
# 5. Estrai parametri auth (robusto) - Handle both old and new formats
|
||||
def extract_var_old_format(js, name):
|
||||
# Try multiple patterns for variable extraction (old format)
|
||||
patterns = [
|
||||
rf'var (?:__)?{name}\s*=\s*atob\("([^"]+)"\)',
|
||||
rf'var (?:__)?{name}\s*=\s*atob\(\'([^\']+)\'\)',
|
||||
rf'(?:var\s+)?(?:__)?{name}\s*=\s*atob\s*\(\s*["\']([^"\']+)["\']\s*\)',
|
||||
rf'(?:let|const)\s+(?:__)?{name}\s*=\s*atob\s*\(\s*["\']([^"\']+)["\']\s*\)'
|
||||
]
|
||||
for pattern in patterns:
|
||||
m = re.search(pattern, js)
|
||||
if m:
|
||||
try:
|
||||
return base64.b64decode(m.group(1)).decode('utf-8')
|
||||
except Exception as decode_error:
|
||||
logger.warning(f"Failed to decode base64 for variable {name}: {decode_error}")
|
||||
continue
|
||||
return None
|
||||
|
||||
def extract_xjz_format(js):
|
||||
"""Extract parameters from the new XJZ base64-encoded JSON format."""
|
||||
|
||||
|
||||
# 1. Request initial page
|
||||
resp1 = await self._make_request(initial_url, headers=daddylive_headers, timeout=15)
|
||||
player_links = re.findall(r'<button[^>]*data-url="([^"]+)"[^>]*>Player\s*\d+</button>', resp1.text)
|
||||
if not player_links:
|
||||
raise ExtractorError("No player links found on the page.")
|
||||
|
||||
|
||||
# Prova tutti i player e raccogli tutti gli iframe validi
|
||||
last_player_error = None
|
||||
iframe_candidates = []
|
||||
|
||||
for player_url in player_links:
|
||||
try:
|
||||
# Look for the XJZ variable assignment
|
||||
xjz_pattern = r'const\s+XJZ\s*=\s*["\']([^"\']+)["\']'
|
||||
match = re.search(xjz_pattern, js)
|
||||
if not match:
|
||||
return None
|
||||
xjz_b64 = match.group(1)
|
||||
import json
|
||||
# Decode the first base64 layer (JSON)
|
||||
xjz_json = base64.b64decode(xjz_b64).decode('utf-8')
|
||||
xjz_obj = json.loads(xjz_json)
|
||||
# Each value is also base64-encoded, decode each
|
||||
decoded = {}
|
||||
for k, v in xjz_obj.items():
|
||||
try:
|
||||
decoded[k] = base64.b64decode(v).decode('utf-8')
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to decode XJZ field {k}: {e}")
|
||||
decoded[k] = v
|
||||
return decoded
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to extract XJZ format: {e}")
|
||||
return None
|
||||
if not player_url.startswith('http'):
|
||||
player_url = baseurl + player_url.lstrip('/')
|
||||
|
||||
def extract_bundle_format(js):
|
||||
"""Extract parameters from new BUNDLE format (legacy fallback)."""
|
||||
|
||||
daddylive_headers['Referer'] = player_url
|
||||
daddylive_headers['Origin'] = player_url
|
||||
resp2 = await self._make_request(player_url, headers=daddylive_headers, timeout=12)
|
||||
iframes2 = re.findall(r'<iframe.*?src="([^"]*)"', resp2.text)
|
||||
|
||||
# Raccogli tutti gli iframe trovati
|
||||
for iframe in iframes2:
|
||||
if iframe not in iframe_candidates:
|
||||
iframe_candidates.append(iframe)
|
||||
logger.info(f"Found iframe candidate: {iframe}")
|
||||
|
||||
except Exception as e:
|
||||
last_player_error = e
|
||||
logger.warning(f"Failed to process player link {player_url}: {e}")
|
||||
continue
|
||||
|
||||
|
||||
if not iframe_candidates:
|
||||
if last_player_error:
|
||||
raise ExtractorError(f"All player links failed. Last error: {last_player_error}")
|
||||
raise ExtractorError("No valid iframe found in any player page")
|
||||
|
||||
|
||||
# Prova ogni iframe finché uno non funziona
|
||||
last_iframe_error = None
|
||||
|
||||
for iframe_candidate in iframe_candidates:
|
||||
try:
|
||||
bundle_patterns = [
|
||||
r'const\s+BUNDLE\s*=\s*["\']([^"\']+)["\']',
|
||||
r'var\s+BUNDLE\s*=\s*["\']([^"\']+)["\']',
|
||||
r'let\s+BUNDLE\s*=\s*["\']([^"\']+)["\']'
|
||||
]
|
||||
bundle_data = None
|
||||
for pattern in bundle_patterns:
|
||||
match = re.search(pattern, js)
|
||||
if match:
|
||||
bundle_data = match.group(1)
|
||||
break
|
||||
if not bundle_data:
|
||||
return None
|
||||
import json
|
||||
bundle_json = base64.b64decode(bundle_data).decode('utf-8')
|
||||
bundle_obj = json.loads(bundle_json)
|
||||
decoded_bundle = {}
|
||||
for key, value in bundle_obj.items():
|
||||
try:
|
||||
decoded_bundle[key] = base64.b64decode(value).decode('utf-8')
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to decode bundle field {key}: {e}")
|
||||
decoded_bundle[key] = value
|
||||
return decoded_bundle
|
||||
logger.info(f"Trying iframe: {iframe_candidate}")
|
||||
|
||||
iframe_domain = urlparse(iframe_candidate).netloc
|
||||
if not iframe_domain:
|
||||
logger.warning(f"Invalid iframe URL format: {iframe_candidate}")
|
||||
continue
|
||||
|
||||
self._iframe_context = iframe_candidate
|
||||
resp3 = await self._make_request(iframe_candidate, headers=daddylive_headers, timeout=12)
|
||||
iframe_content = resp3.text
|
||||
logger.info(f"Successfully loaded iframe from: {iframe_domain}")
|
||||
|
||||
if 'lovecdn.ru' in iframe_domain:
|
||||
logger.info("Detected lovecdn.ru iframe - using alternative extraction")
|
||||
return await self._extract_lovecdn_stream(iframe_candidate, iframe_content, daddylive_headers)
|
||||
else:
|
||||
logger.info("Attempting new auth flow extraction.")
|
||||
return await self._extract_new_auth_flow(iframe_candidate, iframe_content, daddylive_headers)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to extract bundle format: {e}")
|
||||
return None
|
||||
|
||||
# Try multiple patterns for channel key extraction
|
||||
channel_key = None
|
||||
channel_key_patterns = [
|
||||
r'const\s+CHANNEL_KEY\s*=\s*["\']([^"\']+)["\']',
|
||||
r'var\s+CHANNEL_KEY\s*=\s*["\']([^"\']+)["\']',
|
||||
r'let\s+CHANNEL_KEY\s*=\s*["\']([^"\']+)["\']',
|
||||
r'channelKey\s*=\s*["\']([^"\']+)["\']',
|
||||
r'var\s+channelKey\s*=\s*["\']([^"\']+)["\']',
|
||||
r'(?:let|const)\s+channelKey\s*=\s*["\']([^"\']+)["\']'
|
||||
]
|
||||
for pattern in channel_key_patterns:
|
||||
match = re.search(pattern, iframe_content)
|
||||
if match:
|
||||
channel_key = match.group(1)
|
||||
break
|
||||
|
||||
# Try new XJZ format first
|
||||
xjz_data = extract_xjz_format(iframe_content)
|
||||
if xjz_data:
|
||||
logger.info("Using new XJZ format for parameter extraction")
|
||||
auth_host = xjz_data.get('b_host')
|
||||
auth_php = xjz_data.get('b_script')
|
||||
auth_ts = xjz_data.get('b_ts')
|
||||
auth_rnd = xjz_data.get('b_rnd')
|
||||
auth_sig = xjz_data.get('b_sig')
|
||||
logger.debug(f"XJZ data extracted: {xjz_data}")
|
||||
else:
|
||||
# Try bundle format (legacy fallback)
|
||||
bundle_data = extract_bundle_format(iframe_content)
|
||||
if bundle_data:
|
||||
logger.info("Using BUNDLE format for parameter extraction")
|
||||
auth_host = bundle_data.get('b_host')
|
||||
auth_php = bundle_data.get('b_script')
|
||||
auth_ts = bundle_data.get('b_ts')
|
||||
auth_rnd = bundle_data.get('b_rnd')
|
||||
auth_sig = bundle_data.get('b_sig')
|
||||
logger.debug(f"Bundle data extracted: {bundle_data}")
|
||||
else:
|
||||
logger.info("Falling back to old format for parameter extraction")
|
||||
# Fall back to old format
|
||||
auth_ts = extract_var_old_format(iframe_content, 'c')
|
||||
auth_rnd = extract_var_old_format(iframe_content, 'd')
|
||||
auth_sig = extract_var_old_format(iframe_content, 'e')
|
||||
auth_host = extract_var_old_format(iframe_content, 'a')
|
||||
auth_php = extract_var_old_format(iframe_content, 'b')
|
||||
logger.warning(f"Failed to process iframe {iframe_candidate}: {e}")
|
||||
last_iframe_error = e
|
||||
continue
|
||||
|
||||
# Log what we found for debugging
|
||||
logger.debug(f"Extracted parameters: channel_key={channel_key}, auth_ts={auth_ts}, auth_rnd={auth_rnd}, auth_sig={auth_sig}, auth_host={auth_host}, auth_php={auth_php}")
|
||||
raise ExtractorError(f"All iframe candidates failed. Last error: {last_iframe_error}")
|
||||
|
||||
# Check which parameters are missing
|
||||
missing_params = []
|
||||
if not channel_key:
|
||||
missing_params.append('channel_key/CHANNEL_KEY')
|
||||
if not auth_ts:
|
||||
missing_params.append('auth_ts (var c / b_ts)')
|
||||
if not auth_rnd:
|
||||
missing_params.append('auth_rnd (var d / b_rnd)')
|
||||
if not auth_sig:
|
||||
missing_params.append('auth_sig (var e / b_sig)')
|
||||
if not auth_host:
|
||||
missing_params.append('auth_host (var a / b_host)')
|
||||
if not auth_php:
|
||||
missing_params.append('auth_php (var b / b_script)')
|
||||
|
||||
if missing_params:
|
||||
logger.error(f"Missing parameters: {', '.join(missing_params)}")
|
||||
# Log a portion of the iframe content for debugging (first 2000 chars)
|
||||
logger.debug(f"Iframe content sample: {iframe_content[:2000]}")
|
||||
raise ExtractorError(f"Error extracting parameters: missing {', '.join(missing_params)}")
|
||||
auth_sig = quote_plus(auth_sig)
|
||||
# 6. Richiesta auth
|
||||
# Se il sito fornisce ancora /a.php ma ora serve /auth.php, sostituisci
|
||||
# Normalize and robustly replace any variant of a.php with /auth.php
|
||||
if auth_php:
|
||||
normalized_auth_php = auth_php.strip().lstrip('/')
|
||||
if normalized_auth_php == 'a.php':
|
||||
logger.info("Sostituisco qualunque variante di a.php con /auth.php per compatibilità.")
|
||||
auth_php = '/auth.php'
|
||||
# Unisci host e script senza doppio slash
|
||||
if auth_host.endswith('/') and auth_php.startswith('/'):
|
||||
auth_url = f'{auth_host[:-1]}{auth_php}'
|
||||
elif not auth_host.endswith('/') and not auth_php.startswith('/'):
|
||||
auth_url = f'{auth_host}/{auth_php}'
|
||||
else:
|
||||
auth_url = f'{auth_host}{auth_php}'
|
||||
auth_url = f'{auth_url}?channel_id={channel_key}&ts={auth_ts}&rnd={auth_rnd}&sig={auth_sig}'
|
||||
auth_resp = await self._make_request(auth_url, headers=daddylive_headers)
|
||||
# 7. Lookup server - Extract host parameter
|
||||
host = None
|
||||
host_patterns = [
|
||||
r'(?s)m3u8 =.*?:.*?:.*?".*?".*?"([^"]*)', # Original pattern
|
||||
r'm3u8\s*=.*?"([^"]*)"', # Simplified m3u8 pattern
|
||||
r'host["\']?\s*[:=]\s*["\']([^"\']*)', # host: or host= pattern
|
||||
r'["\']([^"\']*\.newkso\.ru[^"\']*)', # Direct newkso.ru pattern
|
||||
r'["\']([^"\']*\/premium\d+[^"\']*)', # premium path pattern
|
||||
r'url.*?["\']([^"\']*newkso[^"\']*)', # URL with newkso
|
||||
]
|
||||
|
||||
for pattern in host_patterns:
|
||||
matches = re.findall(pattern, iframe_content)
|
||||
if matches:
|
||||
host = matches[0]
|
||||
logger.debug(f"Found host with pattern '{pattern}': {host}")
|
||||
break
|
||||
|
||||
if not host:
|
||||
logger.error("Failed to extract host from iframe content")
|
||||
logger.debug(f"Iframe content for host extraction: {iframe_content[:2000]}")
|
||||
# Try to find any newkso.ru related URLs
|
||||
potential_hosts = re.findall(r'["\']([^"\']*newkso[^"\']*)', iframe_content)
|
||||
if potential_hosts:
|
||||
logger.debug(f"Potential host URLs found: {potential_hosts}")
|
||||
raise ExtractorError("Failed to extract host parameter")
|
||||
|
||||
# Extract server lookup URL from fetchWithRetry call (dynamic extraction)
|
||||
server_lookup = None
|
||||
|
||||
# Look for the server_lookup.php pattern in JavaScript
|
||||
if "fetchWithRetry('/server_lookup.php?channel_id='" in iframe_content:
|
||||
server_lookup = '/server_lookup.php?channel_id='
|
||||
logger.debug('Found server lookup URL: /server_lookup.php?channel_id=')
|
||||
elif '/server_lookup.php' in iframe_content:
|
||||
# Try to extract the full path
|
||||
js_lines = iframe_content.split('\n')
|
||||
for js_line in js_lines:
|
||||
if 'server_lookup.php' in js_line and 'fetchWithRetry' in js_line:
|
||||
# Extract the URL from the fetchWithRetry call
|
||||
start = js_line.find("'")
|
||||
if start != -1:
|
||||
end = js_line.find("'", start + 1)
|
||||
if end != -1:
|
||||
potential_url = js_line[start+1:end]
|
||||
if 'server_lookup' in potential_url:
|
||||
server_lookup = potential_url
|
||||
logger.debug(f'Extracted server lookup URL: {server_lookup}')
|
||||
break
|
||||
|
||||
if not server_lookup:
|
||||
logger.error('Failed to extract server lookup URL from iframe content')
|
||||
logger.debug(f'Iframe content sample: {iframe_content[:2000]}')
|
||||
raise ExtractorError('Failed to extract server lookup URL')
|
||||
|
||||
server_lookup_url = f"https://{urlparse(iframe_url).netloc}{server_lookup}{channel_key}"
|
||||
logger.debug(f"Server lookup URL: {server_lookup_url}")
|
||||
|
||||
try:
|
||||
lookup_resp = await self._make_request(server_lookup_url, headers=daddylive_headers)
|
||||
server_data = lookup_resp.json()
|
||||
server_key = server_data.get('server_key')
|
||||
if not server_key:
|
||||
logger.error(f"No server_key in response: {server_data}")
|
||||
raise ExtractorError("Failed to get server key from lookup response")
|
||||
|
||||
logger.info(f"Server lookup successful - Server key: {server_key}")
|
||||
except Exception as lookup_error:
|
||||
logger.error(f"Server lookup request failed: {lookup_error}")
|
||||
raise ExtractorError(f"Server lookup failed: {str(lookup_error)}")
|
||||
|
||||
referer_raw = f'https://{urlparse(iframe_url).netloc}'
|
||||
|
||||
# Extract URL construction logic dynamically from JavaScript
|
||||
# Simple approach: look for newkso.ru URLs and construct based on server_key
|
||||
|
||||
# Check if we have the special case server_key
|
||||
if server_key == 'top1/cdn':
|
||||
clean_m3u8_url = f'https://top1.newkso.ru/top1/cdn/{channel_key}/mono.m3u8'
|
||||
logger.info(f'Using special case URL for server_key \'top1/cdn\': {clean_m3u8_url}')
|
||||
else:
|
||||
clean_m3u8_url = f'https://{server_key}new.newkso.ru/{server_key}/{channel_key}/mono.m3u8'
|
||||
logger.info(f'Using general case URL for server_key \'{server_key}\': {clean_m3u8_url}')
|
||||
|
||||
logger.info(f'Generated stream URL: {clean_m3u8_url}')
|
||||
logger.debug(f'Server key: {server_key}, Channel key: {channel_key}')
|
||||
|
||||
# Check if the final stream URL is on newkso.ru domain
|
||||
if "newkso.ru" in clean_m3u8_url:
|
||||
# For newkso.ru streams, use iframe URL as referer
|
||||
stream_headers = {
|
||||
'User-Agent': daddylive_headers['User-Agent'],
|
||||
'Referer': iframe_url,
|
||||
'Origin': referer_raw
|
||||
}
|
||||
logger.info(f"Applied iframe-specific headers for newkso.ru stream URL: {clean_m3u8_url}")
|
||||
logger.debug(f"Stream headers for newkso.ru: {stream_headers}")
|
||||
else:
|
||||
# For other domains, use the original logic
|
||||
stream_headers = {
|
||||
'User-Agent': daddylive_headers['User-Agent'],
|
||||
'Referer': referer_raw,
|
||||
'Origin': referer_raw
|
||||
}
|
||||
return {
|
||||
"destination_url": clean_m3u8_url,
|
||||
"request_headers": stream_headers,
|
||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||
}
|
||||
|
||||
try:
|
||||
clean_url = url
|
||||
channel_id = extract_channel_id(clean_url)
|
||||
channel_id = extract_channel_id(url)
|
||||
if not channel_id:
|
||||
raise ExtractorError(f"Unable to extract channel ID from {clean_url}")
|
||||
raise ExtractorError(f"Unable to extract channel ID from {url}")
|
||||
|
||||
logger.info(f"Using base domain: {baseurl}")
|
||||
return await get_stream_data(url)
|
||||
|
||||
|
||||
baseurl = await get_daddylive_base_url()
|
||||
endpoints = ["stream/", "cast/", "player/", "watch/"]
|
||||
last_exc = None
|
||||
for endpoint in endpoints:
|
||||
try:
|
||||
return await try_endpoint(baseurl, endpoint, channel_id)
|
||||
except Exception as exc:
|
||||
last_exc = exc
|
||||
continue
|
||||
raise ExtractorError(f"Extraction failed: {str(last_exc)}")
|
||||
except Exception as e:
|
||||
raise ExtractorError(f"Extraction failed: {str(e)}")
|
||||
|
||||
async def _lookup_server(
    self, lookup_url_base: str, auth_url_base: str, auth_data: Dict[str, str], headers: Dict[str, str]
) -> str:
    """Resolve the CDN node serving a channel and build its stream URL.

    Args:
        lookup_url_base: Base URL hosting ``server_lookup.php``.
        auth_url_base: Auth endpoint base URL; its host supplies the domain
            suffix used when composing the stream URL.
        auth_data: Must contain the (already authenticated) ``channel_key``.
        headers: HTTP headers forwarded to the lookup request.

    Returns:
        Fully-qualified ``mono.m3u8`` stream URL.

    Raises:
        ExtractorError: When the lookup request fails or the response carries
            no ``server_key``.
    """
    try:
        # server_lookup.php maps a channel to the CDN node currently serving it.
        server_lookup_url = f"{lookup_url_base}/server_lookup.php?channel_id={quote(auth_data['channel_key'])}"
        server_response = await self._make_request(server_lookup_url, headers=headers)

        server_data = server_response.json()
        server_key = server_data.get("server_key")
        if not server_key:
            raise ExtractorError("Failed to get server key")

        # Reuse the auth host's parent domain for the stream host
        # (e.g. auth host "a.example.ru" -> suffix "example.ru").
        auth_domain_parts = urlparse(auth_url_base).netloc.split(".")
        domain_suffix = ".".join(auth_domain_parts[1:]) if len(auth_domain_parts) > 1 else auth_domain_parts[0]

        if "/" in server_key:
            # Special case such as "top1/cdn": first path part doubles as subdomain.
            parts = server_key.split("/")
            return f"https://{parts[0]}.{domain_suffix}/{server_key}/{auth_data['channel_key']}/mono.m3u8"
        # Normal case: "<key>new" subdomain.
        return f"https://{server_key}new.{domain_suffix}/{server_key}/{auth_data['channel_key']}/mono.m3u8"
    except ExtractorError:
        # Fix: propagate our own error (e.g. missing server_key) unchanged
        # instead of re-wrapping it in a second ExtractorError.
        raise
    except Exception as e:
        # Chain the original cause for easier debugging.
        raise ExtractorError(f"Server lookup failed: {str(e)}") from e
|
||||
|
||||
def _extract_auth_data(self, html_content: str) -> Dict[str, str]:
|
||||
"""Extract authentication data from player page."""
|
||||
try:
|
||||
channel_key_match = re.search(r'var\s+channelKey\s*=\s*["\']([^"\']+)["\']', html_content)
|
||||
if not channel_key_match:
|
||||
return {}
|
||||
channel_key = channel_key_match.group(1)
|
||||
|
||||
# New pattern with atob
|
||||
auth_ts_match = re.search(r'var\s+__c\s*=\s*atob\([\'"]([^\'"]+)[\'"]\)', html_content)
|
||||
auth_rnd_match = re.search(r'var\s+__d\s*=\s*atob\([\'"]([^\'"]+)[\'"]\)', html_content)
|
||||
auth_sig_match = re.search(r'var\s+__e\s*=\s*atob\([\'"]([^\'"]+)[\'"]\)', html_content)
|
||||
|
||||
if auth_ts_match and auth_rnd_match and auth_sig_match:
|
||||
return {
|
||||
"channel_key": channel_key,
|
||||
"auth_ts": base64.b64decode(auth_ts_match.group(1)).decode("utf-8"),
|
||||
"auth_rnd": base64.b64decode(auth_rnd_match.group(1)).decode("utf-8"),
|
||||
"auth_sig": base64.b64decode(auth_sig_match.group(1)).decode("utf-8"),
|
||||
}
|
||||
|
||||
# Original pattern
|
||||
auth_ts_match = re.search(r'var\s+authTs\s*=\s*["\']([^"\']+)["\']', html_content)
|
||||
auth_rnd_match = re.search(r'var\s+authRnd\s*=\s*["\']([^"\']+)["\']', html_content)
|
||||
auth_sig_match = re.search(r'var\s+authSig\s*=\s*["\']([^"\']+)["\']', html_content)
|
||||
|
||||
if auth_ts_match and auth_rnd_match and auth_sig_match:
|
||||
return {
|
||||
"channel_key": channel_key,
|
||||
"auth_ts": auth_ts_match.group(1),
|
||||
"auth_rnd": auth_rnd_match.group(1),
|
||||
"auth_sig": auth_sig_match.group(1),
|
||||
}
|
||||
return {}
|
||||
except Exception:
|
||||
return {}
|
||||
|
||||
def _extract_auth_url_base(self, html_content: str) -> Optional[str]:
|
||||
"""Extract auth URL base from player page script content."""
|
||||
try:
|
||||
# New atob pattern for auth base URL
|
||||
auth_url_base_match = re.search(r'var\s+__a\s*=\s*atob\([\'"]([^\'"]+)[\'"]\)', html_content)
|
||||
if auth_url_base_match:
|
||||
decoded_url = base64.b64decode(auth_url_base_match.group(1)).decode("utf-8")
|
||||
return decoded_url.strip().rstrip("/")
|
||||
|
||||
# Look for auth URL or domain in fetchWithRetry call or similar patterns
|
||||
auth_url_match = re.search(r'fetchWithRetry\([\'"]([^\'"]*/auth\.php)', html_content)
|
||||
|
||||
if auth_url_match:
|
||||
auth_url = auth_url_match.group(1)
|
||||
# Extract base URL up to the auth.php part
|
||||
return auth_url.split("/auth.php")[0]
|
||||
|
||||
# Try finding domain directly
|
||||
domain_match = re.search(r'[\'"]https://([^/\'\"]+)(?:/[^\'\"]*)?/auth\.php', html_content)
|
||||
|
||||
if domain_match:
|
||||
return f"https://{domain_match.group(1)}"
|
||||
|
||||
return None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
def _get_origin(self, url: str) -> str:
|
||||
"""Extract origin from URL."""
|
||||
parsed = urlparse(url)
|
||||
return f"{parsed.scheme}://{parsed.netloc}"
|
||||
|
||||
def _derive_auth_url_base(self, player_domain: str) -> Optional[str]:
|
||||
"""Attempt to derive auth URL base from player domain."""
|
||||
try:
|
||||
# Typical pattern is to use a subdomain for auth domain
|
||||
parsed = urlparse(player_domain)
|
||||
domain_parts = parsed.netloc.split(".")
|
||||
|
||||
# Get the top-level domain and second-level domain
|
||||
if len(domain_parts) >= 2:
|
||||
base_domain = ".".join(domain_parts[-2:])
|
||||
# Try common subdomains for auth
|
||||
for prefix in ["auth", "api", "cdn"]:
|
||||
potential_auth_domain = f"https://{prefix}.{base_domain}"
|
||||
return potential_auth_domain
|
||||
|
||||
return None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
@@ -3,17 +3,27 @@ from typing import Dict, Type
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
from mediaflow_proxy.extractors.dlhd import DLHDExtractor
|
||||
from mediaflow_proxy.extractors.doodstream import DoodStreamExtractor
|
||||
from mediaflow_proxy.extractors.sportsonline import SportsonlineExtractor
|
||||
from mediaflow_proxy.extractors.filelions import FileLionsExtractor
|
||||
from mediaflow_proxy.extractors.filemoon import FileMoonExtractor
|
||||
from mediaflow_proxy.extractors.F16Px import F16PxExtractor
|
||||
from mediaflow_proxy.extractors.livetv import LiveTVExtractor
|
||||
from mediaflow_proxy.extractors.lulustream import LuluStreamExtractor
|
||||
from mediaflow_proxy.extractors.maxstream import MaxstreamExtractor
|
||||
from mediaflow_proxy.extractors.mixdrop import MixdropExtractor
|
||||
from mediaflow_proxy.extractors.okru import OkruExtractor
|
||||
from mediaflow_proxy.extractors.streamtape import StreamtapeExtractor
|
||||
from mediaflow_proxy.extractors.streamwish import StreamWishExtractor
|
||||
from mediaflow_proxy.extractors.supervideo import SupervideoExtractor
|
||||
from mediaflow_proxy.extractors.turbovidplay import TurboVidPlayExtractor
|
||||
from mediaflow_proxy.extractors.uqload import UqloadExtractor
|
||||
from mediaflow_proxy.extractors.vavoo import VavooExtractor
|
||||
from mediaflow_proxy.extractors.vidmoly import VidmolyExtractor
|
||||
from mediaflow_proxy.extractors.vidoza import VidozaExtractor
|
||||
from mediaflow_proxy.extractors.vixcloud import VixCloudExtractor
|
||||
from mediaflow_proxy.extractors.fastream import FastreamExtractor
|
||||
from mediaflow_proxy.extractors.voe import VoeExtractor
|
||||
|
||||
|
||||
class ExtractorFactory:
|
||||
"""Factory for creating URL extractors."""
|
||||
@@ -21,17 +31,26 @@ class ExtractorFactory:
|
||||
_extractors: Dict[str, Type[BaseExtractor]] = {
|
||||
"Doodstream": DoodStreamExtractor,
|
||||
"FileLions": FileLionsExtractor,
|
||||
"FileMoon": FileMoonExtractor,
|
||||
"F16Px": F16PxExtractor,
|
||||
"Uqload": UqloadExtractor,
|
||||
"Mixdrop": MixdropExtractor,
|
||||
"Streamtape": StreamtapeExtractor,
|
||||
"StreamWish": StreamWishExtractor,
|
||||
"Supervideo": SupervideoExtractor,
|
||||
"TurboVidPlay": TurboVidPlayExtractor,
|
||||
"VixCloud": VixCloudExtractor,
|
||||
"Okru": OkruExtractor,
|
||||
"Maxstream": MaxstreamExtractor,
|
||||
"LiveTV": LiveTVExtractor,
|
||||
"LuluStream": LuluStreamExtractor,
|
||||
"DLHD": DLHDExtractor,
|
||||
"Vavoo": VavooExtractor,
|
||||
"Fastream": FastreamExtractor
|
||||
"Vidmoly": VidmolyExtractor,
|
||||
"Vidoza": VidozaExtractor,
|
||||
"Fastream": FastreamExtractor,
|
||||
"Voe": VoeExtractor,
|
||||
"Sportsonline": SportsonlineExtractor,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -12,7 +12,8 @@ class FileLionsExtractor(BaseExtractor):
|
||||
headers = {}
|
||||
patterns = [ # See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/filelions.py
|
||||
r'''sources:\s*\[{file:\s*["'](?P<url>[^"']+)''',
|
||||
r'''["']hls[24]["']:\s*["'](?P<url>[^"']+)'''
|
||||
r'''["']hls4["']:\s*["'](?P<url>[^"']+)''',
|
||||
r'''["']hls2["']:\s*["'](?P<url>[^"']+)'''
|
||||
]
|
||||
|
||||
final_url = await eval_solver(self, url, headers, patterns)
|
||||
|
||||
52
mediaflow_proxy/extractors/filemoon.py
Normal file
52
mediaflow_proxy/extractors/filemoon.py
Normal file
@@ -0,0 +1,52 @@
|
||||
import re
|
||||
from typing import Dict, Any
|
||||
from urllib.parse import urlparse, urljoin
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
from mediaflow_proxy.utils.packed import eval_solver
|
||||
|
||||
|
||||
class FileMoonExtractor(BaseExtractor):
    """Extractor for FileMoon embeds.

    Resolves the inner player iframe, unpacks the obfuscated player script
    via ``eval_solver``, and returns the resulting HLS manifest URL.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Streams are HLS manifests, so route them through the HLS proxy.
        self.mediaflow_endpoint = "hls_manifest_proxy"

    async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
        """Resolve *url* to a playable m3u8 plus the headers it requires."""
        page = await self._make_request(url)

        iframe_match = re.search(r'iframe.*?src=["\'](.*?)["\']', page.text, re.DOTALL)
        if iframe_match is None:
            raise ExtractorError("Failed to extract iframe URL")
        frame_src = iframe_match.group(1)

        # Normalize protocol-relative and relative iframe URLs against the
        # final (post-redirect) page URL.
        landed = urlparse(str(page.url))
        if frame_src.startswith("//"):
            frame_src = f"{landed.scheme}:{frame_src}"
        elif not urlparse(frame_src).scheme:
            frame_src = urljoin(f"{landed.scheme}://{landed.netloc}", frame_src)

        request_headers = {"Referer": url}
        final_url = await eval_solver(self, frame_src, request_headers, [r'file:"(.*?)"'])

        # Probe the manifest once so a dead link fails here rather than at playback.
        probe = await self._make_request(final_url, headers=request_headers)
        if probe.status_code == 404:
            raise ExtractorError("Stream not found (404)")

        self.base_headers["referer"] = url
        return {
            "destination_url": final_url,
            "request_headers": self.base_headers,
            "mediaflow_endpoint": self.mediaflow_endpoint,
        }
|
||||
27
mediaflow_proxy/extractors/lulustream.py
Normal file
27
mediaflow_proxy/extractors/lulustream.py
Normal file
@@ -0,0 +1,27 @@
|
||||
import re
|
||||
from typing import Dict, Any
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
|
||||
|
||||
class LuluStreamExtractor(BaseExtractor):
    """Extractor for LuluStream embed pages (direct regex on the player JS)."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # LuluStream serves HLS playlists.
        self.mediaflow_endpoint = "hls_manifest_proxy"

    async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
        """Return the m3u8 source URL embedded in the page's player setup."""
        page = await self._make_request(url)

        # Pattern borrowed from ResolveURL's lulustream plugin.
        source = re.search(
            r'''sources:\s*\[{file:\s*["'](?P<url>[^"']+)''',
            page.text,
            re.DOTALL,
        )
        if source is None:
            raise ExtractorError("Failed to extract source URL")

        self.base_headers["referer"] = url
        return {
            "destination_url": source.group("url"),
            "request_headers": self.base_headers,
            "mediaflow_endpoint": self.mediaflow_endpoint,
        }
|
||||
195
mediaflow_proxy/extractors/sportsonline.py
Normal file
195
mediaflow_proxy/extractors/sportsonline.py
Normal file
@@ -0,0 +1,195 @@
|
||||
import re
|
||||
import logging
|
||||
from typing import Any, Dict, Optional
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
from mediaflow_proxy.utils.packed import detect, unpack
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SportsonlineExtractor(BaseExtractor):
    """Sportsonline/Sportzonline URL extractor for M3U8 streams.

    Strategy:
    1. Fetch page -> find first <iframe src="...">
    2. Fetch iframe with Referer=https://sportzonline.st/
    3. Collect packed eval blocks; if >=2 use second (index 1) else first.
    4. Unpack P.A.C.K.E.R. and search var src="...m3u8".
    5. Return final m3u8 with referer header.

    Notes:
    - Multi-domain support for sportzonline.(st|bz|cc|top) and sportsonline.(si|sn)
    - Uses P.A.C.K.E.R. unpacking from utils.packed module
    - Returns streams suitable for hls_manifest_proxy endpoint
    """

    def __init__(self, request_headers: dict):
        super().__init__(request_headers)
        # Resolved streams are HLS playlists; route them through the HLS proxy.
        self.mediaflow_endpoint = "hls_manifest_proxy"

    def _detect_packed_blocks(self, html: str) -> list[str]:
        r"""
        Detect and extract packed eval blocks from HTML.
        Replicates the TypeScript logic: /eval\(function(.+?.+)/g

        Returns a (possibly empty) list of raw ``eval(function(p,a,c,k,e,...``
        snippets suitable for unpack().
        """
        # Find all eval(function...) blocks - more greedy to capture full packed code
        pattern = re.compile(r"eval\(function\(p,a,c,k,e,.*?\)\)(?:\s*;|\s*<)", re.DOTALL)
        raw_matches = pattern.findall(html)

        # If no matches with the strict pattern, try a more relaxed one
        if not raw_matches:
            # Try to find eval(function and capture until we find the closing ))
            pattern = re.compile(r"eval\(function\(p,a,c,k,e,[dr]\).*?\}\(.*?\)\)", re.DOTALL)
            raw_matches = pattern.findall(html)

        return raw_matches

    async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
        """Main extraction flow: fetch page, extract iframe, unpack and find m3u8."""
        try:
            # Step 1: Fetch main page
            logger.info(f"Fetching main page: {url}")
            main_response = await self._make_request(url, timeout=15)
            main_html = main_response.text

            # Extract first iframe
            iframe_match = re.search(r'<iframe\s+src=["\']([^"\']+)["\']', main_html, re.IGNORECASE)
            if not iframe_match:
                raise ExtractorError("No iframe found on the page")

            iframe_url = iframe_match.group(1)

            # Normalize iframe URL (protocol-relative or site-relative forms)
            if iframe_url.startswith('//'):
                iframe_url = 'https:' + iframe_url
            elif iframe_url.startswith('/'):
                parsed_main = urlparse(url)
                iframe_url = f"{parsed_main.scheme}://{parsed_main.netloc}{iframe_url}"

            logger.info(f"Found iframe URL: {iframe_url}")

            # Step 2: Fetch iframe with Referer (the host checks it)
            iframe_headers = {
                'Referer': 'https://sportzonline.st/',
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                'Accept-Language': 'en-US,en;q=0.9,it;q=0.8',
                'Cache-Control': 'no-cache'
            }

            iframe_response = await self._make_request(iframe_url, headers=iframe_headers, timeout=15)
            iframe_html = iframe_response.text

            logger.debug(f"Iframe HTML length: {len(iframe_html)}")

            # Step 3: Detect packed blocks
            packed_blocks = self._detect_packed_blocks(iframe_html)

            logger.info(f"Found {len(packed_blocks)} packed blocks")

            if not packed_blocks:
                logger.warning("No packed blocks found, trying direct m3u8 search")
                # Fallback: try direct m3u8 search
                direct_match = re.search(r'(https?://[^\s"\'>]+\.m3u8[^\s"\'>]*)', iframe_html)
                if direct_match:
                    m3u8_url = direct_match.group(1)
                    logger.info(f"Found direct m3u8 URL: {m3u8_url}")

                    return {
                        "destination_url": m3u8_url,
                        "request_headers": {
                            'Referer': iframe_url,
                            'User-Agent': iframe_headers['User-Agent']
                        },
                        "mediaflow_endpoint": self.mediaflow_endpoint,
                    }
                else:
                    raise ExtractorError("No packed blocks or direct m3u8 URL found")

            # NOTE(review): duplicate of the log line above -- kept for behavior parity.
            logger.info(f"Found {len(packed_blocks)} packed blocks")

            # Choose block: if >=2 use second (index 1), else first (index 0)
            chosen_idx = 1 if len(packed_blocks) > 1 else 0
            m3u8_url = None
            unpacked_code = None

            logger.info(f"Chosen packed block index: {chosen_idx}")

            # Try to unpack chosen block
            try:
                unpacked_code = unpack(packed_blocks[chosen_idx])
                logger.info(f"Successfully unpacked block {chosen_idx}")
                logger.debug(f"Unpacked code preview: {unpacked_code[:500] if unpacked_code else 'empty'}")
            except Exception as e:
                # Non-fatal: we may still find the URL in one of the other blocks below.
                logger.warning(f"Failed to unpack block {chosen_idx}: {e}")

            # Search for var src="...m3u8" with multiple patterns
            if unpacked_code:
                # Try multiple patterns as in the TypeScript version
                patterns = [
                    r'var\s+src\s*=\s*["\']([^"\']+)["\']',  # var src="..."
                    r'src\s*=\s*["\']([^"\']+\.m3u8[^"\']*)["\']',  # src="...m3u8"
                    r'file\s*:\s*["\']([^"\']+\.m3u8[^"\']*)["\']',  # file: "...m3u8"
                    r'["\']([^"\']*https?://[^"\']+\.m3u8[^"\']*)["\']',  # any m3u8 URL
                ]

                for pattern in patterns:
                    src_match = re.search(pattern, unpacked_code)
                    if src_match:
                        m3u8_url = src_match.group(1)
                        # Verify it looks like a valid m3u8 URL
                        if '.m3u8' in m3u8_url or 'http' in m3u8_url:
                            break
                        m3u8_url = None

            # If not found, try all other blocks
            if not m3u8_url:
                logger.info("m3u8 not found in chosen block, trying all blocks")
                for i, block in enumerate(packed_blocks):
                    if i == chosen_idx:
                        continue
                    try:
                        unpacked_code = unpack(block)
                        # Use the same patterns as above
                        for pattern in [
                            r'var\s+src\s*=\s*["\']([^"\']+)["\']',
                            r'src\s*=\s*["\']([^"\']+\.m3u8[^"\']*)["\']',
                            r'file\s*:\s*["\']([^"\']+\.m3u8[^"\']*)["\']',
                            r'["\']([^"\']*https?://[^"\']+\.m3u8[^"\']*)["\']',
                        ]:
                            src_match = re.search(pattern, unpacked_code)
                            if src_match:
                                test_url = src_match.group(1)
                                if '.m3u8' in test_url or 'http' in test_url:
                                    m3u8_url = test_url
                                    logger.info(f"Found m3u8 in block {i}")
                                    break

                        if m3u8_url:
                            break
                    except Exception as e:
                        logger.debug(f"Failed to process block {i}: {e}")
                        continue

            if not m3u8_url:
                raise ExtractorError("Could not extract m3u8 URL from packed code")

            logger.info(f"Successfully extracted m3u8 URL: {m3u8_url}")

            # Return stream configuration
            return {
                "destination_url": m3u8_url,
                "request_headers": {
                    'Referer': iframe_url,
                    'User-Agent': iframe_headers['User-Agent']
                },
                "mediaflow_endpoint": self.mediaflow_endpoint,
            }

        except ExtractorError:
            raise
        except Exception as e:
            logger.exception(f"Sportsonline extraction failed for {url}")
            raise ExtractorError(f"Extraction failed: {str(e)}")
|
||||
81
mediaflow_proxy/extractors/streamwish.py
Normal file
81
mediaflow_proxy/extractors/streamwish.py
Normal file
@@ -0,0 +1,81 @@
|
||||
import re
|
||||
from typing import Dict, Any
|
||||
from urllib.parse import urljoin, urlparse
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
from mediaflow_proxy.utils.packed import eval_solver
|
||||
|
||||
|
||||
class StreamWishExtractor(BaseExtractor):
    """Extractor for StreamWish embeds.

    Resolves an optional inner iframe, scans the player page for a direct
    m3u8 URL, and falls back to unpacking the P.A.C.K.E.R. script when the
    URL is hidden inside eval()'d code.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # StreamWish serves HLS playlists.
        self.mediaflow_endpoint = "hls_manifest_proxy"

    async def extract(self, url: str, **_kwargs: Any) -> Dict[str, Any]:
        """Return the m3u8 URL plus the Referer/Origin headers it requires."""
        # Prefer the caller-supplied Referer; otherwise use the embed's own origin.
        referer = self.base_headers.get("Referer")
        if not referer:
            embed = urlparse(url)
            referer = f"{embed.scheme}://{embed.netloc}/"

        headers = {"Referer": referer}
        landing = await self._make_request(url, headers=headers)

        # Some embeds wrap the real player in an iframe; follow it if present.
        frame = re.search(
            r'<iframe[^>]+src=["\']([^"\']+)["\']',
            landing.text,
            re.DOTALL
        )
        player_url = urljoin(url, frame.group(1)) if frame else url

        player_page = await self._make_request(
            player_url,
            headers=headers
        )
        page_html = player_page.text

        # First try a plain-text m3u8 URL in the page.
        final_url = self._extract_m3u8(page_html)

        # Fall back to unpacking the obfuscated player script.
        if not final_url and "eval(function(p,a,c,k,e,d)" in page_html:
            try:
                final_url = await eval_solver(
                    self,
                    player_url,
                    headers,
                    [
                        # absolute m3u8
                        r'(https?://[^"\']+\.m3u8[^"\']*)',
                        # relative stream paths
                        r'(\/stream\/[^"\']+\.m3u8[^"\']*)',
                    ],
                )
            except Exception:
                final_url = None

        if not final_url:
            raise ExtractorError("StreamWish: Failed to extract m3u8")

        # Resolve a relative /stream/... path against the player page.
        if final_url.startswith("/"):
            final_url = urljoin(player_url, final_url)

        ref_parts = urlparse(referer)
        self.base_headers.update({
            "Referer": referer,
            "Origin": f"{ref_parts.scheme}://{ref_parts.netloc}",
        })

        return {
            "destination_url": final_url,
            "request_headers": self.base_headers,
            "mediaflow_endpoint": self.mediaflow_endpoint,
        }

    @staticmethod
    def _extract_m3u8(text: str) -> str | None:
        """
        Return the first absolute m3u8 URL found in *text*, or None.
        """
        hit = re.search(
            r'https?://[^"\']+\.m3u8[^"\']*',
            text
        )
        return hit.group(0) if hit else None
|
||||
68
mediaflow_proxy/extractors/turbovidplay.py
Normal file
68
mediaflow_proxy/extractors/turbovidplay.py
Normal file
@@ -0,0 +1,68 @@
|
||||
import re
|
||||
from typing import Dict, Any
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
|
||||
|
||||
class TurboVidPlayExtractor(BaseExtractor):
    """Extractor for TurboVid/TurboViPlay-family embed hosts."""

    # Known embed domains served by this player family.
    domains = [
        "turboviplay.com",
        "emturbovid.com",
        "tuborstb.co",
        "javggvideo.xyz",
        "stbturbo.xyz",
        "turbovidhls.com",
    ]

    mediaflow_endpoint = "hls_manifest_proxy"

    async def extract(self, url: str, **kwargs):
        """Resolve *url* (an embed page) to the real m3u8 playlist URL.

        Raises:
            ExtractorError: when the media URL or the final playlist
                cannot be located.
        """
        # Local import keeps the module's dependency block unchanged.
        from urllib.parse import urljoin

        #
        # 1. Load embed
        #
        response = await self._make_request(url)
        html = response.text

        #
        # 2. Extract urlPlay or data-hash
        #
        m = re.search(r'(?:urlPlay|data-hash)\s*=\s*[\'"]([^\'"]+)', html)
        if not m:
            raise ExtractorError("TurboViPlay: No media URL found")

        media_url = m.group(1)

        # Normalize protocol-relative ("//host/...") and path-relative URLs.
        # BUGFIX: the previous code read `response.url.origin`, an attribute
        # that does not exist on httpx.URL objects and raised AttributeError
        # for every path-relative media URL; urljoin handles all forms.
        if not media_url.startswith(("http://", "https://")):
            media_url = urljoin(str(response.url), media_url)

        #
        # 3. Fetch the intermediate playlist
        #
        data_resp = await self._make_request(media_url, headers={"Referer": url})
        playlist = data_resp.text

        #
        # 4. Extract real m3u8 URL
        #
        m2 = re.search(r'https?://[^\'"\s]+\.m3u8', playlist)
        if not m2:
            raise ExtractorError("TurboViPlay: Unable to extract playlist URL")

        real_m3u8 = m2.group(0)

        #
        # 5. Final headers: segments must carry the embed page as referer
        #
        self.base_headers["referer"] = url

        #
        # 6. Always return master proxy (your MediaFlow only supports this)
        #
        return {
            "destination_url": real_m3u8,
            "request_headers": self.base_headers,
            "mediaflow_endpoint": "hls_manifest_proxy",
        }
|
||||
@@ -4,24 +4,31 @@ from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class VavooExtractor(BaseExtractor):
|
||||
"""Vavoo URL extractor for resolving vavoo.to links (solo httpx, async)."""
|
||||
"""Vavoo URL extractor for resolving vavoo.to links.
|
||||
|
||||
Features:
|
||||
- Uses BaseExtractor's retry/timeouts
|
||||
- Improved headers to mimic Android okhttp client
|
||||
- Robust JSON handling and logging
|
||||
"""
|
||||
|
||||
def __init__(self, request_headers: dict):
|
||||
super().__init__(request_headers)
|
||||
self.mediaflow_endpoint = "proxy_stream_endpoint"
|
||||
|
||||
async def get_auth_signature(self) -> Optional[str]:
|
||||
"""Get authentication signature for Vavoo API (async, httpx, pulito)."""
|
||||
"""Get authentication signature for Vavoo API (async)."""
|
||||
headers = {
|
||||
"user-agent": "okhttp/4.11.0",
|
||||
"accept": "application/json",
|
||||
"accept": "application/json",
|
||||
"content-type": "application/json; charset=utf-8",
|
||||
"accept-encoding": "gzip"
|
||||
"accept-encoding": "gzip",
|
||||
}
|
||||
import time
|
||||
current_time = int(time.time() * 1000)
|
||||
|
||||
|
||||
data = {
|
||||
"token": "tosFwQCJMS8qrW_AjLoHPQ41646J5dRNha6ZWHnijoYQQQoADQoXYSo7ki7O5-CsgN4CH0uRk6EEoJ0728ar9scCRQW3ZkbfrPfeCXW2VgopSW2FWDqPOoVYIuVPAOnXCZ5g",
|
||||
"reason": "app-blur",
|
||||
@@ -37,23 +44,17 @@ class VavooExtractor(BaseExtractor):
|
||||
},
|
||||
"os": {
|
||||
"name": "android",
|
||||
"version": "13",
|
||||
"abis": ["arm64-v8a", "armeabi-v7a", "armeabi"],
|
||||
"host": "android"
|
||||
"version": "13"
|
||||
},
|
||||
"app": {
|
||||
"platform": "android",
|
||||
"version": "3.1.21",
|
||||
"buildId": "289515000",
|
||||
"engine": "hbc85",
|
||||
"signatures": ["6e8a975e3cbf07d5de823a760d4c2547f86c1403105020adee5de67ac510999e"],
|
||||
"installer": "app.revanced.manager.flutter"
|
||||
"version": "3.1.21"
|
||||
},
|
||||
"version": {
|
||||
"package": "tv.vavoo.app",
|
||||
"binary": "3.1.21",
|
||||
"js": "3.1.21"
|
||||
}
|
||||
},
|
||||
},
|
||||
"appFocusTime": 0,
|
||||
"playerActive": False,
|
||||
@@ -70,7 +71,7 @@ class VavooExtractor(BaseExtractor):
|
||||
"adblockEnabled": True,
|
||||
"proxy": {
|
||||
"supported": ["ss", "openvpn"],
|
||||
"engine": "ss",
|
||||
"engine": "ss",
|
||||
"ssVersion": 1,
|
||||
"enabled": True,
|
||||
"autoServer": True,
|
||||
@@ -80,44 +81,48 @@ class VavooExtractor(BaseExtractor):
|
||||
"supported": False
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
try:
|
||||
resp = await self._make_request(
|
||||
"https://www.vavoo.tv/api/app/ping",
|
||||
method="POST",
|
||||
json=data,
|
||||
headers=headers
|
||||
headers=headers,
|
||||
timeout=10,
|
||||
retries=2,
|
||||
)
|
||||
result = resp.json()
|
||||
addon_sig = result.get("addonSig")
|
||||
try:
|
||||
result = resp.json()
|
||||
except Exception:
|
||||
logger.warning("Vavoo ping returned non-json response (status=%s).", resp.status_code)
|
||||
return None
|
||||
|
||||
addon_sig = result.get("addonSig") if isinstance(result, dict) else None
|
||||
if addon_sig:
|
||||
logger.info("Successfully obtained Vavoo authentication signature")
|
||||
return addon_sig
|
||||
else:
|
||||
logger.warning("No addonSig in Vavoo API response")
|
||||
logger.warning("No addonSig in Vavoo API response: %s", result)
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.exception(f"Failed to get Vavoo authentication signature: {str(e)}")
|
||||
except ExtractorError as e:
|
||||
logger.warning("Failed to get Vavoo auth signature: %s", e)
|
||||
return None
|
||||
|
||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||
"""Extract Vavoo stream URL (async, httpx)."""
|
||||
"""Extract Vavoo stream URL (async)."""
|
||||
if "vavoo.to" not in url:
|
||||
raise ExtractorError("Not a valid Vavoo URL")
|
||||
|
||||
# Get authentication signature
|
||||
signature = await self.get_auth_signature()
|
||||
if not signature:
|
||||
raise ExtractorError("Failed to get Vavoo authentication signature")
|
||||
|
||||
# Resolve the URL
|
||||
resolved_url = await self._resolve_vavoo_link(url, signature)
|
||||
if not resolved_url:
|
||||
raise ExtractorError("Failed to resolve Vavoo URL")
|
||||
|
||||
# Set up headers for the resolved stream
|
||||
stream_headers = {
|
||||
"user-agent": self.base_headers["user-agent"],
|
||||
"user-agent": self.base_headers.get("user-agent", "okhttp/4.11.0"),
|
||||
"referer": "https://vavoo.to/",
|
||||
}
|
||||
|
||||
@@ -128,17 +133,17 @@ class VavooExtractor(BaseExtractor):
|
||||
}
|
||||
|
||||
async def _resolve_vavoo_link(self, link: str, signature: str) -> Optional[str]:
|
||||
"""Resolve a Vavoo link using the MediaHubMX API (async, httpx)."""
|
||||
"""Resolve a Vavoo link using the MediaHubMX API (async)."""
|
||||
headers = {
|
||||
"user-agent": "MediaHubMX/2",
|
||||
"user-agent": "okhttp/4.11.0",
|
||||
"accept": "application/json",
|
||||
"content-type": "application/json; charset=utf-8",
|
||||
"content-type": "application/json; charset=utf-8",
|
||||
"accept-encoding": "gzip",
|
||||
"mediahubmx-signature": signature
|
||||
}
|
||||
data = {
|
||||
"language": "de",
|
||||
"region": "AT",
|
||||
"region": "AT",
|
||||
"url": link,
|
||||
"clientVersion": "3.1.21"
|
||||
}
|
||||
@@ -148,22 +153,34 @@ class VavooExtractor(BaseExtractor):
|
||||
"https://vavoo.to/mediahubmx-resolve.json",
|
||||
method="POST",
|
||||
json=data,
|
||||
headers=headers
|
||||
headers=headers,
|
||||
timeout=12,
|
||||
retries=3,
|
||||
backoff_factor=0.6,
|
||||
)
|
||||
result = resp.json()
|
||||
logger.info(f"Vavoo API response: {result}")
|
||||
|
||||
if isinstance(result, list) and result and result[0].get("url"):
|
||||
try:
|
||||
result = resp.json()
|
||||
except Exception:
|
||||
logger.warning("Vavoo resolve returned non-json response (status=%s). Body preview: %s", resp.status_code, getattr(resp, "text", "")[:500])
|
||||
return None
|
||||
|
||||
logger.debug("Vavoo API response: %s", result)
|
||||
|
||||
# Accept either list or dict with 'url'
|
||||
if isinstance(result, list) and result and isinstance(result[0], dict) and result[0].get("url"):
|
||||
resolved_url = result[0]["url"]
|
||||
logger.info(f"Successfully resolved Vavoo URL to: {resolved_url}")
|
||||
logger.info("Successfully resolved Vavoo URL to: %s", resolved_url)
|
||||
return resolved_url
|
||||
elif isinstance(result, dict) and result.get("url"):
|
||||
resolved_url = result["url"]
|
||||
logger.info(f"Successfully resolved Vavoo URL to: {resolved_url}")
|
||||
logger.info("Successfully resolved Vavoo URL to: %s", resolved_url)
|
||||
return resolved_url
|
||||
else:
|
||||
logger.warning(f"No URL found in Vavoo API response: {result}")
|
||||
logger.warning("No URL found in Vavoo API response: %s", result)
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.exception(f"Vavoo resolution failed for URL {link}: {str(e)}")
|
||||
except ExtractorError as e:
|
||||
logger.error(f"Vavoo resolution failed for URL {link}: {e}")
|
||||
raise ExtractorError(f"Vavoo resolution failed: {str(e)}") from e
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error while resolving Vavoo URL {link}: {e}")
|
||||
raise ExtractorError(f"Vavoo resolution failed: {str(e)}") from e
|
||||
|
||||
63
mediaflow_proxy/extractors/vidmoly.py
Normal file
63
mediaflow_proxy/extractors/vidmoly.py
Normal file
@@ -0,0 +1,63 @@
|
||||
import re
|
||||
from typing import Dict, Any
|
||||
from urllib.parse import urljoin, urlparse
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
|
||||
|
||||
class VidmolyExtractor(BaseExtractor):
    """Vidmoly embed resolver returning the master HLS playlist."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.mediaflow_endpoint = "hls_manifest_proxy"

    async def extract(self, url: str) -> Dict[str, Any]:
        """Resolve a vidmoly embed *url* to its master m3u8 playlist.

        Raises:
            ExtractorError: for non-vidmoly domains, a missing stream URL,
                a request timeout, or an unavailable (>=400) playlist.
        """
        host = urlparse(url).hostname
        if not host or "vidmoly" not in host:
            raise ExtractorError("VIDMOLY: Invalid domain")

        request_headers = {
            "User-Agent":
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/120 Safari/537.36",
            "Referer": url,
            "Sec-Fetch-Dest": "iframe",
        }

        # Fetch the embed page.
        page = await self._make_request(url, headers=request_headers)

        # The player config embeds the playlist as sources:[{file:"..."}].
        found = re.search(r'sources:\s*\[\{file:"([^"]+)', page.text)
        if found is None:
            raise ExtractorError("VIDMOLY: Stream URL not found")

        playlist_url = found.group(1)
        if not playlist_url.startswith("http"):
            playlist_url = urljoin(url, playlist_url)

        # Probe the playlist up-front so a dead stream fails fast here
        # instead of timing out later in the player (prevents Stremio timeout).
        try:
            probe = await self._make_request(playlist_url, headers=request_headers)
        except Exception as exc:
            if "timeout" in str(exc).lower():
                raise ExtractorError("VIDMOLY: Request timed out")
            raise

        if probe.status_code >= 400:
            raise ExtractorError(
                f"VIDMOLY: Stream unavailable ({probe.status_code})"
            )

        # Hand back the MASTER playlist, not a variant; MediaFlow Proxy
        # handles variant selection itself.
        return {
            "destination_url": playlist_url,
            "request_headers": request_headers,
            "mediaflow_endpoint": self.mediaflow_endpoint,
        }
|
||||
73
mediaflow_proxy/extractors/vidoza.py
Normal file
73
mediaflow_proxy/extractors/vidoza.py
Normal file
@@ -0,0 +1,73 @@
|
||||
import re
|
||||
from typing import Dict, Any
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
|
||||
|
||||
class VidozaExtractor(BaseExtractor):
    """Vidoza / Videzz embed resolver returning a direct MP4 link."""

    def __init__(self, request_headers: dict):
        super().__init__(request_headers)
        # Direct MP4 files go through the generic stream proxy endpoint.
        # if your base doesn’t set this, keep it; otherwise you can remove:
        self.mediaflow_endpoint = "proxy_stream_endpoint"

    async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
        """Resolve *url* to the direct MP4 file behind a Vidoza embed.

        Raises:
            ExtractorError: on foreign domains, an empty page, or when the
                player JS does not expose a (file/src, res) pair.
        """
        hostname = urlparse(url).hostname

        # Accept vidoza + videzz hosts only.
        allowed = hostname is not None and (
            hostname.endswith("vidoza.net") or hostname.endswith("videzz.net")
        )
        if not allowed:
            raise ExtractorError("VIDOZA: Invalid domain")

        headers = self.base_headers.copy()
        headers["referer"] = "https://vidoza.net/"
        headers["user-agent"] = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/120.0.0.0 Safari/537.36"
        )
        headers["accept"] = "*/*"
        headers["accept-language"] = "en-US,en;q=0.9"

        # 1) Fetch the embed page (or whatever URL was passed in).
        response = await self._make_request(url, headers=headers)
        page = response.text or ""
        if not page:
            raise ExtractorError("VIDOZA: Empty HTML from Vidoza")

        cookies = response.cookies or {}

        # 2) Pull the (file/src, res) pair out of the player JS via regex.
        finder = re.compile(
            r"""["']?\s*(?:file|src)\s*["']?\s*[:=,]?\s*["'](?P<url>[^"']+)"""
            r"""(?:[^}>\]]+)["']?\s*res\s*["']?\s*[:=]\s*["']?(?P<label>[^"',]+)""",
            re.IGNORECASE,
        )
        hit = finder.search(page)
        if hit is None:
            raise ExtractorError("VIDOZA: Unable to extract video + label from JS")

        mp4_url = hit.group("url")
        label = hit.group("label").strip()

        # Protocol-relative links like //str38.vidoza.net/... need a scheme.
        if mp4_url.startswith("//"):
            mp4_url = "https:" + mp4_url

        # 3) Forward session cookies; the download token may depend on them.
        if cookies:
            headers["cookie"] = "; ".join(
                f"{name}={value}" for name, value in cookies.items()
            )

        return {
            "destination_url": mp4_url,
            "request_headers": headers,
            "mediaflow_endpoint": self.mediaflow_endpoint,
        }
|
||||
68
mediaflow_proxy/extractors/voe.py
Normal file
68
mediaflow_proxy/extractors/voe.py
Normal file
@@ -0,0 +1,68 @@
|
||||
import base64
|
||||
import re
|
||||
from typing import Dict, Any
|
||||
from urllib.parse import urljoin
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
|
||||
|
||||
class VoeExtractor(BaseExtractor):
    """VOE embed resolver.

    The page carries a base64/ROT-obfuscated JSON payload plus an external
    script containing substitution LUTs needed to decode it (scheme per
    ResolveURL's voesx plugin, referenced below).
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.mediaflow_endpoint = "hls_manifest_proxy"

    async def extract(self, url: str, redirected: bool = False, **kwargs) -> Dict[str, Any]:
        """Resolve a VOE embed *url* to its stream URL.

        Args:
            url: Embed page URL.
            redirected: Internal flag marking that one JS redirect has
                already been followed; used to stop redirect loops.

        Raises:
            ExtractorError: on redirect loops or when any decode step fails.
        """
        response = await self._make_request(url)

        # See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/voesx.py
        redirect_pattern = r'''window\.location\.href\s*=\s*'([^']+)'''
        redirect_match = re.search(redirect_pattern, response.text, re.DOTALL)
        if redirect_match:
            if redirected:
                raise ExtractorError("VOE: too many redirects")

            # BUGFIX: propagate the redirect flag so a second JS redirect
            # aborts instead of recursing without bound (previously the
            # recursive call omitted redirected=True, making the guard dead).
            return await self.extract(redirect_match.group(1), redirected=True)

        # The obfuscated JSON payload and the URL of the external script
        # holding the decode LUTs sit next to each other in the markup.
        code_and_script_pattern = r'json">\["([^"]+)"]</script>\s*<script\s*src="([^"]+)'
        code_and_script_match = re.search(code_and_script_pattern, response.text, re.DOTALL)
        if not code_and_script_match:
            raise ExtractorError("VOE: unable to locate obfuscated payload or external script URL")

        script_response = await self._make_request(urljoin(url, code_and_script_match.group(2)))

        # LUTs appear in the script as a small array of 2-char junk tokens.
        luts_pattern = r"(\[(?:'\W{2}'[,\]]){1,9})"
        luts_match = re.search(luts_pattern, script_response.text, re.DOTALL)
        if not luts_match:
            raise ExtractorError("VOE: unable to locate LUTs in external script")

        data = self.voe_decode(code_and_script_match.group(1), luts_match.group(1))

        final_url = data.get('source')
        if not final_url:
            raise ExtractorError("VOE: failed to extract video URL")

        self.base_headers["referer"] = url
        return {
            "destination_url": final_url,
            "request_headers": self.base_headers,
            "mediaflow_endpoint": self.mediaflow_endpoint,
        }

    @staticmethod
    def voe_decode(ct: str, luts: str) -> Dict[str, Any]:
        """Decode VOE's obfuscated payload *ct* using the LUT string *luts*.

        Decode chain: ROT13 the letters, strip every LUT token, base64-decode,
        shift each character back by 3, reverse, base64-decode again, then
        parse the resulting JSON.
        """
        import json
        # Regex-escape each 2-char LUT token so it can be removed via re.sub.
        lut = [''.join([('\\' + x) if x in '.*+?^${}()|[]\\' else x for x in i]) for i in luts[2:-2].split("','")]
        txt = ''
        # ROT13 on A-Z / a-z; all other characters pass through unchanged.
        for i in ct:
            x = ord(i)
            if 64 < x < 91:
                x = (x - 52) % 26 + 65
            elif 96 < x < 123:
                x = (x - 84) % 26 + 97
            txt += chr(x)
        # Strip the junk tokens injected by the obfuscator.
        for i in lut:
            txt = re.sub(i, '', txt)
        ct = base64.b64decode(txt).decode('utf-8')
        # Undo the +3 character shift, reverse, and decode the inner base64.
        txt = ''.join([chr(ord(i) - 3) for i in ct])
        txt = base64.b64decode(txt[::-1]).decode('utf-8')
        return json.loads(txt)
|
||||
Reference in New Issue
Block a user