mirror of
https://github.com/UrloMythus/UnHided.git
synced 2026-06-10 09:10:23 +00:00
update
This commit is contained in:
@@ -65,9 +65,9 @@ class F16PxExtractor(BaseExtractor):
|
||||
raise ExtractorError("F16PX: No playback data")
|
||||
|
||||
try:
|
||||
iv = self._b64url_decode(pb["iv"]) # nonce
|
||||
key = self._join_key_parts(pb["key_parts"]) # AES key
|
||||
payload = self._b64url_decode(pb["payload"]) # ciphertext + tag
|
||||
iv = self._b64url_decode(pb["iv"]) # nonce
|
||||
key = self._join_key_parts(pb["key_parts"]) # AES key
|
||||
payload = self._b64url_decode(pb["payload"]) # ciphertext + tag
|
||||
|
||||
cipher = python_aesgcm.new(key)
|
||||
decrypted = cipher.open(iv, payload) # AAD = '' like ResolveURL
|
||||
@@ -95,7 +95,7 @@ class F16PxExtractor(BaseExtractor):
|
||||
self.base_headers["origin"] = origin
|
||||
self.base_headers["Accept-Language"] = "en-US,en;q=0.5"
|
||||
self.base_headers["Accept"] = "*/*"
|
||||
self.base_headers['user-agent'] = 'Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0'
|
||||
self.base_headers["user-agent"] = "Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0"
|
||||
|
||||
return {
|
||||
"destination_url": best,
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,21 +1,53 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, Optional, Any
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import asyncio
|
||||
import httpx
|
||||
import aiohttp
|
||||
import json
|
||||
import logging
|
||||
|
||||
from mediaflow_proxy.configs import settings
|
||||
from mediaflow_proxy.utils.http_utils import create_httpx_client, DownloadError
|
||||
from mediaflow_proxy.utils.http_client import create_aiohttp_session
|
||||
from mediaflow_proxy.utils.http_utils import DownloadError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ExtractorError(Exception):
    """Base exception for all extractors."""
|
||||
|
||||
|
||||
@dataclass
class HttpResponse:
    """
    Simple response container for extractor HTTP requests.

    Uses aiohttp-style naming conventions:
    - status (not status_code)
    - text (pre-loaded content as string)
    - content (pre-loaded content as bytes)
    """

    status: int
    headers: Dict[str, str]
    text: str
    content: bytes
    url: str

    def json(self) -> Any:
        """Parse the pre-loaded ``text`` as JSON and return the decoded value."""
        # Inside a method body ``json`` resolves to the module-level import,
        # not to this method (class scope is not an enclosing scope).
        return json.loads(self.text)

    def get_origin(self) -> str:
        """Return the origin (``scheme://netloc``) of the response URL."""
        parsed = urlparse(self.url)
        return f"{parsed.scheme}://{parsed.netloc}"
|
||||
|
||||
|
||||
class BaseExtractor(ABC):
|
||||
"""Base class for all URL extractors.
|
||||
|
||||
@@ -43,74 +75,99 @@ class BaseExtractor(ABC):
|
||||
backoff_factor: float = 0.5,
|
||||
raise_on_status: bool = True,
|
||||
**kwargs,
|
||||
) -> httpx.Response:
|
||||
) -> HttpResponse:
|
||||
"""
|
||||
Make HTTP request with retry and timeout support.
|
||||
Make HTTP request with retry and timeout support using aiohttp.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
url : str
|
||||
The URL to request.
|
||||
method : str
|
||||
HTTP method (GET, POST, etc.). Defaults to GET.
|
||||
headers : dict | None
|
||||
Additional headers to merge with base headers.
|
||||
timeout : float | None
|
||||
Seconds to wait for the request (applied to httpx.Timeout). Defaults to 15s.
|
||||
Seconds to wait for the request. Defaults to 15s.
|
||||
retries : int
|
||||
Number of attempts for transient errors.
|
||||
backoff_factor : float
|
||||
Base for exponential backoff between retries.
|
||||
raise_on_status : bool
|
||||
If True, HTTP non-2xx raises DownloadError (preserves status code).
|
||||
If True, HTTP non-2xx raises DownloadError.
|
||||
**kwargs
|
||||
Additional arguments passed to aiohttp request (e.g., data, json).
|
||||
|
||||
Returns
|
||||
-------
|
||||
HttpResponse
|
||||
Response object with pre-loaded content.
|
||||
"""
|
||||
attempt = 0
|
||||
last_exc = None
|
||||
|
||||
# build request headers merging base and per-request
|
||||
# Build request headers merging base and per-request
|
||||
request_headers = self.base_headers.copy()
|
||||
if headers:
|
||||
request_headers.update(headers)
|
||||
|
||||
timeout_cfg = httpx.Timeout(timeout or 15.0)
|
||||
timeout_val = timeout or 15.0
|
||||
|
||||
while attempt < retries:
|
||||
try:
|
||||
async with create_httpx_client(timeout=timeout_cfg) as client:
|
||||
response = await client.request(
|
||||
async with create_aiohttp_session(url, timeout=timeout_val) as (session, proxy_url):
|
||||
async with session.request(
|
||||
method,
|
||||
url,
|
||||
headers=request_headers,
|
||||
proxy=proxy_url,
|
||||
**kwargs,
|
||||
)
|
||||
) as response:
|
||||
# Read content while session is still open
|
||||
content = await response.read()
|
||||
text = content.decode("utf-8", errors="replace")
|
||||
final_url = str(response.url)
|
||||
status = response.status
|
||||
resp_headers = dict(response.headers)
|
||||
|
||||
if raise_on_status:
|
||||
try:
|
||||
response.raise_for_status()
|
||||
except httpx.HTTPStatusError as e:
|
||||
# Provide a short body preview for debugging
|
||||
body_preview = ""
|
||||
try:
|
||||
body_preview = e.response.text[:500]
|
||||
except Exception:
|
||||
body_preview = "<unreadable body>"
|
||||
if raise_on_status and status >= 400:
|
||||
body_preview = text[:500]
|
||||
logger.debug(
|
||||
"HTTPStatusError for %s (status=%s) -- body preview: %s",
|
||||
"HTTP error for %s (status=%s) -- body preview: %s",
|
||||
url,
|
||||
e.response.status_code,
|
||||
status,
|
||||
body_preview,
|
||||
)
|
||||
raise DownloadError(e.response.status_code, f"HTTP error {e.response.status_code} while requesting {url}")
|
||||
return response
|
||||
raise DownloadError(status, f"HTTP error {status} while requesting {url}")
|
||||
|
||||
return HttpResponse(
|
||||
status=status,
|
||||
headers=resp_headers,
|
||||
text=text,
|
||||
content=content,
|
||||
url=final_url,
|
||||
)
|
||||
|
||||
except DownloadError:
|
||||
# Do not retry on explicit HTTP status errors (they are intentional)
|
||||
raise
|
||||
except (httpx.ReadTimeout, httpx.ConnectTimeout, httpx.NetworkError, httpx.TransportError) as e:
|
||||
# Transient network error — retry with backoff
|
||||
except (asyncio.TimeoutError, aiohttp.ClientError) as e:
|
||||
# Transient network error - retry with backoff
|
||||
last_exc = e
|
||||
attempt += 1
|
||||
sleep_for = backoff_factor * (2 ** (attempt - 1))
|
||||
logger.warning("Transient network error (attempt %s/%s) for %s: %s — retrying in %.1fs",
|
||||
attempt, retries, url, e, sleep_for)
|
||||
logger.warning(
|
||||
"Transient network error (attempt %s/%s) for %s: %s — retrying in %.1fs",
|
||||
attempt,
|
||||
retries,
|
||||
url,
|
||||
e,
|
||||
sleep_for,
|
||||
)
|
||||
await asyncio.sleep(sleep_for)
|
||||
continue
|
||||
except Exception as e:
|
||||
# Unexpected exception — wrap as ExtractorError to keep interface consistent
|
||||
# Unexpected exception - wrap as ExtractorError to keep interface consistent
|
||||
logger.exception("Unhandled exception while requesting %s: %s", url, e)
|
||||
raise ExtractorError(f"Request failed for URL {url}: {str(e)}")
|
||||
|
||||
|
||||
+588
-216
@@ -1,133 +1,345 @@
|
||||
import hashlib
|
||||
import hmac
|
||||
import re
|
||||
import base64
|
||||
import time
|
||||
import logging
|
||||
|
||||
from typing import Any, Dict, Optional, List
|
||||
from urllib.parse import urlparse, quote_plus, urljoin
|
||||
from typing import Any, Dict, Optional
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import aiohttp
|
||||
|
||||
import httpx
|
||||
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError, HttpResponse
|
||||
from mediaflow_proxy.utils.http_client import create_aiohttp_session
|
||||
from mediaflow_proxy.configs import settings
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)

# Silence the ConnectionResetError noise on Windows by only letting
# CRITICAL records through the "asyncio" logger.
logging.getLogger("asyncio").setLevel(logging.CRITICAL)
|
||||
|
||||
# Default fingerprint parameters
DEFAULT_DLHD_USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0"
DEFAULT_DLHD_SCREEN_RESOLUTION = "1920x1080"
DEFAULT_DLHD_TIMEZONE = "UTC"
DEFAULT_DLHD_LANGUAGE = "en"


def compute_fingerprint(
    user_agent: str = DEFAULT_DLHD_USER_AGENT,
    screen_resolution: str = DEFAULT_DLHD_SCREEN_RESOLUTION,
    timezone: str = DEFAULT_DLHD_TIMEZONE,
    language: str = DEFAULT_DLHD_LANGUAGE,
) -> str:
    """
    Compute the X-Fingerprint header value.

    Algorithm:
        fingerprint = SHA256(useragent + screen_resolution + timezone + language).hex()[:16]

    The four inputs are concatenated with no separator before hashing.

    Args:
        user_agent: The user agent string
        screen_resolution: The screen resolution (e.g., "1920x1080")
        timezone: The timezone (e.g., "UTC")
        language: The language code (e.g., "en")

    Returns:
        The 16-character fingerprint (first 16 hex digits of the digest)
    """
    combined = f"{user_agent}{screen_resolution}{timezone}{language}"
    return hashlib.sha256(combined.encode("utf-8")).hexdigest()[:16]
|
||||
|
||||
|
||||
def compute_key_path(resource: str, number: str, timestamp: int, fingerprint: str, secret_key: str) -> str:
    """
    Compute the X-Key-Path header value.

    Algorithm:
        key_path = HMAC-SHA256("resource|number|timestamp|fingerprint", secret_key).hex()[:16]

    Args:
        resource: The resource from the key URL
        number: The number from the key URL
        timestamp: The Unix timestamp
        fingerprint: The fingerprint value
        secret_key: The HMAC secret key (channel_salt)

    Returns:
        The 16-character key path (first 16 hex digits of the HMAC digest)
    """
    # The message fields are joined with "|"; secret_key is the HMAC key.
    message = f"{resource}|{number}|{timestamp}|{fingerprint}"
    digest = hmac.new(secret_key.encode("utf-8"), message.encode("utf-8"), hashlib.sha256).hexdigest()
    return digest[:16]
|
||||
|
||||
|
||||
def compute_key_headers(key_url: str, secret_key: str) -> tuple[int, int, str, str] | None:
|
||||
"""
|
||||
Compute X-Key-Timestamp, X-Key-Nonce, X-Key-Path, and X-Fingerprint for a /key/ URL.
|
||||
|
||||
Algorithm:
|
||||
1. Extract resource and number from URL pattern /key/{resource}/{number}
|
||||
2. ts = Unix timestamp in seconds
|
||||
3. hmac_hash = HMAC-SHA256(resource, secret_key).hex()
|
||||
4. nonce = proof-of-work: find i where MD5(hmac+resource+number+ts+i)[:4] < 0x1000
|
||||
5. fingerprint = compute_fingerprint()
|
||||
6. key_path = HMAC-SHA256("resource|number|ts|fingerprint", secret_key).hex()[:16]
|
||||
|
||||
Args:
|
||||
key_url: The key URL containing /key/{resource}/{number}
|
||||
secret_key: The HMAC secret key (channel_salt)
|
||||
|
||||
Returns:
|
||||
Tuple of (timestamp, nonce, key_path, fingerprint) or None if URL doesn't match pattern
|
||||
"""
|
||||
# Extract resource and number from URL
|
||||
pattern = r"/key/([^/]+)/(\d+)"
|
||||
match = re.search(pattern, key_url)
|
||||
|
||||
if not match:
|
||||
return None
|
||||
|
||||
resource = match.group(1)
|
||||
number = match.group(2)
|
||||
|
||||
ts = int(time.time())
|
||||
|
||||
# Compute HMAC-SHA256
|
||||
hmac_hash = hmac.new(secret_key.encode("utf-8"), resource.encode("utf-8"), hashlib.sha256).hexdigest()
|
||||
|
||||
# Proof-of-work loop
|
||||
nonce = 0
|
||||
for i in range(100000):
|
||||
combined = f"{hmac_hash}{resource}{number}{ts}{i}"
|
||||
md5_hash = hashlib.md5(combined.encode("utf-8")).hexdigest()
|
||||
prefix_value = int(md5_hash[:4], 16)
|
||||
|
||||
if prefix_value < 0x1000: # < 4096
|
||||
nonce = i
|
||||
break
|
||||
|
||||
fingerprint = compute_fingerprint()
|
||||
key_path = compute_key_path(resource, number, ts, fingerprint, secret_key)
|
||||
|
||||
return ts, nonce, key_path, fingerprint
|
||||
|
||||
|
||||
class DLHDExtractor(BaseExtractor):
|
||||
"""DLHD (DaddyLive) URL extractor for M3U8 streams.
|
||||
|
||||
|
||||
Notes:
|
||||
- Multi-domain support for daddylive.sx / dlhd.dad
|
||||
- Robust extraction of auth parameters and server lookup
|
||||
- Uses retries/timeouts via BaseExtractor where possible
|
||||
- Multi-iframe fallback for resilience
|
||||
Supports the new authentication flow with:
|
||||
- EPlayerAuth extraction (auth_token, channel_key, channel_salt)
|
||||
- Server lookup for dynamic server selection
|
||||
- Dynamic key header computation for AES-128 encrypted streams
|
||||
"""
|
||||
|
||||
|
||||
def __init__(self, request_headers: dict):
    """Initialise the extractor and its per-instance FlareSolverr state.

    Args:
        request_headers: Passed through unchanged to ``BaseExtractor.__init__``.
    """
    super().__init__(request_headers)
    # The scraped diff showed two assignments here ("hls_manifest_proxy" then
    # "hls_key_proxy"); only the last one takes effect, so only it is kept.
    self.mediaflow_endpoint = "hls_key_proxy"
    self._iframe_context: Optional[str] = None
    # Filled in by _fetch_via_flaresolverr and reused by _make_request.
    self._flaresolverr_cookies: Optional[str] = None
    self._flaresolverr_user_agent: Optional[str] = None
|
||||
|
||||
async def _fetch_via_flaresolverr(self, url: str) -> HttpResponse:
    """Fetch a URL using FlareSolverr to bypass Cloudflare protection.

    Sends a ``request.get`` command to ``<settings.flaresolverr_url>/v1`` and
    wraps the returned solution in an ``HttpResponse``. Cookies and the
    user-agent reported by FlareSolverr are stored on the instance so that
    later requests can reuse them.

    Args:
        url: The page to fetch through FlareSolverr.

    Returns:
        HttpResponse built from the solution (headers are always empty).

    Raises:
        ExtractorError: If FlareSolverr is not configured, answers with a
            non-200 HTTP status, or reports a status other than "ok".
    """
    if not settings.flaresolverr_url:
        raise ExtractorError("FlareSolverr URL not configured. Set FLARESOLVERR_URL in environment.")

    flaresolverr_endpoint = f"{settings.flaresolverr_url.rstrip('/')}/v1"
    payload = {
        "cmd": "request.get",
        "url": url,
        # presumably seconds -> milliseconds; confirm against settings
        "maxTimeout": settings.flaresolverr_timeout * 1000,
    }

    logger.info(f"Using FlareSolverr to fetch: {url}")

    async with aiohttp.ClientSession() as session:
        async with session.post(
            flaresolverr_endpoint,
            json=payload,
            # Client-side timeout is 10 units longer than FlareSolverr's own budget.
            timeout=aiohttp.ClientTimeout(total=settings.flaresolverr_timeout + 10),
        ) as response:
            if response.status != 200:
                raise ExtractorError(f"FlareSolverr returned status {response.status}")

            data = await response.json()

    if data.get("status") != "ok":
        raise ExtractorError(f"FlareSolverr failed: {data.get('message', 'Unknown error')}")

    solution = data.get("solution", {})
    html_content = solution.get("response", "")
    final_url = solution.get("url", url)
    status = solution.get("status", 200)

    # Store cookies and user-agent for subsequent requests
    cookies = solution.get("cookies", [])
    if cookies:
        cookie_str = "; ".join([f"{c['name']}={c['value']}" for c in cookies])
        self._flaresolverr_cookies = cookie_str
        logger.info(f"FlareSolverr provided {len(cookies)} cookies")

    user_agent = solution.get("userAgent")
    if user_agent:
        self._flaresolverr_user_agent = user_agent
        logger.info(f"FlareSolverr user-agent: {user_agent}")

    logger.info(f"FlareSolverr successfully bypassed Cloudflare for: {url}")

    return HttpResponse(
        status=status,
        headers={},
        text=html_content,
        content=html_content.encode("utf-8", errors="replace"),
        url=final_url,
    )
|
||||
|
||||
async def _make_request(
    self, url: str, method: str = "GET", headers: Optional[Dict] = None, use_flaresolverr: bool = False, **kwargs
) -> HttpResponse:
    """Override to disable SSL verification and optionally use FlareSolverr.

    Args:
        url: The URL to request.
        method: HTTP method; ignored on the FlareSolverr path, which only
            receives ``url``.
        headers: Extra headers merged over ``self.base_headers``.
        use_flaresolverr: When True and ``settings.flaresolverr_url`` is set,
            delegate to ``_fetch_via_flaresolverr``.
        **kwargs: ``timeout`` (default 15) is honoured; ``retries`` and
            ``backoff_factor`` are accepted and discarded; anything else is
            forwarded to ``session.request``.

    Returns:
        HttpResponse with the body already read.

    Raises:
        ExtractorError: If the response status is 400 or above.
    """
    # Use FlareSolverr for Cloudflare-protected pages
    if use_flaresolverr and settings.flaresolverr_url:
        return await self._fetch_via_flaresolverr(url)

    timeout = kwargs.pop("timeout", 15)
    kwargs.pop("retries", 3)  # consumed but not used directly
    kwargs.pop("backoff_factor", 0.5)  # consumed but not used directly

    # Merge headers
    request_headers = self.base_headers.copy()
    if headers:
        request_headers.update(headers)

    # Add FlareSolverr cookies if available
    if self._flaresolverr_cookies:
        existing_cookies = request_headers.get("Cookie", "")
        if existing_cookies:
            request_headers["Cookie"] = f"{existing_cookies}; {self._flaresolverr_cookies}"
        else:
            request_headers["Cookie"] = self._flaresolverr_cookies

    # Use FlareSolverr user-agent if available
    if self._flaresolverr_user_agent:
        request_headers["User-Agent"] = self._flaresolverr_user_agent

    # Use create_aiohttp_session with verify=False for SSL bypass
    async with create_aiohttp_session(url, timeout=timeout, verify=False) as (session, proxy_url):
        async with session.request(method, url, headers=request_headers, proxy=proxy_url, **kwargs) as response:
            # Read everything while the session is still open.
            content = await response.read()
            final_url = str(response.url)
            status = response.status
            resp_headers = dict(response.headers)

            if status >= 400:
                raise ExtractorError(f"HTTP error {status} while requesting {url}")

            return HttpResponse(
                status=status,
                headers=resp_headers,
                text=content.decode("utf-8", errors="replace"),
                content=content,
                url=final_url,
            )
|
||||
|
||||
async def _extract_session_data(self, iframe_url: str, main_url: str) -> dict | None:
    """
    Fetch the iframe URL and extract auth_token, channel_key, and channel_salt.

    Args:
        iframe_url: The iframe URL to fetch
        main_url: The main site domain for Referer header

    Returns:
        Dict with auth_token, channel_key, channel_salt (plus
        server_lookup_url when a lookup base URL is found in the page),
        or None if the fetch fails or any of the three values is missing
    """
    headers = {
        "User-Agent": self._flaresolverr_user_agent or DEFAULT_DLHD_USER_AGENT,
        "Referer": f"https://{main_url}/",
    }

    try:
        resp = await self._make_request(iframe_url, headers=headers, timeout=12)
        html = resp.text
    except Exception as e:
        # Best-effort: a failed fetch is reported as "no session data".
        logger.warning(f"Error fetching iframe URL: {e}")
        return None

    # Pattern to extract EPlayerAuth.init block with authToken, channelKey, channelSalt
    # Matches: EPlayerAuth.init({ authToken: '...', channelKey: '...', ..., channelSalt: '...' });
    auth_pattern = r"EPlayerAuth\.init\s*\(\s*\{\s*authToken:\s*'([^']+)'"
    channel_key_pattern = r"channelKey:\s*'([^']+)'"
    channel_salt_pattern = r"channelSalt:\s*'([^']+)'"

    # Pattern to extract server lookup base URL from fetchWithRetry call
    lookup_pattern = r"fetchWithRetry\s*\(\s*'([^']+server_lookup\?channel_id=)"

    auth_match = re.search(auth_pattern, html)
    channel_key_match = re.search(channel_key_pattern, html)
    channel_salt_match = re.search(channel_salt_pattern, html)
    lookup_match = re.search(lookup_pattern, html)

    if not (auth_match and channel_key_match and channel_salt_match):
        return None

    result = {
        "auth_token": auth_match.group(1),
        "channel_key": channel_key_match.group(1),
        "channel_salt": channel_salt_match.group(1),
    }
    if lookup_match:
        # The captured base ends with "channel_id=", so the key is appended directly.
        result["server_lookup_url"] = lookup_match.group(1) + result["channel_key"]

    return result
|
||||
|
||||
async def _get_server_key(self, server_lookup_url: str, iframe_url: str) -> str | None:
    """
    Fetch the server lookup URL and extract the server_key.

    Args:
        server_lookup_url: The server lookup URL
        iframe_url: The iframe URL for extracting the host for headers

    Returns:
        The server_key or None if not found (any error during the request
        or JSON parsing is logged and also yields None)
    """
    iframe_host = urlparse(iframe_url).netloc

    headers = {
        "User-Agent": self._flaresolverr_user_agent or DEFAULT_DLHD_USER_AGENT,
        "Referer": f"https://{iframe_host}/",
        "Origin": f"https://{iframe_host}",
    }

    try:
        resp = await self._make_request(server_lookup_url, headers=headers, timeout=10)
        data = resp.json()
        return data.get("server_key")
    except Exception as e:
        logger.warning(f"Error fetching server lookup: {e}")
        return None
|
||||
|
||||
def _build_m3u8_url(self, server_key: str, channel_key: str) -> str:
|
||||
"""
|
||||
Build the m3u8 URL based on the server_key.
|
||||
|
||||
Args:
|
||||
server_key: The server key from server lookup
|
||||
channel_key: The channel key
|
||||
|
||||
Returns:
|
||||
The m3u8 URL (with .css extension as per the original implementation)
|
||||
"""
|
||||
if server_key == "top1/cdn":
|
||||
return f"https://top1.dvalna.ru/top1/cdn/{channel_key}/mono.css"
|
||||
else:
|
||||
return f"https://{server_key}new.dvalna.ru/{server_key}/{channel_key}/mono.css"
|
||||
|
||||
async def _extract_new_auth_flow(self, iframe_url: str, iframe_content: str, headers: dict) -> Dict[str, Any]:
|
||||
"""Handles the new authentication flow found in recent updates."""
|
||||
|
||||
|
||||
def _extract_params(js: str) -> Dict[str, Optional[str]]:
|
||||
params = {}
|
||||
patterns = {
|
||||
@@ -143,82 +355,93 @@ class DLHDExtractor(BaseExtractor):
|
||||
return params
|
||||
|
||||
params = _extract_params(iframe_content)
|
||||
|
||||
|
||||
missing_params = [k for k, v in params.items() if not v]
|
||||
if missing_params:
|
||||
# This is not an error, just means it's not the new flow
|
||||
raise ExtractorError(f"Not the new auth flow: missing params {missing_params}")
|
||||
|
||||
logger.info("New auth flow detected. Proceeding with POST auth.")
|
||||
|
||||
|
||||
# 1. Initial Auth POST
|
||||
auth_url = 'https://security.newkso.ru/auth2.php'
|
||||
# Use files parameter to force multipart/form-data which is required by the server
|
||||
# (None, value) tells httpx to send it as a form field, not a file upload
|
||||
multipart_data = {
|
||||
'channelKey': (None, params["channel_key"]),
|
||||
'country': (None, params["auth_country"]),
|
||||
'timestamp': (None, params["auth_ts"]),
|
||||
'expiry': (None, params["auth_expiry"]),
|
||||
'token': (None, params["auth_token"]),
|
||||
}
|
||||
auth_url = "https://security.newkso.ru/auth2.php"
|
||||
|
||||
iframe_origin = f"https://{urlparse(iframe_url).netloc}"
|
||||
auth_headers = headers.copy()
|
||||
auth_headers.update({
|
||||
'Accept': '*/*',
|
||||
'Accept-Language': 'en-US,en;q=0.9',
|
||||
'Origin': iframe_origin,
|
||||
'Referer': iframe_url,
|
||||
'Sec-Fetch-Dest': 'empty',
|
||||
'Sec-Fetch-Mode': 'cors',
|
||||
'Sec-Fetch-Site': 'cross-site',
|
||||
'Priority': 'u=1, i',
|
||||
})
|
||||
|
||||
from mediaflow_proxy.utils.http_utils import create_httpx_client
|
||||
auth_headers.update(
|
||||
{
|
||||
"Accept": "*/*",
|
||||
"Accept-Language": "en-US,en;q=0.9",
|
||||
"Origin": iframe_origin,
|
||||
"Referer": iframe_url,
|
||||
"Sec-Fetch-Dest": "empty",
|
||||
"Sec-Fetch-Mode": "cors",
|
||||
"Sec-Fetch-Site": "cross-site",
|
||||
"Priority": "u=1, i",
|
||||
}
|
||||
)
|
||||
|
||||
# Build form data for multipart/form-data
|
||||
form_data = aiohttp.FormData()
|
||||
form_data.add_field("channelKey", params["channel_key"])
|
||||
form_data.add_field("country", params["auth_country"])
|
||||
form_data.add_field("timestamp", params["auth_ts"])
|
||||
form_data.add_field("expiry", params["auth_expiry"])
|
||||
form_data.add_field("token", params["auth_token"])
|
||||
|
||||
try:
|
||||
async with create_httpx_client(verify=False) as client:
|
||||
# Note: using 'files' instead of 'data' to ensure multipart/form-data Content-Type
|
||||
auth_resp = await client.post(auth_url, files=multipart_data, headers=auth_headers, timeout=12)
|
||||
auth_resp.raise_for_status()
|
||||
auth_data = auth_resp.json()
|
||||
if not (auth_data.get("valid") or auth_data.get("success")):
|
||||
raise ExtractorError(f"Initial auth failed with response: {auth_data}")
|
||||
async with create_aiohttp_session(auth_url, timeout=12, verify=False) as (session, proxy_url):
|
||||
async with session.post(
|
||||
auth_url,
|
||||
headers=auth_headers,
|
||||
data=form_data,
|
||||
proxy=proxy_url,
|
||||
) as response:
|
||||
content = await response.read()
|
||||
response.raise_for_status()
|
||||
import json
|
||||
|
||||
auth_data = json.loads(content.decode("utf-8"))
|
||||
if not (auth_data.get("valid") or auth_data.get("success")):
|
||||
raise ExtractorError(f"Initial auth failed with response: {auth_data}")
|
||||
logger.info("New auth flow: Initial auth successful.")
|
||||
except ExtractorError:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise ExtractorError(f"New auth flow failed during initial auth POST: {e}")
|
||||
|
||||
# 2. Server Lookup
|
||||
server_lookup_url = f"https://{urlparse(iframe_url).netloc}/server_lookup.js?channel_id={params['channel_key']}"
|
||||
try:
|
||||
# Use _make_request as it handles retries and expects JSON
|
||||
# Use _make_request as it handles retries
|
||||
lookup_resp = await self._make_request(server_lookup_url, headers=headers, timeout=10)
|
||||
server_data = lookup_resp.json()
|
||||
server_key = server_data.get('server_key')
|
||||
server_key = server_data.get("server_key")
|
||||
if not server_key:
|
||||
raise ExtractorError(f"No server_key in lookup response: {server_data}")
|
||||
logger.info(f"New auth flow: Server lookup successful - Server key: {server_key}")
|
||||
except ExtractorError:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise ExtractorError(f"New auth flow failed during server lookup: {e}")
|
||||
|
||||
# 3. Build final stream URL
|
||||
channel_key = params['channel_key']
|
||||
auth_token = params['auth_token']
|
||||
channel_key = params["channel_key"]
|
||||
auth_token = params["auth_token"]
|
||||
# The JS logic uses .css, not .m3u8
|
||||
if server_key == 'top1/cdn':
|
||||
stream_url = f'https://top1.newkso.ru/top1/cdn/{channel_key}/mono.css'
|
||||
if server_key == "top1/cdn":
|
||||
stream_url = f"https://top1.newkso.ru/top1/cdn/{channel_key}/mono.css"
|
||||
else:
|
||||
stream_url = f'https://{server_key}new.newkso.ru/{server_key}/{channel_key}/mono.css'
|
||||
|
||||
logger.info(f'New auth flow: Constructed stream URL: {stream_url}')
|
||||
stream_url = f"https://{server_key}new.newkso.ru/{server_key}/{channel_key}/mono.css"
|
||||
|
||||
logger.info(f"New auth flow: Constructed stream URL: {stream_url}")
|
||||
|
||||
stream_headers = {
|
||||
'User-Agent': headers['User-Agent'],
|
||||
'Referer': iframe_url,
|
||||
'Origin': iframe_origin,
|
||||
'Authorization': f'Bearer {auth_token}',
|
||||
'X-Channel-Key': channel_key
|
||||
"User-Agent": headers["User-Agent"],
|
||||
"Referer": iframe_url,
|
||||
"Origin": iframe_origin,
|
||||
"Authorization": f"Bearer {auth_token}",
|
||||
"X-Channel-Key": channel_key,
|
||||
}
|
||||
|
||||
return {
|
||||
@@ -227,106 +450,255 @@ class DLHDExtractor(BaseExtractor):
|
||||
"mediaflow_endpoint": "hls_manifest_proxy",
|
||||
}
|
||||
|
||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||
"""Main extraction flow: resolve base, fetch players, extract iframe, auth and final m3u8."""
|
||||
baseurl = "https://dlhd.dad/"
|
||||
async def _extract_lovecdn_stream(self, iframe_url: str, iframe_content: str, headers: dict) -> Dict[str, Any]:
|
||||
"""
|
||||
Alternative extractor for lovecdn.ru iframe that uses a different format.
|
||||
"""
|
||||
try:
|
||||
# Look for direct stream URL patterns
|
||||
m3u8_patterns = [
|
||||
r'["\']([^"\']*\.m3u8[^"\']*)["\']',
|
||||
r'source[:\s]+["\']([^"\']+)["\']',
|
||||
r'file[:\s]+["\']([^"\']+\.m3u8[^"\']*)["\']',
|
||||
r'hlsManifestUrl[:\s]*["\']([^"\']+)["\']',
|
||||
]
|
||||
|
||||
def extract_channel_id(u: str) -> Optional[str]:
|
||||
match_watch_id = re.search(r'watch\.php\?id=(\d+)', u)
|
||||
if match_watch_id:
|
||||
return match_watch_id.group(1)
|
||||
return None
|
||||
stream_url = None
|
||||
for pattern in m3u8_patterns:
|
||||
matches = re.findall(pattern, iframe_content)
|
||||
for match in matches:
|
||||
if ".m3u8" in match and match.startswith("http"):
|
||||
stream_url = match
|
||||
logger.info(f"Found direct m3u8 URL: {stream_url}")
|
||||
break
|
||||
if stream_url:
|
||||
break
|
||||
|
||||
# Pattern 2: Look for dynamic URL construction
|
||||
if not stream_url:
|
||||
channel_match = re.search(r'(?:stream|channel)["\s:=]+["\']([^"\']+)["\']', iframe_content)
|
||||
server_match = re.search(r'(?:server|domain|host)["\s:=]+["\']([^"\']+)["\']', iframe_content)
|
||||
|
||||
async def get_stream_data(initial_url: str):
|
||||
daddy_origin = urlparse(baseurl).scheme + "://" + urlparse(baseurl).netloc
|
||||
daddylive_headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36',
|
||||
'Referer': baseurl,
|
||||
'Origin': daddy_origin
|
||||
if channel_match:
|
||||
channel_name = channel_match.group(1)
|
||||
server = server_match.group(1) if server_match else "newkso.ru"
|
||||
stream_url = f"https://{server}/{channel_name}/mono.m3u8"
|
||||
logger.info(f"Constructed stream URL: {stream_url}")
|
||||
|
||||
if not stream_url:
|
||||
# Fallback: look for any URL that looks like a stream
|
||||
url_pattern = r'https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*'
|
||||
matches = re.findall(url_pattern, iframe_content)
|
||||
if matches:
|
||||
stream_url = matches[0]
|
||||
logger.info(f"Found fallback stream URL: {stream_url}")
|
||||
|
||||
if not stream_url:
|
||||
raise ExtractorError("Could not find stream URL in lovecdn.ru iframe")
|
||||
|
||||
# Use iframe URL as referer
|
||||
iframe_origin = f"https://{urlparse(iframe_url).netloc}"
|
||||
stream_headers = {"User-Agent": headers["User-Agent"], "Referer": iframe_url, "Origin": iframe_origin}
|
||||
|
||||
# Determine endpoint based on the stream domain
|
||||
endpoint = "hls_key_proxy"
|
||||
|
||||
logger.info(f"Using lovecdn.ru stream with endpoint: {endpoint}")
|
||||
|
||||
return {
|
||||
"destination_url": stream_url,
|
||||
"request_headers": stream_headers,
|
||||
"mediaflow_endpoint": endpoint,
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
raise ExtractorError(f"Failed to extract lovecdn.ru stream: {e}")
|
||||
|
||||
# 1. Request initial page
|
||||
resp1 = await self._make_request(initial_url, headers=daddylive_headers, timeout=15)
|
||||
player_links = re.findall(r'<button[^>]*data-url="([^"]+)"[^>]*>Player\s*\d+</button>', resp1.text)
|
||||
if not player_links:
|
||||
raise ExtractorError("No player links found on the page.")
|
||||
async def _extract_direct_stream(self, channel_id: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Direct stream extraction using server lookup API with the new auth flow.
|
||||
This extracts auth_token, channel_key, channel_salt and computes key headers.
|
||||
"""
|
||||
# Common iframe domains for DLHD
|
||||
iframe_domains = ["lefttoplay.xyz"]
|
||||
|
||||
for iframe_domain in iframe_domains:
|
||||
try:
|
||||
iframe_url = f"https://{iframe_domain}/premiumtv/daddyhd.php?id={channel_id}"
|
||||
logger.info(f"Attempting extraction via {iframe_domain}")
|
||||
|
||||
# Prova tutti i player e raccogli tutti gli iframe validi
|
||||
last_player_error = None
|
||||
iframe_candidates = []
|
||||
session_data = await self._extract_session_data(iframe_url, "dlhd.link")
|
||||
|
||||
for player_url in player_links:
|
||||
try:
|
||||
if not player_url.startswith('http'):
|
||||
player_url = baseurl + player_url.lstrip('/')
|
||||
|
||||
|
||||
daddylive_headers['Referer'] = player_url
|
||||
daddylive_headers['Origin'] = player_url
|
||||
resp2 = await self._make_request(player_url, headers=daddylive_headers, timeout=12)
|
||||
iframes2 = re.findall(r'<iframe.*?src="([^"]*)"', resp2.text)
|
||||
|
||||
# Raccogli tutti gli iframe trovati
|
||||
for iframe in iframes2:
|
||||
if iframe not in iframe_candidates:
|
||||
iframe_candidates.append(iframe)
|
||||
logger.info(f"Found iframe candidate: {iframe}")
|
||||
|
||||
except Exception as e:
|
||||
last_player_error = e
|
||||
logger.warning(f"Failed to process player link {player_url}: {e}")
|
||||
if not session_data:
|
||||
logger.debug(f"No session data from {iframe_domain}")
|
||||
continue
|
||||
|
||||
logger.info(f"Got session data from {iframe_domain}: channel_key={session_data['channel_key']}")
|
||||
|
||||
if not iframe_candidates:
|
||||
if last_player_error:
|
||||
raise ExtractorError(f"All player links failed. Last error: {last_player_error}")
|
||||
raise ExtractorError("No valid iframe found in any player page")
|
||||
|
||||
|
||||
# Prova ogni iframe finché uno non funziona
|
||||
last_iframe_error = None
|
||||
|
||||
for iframe_candidate in iframe_candidates:
|
||||
try:
|
||||
logger.info(f"Trying iframe: {iframe_candidate}")
|
||||
|
||||
iframe_domain = urlparse(iframe_candidate).netloc
|
||||
if not iframe_domain:
|
||||
logger.warning(f"Invalid iframe URL format: {iframe_candidate}")
|
||||
continue
|
||||
|
||||
self._iframe_context = iframe_candidate
|
||||
resp3 = await self._make_request(iframe_candidate, headers=daddylive_headers, timeout=12)
|
||||
iframe_content = resp3.text
|
||||
logger.info(f"Successfully loaded iframe from: {iframe_domain}")
|
||||
|
||||
if 'lovecdn.ru' in iframe_domain:
|
||||
logger.info("Detected lovecdn.ru iframe - using alternative extraction")
|
||||
return await self._extract_lovecdn_stream(iframe_candidate, iframe_content, daddylive_headers)
|
||||
else:
|
||||
logger.info("Attempting new auth flow extraction.")
|
||||
return await self._extract_new_auth_flow(iframe_candidate, iframe_content, daddylive_headers)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to process iframe {iframe_candidate}: {e}")
|
||||
last_iframe_error = e
|
||||
# Get server key
|
||||
if "server_lookup_url" not in session_data:
|
||||
logger.debug(f"No server lookup URL from {iframe_domain}")
|
||||
continue
|
||||
|
||||
raise ExtractorError(f"All iframe candidates failed. Last error: {last_iframe_error}")
|
||||
server_key = await self._get_server_key(session_data["server_lookup_url"], iframe_url)
|
||||
|
||||
if not server_key:
|
||||
logger.debug(f"No server key from {iframe_domain}")
|
||||
continue
|
||||
|
||||
logger.info(f"Got server key: {server_key}")
|
||||
|
||||
# Build m3u8 URL
|
||||
m3u8_url = self._build_m3u8_url(server_key, session_data["channel_key"])
|
||||
logger.info(f"M3U8 URL: {m3u8_url}")
|
||||
|
||||
# Build stream headers with auth
|
||||
iframe_origin = f"https://{iframe_domain}"
|
||||
stream_headers = {
|
||||
"User-Agent": self._flaresolverr_user_agent or DEFAULT_DLHD_USER_AGENT,
|
||||
"Referer": iframe_url,
|
||||
"Origin": iframe_origin,
|
||||
"Authorization": f"Bearer {session_data['auth_token']}",
|
||||
}
|
||||
|
||||
# Return the result with key header parameters
|
||||
# These will be used to compute headers when fetching keys
|
||||
return {
|
||||
"destination_url": m3u8_url,
|
||||
"request_headers": stream_headers,
|
||||
"mediaflow_endpoint": "hls_key_proxy",
|
||||
# Force playlist processing since DLHD uses .css extension for m3u8
|
||||
"force_playlist_proxy": True,
|
||||
# Key header computation parameters
|
||||
"dlhd_key_params": {
|
||||
"channel_salt": session_data["channel_salt"],
|
||||
"auth_token": session_data["auth_token"],
|
||||
"iframe_url": iframe_url,
|
||||
},
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed extraction via {iframe_domain}: {e}")
|
||||
continue
|
||||
|
||||
raise ExtractorError(f"Failed to extract stream from all iframe domains for channel {channel_id}")
|
||||
|
||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||
"""Main extraction flow - uses direct server lookup with new auth flow."""
|
||||
|
||||
def extract_channel_id(u: str) -> Optional[str]:
|
||||
match_watch_id = re.search(r"watch\.php\?id=(\d+)", u)
|
||||
if match_watch_id:
|
||||
return match_watch_id.group(1)
|
||||
# Also try stream-XXX pattern
|
||||
match_stream = re.search(r"stream-(\d+)", u)
|
||||
if match_stream:
|
||||
return match_stream.group(1)
|
||||
return None
|
||||
|
||||
try:
|
||||
channel_id = extract_channel_id(url)
|
||||
if not channel_id:
|
||||
raise ExtractorError(f"Unable to extract channel ID from {url}")
|
||||
|
||||
logger.info(f"Using base domain: {baseurl}")
|
||||
return await get_stream_data(url)
|
||||
logger.info(f"Extracting DLHD stream for channel ID: {channel_id}")
|
||||
|
||||
# Try direct stream extraction with new auth flow
|
||||
try:
|
||||
return await self._extract_direct_stream(channel_id)
|
||||
except ExtractorError as e:
|
||||
logger.warning(f"Direct stream extraction failed: {e}")
|
||||
|
||||
# Fallback to legacy iframe-based extraction if direct fails
|
||||
logger.info("Falling back to iframe-based extraction...")
|
||||
return await self._extract_via_iframe(url, channel_id)
|
||||
|
||||
except Exception as e:
|
||||
raise ExtractorError(f"Extraction failed: {str(e)}")
|
||||
|
||||
async def _extract_via_iframe(self, url: str, channel_id: str) -> Dict[str, Any]:
    """Legacy iframe-based extraction flow - used as fallback.

    Steps:
      1. Fetch the channel page (through FlareSolverr when it is configured).
      2. Collect the "Player N" button links and fetch every player page.
      3. Gather all iframe URLs embedded in those player pages.
      4. Try each iframe in turn and delegate to the lovecdn.ru extractor or
         to the new auth flow extractor; the first success is returned.

    Args:
        url: Channel page URL to start from.
        channel_id: Channel identifier. Kept for interface compatibility; it
            is not read by this method.

    Returns:
        Whatever the delegated extractor returns for the first iframe that
        works.

    Raises:
        ExtractorError: If no player links or iframes are found, or if every
            iframe candidate fails.
    """
    # Local import so this method does not depend on the module-level import
    # list already containing urljoin.
    from urllib.parse import urljoin

    baseurl = "https://dlhd.dad/"

    def origin_of(page_url: str) -> str:
        # An Origin header carries scheme://host[:port] only, never a path.
        parts = urlparse(page_url)
        return f"{parts.scheme}://{parts.netloc}"

    daddylive_headers = {
        "User-Agent": self._flaresolverr_user_agent
        or "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
        "Referer": baseurl,
        "Origin": origin_of(baseurl),
    }

    # 1. Request initial page - use FlareSolverr if available to bypass Cloudflare
    use_flaresolverr = settings.flaresolverr_url is not None
    resp1 = await self._make_request(url, headers=daddylive_headers, timeout=15, use_flaresolverr=use_flaresolverr)
    resp1_text = resp1.text

    # Update headers with FlareSolverr user-agent after initial request
    if self._flaresolverr_user_agent:
        daddylive_headers["User-Agent"] = self._flaresolverr_user_agent

    player_links = re.findall(r'<button[^>]*data-url="([^"]+)"[^>]*>Player\s*\d+</button>', resp1_text)
    if not player_links:
        raise ExtractorError("No player links found on the page.")

    # 2./3. Try all players and collect every iframe together with the page
    # that embeds it, so the iframe can later be requested with a matching
    # Referer/Origin instead of whatever the last loop iteration left behind.
    last_player_error = None
    iframe_candidates = []  # iframe URLs, in discovery order
    embedding_pages = {}  # iframe URL -> player page it was found on

    for player_url in player_links:
        try:
            if not player_url.startswith("http"):
                player_url = baseurl + player_url.lstrip("/")

            daddylive_headers["Referer"] = player_url
            daddylive_headers["Origin"] = origin_of(player_url)
            resp2 = await self._make_request(player_url, headers=daddylive_headers, timeout=12)
            resp2_text = resp2.text

            for src in re.findall(r'<iframe.*?src="([^"]*)"', resp2_text):
                if not src:
                    # An empty src would resolve to the player page itself.
                    continue
                # Resolve relative and protocol-relative src values; absolute
                # URLs pass through unchanged.
                iframe = urljoin(player_url, src)
                if iframe not in embedding_pages:
                    embedding_pages[iframe] = player_url
                    iframe_candidates.append(iframe)
                    logger.info(f"Found iframe candidate: {iframe}")

        except Exception as e:
            # Best effort: one broken player page must not abort the others.
            last_player_error = e
            logger.warning(f"Failed to process player link {player_url}: {e}")

    if not iframe_candidates:
        if last_player_error:
            raise ExtractorError(f"All player links failed. Last error: {last_player_error}")
        raise ExtractorError("No valid iframe found in any player page")

    # 4. Try each iframe until one works
    last_iframe_error = None

    for iframe_candidate in iframe_candidates:
        try:
            logger.info(f"Trying iframe: {iframe_candidate}")

            iframe_domain = urlparse(iframe_candidate).netloc
            if not iframe_domain:
                logger.warning(f"Invalid iframe URL format: {iframe_candidate}")
                continue

            embedding_page = embedding_pages[iframe_candidate]
            daddylive_headers["Referer"] = embedding_page
            daddylive_headers["Origin"] = origin_of(embedding_page)

            self._iframe_context = iframe_candidate
            resp3 = await self._make_request(iframe_candidate, headers=daddylive_headers, timeout=12)
            iframe_content = resp3.text
            logger.info(f"Successfully loaded iframe from: {iframe_domain}")

            if "lovecdn.ru" in iframe_domain:
                logger.info("Detected lovecdn.ru iframe - using alternative extraction")
                return await self._extract_lovecdn_stream(iframe_candidate, iframe_content, daddylive_headers)

            logger.info("Attempting new auth flow extraction.")
            return await self._extract_new_auth_flow(iframe_candidate, iframe_content, daddylive_headers)

        except Exception as e:
            logger.warning(f"Failed to process iframe {iframe_candidate}: {e}")
            last_iframe_error = e

    if last_iframe_error is None:
        # Every candidate was skipped before a request was even attempted.
        raise ExtractorError("All iframe candidates failed. No candidate had a usable URL")
    raise ExtractorError(f"All iframe candidates failed. Last error: {last_iframe_error}")
|
||||
|
||||
@@ -1,39 +1,52 @@
|
||||
import re
|
||||
import time
|
||||
from typing import Dict
|
||||
from urllib.parse import urlparse, urljoin
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
|
||||
|
||||
class DoodStreamExtractor(BaseExtractor):
|
||||
"""DoodStream URL extractor."""
|
||||
"""
|
||||
Dood / MyVidPlay extractor
|
||||
Resolves to direct CDN MP4
|
||||
"""
|
||||
|
||||
def __init__(self, request_headers: dict):
|
||||
super().__init__(request_headers)
|
||||
self.base_url = "https://d000d.com"
|
||||
self.base_url = "https://myvidplay.com"
|
||||
|
||||
async def extract(self, url: str, **kwargs) -> Dict[str, str]:
|
||||
"""Extract DoodStream URL."""
|
||||
response = await self._make_request(url)
|
||||
async def extract(self, url: str, **kwargs):
|
||||
parsed = urlparse(url)
|
||||
video_id = parsed.path.rstrip("/").split("/")[-1]
|
||||
if not video_id:
|
||||
raise ExtractorError("Invalid Dood URL")
|
||||
|
||||
# Extract URL pattern
|
||||
pattern = r"(\/pass_md5\/.*?)'.*(\?token=.*?expiry=)"
|
||||
match = re.search(pattern, response.text, re.DOTALL)
|
||||
headers = {
|
||||
"User-Agent": self.base_headers.get("User-Agent") or "Mozilla/5.0",
|
||||
"Referer": f"{self.base_url}/",
|
||||
}
|
||||
|
||||
embed_url = f"{self.base_url}/e/{video_id}"
|
||||
html = (await self._make_request(embed_url, headers=headers)).text
|
||||
|
||||
match = re.search(r"(\/pass_md5\/[^']+)", html)
|
||||
if not match:
|
||||
raise ExtractorError("Failed to extract URL pattern")
|
||||
raise ExtractorError("Dood: pass_md5 not found")
|
||||
|
||||
# Build final URL
|
||||
pass_url = f"{self.base_url}{match[1]}"
|
||||
referer = f"{self.base_url}/"
|
||||
headers = {"range": "bytes=0-", "referer": referer}
|
||||
pass_url = urljoin(self.base_url, match.group(1))
|
||||
|
||||
response = await self._make_request(pass_url, headers=headers)
|
||||
timestamp = str(int(time.time()))
|
||||
final_url = f"{response.text}123456789{match[2]}{timestamp}"
|
||||
base_stream = (await self._make_request(pass_url, headers=headers)).text.strip()
|
||||
|
||||
token_match = re.search(r"token=([^&]+)", html)
|
||||
if not token_match:
|
||||
raise ExtractorError("Dood: token missing")
|
||||
|
||||
token = token_match.group(1)
|
||||
|
||||
final_url = f"{base_stream}123456789?token={token}&expiry={int(time.time())}"
|
||||
|
||||
self.base_headers["referer"] = referer
|
||||
return {
|
||||
"destination_url": final_url,
|
||||
"request_headers": self.base_headers,
|
||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||
"request_headers": headers,
|
||||
"mediaflow_endpoint": "proxy_stream_endpoint",
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ from mediaflow_proxy.extractors.sportsonline import SportsonlineExtractor
|
||||
from mediaflow_proxy.extractors.filelions import FileLionsExtractor
|
||||
from mediaflow_proxy.extractors.filemoon import FileMoonExtractor
|
||||
from mediaflow_proxy.extractors.F16Px import F16PxExtractor
|
||||
from mediaflow_proxy.extractors.gupload import GuploadExtractor
|
||||
from mediaflow_proxy.extractors.livetv import LiveTVExtractor
|
||||
from mediaflow_proxy.extractors.lulustream import LuluStreamExtractor
|
||||
from mediaflow_proxy.extractors.maxstream import MaxstreamExtractor
|
||||
@@ -33,6 +34,7 @@ class ExtractorFactory:
|
||||
"FileLions": FileLionsExtractor,
|
||||
"FileMoon": FileMoonExtractor,
|
||||
"F16Px": F16PxExtractor,
|
||||
"Gupload": GuploadExtractor,
|
||||
"Uqload": UqloadExtractor,
|
||||
"Mixdrop": MixdropExtractor,
|
||||
"Streamtape": StreamtapeExtractor,
|
||||
|
||||
@@ -4,25 +4,29 @@ from mediaflow_proxy.extractors.base import BaseExtractor
|
||||
from mediaflow_proxy.utils.packed import eval_solver
|
||||
|
||||
|
||||
|
||||
|
||||
class FastreamExtractor(BaseExtractor):
|
||||
"""Fastream URL extractor."""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.mediaflow_endpoint = "hls_manifest_proxy"
|
||||
|
||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||
headers = {'Accept': '*/*', 'Connection': 'keep-alive','Accept-Language': 'en-US,en;q=0.5','Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0'}
|
||||
headers = {
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
"Connection": "keep-alive",
|
||||
"Accept-Language": "en-US,en;q=0.5",
|
||||
"user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0",
|
||||
}
|
||||
patterns = [r'file:"(.*?)"']
|
||||
|
||||
final_url = await eval_solver(self, url, headers, patterns)
|
||||
|
||||
self.base_headers["referer"] = f'https://{url.replace("https://","").split("/")[0]}/'
|
||||
self.base_headers["origin"] = f'https://{url.replace("https://","").split("/")[0]}'
|
||||
self.base_headers['Accept-Language'] = 'en-US,en;q=0.5'
|
||||
self.base_headers['Accept'] = '*/*'
|
||||
self.base_headers['user-agent'] = 'Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0'
|
||||
self.base_headers["referer"] = f"https://{url.replace('https://', '').split('/')[0]}/"
|
||||
self.base_headers["origin"] = f"https://{url.replace('https://', '').split('/')[0]}"
|
||||
self.base_headers["Accept-Language"] = "en-US,en;q=0.5"
|
||||
self.base_headers["Accept"] = "*/*"
|
||||
self.base_headers["user-agent"] = "Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0"
|
||||
|
||||
return {
|
||||
"destination_url": final_url,
|
||||
|
||||
@@ -3,17 +3,18 @@ from typing import Dict, Any
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor
|
||||
from mediaflow_proxy.utils.packed import eval_solver
|
||||
|
||||
|
||||
class FileLionsExtractor(BaseExtractor):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.mediaflow_endpoint = "hls_manifest_proxy"
|
||||
|
||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||
headers = {}
|
||||
patterns = [ # See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/filelions.py
|
||||
r'''sources:\s*\[{file:\s*["'](?P<url>[^"']+)''',
|
||||
r'''["']hls4["']:\s*["'](?P<url>[^"']+)''',
|
||||
r'''["']hls2["']:\s*["'](?P<url>[^"']+)'''
|
||||
headers = {}
|
||||
patterns = [ # See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/filelions.py
|
||||
r"""sources:\s*\[{file:\s*["'](?P<url>[^"']+)""",
|
||||
r"""["']hls4["']:\s*["'](?P<url>[^"']+)""",
|
||||
r"""["']hls2["']:\s*["'](?P<url>[^"']+)""",
|
||||
]
|
||||
|
||||
final_url = await eval_solver(self, url, headers, patterns)
|
||||
@@ -23,4 +24,5 @@ class FileLionsExtractor(BaseExtractor):
|
||||
"destination_url": final_url,
|
||||
"request_headers": self.base_headers,
|
||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||
"stream_transformer": "ts_stream",
|
||||
}
|
||||
|
||||
@@ -40,7 +40,7 @@ class FileMoonExtractor(BaseExtractor):
|
||||
)
|
||||
|
||||
test_resp = await self._make_request(final_url, headers=headers)
|
||||
if test_resp.status_code == 404:
|
||||
if test_resp.status == 404:
|
||||
raise ExtractorError("Stream not found (404)")
|
||||
|
||||
self.base_headers["referer"] = url
|
||||
|
||||
@@ -0,0 +1,65 @@
|
||||
import re
|
||||
import base64
|
||||
import json
|
||||
from typing import Dict, Any
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
|
||||
|
||||
class GuploadExtractor(BaseExtractor):
    """Gupload URL extractor.

    Fetches a gupload.xyz embed page, decodes the base64 payload passed to
    ``decodePayload('...')`` and returns the HLS URL stored under its
    ``videoUrl`` key.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.mediaflow_endpoint = "hls_manifest_proxy"

    async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
        """Resolve a gupload.xyz embed URL to its HLS playlist.

        Args:
            url: Embed page URL on gupload.xyz (or one of its subdomains).
            **kwargs: Accepted for signature parity with the other
                extractors; ignored here.

        Returns:
            Dict with ``destination_url``, ``request_headers`` and
            ``mediaflow_endpoint``.

        Raises:
            ExtractorError: If the domain is wrong, the payload is missing or
                cannot be decoded, ``videoUrl`` is absent, or the stream
                answers with an HTTP status >= 400.
        """
        hostname = urlparse(url).hostname or ""
        # Exact host or a real subdomain only: a plain substring test would
        # also accept look-alike hosts such as "gupload.xyz.evil.tld".
        if hostname != "gupload.xyz" and not hostname.endswith(".gupload.xyz"):
            raise ExtractorError("GUPLOAD: Invalid domain")

        headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/144 Safari/537.36"
            ),
            "Referer": "https://gupload.xyz/",
            "Origin": "https://gupload.xyz",
        }

        # --- Fetch embed page ---
        response = await self._make_request(url, headers=headers)
        html = response.text

        # --- Extract base64 payload ---
        match = re.search(r"decodePayload\('([^']+)'\)", html)
        if not match:
            raise ExtractorError("GUPLOAD: Payload not found")

        encoded = match.group(1).strip()

        # --- Decode payload ---
        try:
            decoded = base64.b64decode(encoded).decode("utf-8", "ignore")
            # payload format: <junk>|{json}
            json_part = decoded.split("|", 1)[1]
            payload = json.loads(json_part)
        except (ValueError, IndexError) as e:
            # ValueError covers bad base64 and bad JSON; IndexError covers a
            # payload without the "|" separator.
            raise ExtractorError("GUPLOAD: Payload decode failed") from e

        # --- Extract HLS URL ---
        # Valid JSON that is not an object has no videoUrl either.
        hls_url = payload.get("videoUrl") if isinstance(payload, dict) else None
        if not hls_url:
            raise ExtractorError("GUPLOAD: videoUrl missing")

        # --- Validate stream (prevents client timeout) ---
        test = await self._make_request(hls_url, headers=headers, raise_on_status=False)
        if test.status >= 400:
            raise ExtractorError(f"GUPLOAD: Stream unavailable ({test.status})")

        # Return MASTER playlist
        return {
            "destination_url": hls_url,
            "request_headers": headers,
            "mediaflow_endpoint": self.mediaflow_endpoint,
        }
|
||||
@@ -2,9 +2,9 @@ import re
|
||||
from typing import Dict, Tuple, Optional
|
||||
from urllib.parse import urljoin, urlparse, unquote
|
||||
|
||||
from httpx import Response
|
||||
import aiohttp
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError, HttpResponse
|
||||
|
||||
|
||||
class LiveTVExtractor(BaseExtractor):
|
||||
@@ -33,20 +33,21 @@ class LiveTVExtractor(BaseExtractor):
|
||||
stream_title: Optional stream title to filter specific stream
|
||||
|
||||
Returns:
|
||||
Tuple[str, Dict[str, str]]: Stream URL and required headers
|
||||
Dict containing destination_url, request_headers, and mediaflow_endpoint
|
||||
"""
|
||||
try:
|
||||
# Get the channel page
|
||||
response = await self._make_request(url)
|
||||
response_text = response.text
|
||||
self.base_headers["referer"] = urljoin(url, "/")
|
||||
|
||||
# Extract player API details
|
||||
player_api_base, method = await self._extract_player_api_base(response.text)
|
||||
player_api_base, method = await self._extract_player_api_base(response_text)
|
||||
if not player_api_base:
|
||||
raise ExtractorError("Failed to extract player API URL")
|
||||
|
||||
# Get player options
|
||||
options_data = await self._get_player_options(response.text)
|
||||
options_data = await self._get_player_options(response_text)
|
||||
if not options_data:
|
||||
raise ExtractorError("No player options found")
|
||||
|
||||
@@ -66,7 +67,7 @@ class LiveTVExtractor(BaseExtractor):
|
||||
if not stream_url:
|
||||
continue
|
||||
|
||||
response = {
|
||||
result = {
|
||||
"destination_url": stream_url,
|
||||
"request_headers": self.base_headers,
|
||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||
@@ -75,7 +76,7 @@ class LiveTVExtractor(BaseExtractor):
|
||||
# Set endpoint based on stream type
|
||||
if stream_data.get("type") == "mpd":
|
||||
if stream_data.get("drm_key_id") and stream_data.get("drm_key"):
|
||||
response.update(
|
||||
result.update(
|
||||
{
|
||||
"query_params": {
|
||||
"key_id": stream_data["drm_key_id"],
|
||||
@@ -85,7 +86,7 @@ class LiveTVExtractor(BaseExtractor):
|
||||
}
|
||||
)
|
||||
|
||||
return response
|
||||
return result
|
||||
|
||||
raise ExtractorError("No valid stream found")
|
||||
|
||||
@@ -120,7 +121,12 @@ class LiveTVExtractor(BaseExtractor):
|
||||
api_url = f"{api_base}{post}/{type_}/{nume}"
|
||||
response = await self._make_request(api_url)
|
||||
else:
|
||||
form_data = {"action": "doo_player_ajax", "post": post, "nume": nume, "type": type_}
|
||||
# Use aiohttp FormData for POST requests
|
||||
form_data = aiohttp.FormData()
|
||||
form_data.add_field("action", "doo_player_ajax")
|
||||
form_data.add_field("post", post)
|
||||
form_data.add_field("nume", nume)
|
||||
form_data.add_field("type", type_)
|
||||
response = await self._make_request(api_base, method="POST", data=form_data)
|
||||
|
||||
# Get iframe URL from API response
|
||||
@@ -136,7 +142,7 @@ class LiveTVExtractor(BaseExtractor):
|
||||
except Exception as e:
|
||||
raise ExtractorError(f"Failed to process player option: {str(e)}")
|
||||
|
||||
async def _extract_stream_url(self, iframe_response: Response, iframe_url: str) -> Dict:
|
||||
async def _extract_stream_url(self, iframe_response: HttpResponse, iframe_url: str) -> Dict:
|
||||
"""
|
||||
Extract final stream URL from iframe content.
|
||||
"""
|
||||
@@ -147,8 +153,9 @@ class LiveTVExtractor(BaseExtractor):
|
||||
|
||||
# Check if content is already a direct M3U8 stream
|
||||
content_types = ["application/x-mpegurl", "application/vnd.apple.mpegurl"]
|
||||
content_type = iframe_response.headers.get("content-type", "")
|
||||
|
||||
if any(ext in iframe_response.headers["content-type"] for ext in content_types):
|
||||
if any(ext in content_type for ext in content_types):
|
||||
return {"url": iframe_url, "type": "m3u8"}
|
||||
|
||||
stream_data = {}
|
||||
|
||||
@@ -13,7 +13,7 @@ class LuluStreamExtractor(BaseExtractor):
|
||||
response = await self._make_request(url)
|
||||
|
||||
# See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/lulustream.py
|
||||
pattern = r'''sources:\s*\[{file:\s*["'](?P<url>[^"']+)'''
|
||||
pattern = r"""sources:\s*\[{file:\s*["'](?P<url>[^"']+)"""
|
||||
match = re.search(pattern, response.text, re.DOTALL)
|
||||
if not match:
|
||||
raise ExtractorError("Failed to extract source URL")
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from typing import Dict, Any
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor
|
||||
from mediaflow_proxy.utils.packed import eval_solver
|
||||
|
||||
|
||||
|
||||
@@ -22,7 +22,9 @@ class OkruExtractor(BaseExtractor):
|
||||
data_options = div.get("data-options")
|
||||
data = json.loads(data_options)
|
||||
metadata = json.loads(data["flashvars"]["metadata"])
|
||||
final_url = metadata.get("hlsMasterPlaylistUrl") or metadata.get("hlsManifestUrl")
|
||||
final_url = (
|
||||
metadata.get("hlsMasterPlaylistUrl") or metadata.get("hlsManifestUrl") or metadata.get("ondemandHls")
|
||||
)
|
||||
self.base_headers["referer"] = url
|
||||
return {
|
||||
"destination_url": final_url,
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
import re
|
||||
import logging
|
||||
from typing import Any, Dict, Optional
|
||||
from typing import Any, Dict
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
from mediaflow_proxy.utils.packed import detect, unpack
|
||||
from mediaflow_proxy.utils.packed import unpack
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -32,18 +32,17 @@ class SportsonlineExtractor(BaseExtractor):
|
||||
def _detect_packed_blocks(self, html: str) -> list[str]:
|
||||
"""
|
||||
Detect and extract packed eval blocks from HTML.
|
||||
Replicates the TypeScript logic: /eval\(function(.+?.+)/g
|
||||
"""
|
||||
# Find all eval(function...) blocks - more greedy to capture full packed code
|
||||
pattern = re.compile(r"eval\(function\(p,a,c,k,e,.*?\)\)(?:\s*;|\s*<)", re.DOTALL)
|
||||
raw_matches = pattern.findall(html)
|
||||
|
||||
|
||||
# If no matches with the strict pattern, try a more relaxed one
|
||||
if not raw_matches:
|
||||
# Try to find eval(function and capture until we find the closing ))
|
||||
pattern = re.compile(r"eval\(function\(p,a,c,k,e,[dr]\).*?\}\(.*?\)\)", re.DOTALL)
|
||||
raw_matches = pattern.findall(html)
|
||||
|
||||
|
||||
return raw_matches
|
||||
|
||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||
@@ -60,25 +59,25 @@ class SportsonlineExtractor(BaseExtractor):
|
||||
raise ExtractorError("No iframe found on the page")
|
||||
|
||||
iframe_url = iframe_match.group(1)
|
||||
|
||||
|
||||
# Normalize iframe URL
|
||||
if iframe_url.startswith('//'):
|
||||
iframe_url = 'https:' + iframe_url
|
||||
elif iframe_url.startswith('/'):
|
||||
if iframe_url.startswith("//"):
|
||||
iframe_url = "https:" + iframe_url
|
||||
elif iframe_url.startswith("/"):
|
||||
parsed_main = urlparse(url)
|
||||
iframe_url = f"{parsed_main.scheme}://{parsed_main.netloc}{iframe_url}"
|
||||
|
||||
|
||||
logger.info(f"Found iframe URL: {iframe_url}")
|
||||
|
||||
# Step 2: Fetch iframe with Referer
|
||||
iframe_headers = {
|
||||
'Referer': 'https://sportzonline.st/',
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36',
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
||||
'Accept-Language': 'en-US,en;q=0.9,it;q=0.8',
|
||||
'Cache-Control': 'no-cache'
|
||||
"Referer": "https://sportzonline.st/",
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36",
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.9,it;q=0.8",
|
||||
"Cache-Control": "no-cache",
|
||||
}
|
||||
|
||||
|
||||
iframe_response = await self._make_request(iframe_url, headers=iframe_headers, timeout=15)
|
||||
iframe_html = iframe_response.text
|
||||
|
||||
@@ -86,9 +85,9 @@ class SportsonlineExtractor(BaseExtractor):
|
||||
|
||||
# Step 3: Detect packed blocks
|
||||
packed_blocks = self._detect_packed_blocks(iframe_html)
|
||||
|
||||
|
||||
logger.info(f"Found {len(packed_blocks)} packed blocks")
|
||||
|
||||
|
||||
if not packed_blocks:
|
||||
logger.warning("No packed blocks found, trying direct m3u8 search")
|
||||
# Fallback: try direct m3u8 search
|
||||
@@ -96,13 +95,10 @@ class SportsonlineExtractor(BaseExtractor):
|
||||
if direct_match:
|
||||
m3u8_url = direct_match.group(1)
|
||||
logger.info(f"Found direct m3u8 URL: {m3u8_url}")
|
||||
|
||||
|
||||
return {
|
||||
"destination_url": m3u8_url,
|
||||
"request_headers": {
|
||||
'Referer': iframe_url,
|
||||
'User-Agent': iframe_headers['User-Agent']
|
||||
},
|
||||
"request_headers": {"Referer": iframe_url, "User-Agent": iframe_headers["User-Agent"]},
|
||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||
}
|
||||
else:
|
||||
@@ -134,13 +130,13 @@ class SportsonlineExtractor(BaseExtractor):
|
||||
r'file\s*:\s*["\']([^"\']+\.m3u8[^"\']*)["\']', # file: "...m3u8"
|
||||
r'["\']([^"\']*https?://[^"\']+\.m3u8[^"\']*)["\']', # any m3u8 URL
|
||||
]
|
||||
|
||||
|
||||
for pattern in patterns:
|
||||
src_match = re.search(pattern, unpacked_code)
|
||||
if src_match:
|
||||
m3u8_url = src_match.group(1)
|
||||
# Verify it looks like a valid m3u8 URL
|
||||
if '.m3u8' in m3u8_url or 'http' in m3u8_url:
|
||||
if ".m3u8" in m3u8_url or "http" in m3u8_url:
|
||||
break
|
||||
m3u8_url = None
|
||||
|
||||
@@ -162,11 +158,11 @@ class SportsonlineExtractor(BaseExtractor):
|
||||
src_match = re.search(pattern, unpacked_code)
|
||||
if src_match:
|
||||
test_url = src_match.group(1)
|
||||
if '.m3u8' in test_url or 'http' in test_url:
|
||||
if ".m3u8" in test_url or "http" in test_url:
|
||||
m3u8_url = test_url
|
||||
logger.info(f"Found m3u8 in block {i}")
|
||||
break
|
||||
|
||||
|
||||
if m3u8_url:
|
||||
break
|
||||
except Exception as e:
|
||||
@@ -181,10 +177,7 @@ class SportsonlineExtractor(BaseExtractor):
|
||||
# Return stream configuration
|
||||
return {
|
||||
"destination_url": m3u8_url,
|
||||
"request_headers": {
|
||||
'Referer': iframe_url,
|
||||
'User-Agent': iframe_headers['User-Agent']
|
||||
},
|
||||
"request_headers": {"Referer": iframe_url, "User-Agent": iframe_headers["User-Agent"]},
|
||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||
}
|
||||
|
||||
|
||||
@@ -15,8 +15,8 @@ class StreamtapeExtractor(BaseExtractor):
|
||||
if not matches:
|
||||
raise ExtractorError("Failed to extract URL components")
|
||||
i = 0
|
||||
for i in range(len(matches)):
|
||||
if matches[i-1] == matches[i] and "ip=" in matches[i]:
|
||||
for i in range(len(matches)):
|
||||
if matches[i - 1] == matches[i] and "ip=" in matches[i]:
|
||||
final_url = f"https://streamtape.com/get_video?{matches[i]}"
|
||||
|
||||
self.base_headers["referer"] = url
|
||||
|
||||
@@ -19,18 +19,11 @@ class StreamWishExtractor(BaseExtractor):
|
||||
|
||||
headers = {"Referer": referer}
|
||||
response = await self._make_request(url, headers=headers)
|
||||
|
||||
iframe_match = re.search(
|
||||
r'<iframe[^>]+src=["\']([^"\']+)["\']',
|
||||
response.text,
|
||||
re.DOTALL
|
||||
)
|
||||
|
||||
iframe_match = re.search(r'<iframe[^>]+src=["\']([^"\']+)["\']', response.text, re.DOTALL)
|
||||
iframe_url = urljoin(url, iframe_match.group(1)) if iframe_match else url
|
||||
|
||||
iframe_response = await self._make_request(
|
||||
iframe_url,
|
||||
headers=headers
|
||||
)
|
||||
iframe_response = await self._make_request(iframe_url, headers=headers)
|
||||
html = iframe_response.text
|
||||
|
||||
final_url = self._extract_m3u8(html)
|
||||
@@ -58,15 +51,18 @@ class StreamWishExtractor(BaseExtractor):
|
||||
final_url = urljoin(iframe_url, final_url)
|
||||
|
||||
origin = f"{urlparse(referer).scheme}://{urlparse(referer).netloc}"
|
||||
self.base_headers.update({
|
||||
"Referer": referer,
|
||||
"Origin": origin,
|
||||
})
|
||||
self.base_headers.update(
|
||||
{
|
||||
"Referer": referer,
|
||||
"Origin": origin,
|
||||
}
|
||||
)
|
||||
|
||||
return {
|
||||
"destination_url": final_url,
|
||||
"request_headers": self.base_headers,
|
||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||
"stream_transformer": "ts_stream",
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
@@ -74,8 +70,5 @@ class StreamWishExtractor(BaseExtractor):
|
||||
"""
|
||||
Extract first absolute m3u8 URL from text
|
||||
"""
|
||||
match = re.search(
|
||||
r'https?://[^"\']+\.m3u8[^"\']*',
|
||||
text
|
||||
)
|
||||
match = re.search(r'https?://[^"\']+\.m3u8[^"\']*', text)
|
||||
return match.group(0) if match else None
|
||||
|
||||
@@ -1,27 +1,64 @@
|
||||
import re
|
||||
from typing import Dict, Any
|
||||
from urllib.parse import urljoin, urlparse
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor
|
||||
from mediaflow_proxy.utils.packed import eval_solver
|
||||
|
||||
from bs4 import BeautifulSoup, SoupStrainer
|
||||
from curl_cffi.requests import AsyncSession
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
from mediaflow_proxy.utils.packed import unpack, detect, UnpackingError
|
||||
|
||||
|
||||
class SupervideoExtractor(BaseExtractor):
|
||||
"""Supervideo URL extractor."""
|
||||
"""Supervideo URL extractor.
|
||||
|
||||
Uses curl_cffi to bypass Cloudflare protection.
|
||||
"""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.mediaflow_endpoint = "hls_manifest_proxy"
|
||||
|
||||
|
||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||
headers = {'Accept': '*/*', 'Connection': 'keep-alive', 'User-Agent': 'Mozilla/5.0 (Linux; Android 12) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.71 Mobile Safari/537.36', 'user-agent': 'Mozilla/5.0 (Linux; Android 12) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.71 Mobile Safari/537.36'}
|
||||
"""Extract video URL from Supervideo.
|
||||
|
||||
Uses curl_cffi with Chrome impersonation to bypass Cloudflare.
|
||||
"""
|
||||
|
||||
patterns = [r'file:"(.*?)"']
|
||||
|
||||
final_url = await eval_solver(self, url, headers, patterns)
|
||||
try:
|
||||
async with AsyncSession() as session:
|
||||
response = await session.get(url, impersonate="chrome")
|
||||
|
||||
self.base_headers["referer"] = url
|
||||
return {
|
||||
"destination_url": final_url,
|
||||
"request_headers": self.base_headers,
|
||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||
}
|
||||
if response.status_code != 200:
|
||||
raise ExtractorError(f"HTTP {response.status_code} while fetching {url}")
|
||||
|
||||
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("script"))
|
||||
script_all = soup.find_all("script")
|
||||
|
||||
for script in script_all:
|
||||
if script.text and detect(script.text):
|
||||
unpacked_code = unpack(script.text)
|
||||
for pattern in patterns:
|
||||
match = re.search(pattern, unpacked_code)
|
||||
if match:
|
||||
extracted_url = match.group(1)
|
||||
if not urlparse(extracted_url).scheme:
|
||||
extracted_url = urljoin(url, extracted_url)
|
||||
|
||||
self.base_headers["referer"] = url
|
||||
return {
|
||||
"destination_url": extracted_url,
|
||||
"request_headers": self.base_headers,
|
||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||
}
|
||||
|
||||
raise ExtractorError("No packed JS found or no file URL pattern matched")
|
||||
|
||||
except UnpackingError as e:
|
||||
raise ExtractorError(f"Failed to unpack Supervideo JS: {e}")
|
||||
except Exception as e:
|
||||
if isinstance(e, ExtractorError):
|
||||
raise
|
||||
raise ExtractorError(f"Supervideo extraction failed: {e}")
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import re
|
||||
from typing import Dict, Any
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
|
||||
@@ -36,7 +35,7 @@ class TurboVidPlayExtractor(BaseExtractor):
|
||||
if media_url.startswith("//"):
|
||||
media_url = "https:" + media_url
|
||||
elif media_url.startswith("/"):
|
||||
media_url = response.url.origin + media_url
|
||||
media_url = response.get_origin() + media_url
|
||||
|
||||
#
|
||||
# 3. Fetch the intermediate playlist
|
||||
@@ -53,16 +52,11 @@ class TurboVidPlayExtractor(BaseExtractor):
|
||||
|
||||
real_m3u8 = m2.group(0)
|
||||
|
||||
#
|
||||
# 5. Final headers
|
||||
#
|
||||
self.base_headers["referer"] = url
|
||||
|
||||
#
|
||||
# 6. Always return master proxy (your MediaFlow only supports this)
|
||||
#
|
||||
return {
|
||||
"destination_url": real_m3u8,
|
||||
"request_headers": self.base_headers,
|
||||
"request_headers": {"origin": response.get_origin()},
|
||||
"propagate_response_headers": {"content-type": "video/mp2t"},
|
||||
"remove_response_headers": ["content-length", "content-range"],
|
||||
"mediaflow_endpoint": "hls_manifest_proxy",
|
||||
"stream_transformer": "ts_stream", # Use TS transformer for PNG/padding stripping
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import logging
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -8,6 +9,11 @@ logger = logging.getLogger(__name__)
|
||||
class VavooExtractor(BaseExtractor):
|
||||
"""Vavoo URL extractor for resolving vavoo.to links.
|
||||
|
||||
Supports two URL formats:
|
||||
1. Web-VOD API links: https://vavoo.to/web-vod/api/get?link=...
|
||||
These redirect (302) to external video hosts (Doodstream, etc.)
|
||||
2. Legacy mediahubmx format (currently broken on Vavoo's end)
|
||||
|
||||
Features:
|
||||
- Uses BaseExtractor's retry/timeouts
|
||||
- Improved headers to mimic Android okhttp client
|
||||
@@ -18,6 +24,40 @@ class VavooExtractor(BaseExtractor):
|
||||
super().__init__(request_headers)
|
||||
self.mediaflow_endpoint = "proxy_stream_endpoint"
|
||||
|
||||
async def _resolve_web_vod_link(self, url: str) -> str:
    """Resolve a web-vod API link to the URL it points at.

    The request is sent with redirects disabled so the target can be read
    from the ``Location`` header instead of being followed.

    Args:
        url: The web-vod API link to resolve.

    Returns:
        The redirect target (made absolute against ``url``), or the stripped
        response body when the API answers 200 with a body starting with
        ``http``.

    Raises:
        ExtractorError: If the response is neither a redirect carrying a
            ``Location`` header nor a 200 whose body is a URL, or if the
            request itself fails.
    """
    from urllib.parse import urljoin

    import aiohttp

    try:
        # Use aiohttp directly with allow_redirects=False to get the Location header
        timeout = aiohttp.ClientTimeout(total=10)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(
                url,
                headers={"Accept": "application/json"},
                allow_redirects=False,
            ) as resp:
                # Check for redirect
                if resp.status in (301, 302, 303, 307, 308):
                    # aiohttp response headers are case-insensitive, so one lookup is enough
                    location = resp.headers.get("Location")
                    if location:
                        # Location may be a relative reference; resolve it against the request URL
                        location = urljoin(url, location)
                        logger.info(f"Vavoo web-vod redirected to: {location}")
                        return location

                # If we got a 200, the response might contain the URL
                if resp.status == 200:
                    # Strip first so leading whitespace/newlines do not defeat the prefix check
                    text = (await resp.text()).strip()
                    if text.startswith("http"):
                        logger.info(f"Vavoo web-vod resolved to: {text}")
                        return text

                raise ExtractorError(f"Vavoo web-vod API returned unexpected status {resp.status}")

    except ExtractorError:
        raise
    except Exception as e:
        # Chain the original exception so the root cause stays visible in tracebacks
        raise ExtractorError(f"Failed to resolve Vavoo web-vod link: {e}") from e
|
||||
|
||||
async def get_auth_signature(self) -> Optional[str]:
|
||||
"""Get authentication signature for Vavoo API (async)."""
|
||||
headers = {
|
||||
@@ -27,10 +67,11 @@ class VavooExtractor(BaseExtractor):
|
||||
"accept-encoding": "gzip",
|
||||
}
|
||||
import time
|
||||
|
||||
current_time = int(time.time() * 1000)
|
||||
|
||||
data = {
|
||||
"token": "tosFwQCJMS8qrW_AjLoHPQ41646J5dRNha6ZWHnijoYQQQoADQoXYSo7ki7O5-CsgN4CH0uRk6EEoJ0728ar9scCRQW3ZkbfrPfeCXW2VgopSW2FWDqPOoVYIuVPAOnXCZ5g",
|
||||
"token": "",
|
||||
"reason": "app-blur",
|
||||
"locale": "de",
|
||||
"theme": "dark",
|
||||
@@ -40,21 +81,11 @@ class VavooExtractor(BaseExtractor):
|
||||
"brand": "google",
|
||||
"model": "Pixel",
|
||||
"name": "sdk_gphone64_arm64",
|
||||
"uniqueId": "d10e5d99ab665233"
|
||||
},
|
||||
"os": {
|
||||
"name": "android",
|
||||
"version": "13"
|
||||
},
|
||||
"app": {
|
||||
"platform": "android",
|
||||
"version": "3.1.21"
|
||||
},
|
||||
"version": {
|
||||
"package": "tv.vavoo.app",
|
||||
"binary": "3.1.21",
|
||||
"js": "3.1.21"
|
||||
"uniqueId": "d10e5d99ab665233",
|
||||
},
|
||||
"os": {"name": "android", "version": "13"},
|
||||
"app": {"platform": "android", "version": "3.1.21"},
|
||||
"version": {"package": "tv.vavoo.app", "binary": "3.1.21", "js": "3.1.21"},
|
||||
},
|
||||
"appFocusTime": 0,
|
||||
"playerActive": False,
|
||||
@@ -75,11 +106,9 @@ class VavooExtractor(BaseExtractor):
|
||||
"ssVersion": 1,
|
||||
"enabled": True,
|
||||
"autoServer": True,
|
||||
"id": "de-fra"
|
||||
"id": "de-fra",
|
||||
},
|
||||
"iap": {
|
||||
"supported": False
|
||||
}
|
||||
"iap": {"supported": False},
|
||||
}
|
||||
|
||||
try:
|
||||
@@ -94,7 +123,7 @@ class VavooExtractor(BaseExtractor):
|
||||
try:
|
||||
result = resp.json()
|
||||
except Exception:
|
||||
logger.warning("Vavoo ping returned non-json response (status=%s).", resp.status_code)
|
||||
logger.warning("Vavoo ping returned non-json response (status=%s).", resp.status)
|
||||
return None
|
||||
|
||||
addon_sig = result.get("addonSig") if isinstance(result, dict) else None
|
||||
@@ -109,10 +138,48 @@ class VavooExtractor(BaseExtractor):
|
||||
return None
|
||||
|
||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||
"""Extract Vavoo stream URL (async)."""
|
||||
"""Extract Vavoo stream URL (async).
|
||||
|
||||
Supports:
|
||||
- Direct play URLs: https://vavoo.to/play/{id}/index.m3u8 (Live TV)
|
||||
- Web-VOD API links: https://vavoo.to/web-vod/api/get?link=...
|
||||
- Legacy mediahubmx links (may not work due to Vavoo API changes)
|
||||
"""
|
||||
if "vavoo.to" not in url:
|
||||
raise ExtractorError("Not a valid Vavoo URL")
|
||||
|
||||
# Check if this is a direct play URL (Live TV)
|
||||
# These URLs are already m3u8 streams but need auth signature
|
||||
if "/play/" in url and url.endswith(".m3u8"):
|
||||
signature = await self.get_auth_signature()
|
||||
if not signature:
|
||||
raise ExtractorError("Failed to get Vavoo authentication signature for Live TV")
|
||||
|
||||
stream_headers = {
|
||||
"user-agent": "okhttp/4.11.0",
|
||||
"referer": "https://vavoo.to/",
|
||||
"mediahubmx-signature": signature,
|
||||
}
|
||||
return {
|
||||
"destination_url": url,
|
||||
"request_headers": stream_headers,
|
||||
"mediaflow_endpoint": "hls_manifest_proxy",
|
||||
}
|
||||
|
||||
# Check if this is a web-vod API link (new format)
|
||||
if "/web-vod/api/get" in url:
|
||||
resolved_url = await self._resolve_web_vod_link(url)
|
||||
stream_headers = {
|
||||
"user-agent": self.base_headers.get("user-agent", "Mozilla/5.0"),
|
||||
"referer": "https://vavoo.to/",
|
||||
}
|
||||
return {
|
||||
"destination_url": resolved_url,
|
||||
"request_headers": stream_headers,
|
||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||
}
|
||||
|
||||
# Legacy mediahubmx flow
|
||||
signature = await self.get_auth_signature()
|
||||
if not signature:
|
||||
raise ExtractorError("Failed to get Vavoo authentication signature")
|
||||
@@ -139,14 +206,9 @@ class VavooExtractor(BaseExtractor):
|
||||
"accept": "application/json",
|
||||
"content-type": "application/json; charset=utf-8",
|
||||
"accept-encoding": "gzip",
|
||||
"mediahubmx-signature": signature
|
||||
}
|
||||
data = {
|
||||
"language": "de",
|
||||
"region": "AT",
|
||||
"url": link,
|
||||
"clientVersion": "3.1.21"
|
||||
"mediahubmx-signature": signature,
|
||||
}
|
||||
data = {"language": "de", "region": "AT", "url": link, "clientVersion": "3.1.21"}
|
||||
try:
|
||||
logger.info(f"Attempting to resolve Vavoo URL: {link}")
|
||||
resp = await self._make_request(
|
||||
@@ -161,7 +223,11 @@ class VavooExtractor(BaseExtractor):
|
||||
try:
|
||||
result = resp.json()
|
||||
except Exception:
|
||||
logger.warning("Vavoo resolve returned non-json response (status=%s). Body preview: %s", resp.status_code, getattr(resp, "text", "")[:500])
|
||||
logger.warning(
|
||||
"Vavoo resolve returned non-json response (status=%s). Body preview: %s",
|
||||
resp.status,
|
||||
getattr(resp, "text", "")[:500],
|
||||
)
|
||||
return None
|
||||
|
||||
logger.debug("Vavoo API response: %s", result)
|
||||
|
||||
@@ -16,10 +16,9 @@ class VidmolyExtractor(BaseExtractor):
|
||||
raise ExtractorError("VIDMOLY: Invalid domain")
|
||||
|
||||
headers = {
|
||||
"User-Agent":
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||
"Chrome/120 Safari/537.36",
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||
"Chrome/120 Safari/537.36",
|
||||
"Referer": url,
|
||||
"Sec-Fetch-Dest": "iframe",
|
||||
}
|
||||
@@ -29,10 +28,7 @@ class VidmolyExtractor(BaseExtractor):
|
||||
html = response.text
|
||||
|
||||
# --- Extract master m3u8 ---
|
||||
match = re.search(
|
||||
r'sources:\s*\[\{file:"([^"]+)',
|
||||
html
|
||||
)
|
||||
match = re.search(r'sources\s*:\s*\[\s*\{\s*file\s*:\s*[\'"]([^\'"]+)', html)
|
||||
if not match:
|
||||
raise ExtractorError("VIDMOLY: Stream URL not found")
|
||||
|
||||
@@ -49,10 +45,8 @@ class VidmolyExtractor(BaseExtractor):
|
||||
raise ExtractorError("VIDMOLY: Request timed out")
|
||||
raise
|
||||
|
||||
if test.status_code >= 400:
|
||||
raise ExtractorError(
|
||||
f"VIDMOLY: Stream unavailable ({test.status_code})"
|
||||
)
|
||||
if test.status >= 400:
|
||||
raise ExtractorError(f"VIDMOLY: Stream unavailable ({test.status})")
|
||||
|
||||
# Return MASTER playlist, not variant
|
||||
# Let MediaFlow Proxy handle variants
|
||||
|
||||
@@ -8,23 +8,23 @@ from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
class VidozaExtractor(BaseExtractor):
|
||||
def __init__(self, request_headers: dict):
|
||||
super().__init__(request_headers)
|
||||
# if your base doesn’t set this, keep it; otherwise you can remove:
|
||||
self.mediaflow_endpoint = "proxy_stream_endpoint"
|
||||
|
||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||
parsed = urlparse(url)
|
||||
|
||||
# Accept vidoza + videzz
|
||||
if not parsed.hostname or not (
|
||||
parsed.hostname.endswith("vidoza.net")
|
||||
or parsed.hostname.endswith("videzz.net")
|
||||
parsed.hostname.endswith("vidoza.net") or parsed.hostname.endswith("videzz.net")
|
||||
):
|
||||
raise ExtractorError("VIDOZA: Invalid domain")
|
||||
|
||||
# Use the correct referer for clones
|
||||
referer = f"https://{parsed.hostname}/"
|
||||
|
||||
headers = self.base_headers.copy()
|
||||
headers.update(
|
||||
{
|
||||
"referer": "https://vidoza.net/",
|
||||
"referer": referer,
|
||||
"user-agent": (
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||
@@ -35,16 +35,14 @@ class VidozaExtractor(BaseExtractor):
|
||||
}
|
||||
)
|
||||
|
||||
# 1) Fetch the embed page (or whatever URL you pass in)
|
||||
# 1) Fetch embed page
|
||||
response = await self._make_request(url, headers=headers)
|
||||
html = response.text or ""
|
||||
|
||||
if not html:
|
||||
raise ExtractorError("VIDOZA: Empty HTML from Vidoza")
|
||||
raise ExtractorError("VIDOZA: Empty HTML")
|
||||
|
||||
cookies = response.cookies or {}
|
||||
|
||||
# 2) Extract final link with REGEX
|
||||
# 2) Extract video URL
|
||||
pattern = re.compile(
|
||||
r"""["']?\s*(?:file|src)\s*["']?\s*[:=,]?\s*["'](?P<url>[^"']+)"""
|
||||
r"""(?:[^}>\]]+)["']?\s*res\s*["']?\s*[:=]\s*["']?(?P<label>[^"',]+)""",
|
||||
@@ -53,21 +51,15 @@ class VidozaExtractor(BaseExtractor):
|
||||
|
||||
match = pattern.search(html)
|
||||
if not match:
|
||||
raise ExtractorError("VIDOZA: Unable to extract video + label from JS")
|
||||
raise ExtractorError("VIDOZA: Video URL not found")
|
||||
|
||||
mp4_url = match.group("url")
|
||||
label = match.group("label").strip()
|
||||
video_url = match.group("url")
|
||||
|
||||
# Fix URLs like //str38.vidoza.net/...
|
||||
if mp4_url.startswith("//"):
|
||||
mp4_url = "https:" + mp4_url
|
||||
|
||||
# 3) Attach cookies (token may depend on these)
|
||||
if cookies:
|
||||
headers["cookie"] = "; ".join(f"{k}={v}" for k, v in cookies.items())
|
||||
if video_url.startswith("//"):
|
||||
video_url = "https:" + video_url
|
||||
|
||||
return {
|
||||
"destination_url": mp4_url,
|
||||
"destination_url": video_url,
|
||||
"request_headers": headers,
|
||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||
}
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import json
|
||||
import re
|
||||
from typing import Dict, Any
|
||||
from urllib.parse import urlparse, parse_qs
|
||||
|
||||
from bs4 import BeautifulSoup, SoupStrainer
|
||||
|
||||
@@ -25,7 +24,7 @@ class VixCloudExtractor(BaseExtractor):
|
||||
"Origin": f"{site_url}",
|
||||
},
|
||||
)
|
||||
if response.status_code != 200:
|
||||
if response.status != 200:
|
||||
raise ExtractorError("Outdated Url")
|
||||
# Soup the response
|
||||
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("div", {"id": "app"}))
|
||||
@@ -48,8 +47,8 @@ class VixCloudExtractor(BaseExtractor):
|
||||
response = await self._make_request(iframe, headers={"x-inertia": "true", "x-inertia-version": version})
|
||||
elif "movie" in url or "tv" in url:
|
||||
response = await self._make_request(url)
|
||||
|
||||
if response.status_code != 200:
|
||||
|
||||
if response.status != 200:
|
||||
raise ExtractorError("Failed to extract URL components, Invalid Request")
|
||||
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("body"))
|
||||
if soup:
|
||||
@@ -58,7 +57,7 @@ class VixCloudExtractor(BaseExtractor):
|
||||
expires = re.search(r"'expires':\s*'(\d+)'", script).group(1)
|
||||
server_url = re.search(r"url:\s*'([^']+)'", script).group(1)
|
||||
if "?b=1" in server_url:
|
||||
final_url = f'{server_url}&token={token}&expires={expires}'
|
||||
final_url = f"{server_url}&token={token}&expires={expires}"
|
||||
else:
|
||||
final_url = f"{server_url}?token={token}&expires={expires}"
|
||||
if "window.canPlayFHD = true" in script:
|
||||
|
||||
@@ -15,7 +15,7 @@ class VoeExtractor(BaseExtractor):
|
||||
response = await self._make_request(url)
|
||||
|
||||
# See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/voesx.py
|
||||
redirect_pattern = r'''window\.location\.href\s*=\s*'([^']+)'''
|
||||
redirect_pattern = r"""window\.location\.href\s*=\s*'([^']+)"""
|
||||
redirect_match = re.search(redirect_pattern, response.text, re.DOTALL)
|
||||
if redirect_match:
|
||||
if redirected:
|
||||
@@ -37,7 +37,7 @@ class VoeExtractor(BaseExtractor):
|
||||
|
||||
data = self.voe_decode(code_and_script_match.group(1), luts_match.group(1))
|
||||
|
||||
final_url = data.get('source')
|
||||
final_url = data.get("source")
|
||||
if not final_url:
|
||||
raise ExtractorError("VOE: failed to extract video URL")
|
||||
|
||||
@@ -51,8 +51,9 @@ class VoeExtractor(BaseExtractor):
|
||||
@staticmethod
|
||||
def voe_decode(ct: str, luts: str) -> Dict[str, Any]:
|
||||
import json
|
||||
lut = [''.join([('\\' + x) if x in '.*+?^${}()|[]\\' else x for x in i]) for i in luts[2:-2].split("','")]
|
||||
txt = ''
|
||||
|
||||
lut = ["".join([("\\" + x) if x in ".*+?^${}()|[]\\" else x for x in i]) for i in luts[2:-2].split("','")]
|
||||
txt = ""
|
||||
for i in ct:
|
||||
x = ord(i)
|
||||
if 64 < x < 91:
|
||||
@@ -61,8 +62,8 @@ class VoeExtractor(BaseExtractor):
|
||||
x = (x - 84) % 26 + 97
|
||||
txt += chr(x)
|
||||
for i in lut:
|
||||
txt = re.sub(i, '', txt)
|
||||
ct = base64.b64decode(txt).decode('utf-8')
|
||||
txt = ''.join([chr(ord(i) - 3) for i in ct])
|
||||
txt = base64.b64decode(txt[::-1]).decode('utf-8')
|
||||
txt = re.sub(i, "", txt)
|
||||
ct = base64.b64decode(txt).decode("utf-8")
|
||||
txt = "".join([chr(ord(i) - 3) for i in ct])
|
||||
txt = base64.b64decode(txt[::-1]).decode("utf-8")
|
||||
return json.loads(txt)
|
||||
|
||||
Reference in New Issue
Block a user