mirror of
https://github.com/UrloMythus/UnHided.git
synced 2026-06-10 09:10:23 +00:00
new version
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -9,7 +9,7 @@ import json
|
||||
import logging
|
||||
|
||||
from mediaflow_proxy.configs import settings
|
||||
from mediaflow_proxy.utils.http_client import create_aiohttp_session
|
||||
from mediaflow_proxy.utils.http_client import create_aiohttp_session, _ensure_routing_initialized, get_routing_config
|
||||
from mediaflow_proxy.utils.http_utils import DownloadError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -65,6 +65,16 @@ class BaseExtractor(ABC):
|
||||
# merge incoming headers (e.g. Accept-Language / Referer) with default base headers
|
||||
self.base_headers.update(request_headers or {})
|
||||
|
||||
@staticmethod
def _get_proxy(url: str) -> str | None:
    """Return the configured proxy URL for *url*, or None if no proxy applies.

    The lookup is best-effort: if routing cannot be initialised, no route is
    returned, or the route has no ``proxy_url``, the failure is logged at debug
    level and None is returned instead of propagating.
    """
    try:
        _ensure_routing_initialized()
        route = get_routing_config().match_url(url)
        return route.proxy_url
    except Exception as exc:
        # Deliberately broad: resolving a proxy must never abort an extraction,
        # but the reason is still recorded so misconfiguration can be diagnosed.
        logger.debug("Proxy lookup failed for %s: %s", url, exc)
        return None
|
||||
|
||||
async def _make_request(
|
||||
self,
|
||||
url: str,
|
||||
|
||||
@@ -0,0 +1,158 @@
|
||||
import re
|
||||
import json
|
||||
import base64
|
||||
from typing import Dict, Any
|
||||
from urllib.parse import urlparse, parse_qs
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
|
||||
|
||||
class CityExtractor(BaseExtractor):
    """Extractor for pages that hide their playlist inside ``atob("...")`` calls.

    Inline ``<script>`` tags are scanned for base64 payloads. A decoded payload
    is expected to expose either a ``file:`` / ``sources:`` JSON array (a flat
    episode list or a list of season folders) or a single quoted ``file`` URL.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.mediaflow_endpoint = "hls_manifest_proxy"

    def atob_fixed(self, data: str) -> str:
        """Decode base64 *data* to text, tolerating missing ``=`` padding.

        Returns an empty string when the input cannot be decoded at all.
        Bytes that are not valid UTF-8 are dropped from the result.
        """
        try:
            raw = base64.b64decode(data)
        except ValueError:
            # Unpadded input is rejected by Python's decoder, so retry once
            # with the padding restored.
            compact = "".join(data.split())
            try:
                raw = base64.b64decode(compact + "=" * (-len(compact) % 4))
            except ValueError:
                return ""
        except TypeError:
            return ""
        return raw.decode("utf-8", errors="ignore")

    def extract_json_array(self, decoded: str):
        """Return the bracket-balanced ``[...]`` text following ``file:`` or ``sources:``.

        Returns None when neither marker is present, no ``[`` follows it, or the
        brackets never balance. Brackets inside string values are counted too.
        """
        start = decoded.find("file:")
        if start == -1:
            start = decoded.find("sources:")
        if start == -1:
            return None

        start = decoded.find("[", start)
        if start == -1:
            return None

        depth = 0
        for i in range(start, len(decoded)):
            if decoded[i] == "[":
                depth += 1
            elif decoded[i] == "]":
                depth -= 1
                if depth == 0:
                    return decoded[start : i + 1]

        return None

    def pick_stream(self, file_data, season: int = 1, episode: int = 1):
        """Select the stream URL for *season* / *episode* out of *file_data*.

        Args:
            file_data: A plain URL string (returned unchanged), a flat list of
                ``{"file": ...}`` dicts, or a list of season dicts whose
                ``"folder"`` holds the episode dicts.
            season: 1-based season number, matched against each season title.
            episode: 1-based episode number.

        Returns:
            The selected ``file`` value, or None when nothing usable is found.
            An out-of-range episode falls back to the first entry.
        """
        if isinstance(file_data, str):
            return file_data
        if not isinstance(file_data, list) or not file_data:
            return None

        idx = max(0, episode - 1)

        # Flat list: every element is an episode.
        if all(isinstance(x, dict) and "file" in x for x in file_data):
            entry = file_data[idx] if idx < len(file_data) else file_data[0]
            return entry["file"]

        # Season list: keep only entries that actually carry episodes.
        seasons = [
            s
            for s in file_data
            if isinstance(s, dict) and isinstance(s.get("folder"), list) and s["folder"]
        ]
        if not seasons:
            return None

        season_re = re.compile(rf"(season|s)\s*0*{season}\b")
        chosen = next(
            (s for s in seasons if season_re.search(str(s.get("title") or "").lower())),
            seasons[0],  # no title matched: fall back to the first season
        )

        episodes = chosen["folder"]
        entry = episodes[idx] if idx < len(episodes) else episodes[0]
        return entry.get("file") if isinstance(entry, dict) else None

    @staticmethod
    def _query_int(query: Dict[str, Any], key: str, default: int) -> int:
        """Return ``int(query[key][0])``, or *default* if absent or not an integer."""
        try:
            return int(query[key][0])
        except (KeyError, IndexError, ValueError, TypeError):
            return default

    def _payload_from_script(self, script_html: str):
        """Return the playlist data from the first usable ``atob`` payload in a script.

        The result is the parsed JSON array, the raw (unescaped) array text when
        it is not valid JSON, a quoted ``file`` URL, or None if no payload yields
        anything.
        """
        for match in re.finditer(r'atob\(\s*[\'"](.*?)[\'"]\s*\)', script_html):
            decoded = self.atob_fixed(match.group(1))
            if not decoded:
                continue

            raw_json = self.extract_json_array(decoded)
            if raw_json:
                # The payload is backslash-escaped; drop the escapes before parsing.
                unescaped = re.sub(r"\\(.)", r"\1", raw_json)
                try:
                    return json.loads(unescaped)
                except ValueError:
                    return unescaped

            file_match = re.search(r'file\s*:\s*[\'"](.*?)[\'"]', decoded, re.S)
            if file_match:
                return file_match.group(1)

        return None

    async def extract(self, url: str, season: int = 1, episode: int = 1, **kwargs) -> Dict[str, Any]:
        """Main extraction entry point.

        ``s`` and ``e`` query parameters on *url*, when they are integers,
        override *season* and *episode*. The page is fetched without its query
        string.

        Raises:
            ExtractorError: If the page does not return 200, no payload is
                found, or no stream can be selected from it.
        """
        parsed = urlparse(url)
        query = parse_qs(parsed.query)
        season = self._query_int(query, "s", season)
        episode = self._query_int(query, "e", episode)

        clean_url = f"{parsed.scheme}://{parsed.netloc}{parsed.path}"

        # NOTE(review): hard-coded session cookie embedded in source; consider
        # moving it to configuration.
        cookie_b64 = "ZGxlX3VzZXJfaWQ9MzI3Mjk7IGRsZV9wYXNzd29yZD04OTQxNzFjNmE4ZGFiMThlZTU5NGQ1YzY1MjAwOWEzNTs="
        cookie = base64.b64decode(cookie_b64).decode()

        headers = {
            "User-Agent": self.base_headers.get("user-agent"),
            "Referer": clean_url,
            "Cookie": cookie,
        }

        response = await self._make_request(clean_url, headers=headers)
        if response.status != 200:
            raise ExtractorError("Failed to load City page")

        soup = BeautifulSoup(response.text, "lxml")
        file_data = None

        for script in soup.find_all("script"):
            script_html = script.string or script.text or ""
            if "atob" not in script_html:
                continue
            file_data = self._payload_from_script(script_html)
            if file_data:
                break

        if not file_data:
            raise ExtractorError("No stream found")

        stream_url = self.pick_stream(file_data, season=season, episode=episode)
        if not stream_url:
            raise ExtractorError("Stream extraction failed")

        return {
            "destination_url": stream_url,
            "request_headers": {
                "Referer": clean_url,
                "User-Agent": self.base_headers.get("user-agent"),
            },
            "mediaflow_endpoint": self.mediaflow_endpoint,
        }
|
||||
@@ -1,704 +0,0 @@
|
||||
import hashlib
|
||||
import hmac
|
||||
import re
|
||||
import time
|
||||
import logging
|
||||
|
||||
from typing import Any, Dict, Optional
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import aiohttp
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError, HttpResponse
|
||||
from mediaflow_proxy.utils.http_client import create_aiohttp_session
|
||||
from mediaflow_proxy.configs import settings
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Silence the ConnectionResetError on Windows
|
||||
logging.getLogger("asyncio").setLevel(logging.CRITICAL)
|
||||
|
||||
# Default fingerprint parameters
|
||||
DEFAULT_DLHD_USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0"
|
||||
DEFAULT_DLHD_SCREEN_RESOLUTION = "1920x1080"
|
||||
DEFAULT_DLHD_TIMEZONE = "UTC"
|
||||
DEFAULT_DLHD_LANGUAGE = "en"
|
||||
|
||||
|
||||
def compute_fingerprint(
    user_agent: str = DEFAULT_DLHD_USER_AGENT,
    screen_resolution: str = DEFAULT_DLHD_SCREEN_RESOLUTION,
    timezone: str = DEFAULT_DLHD_TIMEZONE,
    language: str = DEFAULT_DLHD_LANGUAGE,
) -> str:
    """
    Build the value sent in the X-Fingerprint header.

    The four inputs are concatenated in argument order with no separator,
    hashed with SHA-256, and the first 16 hex characters are kept.

    Args:
        user_agent: The user agent string
        screen_resolution: The screen resolution (e.g., "1920x1080")
        timezone: The timezone (e.g., "UTC")
        language: The language code (e.g., "en")

    Returns:
        The 16-character fingerprint
    """
    material = "{}{}{}{}".format(user_agent, screen_resolution, timezone, language)
    hasher = hashlib.sha256()
    hasher.update(material.encode("utf-8"))
    return hasher.hexdigest()[:16]
|
||||
|
||||
|
||||
def compute_key_path(resource: str, number: str, timestamp: int, fingerprint: str, secret_key: str) -> str:
    """
    Build the value sent in the X-Key-Path header.

    The message ``resource|number|timestamp|fingerprint`` is signed with
    HMAC-SHA256 under *secret_key*; the first 16 hex characters are returned.

    Args:
        resource: The resource from the key URL
        number: The number from the key URL
        timestamp: The Unix timestamp
        fingerprint: The fingerprint value
        secret_key: The HMAC secret key (channel_salt)

    Returns:
        The 16-character key path
    """
    message = "{}|{}|{}|{}".format(resource, number, timestamp, fingerprint)
    signer = hmac.new(
        key=secret_key.encode("utf-8"),
        msg=message.encode("utf-8"),
        digestmod=hashlib.sha256,
    )
    return signer.hexdigest()[:16]
|
||||
|
||||
|
||||
def compute_key_headers(key_url: str, secret_key: str) -> tuple[int, int, str, str] | None:
    """
    Produce the X-Key-Timestamp, X-Key-Nonce, X-Key-Path and X-Fingerprint values for a /key/ URL.

    Steps:
        1. Pull ``resource`` and ``number`` out of ``/key/{resource}/{number}``.
        2. Take the current Unix time in seconds.
        3. Compute HMAC-SHA256(resource) under *secret_key* as a hex string.
        4. Proof-of-work: the nonce is the first ``i`` below 100000 for which the
           leading four hex digits of MD5(hmac + resource + number + ts + i) are
           below 0x1000; if none qualifies the nonce stays 0.
        5. Compute the fingerprint with its default parameters.
        6. Compute the key path from resource, number, timestamp and fingerprint.

    Args:
        key_url: The key URL containing /key/{resource}/{number}
        secret_key: The HMAC secret key (channel_salt)

    Returns:
        Tuple of (timestamp, nonce, key_path, fingerprint) or None if URL doesn't match pattern
    """
    found = re.search(r"/key/([^/]+)/(\d+)", key_url)
    if found is None:
        return None
    resource, number = found.group(1), found.group(2)

    ts = int(time.time())

    resource_mac = hmac.new(
        secret_key.encode("utf-8"), resource.encode("utf-8"), hashlib.sha256
    ).hexdigest()

    def _meets_target(candidate: int) -> bool:
        # A candidate qualifies when the first 16 bits of its MD5 are < 4096.
        digest = hashlib.md5(f"{resource_mac}{resource}{number}{ts}{candidate}".encode("utf-8")).hexdigest()
        return int(digest[:4], 16) < 0x1000

    nonce = next((i for i in range(100000) if _meets_target(i)), 0)

    fingerprint = compute_fingerprint()
    key_path = compute_key_path(resource, number, ts, fingerprint, secret_key)

    return ts, nonce, key_path, fingerprint
|
||||
|
||||
|
||||
class DLHDExtractor(BaseExtractor):
|
||||
"""DLHD (DaddyLive) URL extractor for M3U8 streams.
|
||||
|
||||
Supports the new authentication flow with:
|
||||
- EPlayerAuth extraction (auth_token, channel_key, channel_salt)
|
||||
- Server lookup for dynamic server selection
|
||||
- Dynamic key header computation for AES-128 encrypted streams
|
||||
"""
|
||||
|
||||
def __init__(self, request_headers: dict):
    """Set up the extractor with empty FlareSolverr state and the key-proxy endpoint."""
    super().__init__(request_headers)
    # Filled in by _fetch_via_flaresolverr and reused by later requests.
    self._flaresolverr_user_agent: Optional[str] = None
    self._flaresolverr_cookies: Optional[str] = None
    self._iframe_context: Optional[str] = None
    self.mediaflow_endpoint = "hls_key_proxy"
|
||||
|
||||
async def _fetch_via_flaresolverr(self, url: str) -> HttpResponse:
    """Fetch a URL using FlareSolverr to bypass Cloudflare protection.

    Posts a ``request.get`` command to ``{settings.flaresolverr_url}/v1`` and
    converts the returned solution into an ``HttpResponse``. Cookies and the
    user-agent reported by FlareSolverr are stored on the instance so later
    requests can reuse them.

    Args:
        url: The page to fetch through FlareSolverr.

    Returns:
        HttpResponse built from the solution (headers are left empty).

    Raises:
        ExtractorError: If FlareSolverr is not configured, answers with a
            non-200 HTTP status, or reports a status other than ``"ok"``.
    """
    if not settings.flaresolverr_url:
        raise ExtractorError("FlareSolverr URL not configured. Set FLARESOLVERR_URL in environment.")

    flaresolverr_endpoint = f"{settings.flaresolverr_url.rstrip('/')}/v1"
    payload = {
        "cmd": "request.get",
        "url": url,
        # FlareSolverr expects milliseconds; the setting is multiplied by 1000.
        "maxTimeout": settings.flaresolverr_timeout * 1000,
    }

    logger.info(f"Using FlareSolverr to fetch: {url}")

    async with aiohttp.ClientSession() as session:
        async with session.post(
            flaresolverr_endpoint,
            json=payload,
            # Give the HTTP call a little longer than the solver's own timeout.
            timeout=aiohttp.ClientTimeout(total=settings.flaresolverr_timeout + 10),
        ) as response:
            if response.status != 200:
                raise ExtractorError(f"FlareSolverr returned status {response.status}")

            data = await response.json()

    if data.get("status") != "ok":
        raise ExtractorError(f"FlareSolverr failed: {data.get('message', 'Unknown error')}")

    # Keys may be present but null, so fall back with `or` rather than a
    # .get() default (which only covers a missing key).
    solution = data.get("solution") or {}
    html_content = solution.get("response") or ""
    final_url = solution.get("url") or url
    status = solution.get("status", 200)

    # Store cookies and user-agent for subsequent requests
    cookies = solution.get("cookies") or []
    cookie_pairs = [
        f"{c['name']}={c['value']}"
        for c in cookies
        if isinstance(c, dict) and "name" in c and "value" in c
    ]
    if cookie_pairs:
        self._flaresolverr_cookies = "; ".join(cookie_pairs)
        logger.info(f"FlareSolverr provided {len(cookie_pairs)} cookies")

    user_agent = solution.get("userAgent")
    if user_agent:
        self._flaresolverr_user_agent = user_agent
        logger.info(f"FlareSolverr user-agent: {user_agent}")

    logger.info(f"FlareSolverr successfully bypassed Cloudflare for: {url}")

    return HttpResponse(
        status=status,
        headers={},
        text=html_content,
        content=html_content.encode("utf-8", errors="replace"),
        url=final_url,
    )
|
||||
|
||||
async def _make_request(
    self, url: str, method: str = "GET", headers: Optional[Dict] = None, use_flaresolverr: bool = False, **kwargs
) -> HttpResponse:
    """Issue a request with SSL verification off, or delegate to FlareSolverr.

    When *use_flaresolverr* is set and a FlareSolverr URL is configured the
    call is handed to ``_fetch_via_flaresolverr``. Otherwise *headers* are
    layered over ``self.base_headers``, any stored FlareSolverr cookies and
    user-agent are applied, and the request goes through
    ``create_aiohttp_session``.

    Raises:
        ExtractorError: If the response status is 400 or above.
    """
    if use_flaresolverr and settings.flaresolverr_url:
        return await self._fetch_via_flaresolverr(url)

    timeout = kwargs.pop("timeout", 15)
    # Accepted for call compatibility; removed so they are not forwarded.
    for unused_option in ("retries", "backoff_factor"):
        kwargs.pop(unused_option, None)

    merged_headers = self.base_headers.copy()
    merged_headers.update(headers or {})

    # Append the FlareSolverr cookies to whatever Cookie header is already set.
    solver_cookies = self._flaresolverr_cookies
    if solver_cookies:
        current = merged_headers.get("Cookie", "")
        merged_headers["Cookie"] = f"{current}; {solver_cookies}" if current else solver_cookies

    # The FlareSolverr user-agent, when known, wins over the caller's.
    solver_agent = self._flaresolverr_user_agent
    if solver_agent:
        merged_headers["User-Agent"] = solver_agent

    async with create_aiohttp_session(url, timeout=timeout, verify=False) as (session, proxy_url):
        async with session.request(method, url, headers=merged_headers, proxy=proxy_url, **kwargs) as response:
            body = await response.read()
            landed_url = str(response.url)
            code = response.status
            response_headers = dict(response.headers)

            if code >= 400:
                raise ExtractorError(f"HTTP error {code} while requesting {url}")

            return HttpResponse(
                status=code,
                headers=response_headers,
                text=body.decode("utf-8", errors="replace"),
                content=body,
                url=landed_url,
            )
|
||||
|
||||
async def _extract_session_data(self, iframe_url: str, main_url: str) -> dict | None:
    """
    Download the iframe page and pull the EPlayerAuth session values out of it.

    Args:
        iframe_url: The iframe URL to fetch
        main_url: The main site domain for Referer header

    Returns:
        Dict with auth_token, channel_key and channel_salt (plus
        server_lookup_url when a fetchWithRetry lookup call is present),
        or None if the fetch fails or any of the three values is missing
    """
    request_headers = {
        "User-Agent": self._flaresolverr_user_agent or DEFAULT_DLHD_USER_AGENT,
        "Referer": f"https://{main_url}/",
    }

    try:
        page = await self._make_request(iframe_url, headers=request_headers, timeout=12)
        html = page.text
    except Exception as e:
        logger.warning(f"Error fetching iframe URL: {e}")
        return None

    # Fields of: EPlayerAuth.init({ authToken: '...', channelKey: '...', ..., channelSalt: '...' });
    required_patterns = {
        "auth_token": r"EPlayerAuth\.init\s*\(\s*\{\s*authToken:\s*'([^']+)'",
        "channel_key": r"channelKey:\s*'([^']+)'",
        "channel_salt": r"channelSalt:\s*'([^']+)'",
    }

    session_data = {}
    for field, pattern in required_patterns.items():
        hit = re.search(pattern, html)
        if hit is None:
            # All three values are mandatory.
            return None
        session_data[field] = hit.group(1)

    # Base URL of the server lookup, taken from the fetchWithRetry call.
    lookup_hit = re.search(r"fetchWithRetry\s*\(\s*'([^']+server_lookup\?channel_id=)", html)
    if lookup_hit is not None:
        session_data["server_lookup_url"] = lookup_hit.group(1) + session_data["channel_key"]

    return session_data
|
||||
|
||||
async def _get_server_key(self, server_lookup_url: str, iframe_url: str) -> str | None:
    """
    Query the server lookup endpoint and return its ``server_key`` field.

    Args:
        server_lookup_url: The server lookup URL
        iframe_url: The iframe URL; its host is used for Referer and Origin

    Returns:
        The server_key, or None if the request or JSON parsing fails or the
        field is absent
    """
    iframe_host = urlparse(iframe_url).netloc
    lookup_headers = {
        "User-Agent": self._flaresolverr_user_agent or DEFAULT_DLHD_USER_AGENT,
        "Referer": f"https://{iframe_host}/",
        "Origin": f"https://{iframe_host}",
    }

    try:
        reply = await self._make_request(server_lookup_url, headers=lookup_headers, timeout=10)
        payload = reply.json()
        return payload.get("server_key")
    except Exception as e:
        logger.warning(f"Error fetching server lookup: {e}")
        return None
|
||||
|
||||
def _build_m3u8_url(self, server_key: str, channel_key: str) -> str:
    """
    Compose the playlist URL for a channel on the given server.

    Args:
        server_key: The server key from server lookup
        channel_key: The channel key

    Returns:
        The playlist URL; it ends in ``mono.css`` rather than ``.m3u8``
    """
    # "top1/cdn" lives on a fixed host; every other key maps to "<key>new".
    if server_key == "top1/cdn":
        return f"https://top1.dvalna.ru/top1/cdn/{channel_key}/mono.css"
    return f"https://{server_key}new.dvalna.ru/{server_key}/{channel_key}/mono.css"
|
||||
|
||||
async def _extract_new_auth_flow(self, iframe_url: str, iframe_content: str, headers: dict) -> Dict[str, Any]:
    """Handle the new authentication flow: auth POST, server lookup, stream URL.

    Args:
        iframe_url: URL of the player iframe; supplies the Origin/Referer
            values and the host used for the server lookup.
        iframe_content: Iframe HTML/JS, scanned for the CHANNEL_KEY and AUTH_*
            declarations.
        headers: Base request headers; must contain a ``"User-Agent"`` key.

    Returns:
        Dict with ``destination_url``, ``request_headers`` and
        ``mediaflow_endpoint`` (always ``"hls_manifest_proxy"``).

    Raises:
        ExtractorError: If any auth parameter is missing from the iframe (the
            page is not using this flow), the auth POST fails or is rejected,
            or the server lookup fails or returns no ``server_key``.
    """
    import json

    patterns = {
        "channel_key": r'(?:const|var|let)\s+(?:CHANNEL_KEY|channelKey)\s*=\s*["\']([^"\']+)["\']',
        "auth_token": r'(?:const|var|let)\s+AUTH_TOKEN\s*=\s*["\']([^"\']+)["\']',
        "auth_country": r'(?:const|var|let)\s+AUTH_COUNTRY\s*=\s*["\']([^"\']+)["\']',
        "auth_ts": r'(?:const|var|let)\s+AUTH_TS\s*=\s*["\']([^"\']+)["\']',
        "auth_expiry": r'(?:const|var|let)\s+AUTH_EXPIRY\s*=\s*["\']([^"\']+)["\']',
    }
    params: Dict[str, Optional[str]] = {}
    for key, pattern in patterns.items():
        match = re.search(pattern, iframe_content)
        params[key] = match.group(1) if match else None

    missing_params = [k for k, v in params.items() if not v]
    if missing_params:
        # This is not an error, just means it's not the new flow
        raise ExtractorError(f"Not the new auth flow: missing params {missing_params}")

    logger.info("New auth flow detected. Proceeding with POST auth.")

    # 1. Initial Auth POST
    auth_url = "https://security.newkso.ru/auth2.php"

    iframe_origin = f"https://{urlparse(iframe_url).netloc}"
    auth_headers = headers.copy()
    auth_headers.update(
        {
            "Accept": "*/*",
            "Accept-Language": "en-US,en;q=0.9",
            "Origin": iframe_origin,
            "Referer": iframe_url,
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "cross-site",
            "Priority": "u=1, i",
        }
    )

    # Build form data for multipart/form-data
    form_data = aiohttp.FormData()
    form_data.add_field("channelKey", params["channel_key"])
    form_data.add_field("country", params["auth_country"])
    form_data.add_field("timestamp", params["auth_ts"])
    form_data.add_field("expiry", params["auth_expiry"])
    form_data.add_field("token", params["auth_token"])

    try:
        async with create_aiohttp_session(auth_url, timeout=12, verify=False) as (session, proxy_url):
            async with session.post(
                auth_url,
                headers=auth_headers,
                data=form_data,
                proxy=proxy_url,
            ) as response:
                content = await response.read()
                response.raise_for_status()

        auth_data = json.loads(content.decode("utf-8"))
        if not (auth_data.get("valid") or auth_data.get("success")):
            raise ExtractorError(f"Initial auth failed with response: {auth_data}")
        logger.info("New auth flow: Initial auth successful.")
    except ExtractorError:
        raise
    except Exception as e:
        raise ExtractorError(f"New auth flow failed during initial auth POST: {e}") from e

    # 2. Server Lookup
    server_lookup_url = f"{iframe_origin}/server_lookup.js?channel_id={params['channel_key']}"
    try:
        # Goes through _make_request so the shared header merging and
        # error-status handling apply.
        lookup_resp = await self._make_request(server_lookup_url, headers=headers, timeout=10)
        server_data = lookup_resp.json()
        server_key = server_data.get("server_key")
        if not server_key:
            raise ExtractorError(f"No server_key in lookup response: {server_data}")
        logger.info(f"New auth flow: Server lookup successful - Server key: {server_key}")
    except ExtractorError:
        raise
    except Exception as e:
        raise ExtractorError(f"New auth flow failed during server lookup: {e}") from e

    # 3. Build final stream URL
    channel_key = params["channel_key"]
    auth_token = params["auth_token"]
    # The JS logic uses .css, not .m3u8
    if server_key == "top1/cdn":
        stream_url = f"https://top1.newkso.ru/top1/cdn/{channel_key}/mono.css"
    else:
        stream_url = f"https://{server_key}new.newkso.ru/{server_key}/{channel_key}/mono.css"

    logger.info(f"New auth flow: Constructed stream URL: {stream_url}")

    stream_headers = {
        "User-Agent": headers["User-Agent"],
        "Referer": iframe_url,
        "Origin": iframe_origin,
        "Authorization": f"Bearer {auth_token}",
        "X-Channel-Key": channel_key,
    }

    return {
        "destination_url": stream_url,
        "request_headers": stream_headers,
        "mediaflow_endpoint": "hls_manifest_proxy",
    }
|
||||
|
||||
async def _extract_lovecdn_stream(self, iframe_url: str, iframe_content: str, headers: dict) -> Dict[str, Any]:
    """
    Alternative extractor for lovecdn.ru iframe that uses a different format.

    Three strategies are tried in order: a quoted absolute ``.m3u8`` URL, a URL
    assembled from ``stream``/``channel`` and ``server``/``domain``/``host``
    values, and finally any bare ``http(s)://...m3u8`` URL in the content.

    Args:
        iframe_url: Iframe URL, used as Referer and to derive Origin.
        iframe_content: Iframe HTML/JS to search.
        headers: Request headers; must contain a ``"User-Agent"`` key.

    Returns:
        Dict with ``destination_url``, ``request_headers`` and
        ``mediaflow_endpoint`` (always ``"hls_key_proxy"``).

    Raises:
        ExtractorError: If no stream URL is found or any step fails; the
            original exception is attached as the cause.
    """
    try:
        # Look for direct stream URL patterns
        m3u8_patterns = [
            r'["\']([^"\']*\.m3u8[^"\']*)["\']',
            r'source[:\s]+["\']([^"\']+)["\']',
            r'file[:\s]+["\']([^"\']+\.m3u8[^"\']*)["\']',
            r'hlsManifestUrl[:\s]*["\']([^"\']+)["\']',
        ]

        stream_url = None
        for pattern in m3u8_patterns:
            for candidate in re.findall(pattern, iframe_content):
                # Only absolute URLs that actually point at a playlist count.
                if ".m3u8" in candidate and candidate.startswith("http"):
                    stream_url = candidate
                    logger.info(f"Found direct m3u8 URL: {stream_url}")
                    break
            if stream_url:
                break

        # Pattern 2: Look for dynamic URL construction
        if not stream_url:
            channel_match = re.search(r'(?:stream|channel)["\s:=]+["\']([^"\']+)["\']', iframe_content)
            server_match = re.search(r'(?:server|domain|host)["\s:=]+["\']([^"\']+)["\']', iframe_content)

            if channel_match:
                channel_name = channel_match.group(1)
                server = server_match.group(1) if server_match else "newkso.ru"
                stream_url = f"https://{server}/{channel_name}/mono.m3u8"
                logger.info(f"Constructed stream URL: {stream_url}")

        if not stream_url:
            # Fallback: look for any URL that looks like a stream
            url_pattern = r'https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*'
            matches = re.findall(url_pattern, iframe_content)
            if matches:
                stream_url = matches[0]
                logger.info(f"Found fallback stream URL: {stream_url}")

        if not stream_url:
            raise ExtractorError("Could not find stream URL in lovecdn.ru iframe")

        # Use iframe URL as referer
        iframe_origin = f"https://{urlparse(iframe_url).netloc}"
        stream_headers = {"User-Agent": headers["User-Agent"], "Referer": iframe_url, "Origin": iframe_origin}

        # The endpoint is fixed for this flow.
        endpoint = "hls_key_proxy"

        logger.info(f"Using lovecdn.ru stream with endpoint: {endpoint}")

        return {
            "destination_url": stream_url,
            "request_headers": stream_headers,
            "mediaflow_endpoint": endpoint,
        }

    except Exception as e:
        raise ExtractorError(f"Failed to extract lovecdn.ru stream: {e}") from e
|
||||
|
||||
async def _extract_direct_stream(self, channel_id: str) -> Dict[str, Any]:
    """
    Resolve a channel directly: iframe session data, server lookup, playlist URL.

    Each candidate iframe domain is tried in turn. A domain is skipped when it
    yields no session data, no server lookup URL or no server key, or when any
    step raises. The first domain that completes all steps produces the result,
    which also carries the parameters needed later to compute key headers.

    Raises:
        ExtractorError: If no iframe domain produces a stream.
    """
    # Common iframe domains for DLHD
    candidate_domains = ["lefttoplay.xyz"]

    for iframe_domain in candidate_domains:
        try:
            iframe_url = f"https://{iframe_domain}/premiumtv/daddyhd.php?id={channel_id}"
            logger.info(f"Attempting extraction via {iframe_domain}")

            session_data = await self._extract_session_data(iframe_url, "dlhd.link")
            if not session_data:
                logger.debug(f"No session data from {iframe_domain}")
                continue

            logger.info(f"Got session data from {iframe_domain}: channel_key={session_data['channel_key']}")

            if "server_lookup_url" not in session_data:
                logger.debug(f"No server lookup URL from {iframe_domain}")
                continue

            server_key = await self._get_server_key(session_data["server_lookup_url"], iframe_url)
            if not server_key:
                logger.debug(f"No server key from {iframe_domain}")
                continue

            logger.info(f"Got server key: {server_key}")

            m3u8_url = self._build_m3u8_url(server_key, session_data["channel_key"])
            logger.info(f"M3U8 URL: {m3u8_url}")

            auth_token = session_data["auth_token"]
            stream_headers = {
                "User-Agent": self._flaresolverr_user_agent or DEFAULT_DLHD_USER_AGENT,
                "Referer": iframe_url,
                "Origin": f"https://{iframe_domain}",
                "Authorization": f"Bearer {auth_token}",
            }

            # Parameters used later to compute headers when fetching keys.
            key_params = {
                "channel_salt": session_data["channel_salt"],
                "auth_token": auth_token,
                "iframe_url": iframe_url,
            }

            return {
                "destination_url": m3u8_url,
                "request_headers": stream_headers,
                "mediaflow_endpoint": "hls_key_proxy",
                # The playlist has a .css extension, so processing must be forced.
                "force_playlist_proxy": True,
                "dlhd_key_params": key_params,
            }

        except Exception as e:
            logger.warning(f"Failed extraction via {iframe_domain}: {e}")

    raise ExtractorError(f"Failed to extract stream from all iframe domains for channel {channel_id}")
|
||||
|
||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
    """Main extraction flow: direct server lookup first, iframe-based flow as fallback.

    The channel ID is read from a ``watch.php?id=<digits>`` or
    ``stream-<digits>`` fragment of *url*.

    Raises:
        ExtractorError: If no channel ID can be found or both flows fail; every
            failure is reported as ``"Extraction failed: ..."``.
    """

    def extract_channel_id(u: str) -> Optional[str]:
        # The watch.php form is tried before the stream-XXX form.
        for id_pattern in (r"watch\.php\?id=(\d+)", r"stream-(\d+)"):
            found = re.search(id_pattern, u)
            if found:
                return found.group(1)
        return None

    try:
        channel_id = extract_channel_id(url)
        if not channel_id:
            raise ExtractorError(f"Unable to extract channel ID from {url}")

        logger.info(f"Extracting DLHD stream for channel ID: {channel_id}")

        # Preferred path: direct stream extraction with the new auth flow.
        try:
            result = await self._extract_direct_stream(channel_id)
        except ExtractorError as e:
            logger.warning(f"Direct stream extraction failed: {e}")
        else:
            return result

        # Direct path failed: use the legacy iframe-based extraction.
        logger.info("Falling back to iframe-based extraction...")
        return await self._extract_via_iframe(url, channel_id)

    except Exception as e:
        raise ExtractorError(f"Extraction failed: {str(e)}")
|
||||
|
||||
async def _extract_via_iframe(self, url: str, channel_id: str) -> Dict[str, Any]:
    """Legacy iframe-based extraction flow - used as fallback.

    Steps:
        1. Fetch *url* (through FlareSolverr when ``settings.flaresolverr_url``
           is set) and collect the ``data-url`` of every "Player N" button.
        2. Fetch each player page and collect the distinct ``<iframe src>``
           values, in discovery order.
        3. Fetch each iframe candidate in turn and hand its HTML to the
           lovecdn or new-auth-flow helper; the first helper that returns
           wins.

    Args:
        url: Channel page URL to start from.
        channel_id: Channel ID; not used by this flow, kept for the caller.

    Returns:
        The result of ``_extract_lovecdn_stream`` or ``_extract_new_auth_flow``.

    Raises:
        ExtractorError: If the page has no player links, no player page
            yields an iframe, or every iframe candidate fails.
    """
    baseurl = "https://dlhd.dad/"

    daddy_origin = urlparse(baseurl).scheme + "://" + urlparse(baseurl).netloc
    daddylive_headers = {
        "User-Agent": self._flaresolverr_user_agent
        or "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
        "Referer": baseurl,
        "Origin": daddy_origin,
    }

    # 1. Request initial page - use FlareSolverr if available to bypass Cloudflare
    use_flaresolverr = settings.flaresolverr_url is not None
    resp1 = await self._make_request(url, headers=daddylive_headers, timeout=15, use_flaresolverr=use_flaresolverr)
    resp1_text = resp1.text

    # Update headers with FlareSolverr user-agent after initial request
    if self._flaresolverr_user_agent:
        daddylive_headers["User-Agent"] = self._flaresolverr_user_agent

    player_links = re.findall(r'<button[^>]*data-url="([^"]+)"[^>]*>Player\s*\d+</button>', resp1_text)
    if not player_links:
        raise ExtractorError("No player links found on the page.")

    # Try all players and collect all valid iframes
    last_player_error = None
    iframe_candidates = []

    for player_url in player_links:
        try:
            if not player_url.startswith("http"):
                player_url = baseurl + player_url.lstrip("/")

            daddylive_headers["Referer"] = player_url
            # An Origin header carries only scheme://host[:port], never a
            # path, so derive it from the player URL instead of sending the
            # full URL.
            parsed_player = urlparse(player_url)
            daddylive_headers["Origin"] = f"{parsed_player.scheme}://{parsed_player.netloc}"
            resp2 = await self._make_request(player_url, headers=daddylive_headers, timeout=12)
            resp2_text = resp2.text
            iframes2 = re.findall(r'<iframe.*?src="([^"]*)"', resp2_text)

            # Collect all found iframes
            for iframe in iframes2:
                if iframe not in iframe_candidates:
                    iframe_candidates.append(iframe)
                    logger.info(f"Found iframe candidate: {iframe}")

        except Exception as e:
            # Best effort: one broken player must not abort the others.
            last_player_error = e
            logger.warning(f"Failed to process player link {player_url}: {e}")
            continue

    if not iframe_candidates:
        if last_player_error is not None:
            raise ExtractorError(f"All player links failed. Last error: {last_player_error}")
        raise ExtractorError("No valid iframe found in any player page")

    # Try each iframe until one works
    last_iframe_error = None

    for iframe_candidate in iframe_candidates:
        try:
            logger.info(f"Trying iframe: {iframe_candidate}")

            iframe_domain = urlparse(iframe_candidate).netloc
            if not iframe_domain:
                logger.warning(f"Invalid iframe URL format: {iframe_candidate}")
                continue

            self._iframe_context = iframe_candidate
            resp3 = await self._make_request(iframe_candidate, headers=daddylive_headers, timeout=12)
            iframe_content = resp3.text
            logger.info(f"Successfully loaded iframe from: {iframe_domain}")

            if "lovecdn.ru" in iframe_domain:
                logger.info("Detected lovecdn.ru iframe - using alternative extraction")
                return await self._extract_lovecdn_stream(iframe_candidate, iframe_content, daddylive_headers)
            else:
                logger.info("Attempting new auth flow extraction.")
                return await self._extract_new_auth_flow(iframe_candidate, iframe_content, daddylive_headers)

        except Exception as e:
            logger.warning(f"Failed to process iframe {iframe_candidate}: {e}")
            last_iframe_error = e
            continue

    raise ExtractorError(f"All iframe candidates failed. Last error: {last_iframe_error}")
|
||||
@@ -1,49 +1,209 @@
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
from urllib.parse import urlparse, urljoin
|
||||
|
||||
import aiohttp
|
||||
from curl_cffi.requests import AsyncSession
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
from mediaflow_proxy.configs import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_DOOD_UA = (
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
|
||||
)
|
||||
|
||||
|
||||
class DoodStreamExtractor(BaseExtractor):
|
||||
"""
|
||||
Dood / MyVidPlay extractor
|
||||
Resolves to direct CDN MP4
|
||||
"""
|
||||
DoodStream / PlayMogo extractor.
|
||||
|
||||
def __init__(self, request_headers: dict):
|
||||
super().__init__(request_headers)
|
||||
self.base_url = "https://myvidplay.com"
|
||||
All DoodStream mirror domains (dsvplay.com, myvidplay.com, dood.to, …) now
|
||||
redirect to playmogo.com which sits behind Cloudflare and may require a
|
||||
Turnstile CAPTCHA before serving the pass_md5 URL.
|
||||
|
||||
Extraction order:
|
||||
1. Byparr — set BYPARR_URL (Firefox/Camoufox → Turnstile auto-validates,
|
||||
not blocked by DisableDevtool.js)
|
||||
2. curl_cffi — Chrome impersonation; works when Turnstile is not triggered,
|
||||
raises a descriptive error if captcha is detected.
|
||||
"""
|
||||
|
||||
async def extract(self, url: str, **kwargs):
|
||||
parsed = urlparse(url)
|
||||
video_id = parsed.path.rstrip("/").split("/")[-1]
|
||||
if not video_id:
|
||||
raise ExtractorError("Invalid Dood URL")
|
||||
raise ExtractorError("Invalid DoodStream URL: no video ID found")
|
||||
|
||||
headers = {
|
||||
"User-Agent": self.base_headers.get("User-Agent") or "Mozilla/5.0",
|
||||
"Referer": f"{self.base_url}/",
|
||||
if settings.byparr_url:
|
||||
try:
|
||||
return await self._extract_via_byparr(url, video_id)
|
||||
except ExtractorError:
|
||||
raise
|
||||
|
||||
return await self._extract_via_curl_cffi(url, video_id)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Path 1 – Byparr (Firefox/Camoufox → Turnstile auto-validates)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _extract_via_byparr(self, url: str, video_id: str) -> dict:
|
||||
"""
|
||||
Use Byparr to bypass Cloudflare protection on the DoodStream embed page.
|
||||
|
||||
Strategy: fetch the embed page without any injected script. Byparr's
|
||||
Firefox/Camoufox browser auto-passes Cloudflare's bot checks and often
|
||||
bypasses the Turnstile CAPTCHA gate directly, returning the embed HTML
|
||||
with pass_md5. If the response doesn't contain pass_md5, reuse the CF
|
||||
cookies + UA from Byparr in a follow-up curl_cffi request (which avoids
|
||||
re-triggering the bot check).
|
||||
"""
|
||||
endpoint = f"{settings.byparr_url.rstrip('/')}/v1"
|
||||
embed_url = url if "/e/" in url else f"https://{urlparse(url).netloc}/e/{video_id}"
|
||||
payload = {
|
||||
"cmd": "request.get",
|
||||
"url": embed_url,
|
||||
"maxTimeout": settings.byparr_timeout * 1000,
|
||||
}
|
||||
|
||||
embed_url = f"{self.base_url}/e/{video_id}"
|
||||
html = (await self._make_request(embed_url, headers=headers)).text
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.post(
|
||||
endpoint,
|
||||
json=payload,
|
||||
timeout=aiohttp.ClientTimeout(total=settings.byparr_timeout + 15),
|
||||
) as resp:
|
||||
if resp.status != 200:
|
||||
raise ExtractorError(f"Byparr HTTP {resp.status}")
|
||||
data = await resp.json()
|
||||
|
||||
match = re.search(r"(\/pass_md5\/[^']+)", html)
|
||||
if not match:
|
||||
raise ExtractorError("Dood: pass_md5 not found")
|
||||
if data.get("status") != "ok":
|
||||
raise ExtractorError(f"Byparr: {data.get('message', 'unknown error')}")
|
||||
|
||||
pass_url = urljoin(self.base_url, match.group(1))
|
||||
solution = data.get("solution", {})
|
||||
final_url = solution.get("url", embed_url)
|
||||
if not final_url.startswith("http"):
|
||||
final_url = embed_url
|
||||
base_url = f"https://{urlparse(final_url).netloc}"
|
||||
html = solution.get("response", "")
|
||||
|
||||
base_stream = (await self._make_request(pass_url, headers=headers)).text.strip()
|
||||
if "pass_md5" not in html:
|
||||
# Byparr may not have the pass_md5 in the initial response.
|
||||
# Try two recovery strategies in order:
|
||||
#
|
||||
# 1. Cookie reuse — if Byparr collected CF clearance cookies before
|
||||
# the page loaded fully, inject them into a curl_cffi request.
|
||||
# 2. Plain curl_cffi — Chrome TLS impersonation without JS execution.
|
||||
raw_cookies = solution.get("cookies", [])
|
||||
cookies = {c["name"]: c["value"] for c in raw_cookies}
|
||||
ua = solution.get("userAgent", _DOOD_UA)
|
||||
|
||||
token_match = re.search(r"token=([^&]+)", html)
|
||||
if cookies:
|
||||
cf_domain = (
|
||||
next(
|
||||
(c.get("domain", "").lstrip(".") for c in raw_cookies if c.get("name") == "cf_clearance"),
|
||||
None,
|
||||
)
|
||||
or "playmogo.com"
|
||||
)
|
||||
retry_url = f"https://{cf_domain}/e/{video_id}"
|
||||
logger.debug(
|
||||
"Byparr response lacked pass_md5 (final_url=%s); retrying %s with CF cookies via curl_cffi",
|
||||
final_url,
|
||||
retry_url,
|
||||
)
|
||||
proxy = self._get_proxy(retry_url)
|
||||
async with AsyncSession() as s:
|
||||
r = await s.get(
|
||||
retry_url,
|
||||
impersonate="chrome",
|
||||
cookies=cookies,
|
||||
headers={"User-Agent": ua, "Referer": f"https://{cf_domain}/"},
|
||||
timeout=20,
|
||||
**({"proxy": proxy} if proxy else {}),
|
||||
)
|
||||
html = r.text
|
||||
final_url = str(r.url)
|
||||
base_url = f"https://{urlparse(final_url).netloc}"
|
||||
|
||||
if "pass_md5" not in html:
|
||||
logger.debug("Byparr cookie reuse also failed; falling back to curl_cffi for %s", embed_url)
|
||||
return await self._extract_via_curl_cffi(embed_url, video_id)
|
||||
|
||||
return await self._parse_embed_html(html, base_url)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Path 2 – curl_cffi (bypasses CF bot protection; Turnstile may block)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _extract_via_curl_cffi(self, url: str, video_id: str) -> dict:
    """Fetch the embed page with curl_cffi (Chrome impersonation) and parse it.

    Redirects are followed, and the host of the final URL becomes the base
    URL passed to ``_parse_embed_html``. *video_id* is accepted for signature
    parity with the Byparr path and is not used here.

    Raises:
        ExtractorError: If the returned HTML has no ``pass_md5`` marker. A
            dedicated message is used when the page looks like a Turnstile
            CAPTCHA.
    """
    # Only pass a proxy keyword when a proxy route is configured for this URL.
    extra_kwargs = {}
    proxy = self._get_proxy(url)
    if proxy:
        extra_kwargs["proxy"] = proxy

    async with AsyncSession() as session:
        response = await session.get(
            url,
            impersonate="chrome",
            headers={"Referer": f"https://{urlparse(url).netloc}/"},
            timeout=30,
            allow_redirects=True,
            **extra_kwargs,
        )

    landed_url = str(response.url)
    page = response.text
    origin = f"https://{urlparse(landed_url).netloc}"

    if "pass_md5" in page:
        return await self._parse_embed_html(page, origin)

    # No pass_md5: distinguish a CAPTCHA wall from any other unexpected page.
    captcha_detected = "turnstile" in page.lower() or "captcha_l" in page
    if captcha_detected:
        raise ExtractorError(
            "DoodStream: site is serving a Turnstile CAPTCHA that requires "
            "browser interaction — cannot be bypassed automatically from this "
            "network location. Try a residential IP or a VPN/proxy."
        )
    raise ExtractorError(f"DoodStream: pass_md5 not found in embed HTML ({landed_url})")
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Common HTML parser
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _parse_embed_html(self, html: str, base_url: str) -> dict:
|
||||
pass_match = re.search(r"(/pass_md5/[^'\"<>\s]+)", html)
|
||||
if not pass_match:
|
||||
raise ExtractorError("DoodStream: pass_md5 path not found in embed HTML")
|
||||
|
||||
pass_url = urljoin(base_url, pass_match.group(1))
|
||||
ua = self.base_headers.get("user-agent") or _DOOD_UA
|
||||
headers = {
|
||||
"user-agent": ua,
|
||||
"referer": f"{base_url}/",
|
||||
}
|
||||
|
||||
proxy = self._get_proxy(pass_url)
|
||||
async with AsyncSession() as s:
|
||||
r = await s.get(
|
||||
pass_url,
|
||||
impersonate="chrome",
|
||||
headers=headers,
|
||||
timeout=20,
|
||||
**({"proxy": proxy} if proxy else {}),
|
||||
)
|
||||
|
||||
base_stream = r.text.strip()
|
||||
if not base_stream or "RELOAD" in base_stream:
|
||||
raise ExtractorError(
|
||||
"DoodStream: pass_md5 endpoint returned no stream URL "
|
||||
"(captcha session may have expired). "
|
||||
"Ensure BYPARR_URL is set for reliable extraction."
|
||||
)
|
||||
|
||||
token_match = re.search(r"token=([^&\s'\"]+)", html)
|
||||
if not token_match:
|
||||
raise ExtractorError("Dood: token missing")
|
||||
raise ExtractorError("DoodStream: token not found in embed HTML")
|
||||
|
||||
token = token_match.group(1)
|
||||
|
||||
final_url = f"{base_stream}123456789?token={token}&expiry={int(time.time())}"
|
||||
expiry = int(time.time())
|
||||
final_url = f"{base_stream}123456789?token={token}&expiry={expiry}"
|
||||
|
||||
return {
|
||||
"destination_url": final_url,
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
from typing import Dict, Type
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
from mediaflow_proxy.extractors.dlhd import DLHDExtractor
|
||||
from mediaflow_proxy.extractors.doodstream import DoodStreamExtractor
|
||||
from mediaflow_proxy.extractors.city import CityExtractor
|
||||
from mediaflow_proxy.extractors.sportsonline import SportsonlineExtractor
|
||||
from mediaflow_proxy.extractors.filelions import FileLionsExtractor
|
||||
from mediaflow_proxy.extractors.filemoon import FileMoonExtractor
|
||||
@@ -24,12 +24,14 @@ from mediaflow_proxy.extractors.vidoza import VidozaExtractor
|
||||
from mediaflow_proxy.extractors.vixcloud import VixCloudExtractor
|
||||
from mediaflow_proxy.extractors.fastream import FastreamExtractor
|
||||
from mediaflow_proxy.extractors.voe import VoeExtractor
|
||||
from mediaflow_proxy.extractors.vidfast import VidFastExtractor
|
||||
|
||||
|
||||
class ExtractorFactory:
|
||||
"""Factory for creating URL extractors."""
|
||||
|
||||
_extractors: Dict[str, Type[BaseExtractor]] = {
|
||||
"City": CityExtractor,
|
||||
"Doodstream": DoodStreamExtractor,
|
||||
"FileLions": FileLionsExtractor,
|
||||
"FileMoon": FileMoonExtractor,
|
||||
@@ -46,13 +48,13 @@ class ExtractorFactory:
|
||||
"Maxstream": MaxstreamExtractor,
|
||||
"LiveTV": LiveTVExtractor,
|
||||
"LuluStream": LuluStreamExtractor,
|
||||
"DLHD": DLHDExtractor,
|
||||
"Vavoo": VavooExtractor,
|
||||
"Vidmoly": VidmolyExtractor,
|
||||
"Vidoza": VidozaExtractor,
|
||||
"Fastream": FastreamExtractor,
|
||||
"Voe": VoeExtractor,
|
||||
"Sportsonline": SportsonlineExtractor,
|
||||
"VidFast": VidFastExtractor,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -1,23 +1,42 @@
|
||||
import re
|
||||
from typing import Dict, Any
|
||||
|
||||
from curl_cffi.requests import AsyncSession
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
|
||||
|
||||
class LuluStreamExtractor(BaseExtractor):
|
||||
"""LuluStream URL extractor.
|
||||
|
||||
Uses curl_cffi + Chrome impersonation to bypass Cloudflare protection.
|
||||
lulustream.com embeds are served via luluvdo.com.
|
||||
"""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.mediaflow_endpoint = "hls_manifest_proxy"
|
||||
|
||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||
response = await self._make_request(url)
|
||||
proxy = self._get_proxy(url)
|
||||
async with AsyncSession() as session:
|
||||
response = await session.get(
|
||||
url,
|
||||
impersonate="chrome",
|
||||
timeout=30,
|
||||
allow_redirects=True,
|
||||
**({"proxy": proxy} if proxy else {}),
|
||||
)
|
||||
|
||||
if response.status_code >= 400:
|
||||
raise ExtractorError(f"HTTP {response.status_code} while fetching {url}")
|
||||
|
||||
# See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/lulustream.py
|
||||
pattern = r"""sources:\s*\[{file:\s*["'](?P<url>[^"']+)"""
|
||||
match = re.search(pattern, response.text, re.DOTALL)
|
||||
if not match:
|
||||
raise ExtractorError("Failed to extract source URL")
|
||||
final_url = match.group(1)
|
||||
raise ExtractorError("LuluStream: Failed to extract source URL")
|
||||
final_url = match.group("url")
|
||||
|
||||
self.base_headers["referer"] = url
|
||||
return {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import re
|
||||
import logging
|
||||
from typing import Any, Dict
|
||||
from urllib.parse import urlparse
|
||||
from urllib.parse import urljoin, urlparse
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
from mediaflow_proxy.utils.packed import unpack
|
||||
@@ -14,7 +14,7 @@ class SportsonlineExtractor(BaseExtractor):
|
||||
|
||||
Strategy:
|
||||
1. Fetch page -> find first <iframe src="...">
|
||||
2. Fetch iframe with Referer=https://sportzonline.st/
|
||||
2. Fetch iframe with dynamic source-page Referer/Origin
|
||||
3. Collect packed eval blocks; if >=2 use second (index 1) else first.
|
||||
4. Unpack P.A.C.K.E.R. and search var src="...m3u8".
|
||||
5. Return final m3u8 with referer header.
|
||||
@@ -33,56 +33,125 @@ class SportsonlineExtractor(BaseExtractor):
|
||||
"""
|
||||
Detect and extract packed eval blocks from HTML.
|
||||
"""
|
||||
# Find all eval(function...) blocks - more greedy to capture full packed code
|
||||
pattern = re.compile(r"eval\(function\(p,a,c,k,e,.*?\)\)(?:\s*;|\s*<)", re.DOTALL)
|
||||
raw_matches = pattern.findall(html)
|
||||
raw_matches: list[str] = []
|
||||
strict_eval_pattern = re.compile(r"eval\(function\(p,a,c,k,e,.*?\}\(.*?\)\)", re.DOTALL)
|
||||
relaxed_eval_pattern = re.compile(r"eval\(function\(p,a,c,k,e,[dr]\).*?\}\(.*?\)\)", re.DOTALL)
|
||||
|
||||
# Prefer script-body extraction first. This is more resilient when the packed
|
||||
# code has nested parentheses/semicolons that are hard to capture with a
|
||||
# single regex.
|
||||
script_pattern = re.compile(r"<script[^>]*>(.*?)</script>", re.IGNORECASE | re.DOTALL)
|
||||
for script_body in script_pattern.findall(html):
|
||||
if "eval(function(p,a,c,k,e" in script_body:
|
||||
strict_matches = strict_eval_pattern.findall(script_body)
|
||||
if strict_matches:
|
||||
raw_matches.extend(strict_matches)
|
||||
continue
|
||||
|
||||
relaxed_matches = relaxed_eval_pattern.findall(script_body)
|
||||
if relaxed_matches:
|
||||
raw_matches.extend(relaxed_matches)
|
||||
|
||||
if raw_matches:
|
||||
return raw_matches
|
||||
|
||||
# Fallback: direct eval(...) extraction from raw HTML.
|
||||
raw_matches = strict_eval_pattern.findall(html)
|
||||
|
||||
# If no matches with the strict pattern, try a more relaxed one
|
||||
if not raw_matches:
|
||||
# Try to find eval(function and capture until we find the closing ))
|
||||
pattern = re.compile(r"eval\(function\(p,a,c,k,e,[dr]\).*?\}\(.*?\)\)", re.DOTALL)
|
||||
raw_matches = pattern.findall(html)
|
||||
raw_matches = relaxed_eval_pattern.findall(html)
|
||||
|
||||
return raw_matches
|
||||
|
||||
@staticmethod
|
||||
def _extract_m3u8_candidate(text: str) -> str | None:
|
||||
patterns = [
|
||||
r"var\s+src\s*=\s*[\"']([^\"']+\.m3u8[^\"']*)[\"']",
|
||||
r"src\s*=\s*[\"']([^\"']+\.m3u8[^\"']*)[\"']",
|
||||
r"file\s*:\s*[\"']([^\"']+\.m3u8[^\"']*)[\"']",
|
||||
r"[\"']([^\"']*https?://[^\"']+\.m3u8[^\"']*)[\"']",
|
||||
r"(https?://[^\s\"'>]+\.m3u8[^\s\"'>]*)",
|
||||
r"(//[^\s\"'>]+\.m3u8[^\s\"'>]*)",
|
||||
r"(/[^\s\"'>]+\.m3u8[^\s\"'>]*)",
|
||||
]
|
||||
|
||||
for pattern in patterns:
|
||||
match = re.search(pattern, text)
|
||||
if match:
|
||||
return match.group(1)
|
||||
|
||||
return None
|
||||
|
||||
@staticmethod
def _normalize_stream_url(stream_url: str, base_url: str) -> str:
    """Turn a scraped stream reference into an absolute URL.

    Surrounding whitespace and quotes are removed and JSON-style escaped
    slashes (``\\/``) are unescaped. Protocol-relative URLs take the scheme
    of *base_url* (``https`` when it has none), scheme-less references are
    resolved against *base_url*, and anything that already has a scheme is
    returned as cleaned.
    """
    candidate = stream_url.strip()
    candidate = candidate.strip("\"'")
    candidate = candidate.replace("\\/", "/")

    if candidate.startswith("//"):
        scheme = urlparse(base_url).scheme
        if not scheme:
            scheme = "https"
        return scheme + ":" + candidate

    if urlparse(candidate).scheme:
        return candidate

    return urljoin(base_url, candidate)
|
||||
|
||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||
"""Main extraction flow: fetch page, extract iframe, unpack and find m3u8."""
|
||||
try:
|
||||
parsed_source = urlparse(url)
|
||||
source_origin = f"{parsed_source.scheme}://{parsed_source.netloc}"
|
||||
source_referer = self.base_headers.get("Referer") or self.base_headers.get("referer") or f"{source_origin}/"
|
||||
user_agent = self.base_headers.get("User-Agent") or self.base_headers.get("user-agent") or "Mozilla/5.0"
|
||||
|
||||
# Step 1: Fetch main page
|
||||
logger.info(f"Fetching main page: {url}")
|
||||
main_response = await self._make_request(url, timeout=15)
|
||||
main_response = await self._make_request(
|
||||
url,
|
||||
headers={
|
||||
"Referer": source_referer,
|
||||
"Origin": source_origin,
|
||||
"User-Agent": user_agent,
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.9,it;q=0.8",
|
||||
"Cache-Control": "no-cache",
|
||||
},
|
||||
timeout=15,
|
||||
)
|
||||
main_html = main_response.text
|
||||
parsed_main = urlparse(main_response.url)
|
||||
main_origin = f"{parsed_main.scheme}://{parsed_main.netloc}"
|
||||
|
||||
# Extract first iframe
|
||||
iframe_match = re.search(r'<iframe\s+src=["\']([^"\']+)["\']', main_html, re.IGNORECASE)
|
||||
if not iframe_match:
|
||||
raise ExtractorError("No iframe found on the page")
|
||||
# Extract first iframe (src can appear in any attribute order)
|
||||
iframe_match = re.search(r'<iframe[^>]+(?<!data-)src=["\']([^"\']+)["\']', main_html, re.IGNORECASE)
|
||||
iframe_url = main_response.url
|
||||
iframe_html = main_html
|
||||
|
||||
iframe_url = iframe_match.group(1)
|
||||
if iframe_match:
|
||||
iframe_url = self._normalize_stream_url(iframe_match.group(1), main_response.url)
|
||||
logger.info(f"Found iframe URL: {iframe_url}")
|
||||
|
||||
# Normalize iframe URL
|
||||
if iframe_url.startswith("//"):
|
||||
iframe_url = "https:" + iframe_url
|
||||
elif iframe_url.startswith("/"):
|
||||
parsed_main = urlparse(url)
|
||||
iframe_url = f"{parsed_main.scheme}://{parsed_main.netloc}{iframe_url}"
|
||||
# Step 2: Fetch iframe with source page as referer
|
||||
iframe_headers = {
|
||||
"Referer": main_response.url,
|
||||
"Origin": main_origin,
|
||||
"User-Agent": user_agent,
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.9,it;q=0.8",
|
||||
"Cache-Control": "no-cache",
|
||||
}
|
||||
|
||||
logger.info(f"Found iframe URL: {iframe_url}")
|
||||
iframe_response = await self._make_request(iframe_url, headers=iframe_headers, timeout=15)
|
||||
iframe_html = iframe_response.text
|
||||
iframe_url = iframe_response.url
|
||||
logger.debug(f"Iframe HTML length: {len(iframe_html)}")
|
||||
else:
|
||||
logger.warning("No iframe found on page, attempting extraction from main HTML")
|
||||
|
||||
# Step 2: Fetch iframe with Referer
|
||||
iframe_headers = {
|
||||
"Referer": "https://sportzonline.st/",
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36",
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.9,it;q=0.8",
|
||||
"Cache-Control": "no-cache",
|
||||
parsed_iframe = urlparse(iframe_url)
|
||||
playback_headers = {
|
||||
"Referer": iframe_url,
|
||||
"Origin": f"{parsed_iframe.scheme}://{parsed_iframe.netloc}",
|
||||
"User-Agent": user_agent,
|
||||
}
|
||||
|
||||
iframe_response = await self._make_request(iframe_url, headers=iframe_headers, timeout=15)
|
||||
iframe_html = iframe_response.text
|
||||
|
||||
logger.debug(f"Iframe HTML length: {len(iframe_html)}")
|
||||
|
||||
# Step 3: Detect packed blocks
|
||||
packed_blocks = self._detect_packed_blocks(iframe_html)
|
||||
|
||||
@@ -91,21 +160,19 @@ class SportsonlineExtractor(BaseExtractor):
|
||||
if not packed_blocks:
|
||||
logger.warning("No packed blocks found, trying direct m3u8 search")
|
||||
# Fallback: try direct m3u8 search
|
||||
direct_match = re.search(r'(https?://[^\s"\'>]+\.m3u8[^\s"\'>]*)', iframe_html)
|
||||
direct_match = self._extract_m3u8_candidate(iframe_html)
|
||||
if direct_match:
|
||||
m3u8_url = direct_match.group(1)
|
||||
m3u8_url = self._normalize_stream_url(direct_match, iframe_url)
|
||||
logger.info(f"Found direct m3u8 URL: {m3u8_url}")
|
||||
|
||||
return {
|
||||
"destination_url": m3u8_url,
|
||||
"request_headers": {"Referer": iframe_url, "User-Agent": iframe_headers["User-Agent"]},
|
||||
"request_headers": playback_headers,
|
||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||
}
|
||||
else:
|
||||
raise ExtractorError("No packed blocks or direct m3u8 URL found")
|
||||
|
||||
logger.info(f"Found {len(packed_blocks)} packed blocks")
|
||||
|
||||
# Choose block: if >=2 use second (index 1), else first (index 0)
|
||||
chosen_idx = 1 if len(packed_blocks) > 1 else 0
|
||||
m3u8_url = None
|
||||
@@ -123,22 +190,7 @@ class SportsonlineExtractor(BaseExtractor):
|
||||
|
||||
# Search for var src="...m3u8" with multiple patterns
|
||||
if unpacked_code:
|
||||
# Try multiple patterns as in the TypeScript version
|
||||
patterns = [
|
||||
r'var\s+src\s*=\s*["\']([^"\']+)["\']', # var src="..."
|
||||
r'src\s*=\s*["\']([^"\']+\.m3u8[^"\']*)["\']', # src="...m3u8"
|
||||
r'file\s*:\s*["\']([^"\']+\.m3u8[^"\']*)["\']', # file: "...m3u8"
|
||||
r'["\']([^"\']*https?://[^"\']+\.m3u8[^"\']*)["\']', # any m3u8 URL
|
||||
]
|
||||
|
||||
for pattern in patterns:
|
||||
src_match = re.search(pattern, unpacked_code)
|
||||
if src_match:
|
||||
m3u8_url = src_match.group(1)
|
||||
# Verify it looks like a valid m3u8 URL
|
||||
if ".m3u8" in m3u8_url or "http" in m3u8_url:
|
||||
break
|
||||
m3u8_url = None
|
||||
m3u8_url = self._extract_m3u8_candidate(unpacked_code)
|
||||
|
||||
# If not found, try all other blocks
|
||||
if not m3u8_url:
|
||||
@@ -148,36 +200,30 @@ class SportsonlineExtractor(BaseExtractor):
|
||||
continue
|
||||
try:
|
||||
unpacked_code = unpack(block)
|
||||
# Use the same patterns as above
|
||||
for pattern in [
|
||||
r'var\s+src\s*=\s*["\']([^"\']+)["\']',
|
||||
r'src\s*=\s*["\']([^"\']+\.m3u8[^"\']*)["\']',
|
||||
r'file\s*:\s*["\']([^"\']+\.m3u8[^"\']*)["\']',
|
||||
r'["\']([^"\']*https?://[^"\']+\.m3u8[^"\']*)["\']',
|
||||
]:
|
||||
src_match = re.search(pattern, unpacked_code)
|
||||
if src_match:
|
||||
test_url = src_match.group(1)
|
||||
if ".m3u8" in test_url or "http" in test_url:
|
||||
m3u8_url = test_url
|
||||
logger.info(f"Found m3u8 in block {i}")
|
||||
break
|
||||
|
||||
m3u8_url = self._extract_m3u8_candidate(unpacked_code)
|
||||
if m3u8_url:
|
||||
logger.info(f"Found m3u8 in block {i}")
|
||||
break
|
||||
except Exception as e:
|
||||
logger.debug(f"Failed to process block {i}: {e}")
|
||||
continue
|
||||
|
||||
if not m3u8_url:
|
||||
fallback_candidate = self._extract_m3u8_candidate(iframe_html)
|
||||
if fallback_candidate:
|
||||
m3u8_url = fallback_candidate
|
||||
|
||||
if not m3u8_url:
|
||||
raise ExtractorError("Could not extract m3u8 URL from packed code")
|
||||
|
||||
m3u8_url = self._normalize_stream_url(m3u8_url, iframe_url)
|
||||
|
||||
logger.info(f"Successfully extracted m3u8 URL: {m3u8_url}")
|
||||
|
||||
# Return stream configuration
|
||||
return {
|
||||
"destination_url": m3u8_url,
|
||||
"request_headers": {"Referer": iframe_url, "User-Agent": iframe_headers["User-Agent"]},
|
||||
"request_headers": playback_headers,
|
||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||
}
|
||||
|
||||
|
||||
@@ -24,12 +24,17 @@ class SupervideoExtractor(BaseExtractor):
|
||||
|
||||
Uses curl_cffi with Chrome impersonation to bypass Cloudflare.
|
||||
"""
|
||||
|
||||
patterns = [r'file:"(.*?)"']
|
||||
proxy = self._get_proxy(url)
|
||||
|
||||
try:
|
||||
async with AsyncSession() as session:
|
||||
response = await session.get(url, impersonate="chrome")
|
||||
response = await session.get(
|
||||
url,
|
||||
impersonate="chrome",
|
||||
timeout=30,
|
||||
**({"proxy": proxy} if proxy else {}),
|
||||
)
|
||||
|
||||
if response.status_code != 200:
|
||||
raise ExtractorError(f"HTTP {response.status_code} while fetching {url}")
|
||||
|
||||
@@ -1,22 +1,39 @@
|
||||
import re
|
||||
from typing import Dict
|
||||
from typing import Dict, Any
|
||||
from urllib.parse import urljoin
|
||||
|
||||
from curl_cffi.requests import AsyncSession
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
|
||||
|
||||
class UqloadExtractor(BaseExtractor):
|
||||
"""Uqload URL extractor."""
|
||||
"""Uqload URL extractor.
|
||||
|
||||
async def extract(self, url: str, **kwargs) -> Dict[str, str]:
|
||||
"""Extract Uqload URL."""
|
||||
response = await self._make_request(url)
|
||||
Uses curl_cffi + Chrome impersonation to handle Cloudflare protection.
|
||||
Follows redirects automatically (uqload.bz/co/io all redirect to uqload.is).
|
||||
"""
|
||||
|
||||
video_url_match = re.search(r'sources: \["(.*?)"]', response.text)
|
||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||
proxy = self._get_proxy(url)
|
||||
async with AsyncSession() as session:
|
||||
response = await session.get(
|
||||
url,
|
||||
impersonate="chrome",
|
||||
timeout=30,
|
||||
allow_redirects=True,
|
||||
**({"proxy": proxy} if proxy else {}),
|
||||
)
|
||||
|
||||
if response.status_code >= 400:
|
||||
raise ExtractorError(f"HTTP {response.status_code} while fetching {url}")
|
||||
|
||||
video_url_match = re.search(r'sources:\s*\["(https?://[^"]+)"', response.text)
|
||||
if not video_url_match:
|
||||
raise ExtractorError("Failed to extract video URL")
|
||||
raise ExtractorError("Uqload: video URL not found in page source")
|
||||
|
||||
self.base_headers["referer"] = urljoin(url, "/")
|
||||
final_url = str(response.url)
|
||||
self.base_headers["referer"] = urljoin(final_url, "/")
|
||||
return {
|
||||
"destination_url": video_url_match.group(1),
|
||||
"request_headers": self.base_headers,
|
||||
|
||||
+244
-174
@@ -1,5 +1,9 @@
|
||||
import logging
|
||||
import time
|
||||
import re
|
||||
import uuid
|
||||
from typing import Any, Dict, Optional
|
||||
from urllib.parse import quote, urlparse
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
|
||||
@@ -7,71 +11,29 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class VavooExtractor(BaseExtractor):
|
||||
"""Vavoo URL extractor for resolving vavoo.to links.
|
||||
"""Vavoo URL extractor per risolvere link vavoo.to"""
|
||||
|
||||
Supports two URL formats:
|
||||
1. Web-VOD API links: https://vavoo.to/web-vod/api/get?link=...
|
||||
These redirect (302) to external video hosts (Doodstream, etc.)
|
||||
2. Legacy mediahubmx format (currently broken on Vavoo's end)
|
||||
|
||||
Features:
|
||||
- Uses BaseExtractor's retry/timeouts
|
||||
- Improved headers to mimic Android okhttp client
|
||||
- Robust JSON handling and logging
|
||||
"""
|
||||
API_UA = "okhttp/4.11.0"
|
||||
RESOLVE_UA = "MediaHubMX/2"
|
||||
TS_UA = "VAVOO/2.6"
|
||||
|
||||
def __init__(self, request_headers: dict):
|
||||
super().__init__(request_headers)
|
||||
# Endpoint is resolved dynamically per-extraction based on the stream URL type.
|
||||
self.mediaflow_endpoint = "proxy_stream_endpoint"
|
||||
|
||||
async def _resolve_web_vod_link(self, url: str) -> str:
|
||||
"""Resolve a web-vod API link by getting the redirect Location header."""
|
||||
import aiohttp
|
||||
|
||||
try:
|
||||
# Use aiohttp directly with allow_redirects=False to get the Location header
|
||||
timeout = aiohttp.ClientTimeout(total=10)
|
||||
async with aiohttp.ClientSession(timeout=timeout) as session:
|
||||
async with session.get(
|
||||
url,
|
||||
headers={"Accept": "application/json"},
|
||||
allow_redirects=False,
|
||||
) as resp:
|
||||
# Check for redirect
|
||||
if resp.status in (301, 302, 303, 307, 308):
|
||||
location = resp.headers.get("Location") or resp.headers.get("location")
|
||||
if location:
|
||||
logger.info(f"Vavoo web-vod redirected to: {location}")
|
||||
return location
|
||||
|
||||
# If we got a 200, the response might contain the URL
|
||||
if resp.status == 200:
|
||||
text = await resp.text()
|
||||
if text and text.startswith("http"):
|
||||
logger.info(f"Vavoo web-vod resolved to: {text.strip()}")
|
||||
return text.strip()
|
||||
|
||||
raise ExtractorError(f"Vavoo web-vod API returned unexpected status {resp.status}")
|
||||
|
||||
except ExtractorError:
|
||||
raise
|
||||
except Exception as e:
|
||||
raise ExtractorError(f"Failed to resolve Vavoo web-vod link: {e}")
|
||||
|
||||
async def get_auth_signature(self) -> Optional[str]:
|
||||
"""Get authentication signature for Vavoo API (async)."""
|
||||
async def _get_auth_signature(self) -> Optional[str]:
|
||||
"""Get authentication signature via lokke.app/api/app/ping (aligned with working plugin)."""
|
||||
unique_id = uuid.uuid4().hex[:16]
|
||||
now_ms = int(time.time() * 1000)
|
||||
headers = {
|
||||
"user-agent": "okhttp/4.11.0",
|
||||
"user-agent": self.API_UA,
|
||||
"accept": "application/json",
|
||||
"content-type": "application/json; charset=utf-8",
|
||||
"accept-encoding": "gzip",
|
||||
}
|
||||
import time
|
||||
|
||||
current_time = int(time.time() * 1000)
|
||||
|
||||
data = {
|
||||
"token": "",
|
||||
body = {
|
||||
"token": "ldCvE092e7gER0rVIajfsXIvRhwlrAzP6_1oEJ4q6HH89QHt24v6NNL_jQJO219hiLOXF2hqEfsUuEWitEIGN4EaHHEHb7Cd7gojc5SQYRFzU3XWo_kMeryAUbcwWnQrnf0-",
|
||||
"reason": "app-blur",
|
||||
"locale": "de",
|
||||
"theme": "dark",
|
||||
@@ -79,174 +41,282 @@ class VavooExtractor(BaseExtractor):
|
||||
"device": {
|
||||
"type": "Handset",
|
||||
"brand": "google",
|
||||
"model": "Pixel",
|
||||
"name": "sdk_gphone64_arm64",
|
||||
"uniqueId": "d10e5d99ab665233",
|
||||
"model": "Nexus",
|
||||
"name": "21081111RG",
|
||||
"uniqueId": unique_id,
|
||||
},
|
||||
"os": {"name": "android", "version": "7.1.2", "abis": ["arm64-v8a"], "host": "android"},
|
||||
"app": {
|
||||
"platform": "android",
|
||||
"version": "1.1.0",
|
||||
"buildId": "97215000",
|
||||
"engine": "hbc85",
|
||||
"signatures": ["6e8a975e3cbf07d5de823a760d4c2547f86c1403105020adee5de67ac510999e"],
|
||||
"installer": "com.android.vending",
|
||||
},
|
||||
"version": {"package": "app.lokke.main", "binary": "1.1.0", "js": "1.1.0"},
|
||||
"platform": {
|
||||
"isAndroid": True,
|
||||
"isIOS": False,
|
||||
"isTV": False,
|
||||
"isWeb": False,
|
||||
"isMobile": True,
|
||||
"isWebTV": False,
|
||||
"isElectron": False,
|
||||
},
|
||||
"os": {"name": "android", "version": "13"},
|
||||
"app": {"platform": "android", "version": "3.1.21"},
|
||||
"version": {"package": "tv.vavoo.app", "binary": "3.1.21", "js": "3.1.21"},
|
||||
},
|
||||
"appFocusTime": 0,
|
||||
"playerActive": False,
|
||||
"playDuration": 0,
|
||||
"devMode": False,
|
||||
"devMode": True,
|
||||
"hasAddon": True,
|
||||
"castConnected": False,
|
||||
"package": "tv.vavoo.app",
|
||||
"version": "3.1.21",
|
||||
"package": "app.lokke.main",
|
||||
"version": "1.1.0",
|
||||
"process": "app",
|
||||
"firstAppStart": current_time,
|
||||
"lastAppStart": current_time,
|
||||
"ipLocation": "",
|
||||
"adblockEnabled": True,
|
||||
"firstAppStart": now_ms - 86400000,
|
||||
"lastAppStart": now_ms,
|
||||
"ipLocation": None,
|
||||
"adblockEnabled": False,
|
||||
"proxy": {
|
||||
"supported": ["ss", "openvpn"],
|
||||
"engine": "ss",
|
||||
"engine": "openvpn",
|
||||
"ssVersion": 1,
|
||||
"enabled": True,
|
||||
"enabled": False,
|
||||
"autoServer": True,
|
||||
"id": "de-fra",
|
||||
"id": "fi-hel",
|
||||
},
|
||||
"iap": {"supported": False},
|
||||
"iap": {"supported": True},
|
||||
}
|
||||
|
||||
try:
|
||||
resp = await self._make_request(
|
||||
"https://www.vavoo.tv/api/app/ping",
|
||||
"https://www.lokke.app/api/app/ping",
|
||||
method="POST",
|
||||
json=data,
|
||||
json=body,
|
||||
headers=headers,
|
||||
timeout=10,
|
||||
timeout=15,
|
||||
retries=2,
|
||||
)
|
||||
try:
|
||||
result = resp.json()
|
||||
except Exception:
|
||||
logger.warning("Vavoo ping returned non-json response (status=%s).", resp.status)
|
||||
logger.warning("Lokke ping returned non-json response (status=%s).", resp.status)
|
||||
return None
|
||||
|
||||
addon_sig = result.get("addonSig") if isinstance(result, dict) else None
|
||||
if addon_sig:
|
||||
logger.info("Successfully obtained Vavoo authentication signature")
|
||||
logger.info("Successfully obtained auth signature from lokke.app")
|
||||
return addon_sig
|
||||
else:
|
||||
logger.warning("No addonSig in Vavoo API response: %s", result)
|
||||
return None
|
||||
except ExtractorError as e:
|
||||
logger.warning("Failed to get Vavoo auth signature: %s", e)
|
||||
logger.warning("No addonSig in lokke API response: %s", result)
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.debug("_get_auth_signature error: %s", e)
|
||||
return None
|
||||
|
||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||
"""Extract Vavoo stream URL (async).
|
||||
async def _get_ts_signature(self) -> Optional[str]:
    """Get the TS-based signature via /api/box/ping2 (fallback).

    POSTs the fixed ``vec`` blob as form data to the ping2 endpoint and
    returns ``response.signed`` from the JSON reply.

    Returns:
        The signature, or ``None`` when the request fails, the reply is not
        JSON, or the reply does not have the expected shape. This method
        never raises.
    """
    # Opaque request blob expected by the ping2 endpoint; must be sent verbatim.
    vec = "9frjpxPjxSNilxJPCJ0XGYs6scej3dW/h/VWlnKUiLSG8IP7mfyDU7NirOlld+VtCKGj03XjetfliDMhIev7wcARo+YTU8KPFuVQP9E2DVXzY2BFo1NhE6qEmPfNDnm74eyl/7iFJ0EETm6XbYyz8IKBkAqPN/Spp3PZ2ulKg3QBSDxcVN4R5zRn7OsgLJ2CNTuWkd/h451lDCp+TtTuvnAEhcQckdsydFhTZCK5IiWrrTIC/d4qDXEd+GtOP4hPdoIuCaNzYfX3lLCwFENC6RZoTBYLrcKVVgbqyQZ7DnLqfLqvf3z0FVUWx9H21liGFpByzdnoxyFkue3NzrFtkRL37xkx9ITucepSYKzUVEfyBh+/3mtzKY26VIRkJFkpf8KVcCRNrTRQn47Wuq4gC7sSwT7eHCAydKSACcUMMdpPSvbvfOmIqeBNA83osX8FPFYUMZsjvYNEE3arbFiGsQlggBKgg1V3oN+5ni3Vjc5InHg/xv476LHDFnNdAJx448ph3DoAiJjr2g4ZTNynfSxdzA68qSuJY8UjyzgDjG0RIMv2h7DlQNjkAXv4k1BrPpfOiOqH67yIarNmkPIwrIV+W9TTV/yRyE1LEgOr4DK8uW2AUtHOPA2gn6P5sgFyi68w55MZBPepddfYTQ+E1N6R/hWnMYPt/i0xSUeMPekX47iucfpFBEv9Uh9zdGiEB+0P3LVMP+q+pbBU4o1NkKyY1V8wH1Wilr0a+q87kEnQ1LWYMMBhaP9yFseGSbYwdeLsX9uR1uPaN+u4woO2g8sw9Y5ze5XMgOVpFCZaut02I5k0U4WPyN5adQjG8sAzxsI3KsV04DEVymj224iqg2Lzz53Xz9yEy+7/85ILQpJ6llCyqpHLFyHq/kJxYPhDUF755WaHJEaFRPxUqbparNX+mCE9Xzy7Q/KTgAPiRS41FHXXv+7XSPp4cy9jli0BVnYf13Xsp28OGs/D8Nl3NgEn3/eUcMN80JRdsOrV62fnBVMBNf36+LbISdvsFAFr0xyuPGmlIETcFyxJkrGZnhHAxwzsvZ+Uwf8lffBfZFPRrNv+tgeeLpatVcHLHZGeTgWWml6tIHwWUqv2TVJeMkAEL5PPS4Gtbscau5HM+FEjtGS+KClfX1CNKvgYJl7mLDEf5ZYQv5kHaoQ6RcPaR6vUNn02zpq5/X3EPIgUKF0r/0ctmoT84B2J1BKfCbctdFY9br7JSJ6DvUxyde68jB+Il6qNcQwTFj4cNErk4x719Y42NoAnnQYC2/qfL/gAhJl8TKMvBt3Bno+va8ve8E0z8yEuMLUqe8OXLce6nCa+L5LYK1aBdb60BYbMeWk1qmG6Nk9OnYLhzDyrd9iHDd7X95OM6X5wiMVZRn5ebw4askTTc50xmrg4eic2U1w1JpSEjdH/u/hXrWKSMWAxaj34uQnMuWxPZEXoVxzGyuUbroXRfkhzpqmqqqOcypjsWPdq5BOUGL/Riwjm6yMI0x9kbO8+VoQ6RYfjAbxNriZ1cQ+AW1fqEgnRWXmjt4Z1M0ygUBi8w71bDML1YG6UHeC2cJ2CCCxSrfycKQhpSdI1QIuwd2eyIpd4LgwrMiY3xNWreAF+qobNxvE7ypKTISNrz0iYIhU0aKNlcGwYd0FXIRfKVBzSBe4MRK2pGLDNO6ytoHxvJweZ8h1XG8RWc4aB5gTnB7Tjiqym4b64lRdj1DPHJnzD4aqRixpXhzYzWVDN2kONCR5i2quYbnVFN4sSfLiKeOwKX4JdmzpYixNZXjLkG14seS6KR0Wl8Itp5IMIWFpnNokjRH76RYRZAcx0jP0V5/GfNNTi5QsEU98en0SiXHQGXnROiHpRUDXTl8FmJORjwXc0AjrEMuQ2FDJDmAIlKUSLhjbIiKw3iaqp5TVyXuz0ZMYBhnqhcwqULqtFSuIKpaW8FgF8QJfP2frADf4kKZG1bQ99MrRrb2A="
    try:
        resp = await self._make_request(
            "https://www.vavoo.tv/api/box/ping2",
            method="POST",
            data={"vec": vec},
            timeout=15,
            retries=2,
        )
        try:
            result = resp.json()
        except Exception:
            # Non-JSON body: treat as "no signature available".
            return None
        if not isinstance(result, dict):
            # Unexpected payload shape (e.g. a list); nothing to extract.
            return None
        return (result.get("response") or {}).get("signed")
    except Exception as e:
        # Best-effort fallback: callers only need to know no signature exists.
        logger.debug("_get_ts_signature error: %s", e)
        return None
|
||||
|
||||
Supports:
|
||||
- Direct play URLs: https://vavoo.to/play/{id}/index.m3u8 (Live TV)
|
||||
- Web-VOD API links: https://vavoo.to/web-vod/api/get?link=...
|
||||
- Legacy mediahubmx links (may not work due to Vavoo API changes)
|
||||
"""
|
||||
if "vavoo.to" not in url:
|
||||
raise ExtractorError("Not a valid Vavoo URL")
|
||||
|
||||
# Check if this is a direct play URL (Live TV)
|
||||
# These URLs are already m3u8 streams but need auth signature
|
||||
if "/play/" in url and url.endswith(".m3u8"):
|
||||
signature = await self.get_auth_signature()
|
||||
if not signature:
|
||||
raise ExtractorError("Failed to get Vavoo authentication signature for Live TV")
|
||||
|
||||
stream_headers = {
|
||||
"user-agent": "okhttp/4.11.0",
|
||||
"referer": "https://vavoo.to/",
|
||||
"mediahubmx-signature": signature,
|
||||
}
|
||||
return {
|
||||
"destination_url": url,
|
||||
"request_headers": stream_headers,
|
||||
"mediaflow_endpoint": "hls_manifest_proxy",
|
||||
}
|
||||
|
||||
# Check if this is a web-vod API link (new format)
|
||||
if "/web-vod/api/get" in url:
|
||||
resolved_url = await self._resolve_web_vod_link(url)
|
||||
stream_headers = {
|
||||
"user-agent": self.base_headers.get("user-agent", "Mozilla/5.0"),
|
||||
"referer": "https://vavoo.to/",
|
||||
}
|
||||
return {
|
||||
"destination_url": resolved_url,
|
||||
"request_headers": stream_headers,
|
||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||
}
|
||||
|
||||
# Legacy mediahubmx flow
|
||||
signature = await self.get_auth_signature()
|
||||
if not signature:
|
||||
raise ExtractorError("Failed to get Vavoo authentication signature")
|
||||
|
||||
resolved_url = await self._resolve_vavoo_link(url, signature)
|
||||
if not resolved_url:
|
||||
raise ExtractorError("Failed to resolve Vavoo URL")
|
||||
|
||||
stream_headers = {
|
||||
"user-agent": self.base_headers.get("user-agent", "okhttp/4.11.0"),
|
||||
"referer": "https://vavoo.to/",
|
||||
}
|
||||
|
||||
return {
|
||||
"destination_url": resolved_url,
|
||||
"request_headers": stream_headers,
|
||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||
}
|
||||
|
||||
async def _resolve_vavoo_link(self, link: str, signature: str) -> Optional[str]:
|
||||
"""Resolve a Vavoo link using the MediaHubMX API (async)."""
|
||||
async def _resolve_with_auth(self, url: str, signature: str) -> Optional[str]:
|
||||
"""Resolve a Vavoo link using the MediaHubMX API with auth signature."""
|
||||
headers = {
|
||||
"user-agent": "okhttp/4.11.0",
|
||||
"user-agent": self.RESOLVE_UA,
|
||||
"accept": "application/json",
|
||||
"content-type": "application/json; charset=utf-8",
|
||||
"accept-encoding": "gzip",
|
||||
"mediahubmx-signature": signature,
|
||||
}
|
||||
data = {"language": "de", "region": "AT", "url": link, "clientVersion": "3.1.21"}
|
||||
payload = {"language": "de", "region": "AT", "url": url, "clientVersion": "3.0.2"}
|
||||
try:
|
||||
logger.info(f"Attempting to resolve Vavoo URL: {link}")
|
||||
resp = await self._make_request(
|
||||
"https://vavoo.to/mediahubmx-resolve.json",
|
||||
method="POST",
|
||||
json=data,
|
||||
json=payload,
|
||||
headers=headers,
|
||||
timeout=12,
|
||||
timeout=15,
|
||||
retries=3,
|
||||
backoff_factor=0.6,
|
||||
)
|
||||
try:
|
||||
result = resp.json()
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Vavoo resolve returned non-json response (status=%s). Body preview: %s",
|
||||
resp.status,
|
||||
getattr(resp, "text", "")[:500],
|
||||
)
|
||||
logger.warning("Vavoo resolve returned non-json (status=%s)", resp.status)
|
||||
return None
|
||||
|
||||
logger.debug("Vavoo API response: %s", result)
|
||||
|
||||
# Accept either list or dict with 'url'
|
||||
if isinstance(result, list) and result and isinstance(result[0], dict) and result[0].get("url"):
|
||||
resolved_url = result[0]["url"]
|
||||
logger.info("Successfully resolved Vavoo URL to: %s", resolved_url)
|
||||
return resolved_url
|
||||
elif isinstance(result, dict) and result.get("url"):
|
||||
resolved_url = result["url"]
|
||||
logger.info("Successfully resolved Vavoo URL to: %s", resolved_url)
|
||||
return resolved_url
|
||||
else:
|
||||
logger.warning("No URL found in Vavoo API response: %s", result)
|
||||
return None
|
||||
except ExtractorError as e:
|
||||
logger.error(f"Vavoo resolution failed for URL {link}: {e}")
|
||||
raise ExtractorError(f"Vavoo resolution failed: {str(e)}") from e
|
||||
return str(result[0]["url"])
|
||||
if isinstance(result, dict):
|
||||
if result.get("url"):
|
||||
return str(result["url"])
|
||||
if isinstance(result.get("data"), dict) and result["data"].get("url"):
|
||||
return str(result["data"]["url"])
|
||||
logger.warning("No URL found in Vavoo API response: %s", result)
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error while resolving Vavoo URL {link}: {e}")
|
||||
raise ExtractorError(f"Vavoo resolution failed: {str(e)}") from e
|
||||
logger.debug("_resolve_with_auth error: %s", e)
|
||||
return None
|
||||
|
||||
async def _follow_stream_url(self, url: str) -> str:
    """Follow redirects for *url* and return the most specific stream URL.

    Issues a HEAD request. When the response is an HTML page, the page is
    fetched with GET and scanned first for an ``.m3u8`` URL and then for a
    generic stream-looking URL.

    Returns:
        The first stream URL found in an HTML page, otherwise the final URL
        reported by the HEAD response. Falls back to the original *url* on
        any error; this method never raises.
    """
    stream_headers = {
        "User-Agent": self.API_UA,
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate",
        "Connection": "close",
    }
    # Ordered by preference: explicit HLS playlists first, then generic stream paths.
    candidate_patterns = (
        r'(https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*)',
        r'(https?://[^\s"\'<>]+(?:\.ts|/live/|/stream/|/playlist|/index)[^\s"\'<>]*)',
    )
    try:
        head_resp = await self._make_request(url, method="HEAD", headers=stream_headers, timeout=15, retries=1)
        # A missing or None response URL must not turn into the string "None".
        final_url = str(getattr(head_resp, "url", None) or url)
        resp_headers = getattr(head_resp, "headers", None) or {}
        # Plain dict headers may carry a lowercase key, so check both spellings.
        ctype = (resp_headers.get("Content-Type") or resp_headers.get("content-type") or "").lower()
        if "text/html" not in ctype:
            return final_url

        page = await self._make_request(url, method="GET", headers=stream_headers, timeout=15, retries=1)
        text = getattr(page, "text", "") or ""
        for pattern in candidate_patterns:
            found = re.search(pattern, text)
            if found:
                return found.group(1)
        return final_url
    except Exception as e:
        # Best-effort: if probing fails, the caller keeps the URL it already has.
        logger.debug("_follow_stream_url error: %s", e)
        return url
|
||||
|
||||
async def _build_ts_fallback(self, url: str) -> Optional[str]:
    """Build a .ts fallback URL for vavoo-iptv streams using the ping2 signature.

    The ``vavoo-iptv`` path component is rewritten to ``live2``, a trailing
    ``/index.m3u8`` (with any query string) is dropped, and the signature is
    appended as the ``vavoo_auth`` query parameter. The candidate URL is then
    probed with a GET request.

    Returns:
        The .ts URL when the probe answers with a status below 400, otherwise
        ``None`` (not a vavoo-iptv URL, no signature, or the probe failed).
    """
    if "vavoo-iptv" not in url:
        return None

    ts_sig = await self._get_ts_signature()
    if not ts_sig:
        return None

    rewritten = url.replace("vavoo-iptv", "live2")
    base = re.sub(r"/index\.m3u8(?:\?.*)?$", "", rewritten).rstrip("/")
    # safe='' so "/", "+" and "=" in the signature are percent-encoded too.
    ts_url = f"{base}.ts?n=1&b=5&vavoo_auth={quote(ts_sig, safe='')}"

    try:
        resp = await self._make_request(
            ts_url, method="GET", headers={"User-Agent": self.TS_UA}, timeout=15, retries=1
        )
    except Exception as e:
        # Best-effort probe: a failure only means this fallback is unavailable.
        logger.debug("_build_ts_fallback probe error: %s", e)
        return None

    # A response without a status attribute is treated as a failure (400).
    if getattr(resp, "status", 400) < 400:
        return ts_url
    return None
|
||||
|
||||
async def _resolve_web_vod_link(self, url: str) -> str:
    """Resolve a web-vod API link to the URL it points at.

    The request is made without following redirects. A 3xx response is
    resolved through its ``Location`` header; a 200 response is accepted
    when its body is itself an http(s) URL.

    Returns:
        The resolved absolute URL.

    Raises:
        ExtractorError: If the response has an unexpected status (including
            a redirect without a ``Location`` header) or the request fails.
    """
    # Local import: this block only relies on urljoin here.
    from urllib.parse import urljoin

    try:
        resp = await self._make_request(
            url,
            method="GET",
            headers={"Accept": "application/json"},
            timeout=10,
            retries=2,
            allow_redirects=False,
        )
        status = getattr(resp, "status", 0)
        if status in (301, 302, 303, 307, 308):
            resp_headers = getattr(resp, "headers", None) or {}
            location = resp_headers.get("Location") or resp_headers.get("location")
            if location:
                # Location may be relative; resolve it against the request URL.
                location = urljoin(url, location)
                logger.info("Vavoo web-vod redirected to: %s", location)
                return location
        if status == 200:
            # Strip first so leading whitespace/newlines do not hide a valid URL.
            text = (getattr(resp, "text", "") or "").strip()
            if text.startswith("http"):
                logger.info("Vavoo web-vod resolved to: %s", text)
                return text
        raise ExtractorError(f"Vavoo web-vod API returned unexpected status {status}")
    except ExtractorError:
        raise
    except Exception as e:
        raise ExtractorError(f"Failed to resolve Vavoo web-vod link: {e}") from e
|
||||
|
||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
    """Extract a playable Vavoo stream URL.

    Web-VOD API links (``/web-vod/api/get``) are resolved through their
    redirect and returned directly. Every other URL goes through these
    modes, in order, until one yields a URL:

    1. Auth Resolve Mode: auth signature -> mediahubmx-resolve -> follow redirects
    2. TS Fallback Mode: ping2 signature -> live2 .ts URL
    3. Direct Fallback: the raw URL with the VAVOO user agent

    Returns:
        A dict with ``destination_url``, ``request_headers`` and
        ``mediaflow_endpoint``.

    Raises:
        ExtractorError: If *url* is not a Vavoo URL or a web-vod link cannot
            be resolved.
    """
    if "vavoo.to" not in url:
        raise ExtractorError("Not a valid Vavoo URL")

    def endpoint_for(stream_url: str) -> str:
        # Playlists go through the HLS manifest proxy so relative segment
        # URLs are rewritten; TS / raw streams use the stream proxy as-is.
        path = urlparse(stream_url).path.lower()
        if path.endswith((".m3u8", ".m3u", ".m3u_plus")):
            return "hls_manifest_proxy"
        return self.mediaflow_endpoint

    # Web-VOD links (new format)
    if "/web-vod/api/get" in url:
        resolved_url = await self._resolve_web_vod_link(url)
        return {
            "destination_url": resolved_url,
            "request_headers": {
                "user-agent": self.API_UA,
                "referer": "https://vavoo.to/",
            },
            "mediaflow_endpoint": endpoint_for(resolved_url),
        }

    resolved_url = None
    stream_headers = None

    # Mode 1: Auth Resolve (auth signature + mediahubmx)
    sig = await self._get_auth_signature()
    if sig:
        candidate = await self._resolve_with_auth(url, sig)
        if candidate:
            resolved_url = await self._follow_stream_url(candidate)
            stream_headers = {
                "user-agent": self.RESOLVE_UA,
                "referer": "https://vavoo.to/",
                "origin": "https://vavoo.to",
            }
            logger.info("Using Auth Resolve Mode: %s", resolved_url)

    # Mode 2: TS Fallback (ping2 + live2 .ts)
    if not resolved_url:
        ts_url = await self._build_ts_fallback(url)
        if ts_url:
            resolved_url = ts_url
            stream_headers = {"user-agent": self.TS_UA}
            logger.info("Using TS Fallback Mode: %s", resolved_url)

    # Mode 3: Direct Fallback
    if not resolved_url:
        resolved_url = url
        stream_headers = {
            "user-agent": self.TS_UA,
            "referer": "https://vavoo.to/",
        }
        logger.info("Using Direct Fallback Mode: %s", resolved_url)

    return {
        "destination_url": resolved_url,
        "request_headers": stream_headers,
        "mediaflow_endpoint": endpoint_for(resolved_url),
    }
|
||||
|
||||
@@ -0,0 +1,110 @@
|
||||
import re
|
||||
from typing import Dict, Any
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import aiohttp
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
|
||||
|
||||
class VidFastExtractor(BaseExtractor):
    """
    Extractor for vidfast.pro (movies and TV via ythd.org → cloudnestra.com).

    URL formats accepted:
        https://vidfast.pro/movie/{tmdb_id}
        https://vidfast.pro/tv/{tmdb_id}/{season}/{episode}

    Extraction flow:
        1. Parse TMDB ID from the URL path.
        2. Fetch https://ythd.org/embed/{tmdb_id}  → grab first data-hash.
        3. Fetch https://cloudnestra.com/rcp/{hash} (carrying ythd cookies)
           → grab /prorcp/ hash from the inline iframe src.
        4. Fetch https://cloudnestra.com/prorcp/{prorcp_hash}
           → grab Playerjs `file:` parameter (HLS master playlist URL).
        5. Replace the {v1} CDN placeholder with cloudnestra.com and return
           the resolved HLS URL for MediaFlow's hls_manifest_proxy endpoint.

    NOTE(review): only the TMDB ID (second path segment) is used; the season
    and episode segments of TV URLs are currently ignored — confirm that the
    ythd.org embed page selects the right episode on its own.
    """

    # Used when the incoming request headers do not carry a user agent.
    _DEFAULT_UA = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    )

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.mediaflow_endpoint = "hls_manifest_proxy"

    @staticmethod
    async def _fetch_text(session: aiohttp.ClientSession, url: str, headers: Dict[str, str], label: str) -> str:
        """GET *url* and return the body text; raise ExtractorError on HTTP >= 400."""
        async with session.get(url, headers=headers) as resp:
            if resp.status >= 400:
                raise ExtractorError(f"VidFast: {label} returned HTTP {resp.status}")
            return await resp.text()

    async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
        """Resolve a vidfast.pro URL to its HLS stream URL.

        Returns:
            A dict with ``destination_url``, ``request_headers`` and
            ``mediaflow_endpoint``.

        Raises:
            ExtractorError: If the TMDB ID cannot be parsed, any hop returns
                HTTP >= 400, an expected marker is missing from a page, or
                the HTTP transport fails or times out.
        """
        # Local import: needed only to name the timeout exception below.
        import asyncio

        parsed = urlparse(url)
        parts = parsed.path.strip("/").split("/")
        # An empty second segment (e.g. "/movie//x") is as unusable as a missing one.
        if len(parts) < 2 or not parts[1]:
            raise ExtractorError(f"VidFast: cannot parse TMDB ID from path: {parsed.path!r}")

        tmdb_id = parts[1]
        ua = self.base_headers.get("user-agent", self._DEFAULT_UA)
        ythd_url = f"https://ythd.org/embed/{tmdb_id}"

        # A single aiohttp session preserves cookies across the three hops.
        cookie_jar = aiohttp.CookieJar()
        timeout = aiohttp.ClientTimeout(total=30)

        try:
            async with aiohttp.ClientSession(cookie_jar=cookie_jar, timeout=timeout) as session:
                # ── Step 1: ythd.org embed page ───────────────────────────────
                ythd_html = await self._fetch_text(session, ythd_url, {"User-Agent": ua}, "ythd.org")
                hash_match = re.search(r'data-hash="([^"]+)"', ythd_html)
                if not hash_match:
                    raise ExtractorError("VidFast: no data-hash attribute on ythd.org page")
                data_hash = hash_match.group(1)

                # ── Step 2: cloudnestra /rcp/ (needs ythd.org cookies) ────────
                rcp_url = f"https://cloudnestra.com/rcp/{data_hash}"
                rcp_html = await self._fetch_text(
                    session, rcp_url, {"User-Agent": ua, "Referer": ythd_url}, "cloudnestra /rcp/"
                )
                prorcp_match = re.search(r"src:\s*'/prorcp/([^']+)'", rcp_html)
                if not prorcp_match:
                    raise ExtractorError("VidFast: /prorcp/ hash not found in cloudnestra page")
                prorcp_hash = prorcp_match.group(1)

                # ── Step 3: cloudnestra /prorcp/ (actual player page) ─────────
                prorcp_url = f"https://cloudnestra.com/prorcp/{prorcp_hash}"
                prorcp_html = await self._fetch_text(
                    session, prorcp_url, {"User-Agent": ua, "Referer": rcp_url}, "cloudnestra /prorcp/"
                )
        except (aiohttp.ClientError, asyncio.TimeoutError) as e:
            # Surface transport failures through the extractor's own error type.
            raise ExtractorError(f"VidFast: request failed: {e}") from e

        # ── Step 4: extract the HLS URL from Playerjs({…, file:"…"}) ──────
        file_match = re.search(r'file:\s*"(https://[^"]+)"', prorcp_html)
        if not file_match:
            raise ExtractorError("VidFast: Playerjs file URL not found in /prorcp/ page")

        # The file value may contain multiple fallback URLs separated by " or ".
        first_url = file_match.group(1).split(" or ")[0].strip()

        # {v1} is a CDN host placeholder; it is substituted with cloudnestra.com.
        # NOTE(review): the original comment mentioned tmstr4.cloudnestra.com as
        # the HLS host — confirm which host the placeholder should expand to.
        stream_url = first_url.replace("{v1}", "cloudnestra.com")

        if not stream_url.startswith("https://"):
            raise ExtractorError(f"VidFast: unexpected stream URL: {stream_url[:120]!r}")

        return {
            "destination_url": stream_url,
            "request_headers": {
                "user-agent": ua,
                "referer": "https://cloudnestra.com/",
            },
            "mediaflow_endpoint": self.mediaflow_endpoint,
        }
|
||||
@@ -46,7 +46,17 @@ class VixCloudExtractor(BaseExtractor):
|
||||
iframe = soup.find("iframe").get("src")
|
||||
response = await self._make_request(iframe, headers={"x-inertia": "true", "x-inertia-version": version})
|
||||
elif "movie" in url or "tv" in url:
|
||||
response = await self._make_request(url)
|
||||
marker = "/movie" if "/movie" in url else "/tv"
|
||||
site_url = (url.split(marker))[0]
|
||||
parts = url.split(site_url)
|
||||
headers = {
|
||||
"Referer": f"{site_url}/",
|
||||
"Origin": f"{site_url}",
|
||||
}
|
||||
|
||||
response = await self._make_request(site_url + '/api' + parts[1])
|
||||
|
||||
response = await self._make_request(site_url + '/' + response.json()['src'],headers=headers)
|
||||
|
||||
if response.status != 200:
|
||||
raise ExtractorError("Failed to extract URL components, Invalid Request")
|
||||
|
||||
Reference in New Issue
Block a user