Files
UnHided/mediaflow_proxy/extractors/supervideo.py
UrloMythus cfc6bbabc9 update
2026-02-19 20:15:03 +01:00

65 lines
2.5 KiB
Python

import re
from typing import Dict, Any
from urllib.parse import urljoin, urlparse
from bs4 import BeautifulSoup, SoupStrainer
from curl_cffi.requests import AsyncSession
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
from mediaflow_proxy.utils.packed import unpack, detect, UnpackingError
class SupervideoExtractor(BaseExtractor):
"""Supervideo URL extractor.
Uses curl_cffi to bypass Cloudflare protection.
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.mediaflow_endpoint = "hls_manifest_proxy"
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
"""Extract video URL from Supervideo.
Uses curl_cffi with Chrome impersonation to bypass Cloudflare.
"""
patterns = [r'file:"(.*?)"']
try:
async with AsyncSession() as session:
response = await session.get(url, impersonate="chrome")
if response.status_code != 200:
raise ExtractorError(f"HTTP {response.status_code} while fetching {url}")
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("script"))
script_all = soup.find_all("script")
for script in script_all:
if script.text and detect(script.text):
unpacked_code = unpack(script.text)
for pattern in patterns:
match = re.search(pattern, unpacked_code)
if match:
extracted_url = match.group(1)
if not urlparse(extracted_url).scheme:
extracted_url = urljoin(url, extracted_url)
self.base_headers["referer"] = url
return {
"destination_url": extracted_url,
"request_headers": self.base_headers,
"mediaflow_endpoint": self.mediaflow_endpoint,
}
raise ExtractorError("No packed JS found or no file URL pattern matched")
except UnpackingError as e:
raise ExtractorError(f"Failed to unpack Supervideo JS: {e}")
except Exception as e:
if isinstance(e, ExtractorError):
raise
raise ExtractorError(f"Supervideo extraction failed: {e}")