mirror of
https://github.com/UrloMythus/UnHided.git
synced 2026-06-10 09:10:23 +00:00
new version
This commit is contained in:
@@ -1,49 +1,209 @@
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
from urllib.parse import urlparse, urljoin
|
||||
|
||||
import aiohttp
|
||||
from curl_cffi.requests import AsyncSession
|
||||
|
||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||
from mediaflow_proxy.configs import settings
|
||||
|
||||
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Fallback desktop-Chrome User-Agent string, used when neither the caller's
# headers nor the Byparr solution provide one.
_DOOD_UA = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/131.0.0.0 Safari/537.36"
)
|
||||
|
||||
|
||||
class DoodStreamExtractor(BaseExtractor):
|
||||
"""
|
||||
Dood / MyVidPlay extractor
|
||||
Resolves to direct CDN MP4
|
||||
"""
|
||||
DoodStream / PlayMogo extractor.
|
||||
|
||||
def __init__(self, request_headers: dict):
|
||||
super().__init__(request_headers)
|
||||
self.base_url = "https://myvidplay.com"
|
||||
All DoodStream mirror domains (dsvplay.com, myvidplay.com, dood.to, …) now
|
||||
redirect to playmogo.com which sits behind Cloudflare and may require a
|
||||
Turnstile CAPTCHA before serving the pass_md5 URL.
|
||||
|
||||
Extraction order:
|
||||
1. Byparr — set BYPARR_URL (Firefox/Camoufox → Turnstile auto-validates,
|
||||
not blocked by DisableDevtool.js)
|
||||
2. curl_cffi — Chrome impersonation; works when Turnstile is not triggered,
|
||||
raises a descriptive error if captcha is detected.
|
||||
"""
|
||||
|
||||
async def extract(self, url: str, **kwargs):
    """
    Resolve a DoodStream / PlayMogo page URL to a direct stream URL.

    The video ID is the last non-empty path segment of ``url``. When
    ``settings.byparr_url`` is configured the Byparr path is used and any
    ``ExtractorError`` it raises propagates unchanged (this method does not
    retry on its own; the Byparr path performs its own curl_cffi fallback).
    Otherwise the curl_cffi path is used directly.

    Args:
        url: Embed or download page URL of the video.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        The result of the selected extraction path (both paths end in
        ``_parse_embed_html``).

    Raises:
        ExtractorError: If no video ID can be derived from ``url`` or the
            selected extraction path fails.
    """
    video_id = urlparse(url).path.rstrip("/").split("/")[-1]
    if not video_id:
        raise ExtractorError("Invalid DoodStream URL: no video ID found")

    if settings.byparr_url:
        # Byparr errors are surfaced as-is; wrapping the call in a
        # try/except that only re-raises would add nothing.
        return await self._extract_via_byparr(url, video_id)

    return await self._extract_via_curl_cffi(url, video_id)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Path 1 – Byparr (Firefox/Camoufox → Turnstile auto-validates)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _extract_via_byparr(self, url: str, video_id: str) -> dict:
    """
    Use Byparr to bypass Cloudflare protection on the DoodStream embed page.

    Strategy: fetch the embed page without any injected script. Byparr's
    Firefox/Camoufox browser auto-passes Cloudflare's bot checks and often
    bypasses the Turnstile CAPTCHA gate directly, returning the embed HTML
    with pass_md5. If the response doesn't contain pass_md5, reuse the CF
    cookies + UA from Byparr in a follow-up curl_cffi request (which avoids
    re-triggering the bot check). If that still yields no pass_md5, fall
    back to the plain curl_cffi path.

    Args:
        url: Original page URL; used verbatim when it already contains
            ``/e/``, otherwise rewritten to ``https://<host>/e/<video_id>``.
        video_id: Video identifier used to build embed / retry URLs.

    Returns:
        The result of ``_parse_embed_html`` (or of ``_extract_via_curl_cffi``
        when both Byparr strategies fail to produce pass_md5).

    Raises:
        ExtractorError: If Byparr answers with a non-200 HTTP status or a
            non-"ok" status payload, or if a downstream step fails.
    """
    endpoint = f"{settings.byparr_url.rstrip('/')}/v1"
    embed_url = url if "/e/" in url else f"https://{urlparse(url).netloc}/e/{video_id}"
    payload = {
        "cmd": "request.get",
        "url": embed_url,
        # byparr_timeout is used as seconds for the aiohttp timeout below,
        # so this value is in milliseconds.
        "maxTimeout": settings.byparr_timeout * 1000,
    }

    async with aiohttp.ClientSession() as session:
        async with session.post(
            endpoint,
            json=payload,
            # Give the HTTP call some headroom beyond Byparr's own timeout.
            timeout=aiohttp.ClientTimeout(total=settings.byparr_timeout + 15),
        ) as resp:
            if resp.status != 200:
                raise ExtractorError(f"Byparr HTTP {resp.status}")
            data = await resp.json()

    if data.get("status") != "ok":
        raise ExtractorError(f"Byparr: {data.get('message', 'unknown error')}")

    # Use `or` fallbacks so keys that are present but null do not crash the
    # string/dict operations below.
    solution = data.get("solution") or {}
    final_url = solution.get("url") or embed_url
    if not final_url.startswith("http"):
        final_url = embed_url
    base_url = f"https://{urlparse(final_url).netloc}"
    html = solution.get("response") or ""

    if "pass_md5" not in html:
        # Byparr may not have the pass_md5 in the initial response.
        # Try two recovery strategies in order:
        #
        # 1. Cookie reuse — if Byparr collected CF clearance cookies before
        #    the page loaded fully, inject them into a curl_cffi request.
        # 2. Plain curl_cffi — Chrome TLS impersonation without JS execution.
        raw_cookies = solution.get("cookies") or []
        # Skip malformed cookie entries instead of failing with KeyError.
        cookies = {c["name"]: c["value"] for c in raw_cookies if "name" in c and "value" in c}
        ua = solution.get("userAgent") or _DOOD_UA

        if cookies:
            # Retry against the domain the cf_clearance cookie was issued for.
            cf_domain = (
                next(
                    (c.get("domain", "").lstrip(".") for c in raw_cookies if c.get("name") == "cf_clearance"),
                    None,
                )
                or "playmogo.com"
            )
            retry_url = f"https://{cf_domain}/e/{video_id}"
            logger.debug(
                "Byparr response lacked pass_md5 (final_url=%s); retrying %s with CF cookies via curl_cffi",
                final_url,
                retry_url,
            )
            proxy = self._get_proxy(retry_url)
            async with AsyncSession() as s:
                r = await s.get(
                    retry_url,
                    impersonate="chrome",
                    cookies=cookies,
                    headers={"User-Agent": ua, "Referer": f"https://{cf_domain}/"},
                    timeout=20,
                    **({"proxy": proxy} if proxy else {}),
                )
            html = r.text
            final_url = str(r.url)
            base_url = f"https://{urlparse(final_url).netloc}"

        if "pass_md5" not in html:
            logger.debug("Byparr cookie reuse also failed; falling back to curl_cffi for %s", embed_url)
            return await self._extract_via_curl_cffi(embed_url, video_id)

    return await self._parse_embed_html(html, base_url)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Path 2 – curl_cffi (bypasses CF bot protection; Turnstile may block)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _extract_via_curl_cffi(self, url: str, video_id: str) -> dict:
    """
    Fetch the embed page with curl_cffi (Chrome impersonation) and parse it.

    Args:
        url: Page URL to request; redirects are followed.
        video_id: Not used by this path; kept for a signature parallel to
            ``_extract_via_byparr``.

    Returns:
        The result of ``_parse_embed_html`` for the fetched HTML.

    Raises:
        ExtractorError: If the fetched HTML has no ``pass_md5`` reference,
            with a dedicated message when the page looks like a Turnstile
            CAPTCHA gate.
    """
    upstream_proxy = self._get_proxy(url)
    request_kwargs = {
        "impersonate": "chrome",
        "headers": {"Referer": f"https://{urlparse(url).netloc}/"},
        "timeout": 30,
        "allow_redirects": True,
    }
    if upstream_proxy:
        # Only pass a proxy when one is configured for this URL.
        request_kwargs["proxy"] = upstream_proxy

    async with AsyncSession() as session:
        response = await session.get(url, **request_kwargs)

    landed_url = str(response.url)
    page = response.text
    # Redirects may land on a different host; later requests use that origin.
    origin = f"https://{urlparse(landed_url).netloc}"

    if "pass_md5" in page:
        return await self._parse_embed_html(page, origin)

    looks_like_captcha = "turnstile" in page.lower() or "captcha_l" in page
    if looks_like_captcha:
        raise ExtractorError(
            "DoodStream: site is serving a Turnstile CAPTCHA that requires "
            "browser interaction — cannot be bypassed automatically from this "
            "network location. Try a residential IP or a VPN/proxy."
        )
    raise ExtractorError(f"DoodStream: pass_md5 not found in embed HTML ({landed_url})")
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Common HTML parser
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _parse_embed_html(self, html: str, base_url: str) -> dict:
|
||||
pass_match = re.search(r"(/pass_md5/[^'\"<>\s]+)", html)
|
||||
if not pass_match:
|
||||
raise ExtractorError("DoodStream: pass_md5 path not found in embed HTML")
|
||||
|
||||
pass_url = urljoin(base_url, pass_match.group(1))
|
||||
ua = self.base_headers.get("user-agent") or _DOOD_UA
|
||||
headers = {
|
||||
"user-agent": ua,
|
||||
"referer": f"{base_url}/",
|
||||
}
|
||||
|
||||
proxy = self._get_proxy(pass_url)
|
||||
async with AsyncSession() as s:
|
||||
r = await s.get(
|
||||
pass_url,
|
||||
impersonate="chrome",
|
||||
headers=headers,
|
||||
timeout=20,
|
||||
**({"proxy": proxy} if proxy else {}),
|
||||
)
|
||||
|
||||
base_stream = r.text.strip()
|
||||
if not base_stream or "RELOAD" in base_stream:
|
||||
raise ExtractorError(
|
||||
"DoodStream: pass_md5 endpoint returned no stream URL "
|
||||
"(captcha session may have expired). "
|
||||
"Ensure BYPARR_URL is set for reliable extraction."
|
||||
)
|
||||
|
||||
token_match = re.search(r"token=([^&\s'\"]+)", html)
|
||||
if not token_match:
|
||||
raise ExtractorError("Dood: token missing")
|
||||
raise ExtractorError("DoodStream: token not found in embed HTML")
|
||||
|
||||
token = token_match.group(1)
|
||||
|
||||
final_url = f"{base_stream}123456789?token={token}&expiry={int(time.time())}"
|
||||
expiry = int(time.time())
|
||||
final_url = f"{base_stream}123456789?token={token}&expiry={expiry}"
|
||||
|
||||
return {
|
||||
"destination_url": final_url,
|
||||
|
||||
Reference in New Issue
Block a user