Files
UnHided/mediaflow_proxy/extractors/doodstream.py
T
UrloMythus 8134936d59 new version
2026-04-15 19:23:14 +02:00

213 lines
8.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import logging
import re
import time
from urllib.parse import urlparse, urljoin
import aiohttp
from curl_cffi.requests import AsyncSession
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
from mediaflow_proxy.configs import settings
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Desktop Chrome 131 (Windows 10, x64) User-Agent string. Used as the default
# whenever no other User-Agent value is available.
_DOOD_UA = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/131.0.0.0 Safari/537.36"
)
class DoodStreamExtractor(BaseExtractor):
    """
    DoodStream / PlayMogo extractor.

    All DoodStream mirror domains (dsvplay.com, myvidplay.com, dood.to, …) now
    redirect to playmogo.com which sits behind Cloudflare and may require a
    Turnstile CAPTCHA before serving the pass_md5 URL.

    Extraction order:
      1. Byparr — set BYPARR_URL (Firefox/Camoufox → Turnstile auto-validates,
         not blocked by DisableDevtool.js)
      2. curl_cffi — Chrome impersonation; works when Turnstile is not triggered,
         raises a descriptive error if captcha is detected.
    """

    async def extract(self, url: str, **kwargs) -> dict:
        """
        Resolve a DoodStream page URL into a stream description dict.

        The video ID is the last non-empty path segment of ``url``. When
        ``settings.byparr_url`` is set the Byparr path is used and any error it
        raises propagates to the caller; otherwise the page is fetched directly
        with curl_cffi.

        Args:
            url: DoodStream page URL.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            The dict built by ``_parse_embed_html`` (``destination_url``,
            ``request_headers``, ``mediaflow_endpoint``).

        Raises:
            ExtractorError: if ``url`` has no video ID or extraction fails.
        """
        video_id = urlparse(url).path.rstrip("/").split("/")[-1]
        if not video_id:
            raise ExtractorError("Invalid DoodStream URL: no video ID found")

        if settings.byparr_url:
            return await self._extract_via_byparr(url, video_id)
        return await self._extract_via_curl_cffi(url, video_id)

    @staticmethod
    def _embed_url(url: str, video_id: str) -> str:
        """Return ``url`` if it already contains ``/e/``, else ``https://<host>/e/<video_id>``."""
        if "/e/" in url:
            return url
        return f"https://{urlparse(url).netloc}/e/{video_id}"

    # ------------------------------------------------------------------
    # Path 1: Byparr (Firefox/Camoufox → Turnstile auto-validates)
    # ------------------------------------------------------------------
    async def _extract_via_byparr(self, url: str, video_id: str) -> dict:
        """
        Use Byparr to bypass Cloudflare protection on the DoodStream embed page.

        Strategy: fetch the embed page without any injected script. Byparr's
        Firefox/Camoufox browser auto-passes Cloudflare's bot checks and often
        bypasses the Turnstile CAPTCHA gate directly, returning the embed HTML
        with pass_md5. If the response doesn't contain pass_md5, reuse the CF
        cookies + UA from Byparr in a follow-up curl_cffi request (which avoids
        re-triggering the bot check). If that still yields no pass_md5, fall
        back to a plain curl_cffi fetch of the embed URL.

        Args:
            url: Original DoodStream URL.
            video_id: Video ID taken from the URL path.

        Returns:
            The dict built by ``_parse_embed_html``.

        Raises:
            ExtractorError: on a non-200 Byparr response, a Byparr status other
                than ``"ok"``, or a failure in the later parsing/fallback steps.
        """
        endpoint = f"{settings.byparr_url.rstrip('/')}/v1"
        embed_url = self._embed_url(url, video_id)
        payload = {
            "cmd": "request.get",
            "url": embed_url,
            # settings.byparr_timeout is multiplied by 1000 for Byparr's maxTimeout field.
            "maxTimeout": settings.byparr_timeout * 1000,
        }

        async with aiohttp.ClientSession() as session:
            async with session.post(
                endpoint,
                json=payload,
                # Client-side timeout is 15 s longer than the one handed to Byparr.
                timeout=aiohttp.ClientTimeout(total=settings.byparr_timeout + 15),
            ) as resp:
                if resp.status != 200:
                    raise ExtractorError(f"Byparr HTTP {resp.status}")
                data = await resp.json()

        if data.get("status") != "ok":
            raise ExtractorError(f"Byparr: {data.get('message', 'unknown error')}")

        # Use `or` instead of a .get() default so that a JSON null is handled
        # exactly like a missing key rather than crashing further down.
        solution = data.get("solution") or {}
        final_url = solution.get("url") or embed_url
        if not final_url.startswith("http"):
            final_url = embed_url
        base_url = f"https://{urlparse(final_url).netloc}"
        html = solution.get("response") or ""

        if "pass_md5" not in html:
            # Byparr may not have the pass_md5 in the initial response.
            # Try two recovery strategies in order:
            #
            # 1. Cookie reuse — if Byparr collected CF clearance cookies before
            #    the page loaded fully, inject them into a curl_cffi request.
            # 2. Plain curl_cffi — Chrome TLS impersonation without JS execution.
            raw_cookies = solution.get("cookies") or []
            # Skip cookie entries that lack a name or a value instead of raising KeyError.
            cookies = {c["name"]: c["value"] for c in raw_cookies if "name" in c and "value" in c}
            ua = solution.get("userAgent") or _DOOD_UA

            if cookies:
                # Retry on the host the cf_clearance cookie belongs to; default
                # to playmogo.com when that cookie (or its domain) is absent.
                cf_domain = (
                    next(
                        (
                            (c.get("domain") or "").lstrip(".")
                            for c in raw_cookies
                            if c.get("name") == "cf_clearance"
                        ),
                        None,
                    )
                    or "playmogo.com"
                )
                retry_url = f"https://{cf_domain}/e/{video_id}"
                logger.debug(
                    "Byparr response lacked pass_md5 (final_url=%s); retrying %s with CF cookies via curl_cffi",
                    final_url,
                    retry_url,
                )
                proxy = self._get_proxy(retry_url)
                async with AsyncSession() as s:
                    r = await s.get(
                        retry_url,
                        impersonate="chrome",
                        cookies=cookies,
                        headers={"User-Agent": ua, "Referer": f"https://{cf_domain}/"},
                        timeout=20,
                        **({"proxy": proxy} if proxy else {}),
                    )
                    html = r.text
                    final_url = str(r.url)
                base_url = f"https://{urlparse(final_url).netloc}"

            if "pass_md5" not in html:
                logger.debug("Byparr cookie reuse also failed; falling back to curl_cffi for %s", embed_url)
                return await self._extract_via_curl_cffi(embed_url, video_id)

        return await self._parse_embed_html(html, base_url)

    # ------------------------------------------------------------------
    # Path 2: curl_cffi (bypasses CF bot protection; Turnstile may block)
    # ------------------------------------------------------------------
    async def _extract_via_curl_cffi(self, url: str, video_id: str) -> dict:
        """
        Fetch the embed page with curl_cffi (Chrome impersonation) and parse it.

        ``url`` is first normalised to the ``/e/<video_id>`` embed form, the
        same normalisation the Byparr path applies, so both paths request the
        same page.

        Args:
            url: DoodStream page or embed URL.
            video_id: Video ID used to build the embed URL when ``url`` is not
                already an embed URL.

        Returns:
            The dict built by ``_parse_embed_html``.

        Raises:
            ExtractorError: if the fetched HTML has no ``pass_md5``; the message
                mentions Turnstile when the HTML contains captcha markers.
        """
        embed_url = self._embed_url(url, video_id)
        proxy = self._get_proxy(embed_url)
        async with AsyncSession() as s:
            r = await s.get(
                embed_url,
                impersonate="chrome",
                headers={"Referer": f"https://{urlparse(embed_url).netloc}/"},
                timeout=30,
                allow_redirects=True,
                **({"proxy": proxy} if proxy else {}),
            )
            final_url = str(r.url)
            html = r.text

        # Redirects are followed, so derive the base from the final URL.
        base_url = f"https://{urlparse(final_url).netloc}"

        if "pass_md5" not in html:
            if "turnstile" in html.lower() or "captcha_l" in html:
                raise ExtractorError(
                    "DoodStream: site is serving a Turnstile CAPTCHA that requires "
                    "browser interaction — cannot be bypassed automatically from this "
                    "network location. Try a residential IP or a VPN/proxy."
                )
            raise ExtractorError(f"DoodStream: pass_md5 not found in embed HTML ({final_url})")

        return await self._parse_embed_html(html, base_url)

    # ------------------------------------------------------------------
    # Common HTML parser
    # ------------------------------------------------------------------
    async def _parse_embed_html(self, html: str, base_url: str) -> dict:
        """
        Turn embed-page HTML into the final stream URL and request headers.

        Finds the ``/pass_md5/...`` path in ``html``, requests it relative to
        ``base_url``, and combines the returned base stream URL with the
        ``token`` found in ``html`` and the current Unix time as ``expiry``.

        Args:
            html: Embed page HTML.
            base_url: ``https://<host>`` of the embed page; used to resolve the
                pass_md5 path and as the referer.

        Returns:
            Dict with ``destination_url``, ``request_headers`` and
            ``mediaflow_endpoint`` keys.

        Raises:
            ExtractorError: if the pass_md5 path or token is missing from
                ``html``, or the pass_md5 endpoint returns an empty body or one
                containing ``RELOAD``.
        """
        pass_match = re.search(r"(/pass_md5/[^'\"<>\s]+)", html)
        if not pass_match:
            raise ExtractorError("DoodStream: pass_md5 path not found in embed HTML")
        pass_url = urljoin(base_url, pass_match.group(1))

        ua = self.base_headers.get("user-agent") or _DOOD_UA
        headers = {
            "user-agent": ua,
            "referer": f"{base_url}/",
        }

        proxy = self._get_proxy(pass_url)
        async with AsyncSession() as s:
            r = await s.get(
                pass_url,
                impersonate="chrome",
                headers=headers,
                timeout=20,
                **({"proxy": proxy} if proxy else {}),
            )
            base_stream = r.text.strip()

        if not base_stream or "RELOAD" in base_stream:
            raise ExtractorError(
                "DoodStream: pass_md5 endpoint returned no stream URL "
                "(captcha session may have expired). "
                "Ensure BYPARR_URL is set for reliable extraction."
            )

        token_match = re.search(r"token=([^&\s'\"]+)", html)
        if not token_match:
            raise ExtractorError("DoodStream: token not found in embed HTML")
        token = token_match.group(1)

        # expiry is the current Unix time in whole seconds.
        expiry = int(time.time())
        # NOTE(review): "123456789" is a fixed filler appended to the base
        # stream URL — confirm the site accepts a constant here.
        final_url = f"{base_stream}123456789?token={token}&expiry={expiry}"

        return {
            "destination_url": final_url,
            # The same headers used for the pass_md5 request are returned for the stream request.
            "request_headers": headers,
            "mediaflow_endpoint": "proxy_stream_endpoint",
        }