new version

This commit is contained in:
UrloMythus
2026-04-15 19:23:14 +02:00
parent 5120b19d0b
commit 8134936d59
135 changed files with 3013 additions and 1589 deletions
+181 -21
View File
@@ -1,49 +1,209 @@
import logging
import re
import time
from urllib.parse import urlparse, urljoin
import aiohttp
from curl_cffi.requests import AsyncSession
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
from mediaflow_proxy.configs import settings
logger = logging.getLogger(__name__)
_DOOD_UA = (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
)
class DoodStreamExtractor(BaseExtractor):
"""
Dood / MyVidPlay extractor
Resolves to direct CDN MP4
"""
DoodStream / PlayMogo extractor.
def __init__(self, request_headers: dict):
super().__init__(request_headers)
self.base_url = "https://myvidplay.com"
All DoodStream mirror domains (dsvplay.com, myvidplay.com, dood.to, …) now
redirect to playmogo.com which sits behind Cloudflare and may require a
Turnstile CAPTCHA before serving the pass_md5 URL.
Extraction order:
1. Byparr — set BYPARR_URL (Firefox/Camoufox → Turnstile auto-validates,
not blocked by DisableDevtool.js)
2. curl_cffi — Chrome impersonation; works when Turnstile is not triggered,
raises a descriptive error if captcha is detected.
"""
async def extract(self, url: str, **kwargs):
parsed = urlparse(url)
video_id = parsed.path.rstrip("/").split("/")[-1]
if not video_id:
raise ExtractorError("Invalid Dood URL")
raise ExtractorError("Invalid DoodStream URL: no video ID found")
headers = {
"User-Agent": self.base_headers.get("User-Agent") or "Mozilla/5.0",
"Referer": f"{self.base_url}/",
if settings.byparr_url:
try:
return await self._extract_via_byparr(url, video_id)
except ExtractorError:
raise
return await self._extract_via_curl_cffi(url, video_id)
# ------------------------------------------------------------------
# Path 1 Byparr (Firefox/Camoufox → Turnstile auto-validates)
# ------------------------------------------------------------------
async def _extract_via_byparr(self, url: str, video_id: str) -> dict:
"""
Use Byparr to bypass Cloudflare protection on the DoodStream embed page.
Strategy: fetch the embed page without any injected script. Byparr's
Firefox/Camoufox browser auto-passes Cloudflare's bot checks and often
bypasses the Turnstile CAPTCHA gate directly, returning the embed HTML
with pass_md5. If the response doesn't contain pass_md5, reuse the CF
cookies + UA from Byparr in a follow-up curl_cffi request (which avoids
re-triggering the bot check).
"""
endpoint = f"{settings.byparr_url.rstrip('/')}/v1"
embed_url = url if "/e/" in url else f"https://{urlparse(url).netloc}/e/{video_id}"
payload = {
"cmd": "request.get",
"url": embed_url,
"maxTimeout": settings.byparr_timeout * 1000,
}
embed_url = f"{self.base_url}/e/{video_id}"
html = (await self._make_request(embed_url, headers=headers)).text
async with aiohttp.ClientSession() as session:
async with session.post(
endpoint,
json=payload,
timeout=aiohttp.ClientTimeout(total=settings.byparr_timeout + 15),
) as resp:
if resp.status != 200:
raise ExtractorError(f"Byparr HTTP {resp.status}")
data = await resp.json()
match = re.search(r"(\/pass_md5\/[^']+)", html)
if not match:
raise ExtractorError("Dood: pass_md5 not found")
if data.get("status") != "ok":
raise ExtractorError(f"Byparr: {data.get('message', 'unknown error')}")
pass_url = urljoin(self.base_url, match.group(1))
solution = data.get("solution", {})
final_url = solution.get("url", embed_url)
if not final_url.startswith("http"):
final_url = embed_url
base_url = f"https://{urlparse(final_url).netloc}"
html = solution.get("response", "")
base_stream = (await self._make_request(pass_url, headers=headers)).text.strip()
if "pass_md5" not in html:
# Byparr may not have the pass_md5 in the initial response.
# Try two recovery strategies in order:
#
# 1. Cookie reuse — if Byparr collected CF clearance cookies before
# the page loaded fully, inject them into a curl_cffi request.
# 2. Plain curl_cffi — Chrome TLS impersonation without JS execution.
raw_cookies = solution.get("cookies", [])
cookies = {c["name"]: c["value"] for c in raw_cookies}
ua = solution.get("userAgent", _DOOD_UA)
token_match = re.search(r"token=([^&]+)", html)
if cookies:
cf_domain = (
next(
(c.get("domain", "").lstrip(".") for c in raw_cookies if c.get("name") == "cf_clearance"),
None,
)
or "playmogo.com"
)
retry_url = f"https://{cf_domain}/e/{video_id}"
logger.debug(
"Byparr response lacked pass_md5 (final_url=%s); retrying %s with CF cookies via curl_cffi",
final_url,
retry_url,
)
proxy = self._get_proxy(retry_url)
async with AsyncSession() as s:
r = await s.get(
retry_url,
impersonate="chrome",
cookies=cookies,
headers={"User-Agent": ua, "Referer": f"https://{cf_domain}/"},
timeout=20,
**({"proxy": proxy} if proxy else {}),
)
html = r.text
final_url = str(r.url)
base_url = f"https://{urlparse(final_url).netloc}"
if "pass_md5" not in html:
logger.debug("Byparr cookie reuse also failed; falling back to curl_cffi for %s", embed_url)
return await self._extract_via_curl_cffi(embed_url, video_id)
return await self._parse_embed_html(html, base_url)
# ------------------------------------------------------------------
# Path 2 curl_cffi (bypasses CF bot protection; Turnstile may block)
# ------------------------------------------------------------------
async def _extract_via_curl_cffi(self, url: str, video_id: str) -> dict:
proxy = self._get_proxy(url)
async with AsyncSession() as s:
r = await s.get(
url,
impersonate="chrome",
headers={"Referer": f"https://{urlparse(url).netloc}/"},
timeout=30,
allow_redirects=True,
**({"proxy": proxy} if proxy else {}),
)
final_url = str(r.url)
html = r.text
base_url = f"https://{urlparse(final_url).netloc}"
if "pass_md5" not in html:
if "turnstile" in html.lower() or "captcha_l" in html:
raise ExtractorError(
"DoodStream: site is serving a Turnstile CAPTCHA that requires "
"browser interaction — cannot be bypassed automatically from this "
"network location. Try a residential IP or a VPN/proxy."
)
raise ExtractorError(f"DoodStream: pass_md5 not found in embed HTML ({final_url})")
return await self._parse_embed_html(html, base_url)
# ------------------------------------------------------------------
# Common HTML parser
# ------------------------------------------------------------------
async def _parse_embed_html(self, html: str, base_url: str) -> dict:
pass_match = re.search(r"(/pass_md5/[^'\"<>\s]+)", html)
if not pass_match:
raise ExtractorError("DoodStream: pass_md5 path not found in embed HTML")
pass_url = urljoin(base_url, pass_match.group(1))
ua = self.base_headers.get("user-agent") or _DOOD_UA
headers = {
"user-agent": ua,
"referer": f"{base_url}/",
}
proxy = self._get_proxy(pass_url)
async with AsyncSession() as s:
r = await s.get(
pass_url,
impersonate="chrome",
headers=headers,
timeout=20,
**({"proxy": proxy} if proxy else {}),
)
base_stream = r.text.strip()
if not base_stream or "RELOAD" in base_stream:
raise ExtractorError(
"DoodStream: pass_md5 endpoint returned no stream URL "
"(captcha session may have expired). "
"Ensure BYPARR_URL is set for reliable extraction."
)
token_match = re.search(r"token=([^&\s'\"]+)", html)
if not token_match:
raise ExtractorError("Dood: token missing")
raise ExtractorError("DoodStream: token not found in embed HTML")
token = token_match.group(1)
final_url = f"{base_stream}123456789?token={token}&expiry={int(time.time())}"
expiry = int(time.time())
final_url = f"{base_stream}123456789?token={token}&expiry={expiry}"
return {
"destination_url": final_url,