new version

2026-06-10 09:10:23 +00:00 · 2026-04-15 19:23:14 +02:00
parent 5120b19d0b
commit 8134936d59
135 changed files with 3013 additions and 1589 deletions
@@ -0,0 +1,110 @@
+import re
+from typing import Dict, Any
+from urllib.parse import urlparse
+
+import aiohttp
+
+from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
+
+
+class VidFastExtractor(BaseExtractor):
+    """
+    Extractor for vidfast.pro (movies and TV via ythd.org → cloudnestra.com).
+
+    URL formats accepted:
+      https://vidfast.pro/movie/{tmdb_id}
+      https://vidfast.pro/tv/{tmdb_id}/{season}/{episode}
+
+    Extraction flow:
+      1. Parse TMDB ID from the URL path.
+      2. Fetch https://ythd.org/embed/{tmdb_id}  →  grab first data-hash.
+      3. Fetch https://cloudnestra.com/rcp/{hash} (carrying ythd cookies)
+         →  grab /prorcp/ hash from the inline iframe src.
+      4. Fetch https://cloudnestra.com/prorcp/{prorcp_hash}
+         →  grab Playerjs `file:` parameter (HLS master playlist URL).
+      5. Replace the {v1} CDN placeholder with cloudnestra.com and return
+         the resolved HLS URL for MediaFlow's hls_manifest_proxy endpoint.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.mediaflow_endpoint = "hls_manifest_proxy"
+
+    async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
+        parsed = urlparse(url)
+        parts = parsed.path.strip("/").split("/")
+        if len(parts) < 2:
+            raise ExtractorError(f"VidFast: cannot parse TMDB ID from path: {parsed.path!r}")
+
+        tmdb_id = parts[1]
+        ua = self.base_headers.get(
+            "user-agent",
+            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
+            "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+        )
+
+        ythd_url = f"https://ythd.org/embed/{tmdb_id}"
+
+        # A single aiohttp session preserves cookies across the three hops.
+        cookie_jar = aiohttp.CookieJar()
+        timeout = aiohttp.ClientTimeout(total=30)
+
+        async with aiohttp.ClientSession(cookie_jar=cookie_jar, timeout=timeout) as session:
+            # ── Step 1: ythd.org embed page ───────────────────────────────
+            async with session.get(ythd_url, headers={"User-Agent": ua}) as resp:
+                if resp.status >= 400:
+                    raise ExtractorError(f"VidFast: ythd.org returned HTTP {resp.status}")
+                ythd_html = await resp.text()
+
+            hash_match = re.search(r'data-hash="([^"]+)"', ythd_html)
+            if not hash_match:
+                raise ExtractorError("VidFast: no data-hash attribute on ythd.org page")
+            data_hash = hash_match.group(1)
+
+            # ── Step 2: cloudnestra /rcp/ (needs ythd.org cookies) ────────
+            rcp_url = f"https://cloudnestra.com/rcp/{data_hash}"
+            async with session.get(
+                rcp_url,
+                headers={"User-Agent": ua, "Referer": ythd_url},
+            ) as resp:
+                if resp.status >= 400:
+                    raise ExtractorError(f"VidFast: cloudnestra /rcp/ returned HTTP {resp.status}")
+                rcp_html = await resp.text()
+
+            prorcp_match = re.search(r"src:\s*'/prorcp/([^']+)'", rcp_html)
+            if not prorcp_match:
+                raise ExtractorError("VidFast: /prorcp/ hash not found in cloudnestra page")
+            prorcp_hash = prorcp_match.group(1)
+
+            # ── Step 3: cloudnestra /prorcp/ (actual player page) ─────────
+            prorcp_url = f"https://cloudnestra.com/prorcp/{prorcp_hash}"
+            async with session.get(
+                prorcp_url,
+                headers={"User-Agent": ua, "Referer": rcp_url},
+            ) as resp:
+                if resp.status >= 400:
+                    raise ExtractorError(f"VidFast: cloudnestra /prorcp/ returned HTTP {resp.status}")
+                prorcp_html = await resp.text()
+
+        # ── Step 4: extract the HLS URL from Playerjs({…, file:"…"}) ──────
+        file_match = re.search(r'file:\s*"(https://[^"]+)"', prorcp_html)
+        if not file_match:
+            raise ExtractorError("VidFast: Playerjs file URL not found in /prorcp/ page")
+
+        # The file value may contain multiple fallback URLs separated by " or ".
+        first_url = file_match.group(1).split(" or ")[0].strip()
+
+        # {v1} is the primary CDN; tmstr4.cloudnestra.com hosts the proxied HLS.
+        stream_url = first_url.replace("{v1}", "cloudnestra.com")
+
+        if not stream_url.startswith("https://"):
+            raise ExtractorError(f"VidFast: unexpected stream URL: {stream_url[:120]!r}")
+
+        return {
+            "destination_url": stream_url,
+            "request_headers": {
+                "user-agent": ua,
+                "referer": "https://cloudnestra.com/",
+            },
+            "mediaflow_endpoint": self.mediaflow_endpoint,
+        }