mirror of
https://github.com/UrloMythus/UnHided.git
synced 2026-04-09 02:40:47 +00:00
update
This commit is contained in:
BIN
mediaflow_proxy/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/__pycache__/configs.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/__pycache__/configs.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/__pycache__/const.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/__pycache__/const.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/__pycache__/handlers.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/__pycache__/handlers.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/__pycache__/main.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/__pycache__/main.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/__pycache__/middleware.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/__pycache__/middleware.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/__pycache__/mpd_processor.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/__pycache__/mpd_processor.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/__pycache__/schemas.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/__pycache__/schemas.cpython-313.pyc
Normal file
Binary file not shown.
@@ -1,7 +1,6 @@
|
|||||||
from typing import Dict, Literal, Optional, Union
|
from typing import Dict, Literal, Optional
|
||||||
|
|
||||||
import httpx
|
from pydantic import BaseModel, Field, SecretStr
|
||||||
from pydantic import BaseModel, Field
|
|
||||||
from pydantic_settings import BaseSettings
|
from pydantic_settings import BaseSettings
|
||||||
|
|
||||||
|
|
||||||
@@ -28,48 +27,6 @@ class TransportConfig(BaseSettings):
|
|||||||
)
|
)
|
||||||
timeout: int = Field(60, description="Timeout for HTTP requests in seconds")
|
timeout: int = Field(60, description="Timeout for HTTP requests in seconds")
|
||||||
|
|
||||||
def get_mounts(
|
|
||||||
self, async_http: bool = True
|
|
||||||
) -> Dict[str, Optional[Union[httpx.HTTPTransport, httpx.AsyncHTTPTransport]]]:
|
|
||||||
"""
|
|
||||||
Get a dictionary of httpx mount points to transport instances.
|
|
||||||
"""
|
|
||||||
mounts = {}
|
|
||||||
transport_cls = httpx.AsyncHTTPTransport if async_http else httpx.HTTPTransport
|
|
||||||
global_verify = not self.disable_ssl_verification_globally
|
|
||||||
|
|
||||||
# Configure specific routes
|
|
||||||
for pattern, route in self.transport_routes.items():
|
|
||||||
mounts[pattern] = transport_cls(
|
|
||||||
verify=route.verify_ssl if global_verify else False,
|
|
||||||
proxy=route.proxy_url or self.proxy_url if route.proxy else None,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Hardcoded configuration for jxoplay.xyz domain - SSL verification disabled
|
|
||||||
mounts["all://jxoplay.xyz"] = transport_cls(
|
|
||||||
verify=False, proxy=self.proxy_url if self.all_proxy else None
|
|
||||||
)
|
|
||||||
|
|
||||||
mounts["all://dlhd.dad"] = transport_cls(
|
|
||||||
verify=False, proxy=self.proxy_url if self.all_proxy else None
|
|
||||||
)
|
|
||||||
|
|
||||||
mounts["all://*.newkso.ru"] = transport_cls(
|
|
||||||
verify=False, proxy=self.proxy_url if self.all_proxy else None
|
|
||||||
)
|
|
||||||
|
|
||||||
# Apply global settings for proxy and SSL
|
|
||||||
default_proxy_url = self.proxy_url if self.all_proxy else None
|
|
||||||
if default_proxy_url or not global_verify:
|
|
||||||
mounts["all://"] = transport_cls(proxy=default_proxy_url, verify=global_verify)
|
|
||||||
|
|
||||||
# Set default proxy for all routes if enabled
|
|
||||||
# This part is now handled above to combine proxy and SSL settings
|
|
||||||
# if self.all_proxy:
|
|
||||||
# mounts["all://"] = transport_cls(proxy=self.proxy_url)
|
|
||||||
|
|
||||||
return mounts
|
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
env_file = ".env"
|
env_file = ".env"
|
||||||
extra = "ignore"
|
extra = "ignore"
|
||||||
@@ -78,30 +35,80 @@ class TransportConfig(BaseSettings):
|
|||||||
class Settings(BaseSettings):
|
class Settings(BaseSettings):
|
||||||
api_password: str | None = None # The password for protecting the API endpoints.
|
api_password: str | None = None # The password for protecting the API endpoints.
|
||||||
log_level: str = "INFO" # The logging level to use.
|
log_level: str = "INFO" # The logging level to use.
|
||||||
transport_config: TransportConfig = Field(default_factory=TransportConfig) # Configuration for httpx transport.
|
transport_config: TransportConfig = Field(default_factory=TransportConfig) # Configuration for HTTP transport.
|
||||||
enable_streaming_progress: bool = False # Whether to enable streaming progress tracking.
|
enable_streaming_progress: bool = False # Whether to enable streaming progress tracking.
|
||||||
disable_home_page: bool = False # Whether to disable the home page UI.
|
disable_home_page: bool = False # Whether to disable the home page UI.
|
||||||
disable_docs: bool = False # Whether to disable the API documentation (Swagger UI).
|
disable_docs: bool = False # Whether to disable the API documentation (Swagger UI).
|
||||||
disable_speedtest: bool = False # Whether to disable the speedtest UI.
|
disable_speedtest: bool = False # Whether to disable the speedtest UI.
|
||||||
|
clear_cache_on_startup: bool = (
|
||||||
|
False # Whether to clear all caches (extractor, MPD, etc.) on startup. Useful for development.
|
||||||
|
)
|
||||||
stremio_proxy_url: str | None = None # The Stremio server URL for alternative content proxying.
|
stremio_proxy_url: str | None = None # The Stremio server URL for alternative content proxying.
|
||||||
m3u8_content_routing: Literal["mediaflow", "stremio", "direct"] = (
|
m3u8_content_routing: Literal["mediaflow", "stremio", "direct"] = (
|
||||||
"mediaflow" # Routing strategy for M3U8 content URLs: "mediaflow", "stremio", or "direct"
|
"mediaflow" # Routing strategy for M3U8 content URLs: "mediaflow", "stremio", or "direct"
|
||||||
)
|
)
|
||||||
enable_hls_prebuffer: bool = False # Whether to enable HLS pre-buffering for improved streaming performance.
|
enable_hls_prebuffer: bool = True # Whether to enable HLS pre-buffering for improved streaming performance.
|
||||||
|
livestream_start_offset: (
|
||||||
|
float | None
|
||||||
|
) = -18 # Default start offset for live streams (e.g., -18 to start 18 seconds behind live edge). Applies to HLS and MPD live playlists. Set to None to disable.
|
||||||
hls_prebuffer_segments: int = 5 # Number of segments to pre-buffer ahead.
|
hls_prebuffer_segments: int = 5 # Number of segments to pre-buffer ahead.
|
||||||
hls_prebuffer_cache_size: int = 50 # Maximum number of segments to cache in memory.
|
hls_prebuffer_cache_size: int = 50 # Maximum number of segments to cache in memory.
|
||||||
hls_prebuffer_max_memory_percent: int = 80 # Maximum percentage of system memory to use for HLS pre-buffer cache.
|
hls_prebuffer_max_memory_percent: int = 80 # Maximum percentage of system memory to use for HLS pre-buffer cache.
|
||||||
hls_prebuffer_emergency_threshold: int = 90 # Emergency threshold percentage to trigger aggressive cache cleanup.
|
hls_prebuffer_emergency_threshold: int = 90 # Emergency threshold percentage to trigger aggressive cache cleanup.
|
||||||
enable_dash_prebuffer: bool = False # Whether to enable DASH pre-buffering for improved streaming performance.
|
hls_prebuffer_inactivity_timeout: int = 60 # Seconds of inactivity before stopping playlist refresh loop.
|
||||||
|
hls_segment_cache_ttl: int = 300 # TTL (seconds) for cached HLS segments; 300s (5min) for VOD, lower for live.
|
||||||
|
enable_dash_prebuffer: bool = True # Whether to enable DASH pre-buffering for improved streaming performance.
|
||||||
dash_prebuffer_segments: int = 5 # Number of segments to pre-buffer ahead.
|
dash_prebuffer_segments: int = 5 # Number of segments to pre-buffer ahead.
|
||||||
dash_prebuffer_cache_size: int = 50 # Maximum number of segments to cache in memory.
|
dash_prebuffer_cache_size: int = 50 # Maximum number of segments to cache in memory.
|
||||||
dash_prebuffer_max_memory_percent: int = 80 # Maximum percentage of system memory to use for DASH pre-buffer cache.
|
dash_prebuffer_max_memory_percent: int = 80 # Maximum percentage of system memory to use for DASH pre-buffer cache.
|
||||||
dash_prebuffer_emergency_threshold: int = 90 # Emergency threshold percentage to trigger aggressive cache cleanup.
|
dash_prebuffer_emergency_threshold: int = 90 # Emergency threshold percentage to trigger aggressive cache cleanup.
|
||||||
mpd_live_init_cache_ttl: int = 0 # TTL (seconds) for live init segment cache; 0 disables caching.
|
dash_prebuffer_inactivity_timeout: int = 60 # Seconds of inactivity before cleaning up stream state.
|
||||||
|
dash_segment_cache_ttl: int = 60 # TTL (seconds) for cached media segments; longer = better for slow playback.
|
||||||
|
mpd_live_init_cache_ttl: int = 60 # TTL (seconds) for live init segment cache; 0 disables caching.
|
||||||
mpd_live_playlist_depth: int = 8 # Number of recent segments to expose per live playlist variant.
|
mpd_live_playlist_depth: int = 8 # Number of recent segments to expose per live playlist variant.
|
||||||
|
remux_to_ts: bool = False # Remux fMP4 segments to MPEG-TS for ExoPlayer/VLC compatibility.
|
||||||
|
processed_segment_cache_ttl: int = 60 # TTL (seconds) for caching processed (decrypted/remuxed) segments.
|
||||||
|
|
||||||
user_agent: str = (
|
# FlareSolverr settings (for Cloudflare bypass)
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36" # The user agent to use for HTTP requests.
|
flaresolverr_url: str | None = None # FlareSolverr service URL. Example: http://localhost:8191
|
||||||
|
flaresolverr_timeout: int = 60 # Timeout (seconds) for FlareSolverr requests.
|
||||||
|
|
||||||
|
# Acestream settings
|
||||||
|
enable_acestream: bool = False # Whether to enable Acestream proxy support.
|
||||||
|
acestream_host: str = "localhost" # Acestream engine host.
|
||||||
|
acestream_port: int = 6878 # Acestream engine port.
|
||||||
|
acestream_buffer_size: int = 4 * 1024 * 1024 # Buffer size for MPEG-TS streaming (4MB default, like acexy).
|
||||||
|
acestream_empty_timeout: int = 30 # Timeout (seconds) when no data is received from upstream.
|
||||||
|
acestream_session_timeout: int = 60 # Session timeout (seconds) for cleanup of inactive sessions.
|
||||||
|
acestream_keepalive_interval: int = 15 # Interval (seconds) for session keepalive polling.
|
||||||
|
|
||||||
|
# Telegram MTProto settings
|
||||||
|
enable_telegram: bool = False # Whether to enable Telegram MTProto proxy support.
|
||||||
|
telegram_api_id: int | None = None # Telegram API ID from https://my.telegram.org/apps
|
||||||
|
telegram_api_hash: SecretStr | None = None # Telegram API hash from https://my.telegram.org/apps
|
||||||
|
telegram_session_string: SecretStr | None = None # Persistent session string (avoids re-authentication).
|
||||||
|
telegram_max_connections: int = 8 # Max parallel DC connections for downloads (max 20, careful of floods).
|
||||||
|
telegram_request_timeout: int = 30 # Request timeout in seconds.
|
||||||
|
|
||||||
|
# Transcode settings
|
||||||
|
enable_transcode: bool = True # Whether to enable on-the-fly transcoding endpoints (MKV→fMP4, HLS VOD).
|
||||||
|
transcode_prefer_gpu: bool = True # Prefer GPU acceleration (NVENC/VideoToolbox/VAAPI) when available.
|
||||||
|
transcode_video_bitrate: str = "4M" # Target video bitrate for re-encoding (e.g. "4M", "2000k").
|
||||||
|
transcode_audio_bitrate: int = 192000 # AAC audio bitrate in bits/s for the Python transcode pipeline.
|
||||||
|
transcode_video_preset: str = "medium" # Encoding speed/quality tradeoff (libx264: ultrafast..veryslow).
|
||||||
|
|
||||||
|
user_agent: str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36" # The user agent to use for HTTP requests.
|
||||||
|
|
||||||
|
# Upstream error resilience settings
|
||||||
|
upstream_retry_on_disconnect: bool = True # Enable/disable retry when upstream disconnects mid-stream.
|
||||||
|
upstream_retry_attempts: int = 2 # Number of retry attempts when upstream disconnects during streaming.
|
||||||
|
upstream_retry_delay: float = 1.0 # Delay (seconds) between retry attempts.
|
||||||
|
graceful_stream_end: bool = True # Return valid empty playlist instead of error when upstream fails.
|
||||||
|
|
||||||
|
# Redis settings
|
||||||
|
redis_url: str | None = None # Redis URL for distributed locking and caching. None = disabled.
|
||||||
|
cache_namespace: str | None = (
|
||||||
|
None # Optional namespace for instance-specific caches (e.g. pod name or hostname). When set, extractor results and other IP-bound data are stored under this namespace so multiple pods sharing one Redis don't serve each other's IP-specific URLs.
|
||||||
)
|
)
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
|
|||||||
BIN
mediaflow_proxy/drm/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/drm/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/drm/__pycache__/decrypter.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/drm/__pycache__/decrypter.cpython-313.pyc
Normal file
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -65,9 +65,9 @@ class F16PxExtractor(BaseExtractor):
|
|||||||
raise ExtractorError("F16PX: No playback data")
|
raise ExtractorError("F16PX: No playback data")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
iv = self._b64url_decode(pb["iv"]) # nonce
|
iv = self._b64url_decode(pb["iv"]) # nonce
|
||||||
key = self._join_key_parts(pb["key_parts"]) # AES key
|
key = self._join_key_parts(pb["key_parts"]) # AES key
|
||||||
payload = self._b64url_decode(pb["payload"]) # ciphertext + tag
|
payload = self._b64url_decode(pb["payload"]) # ciphertext + tag
|
||||||
|
|
||||||
cipher = python_aesgcm.new(key)
|
cipher = python_aesgcm.new(key)
|
||||||
decrypted = cipher.open(iv, payload) # AAD = '' like ResolveURL
|
decrypted = cipher.open(iv, payload) # AAD = '' like ResolveURL
|
||||||
@@ -95,7 +95,7 @@ class F16PxExtractor(BaseExtractor):
|
|||||||
self.base_headers["origin"] = origin
|
self.base_headers["origin"] = origin
|
||||||
self.base_headers["Accept-Language"] = "en-US,en;q=0.5"
|
self.base_headers["Accept-Language"] = "en-US,en;q=0.5"
|
||||||
self.base_headers["Accept"] = "*/*"
|
self.base_headers["Accept"] = "*/*"
|
||||||
self.base_headers['user-agent'] = 'Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0'
|
self.base_headers["user-agent"] = "Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0"
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"destination_url": best,
|
"destination_url": best,
|
||||||
|
|||||||
BIN
mediaflow_proxy/extractors/__pycache__/F16Px.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/F16Px.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/base.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/base.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/dlhd.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/dlhd.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/factory.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/factory.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/fastream.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/fastream.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/filelions.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/filelions.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/filemoon.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/filemoon.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/gupload.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/gupload.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/livetv.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/livetv.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/maxstream.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/maxstream.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/mixdrop.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/mixdrop.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/okru.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/okru.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/uqload.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/uqload.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/vavoo.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/vavoo.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/vidmoly.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/vidmoly.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/vidoza.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/vidoza.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/vixcloud.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/vixcloud.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/voe.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/voe.cpython-313.pyc
Normal file
Binary file not shown.
@@ -1,21 +1,53 @@
|
|||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
from dataclasses import dataclass
|
||||||
from typing import Dict, Optional, Any
|
from typing import Dict, Optional, Any
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import httpx
|
import aiohttp
|
||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from mediaflow_proxy.configs import settings
|
from mediaflow_proxy.configs import settings
|
||||||
from mediaflow_proxy.utils.http_utils import create_httpx_client, DownloadError
|
from mediaflow_proxy.utils.http_client import create_aiohttp_session
|
||||||
|
from mediaflow_proxy.utils.http_utils import DownloadError
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class ExtractorError(Exception):
|
class ExtractorError(Exception):
|
||||||
"""Base exception for all extractors."""
|
"""Base exception for all extractors."""
|
||||||
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class HttpResponse:
|
||||||
|
"""
|
||||||
|
Simple response container for extractor HTTP requests.
|
||||||
|
|
||||||
|
Uses aiohttp-style naming conventions:
|
||||||
|
- status (not status_code)
|
||||||
|
- text (pre-loaded content as string)
|
||||||
|
- content (pre-loaded content as bytes)
|
||||||
|
"""
|
||||||
|
|
||||||
|
status: int
|
||||||
|
headers: Dict[str, str]
|
||||||
|
text: str
|
||||||
|
content: bytes
|
||||||
|
url: str
|
||||||
|
|
||||||
|
def json(self) -> Any:
|
||||||
|
"""Parse response content as JSON."""
|
||||||
|
return json.loads(self.text)
|
||||||
|
|
||||||
|
def get_origin(self) -> str:
|
||||||
|
"""Get the origin (scheme + host) from the response URL."""
|
||||||
|
parsed = urlparse(self.url)
|
||||||
|
return f"{parsed.scheme}://{parsed.netloc}"
|
||||||
|
|
||||||
|
|
||||||
class BaseExtractor(ABC):
|
class BaseExtractor(ABC):
|
||||||
"""Base class for all URL extractors.
|
"""Base class for all URL extractors.
|
||||||
|
|
||||||
@@ -43,74 +75,99 @@ class BaseExtractor(ABC):
|
|||||||
backoff_factor: float = 0.5,
|
backoff_factor: float = 0.5,
|
||||||
raise_on_status: bool = True,
|
raise_on_status: bool = True,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
) -> httpx.Response:
|
) -> HttpResponse:
|
||||||
"""
|
"""
|
||||||
Make HTTP request with retry and timeout support.
|
Make HTTP request with retry and timeout support using aiohttp.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
|
url : str
|
||||||
|
The URL to request.
|
||||||
|
method : str
|
||||||
|
HTTP method (GET, POST, etc.). Defaults to GET.
|
||||||
|
headers : dict | None
|
||||||
|
Additional headers to merge with base headers.
|
||||||
timeout : float | None
|
timeout : float | None
|
||||||
Seconds to wait for the request (applied to httpx.Timeout). Defaults to 15s.
|
Seconds to wait for the request. Defaults to 15s.
|
||||||
retries : int
|
retries : int
|
||||||
Number of attempts for transient errors.
|
Number of attempts for transient errors.
|
||||||
backoff_factor : float
|
backoff_factor : float
|
||||||
Base for exponential backoff between retries.
|
Base for exponential backoff between retries.
|
||||||
raise_on_status : bool
|
raise_on_status : bool
|
||||||
If True, HTTP non-2xx raises DownloadError (preserves status code).
|
If True, HTTP non-2xx raises DownloadError.
|
||||||
|
**kwargs
|
||||||
|
Additional arguments passed to aiohttp request (e.g., data, json).
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
HttpResponse
|
||||||
|
Response object with pre-loaded content.
|
||||||
"""
|
"""
|
||||||
attempt = 0
|
attempt = 0
|
||||||
last_exc = None
|
last_exc = None
|
||||||
|
|
||||||
# build request headers merging base and per-request
|
# Build request headers merging base and per-request
|
||||||
request_headers = self.base_headers.copy()
|
request_headers = self.base_headers.copy()
|
||||||
if headers:
|
if headers:
|
||||||
request_headers.update(headers)
|
request_headers.update(headers)
|
||||||
|
|
||||||
timeout_cfg = httpx.Timeout(timeout or 15.0)
|
timeout_val = timeout or 15.0
|
||||||
|
|
||||||
while attempt < retries:
|
while attempt < retries:
|
||||||
try:
|
try:
|
||||||
async with create_httpx_client(timeout=timeout_cfg) as client:
|
async with create_aiohttp_session(url, timeout=timeout_val) as (session, proxy_url):
|
||||||
response = await client.request(
|
async with session.request(
|
||||||
method,
|
method,
|
||||||
url,
|
url,
|
||||||
headers=request_headers,
|
headers=request_headers,
|
||||||
|
proxy=proxy_url,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
)
|
) as response:
|
||||||
|
# Read content while session is still open
|
||||||
|
content = await response.read()
|
||||||
|
text = content.decode("utf-8", errors="replace")
|
||||||
|
final_url = str(response.url)
|
||||||
|
status = response.status
|
||||||
|
resp_headers = dict(response.headers)
|
||||||
|
|
||||||
if raise_on_status:
|
if raise_on_status and status >= 400:
|
||||||
try:
|
body_preview = text[:500]
|
||||||
response.raise_for_status()
|
|
||||||
except httpx.HTTPStatusError as e:
|
|
||||||
# Provide a short body preview for debugging
|
|
||||||
body_preview = ""
|
|
||||||
try:
|
|
||||||
body_preview = e.response.text[:500]
|
|
||||||
except Exception:
|
|
||||||
body_preview = "<unreadable body>"
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"HTTPStatusError for %s (status=%s) -- body preview: %s",
|
"HTTP error for %s (status=%s) -- body preview: %s",
|
||||||
url,
|
url,
|
||||||
e.response.status_code,
|
status,
|
||||||
body_preview,
|
body_preview,
|
||||||
)
|
)
|
||||||
raise DownloadError(e.response.status_code, f"HTTP error {e.response.status_code} while requesting {url}")
|
raise DownloadError(status, f"HTTP error {status} while requesting {url}")
|
||||||
return response
|
|
||||||
|
return HttpResponse(
|
||||||
|
status=status,
|
||||||
|
headers=resp_headers,
|
||||||
|
text=text,
|
||||||
|
content=content,
|
||||||
|
url=final_url,
|
||||||
|
)
|
||||||
|
|
||||||
except DownloadError:
|
except DownloadError:
|
||||||
# Do not retry on explicit HTTP status errors (they are intentional)
|
# Do not retry on explicit HTTP status errors (they are intentional)
|
||||||
raise
|
raise
|
||||||
except (httpx.ReadTimeout, httpx.ConnectTimeout, httpx.NetworkError, httpx.TransportError) as e:
|
except (asyncio.TimeoutError, aiohttp.ClientError) as e:
|
||||||
# Transient network error — retry with backoff
|
# Transient network error - retry with backoff
|
||||||
last_exc = e
|
last_exc = e
|
||||||
attempt += 1
|
attempt += 1
|
||||||
sleep_for = backoff_factor * (2 ** (attempt - 1))
|
sleep_for = backoff_factor * (2 ** (attempt - 1))
|
||||||
logger.warning("Transient network error (attempt %s/%s) for %s: %s — retrying in %.1fs",
|
logger.warning(
|
||||||
attempt, retries, url, e, sleep_for)
|
"Transient network error (attempt %s/%s) for %s: %s — retrying in %.1fs",
|
||||||
|
attempt,
|
||||||
|
retries,
|
||||||
|
url,
|
||||||
|
e,
|
||||||
|
sleep_for,
|
||||||
|
)
|
||||||
await asyncio.sleep(sleep_for)
|
await asyncio.sleep(sleep_for)
|
||||||
continue
|
continue
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Unexpected exception — wrap as ExtractorError to keep interface consistent
|
# Unexpected exception - wrap as ExtractorError to keep interface consistent
|
||||||
logger.exception("Unhandled exception while requesting %s: %s", url, e)
|
logger.exception("Unhandled exception while requesting %s: %s", url, e)
|
||||||
raise ExtractorError(f"Request failed for URL {url}: {str(e)}")
|
raise ExtractorError(f"Request failed for URL {url}: {str(e)}")
|
||||||
|
|
||||||
|
|||||||
@@ -1,133 +1,345 @@
|
|||||||
|
import hashlib
|
||||||
|
import hmac
|
||||||
import re
|
import re
|
||||||
import base64
|
import time
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from typing import Any, Dict, Optional, List
|
from typing import Any, Dict, Optional
|
||||||
from urllib.parse import urlparse, quote_plus, urljoin
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
|
||||||
import httpx
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError, HttpResponse
|
||||||
|
from mediaflow_proxy.utils.http_client import create_aiohttp_session
|
||||||
|
from mediaflow_proxy.configs import settings
|
||||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Silenzia l'errore ConnectionResetError su Windows
|
# Silenzia l'errore ConnectionResetError su Windows
|
||||||
logging.getLogger('asyncio').setLevel(logging.CRITICAL)
|
logging.getLogger("asyncio").setLevel(logging.CRITICAL)
|
||||||
|
|
||||||
|
# Default fingerprint parameters
|
||||||
|
DEFAULT_DLHD_USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0"
|
||||||
|
DEFAULT_DLHD_SCREEN_RESOLUTION = "1920x1080"
|
||||||
|
DEFAULT_DLHD_TIMEZONE = "UTC"
|
||||||
|
DEFAULT_DLHD_LANGUAGE = "en"
|
||||||
|
|
||||||
|
|
||||||
|
def compute_fingerprint(
|
||||||
|
user_agent: str = DEFAULT_DLHD_USER_AGENT,
|
||||||
|
screen_resolution: str = DEFAULT_DLHD_SCREEN_RESOLUTION,
|
||||||
|
timezone: str = DEFAULT_DLHD_TIMEZONE,
|
||||||
|
language: str = DEFAULT_DLHD_LANGUAGE,
|
||||||
|
) -> str:
|
||||||
|
"""
|
||||||
|
Compute the X-Fingerprint header value.
|
||||||
|
|
||||||
|
Algorithm:
|
||||||
|
fingerprint = SHA256(useragent + screen_resolution + timezone + language).hex()[:16]
|
||||||
|
|
||||||
|
Args:
|
||||||
|
user_agent: The user agent string
|
||||||
|
screen_resolution: The screen resolution (e.g., "1920x1080")
|
||||||
|
timezone: The timezone (e.g., "UTC")
|
||||||
|
language: The language code (e.g., "en")
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The 16-character fingerprint
|
||||||
|
"""
|
||||||
|
combined = f"{user_agent}{screen_resolution}{timezone}{language}"
|
||||||
|
return hashlib.sha256(combined.encode("utf-8")).hexdigest()[:16]
|
||||||
|
|
||||||
|
|
||||||
|
def compute_key_path(resource: str, number: str, timestamp: int, fingerprint: str, secret_key: str) -> str:
|
||||||
|
"""
|
||||||
|
Compute the X-Key-Path header value.
|
||||||
|
|
||||||
|
Algorithm:
|
||||||
|
key_path = HMAC-SHA256("resource|number|timestamp|fingerprint", secret_key).hex()[:16]
|
||||||
|
|
||||||
|
Args:
|
||||||
|
resource: The resource from the key URL
|
||||||
|
number: The number from the key URL
|
||||||
|
timestamp: The Unix timestamp
|
||||||
|
fingerprint: The fingerprint value
|
||||||
|
secret_key: The HMAC secret key (channel_salt)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The 16-character key path
|
||||||
|
"""
|
||||||
|
combined = f"{resource}|{number}|{timestamp}|{fingerprint}"
|
||||||
|
hmac_hash = hmac.new(secret_key.encode("utf-8"), combined.encode("utf-8"), hashlib.sha256).hexdigest()
|
||||||
|
return hmac_hash[:16]
|
||||||
|
|
||||||
|
|
||||||
|
def compute_key_headers(key_url: str, secret_key: str) -> tuple[int, int, str, str] | None:
|
||||||
|
"""
|
||||||
|
Compute X-Key-Timestamp, X-Key-Nonce, X-Key-Path, and X-Fingerprint for a /key/ URL.
|
||||||
|
|
||||||
|
Algorithm:
|
||||||
|
1. Extract resource and number from URL pattern /key/{resource}/{number}
|
||||||
|
2. ts = Unix timestamp in seconds
|
||||||
|
3. hmac_hash = HMAC-SHA256(resource, secret_key).hex()
|
||||||
|
4. nonce = proof-of-work: find i where MD5(hmac+resource+number+ts+i)[:4] < 0x1000
|
||||||
|
5. fingerprint = compute_fingerprint()
|
||||||
|
6. key_path = HMAC-SHA256("resource|number|ts|fingerprint", secret_key).hex()[:16]
|
||||||
|
|
||||||
|
Args:
|
||||||
|
key_url: The key URL containing /key/{resource}/{number}
|
||||||
|
secret_key: The HMAC secret key (channel_salt)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (timestamp, nonce, key_path, fingerprint) or None if URL doesn't match pattern
|
||||||
|
"""
|
||||||
|
# Extract resource and number from URL
|
||||||
|
pattern = r"/key/([^/]+)/(\d+)"
|
||||||
|
match = re.search(pattern, key_url)
|
||||||
|
|
||||||
|
if not match:
|
||||||
|
return None
|
||||||
|
|
||||||
|
resource = match.group(1)
|
||||||
|
number = match.group(2)
|
||||||
|
|
||||||
|
ts = int(time.time())
|
||||||
|
|
||||||
|
# Compute HMAC-SHA256
|
||||||
|
hmac_hash = hmac.new(secret_key.encode("utf-8"), resource.encode("utf-8"), hashlib.sha256).hexdigest()
|
||||||
|
|
||||||
|
# Proof-of-work loop
|
||||||
|
nonce = 0
|
||||||
|
for i in range(100000):
|
||||||
|
combined = f"{hmac_hash}{resource}{number}{ts}{i}"
|
||||||
|
md5_hash = hashlib.md5(combined.encode("utf-8")).hexdigest()
|
||||||
|
prefix_value = int(md5_hash[:4], 16)
|
||||||
|
|
||||||
|
if prefix_value < 0x1000: # < 4096
|
||||||
|
nonce = i
|
||||||
|
break
|
||||||
|
|
||||||
|
fingerprint = compute_fingerprint()
|
||||||
|
key_path = compute_key_path(resource, number, ts, fingerprint, secret_key)
|
||||||
|
|
||||||
|
return ts, nonce, key_path, fingerprint
|
||||||
|
|
||||||
|
|
||||||
class DLHDExtractor(BaseExtractor):
|
class DLHDExtractor(BaseExtractor):
|
||||||
"""DLHD (DaddyLive) URL extractor for M3U8 streams.
|
"""DLHD (DaddyLive) URL extractor for M3U8 streams.
|
||||||
|
|
||||||
|
Supports the new authentication flow with:
|
||||||
Notes:
|
- EPlayerAuth extraction (auth_token, channel_key, channel_salt)
|
||||||
- Multi-domain support for daddylive.sx / dlhd.dad
|
- Server lookup for dynamic server selection
|
||||||
- Robust extraction of auth parameters and server lookup
|
- Dynamic key header computation for AES-128 encrypted streams
|
||||||
- Uses retries/timeouts via BaseExtractor where possible
|
|
||||||
- Multi-iframe fallback for resilience
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
def __init__(self, request_headers: dict):
|
def __init__(self, request_headers: dict):
|
||||||
super().__init__(request_headers)
|
super().__init__(request_headers)
|
||||||
self.mediaflow_endpoint = "hls_manifest_proxy"
|
self.mediaflow_endpoint = "hls_key_proxy"
|
||||||
self._iframe_context: Optional[str] = None
|
self._iframe_context: Optional[str] = None
|
||||||
|
self._flaresolverr_cookies: Optional[str] = None
|
||||||
|
self._flaresolverr_user_agent: Optional[str] = None
|
||||||
|
|
||||||
|
async def _fetch_via_flaresolverr(self, url: str) -> HttpResponse:
|
||||||
|
"""Fetch a URL using FlareSolverr to bypass Cloudflare protection."""
|
||||||
|
if not settings.flaresolverr_url:
|
||||||
|
raise ExtractorError("FlareSolverr URL not configured. Set FLARESOLVERR_URL in environment.")
|
||||||
|
|
||||||
|
flaresolverr_endpoint = f"{settings.flaresolverr_url.rstrip('/')}/v1"
|
||||||
|
payload = {
|
||||||
|
"cmd": "request.get",
|
||||||
|
"url": url,
|
||||||
|
"maxTimeout": settings.flaresolverr_timeout * 1000,
|
||||||
|
}
|
||||||
|
|
||||||
async def _make_request(self, url: str, method: str = "GET", headers: Optional[Dict] = None, **kwargs) -> Any:
|
logger.info(f"Using FlareSolverr to fetch: {url}")
|
||||||
"""Override to disable SSL verification for this extractor and use fetch_with_retry if available."""
|
|
||||||
from mediaflow_proxy.utils.http_utils import create_httpx_client, fetch_with_retry
|
|
||||||
|
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
async with session.post(
|
||||||
|
flaresolverr_endpoint,
|
||||||
|
json=payload,
|
||||||
|
timeout=aiohttp.ClientTimeout(total=settings.flaresolverr_timeout + 10),
|
||||||
|
) as response:
|
||||||
|
if response.status != 200:
|
||||||
|
raise ExtractorError(f"FlareSolverr returned status {response.status}")
|
||||||
|
|
||||||
|
data = await response.json()
|
||||||
|
|
||||||
|
if data.get("status") != "ok":
|
||||||
|
raise ExtractorError(f"FlareSolverr failed: {data.get('message', 'Unknown error')}")
|
||||||
|
|
||||||
|
solution = data.get("solution", {})
|
||||||
|
html_content = solution.get("response", "")
|
||||||
|
final_url = solution.get("url", url)
|
||||||
|
status = solution.get("status", 200)
|
||||||
|
|
||||||
|
# Store cookies and user-agent for subsequent requests
|
||||||
|
cookies = solution.get("cookies", [])
|
||||||
|
if cookies:
|
||||||
|
cookie_str = "; ".join([f"{c['name']}={c['value']}" for c in cookies])
|
||||||
|
self._flaresolverr_cookies = cookie_str
|
||||||
|
logger.info(f"FlareSolverr provided {len(cookies)} cookies")
|
||||||
|
|
||||||
|
user_agent = solution.get("userAgent")
|
||||||
|
if user_agent:
|
||||||
|
self._flaresolverr_user_agent = user_agent
|
||||||
|
logger.info(f"FlareSolverr user-agent: {user_agent}")
|
||||||
|
|
||||||
|
logger.info(f"FlareSolverr successfully bypassed Cloudflare for: {url}")
|
||||||
|
|
||||||
|
return HttpResponse(
|
||||||
|
status=status,
|
||||||
|
headers={},
|
||||||
|
text=html_content,
|
||||||
|
content=html_content.encode("utf-8", errors="replace"),
|
||||||
|
url=final_url,
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _make_request(
|
||||||
|
self, url: str, method: str = "GET", headers: Optional[Dict] = None, use_flaresolverr: bool = False, **kwargs
|
||||||
|
) -> HttpResponse:
|
||||||
|
"""Override to disable SSL verification and optionally use FlareSolverr."""
|
||||||
|
# Use FlareSolverr for Cloudflare-protected pages
|
||||||
|
if use_flaresolverr and settings.flaresolverr_url:
|
||||||
|
return await self._fetch_via_flaresolverr(url)
|
||||||
|
|
||||||
timeout = kwargs.pop("timeout", 15)
|
timeout = kwargs.pop("timeout", 15)
|
||||||
retries = kwargs.pop("retries", 3)
|
kwargs.pop("retries", 3) # consumed but not used directly
|
||||||
backoff_factor = kwargs.pop("backoff_factor", 0.5)
|
kwargs.pop("backoff_factor", 0.5) # consumed but not used directly
|
||||||
|
|
||||||
|
# Merge headers
|
||||||
|
request_headers = self.base_headers.copy()
|
||||||
|
if headers:
|
||||||
|
request_headers.update(headers)
|
||||||
|
|
||||||
async with create_httpx_client(verify=False, timeout=httpx.Timeout(timeout)) as client:
|
# Add FlareSolverr cookies if available
|
||||||
try:
|
if self._flaresolverr_cookies:
|
||||||
return await fetch_with_retry(client, method, url, headers or {}, timeout=timeout)
|
existing_cookies = request_headers.get("Cookie", "")
|
||||||
except Exception:
|
if existing_cookies:
|
||||||
logger.debug("fetch_with_retry failed or unavailable; falling back to direct request for %s", url)
|
request_headers["Cookie"] = f"{existing_cookies}; {self._flaresolverr_cookies}"
|
||||||
response = await client.request(method, url, headers=headers or {}, timeout=timeout)
|
else:
|
||||||
response.raise_for_status()
|
request_headers["Cookie"] = self._flaresolverr_cookies
|
||||||
return response
|
|
||||||
|
|
||||||
|
# Use FlareSolverr user-agent if available
|
||||||
|
if self._flaresolverr_user_agent:
|
||||||
|
request_headers["User-Agent"] = self._flaresolverr_user_agent
|
||||||
|
|
||||||
async def _extract_lovecdn_stream(self, iframe_url: str, iframe_content: str, headers: dict) -> Dict[str, Any]:
|
# Use create_aiohttp_session with verify=False for SSL bypass
|
||||||
|
async with create_aiohttp_session(url, timeout=timeout, verify=False) as (session, proxy_url):
|
||||||
|
async with session.request(method, url, headers=request_headers, proxy=proxy_url, **kwargs) as response:
|
||||||
|
content = await response.read()
|
||||||
|
final_url = str(response.url)
|
||||||
|
status = response.status
|
||||||
|
resp_headers = dict(response.headers)
|
||||||
|
|
||||||
|
if status >= 400:
|
||||||
|
raise ExtractorError(f"HTTP error {status} while requesting {url}")
|
||||||
|
|
||||||
|
return HttpResponse(
|
||||||
|
status=status,
|
||||||
|
headers=resp_headers,
|
||||||
|
text=content.decode("utf-8", errors="replace"),
|
||||||
|
content=content,
|
||||||
|
url=final_url,
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _extract_session_data(self, iframe_url: str, main_url: str) -> dict | None:
|
||||||
"""
|
"""
|
||||||
Estrattore alternativo per iframe lovecdn.ru che usa un formato diverso.
|
Fetch the iframe URL and extract auth_token, channel_key, and channel_salt.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
iframe_url: The iframe URL to fetch
|
||||||
|
main_url: The main site domain for Referer header
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict with auth_token, channel_key, channel_salt, or None if not found
|
||||||
"""
|
"""
|
||||||
|
headers = {
|
||||||
|
"User-Agent": self._flaresolverr_user_agent or DEFAULT_DLHD_USER_AGENT,
|
||||||
|
"Referer": f"https://{main_url}/",
|
||||||
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Cerca pattern di stream URL diretto
|
resp = await self._make_request(iframe_url, headers=headers, timeout=12)
|
||||||
m3u8_patterns = [
|
html = resp.text
|
||||||
r'["\']([^"\']*\.m3u8[^"\']*)["\']',
|
|
||||||
r'source[:\s]+["\']([^"\']+)["\']',
|
|
||||||
r'file[:\s]+["\']([^"\']+\.m3u8[^"\']*)["\']',
|
|
||||||
r'hlsManifestUrl[:\s]*["\']([^"\']+)["\']',
|
|
||||||
]
|
|
||||||
|
|
||||||
stream_url = None
|
|
||||||
for pattern in m3u8_patterns:
|
|
||||||
matches = re.findall(pattern, iframe_content)
|
|
||||||
for match in matches:
|
|
||||||
if '.m3u8' in match and match.startswith('http'):
|
|
||||||
stream_url = match
|
|
||||||
logger.info(f"Found direct m3u8 URL: {stream_url}")
|
|
||||||
break
|
|
||||||
if stream_url:
|
|
||||||
break
|
|
||||||
|
|
||||||
# Pattern 2: Cerca costruzione dinamica URL
|
|
||||||
if not stream_url:
|
|
||||||
channel_match = re.search(r'(?:stream|channel)["\s:=]+["\']([^"\']+)["\']', iframe_content)
|
|
||||||
server_match = re.search(r'(?:server|domain|host)["\s:=]+["\']([^"\']+)["\']', iframe_content)
|
|
||||||
|
|
||||||
if channel_match:
|
|
||||||
channel_name = channel_match.group(1)
|
|
||||||
server = server_match.group(1) if server_match else 'newkso.ru'
|
|
||||||
stream_url = f"https://{server}/{channel_name}/mono.m3u8"
|
|
||||||
logger.info(f"Constructed stream URL: {stream_url}")
|
|
||||||
|
|
||||||
if not stream_url:
|
|
||||||
# Fallback: cerca qualsiasi URL che sembri uno stream
|
|
||||||
url_pattern = r'https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*'
|
|
||||||
matches = re.findall(url_pattern, iframe_content)
|
|
||||||
if matches:
|
|
||||||
stream_url = matches[0]
|
|
||||||
logger.info(f"Found fallback stream URL: {stream_url}")
|
|
||||||
|
|
||||||
if not stream_url:
|
|
||||||
raise ExtractorError(f"Could not find stream URL in lovecdn.ru iframe")
|
|
||||||
|
|
||||||
# Usa iframe URL come referer
|
|
||||||
iframe_origin = f"https://{urlparse(iframe_url).netloc}"
|
|
||||||
stream_headers = {
|
|
||||||
'User-Agent': headers['User-Agent'],
|
|
||||||
'Referer': iframe_url,
|
|
||||||
'Origin': iframe_origin
|
|
||||||
}
|
|
||||||
|
|
||||||
# Determina endpoint in base al dominio dello stream
|
|
||||||
endpoint = "hls_key_proxy"
|
|
||||||
|
|
||||||
logger.info(f"Using lovecdn.ru stream with endpoint: {endpoint}")
|
|
||||||
|
|
||||||
return {
|
|
||||||
"destination_url": stream_url,
|
|
||||||
"request_headers": stream_headers,
|
|
||||||
"mediaflow_endpoint": endpoint,
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise ExtractorError(f"Failed to extract lovecdn.ru stream: {e}")
|
logger.warning(f"Error fetching iframe URL: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Pattern to extract EPlayerAuth.init block with authToken, channelKey, channelSalt
|
||||||
|
# Matches: EPlayerAuth.init({ authToken: '...', channelKey: '...', ..., channelSalt: '...' });
|
||||||
|
auth_pattern = r"EPlayerAuth\.init\s*\(\s*\{\s*authToken:\s*'([^']+)'"
|
||||||
|
channel_key_pattern = r"channelKey:\s*'([^']+)'"
|
||||||
|
channel_salt_pattern = r"channelSalt:\s*'([^']+)'"
|
||||||
|
|
||||||
|
# Pattern to extract server lookup base URL from fetchWithRetry call
|
||||||
|
lookup_pattern = r"fetchWithRetry\s*\(\s*'([^']+server_lookup\?channel_id=)"
|
||||||
|
|
||||||
|
auth_match = re.search(auth_pattern, html)
|
||||||
|
channel_key_match = re.search(channel_key_pattern, html)
|
||||||
|
channel_salt_match = re.search(channel_salt_pattern, html)
|
||||||
|
lookup_match = re.search(lookup_pattern, html)
|
||||||
|
|
||||||
|
if auth_match and channel_key_match and channel_salt_match:
|
||||||
|
result = {
|
||||||
|
"auth_token": auth_match.group(1),
|
||||||
|
"channel_key": channel_key_match.group(1),
|
||||||
|
"channel_salt": channel_salt_match.group(1),
|
||||||
|
}
|
||||||
|
if lookup_match:
|
||||||
|
result["server_lookup_url"] = lookup_match.group(1) + result["channel_key"]
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
async def _get_server_key(self, server_lookup_url: str, iframe_url: str) -> str | None:
|
||||||
|
"""
|
||||||
|
Fetch the server lookup URL and extract the server_key.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
server_lookup_url: The server lookup URL
|
||||||
|
iframe_url: The iframe URL for extracting the host for headers
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The server_key or None if not found
|
||||||
|
"""
|
||||||
|
parsed = urlparse(iframe_url)
|
||||||
|
iframe_host = parsed.netloc
|
||||||
|
|
||||||
|
headers = {
|
||||||
|
"User-Agent": self._flaresolverr_user_agent or DEFAULT_DLHD_USER_AGENT,
|
||||||
|
"Referer": f"https://{iframe_host}/",
|
||||||
|
"Origin": f"https://{iframe_host}",
|
||||||
|
}
|
||||||
|
|
||||||
|
try:
|
||||||
|
resp = await self._make_request(server_lookup_url, headers=headers, timeout=10)
|
||||||
|
data = resp.json()
|
||||||
|
return data.get("server_key")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Error fetching server lookup: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _build_m3u8_url(self, server_key: str, channel_key: str) -> str:
|
||||||
|
"""
|
||||||
|
Build the m3u8 URL based on the server_key.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
server_key: The server key from server lookup
|
||||||
|
channel_key: The channel key
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The m3u8 URL (with .css extension as per the original implementation)
|
||||||
|
"""
|
||||||
|
if server_key == "top1/cdn":
|
||||||
|
return f"https://top1.dvalna.ru/top1/cdn/{channel_key}/mono.css"
|
||||||
|
else:
|
||||||
|
return f"https://{server_key}new.dvalna.ru/{server_key}/{channel_key}/mono.css"
|
||||||
|
|
||||||
async def _extract_new_auth_flow(self, iframe_url: str, iframe_content: str, headers: dict) -> Dict[str, Any]:
|
async def _extract_new_auth_flow(self, iframe_url: str, iframe_content: str, headers: dict) -> Dict[str, Any]:
|
||||||
"""Handles the new authentication flow found in recent updates."""
|
"""Handles the new authentication flow found in recent updates."""
|
||||||
|
|
||||||
def _extract_params(js: str) -> Dict[str, Optional[str]]:
|
def _extract_params(js: str) -> Dict[str, Optional[str]]:
|
||||||
params = {}
|
params = {}
|
||||||
patterns = {
|
patterns = {
|
||||||
@@ -143,82 +355,93 @@ class DLHDExtractor(BaseExtractor):
|
|||||||
return params
|
return params
|
||||||
|
|
||||||
params = _extract_params(iframe_content)
|
params = _extract_params(iframe_content)
|
||||||
|
|
||||||
missing_params = [k for k, v in params.items() if not v]
|
missing_params = [k for k, v in params.items() if not v]
|
||||||
if missing_params:
|
if missing_params:
|
||||||
# This is not an error, just means it's not the new flow
|
# This is not an error, just means it's not the new flow
|
||||||
raise ExtractorError(f"Not the new auth flow: missing params {missing_params}")
|
raise ExtractorError(f"Not the new auth flow: missing params {missing_params}")
|
||||||
|
|
||||||
logger.info("New auth flow detected. Proceeding with POST auth.")
|
logger.info("New auth flow detected. Proceeding with POST auth.")
|
||||||
|
|
||||||
# 1. Initial Auth POST
|
# 1. Initial Auth POST
|
||||||
auth_url = 'https://security.newkso.ru/auth2.php'
|
auth_url = "https://security.newkso.ru/auth2.php"
|
||||||
# Use files parameter to force multipart/form-data which is required by the server
|
|
||||||
# (None, value) tells httpx to send it as a form field, not a file upload
|
|
||||||
multipart_data = {
|
|
||||||
'channelKey': (None, params["channel_key"]),
|
|
||||||
'country': (None, params["auth_country"]),
|
|
||||||
'timestamp': (None, params["auth_ts"]),
|
|
||||||
'expiry': (None, params["auth_expiry"]),
|
|
||||||
'token': (None, params["auth_token"]),
|
|
||||||
}
|
|
||||||
|
|
||||||
iframe_origin = f"https://{urlparse(iframe_url).netloc}"
|
iframe_origin = f"https://{urlparse(iframe_url).netloc}"
|
||||||
auth_headers = headers.copy()
|
auth_headers = headers.copy()
|
||||||
auth_headers.update({
|
auth_headers.update(
|
||||||
'Accept': '*/*',
|
{
|
||||||
'Accept-Language': 'en-US,en;q=0.9',
|
"Accept": "*/*",
|
||||||
'Origin': iframe_origin,
|
"Accept-Language": "en-US,en;q=0.9",
|
||||||
'Referer': iframe_url,
|
"Origin": iframe_origin,
|
||||||
'Sec-Fetch-Dest': 'empty',
|
"Referer": iframe_url,
|
||||||
'Sec-Fetch-Mode': 'cors',
|
"Sec-Fetch-Dest": "empty",
|
||||||
'Sec-Fetch-Site': 'cross-site',
|
"Sec-Fetch-Mode": "cors",
|
||||||
'Priority': 'u=1, i',
|
"Sec-Fetch-Site": "cross-site",
|
||||||
})
|
"Priority": "u=1, i",
|
||||||
|
}
|
||||||
from mediaflow_proxy.utils.http_utils import create_httpx_client
|
)
|
||||||
|
|
||||||
|
# Build form data for multipart/form-data
|
||||||
|
form_data = aiohttp.FormData()
|
||||||
|
form_data.add_field("channelKey", params["channel_key"])
|
||||||
|
form_data.add_field("country", params["auth_country"])
|
||||||
|
form_data.add_field("timestamp", params["auth_ts"])
|
||||||
|
form_data.add_field("expiry", params["auth_expiry"])
|
||||||
|
form_data.add_field("token", params["auth_token"])
|
||||||
|
|
||||||
try:
|
try:
|
||||||
async with create_httpx_client(verify=False) as client:
|
async with create_aiohttp_session(auth_url, timeout=12, verify=False) as (session, proxy_url):
|
||||||
# Note: using 'files' instead of 'data' to ensure multipart/form-data Content-Type
|
async with session.post(
|
||||||
auth_resp = await client.post(auth_url, files=multipart_data, headers=auth_headers, timeout=12)
|
auth_url,
|
||||||
auth_resp.raise_for_status()
|
headers=auth_headers,
|
||||||
auth_data = auth_resp.json()
|
data=form_data,
|
||||||
if not (auth_data.get("valid") or auth_data.get("success")):
|
proxy=proxy_url,
|
||||||
raise ExtractorError(f"Initial auth failed with response: {auth_data}")
|
) as response:
|
||||||
|
content = await response.read()
|
||||||
|
response.raise_for_status()
|
||||||
|
import json
|
||||||
|
|
||||||
|
auth_data = json.loads(content.decode("utf-8"))
|
||||||
|
if not (auth_data.get("valid") or auth_data.get("success")):
|
||||||
|
raise ExtractorError(f"Initial auth failed with response: {auth_data}")
|
||||||
logger.info("New auth flow: Initial auth successful.")
|
logger.info("New auth flow: Initial auth successful.")
|
||||||
|
except ExtractorError:
|
||||||
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise ExtractorError(f"New auth flow failed during initial auth POST: {e}")
|
raise ExtractorError(f"New auth flow failed during initial auth POST: {e}")
|
||||||
|
|
||||||
# 2. Server Lookup
|
# 2. Server Lookup
|
||||||
server_lookup_url = f"https://{urlparse(iframe_url).netloc}/server_lookup.js?channel_id={params['channel_key']}"
|
server_lookup_url = f"https://{urlparse(iframe_url).netloc}/server_lookup.js?channel_id={params['channel_key']}"
|
||||||
try:
|
try:
|
||||||
# Use _make_request as it handles retries and expects JSON
|
# Use _make_request as it handles retries
|
||||||
lookup_resp = await self._make_request(server_lookup_url, headers=headers, timeout=10)
|
lookup_resp = await self._make_request(server_lookup_url, headers=headers, timeout=10)
|
||||||
server_data = lookup_resp.json()
|
server_data = lookup_resp.json()
|
||||||
server_key = server_data.get('server_key')
|
server_key = server_data.get("server_key")
|
||||||
if not server_key:
|
if not server_key:
|
||||||
raise ExtractorError(f"No server_key in lookup response: {server_data}")
|
raise ExtractorError(f"No server_key in lookup response: {server_data}")
|
||||||
logger.info(f"New auth flow: Server lookup successful - Server key: {server_key}")
|
logger.info(f"New auth flow: Server lookup successful - Server key: {server_key}")
|
||||||
|
except ExtractorError:
|
||||||
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise ExtractorError(f"New auth flow failed during server lookup: {e}")
|
raise ExtractorError(f"New auth flow failed during server lookup: {e}")
|
||||||
|
|
||||||
# 3. Build final stream URL
|
# 3. Build final stream URL
|
||||||
channel_key = params['channel_key']
|
channel_key = params["channel_key"]
|
||||||
auth_token = params['auth_token']
|
auth_token = params["auth_token"]
|
||||||
# The JS logic uses .css, not .m3u8
|
# The JS logic uses .css, not .m3u8
|
||||||
if server_key == 'top1/cdn':
|
if server_key == "top1/cdn":
|
||||||
stream_url = f'https://top1.newkso.ru/top1/cdn/{channel_key}/mono.css'
|
stream_url = f"https://top1.newkso.ru/top1/cdn/{channel_key}/mono.css"
|
||||||
else:
|
else:
|
||||||
stream_url = f'https://{server_key}new.newkso.ru/{server_key}/{channel_key}/mono.css'
|
stream_url = f"https://{server_key}new.newkso.ru/{server_key}/{channel_key}/mono.css"
|
||||||
|
|
||||||
logger.info(f'New auth flow: Constructed stream URL: {stream_url}')
|
logger.info(f"New auth flow: Constructed stream URL: {stream_url}")
|
||||||
|
|
||||||
stream_headers = {
|
stream_headers = {
|
||||||
'User-Agent': headers['User-Agent'],
|
"User-Agent": headers["User-Agent"],
|
||||||
'Referer': iframe_url,
|
"Referer": iframe_url,
|
||||||
'Origin': iframe_origin,
|
"Origin": iframe_origin,
|
||||||
'Authorization': f'Bearer {auth_token}',
|
"Authorization": f"Bearer {auth_token}",
|
||||||
'X-Channel-Key': channel_key
|
"X-Channel-Key": channel_key,
|
||||||
}
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@@ -227,106 +450,255 @@ class DLHDExtractor(BaseExtractor):
|
|||||||
"mediaflow_endpoint": "hls_manifest_proxy",
|
"mediaflow_endpoint": "hls_manifest_proxy",
|
||||||
}
|
}
|
||||||
|
|
||||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
async def _extract_lovecdn_stream(self, iframe_url: str, iframe_content: str, headers: dict) -> Dict[str, Any]:
|
||||||
"""Main extraction flow: resolve base, fetch players, extract iframe, auth and final m3u8."""
|
"""
|
||||||
baseurl = "https://dlhd.dad/"
|
Alternative extractor for lovecdn.ru iframe that uses a different format.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Look for direct stream URL patterns
|
||||||
|
m3u8_patterns = [
|
||||||
|
r'["\']([^"\']*\.m3u8[^"\']*)["\']',
|
||||||
|
r'source[:\s]+["\']([^"\']+)["\']',
|
||||||
|
r'file[:\s]+["\']([^"\']+\.m3u8[^"\']*)["\']',
|
||||||
|
r'hlsManifestUrl[:\s]*["\']([^"\']+)["\']',
|
||||||
|
]
|
||||||
|
|
||||||
def extract_channel_id(u: str) -> Optional[str]:
|
stream_url = None
|
||||||
match_watch_id = re.search(r'watch\.php\?id=(\d+)', u)
|
for pattern in m3u8_patterns:
|
||||||
if match_watch_id:
|
matches = re.findall(pattern, iframe_content)
|
||||||
return match_watch_id.group(1)
|
for match in matches:
|
||||||
return None
|
if ".m3u8" in match and match.startswith("http"):
|
||||||
|
stream_url = match
|
||||||
|
logger.info(f"Found direct m3u8 URL: {stream_url}")
|
||||||
|
break
|
||||||
|
if stream_url:
|
||||||
|
break
|
||||||
|
|
||||||
|
# Pattern 2: Look for dynamic URL construction
|
||||||
|
if not stream_url:
|
||||||
|
channel_match = re.search(r'(?:stream|channel)["\s:=]+["\']([^"\']+)["\']', iframe_content)
|
||||||
|
server_match = re.search(r'(?:server|domain|host)["\s:=]+["\']([^"\']+)["\']', iframe_content)
|
||||||
|
|
||||||
async def get_stream_data(initial_url: str):
|
if channel_match:
|
||||||
daddy_origin = urlparse(baseurl).scheme + "://" + urlparse(baseurl).netloc
|
channel_name = channel_match.group(1)
|
||||||
daddylive_headers = {
|
server = server_match.group(1) if server_match else "newkso.ru"
|
||||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36',
|
stream_url = f"https://{server}/{channel_name}/mono.m3u8"
|
||||||
'Referer': baseurl,
|
logger.info(f"Constructed stream URL: {stream_url}")
|
||||||
'Origin': daddy_origin
|
|
||||||
|
if not stream_url:
|
||||||
|
# Fallback: look for any URL that looks like a stream
|
||||||
|
url_pattern = r'https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*'
|
||||||
|
matches = re.findall(url_pattern, iframe_content)
|
||||||
|
if matches:
|
||||||
|
stream_url = matches[0]
|
||||||
|
logger.info(f"Found fallback stream URL: {stream_url}")
|
||||||
|
|
||||||
|
if not stream_url:
|
||||||
|
raise ExtractorError("Could not find stream URL in lovecdn.ru iframe")
|
||||||
|
|
||||||
|
# Use iframe URL as referer
|
||||||
|
iframe_origin = f"https://{urlparse(iframe_url).netloc}"
|
||||||
|
stream_headers = {"User-Agent": headers["User-Agent"], "Referer": iframe_url, "Origin": iframe_origin}
|
||||||
|
|
||||||
|
# Determine endpoint based on the stream domain
|
||||||
|
endpoint = "hls_key_proxy"
|
||||||
|
|
||||||
|
logger.info(f"Using lovecdn.ru stream with endpoint: {endpoint}")
|
||||||
|
|
||||||
|
return {
|
||||||
|
"destination_url": stream_url,
|
||||||
|
"request_headers": stream_headers,
|
||||||
|
"mediaflow_endpoint": endpoint,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
raise ExtractorError(f"Failed to extract lovecdn.ru stream: {e}")
|
||||||
|
|
||||||
# 1. Request initial page
|
async def _extract_direct_stream(self, channel_id: str) -> Dict[str, Any]:
|
||||||
resp1 = await self._make_request(initial_url, headers=daddylive_headers, timeout=15)
|
"""
|
||||||
player_links = re.findall(r'<button[^>]*data-url="([^"]+)"[^>]*>Player\s*\d+</button>', resp1.text)
|
Direct stream extraction using server lookup API with the new auth flow.
|
||||||
if not player_links:
|
This extracts auth_token, channel_key, channel_salt and computes key headers.
|
||||||
raise ExtractorError("No player links found on the page.")
|
"""
|
||||||
|
# Common iframe domains for DLHD
|
||||||
|
iframe_domains = ["lefttoplay.xyz"]
|
||||||
|
|
||||||
|
for iframe_domain in iframe_domains:
|
||||||
|
try:
|
||||||
|
iframe_url = f"https://{iframe_domain}/premiumtv/daddyhd.php?id={channel_id}"
|
||||||
|
logger.info(f"Attempting extraction via {iframe_domain}")
|
||||||
|
|
||||||
# Prova tutti i player e raccogli tutti gli iframe validi
|
session_data = await self._extract_session_data(iframe_url, "dlhd.link")
|
||||||
last_player_error = None
|
|
||||||
iframe_candidates = []
|
|
||||||
|
|
||||||
for player_url in player_links:
|
if not session_data:
|
||||||
try:
|
logger.debug(f"No session data from {iframe_domain}")
|
||||||
if not player_url.startswith('http'):
|
|
||||||
player_url = baseurl + player_url.lstrip('/')
|
|
||||||
|
|
||||||
|
|
||||||
daddylive_headers['Referer'] = player_url
|
|
||||||
daddylive_headers['Origin'] = player_url
|
|
||||||
resp2 = await self._make_request(player_url, headers=daddylive_headers, timeout=12)
|
|
||||||
iframes2 = re.findall(r'<iframe.*?src="([^"]*)"', resp2.text)
|
|
||||||
|
|
||||||
# Raccogli tutti gli iframe trovati
|
|
||||||
for iframe in iframes2:
|
|
||||||
if iframe not in iframe_candidates:
|
|
||||||
iframe_candidates.append(iframe)
|
|
||||||
logger.info(f"Found iframe candidate: {iframe}")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
last_player_error = e
|
|
||||||
logger.warning(f"Failed to process player link {player_url}: {e}")
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
logger.info(f"Got session data from {iframe_domain}: channel_key={session_data['channel_key']}")
|
||||||
|
|
||||||
if not iframe_candidates:
|
# Get server key
|
||||||
if last_player_error:
|
if "server_lookup_url" not in session_data:
|
||||||
raise ExtractorError(f"All player links failed. Last error: {last_player_error}")
|
logger.debug(f"No server lookup URL from {iframe_domain}")
|
||||||
raise ExtractorError("No valid iframe found in any player page")
|
|
||||||
|
|
||||||
|
|
||||||
# Prova ogni iframe finché uno non funziona
|
|
||||||
last_iframe_error = None
|
|
||||||
|
|
||||||
for iframe_candidate in iframe_candidates:
|
|
||||||
try:
|
|
||||||
logger.info(f"Trying iframe: {iframe_candidate}")
|
|
||||||
|
|
||||||
iframe_domain = urlparse(iframe_candidate).netloc
|
|
||||||
if not iframe_domain:
|
|
||||||
logger.warning(f"Invalid iframe URL format: {iframe_candidate}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
self._iframe_context = iframe_candidate
|
|
||||||
resp3 = await self._make_request(iframe_candidate, headers=daddylive_headers, timeout=12)
|
|
||||||
iframe_content = resp3.text
|
|
||||||
logger.info(f"Successfully loaded iframe from: {iframe_domain}")
|
|
||||||
|
|
||||||
if 'lovecdn.ru' in iframe_domain:
|
|
||||||
logger.info("Detected lovecdn.ru iframe - using alternative extraction")
|
|
||||||
return await self._extract_lovecdn_stream(iframe_candidate, iframe_content, daddylive_headers)
|
|
||||||
else:
|
|
||||||
logger.info("Attempting new auth flow extraction.")
|
|
||||||
return await self._extract_new_auth_flow(iframe_candidate, iframe_content, daddylive_headers)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Failed to process iframe {iframe_candidate}: {e}")
|
|
||||||
last_iframe_error = e
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
raise ExtractorError(f"All iframe candidates failed. Last error: {last_iframe_error}")
|
server_key = await self._get_server_key(session_data["server_lookup_url"], iframe_url)
|
||||||
|
|
||||||
|
if not server_key:
|
||||||
|
logger.debug(f"No server key from {iframe_domain}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
logger.info(f"Got server key: {server_key}")
|
||||||
|
|
||||||
|
# Build m3u8 URL
|
||||||
|
m3u8_url = self._build_m3u8_url(server_key, session_data["channel_key"])
|
||||||
|
logger.info(f"M3U8 URL: {m3u8_url}")
|
||||||
|
|
||||||
|
# Build stream headers with auth
|
||||||
|
iframe_origin = f"https://{iframe_domain}"
|
||||||
|
stream_headers = {
|
||||||
|
"User-Agent": self._flaresolverr_user_agent or DEFAULT_DLHD_USER_AGENT,
|
||||||
|
"Referer": iframe_url,
|
||||||
|
"Origin": iframe_origin,
|
||||||
|
"Authorization": f"Bearer {session_data['auth_token']}",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Return the result with key header parameters
|
||||||
|
# These will be used to compute headers when fetching keys
|
||||||
|
return {
|
||||||
|
"destination_url": m3u8_url,
|
||||||
|
"request_headers": stream_headers,
|
||||||
|
"mediaflow_endpoint": "hls_key_proxy",
|
||||||
|
# Force playlist processing since DLHD uses .css extension for m3u8
|
||||||
|
"force_playlist_proxy": True,
|
||||||
|
# Key header computation parameters
|
||||||
|
"dlhd_key_params": {
|
||||||
|
"channel_salt": session_data["channel_salt"],
|
||||||
|
"auth_token": session_data["auth_token"],
|
||||||
|
"iframe_url": iframe_url,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed extraction via {iframe_domain}: {e}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
raise ExtractorError(f"Failed to extract stream from all iframe domains for channel {channel_id}")
|
||||||
|
|
||||||
|
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||||
|
"""Main extraction flow - uses direct server lookup with new auth flow."""
|
||||||
|
|
||||||
|
def extract_channel_id(u: str) -> Optional[str]:
|
||||||
|
match_watch_id = re.search(r"watch\.php\?id=(\d+)", u)
|
||||||
|
if match_watch_id:
|
||||||
|
return match_watch_id.group(1)
|
||||||
|
# Also try stream-XXX pattern
|
||||||
|
match_stream = re.search(r"stream-(\d+)", u)
|
||||||
|
if match_stream:
|
||||||
|
return match_stream.group(1)
|
||||||
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
channel_id = extract_channel_id(url)
|
channel_id = extract_channel_id(url)
|
||||||
if not channel_id:
|
if not channel_id:
|
||||||
raise ExtractorError(f"Unable to extract channel ID from {url}")
|
raise ExtractorError(f"Unable to extract channel ID from {url}")
|
||||||
|
|
||||||
logger.info(f"Using base domain: {baseurl}")
|
logger.info(f"Extracting DLHD stream for channel ID: {channel_id}")
|
||||||
return await get_stream_data(url)
|
|
||||||
|
|
||||||
|
# Try direct stream extraction with new auth flow
|
||||||
|
try:
|
||||||
|
return await self._extract_direct_stream(channel_id)
|
||||||
|
except ExtractorError as e:
|
||||||
|
logger.warning(f"Direct stream extraction failed: {e}")
|
||||||
|
|
||||||
|
# Fallback to legacy iframe-based extraction if direct fails
|
||||||
|
logger.info("Falling back to iframe-based extraction...")
|
||||||
|
return await self._extract_via_iframe(url, channel_id)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise ExtractorError(f"Extraction failed: {str(e)}")
|
raise ExtractorError(f"Extraction failed: {str(e)}")
|
||||||
|
|
||||||
|
async def _extract_via_iframe(self, url: str, channel_id: str) -> Dict[str, Any]:
|
||||||
|
"""Legacy iframe-based extraction flow - used as fallback."""
|
||||||
|
baseurl = "https://dlhd.dad/"
|
||||||
|
|
||||||
|
daddy_origin = urlparse(baseurl).scheme + "://" + urlparse(baseurl).netloc
|
||||||
|
daddylive_headers = {
|
||||||
|
"User-Agent": self._flaresolverr_user_agent
|
||||||
|
or "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
|
||||||
|
"Referer": baseurl,
|
||||||
|
"Origin": daddy_origin,
|
||||||
|
}
|
||||||
|
|
||||||
|
# 1. Request initial page - use FlareSolverr if available to bypass Cloudflare
|
||||||
|
use_flaresolverr = settings.flaresolverr_url is not None
|
||||||
|
resp1 = await self._make_request(url, headers=daddylive_headers, timeout=15, use_flaresolverr=use_flaresolverr)
|
||||||
|
resp1_text = resp1.text
|
||||||
|
|
||||||
|
# Update headers with FlareSolverr user-agent after initial request
|
||||||
|
if self._flaresolverr_user_agent:
|
||||||
|
daddylive_headers["User-Agent"] = self._flaresolverr_user_agent
|
||||||
|
|
||||||
|
player_links = re.findall(r'<button[^>]*data-url="([^"]+)"[^>]*>Player\s*\d+</button>', resp1_text)
|
||||||
|
if not player_links:
|
||||||
|
raise ExtractorError("No player links found on the page.")
|
||||||
|
|
||||||
|
# Try all players and collect all valid iframes
|
||||||
|
last_player_error = None
|
||||||
|
iframe_candidates = []
|
||||||
|
|
||||||
|
for player_url in player_links:
|
||||||
|
try:
|
||||||
|
if not player_url.startswith("http"):
|
||||||
|
player_url = baseurl + player_url.lstrip("/")
|
||||||
|
|
||||||
|
daddylive_headers["Referer"] = player_url
|
||||||
|
daddylive_headers["Origin"] = player_url
|
||||||
|
resp2 = await self._make_request(player_url, headers=daddylive_headers, timeout=12)
|
||||||
|
resp2_text = resp2.text
|
||||||
|
iframes2 = re.findall(r'<iframe.*?src="([^"]*)"', resp2_text)
|
||||||
|
|
||||||
|
# Collect all found iframes
|
||||||
|
for iframe in iframes2:
|
||||||
|
if iframe not in iframe_candidates:
|
||||||
|
iframe_candidates.append(iframe)
|
||||||
|
logger.info(f"Found iframe candidate: {iframe}")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
last_player_error = e
|
||||||
|
logger.warning(f"Failed to process player link {player_url}: {e}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not iframe_candidates:
|
||||||
|
if last_player_error:
|
||||||
|
raise ExtractorError(f"All player links failed. Last error: {last_player_error}")
|
||||||
|
raise ExtractorError("No valid iframe found in any player page")
|
||||||
|
|
||||||
|
# Try each iframe until one works
|
||||||
|
last_iframe_error = None
|
||||||
|
|
||||||
|
for iframe_candidate in iframe_candidates:
|
||||||
|
try:
|
||||||
|
logger.info(f"Trying iframe: {iframe_candidate}")
|
||||||
|
|
||||||
|
iframe_domain = urlparse(iframe_candidate).netloc
|
||||||
|
if not iframe_domain:
|
||||||
|
logger.warning(f"Invalid iframe URL format: {iframe_candidate}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
self._iframe_context = iframe_candidate
|
||||||
|
resp3 = await self._make_request(iframe_candidate, headers=daddylive_headers, timeout=12)
|
||||||
|
iframe_content = resp3.text
|
||||||
|
logger.info(f"Successfully loaded iframe from: {iframe_domain}")
|
||||||
|
|
||||||
|
if "lovecdn.ru" in iframe_domain:
|
||||||
|
logger.info("Detected lovecdn.ru iframe - using alternative extraction")
|
||||||
|
return await self._extract_lovecdn_stream(iframe_candidate, iframe_content, daddylive_headers)
|
||||||
|
else:
|
||||||
|
logger.info("Attempting new auth flow extraction.")
|
||||||
|
return await self._extract_new_auth_flow(iframe_candidate, iframe_content, daddylive_headers)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to process iframe {iframe_candidate}: {e}")
|
||||||
|
last_iframe_error = e
|
||||||
|
continue
|
||||||
|
|
||||||
|
raise ExtractorError(f"All iframe candidates failed. Last error: {last_iframe_error}")
|
||||||
|
|||||||
@@ -1,39 +1,52 @@
|
|||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
from typing import Dict
|
from urllib.parse import urlparse, urljoin
|
||||||
|
|
||||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||||
|
|
||||||
|
|
||||||
class DoodStreamExtractor(BaseExtractor):
|
class DoodStreamExtractor(BaseExtractor):
|
||||||
"""DoodStream URL extractor."""
|
"""
|
||||||
|
Dood / MyVidPlay extractor
|
||||||
|
Resolves to direct CDN MP4
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, request_headers: dict):
|
def __init__(self, request_headers: dict):
|
||||||
super().__init__(request_headers)
|
super().__init__(request_headers)
|
||||||
self.base_url = "https://d000d.com"
|
self.base_url = "https://myvidplay.com"
|
||||||
|
|
||||||
async def extract(self, url: str, **kwargs) -> Dict[str, str]:
|
async def extract(self, url: str, **kwargs):
|
||||||
"""Extract DoodStream URL."""
|
parsed = urlparse(url)
|
||||||
response = await self._make_request(url)
|
video_id = parsed.path.rstrip("/").split("/")[-1]
|
||||||
|
if not video_id:
|
||||||
|
raise ExtractorError("Invalid Dood URL")
|
||||||
|
|
||||||
# Extract URL pattern
|
headers = {
|
||||||
pattern = r"(\/pass_md5\/.*?)'.*(\?token=.*?expiry=)"
|
"User-Agent": self.base_headers.get("User-Agent") or "Mozilla/5.0",
|
||||||
match = re.search(pattern, response.text, re.DOTALL)
|
"Referer": f"{self.base_url}/",
|
||||||
|
}
|
||||||
|
|
||||||
|
embed_url = f"{self.base_url}/e/{video_id}"
|
||||||
|
html = (await self._make_request(embed_url, headers=headers)).text
|
||||||
|
|
||||||
|
match = re.search(r"(\/pass_md5\/[^']+)", html)
|
||||||
if not match:
|
if not match:
|
||||||
raise ExtractorError("Failed to extract URL pattern")
|
raise ExtractorError("Dood: pass_md5 not found")
|
||||||
|
|
||||||
# Build final URL
|
pass_url = urljoin(self.base_url, match.group(1))
|
||||||
pass_url = f"{self.base_url}{match[1]}"
|
|
||||||
referer = f"{self.base_url}/"
|
|
||||||
headers = {"range": "bytes=0-", "referer": referer}
|
|
||||||
|
|
||||||
response = await self._make_request(pass_url, headers=headers)
|
base_stream = (await self._make_request(pass_url, headers=headers)).text.strip()
|
||||||
timestamp = str(int(time.time()))
|
|
||||||
final_url = f"{response.text}123456789{match[2]}{timestamp}"
|
token_match = re.search(r"token=([^&]+)", html)
|
||||||
|
if not token_match:
|
||||||
|
raise ExtractorError("Dood: token missing")
|
||||||
|
|
||||||
|
token = token_match.group(1)
|
||||||
|
|
||||||
|
final_url = f"{base_stream}123456789?token={token}&expiry={int(time.time())}"
|
||||||
|
|
||||||
self.base_headers["referer"] = referer
|
|
||||||
return {
|
return {
|
||||||
"destination_url": final_url,
|
"destination_url": final_url,
|
||||||
"request_headers": self.base_headers,
|
"request_headers": headers,
|
||||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
"mediaflow_endpoint": "proxy_stream_endpoint",
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ from mediaflow_proxy.extractors.sportsonline import SportsonlineExtractor
|
|||||||
from mediaflow_proxy.extractors.filelions import FileLionsExtractor
|
from mediaflow_proxy.extractors.filelions import FileLionsExtractor
|
||||||
from mediaflow_proxy.extractors.filemoon import FileMoonExtractor
|
from mediaflow_proxy.extractors.filemoon import FileMoonExtractor
|
||||||
from mediaflow_proxy.extractors.F16Px import F16PxExtractor
|
from mediaflow_proxy.extractors.F16Px import F16PxExtractor
|
||||||
|
from mediaflow_proxy.extractors.gupload import GuploadExtractor
|
||||||
from mediaflow_proxy.extractors.livetv import LiveTVExtractor
|
from mediaflow_proxy.extractors.livetv import LiveTVExtractor
|
||||||
from mediaflow_proxy.extractors.lulustream import LuluStreamExtractor
|
from mediaflow_proxy.extractors.lulustream import LuluStreamExtractor
|
||||||
from mediaflow_proxy.extractors.maxstream import MaxstreamExtractor
|
from mediaflow_proxy.extractors.maxstream import MaxstreamExtractor
|
||||||
@@ -33,6 +34,7 @@ class ExtractorFactory:
|
|||||||
"FileLions": FileLionsExtractor,
|
"FileLions": FileLionsExtractor,
|
||||||
"FileMoon": FileMoonExtractor,
|
"FileMoon": FileMoonExtractor,
|
||||||
"F16Px": F16PxExtractor,
|
"F16Px": F16PxExtractor,
|
||||||
|
"Gupload": GuploadExtractor,
|
||||||
"Uqload": UqloadExtractor,
|
"Uqload": UqloadExtractor,
|
||||||
"Mixdrop": MixdropExtractor,
|
"Mixdrop": MixdropExtractor,
|
||||||
"Streamtape": StreamtapeExtractor,
|
"Streamtape": StreamtapeExtractor,
|
||||||
|
|||||||
@@ -4,25 +4,29 @@ from mediaflow_proxy.extractors.base import BaseExtractor
|
|||||||
from mediaflow_proxy.utils.packed import eval_solver
|
from mediaflow_proxy.utils.packed import eval_solver
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class FastreamExtractor(BaseExtractor):
|
class FastreamExtractor(BaseExtractor):
|
||||||
"""Fastream URL extractor."""
|
"""Fastream URL extractor."""
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
self.mediaflow_endpoint = "hls_manifest_proxy"
|
self.mediaflow_endpoint = "hls_manifest_proxy"
|
||||||
|
|
||||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||||
headers = {'Accept': '*/*', 'Connection': 'keep-alive','Accept-Language': 'en-US,en;q=0.5','Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0'}
|
headers = {
|
||||||
|
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||||
|
"Connection": "keep-alive",
|
||||||
|
"Accept-Language": "en-US,en;q=0.5",
|
||||||
|
"user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0",
|
||||||
|
}
|
||||||
patterns = [r'file:"(.*?)"']
|
patterns = [r'file:"(.*?)"']
|
||||||
|
|
||||||
final_url = await eval_solver(self, url, headers, patterns)
|
final_url = await eval_solver(self, url, headers, patterns)
|
||||||
|
|
||||||
self.base_headers["referer"] = f'https://{url.replace("https://","").split("/")[0]}/'
|
self.base_headers["referer"] = f"https://{url.replace('https://', '').split('/')[0]}/"
|
||||||
self.base_headers["origin"] = f'https://{url.replace("https://","").split("/")[0]}'
|
self.base_headers["origin"] = f"https://{url.replace('https://', '').split('/')[0]}"
|
||||||
self.base_headers['Accept-Language'] = 'en-US,en;q=0.5'
|
self.base_headers["Accept-Language"] = "en-US,en;q=0.5"
|
||||||
self.base_headers['Accept'] = '*/*'
|
self.base_headers["Accept"] = "*/*"
|
||||||
self.base_headers['user-agent'] = 'Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0'
|
self.base_headers["user-agent"] = "Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0"
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"destination_url": final_url,
|
"destination_url": final_url,
|
||||||
|
|||||||
@@ -3,17 +3,18 @@ from typing import Dict, Any
|
|||||||
from mediaflow_proxy.extractors.base import BaseExtractor
|
from mediaflow_proxy.extractors.base import BaseExtractor
|
||||||
from mediaflow_proxy.utils.packed import eval_solver
|
from mediaflow_proxy.utils.packed import eval_solver
|
||||||
|
|
||||||
|
|
||||||
class FileLionsExtractor(BaseExtractor):
|
class FileLionsExtractor(BaseExtractor):
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
self.mediaflow_endpoint = "hls_manifest_proxy"
|
self.mediaflow_endpoint = "hls_manifest_proxy"
|
||||||
|
|
||||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||||
headers = {}
|
headers = {}
|
||||||
patterns = [ # See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/filelions.py
|
patterns = [ # See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/filelions.py
|
||||||
r'''sources:\s*\[{file:\s*["'](?P<url>[^"']+)''',
|
r"""sources:\s*\[{file:\s*["'](?P<url>[^"']+)""",
|
||||||
r'''["']hls4["']:\s*["'](?P<url>[^"']+)''',
|
r"""["']hls4["']:\s*["'](?P<url>[^"']+)""",
|
||||||
r'''["']hls2["']:\s*["'](?P<url>[^"']+)'''
|
r"""["']hls2["']:\s*["'](?P<url>[^"']+)""",
|
||||||
]
|
]
|
||||||
|
|
||||||
final_url = await eval_solver(self, url, headers, patterns)
|
final_url = await eval_solver(self, url, headers, patterns)
|
||||||
@@ -23,4 +24,5 @@ class FileLionsExtractor(BaseExtractor):
|
|||||||
"destination_url": final_url,
|
"destination_url": final_url,
|
||||||
"request_headers": self.base_headers,
|
"request_headers": self.base_headers,
|
||||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
|
"stream_transformer": "ts_stream",
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ class FileMoonExtractor(BaseExtractor):
|
|||||||
)
|
)
|
||||||
|
|
||||||
test_resp = await self._make_request(final_url, headers=headers)
|
test_resp = await self._make_request(final_url, headers=headers)
|
||||||
if test_resp.status_code == 404:
|
if test_resp.status == 404:
|
||||||
raise ExtractorError("Stream not found (404)")
|
raise ExtractorError("Stream not found (404)")
|
||||||
|
|
||||||
self.base_headers["referer"] = url
|
self.base_headers["referer"] = url
|
||||||
|
|||||||
65
mediaflow_proxy/extractors/gupload.py
Normal file
65
mediaflow_proxy/extractors/gupload.py
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
import re
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
|
from typing import Dict, Any
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class GuploadExtractor(BaseExtractor):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
self.mediaflow_endpoint = "hls_manifest_proxy"
|
||||||
|
|
||||||
|
async def extract(self, url: str) -> Dict[str, Any]:
|
||||||
|
parsed = urlparse(url)
|
||||||
|
if not parsed.hostname or "gupload.xyz" not in parsed.hostname:
|
||||||
|
raise ExtractorError("GUPLOAD: Invalid domain")
|
||||||
|
|
||||||
|
headers = {
|
||||||
|
"User-Agent": (
|
||||||
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||||
|
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||||
|
"Chrome/144 Safari/537.36"
|
||||||
|
),
|
||||||
|
"Referer": "https://gupload.xyz/",
|
||||||
|
"Origin": "https://gupload.xyz",
|
||||||
|
}
|
||||||
|
|
||||||
|
# --- Fetch embed page ---
|
||||||
|
response = await self._make_request(url, headers=headers)
|
||||||
|
html = response.text
|
||||||
|
|
||||||
|
# --- Extract base64 payload ---
|
||||||
|
match = re.search(r"decodePayload\('([^']+)'\)", html)
|
||||||
|
if not match:
|
||||||
|
raise ExtractorError("GUPLOAD: Payload not found")
|
||||||
|
|
||||||
|
encoded = match.group(1).strip()
|
||||||
|
|
||||||
|
# --- Decode payload ---
|
||||||
|
try:
|
||||||
|
decoded = base64.b64decode(encoded).decode("utf-8", "ignore")
|
||||||
|
# payload format: <junk>|{json}
|
||||||
|
json_part = decoded.split("|", 1)[1]
|
||||||
|
payload = json.loads(json_part)
|
||||||
|
except Exception:
|
||||||
|
raise ExtractorError("GUPLOAD: Payload decode failed")
|
||||||
|
|
||||||
|
# --- Extract HLS URL ---
|
||||||
|
hls_url = payload.get("videoUrl")
|
||||||
|
if not hls_url:
|
||||||
|
raise ExtractorError("GUPLOAD: videoUrl missing")
|
||||||
|
|
||||||
|
# --- Validate stream (prevents client timeout) ---
|
||||||
|
test = await self._make_request(hls_url, headers=headers, raise_on_status=False)
|
||||||
|
if test.status >= 400:
|
||||||
|
raise ExtractorError(f"GUPLOAD: Stream unavailable ({test.status})")
|
||||||
|
|
||||||
|
# Return MASTER playlist
|
||||||
|
return {
|
||||||
|
"destination_url": hls_url,
|
||||||
|
"request_headers": headers,
|
||||||
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
|
}
|
||||||
@@ -2,9 +2,9 @@ import re
|
|||||||
from typing import Dict, Tuple, Optional
|
from typing import Dict, Tuple, Optional
|
||||||
from urllib.parse import urljoin, urlparse, unquote
|
from urllib.parse import urljoin, urlparse, unquote
|
||||||
|
|
||||||
from httpx import Response
|
import aiohttp
|
||||||
|
|
||||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError, HttpResponse
|
||||||
|
|
||||||
|
|
||||||
class LiveTVExtractor(BaseExtractor):
|
class LiveTVExtractor(BaseExtractor):
|
||||||
@@ -33,20 +33,21 @@ class LiveTVExtractor(BaseExtractor):
|
|||||||
stream_title: Optional stream title to filter specific stream
|
stream_title: Optional stream title to filter specific stream
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple[str, Dict[str, str]]: Stream URL and required headers
|
Dict containing destination_url, request_headers, and mediaflow_endpoint
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# Get the channel page
|
# Get the channel page
|
||||||
response = await self._make_request(url)
|
response = await self._make_request(url)
|
||||||
|
response_text = response.text
|
||||||
self.base_headers["referer"] = urljoin(url, "/")
|
self.base_headers["referer"] = urljoin(url, "/")
|
||||||
|
|
||||||
# Extract player API details
|
# Extract player API details
|
||||||
player_api_base, method = await self._extract_player_api_base(response.text)
|
player_api_base, method = await self._extract_player_api_base(response_text)
|
||||||
if not player_api_base:
|
if not player_api_base:
|
||||||
raise ExtractorError("Failed to extract player API URL")
|
raise ExtractorError("Failed to extract player API URL")
|
||||||
|
|
||||||
# Get player options
|
# Get player options
|
||||||
options_data = await self._get_player_options(response.text)
|
options_data = await self._get_player_options(response_text)
|
||||||
if not options_data:
|
if not options_data:
|
||||||
raise ExtractorError("No player options found")
|
raise ExtractorError("No player options found")
|
||||||
|
|
||||||
@@ -66,7 +67,7 @@ class LiveTVExtractor(BaseExtractor):
|
|||||||
if not stream_url:
|
if not stream_url:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
response = {
|
result = {
|
||||||
"destination_url": stream_url,
|
"destination_url": stream_url,
|
||||||
"request_headers": self.base_headers,
|
"request_headers": self.base_headers,
|
||||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
@@ -75,7 +76,7 @@ class LiveTVExtractor(BaseExtractor):
|
|||||||
# Set endpoint based on stream type
|
# Set endpoint based on stream type
|
||||||
if stream_data.get("type") == "mpd":
|
if stream_data.get("type") == "mpd":
|
||||||
if stream_data.get("drm_key_id") and stream_data.get("drm_key"):
|
if stream_data.get("drm_key_id") and stream_data.get("drm_key"):
|
||||||
response.update(
|
result.update(
|
||||||
{
|
{
|
||||||
"query_params": {
|
"query_params": {
|
||||||
"key_id": stream_data["drm_key_id"],
|
"key_id": stream_data["drm_key_id"],
|
||||||
@@ -85,7 +86,7 @@ class LiveTVExtractor(BaseExtractor):
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
return response
|
return result
|
||||||
|
|
||||||
raise ExtractorError("No valid stream found")
|
raise ExtractorError("No valid stream found")
|
||||||
|
|
||||||
@@ -120,7 +121,12 @@ class LiveTVExtractor(BaseExtractor):
|
|||||||
api_url = f"{api_base}{post}/{type_}/{nume}"
|
api_url = f"{api_base}{post}/{type_}/{nume}"
|
||||||
response = await self._make_request(api_url)
|
response = await self._make_request(api_url)
|
||||||
else:
|
else:
|
||||||
form_data = {"action": "doo_player_ajax", "post": post, "nume": nume, "type": type_}
|
# Use aiohttp FormData for POST requests
|
||||||
|
form_data = aiohttp.FormData()
|
||||||
|
form_data.add_field("action", "doo_player_ajax")
|
||||||
|
form_data.add_field("post", post)
|
||||||
|
form_data.add_field("nume", nume)
|
||||||
|
form_data.add_field("type", type_)
|
||||||
response = await self._make_request(api_base, method="POST", data=form_data)
|
response = await self._make_request(api_base, method="POST", data=form_data)
|
||||||
|
|
||||||
# Get iframe URL from API response
|
# Get iframe URL from API response
|
||||||
@@ -136,7 +142,7 @@ class LiveTVExtractor(BaseExtractor):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise ExtractorError(f"Failed to process player option: {str(e)}")
|
raise ExtractorError(f"Failed to process player option: {str(e)}")
|
||||||
|
|
||||||
async def _extract_stream_url(self, iframe_response: Response, iframe_url: str) -> Dict:
|
async def _extract_stream_url(self, iframe_response: HttpResponse, iframe_url: str) -> Dict:
|
||||||
"""
|
"""
|
||||||
Extract final stream URL from iframe content.
|
Extract final stream URL from iframe content.
|
||||||
"""
|
"""
|
||||||
@@ -147,8 +153,9 @@ class LiveTVExtractor(BaseExtractor):
|
|||||||
|
|
||||||
# Check if content is already a direct M3U8 stream
|
# Check if content is already a direct M3U8 stream
|
||||||
content_types = ["application/x-mpegurl", "application/vnd.apple.mpegurl"]
|
content_types = ["application/x-mpegurl", "application/vnd.apple.mpegurl"]
|
||||||
|
content_type = iframe_response.headers.get("content-type", "")
|
||||||
|
|
||||||
if any(ext in iframe_response.headers["content-type"] for ext in content_types):
|
if any(ext in content_type for ext in content_types):
|
||||||
return {"url": iframe_url, "type": "m3u8"}
|
return {"url": iframe_url, "type": "m3u8"}
|
||||||
|
|
||||||
stream_data = {}
|
stream_data = {}
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ class LuluStreamExtractor(BaseExtractor):
|
|||||||
response = await self._make_request(url)
|
response = await self._make_request(url)
|
||||||
|
|
||||||
# See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/lulustream.py
|
# See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/lulustream.py
|
||||||
pattern = r'''sources:\s*\[{file:\s*["'](?P<url>[^"']+)'''
|
pattern = r"""sources:\s*\[{file:\s*["'](?P<url>[^"']+)"""
|
||||||
match = re.search(pattern, response.text, re.DOTALL)
|
match = re.search(pattern, response.text, re.DOTALL)
|
||||||
if not match:
|
if not match:
|
||||||
raise ExtractorError("Failed to extract source URL")
|
raise ExtractorError("Failed to extract source URL")
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
|
|
||||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
from mediaflow_proxy.extractors.base import BaseExtractor
|
||||||
from mediaflow_proxy.utils.packed import eval_solver
|
from mediaflow_proxy.utils.packed import eval_solver
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -22,7 +22,9 @@ class OkruExtractor(BaseExtractor):
|
|||||||
data_options = div.get("data-options")
|
data_options = div.get("data-options")
|
||||||
data = json.loads(data_options)
|
data = json.loads(data_options)
|
||||||
metadata = json.loads(data["flashvars"]["metadata"])
|
metadata = json.loads(data["flashvars"]["metadata"])
|
||||||
final_url = metadata.get("hlsMasterPlaylistUrl") or metadata.get("hlsManifestUrl")
|
final_url = (
|
||||||
|
metadata.get("hlsMasterPlaylistUrl") or metadata.get("hlsManifestUrl") or metadata.get("ondemandHls")
|
||||||
|
)
|
||||||
self.base_headers["referer"] = url
|
self.base_headers["referer"] = url
|
||||||
return {
|
return {
|
||||||
"destination_url": final_url,
|
"destination_url": final_url,
|
||||||
|
|||||||
@@ -1,10 +1,10 @@
|
|||||||
import re
|
import re
|
||||||
import logging
|
import logging
|
||||||
from typing import Any, Dict, Optional
|
from typing import Any, Dict
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||||
from mediaflow_proxy.utils.packed import detect, unpack
|
from mediaflow_proxy.utils.packed import unpack
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -32,18 +32,17 @@ class SportsonlineExtractor(BaseExtractor):
|
|||||||
def _detect_packed_blocks(self, html: str) -> list[str]:
|
def _detect_packed_blocks(self, html: str) -> list[str]:
|
||||||
"""
|
"""
|
||||||
Detect and extract packed eval blocks from HTML.
|
Detect and extract packed eval blocks from HTML.
|
||||||
Replicates the TypeScript logic: /eval\(function(.+?.+)/g
|
|
||||||
"""
|
"""
|
||||||
# Find all eval(function...) blocks - more greedy to capture full packed code
|
# Find all eval(function...) blocks - more greedy to capture full packed code
|
||||||
pattern = re.compile(r"eval\(function\(p,a,c,k,e,.*?\)\)(?:\s*;|\s*<)", re.DOTALL)
|
pattern = re.compile(r"eval\(function\(p,a,c,k,e,.*?\)\)(?:\s*;|\s*<)", re.DOTALL)
|
||||||
raw_matches = pattern.findall(html)
|
raw_matches = pattern.findall(html)
|
||||||
|
|
||||||
# If no matches with the strict pattern, try a more relaxed one
|
# If no matches with the strict pattern, try a more relaxed one
|
||||||
if not raw_matches:
|
if not raw_matches:
|
||||||
# Try to find eval(function and capture until we find the closing ))
|
# Try to find eval(function and capture until we find the closing ))
|
||||||
pattern = re.compile(r"eval\(function\(p,a,c,k,e,[dr]\).*?\}\(.*?\)\)", re.DOTALL)
|
pattern = re.compile(r"eval\(function\(p,a,c,k,e,[dr]\).*?\}\(.*?\)\)", re.DOTALL)
|
||||||
raw_matches = pattern.findall(html)
|
raw_matches = pattern.findall(html)
|
||||||
|
|
||||||
return raw_matches
|
return raw_matches
|
||||||
|
|
||||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||||
@@ -60,25 +59,25 @@ class SportsonlineExtractor(BaseExtractor):
|
|||||||
raise ExtractorError("No iframe found on the page")
|
raise ExtractorError("No iframe found on the page")
|
||||||
|
|
||||||
iframe_url = iframe_match.group(1)
|
iframe_url = iframe_match.group(1)
|
||||||
|
|
||||||
# Normalize iframe URL
|
# Normalize iframe URL
|
||||||
if iframe_url.startswith('//'):
|
if iframe_url.startswith("//"):
|
||||||
iframe_url = 'https:' + iframe_url
|
iframe_url = "https:" + iframe_url
|
||||||
elif iframe_url.startswith('/'):
|
elif iframe_url.startswith("/"):
|
||||||
parsed_main = urlparse(url)
|
parsed_main = urlparse(url)
|
||||||
iframe_url = f"{parsed_main.scheme}://{parsed_main.netloc}{iframe_url}"
|
iframe_url = f"{parsed_main.scheme}://{parsed_main.netloc}{iframe_url}"
|
||||||
|
|
||||||
logger.info(f"Found iframe URL: {iframe_url}")
|
logger.info(f"Found iframe URL: {iframe_url}")
|
||||||
|
|
||||||
# Step 2: Fetch iframe with Referer
|
# Step 2: Fetch iframe with Referer
|
||||||
iframe_headers = {
|
iframe_headers = {
|
||||||
'Referer': 'https://sportzonline.st/',
|
"Referer": "https://sportzonline.st/",
|
||||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36',
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36",
|
||||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||||
'Accept-Language': 'en-US,en;q=0.9,it;q=0.8',
|
"Accept-Language": "en-US,en;q=0.9,it;q=0.8",
|
||||||
'Cache-Control': 'no-cache'
|
"Cache-Control": "no-cache",
|
||||||
}
|
}
|
||||||
|
|
||||||
iframe_response = await self._make_request(iframe_url, headers=iframe_headers, timeout=15)
|
iframe_response = await self._make_request(iframe_url, headers=iframe_headers, timeout=15)
|
||||||
iframe_html = iframe_response.text
|
iframe_html = iframe_response.text
|
||||||
|
|
||||||
@@ -86,9 +85,9 @@ class SportsonlineExtractor(BaseExtractor):
|
|||||||
|
|
||||||
# Step 3: Detect packed blocks
|
# Step 3: Detect packed blocks
|
||||||
packed_blocks = self._detect_packed_blocks(iframe_html)
|
packed_blocks = self._detect_packed_blocks(iframe_html)
|
||||||
|
|
||||||
logger.info(f"Found {len(packed_blocks)} packed blocks")
|
logger.info(f"Found {len(packed_blocks)} packed blocks")
|
||||||
|
|
||||||
if not packed_blocks:
|
if not packed_blocks:
|
||||||
logger.warning("No packed blocks found, trying direct m3u8 search")
|
logger.warning("No packed blocks found, trying direct m3u8 search")
|
||||||
# Fallback: try direct m3u8 search
|
# Fallback: try direct m3u8 search
|
||||||
@@ -96,13 +95,10 @@ class SportsonlineExtractor(BaseExtractor):
|
|||||||
if direct_match:
|
if direct_match:
|
||||||
m3u8_url = direct_match.group(1)
|
m3u8_url = direct_match.group(1)
|
||||||
logger.info(f"Found direct m3u8 URL: {m3u8_url}")
|
logger.info(f"Found direct m3u8 URL: {m3u8_url}")
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"destination_url": m3u8_url,
|
"destination_url": m3u8_url,
|
||||||
"request_headers": {
|
"request_headers": {"Referer": iframe_url, "User-Agent": iframe_headers["User-Agent"]},
|
||||||
'Referer': iframe_url,
|
|
||||||
'User-Agent': iframe_headers['User-Agent']
|
|
||||||
},
|
|
||||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
@@ -134,13 +130,13 @@ class SportsonlineExtractor(BaseExtractor):
|
|||||||
r'file\s*:\s*["\']([^"\']+\.m3u8[^"\']*)["\']', # file: "...m3u8"
|
r'file\s*:\s*["\']([^"\']+\.m3u8[^"\']*)["\']', # file: "...m3u8"
|
||||||
r'["\']([^"\']*https?://[^"\']+\.m3u8[^"\']*)["\']', # any m3u8 URL
|
r'["\']([^"\']*https?://[^"\']+\.m3u8[^"\']*)["\']', # any m3u8 URL
|
||||||
]
|
]
|
||||||
|
|
||||||
for pattern in patterns:
|
for pattern in patterns:
|
||||||
src_match = re.search(pattern, unpacked_code)
|
src_match = re.search(pattern, unpacked_code)
|
||||||
if src_match:
|
if src_match:
|
||||||
m3u8_url = src_match.group(1)
|
m3u8_url = src_match.group(1)
|
||||||
# Verify it looks like a valid m3u8 URL
|
# Verify it looks like a valid m3u8 URL
|
||||||
if '.m3u8' in m3u8_url or 'http' in m3u8_url:
|
if ".m3u8" in m3u8_url or "http" in m3u8_url:
|
||||||
break
|
break
|
||||||
m3u8_url = None
|
m3u8_url = None
|
||||||
|
|
||||||
@@ -162,11 +158,11 @@ class SportsonlineExtractor(BaseExtractor):
|
|||||||
src_match = re.search(pattern, unpacked_code)
|
src_match = re.search(pattern, unpacked_code)
|
||||||
if src_match:
|
if src_match:
|
||||||
test_url = src_match.group(1)
|
test_url = src_match.group(1)
|
||||||
if '.m3u8' in test_url or 'http' in test_url:
|
if ".m3u8" in test_url or "http" in test_url:
|
||||||
m3u8_url = test_url
|
m3u8_url = test_url
|
||||||
logger.info(f"Found m3u8 in block {i}")
|
logger.info(f"Found m3u8 in block {i}")
|
||||||
break
|
break
|
||||||
|
|
||||||
if m3u8_url:
|
if m3u8_url:
|
||||||
break
|
break
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -181,10 +177,7 @@ class SportsonlineExtractor(BaseExtractor):
|
|||||||
# Return stream configuration
|
# Return stream configuration
|
||||||
return {
|
return {
|
||||||
"destination_url": m3u8_url,
|
"destination_url": m3u8_url,
|
||||||
"request_headers": {
|
"request_headers": {"Referer": iframe_url, "User-Agent": iframe_headers["User-Agent"]},
|
||||||
'Referer': iframe_url,
|
|
||||||
'User-Agent': iframe_headers['User-Agent']
|
|
||||||
},
|
|
||||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -15,8 +15,8 @@ class StreamtapeExtractor(BaseExtractor):
|
|||||||
if not matches:
|
if not matches:
|
||||||
raise ExtractorError("Failed to extract URL components")
|
raise ExtractorError("Failed to extract URL components")
|
||||||
i = 0
|
i = 0
|
||||||
for i in range(len(matches)):
|
for i in range(len(matches)):
|
||||||
if matches[i-1] == matches[i] and "ip=" in matches[i]:
|
if matches[i - 1] == matches[i] and "ip=" in matches[i]:
|
||||||
final_url = f"https://streamtape.com/get_video?{matches[i]}"
|
final_url = f"https://streamtape.com/get_video?{matches[i]}"
|
||||||
|
|
||||||
self.base_headers["referer"] = url
|
self.base_headers["referer"] = url
|
||||||
|
|||||||
@@ -19,18 +19,11 @@ class StreamWishExtractor(BaseExtractor):
|
|||||||
|
|
||||||
headers = {"Referer": referer}
|
headers = {"Referer": referer}
|
||||||
response = await self._make_request(url, headers=headers)
|
response = await self._make_request(url, headers=headers)
|
||||||
|
|
||||||
iframe_match = re.search(
|
iframe_match = re.search(r'<iframe[^>]+src=["\']([^"\']+)["\']', response.text, re.DOTALL)
|
||||||
r'<iframe[^>]+src=["\']([^"\']+)["\']',
|
|
||||||
response.text,
|
|
||||||
re.DOTALL
|
|
||||||
)
|
|
||||||
iframe_url = urljoin(url, iframe_match.group(1)) if iframe_match else url
|
iframe_url = urljoin(url, iframe_match.group(1)) if iframe_match else url
|
||||||
|
|
||||||
iframe_response = await self._make_request(
|
iframe_response = await self._make_request(iframe_url, headers=headers)
|
||||||
iframe_url,
|
|
||||||
headers=headers
|
|
||||||
)
|
|
||||||
html = iframe_response.text
|
html = iframe_response.text
|
||||||
|
|
||||||
final_url = self._extract_m3u8(html)
|
final_url = self._extract_m3u8(html)
|
||||||
@@ -58,15 +51,18 @@ class StreamWishExtractor(BaseExtractor):
|
|||||||
final_url = urljoin(iframe_url, final_url)
|
final_url = urljoin(iframe_url, final_url)
|
||||||
|
|
||||||
origin = f"{urlparse(referer).scheme}://{urlparse(referer).netloc}"
|
origin = f"{urlparse(referer).scheme}://{urlparse(referer).netloc}"
|
||||||
self.base_headers.update({
|
self.base_headers.update(
|
||||||
"Referer": referer,
|
{
|
||||||
"Origin": origin,
|
"Referer": referer,
|
||||||
})
|
"Origin": origin,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"destination_url": final_url,
|
"destination_url": final_url,
|
||||||
"request_headers": self.base_headers,
|
"request_headers": self.base_headers,
|
||||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
|
"stream_transformer": "ts_stream",
|
||||||
}
|
}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -74,8 +70,5 @@ class StreamWishExtractor(BaseExtractor):
|
|||||||
"""
|
"""
|
||||||
Extract first absolute m3u8 URL from text
|
Extract first absolute m3u8 URL from text
|
||||||
"""
|
"""
|
||||||
match = re.search(
|
match = re.search(r'https?://[^"\']+\.m3u8[^"\']*', text)
|
||||||
r'https?://[^"\']+\.m3u8[^"\']*',
|
|
||||||
text
|
|
||||||
)
|
|
||||||
return match.group(0) if match else None
|
return match.group(0) if match else None
|
||||||
|
|||||||
@@ -1,27 +1,64 @@
|
|||||||
import re
|
import re
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
|
from urllib.parse import urljoin, urlparse
|
||||||
|
|
||||||
from mediaflow_proxy.extractors.base import BaseExtractor
|
from bs4 import BeautifulSoup, SoupStrainer
|
||||||
from mediaflow_proxy.utils.packed import eval_solver
|
from curl_cffi.requests import AsyncSession
|
||||||
|
|
||||||
|
|
||||||
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||||
|
from mediaflow_proxy.utils.packed import unpack, detect, UnpackingError
|
||||||
|
|
||||||
|
|
||||||
class SupervideoExtractor(BaseExtractor):
|
class SupervideoExtractor(BaseExtractor):
|
||||||
"""Supervideo URL extractor."""
|
"""Supervideo URL extractor.
|
||||||
|
|
||||||
|
Uses curl_cffi to bypass Cloudflare protection.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
self.mediaflow_endpoint = "hls_manifest_proxy"
|
self.mediaflow_endpoint = "hls_manifest_proxy"
|
||||||
|
|
||||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||||
headers = {'Accept': '*/*', 'Connection': 'keep-alive', 'User-Agent': 'Mozilla/5.0 (Linux; Android 12) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.71 Mobile Safari/537.36', 'user-agent': 'Mozilla/5.0 (Linux; Android 12) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.71 Mobile Safari/537.36'}
|
"""Extract video URL from Supervideo.
|
||||||
|
|
||||||
|
Uses curl_cffi with Chrome impersonation to bypass Cloudflare.
|
||||||
|
"""
|
||||||
|
|
||||||
patterns = [r'file:"(.*?)"']
|
patterns = [r'file:"(.*?)"']
|
||||||
|
|
||||||
final_url = await eval_solver(self, url, headers, patterns)
|
try:
|
||||||
|
async with AsyncSession() as session:
|
||||||
|
response = await session.get(url, impersonate="chrome")
|
||||||
|
|
||||||
self.base_headers["referer"] = url
|
if response.status_code != 200:
|
||||||
return {
|
raise ExtractorError(f"HTTP {response.status_code} while fetching {url}")
|
||||||
"destination_url": final_url,
|
|
||||||
"request_headers": self.base_headers,
|
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("script"))
|
||||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
script_all = soup.find_all("script")
|
||||||
}
|
|
||||||
|
for script in script_all:
|
||||||
|
if script.text and detect(script.text):
|
||||||
|
unpacked_code = unpack(script.text)
|
||||||
|
for pattern in patterns:
|
||||||
|
match = re.search(pattern, unpacked_code)
|
||||||
|
if match:
|
||||||
|
extracted_url = match.group(1)
|
||||||
|
if not urlparse(extracted_url).scheme:
|
||||||
|
extracted_url = urljoin(url, extracted_url)
|
||||||
|
|
||||||
|
self.base_headers["referer"] = url
|
||||||
|
return {
|
||||||
|
"destination_url": extracted_url,
|
||||||
|
"request_headers": self.base_headers,
|
||||||
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
|
}
|
||||||
|
|
||||||
|
raise ExtractorError("No packed JS found or no file URL pattern matched")
|
||||||
|
|
||||||
|
except UnpackingError as e:
|
||||||
|
raise ExtractorError(f"Failed to unpack Supervideo JS: {e}")
|
||||||
|
except Exception as e:
|
||||||
|
if isinstance(e, ExtractorError):
|
||||||
|
raise
|
||||||
|
raise ExtractorError(f"Supervideo extraction failed: {e}")
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
import re
|
import re
|
||||||
from typing import Dict, Any
|
|
||||||
|
|
||||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||||
|
|
||||||
@@ -36,7 +35,7 @@ class TurboVidPlayExtractor(BaseExtractor):
|
|||||||
if media_url.startswith("//"):
|
if media_url.startswith("//"):
|
||||||
media_url = "https:" + media_url
|
media_url = "https:" + media_url
|
||||||
elif media_url.startswith("/"):
|
elif media_url.startswith("/"):
|
||||||
media_url = response.url.origin + media_url
|
media_url = response.get_origin() + media_url
|
||||||
|
|
||||||
#
|
#
|
||||||
# 3. Fetch the intermediate playlist
|
# 3. Fetch the intermediate playlist
|
||||||
@@ -53,16 +52,11 @@ class TurboVidPlayExtractor(BaseExtractor):
|
|||||||
|
|
||||||
real_m3u8 = m2.group(0)
|
real_m3u8 = m2.group(0)
|
||||||
|
|
||||||
#
|
|
||||||
# 5. Final headers
|
|
||||||
#
|
|
||||||
self.base_headers["referer"] = url
|
|
||||||
|
|
||||||
#
|
|
||||||
# 6. Always return master proxy (your MediaFlow only supports this)
|
|
||||||
#
|
|
||||||
return {
|
return {
|
||||||
"destination_url": real_m3u8,
|
"destination_url": real_m3u8,
|
||||||
"request_headers": self.base_headers,
|
"request_headers": {"origin": response.get_origin()},
|
||||||
|
"propagate_response_headers": {"content-type": "video/mp2t"},
|
||||||
|
"remove_response_headers": ["content-length", "content-range"],
|
||||||
"mediaflow_endpoint": "hls_manifest_proxy",
|
"mediaflow_endpoint": "hls_manifest_proxy",
|
||||||
|
"stream_transformer": "ts_stream", # Use TS transformer for PNG/padding stripping
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import logging
|
import logging
|
||||||
from typing import Any, Dict, Optional
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -8,6 +9,11 @@ logger = logging.getLogger(__name__)
|
|||||||
class VavooExtractor(BaseExtractor):
|
class VavooExtractor(BaseExtractor):
|
||||||
"""Vavoo URL extractor for resolving vavoo.to links.
|
"""Vavoo URL extractor for resolving vavoo.to links.
|
||||||
|
|
||||||
|
Supports two URL formats:
|
||||||
|
1. Web-VOD API links: https://vavoo.to/web-vod/api/get?link=...
|
||||||
|
These redirect (302) to external video hosts (Doodstream, etc.)
|
||||||
|
2. Legacy mediahubmx format (currently broken on Vavoo's end)
|
||||||
|
|
||||||
Features:
|
Features:
|
||||||
- Uses BaseExtractor's retry/timeouts
|
- Uses BaseExtractor's retry/timeouts
|
||||||
- Improved headers to mimic Android okhttp client
|
- Improved headers to mimic Android okhttp client
|
||||||
@@ -18,6 +24,40 @@ class VavooExtractor(BaseExtractor):
|
|||||||
super().__init__(request_headers)
|
super().__init__(request_headers)
|
||||||
self.mediaflow_endpoint = "proxy_stream_endpoint"
|
self.mediaflow_endpoint = "proxy_stream_endpoint"
|
||||||
|
|
||||||
|
async def _resolve_web_vod_link(self, url: str) -> str:
|
||||||
|
"""Resolve a web-vod API link by getting the redirect Location header."""
|
||||||
|
import aiohttp
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Use aiohttp directly with allow_redirects=False to get the Location header
|
||||||
|
timeout = aiohttp.ClientTimeout(total=10)
|
||||||
|
async with aiohttp.ClientSession(timeout=timeout) as session:
|
||||||
|
async with session.get(
|
||||||
|
url,
|
||||||
|
headers={"Accept": "application/json"},
|
||||||
|
allow_redirects=False,
|
||||||
|
) as resp:
|
||||||
|
# Check for redirect
|
||||||
|
if resp.status in (301, 302, 303, 307, 308):
|
||||||
|
location = resp.headers.get("Location") or resp.headers.get("location")
|
||||||
|
if location:
|
||||||
|
logger.info(f"Vavoo web-vod redirected to: {location}")
|
||||||
|
return location
|
||||||
|
|
||||||
|
# If we got a 200, the response might contain the URL
|
||||||
|
if resp.status == 200:
|
||||||
|
text = await resp.text()
|
||||||
|
if text and text.startswith("http"):
|
||||||
|
logger.info(f"Vavoo web-vod resolved to: {text.strip()}")
|
||||||
|
return text.strip()
|
||||||
|
|
||||||
|
raise ExtractorError(f"Vavoo web-vod API returned unexpected status {resp.status}")
|
||||||
|
|
||||||
|
except ExtractorError:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
raise ExtractorError(f"Failed to resolve Vavoo web-vod link: {e}")
|
||||||
|
|
||||||
async def get_auth_signature(self) -> Optional[str]:
|
async def get_auth_signature(self) -> Optional[str]:
|
||||||
"""Get authentication signature for Vavoo API (async)."""
|
"""Get authentication signature for Vavoo API (async)."""
|
||||||
headers = {
|
headers = {
|
||||||
@@ -27,10 +67,11 @@ class VavooExtractor(BaseExtractor):
|
|||||||
"accept-encoding": "gzip",
|
"accept-encoding": "gzip",
|
||||||
}
|
}
|
||||||
import time
|
import time
|
||||||
|
|
||||||
current_time = int(time.time() * 1000)
|
current_time = int(time.time() * 1000)
|
||||||
|
|
||||||
data = {
|
data = {
|
||||||
"token": "tosFwQCJMS8qrW_AjLoHPQ41646J5dRNha6ZWHnijoYQQQoADQoXYSo7ki7O5-CsgN4CH0uRk6EEoJ0728ar9scCRQW3ZkbfrPfeCXW2VgopSW2FWDqPOoVYIuVPAOnXCZ5g",
|
"token": "",
|
||||||
"reason": "app-blur",
|
"reason": "app-blur",
|
||||||
"locale": "de",
|
"locale": "de",
|
||||||
"theme": "dark",
|
"theme": "dark",
|
||||||
@@ -40,21 +81,11 @@ class VavooExtractor(BaseExtractor):
|
|||||||
"brand": "google",
|
"brand": "google",
|
||||||
"model": "Pixel",
|
"model": "Pixel",
|
||||||
"name": "sdk_gphone64_arm64",
|
"name": "sdk_gphone64_arm64",
|
||||||
"uniqueId": "d10e5d99ab665233"
|
"uniqueId": "d10e5d99ab665233",
|
||||||
},
|
|
||||||
"os": {
|
|
||||||
"name": "android",
|
|
||||||
"version": "13"
|
|
||||||
},
|
|
||||||
"app": {
|
|
||||||
"platform": "android",
|
|
||||||
"version": "3.1.21"
|
|
||||||
},
|
|
||||||
"version": {
|
|
||||||
"package": "tv.vavoo.app",
|
|
||||||
"binary": "3.1.21",
|
|
||||||
"js": "3.1.21"
|
|
||||||
},
|
},
|
||||||
|
"os": {"name": "android", "version": "13"},
|
||||||
|
"app": {"platform": "android", "version": "3.1.21"},
|
||||||
|
"version": {"package": "tv.vavoo.app", "binary": "3.1.21", "js": "3.1.21"},
|
||||||
},
|
},
|
||||||
"appFocusTime": 0,
|
"appFocusTime": 0,
|
||||||
"playerActive": False,
|
"playerActive": False,
|
||||||
@@ -75,11 +106,9 @@ class VavooExtractor(BaseExtractor):
|
|||||||
"ssVersion": 1,
|
"ssVersion": 1,
|
||||||
"enabled": True,
|
"enabled": True,
|
||||||
"autoServer": True,
|
"autoServer": True,
|
||||||
"id": "de-fra"
|
"id": "de-fra",
|
||||||
},
|
},
|
||||||
"iap": {
|
"iap": {"supported": False},
|
||||||
"supported": False
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -94,7 +123,7 @@ class VavooExtractor(BaseExtractor):
|
|||||||
try:
|
try:
|
||||||
result = resp.json()
|
result = resp.json()
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.warning("Vavoo ping returned non-json response (status=%s).", resp.status_code)
|
logger.warning("Vavoo ping returned non-json response (status=%s).", resp.status)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
addon_sig = result.get("addonSig") if isinstance(result, dict) else None
|
addon_sig = result.get("addonSig") if isinstance(result, dict) else None
|
||||||
@@ -109,10 +138,48 @@ class VavooExtractor(BaseExtractor):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||||
"""Extract Vavoo stream URL (async)."""
|
"""Extract Vavoo stream URL (async).
|
||||||
|
|
||||||
|
Supports:
|
||||||
|
- Direct play URLs: https://vavoo.to/play/{id}/index.m3u8 (Live TV)
|
||||||
|
- Web-VOD API links: https://vavoo.to/web-vod/api/get?link=...
|
||||||
|
- Legacy mediahubmx links (may not work due to Vavoo API changes)
|
||||||
|
"""
|
||||||
if "vavoo.to" not in url:
|
if "vavoo.to" not in url:
|
||||||
raise ExtractorError("Not a valid Vavoo URL")
|
raise ExtractorError("Not a valid Vavoo URL")
|
||||||
|
|
||||||
|
# Check if this is a direct play URL (Live TV)
|
||||||
|
# These URLs are already m3u8 streams but need auth signature
|
||||||
|
if "/play/" in url and url.endswith(".m3u8"):
|
||||||
|
signature = await self.get_auth_signature()
|
||||||
|
if not signature:
|
||||||
|
raise ExtractorError("Failed to get Vavoo authentication signature for Live TV")
|
||||||
|
|
||||||
|
stream_headers = {
|
||||||
|
"user-agent": "okhttp/4.11.0",
|
||||||
|
"referer": "https://vavoo.to/",
|
||||||
|
"mediahubmx-signature": signature,
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
"destination_url": url,
|
||||||
|
"request_headers": stream_headers,
|
||||||
|
"mediaflow_endpoint": "hls_manifest_proxy",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check if this is a web-vod API link (new format)
|
||||||
|
if "/web-vod/api/get" in url:
|
||||||
|
resolved_url = await self._resolve_web_vod_link(url)
|
||||||
|
stream_headers = {
|
||||||
|
"user-agent": self.base_headers.get("user-agent", "Mozilla/5.0"),
|
||||||
|
"referer": "https://vavoo.to/",
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
"destination_url": resolved_url,
|
||||||
|
"request_headers": stream_headers,
|
||||||
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Legacy mediahubmx flow
|
||||||
signature = await self.get_auth_signature()
|
signature = await self.get_auth_signature()
|
||||||
if not signature:
|
if not signature:
|
||||||
raise ExtractorError("Failed to get Vavoo authentication signature")
|
raise ExtractorError("Failed to get Vavoo authentication signature")
|
||||||
@@ -139,14 +206,9 @@ class VavooExtractor(BaseExtractor):
|
|||||||
"accept": "application/json",
|
"accept": "application/json",
|
||||||
"content-type": "application/json; charset=utf-8",
|
"content-type": "application/json; charset=utf-8",
|
||||||
"accept-encoding": "gzip",
|
"accept-encoding": "gzip",
|
||||||
"mediahubmx-signature": signature
|
"mediahubmx-signature": signature,
|
||||||
}
|
|
||||||
data = {
|
|
||||||
"language": "de",
|
|
||||||
"region": "AT",
|
|
||||||
"url": link,
|
|
||||||
"clientVersion": "3.1.21"
|
|
||||||
}
|
}
|
||||||
|
data = {"language": "de", "region": "AT", "url": link, "clientVersion": "3.1.21"}
|
||||||
try:
|
try:
|
||||||
logger.info(f"Attempting to resolve Vavoo URL: {link}")
|
logger.info(f"Attempting to resolve Vavoo URL: {link}")
|
||||||
resp = await self._make_request(
|
resp = await self._make_request(
|
||||||
@@ -161,7 +223,11 @@ class VavooExtractor(BaseExtractor):
|
|||||||
try:
|
try:
|
||||||
result = resp.json()
|
result = resp.json()
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.warning("Vavoo resolve returned non-json response (status=%s). Body preview: %s", resp.status_code, getattr(resp, "text", "")[:500])
|
logger.warning(
|
||||||
|
"Vavoo resolve returned non-json response (status=%s). Body preview: %s",
|
||||||
|
resp.status,
|
||||||
|
getattr(resp, "text", "")[:500],
|
||||||
|
)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
logger.debug("Vavoo API response: %s", result)
|
logger.debug("Vavoo API response: %s", result)
|
||||||
|
|||||||
@@ -16,10 +16,9 @@ class VidmolyExtractor(BaseExtractor):
|
|||||||
raise ExtractorError("VIDMOLY: Invalid domain")
|
raise ExtractorError("VIDMOLY: Invalid domain")
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
"User-Agent":
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
"Chrome/120 Safari/537.36",
|
||||||
"Chrome/120 Safari/537.36",
|
|
||||||
"Referer": url,
|
"Referer": url,
|
||||||
"Sec-Fetch-Dest": "iframe",
|
"Sec-Fetch-Dest": "iframe",
|
||||||
}
|
}
|
||||||
@@ -29,10 +28,7 @@ class VidmolyExtractor(BaseExtractor):
|
|||||||
html = response.text
|
html = response.text
|
||||||
|
|
||||||
# --- Extract master m3u8 ---
|
# --- Extract master m3u8 ---
|
||||||
match = re.search(
|
match = re.search(r'sources\s*:\s*\[\s*\{\s*file\s*:\s*[\'"]([^\'"]+)', html)
|
||||||
r'sources:\s*\[\{file:"([^"]+)',
|
|
||||||
html
|
|
||||||
)
|
|
||||||
if not match:
|
if not match:
|
||||||
raise ExtractorError("VIDMOLY: Stream URL not found")
|
raise ExtractorError("VIDMOLY: Stream URL not found")
|
||||||
|
|
||||||
@@ -49,10 +45,8 @@ class VidmolyExtractor(BaseExtractor):
|
|||||||
raise ExtractorError("VIDMOLY: Request timed out")
|
raise ExtractorError("VIDMOLY: Request timed out")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
if test.status_code >= 400:
|
if test.status >= 400:
|
||||||
raise ExtractorError(
|
raise ExtractorError(f"VIDMOLY: Stream unavailable ({test.status})")
|
||||||
f"VIDMOLY: Stream unavailable ({test.status_code})"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Return MASTER playlist, not variant
|
# Return MASTER playlist, not variant
|
||||||
# Let MediaFlow Proxy handle variants
|
# Let MediaFlow Proxy handle variants
|
||||||
|
|||||||
@@ -8,23 +8,23 @@ from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
|||||||
class VidozaExtractor(BaseExtractor):
|
class VidozaExtractor(BaseExtractor):
|
||||||
def __init__(self, request_headers: dict):
|
def __init__(self, request_headers: dict):
|
||||||
super().__init__(request_headers)
|
super().__init__(request_headers)
|
||||||
# if your base doesn’t set this, keep it; otherwise you can remove:
|
|
||||||
self.mediaflow_endpoint = "proxy_stream_endpoint"
|
self.mediaflow_endpoint = "proxy_stream_endpoint"
|
||||||
|
|
||||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||||
parsed = urlparse(url)
|
parsed = urlparse(url)
|
||||||
|
|
||||||
# Accept vidoza + videzz
|
|
||||||
if not parsed.hostname or not (
|
if not parsed.hostname or not (
|
||||||
parsed.hostname.endswith("vidoza.net")
|
parsed.hostname.endswith("vidoza.net") or parsed.hostname.endswith("videzz.net")
|
||||||
or parsed.hostname.endswith("videzz.net")
|
|
||||||
):
|
):
|
||||||
raise ExtractorError("VIDOZA: Invalid domain")
|
raise ExtractorError("VIDOZA: Invalid domain")
|
||||||
|
|
||||||
|
# Use the correct referer for clones
|
||||||
|
referer = f"https://{parsed.hostname}/"
|
||||||
|
|
||||||
headers = self.base_headers.copy()
|
headers = self.base_headers.copy()
|
||||||
headers.update(
|
headers.update(
|
||||||
{
|
{
|
||||||
"referer": "https://vidoza.net/",
|
"referer": referer,
|
||||||
"user-agent": (
|
"user-agent": (
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||||
@@ -35,16 +35,14 @@ class VidozaExtractor(BaseExtractor):
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
# 1) Fetch the embed page (or whatever URL you pass in)
|
# 1) Fetch embed page
|
||||||
response = await self._make_request(url, headers=headers)
|
response = await self._make_request(url, headers=headers)
|
||||||
html = response.text or ""
|
html = response.text or ""
|
||||||
|
|
||||||
if not html:
|
if not html:
|
||||||
raise ExtractorError("VIDOZA: Empty HTML from Vidoza")
|
raise ExtractorError("VIDOZA: Empty HTML")
|
||||||
|
|
||||||
cookies = response.cookies or {}
|
# 2) Extract video URL
|
||||||
|
|
||||||
# 2) Extract final link with REGEX
|
|
||||||
pattern = re.compile(
|
pattern = re.compile(
|
||||||
r"""["']?\s*(?:file|src)\s*["']?\s*[:=,]?\s*["'](?P<url>[^"']+)"""
|
r"""["']?\s*(?:file|src)\s*["']?\s*[:=,]?\s*["'](?P<url>[^"']+)"""
|
||||||
r"""(?:[^}>\]]+)["']?\s*res\s*["']?\s*[:=]\s*["']?(?P<label>[^"',]+)""",
|
r"""(?:[^}>\]]+)["']?\s*res\s*["']?\s*[:=]\s*["']?(?P<label>[^"',]+)""",
|
||||||
@@ -53,21 +51,15 @@ class VidozaExtractor(BaseExtractor):
|
|||||||
|
|
||||||
match = pattern.search(html)
|
match = pattern.search(html)
|
||||||
if not match:
|
if not match:
|
||||||
raise ExtractorError("VIDOZA: Unable to extract video + label from JS")
|
raise ExtractorError("VIDOZA: Video URL not found")
|
||||||
|
|
||||||
mp4_url = match.group("url")
|
video_url = match.group("url")
|
||||||
label = match.group("label").strip()
|
|
||||||
|
|
||||||
# Fix URLs like //str38.vidoza.net/...
|
if video_url.startswith("//"):
|
||||||
if mp4_url.startswith("//"):
|
video_url = "https:" + video_url
|
||||||
mp4_url = "https:" + mp4_url
|
|
||||||
|
|
||||||
# 3) Attach cookies (token may depend on these)
|
|
||||||
if cookies:
|
|
||||||
headers["cookie"] = "; ".join(f"{k}={v}" for k, v in cookies.items())
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"destination_url": mp4_url,
|
"destination_url": video_url,
|
||||||
"request_headers": headers,
|
"request_headers": headers,
|
||||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from urllib.parse import urlparse, parse_qs
|
|
||||||
|
|
||||||
from bs4 import BeautifulSoup, SoupStrainer
|
from bs4 import BeautifulSoup, SoupStrainer
|
||||||
|
|
||||||
@@ -25,7 +24,7 @@ class VixCloudExtractor(BaseExtractor):
|
|||||||
"Origin": f"{site_url}",
|
"Origin": f"{site_url}",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
if response.status_code != 200:
|
if response.status != 200:
|
||||||
raise ExtractorError("Outdated Url")
|
raise ExtractorError("Outdated Url")
|
||||||
# Soup the response
|
# Soup the response
|
||||||
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("div", {"id": "app"}))
|
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("div", {"id": "app"}))
|
||||||
@@ -48,8 +47,8 @@ class VixCloudExtractor(BaseExtractor):
|
|||||||
response = await self._make_request(iframe, headers={"x-inertia": "true", "x-inertia-version": version})
|
response = await self._make_request(iframe, headers={"x-inertia": "true", "x-inertia-version": version})
|
||||||
elif "movie" in url or "tv" in url:
|
elif "movie" in url or "tv" in url:
|
||||||
response = await self._make_request(url)
|
response = await self._make_request(url)
|
||||||
|
|
||||||
if response.status_code != 200:
|
if response.status != 200:
|
||||||
raise ExtractorError("Failed to extract URL components, Invalid Request")
|
raise ExtractorError("Failed to extract URL components, Invalid Request")
|
||||||
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("body"))
|
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("body"))
|
||||||
if soup:
|
if soup:
|
||||||
@@ -58,7 +57,7 @@ class VixCloudExtractor(BaseExtractor):
|
|||||||
expires = re.search(r"'expires':\s*'(\d+)'", script).group(1)
|
expires = re.search(r"'expires':\s*'(\d+)'", script).group(1)
|
||||||
server_url = re.search(r"url:\s*'([^']+)'", script).group(1)
|
server_url = re.search(r"url:\s*'([^']+)'", script).group(1)
|
||||||
if "?b=1" in server_url:
|
if "?b=1" in server_url:
|
||||||
final_url = f'{server_url}&token={token}&expires={expires}'
|
final_url = f"{server_url}&token={token}&expires={expires}"
|
||||||
else:
|
else:
|
||||||
final_url = f"{server_url}?token={token}&expires={expires}"
|
final_url = f"{server_url}?token={token}&expires={expires}"
|
||||||
if "window.canPlayFHD = true" in script:
|
if "window.canPlayFHD = true" in script:
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ class VoeExtractor(BaseExtractor):
|
|||||||
response = await self._make_request(url)
|
response = await self._make_request(url)
|
||||||
|
|
||||||
# See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/voesx.py
|
# See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/voesx.py
|
||||||
redirect_pattern = r'''window\.location\.href\s*=\s*'([^']+)'''
|
redirect_pattern = r"""window\.location\.href\s*=\s*'([^']+)"""
|
||||||
redirect_match = re.search(redirect_pattern, response.text, re.DOTALL)
|
redirect_match = re.search(redirect_pattern, response.text, re.DOTALL)
|
||||||
if redirect_match:
|
if redirect_match:
|
||||||
if redirected:
|
if redirected:
|
||||||
@@ -37,7 +37,7 @@ class VoeExtractor(BaseExtractor):
|
|||||||
|
|
||||||
data = self.voe_decode(code_and_script_match.group(1), luts_match.group(1))
|
data = self.voe_decode(code_and_script_match.group(1), luts_match.group(1))
|
||||||
|
|
||||||
final_url = data.get('source')
|
final_url = data.get("source")
|
||||||
if not final_url:
|
if not final_url:
|
||||||
raise ExtractorError("VOE: failed to extract video URL")
|
raise ExtractorError("VOE: failed to extract video URL")
|
||||||
|
|
||||||
@@ -51,8 +51,9 @@ class VoeExtractor(BaseExtractor):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def voe_decode(ct: str, luts: str) -> Dict[str, Any]:
|
def voe_decode(ct: str, luts: str) -> Dict[str, Any]:
|
||||||
import json
|
import json
|
||||||
lut = [''.join([('\\' + x) if x in '.*+?^${}()|[]\\' else x for x in i]) for i in luts[2:-2].split("','")]
|
|
||||||
txt = ''
|
lut = ["".join([("\\" + x) if x in ".*+?^${}()|[]\\" else x for x in i]) for i in luts[2:-2].split("','")]
|
||||||
|
txt = ""
|
||||||
for i in ct:
|
for i in ct:
|
||||||
x = ord(i)
|
x = ord(i)
|
||||||
if 64 < x < 91:
|
if 64 < x < 91:
|
||||||
@@ -61,8 +62,8 @@ class VoeExtractor(BaseExtractor):
|
|||||||
x = (x - 84) % 26 + 97
|
x = (x - 84) % 26 + 97
|
||||||
txt += chr(x)
|
txt += chr(x)
|
||||||
for i in lut:
|
for i in lut:
|
||||||
txt = re.sub(i, '', txt)
|
txt = re.sub(i, "", txt)
|
||||||
ct = base64.b64decode(txt).decode('utf-8')
|
ct = base64.b64decode(txt).decode("utf-8")
|
||||||
txt = ''.join([chr(ord(i) - 3) for i in ct])
|
txt = "".join([chr(ord(i) - 3) for i in ct])
|
||||||
txt = base64.b64decode(txt[::-1]).decode('utf-8')
|
txt = base64.b64decode(txt[::-1]).decode("utf-8")
|
||||||
return json.loads(txt)
|
return json.loads(txt)
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,7 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
|
import sys
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
from importlib import resources
|
from importlib import resources
|
||||||
|
|
||||||
from fastapi import FastAPI, Depends, Security, HTTPException
|
from fastapi import FastAPI, Depends, Security, HTTPException
|
||||||
@@ -10,14 +12,85 @@ from starlette.staticfiles import StaticFiles
|
|||||||
|
|
||||||
from mediaflow_proxy.configs import settings
|
from mediaflow_proxy.configs import settings
|
||||||
from mediaflow_proxy.middleware import UIAccessControlMiddleware
|
from mediaflow_proxy.middleware import UIAccessControlMiddleware
|
||||||
from mediaflow_proxy.routes import proxy_router, extractor_router, speedtest_router, playlist_builder_router
|
from mediaflow_proxy.routes import (
|
||||||
|
proxy_router,
|
||||||
|
extractor_router,
|
||||||
|
speedtest_router,
|
||||||
|
playlist_builder_router,
|
||||||
|
xtream_root_router,
|
||||||
|
acestream_router,
|
||||||
|
telegram_router,
|
||||||
|
)
|
||||||
from mediaflow_proxy.schemas import GenerateUrlRequest, GenerateMultiUrlRequest, MultiUrlRequestItem
|
from mediaflow_proxy.schemas import GenerateUrlRequest, GenerateMultiUrlRequest, MultiUrlRequestItem
|
||||||
from mediaflow_proxy.utils.crypto_utils import EncryptionHandler, EncryptionMiddleware
|
from mediaflow_proxy.utils.crypto_utils import EncryptionHandler, EncryptionMiddleware
|
||||||
|
from mediaflow_proxy.utils import redis_utils
|
||||||
from mediaflow_proxy.utils.http_utils import encode_mediaflow_proxy_url
|
from mediaflow_proxy.utils.http_utils import encode_mediaflow_proxy_url
|
||||||
from mediaflow_proxy.utils.base64_utils import encode_url_to_base64, decode_base64_url, is_base64_url
|
from mediaflow_proxy.utils.base64_utils import encode_url_to_base64, decode_base64_url, is_base64_url
|
||||||
|
from mediaflow_proxy.utils.acestream import acestream_manager
|
||||||
|
from mediaflow_proxy.remuxer.video_transcoder import get_hw_capability, HWAccelType
|
||||||
|
from mediaflow_proxy.utils.telegram import telegram_manager
|
||||||
|
|
||||||
logging.basicConfig(level=settings.log_level, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
logging.basicConfig(level=settings.log_level, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
||||||
app = FastAPI()
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Suppress Telethon's "RuntimeError: coroutine ignored GeneratorExit" warnings.
|
||||||
|
# These are harmless GC noise from Telethon's internal _recv_loop coroutines
|
||||||
|
# when parallel download connections are cleaned up after client disconnect.
|
||||||
|
_default_unraisable_hook = sys.unraisablehook
|
||||||
|
|
||||||
|
|
||||||
|
def _filtered_unraisable_hook(unraisable):
|
||||||
|
if isinstance(unraisable.exc_value, RuntimeError) and "coroutine ignored GeneratorExit" in str(
|
||||||
|
unraisable.exc_value
|
||||||
|
):
|
||||||
|
return # Suppress Telethon GC noise
|
||||||
|
_default_unraisable_hook(unraisable)
|
||||||
|
|
||||||
|
|
||||||
|
sys.unraisablehook = _filtered_unraisable_hook
|
||||||
|
|
||||||
|
|
||||||
|
@asynccontextmanager
|
||||||
|
async def lifespan(app: FastAPI):
|
||||||
|
"""Application lifespan handler for startup and shutdown events."""
|
||||||
|
# Startup
|
||||||
|
if settings.clear_cache_on_startup:
|
||||||
|
logger.info("Clearing caches on startup (CLEAR_CACHE_ON_STARTUP=true)")
|
||||||
|
# Note: Redis cache clearing would require FLUSHDB which is too aggressive.
|
||||||
|
# Individual cache entries will expire via TTL. If full clear is needed,
|
||||||
|
# use redis-cli KEYS "mfp:*" | xargs redis-cli DEL
|
||||||
|
logger.info("Cache clearing note: Redis entries will expire via TTL")
|
||||||
|
|
||||||
|
# Log transcoding capability
|
||||||
|
hw = get_hw_capability()
|
||||||
|
if hw.accel_type != HWAccelType.NONE and settings.transcode_prefer_gpu:
|
||||||
|
logger.info(
|
||||||
|
"Transcode ready: GPU %s (encoder=%s) | PyAV pipeline",
|
||||||
|
hw.accel_type.value,
|
||||||
|
hw.h264_encoder,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logger.info(
|
||||||
|
"Transcode ready: CPU (%s) | PyAV pipeline",
|
||||||
|
hw.h264_encoder,
|
||||||
|
)
|
||||||
|
|
||||||
|
yield
|
||||||
|
|
||||||
|
# Shutdown
|
||||||
|
logger.info("Shutting down...")
|
||||||
|
# Close acestream sessions
|
||||||
|
await acestream_manager.close()
|
||||||
|
logger.info("Acestream manager closed")
|
||||||
|
# Close telegram session
|
||||||
|
await telegram_manager.close()
|
||||||
|
logger.info("Telegram manager closed")
|
||||||
|
# Close Redis connections
|
||||||
|
await redis_utils.close_redis()
|
||||||
|
logger.info("Redis connections closed")
|
||||||
|
|
||||||
|
|
||||||
|
app = FastAPI(lifespan=lifespan)
|
||||||
api_password_query = APIKeyQuery(name="api_password", auto_error=False)
|
api_password_query = APIKeyQuery(name="api_password", auto_error=False)
|
||||||
api_password_header = APIKeyHeader(name="api_password", auto_error=False)
|
api_password_header = APIKeyHeader(name="api_password", auto_error=False)
|
||||||
app.add_middleware(
|
app.add_middleware(
|
||||||
@@ -66,6 +139,11 @@ async def show_speedtest_page():
|
|||||||
return RedirectResponse(url="/speedtest.html")
|
return RedirectResponse(url="/speedtest.html")
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/url-generator")
|
||||||
|
async def show_url_generator_page():
|
||||||
|
return RedirectResponse(url="/url_generator.html")
|
||||||
|
|
||||||
|
|
||||||
@app.post(
|
@app.post(
|
||||||
"/generate_encrypted_or_encoded_url",
|
"/generate_encrypted_or_encoded_url",
|
||||||
description="Generate a single encoded URL",
|
description="Generate a single encoded URL",
|
||||||
@@ -112,6 +190,8 @@ async def generate_url(request: GenerateUrlRequest):
|
|||||||
query_params=query_params,
|
query_params=query_params,
|
||||||
request_headers=request.request_headers,
|
request_headers=request.request_headers,
|
||||||
response_headers=request.response_headers,
|
response_headers=request.response_headers,
|
||||||
|
propagate_response_headers=request.propagate_response_headers,
|
||||||
|
remove_response_headers=request.remove_response_headers,
|
||||||
encryption_handler=encryption_handler,
|
encryption_handler=encryption_handler,
|
||||||
expiration=request.expiration,
|
expiration=request.expiration,
|
||||||
ip=ip_str,
|
ip=ip_str,
|
||||||
@@ -151,6 +231,8 @@ async def generate_urls(request: GenerateMultiUrlRequest):
|
|||||||
query_params=query_params,
|
query_params=query_params,
|
||||||
request_headers=url_item.request_headers,
|
request_headers=url_item.request_headers,
|
||||||
response_headers=url_item.response_headers,
|
response_headers=url_item.response_headers,
|
||||||
|
propagate_response_headers=url_item.propagate_response_headers,
|
||||||
|
remove_response_headers=url_item.remove_response_headers,
|
||||||
encryption_handler=encryption_handler,
|
encryption_handler=encryption_handler,
|
||||||
expiration=request.expiration,
|
expiration=request.expiration,
|
||||||
ip=ip_str,
|
ip=ip_str,
|
||||||
@@ -171,10 +253,10 @@ async def generate_urls(request: GenerateMultiUrlRequest):
|
|||||||
async def encode_url_base64(url: str):
|
async def encode_url_base64(url: str):
|
||||||
"""
|
"""
|
||||||
Encode a URL to base64 format.
|
Encode a URL to base64 format.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
url (str): The URL to encode.
|
url (str): The URL to encode.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict: A dictionary containing the encoded URL.
|
dict: A dictionary containing the encoded URL.
|
||||||
"""
|
"""
|
||||||
@@ -194,17 +276,17 @@ async def encode_url_base64(url: str):
|
|||||||
async def decode_url_base64(encoded_url: str):
|
async def decode_url_base64(encoded_url: str):
|
||||||
"""
|
"""
|
||||||
Decode a base64 encoded URL.
|
Decode a base64 encoded URL.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
encoded_url (str): The base64 encoded URL to decode.
|
encoded_url (str): The base64 encoded URL to decode.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict: A dictionary containing the decoded URL.
|
dict: A dictionary containing the decoded URL.
|
||||||
"""
|
"""
|
||||||
decoded_url = decode_base64_url(encoded_url)
|
decoded_url = decode_base64_url(encoded_url)
|
||||||
if decoded_url is None:
|
if decoded_url is None:
|
||||||
raise HTTPException(status_code=400, detail="Invalid base64 encoded URL")
|
raise HTTPException(status_code=400, detail="Invalid base64 encoded URL")
|
||||||
|
|
||||||
return {"decoded_url": decoded_url, "encoded_url": encoded_url}
|
return {"decoded_url": decoded_url, "encoded_url": encoded_url}
|
||||||
|
|
||||||
|
|
||||||
@@ -217,28 +299,32 @@ async def decode_url_base64(encoded_url: str):
|
|||||||
async def check_base64_url(url: str):
|
async def check_base64_url(url: str):
|
||||||
"""
|
"""
|
||||||
Check if a string appears to be a base64 encoded URL.
|
Check if a string appears to be a base64 encoded URL.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
url (str): The string to check.
|
url (str): The string to check.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict: A dictionary indicating if the string is likely base64 encoded.
|
dict: A dictionary indicating if the string is likely base64 encoded.
|
||||||
"""
|
"""
|
||||||
is_base64 = is_base64_url(url)
|
is_base64 = is_base64_url(url)
|
||||||
result = {"url": url, "is_base64": is_base64}
|
result = {"url": url, "is_base64": is_base64}
|
||||||
|
|
||||||
if is_base64:
|
if is_base64:
|
||||||
decoded_url = decode_base64_url(url)
|
decoded_url = decode_base64_url(url)
|
||||||
if decoded_url:
|
if decoded_url:
|
||||||
result["decoded_url"] = decoded_url
|
result["decoded_url"] = decoded_url
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
app.include_router(proxy_router, prefix="/proxy", tags=["proxy"], dependencies=[Depends(verify_api_key)])
|
app.include_router(proxy_router, prefix="/proxy", tags=["proxy"], dependencies=[Depends(verify_api_key)])
|
||||||
|
app.include_router(acestream_router, prefix="/proxy", tags=["acestream"], dependencies=[Depends(verify_api_key)])
|
||||||
|
app.include_router(telegram_router, prefix="/proxy", tags=["telegram"], dependencies=[Depends(verify_api_key)])
|
||||||
app.include_router(extractor_router, prefix="/extractor", tags=["extractors"], dependencies=[Depends(verify_api_key)])
|
app.include_router(extractor_router, prefix="/extractor", tags=["extractors"], dependencies=[Depends(verify_api_key)])
|
||||||
app.include_router(speedtest_router, prefix="/speedtest", tags=["speedtest"], dependencies=[Depends(verify_api_key)])
|
app.include_router(speedtest_router, prefix="/speedtest", tags=["speedtest"], dependencies=[Depends(verify_api_key)])
|
||||||
app.include_router(playlist_builder_router, prefix="/playlist", tags=["playlist"])
|
app.include_router(playlist_builder_router, prefix="/playlist", tags=["playlist"])
|
||||||
|
# Root-level XC endpoints for IPTV player compatibility (handles its own API key verification)
|
||||||
|
app.include_router(xtream_root_router, tags=["xtream"])
|
||||||
|
|
||||||
static_path = resources.files("mediaflow_proxy").joinpath("static")
|
static_path = resources.files("mediaflow_proxy").joinpath("static")
|
||||||
app.mount("/", StaticFiles(directory=str(static_path), html=True), name="static")
|
app.mount("/", StaticFiles(directory=str(static_path), html=True), name="static")
|
||||||
|
|||||||
@@ -5,17 +5,39 @@ import time
|
|||||||
|
|
||||||
from fastapi import Request, Response, HTTPException
|
from fastapi import Request, Response, HTTPException
|
||||||
|
|
||||||
from mediaflow_proxy.drm.decrypter import decrypt_segment
|
from mediaflow_proxy.drm.decrypter import decrypt_segment, process_drm_init_segment
|
||||||
from mediaflow_proxy.utils.crypto_utils import encryption_handler
|
from mediaflow_proxy.utils.crypto_utils import encryption_handler
|
||||||
from mediaflow_proxy.utils.http_utils import encode_mediaflow_proxy_url, get_original_scheme, ProxyRequestHeaders
|
from mediaflow_proxy.utils.http_utils import (
|
||||||
|
encode_mediaflow_proxy_url,
|
||||||
|
get_original_scheme,
|
||||||
|
ProxyRequestHeaders,
|
||||||
|
apply_header_manipulation,
|
||||||
|
)
|
||||||
from mediaflow_proxy.utils.dash_prebuffer import dash_prebuffer
|
from mediaflow_proxy.utils.dash_prebuffer import dash_prebuffer
|
||||||
|
from mediaflow_proxy.utils.cache_utils import get_cached_processed_init, set_cached_processed_init
|
||||||
|
from mediaflow_proxy.utils.m3u8_processor import SkipSegmentFilter
|
||||||
|
from mediaflow_proxy.remuxer.ts_muxer import remux_fmp4_to_ts
|
||||||
from mediaflow_proxy.configs import settings
|
from mediaflow_proxy.configs import settings
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_ts_mode(request: Request) -> bool:
|
||||||
|
"""Resolve the effective TS remux mode from the request query params, falling back to settings."""
|
||||||
|
override = request.query_params.get("remux_to_ts")
|
||||||
|
if override is not None:
|
||||||
|
return override.lower() in ("true", "1", "yes")
|
||||||
|
return settings.remux_to_ts
|
||||||
|
|
||||||
|
|
||||||
async def process_manifest(
|
async def process_manifest(
|
||||||
request: Request, mpd_dict: dict, proxy_headers: ProxyRequestHeaders, key_id: str = None, key: str = None
|
request: Request,
|
||||||
|
mpd_dict: dict,
|
||||||
|
proxy_headers: ProxyRequestHeaders,
|
||||||
|
key_id: str = None,
|
||||||
|
key: str = None,
|
||||||
|
resolution: str = None,
|
||||||
|
skip_segments: list = None,
|
||||||
) -> Response:
|
) -> Response:
|
||||||
"""
|
"""
|
||||||
Processes the MPD manifest and converts it to an HLS manifest.
|
Processes the MPD manifest and converts it to an HLS manifest.
|
||||||
@@ -26,12 +48,14 @@ async def process_manifest(
|
|||||||
proxy_headers (ProxyRequestHeaders): The headers to include in the request.
|
proxy_headers (ProxyRequestHeaders): The headers to include in the request.
|
||||||
key_id (str, optional): The DRM key ID. Defaults to None.
|
key_id (str, optional): The DRM key ID. Defaults to None.
|
||||||
key (str, optional): The DRM key. Defaults to None.
|
key (str, optional): The DRM key. Defaults to None.
|
||||||
|
resolution (str, optional): Target resolution (e.g., '1080p', '720p'). Defaults to None.
|
||||||
|
skip_segments (list, optional): List of time segments to skip. Each item should have 'start' and 'end' keys.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Response: The HLS manifest as an HTTP response.
|
Response: The HLS manifest as an HTTP response.
|
||||||
"""
|
"""
|
||||||
hls_content = build_hls(mpd_dict, request, key_id, key)
|
hls_content = build_hls(mpd_dict, request, key_id, key, resolution, skip_segments)
|
||||||
|
|
||||||
# Start DASH pre-buffering in background if enabled
|
# Start DASH pre-buffering in background if enabled
|
||||||
if settings.enable_dash_prebuffer:
|
if settings.enable_dash_prebuffer:
|
||||||
# Extract headers for pre-buffering
|
# Extract headers for pre-buffering
|
||||||
@@ -39,20 +63,23 @@ async def process_manifest(
|
|||||||
for key, value in request.query_params.items():
|
for key, value in request.query_params.items():
|
||||||
if key.startswith("h_"):
|
if key.startswith("h_"):
|
||||||
headers[key[2:]] = value
|
headers[key[2:]] = value
|
||||||
|
|
||||||
# Get the original MPD URL from the request
|
# Get the original MPD URL from the request
|
||||||
mpd_url = request.query_params.get("d", "")
|
mpd_url = request.query_params.get("d", "")
|
||||||
if mpd_url:
|
if mpd_url:
|
||||||
# Start pre-buffering in background
|
# Start pre-buffering in background
|
||||||
asyncio.create_task(
|
asyncio.create_task(dash_prebuffer.prebuffer_dash_manifest(mpd_url, headers))
|
||||||
dash_prebuffer.prebuffer_dash_manifest(mpd_url, headers)
|
|
||||||
)
|
|
||||||
|
|
||||||
return Response(content=hls_content, media_type="application/vnd.apple.mpegurl", headers=proxy_headers.response)
|
return Response(content=hls_content, media_type="application/vnd.apple.mpegurl", headers=proxy_headers.response)
|
||||||
|
|
||||||
|
|
||||||
async def process_playlist(
|
async def process_playlist(
|
||||||
request: Request, mpd_dict: dict, profile_id: str, proxy_headers: ProxyRequestHeaders
|
request: Request,
|
||||||
|
mpd_dict: dict,
|
||||||
|
profile_id: str,
|
||||||
|
proxy_headers: ProxyRequestHeaders,
|
||||||
|
skip_segments: list = None,
|
||||||
|
start_offset: float = None,
|
||||||
) -> Response:
|
) -> Response:
|
||||||
"""
|
"""
|
||||||
Processes the MPD manifest and converts it to an HLS playlist for a specific profile.
|
Processes the MPD manifest and converts it to an HLS playlist for a specific profile.
|
||||||
@@ -62,6 +89,8 @@ async def process_playlist(
|
|||||||
mpd_dict (dict): The MPD manifest data.
|
mpd_dict (dict): The MPD manifest data.
|
||||||
profile_id (str): The profile ID to generate the playlist for.
|
profile_id (str): The profile ID to generate the playlist for.
|
||||||
proxy_headers (ProxyRequestHeaders): The headers to include in the request.
|
proxy_headers (ProxyRequestHeaders): The headers to include in the request.
|
||||||
|
skip_segments (list, optional): List of time segments to skip. Each item should have 'start' and 'end' keys.
|
||||||
|
start_offset (float, optional): Start offset in seconds for live streams.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Response: The HLS playlist as an HTTP response.
|
Response: The HLS playlist as an HTTP response.
|
||||||
@@ -73,8 +102,22 @@ async def process_playlist(
|
|||||||
if not matching_profiles:
|
if not matching_profiles:
|
||||||
raise HTTPException(status_code=404, detail="Profile not found")
|
raise HTTPException(status_code=404, detail="Profile not found")
|
||||||
|
|
||||||
hls_content = build_hls_playlist(mpd_dict, matching_profiles, request)
|
hls_content = build_hls_playlist(mpd_dict, matching_profiles, request, skip_segments, start_offset)
|
||||||
return Response(content=hls_content, media_type="application/vnd.apple.mpegurl", headers=proxy_headers.response)
|
|
||||||
|
# Trigger prebuffering of upcoming segments for live streams
|
||||||
|
if settings.enable_dash_prebuffer and mpd_dict.get("isLive", False):
|
||||||
|
# Extract headers for pre-buffering
|
||||||
|
headers = {}
|
||||||
|
for key, value in request.query_params.items():
|
||||||
|
if key.startswith("h_"):
|
||||||
|
headers[key[2:]] = value
|
||||||
|
|
||||||
|
# Use the new prefetch method for live playlists
|
||||||
|
asyncio.create_task(dash_prebuffer.prefetch_for_live_playlist(matching_profiles, headers))
|
||||||
|
|
||||||
|
# Don't include propagate headers for playlists - they should only apply to segments
|
||||||
|
response_headers = apply_header_manipulation({}, proxy_headers, include_propagate=False)
|
||||||
|
return Response(content=hls_content, media_type="application/vnd.apple.mpegurl", headers=response_headers)
|
||||||
|
|
||||||
|
|
||||||
async def process_segment(
|
async def process_segment(
|
||||||
@@ -84,9 +127,11 @@ async def process_segment(
|
|||||||
proxy_headers: ProxyRequestHeaders,
|
proxy_headers: ProxyRequestHeaders,
|
||||||
key_id: str = None,
|
key_id: str = None,
|
||||||
key: str = None,
|
key: str = None,
|
||||||
|
use_map: bool = False,
|
||||||
|
remux_ts: bool = None,
|
||||||
) -> Response:
|
) -> Response:
|
||||||
"""
|
"""
|
||||||
Processes and decrypts a media segment.
|
Processes and decrypts a media segment, optionally remuxing to MPEG-TS.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
init_content (bytes): The initialization segment content.
|
init_content (bytes): The initialization segment content.
|
||||||
@@ -95,23 +140,110 @@ async def process_segment(
|
|||||||
proxy_headers (ProxyRequestHeaders): The headers to include in the request.
|
proxy_headers (ProxyRequestHeaders): The headers to include in the request.
|
||||||
key_id (str, optional): The DRM key ID. Defaults to None.
|
key_id (str, optional): The DRM key ID. Defaults to None.
|
||||||
key (str, optional): The DRM key. Defaults to None.
|
key (str, optional): The DRM key. Defaults to None.
|
||||||
|
use_map (bool, optional): If True, init segment is served separately via EXT-X-MAP,
|
||||||
|
so don't concatenate init with segment. Defaults to False.
|
||||||
|
remux_ts (bool, optional): If True, remux fMP4 to MPEG-TS. Defaults to settings.remux_to_ts.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Response: The decrypted segment as an HTTP response.
|
Response: The processed segment as an HTTP response.
|
||||||
"""
|
"""
|
||||||
if key_id and key:
|
if key_id and key:
|
||||||
# For DRM protected content
|
# For DRM protected content
|
||||||
now = time.time()
|
now = time.time()
|
||||||
decrypted_content = decrypt_segment(init_content, segment_content, key_id, key)
|
decrypted_content = decrypt_segment(init_content, segment_content, key_id, key, include_init=not use_map)
|
||||||
logger.info(f"Decryption of {mimetype} segment took {time.time() - now:.4f} seconds")
|
logger.info(f"Decryption of {mimetype} segment took {time.time() - now:.4f} seconds")
|
||||||
else:
|
else:
|
||||||
# For non-DRM protected content, we just concatenate init and segment content
|
# For non-DRM protected content
|
||||||
decrypted_content = init_content + segment_content
|
if use_map:
|
||||||
|
# Init is served separately via EXT-X-MAP
|
||||||
|
decrypted_content = segment_content
|
||||||
|
else:
|
||||||
|
# Concatenate init and segment content
|
||||||
|
decrypted_content = init_content + segment_content
|
||||||
|
|
||||||
return Response(content=decrypted_content, media_type=mimetype, headers=proxy_headers.response)
|
# Check if we should remux to TS
|
||||||
|
should_remux = remux_ts if remux_ts is not None else settings.remux_to_ts
|
||||||
|
|
||||||
|
# Remux both video and audio to MPEG-TS for proper HLS TS playback
|
||||||
|
if should_remux and ("video" in mimetype or "audio" in mimetype):
|
||||||
|
# Remux fMP4 to MPEG-TS for ExoPlayer/VLC compatibility
|
||||||
|
now = time.time()
|
||||||
|
try:
|
||||||
|
# For TS remuxing, we always need init_content for codec config
|
||||||
|
# preserve_timestamps=True keeps the original tfdt timestamps from the
|
||||||
|
# fMP4 segment, ensuring continuous playback across HLS segments
|
||||||
|
ts_content = remux_fmp4_to_ts(
|
||||||
|
init_content,
|
||||||
|
decrypted_content,
|
||||||
|
preserve_timestamps=True,
|
||||||
|
)
|
||||||
|
decrypted_content = ts_content
|
||||||
|
mimetype = "video/mp2t" # Update MIME type for TS (same for audio-only TS)
|
||||||
|
logger.info(f"TS remuxing took {time.time() - now:.4f} seconds")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"TS remuxing failed, returning fMP4: {e}")
|
||||||
|
# Fall through to return original content
|
||||||
|
|
||||||
|
response_headers = apply_header_manipulation({}, proxy_headers)
|
||||||
|
return Response(content=decrypted_content, media_type=mimetype, headers=response_headers)
|
||||||
|
|
||||||
|
|
||||||
def build_hls(mpd_dict: dict, request: Request, key_id: str = None, key: str = None) -> str:
|
async def process_init_segment(
|
||||||
|
init_content: bytes,
|
||||||
|
mimetype: str,
|
||||||
|
proxy_headers: ProxyRequestHeaders,
|
||||||
|
key_id: str = None,
|
||||||
|
key: str = None,
|
||||||
|
init_url: str = None,
|
||||||
|
) -> Response:
|
||||||
|
"""
|
||||||
|
Processes an initialization segment for EXT-X-MAP.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
init_content (bytes): The initialization segment content.
|
||||||
|
mimetype (str): The MIME type of the segment.
|
||||||
|
proxy_headers (ProxyRequestHeaders): The headers to include in the request.
|
||||||
|
key_id (str, optional): The DRM key ID. Defaults to None.
|
||||||
|
key (str, optional): The DRM key. Defaults to None.
|
||||||
|
init_url (str, optional): The init URL for caching. Defaults to None.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Response: The processed init segment as an HTTP response.
|
||||||
|
"""
|
||||||
|
if key_id and key:
|
||||||
|
# Check if we have a cached processed version
|
||||||
|
if init_url:
|
||||||
|
cached_processed = await get_cached_processed_init(init_url, key_id)
|
||||||
|
if cached_processed:
|
||||||
|
logger.debug(f"Using cached processed init segment for {init_url}")
|
||||||
|
response_headers = apply_header_manipulation({}, proxy_headers)
|
||||||
|
return Response(content=cached_processed, media_type=mimetype, headers=response_headers)
|
||||||
|
|
||||||
|
# For DRM protected content, we need to process the init segment
|
||||||
|
# to remove encryption-related boxes but keep the moov structure
|
||||||
|
now = time.time()
|
||||||
|
processed_content = process_drm_init_segment(init_content, key_id, key)
|
||||||
|
logger.info(f"Processing of {mimetype} init segment took {time.time() - now:.4f} seconds")
|
||||||
|
|
||||||
|
# Cache the processed init segment
|
||||||
|
if init_url:
|
||||||
|
await set_cached_processed_init(init_url, key_id, processed_content, ttl=3600)
|
||||||
|
else:
|
||||||
|
# For non-DRM protected content, just return the init segment as-is
|
||||||
|
processed_content = init_content
|
||||||
|
|
||||||
|
response_headers = apply_header_manipulation({}, proxy_headers)
|
||||||
|
return Response(content=processed_content, media_type=mimetype, headers=response_headers)
|
||||||
|
|
||||||
|
|
||||||
|
def build_hls(
|
||||||
|
mpd_dict: dict,
|
||||||
|
request: Request,
|
||||||
|
key_id: str = None,
|
||||||
|
key: str = None,
|
||||||
|
resolution: str = None,
|
||||||
|
skip_segments: list = None,
|
||||||
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Builds an HLS manifest from the MPD manifest.
|
Builds an HLS manifest from the MPD manifest.
|
||||||
|
|
||||||
@@ -120,12 +252,23 @@ def build_hls(mpd_dict: dict, request: Request, key_id: str = None, key: str = N
|
|||||||
request (Request): The incoming HTTP request.
|
request (Request): The incoming HTTP request.
|
||||||
key_id (str, optional): The DRM key ID. Defaults to None.
|
key_id (str, optional): The DRM key ID. Defaults to None.
|
||||||
key (str, optional): The DRM key. Defaults to None.
|
key (str, optional): The DRM key. Defaults to None.
|
||||||
|
resolution (str, optional): Target resolution (e.g., '1080p', '720p'). Defaults to None.
|
||||||
|
skip_segments (list, optional): List of time segments to skip. Each item should have 'start' and 'end' keys.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: The HLS manifest as a string.
|
str: The HLS manifest as a string.
|
||||||
"""
|
"""
|
||||||
hls = ["#EXTM3U", "#EXT-X-VERSION:6"]
|
is_ts_mode = _resolve_ts_mode(request)
|
||||||
|
# Use HLS v3 for TS (ExoPlayer compatibility), v6 for fMP4
|
||||||
|
version = 3 if is_ts_mode else 6
|
||||||
|
hls = ["#EXTM3U", f"#EXT-X-VERSION:{version}"]
|
||||||
query_params = dict(request.query_params)
|
query_params = dict(request.query_params)
|
||||||
|
|
||||||
|
# Preserve skip parameter in query params so it propagates to playlists
|
||||||
|
if skip_segments:
|
||||||
|
# Convert back to compact format for URL
|
||||||
|
skip_str = ",".join(f"{s['start']}-{s['end']}" for s in skip_segments)
|
||||||
|
query_params["skip"] = skip_str
|
||||||
has_encrypted = query_params.pop("has_encrypted", False)
|
has_encrypted = query_params.pop("has_encrypted", False)
|
||||||
|
|
||||||
video_profiles = {}
|
video_profiles = {}
|
||||||
@@ -148,26 +291,113 @@ def build_hls(mpd_dict: dict, request: Request, key_id: str = None, key: str = N
|
|||||||
elif "audio" in profile["mimeType"]:
|
elif "audio" in profile["mimeType"]:
|
||||||
audio_profiles[profile["id"]] = (profile, playlist_url)
|
audio_profiles[profile["id"]] = (profile, playlist_url)
|
||||||
|
|
||||||
|
# Filter video profiles by resolution if specified
|
||||||
|
if resolution and video_profiles:
|
||||||
|
video_profiles = _filter_video_profiles_by_resolution(video_profiles, resolution)
|
||||||
|
|
||||||
|
# For TS mode, only expose the highest quality video variant
|
||||||
|
# ExoPlayer handles adaptive switching poorly with TS remuxing
|
||||||
|
if is_ts_mode and video_profiles:
|
||||||
|
max_height = max(p[0].get("height", 0) for p in video_profiles.values())
|
||||||
|
video_profiles = {k: v for k, v in video_profiles.items() if v[0].get("height", 0) >= max_height}
|
||||||
|
|
||||||
# Add audio streams
|
# Add audio streams
|
||||||
for i, (profile, playlist_url) in enumerate(audio_profiles.values()):
|
for i, (profile, playlist_url) in enumerate(audio_profiles.values()):
|
||||||
is_default = "YES" if i == 0 else "NO" # Set the first audio track as default
|
is_default = "YES" if i == 0 else "NO" # Set the first audio track as default
|
||||||
|
lang = profile.get("lang", "und")
|
||||||
|
bandwidth = profile.get("bandwidth", "128000")
|
||||||
|
name = f"Audio {lang} ({bandwidth})" if lang != "und" else f"Audio {i + 1} ({bandwidth})"
|
||||||
hls.append(
|
hls.append(
|
||||||
f'#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",NAME="{profile["id"]}",DEFAULT={is_default},AUTOSELECT={is_default},LANGUAGE="{profile.get("lang", "und")}",URI="{playlist_url}"'
|
f'#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",NAME="{name}",DEFAULT={is_default},AUTOSELECT=YES,LANGUAGE="{lang}",URI="{playlist_url}"'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Build combined codecs string (video + audio) for EXT-X-STREAM-INF
|
||||||
|
# ExoPlayer requires CODECS to list all codecs when AUDIO group is referenced
|
||||||
|
first_audio_codec = None
|
||||||
|
if audio_profiles:
|
||||||
|
first_audio_profile = next(iter(audio_profiles.values()))[0]
|
||||||
|
first_audio_codec = first_audio_profile.get("codecs", "")
|
||||||
|
|
||||||
# Add video streams
|
# Add video streams
|
||||||
for profile, playlist_url in video_profiles.values():
|
for profile, playlist_url in video_profiles.values():
|
||||||
# Only add AUDIO attribute if there are audio profiles available
|
# Only add AUDIO attribute if there are audio profiles available
|
||||||
audio_attr = ',AUDIO="audio"' if audio_profiles else ""
|
audio_attr = ',AUDIO="audio"' if audio_profiles else ""
|
||||||
hls.append(
|
|
||||||
f'#EXT-X-STREAM-INF:BANDWIDTH={profile["bandwidth"]},RESOLUTION={profile["width"]}x{profile["height"]},CODECS="{profile["codecs"]}",FRAME-RATE={profile["frameRate"]}{audio_attr}'
|
# Build combined codecs: video + audio
|
||||||
)
|
video_codec = profile["codecs"]
|
||||||
|
if first_audio_codec and audio_attr:
|
||||||
|
combined_codecs = f"{video_codec},{first_audio_codec}"
|
||||||
|
else:
|
||||||
|
combined_codecs = video_codec
|
||||||
|
|
||||||
|
# Keep full codec strings (e.g., avc1.42C01F, mp4a.40.2) for ALL modes.
|
||||||
|
# ExoPlayer's CodecSpecificDataUtil rejects simplified strings like "avc1" or "mp4a"
|
||||||
|
# as malformed, which prevents proper codec initialization.
|
||||||
|
|
||||||
|
# Omit FRAME-RATE for TS mode (ExoPlayer compatibility)
|
||||||
|
if is_ts_mode:
|
||||||
|
hls.append(
|
||||||
|
f'#EXT-X-STREAM-INF:BANDWIDTH={profile["bandwidth"]},RESOLUTION={profile["width"]}x{profile["height"]},CODECS="{combined_codecs}"{audio_attr}'
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
hls.append(
|
||||||
|
f'#EXT-X-STREAM-INF:BANDWIDTH={profile["bandwidth"]},RESOLUTION={profile["width"]}x{profile["height"]},CODECS="{combined_codecs}",FRAME-RATE={profile["frameRate"]}{audio_attr}'
|
||||||
|
)
|
||||||
hls.append(playlist_url)
|
hls.append(playlist_url)
|
||||||
|
|
||||||
return "\n".join(hls)
|
return "\n".join(hls)
|
||||||
|
|
||||||
|
|
||||||
def build_hls_playlist(mpd_dict: dict, profiles: list[dict], request: Request) -> str:
|
def _filter_video_profiles_by_resolution(video_profiles: dict, target_resolution: str) -> dict:
|
||||||
|
"""
|
||||||
|
Filter video profiles to select the one matching the target resolution.
|
||||||
|
Falls back to closest lower resolution if exact match not found.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
video_profiles: Dictionary of profile_id -> (profile, playlist_url).
|
||||||
|
target_resolution: Target resolution string (e.g., '1080p', '720p').
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Filtered dictionary with only the selected profile.
|
||||||
|
"""
|
||||||
|
# Parse target height from "1080p" -> 1080
|
||||||
|
target_height = int(target_resolution.rstrip("p"))
|
||||||
|
|
||||||
|
# Convert to list and sort by height descending
|
||||||
|
profiles_list = [
|
||||||
|
(profile_id, profile, playlist_url)
|
||||||
|
for profile_id, (profile, playlist_url) in video_profiles.items()
|
||||||
|
if profile.get("height", 0) > 0
|
||||||
|
]
|
||||||
|
|
||||||
|
if not profiles_list:
|
||||||
|
logger.warning("No video profiles with valid height found, returning all profiles")
|
||||||
|
return video_profiles
|
||||||
|
|
||||||
|
sorted_profiles = sorted(profiles_list, key=lambda x: x[1]["height"], reverse=True)
|
||||||
|
|
||||||
|
# Find exact match or closest lower
|
||||||
|
selected = None
|
||||||
|
for profile_id, profile, playlist_url in sorted_profiles:
|
||||||
|
if profile["height"] <= target_height:
|
||||||
|
selected = (profile_id, profile, playlist_url)
|
||||||
|
break
|
||||||
|
|
||||||
|
# If all profiles are higher than target, use lowest available
|
||||||
|
if selected is None:
|
||||||
|
selected = sorted_profiles[-1]
|
||||||
|
|
||||||
|
profile_id, profile, playlist_url = selected
|
||||||
|
logger.info(
|
||||||
|
f"Selected MPD video profile with resolution {profile['width']}x{profile['height']} for target {target_resolution}"
|
||||||
|
)
|
||||||
|
|
||||||
|
return {profile_id: (profile, playlist_url)}
|
||||||
|
|
||||||
|
|
||||||
|
def build_hls_playlist(
|
||||||
|
mpd_dict: dict, profiles: list[dict], request: Request, skip_segments: list = None, start_offset: float = None
|
||||||
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Builds an HLS playlist from the MPD manifest for specific profiles.
|
Builds an HLS playlist from the MPD manifest for specific profiles.
|
||||||
|
|
||||||
@@ -175,25 +405,65 @@ def build_hls_playlist(mpd_dict: dict, profiles: list[dict], request: Request) -
|
|||||||
mpd_dict (dict): The MPD manifest data.
|
mpd_dict (dict): The MPD manifest data.
|
||||||
profiles (list[dict]): The profiles to include in the playlist.
|
profiles (list[dict]): The profiles to include in the playlist.
|
||||||
request (Request): The incoming HTTP request.
|
request (Request): The incoming HTTP request.
|
||||||
|
skip_segments (list, optional): List of time segments to skip. Each item should have 'start' and 'end' keys.
|
||||||
|
start_offset (float, optional): Start offset in seconds for live streams. Defaults to settings.livestream_start_offset for live.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: The HLS playlist as a string.
|
str: The HLS playlist as a string.
|
||||||
"""
|
"""
|
||||||
hls = ["#EXTM3U", "#EXT-X-VERSION:6"]
|
# Determine if we're in TS remux mode (per-request override > global setting)
|
||||||
|
is_ts_mode = _resolve_ts_mode(request)
|
||||||
|
|
||||||
|
# Use HLS v3 for TS (ExoPlayer compatibility), v6 for fMP4
|
||||||
|
version = 3 if is_ts_mode else 6
|
||||||
|
hls = ["#EXTM3U", f"#EXT-X-VERSION:{version}"]
|
||||||
|
|
||||||
added_segments = 0
|
added_segments = 0
|
||||||
|
skipped_segments = 0
|
||||||
|
is_live = mpd_dict.get("isLive", False)
|
||||||
|
|
||||||
proxy_url = request.url_for("segment_endpoint")
|
# Inject EXT-X-START for live streams (enables prebuffering by starting behind live edge)
|
||||||
|
# User-provided start_offset always takes precedence; otherwise use default for live streams only
|
||||||
|
if is_ts_mode and is_live and start_offset is None:
|
||||||
|
# TS mode needs a larger buffer for ExoPlayer
|
||||||
|
effective_start_offset = -30.0
|
||||||
|
else:
|
||||||
|
effective_start_offset = (
|
||||||
|
start_offset if start_offset is not None else (settings.livestream_start_offset if is_live else None)
|
||||||
|
)
|
||||||
|
if effective_start_offset is not None:
|
||||||
|
# ExoPlayer doesn't handle PRECISE=YES well with TS
|
||||||
|
precise = "NO" if is_ts_mode else "YES"
|
||||||
|
hls.append(f"#EXT-X-START:TIME-OFFSET={effective_start_offset:.1f},PRECISE={precise}")
|
||||||
|
|
||||||
|
# Initialize skip filter if skip_segments provided
|
||||||
|
skip_filter = SkipSegmentFilter(skip_segments) if skip_segments else None
|
||||||
|
|
||||||
|
# In TS mode, we don't use EXT-X-MAP because TS segments are self-contained
|
||||||
|
# (PAT/PMT/VPS/SPS/PPS are embedded in each segment)
|
||||||
|
# Use EXT-X-MAP for live streams, but only for fMP4 (not TS)
|
||||||
|
use_map = is_live and not is_ts_mode
|
||||||
|
|
||||||
|
# Select appropriate endpoint based on remux mode
|
||||||
|
if is_ts_mode:
|
||||||
|
proxy_url = request.url_for("segment_ts_endpoint") # /mpd/segment.ts
|
||||||
|
else:
|
||||||
|
proxy_url = request.url_for("segment_endpoint") # /mpd/segment.mp4
|
||||||
proxy_url = str(proxy_url.replace(scheme=get_original_scheme(request)))
|
proxy_url = str(proxy_url.replace(scheme=get_original_scheme(request)))
|
||||||
|
|
||||||
|
# Get init endpoint URL for EXT-X-MAP (only used for fMP4 mode)
|
||||||
|
init_proxy_url = request.url_for("init_endpoint")
|
||||||
|
init_proxy_url = str(init_proxy_url.replace(scheme=get_original_scheme(request)))
|
||||||
|
|
||||||
for index, profile in enumerate(profiles):
|
for index, profile in enumerate(profiles):
|
||||||
segments = profile["segments"]
|
segments = profile["segments"]
|
||||||
if not segments:
|
if not segments:
|
||||||
logger.warning(f"No segments found for profile {profile['id']}")
|
logger.warning(f"No segments found for profile {profile['id']}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if mpd_dict["isLive"]:
|
if is_live:
|
||||||
depth = max(settings.mpd_live_playlist_depth, 1)
|
# TS mode uses deeper playlist for ExoPlayer buffering
|
||||||
|
depth = 20 if is_ts_mode else max(settings.mpd_live_playlist_depth, 1)
|
||||||
trimmed_segments = segments[-depth:]
|
trimmed_segments = segments[-depth:]
|
||||||
else:
|
else:
|
||||||
trimmed_segments = segments
|
trimmed_segments = segments
|
||||||
@@ -202,24 +472,38 @@ def build_hls_playlist(mpd_dict: dict, profiles: list[dict], request: Request) -
|
|||||||
if index == 0:
|
if index == 0:
|
||||||
first_segment = trimmed_segments[0]
|
first_segment = trimmed_segments[0]
|
||||||
extinf_values = [f["extinf"] for f in trimmed_segments if "extinf" in f]
|
extinf_values = [f["extinf"] for f in trimmed_segments if "extinf" in f]
|
||||||
target_duration = math.ceil(max(extinf_values)) if extinf_values else 3
|
|
||||||
|
# TS mode uses int(max)+1 to reduce buffer underruns in ExoPlayer
|
||||||
|
if is_ts_mode:
|
||||||
|
target_duration = int(max(extinf_values)) + 1 if extinf_values else 10
|
||||||
|
else:
|
||||||
|
target_duration = math.ceil(max(extinf_values)) if extinf_values else 3
|
||||||
|
|
||||||
# Align HLS media sequence with MPD-provided numbering when available
|
# Align HLS media sequence with MPD-provided numbering when available
|
||||||
mpd_start_number = profile.get("segment_template_start_number")
|
if is_ts_mode and is_live:
|
||||||
sequence = first_segment.get("number")
|
# For live TS, derive sequence from timeline first for stable continuity
|
||||||
|
time_val = first_segment.get("time")
|
||||||
if sequence is None:
|
duration_val = first_segment.get("duration_mpd_timescale")
|
||||||
# Fallback to MPD template start number
|
if time_val is not None and duration_val and duration_val > 0:
|
||||||
if mpd_start_number is not None:
|
sequence = math.floor(time_val / duration_val)
|
||||||
sequence = mpd_start_number
|
|
||||||
else:
|
else:
|
||||||
# As a last resort, derive from timeline information
|
sequence = first_segment.get("number") or profile.get("segment_template_start_number") or 1
|
||||||
time_val = first_segment.get("time")
|
else:
|
||||||
duration_val = first_segment.get("duration_mpd_timescale")
|
mpd_start_number = profile.get("segment_template_start_number")
|
||||||
if time_val is not None and duration_val and duration_val > 0:
|
sequence = first_segment.get("number")
|
||||||
sequence = math.floor(time_val / duration_val)
|
|
||||||
|
if sequence is None:
|
||||||
|
# Fallback to MPD template start number
|
||||||
|
if mpd_start_number is not None:
|
||||||
|
sequence = mpd_start_number
|
||||||
else:
|
else:
|
||||||
sequence = 1
|
# As a last resort, derive from timeline information
|
||||||
|
time_val = first_segment.get("time")
|
||||||
|
duration_val = first_segment.get("duration_mpd_timescale")
|
||||||
|
if time_val is not None and duration_val and duration_val > 0:
|
||||||
|
sequence = math.floor(time_val / duration_val)
|
||||||
|
else:
|
||||||
|
sequence = 1
|
||||||
|
|
||||||
hls.extend(
|
hls.extend(
|
||||||
[
|
[
|
||||||
@@ -227,31 +511,91 @@ def build_hls_playlist(mpd_dict: dict, profiles: list[dict], request: Request) -
|
|||||||
f"#EXT-X-MEDIA-SEQUENCE:{sequence}",
|
f"#EXT-X-MEDIA-SEQUENCE:{sequence}",
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
if mpd_dict["isLive"]:
|
# For live streams, don't set PLAYLIST-TYPE to allow sliding window
|
||||||
hls.append("#EXT-X-PLAYLIST-TYPE:EVENT")
|
if not is_live:
|
||||||
else:
|
|
||||||
hls.append("#EXT-X-PLAYLIST-TYPE:VOD")
|
hls.append("#EXT-X-PLAYLIST-TYPE:VOD")
|
||||||
|
|
||||||
init_url = profile["initUrl"]
|
init_url = profile["initUrl"]
|
||||||
|
# For SegmentBase profiles, we may have byte range for initialization segment
|
||||||
|
init_range = profile.get("initRange")
|
||||||
|
|
||||||
query_params = dict(request.query_params)
|
query_params = dict(request.query_params)
|
||||||
query_params.pop("profile_id", None)
|
query_params.pop("profile_id", None)
|
||||||
query_params.pop("d", None)
|
query_params.pop("d", None)
|
||||||
|
query_params.pop("remux_to_ts", None) # per-request override; already resolved into endpoint choice
|
||||||
has_encrypted = query_params.pop("has_encrypted", False)
|
has_encrypted = query_params.pop("has_encrypted", False)
|
||||||
|
|
||||||
for segment in trimmed_segments:
|
# Add EXT-X-MAP for init segment (for live streams or when beneficial)
|
||||||
program_date_time = segment.get("program_date_time")
|
if use_map:
|
||||||
if program_date_time:
|
init_query_params = {
|
||||||
hls.append(f"#EXT-X-PROGRAM-DATE-TIME:{program_date_time}")
|
"init_url": init_url,
|
||||||
hls.append(f'#EXTINF:{segment["extinf"]:.3f},')
|
"mime_type": profile["mimeType"],
|
||||||
query_params.update(
|
"is_live": "true" if is_live else "false",
|
||||||
{
|
}
|
||||||
"init_url": init_url,
|
if init_range:
|
||||||
"segment_url": segment["media"],
|
init_query_params["init_range"] = init_range
|
||||||
"mime_type": profile["mimeType"],
|
# Add key parameters
|
||||||
"is_live": "true" if mpd_dict.get("isLive") else "false",
|
if query_params.get("key_id"):
|
||||||
}
|
init_query_params["key_id"] = query_params["key_id"]
|
||||||
|
if query_params.get("key"):
|
||||||
|
init_query_params["key"] = query_params["key"]
|
||||||
|
# Add api_password for authentication
|
||||||
|
if query_params.get("api_password"):
|
||||||
|
init_query_params["api_password"] = query_params["api_password"]
|
||||||
|
|
||||||
|
init_map_url = encode_mediaflow_proxy_url(
|
||||||
|
init_proxy_url,
|
||||||
|
query_params=init_query_params,
|
||||||
|
encryption_handler=encryption_handler if has_encrypted else None,
|
||||||
)
|
)
|
||||||
|
hls.append(f'#EXT-X-MAP:URI="{init_map_url}"')
|
||||||
|
|
||||||
|
need_discontinuity = False
|
||||||
|
for segment in trimmed_segments:
|
||||||
|
duration = segment["extinf"]
|
||||||
|
|
||||||
|
# Check if this segment should be skipped
|
||||||
|
if skip_filter:
|
||||||
|
if skip_filter.should_skip_segment(duration):
|
||||||
|
skip_filter.advance_time(duration)
|
||||||
|
skipped_segments += 1
|
||||||
|
need_discontinuity = True
|
||||||
|
continue
|
||||||
|
skip_filter.advance_time(duration)
|
||||||
|
|
||||||
|
# Add discontinuity marker after skipped segments
|
||||||
|
if need_discontinuity:
|
||||||
|
hls.append("#EXT-X-DISCONTINUITY")
|
||||||
|
need_discontinuity = False
|
||||||
|
|
||||||
|
# Emit EXT-X-PROGRAM-DATE-TIME only for fMP4 (not TS)
|
||||||
|
program_date_time = segment.get("program_date_time")
|
||||||
|
if program_date_time and not is_ts_mode:
|
||||||
|
hls.append(f"#EXT-X-PROGRAM-DATE-TIME:{program_date_time}")
|
||||||
|
hls.append(f"#EXTINF:{duration:.3f},")
|
||||||
|
|
||||||
|
segment_query_params = {
|
||||||
|
"init_url": init_url,
|
||||||
|
"segment_url": segment["media"],
|
||||||
|
"mime_type": profile["mimeType"],
|
||||||
|
"is_live": "true" if is_live else "false",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Add use_map flag so segment endpoint knows not to include init
|
||||||
|
if use_map and not is_ts_mode:
|
||||||
|
segment_query_params["use_map"] = "true"
|
||||||
|
elif is_ts_mode:
|
||||||
|
# TS segments are self-contained; init is always embedded by remuxer
|
||||||
|
segment_query_params["use_map"] = "false"
|
||||||
|
|
||||||
|
# Add byte range parameters for SegmentBase
|
||||||
|
if init_range:
|
||||||
|
segment_query_params["init_range"] = init_range
|
||||||
|
# Segment may also have its own range (for SegmentBase)
|
||||||
|
if "initRange" in segment:
|
||||||
|
segment_query_params["init_range"] = segment["initRange"]
|
||||||
|
|
||||||
|
query_params.update(segment_query_params)
|
||||||
hls.append(
|
hls.append(
|
||||||
encode_mediaflow_proxy_url(
|
encode_mediaflow_proxy_url(
|
||||||
proxy_url,
|
proxy_url,
|
||||||
@@ -264,5 +608,8 @@ def build_hls_playlist(mpd_dict: dict, profiles: list[dict], request: Request) -
|
|||||||
if not mpd_dict["isLive"]:
|
if not mpd_dict["isLive"]:
|
||||||
hls.append("#EXT-X-ENDLIST")
|
hls.append("#EXT-X-ENDLIST")
|
||||||
|
|
||||||
logger.info(f"Added {added_segments} segments to HLS playlist")
|
if skip_filter and skipped_segments > 0:
|
||||||
|
logger.info(f"Added {added_segments} segments to HLS playlist (skipped {skipped_segments} segments)")
|
||||||
|
else:
|
||||||
|
logger.info(f"Added {added_segments} segments to HLS playlist")
|
||||||
return "\n".join(hls)
|
return "\n".join(hls)
|
||||||
|
|||||||
18
mediaflow_proxy/remuxer/__init__.py
Normal file
18
mediaflow_proxy/remuxer/__init__.py
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
"""
|
||||||
|
Media remuxer package.
|
||||||
|
|
||||||
|
Provides pure Python implementations for media container parsing, remuxing,
|
||||||
|
and transcoding:
|
||||||
|
|
||||||
|
- ebml_parser: Minimal EBML/MKV parser for seeking and demuxing
|
||||||
|
- ts_muxer: fMP4 -> MPEG-TS remuxer
|
||||||
|
- mkv_demuxer: Streaming MKV demuxer
|
||||||
|
- mp4_muxer: MP4 box builder for standard moov-first MP4
|
||||||
|
- audio_transcoder: PyAV-based audio frame transcoding
|
||||||
|
- video_transcoder: GPU-accelerated video transcoding via PyAV
|
||||||
|
- pyav_demuxer: Universal PyAV-based streaming demuxer (any container)
|
||||||
|
- codec_utils: Codec compatibility detection and decision engine
|
||||||
|
- media_source: Abstract MediaSource protocol (Telegram, HTTP, etc.)
|
||||||
|
- transcode_handler: Shared transcode request orchestrator
|
||||||
|
- transcode_pipeline: MKV fast-path and universal transcode pipelines
|
||||||
|
"""
|
||||||
BIN
mediaflow_proxy/remuxer/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
mediaflow_proxy/remuxer/__pycache__/codec_utils.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/codec_utils.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
mediaflow_proxy/remuxer/__pycache__/ebml_parser.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/ebml_parser.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/remuxer/__pycache__/hls_manifest.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/hls_manifest.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/remuxer/__pycache__/media_source.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/media_source.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/remuxer/__pycache__/mkv_demuxer.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/mkv_demuxer.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/remuxer/__pycache__/mp4_muxer.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/mp4_muxer.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/remuxer/__pycache__/mp4_parser.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/mp4_parser.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/remuxer/__pycache__/pyav_demuxer.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/pyav_demuxer.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
mediaflow_proxy/remuxer/__pycache__/ts_muxer.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/ts_muxer.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
351
mediaflow_proxy/remuxer/audio_transcoder.py
Normal file
351
mediaflow_proxy/remuxer/audio_transcoder.py
Normal file
@@ -0,0 +1,351 @@
|
|||||||
|
"""
|
||||||
|
PyAV-based audio transcoder for frame-level codec conversion.
|
||||||
|
|
||||||
|
Transcodes audio frames between codecs using PyAV's CodecContext API
|
||||||
|
(Python bindings for FFmpeg's libavcodec). This provides in-process
|
||||||
|
audio transcoding without subprocess management or pipe overhead.
|
||||||
|
|
||||||
|
Supported input codecs: EAC3, AC3, AAC, Opus, Vorbis, FLAC, MP3
|
||||||
|
Output codec: AAC-LC (stereo, configurable bitrate)
|
||||||
|
|
||||||
|
Architecture:
|
||||||
|
raw_frame_bytes -> parse() -> decode() -> resample() -> encode() -> raw_aac_bytes
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
transcoder = AudioTranscoder("eac3", sample_rate=48000, channels=6)
|
||||||
|
for raw_eac3_frame in frames:
|
||||||
|
aac_frames = transcoder.transcode(raw_eac3_frame)
|
||||||
|
for aac_data in aac_frames:
|
||||||
|
write(aac_data)
|
||||||
|
# Flush remaining frames
|
||||||
|
for aac_data in transcoder.flush():
|
||||||
|
write(aac_data)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
import av
|
||||||
|
from av.audio.resampler import AudioResampler
|
||||||
|
|
||||||
|
from mediaflow_proxy.remuxer.ebml_parser import (
|
||||||
|
CODEC_ID_AAC,
|
||||||
|
CODEC_ID_AC3,
|
||||||
|
CODEC_ID_EAC3,
|
||||||
|
CODEC_ID_FLAC,
|
||||||
|
CODEC_ID_OPUS,
|
||||||
|
CODEC_ID_VORBIS,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _generate_silence_aac_frame() -> bytes | None:
|
||||||
|
"""Pre-encode a single silent AAC frame (48 kHz stereo, 1024 samples).
|
||||||
|
|
||||||
|
PyAV's AAC encoder has an intermittent ``avcodec_send_frame`` bug when
|
||||||
|
rapidly creating/destroying codec contexts, so we retry a few times.
|
||||||
|
This function is called once at module load; the result is cached in
|
||||||
|
``_SILENCE_AAC_FRAME``.
|
||||||
|
"""
|
||||||
|
for _attempt in range(10):
|
||||||
|
try:
|
||||||
|
enc = av.CodecContext.create("aac", "w")
|
||||||
|
enc.sample_rate = 48000
|
||||||
|
enc.layout = "stereo"
|
||||||
|
enc.format = av.AudioFormat("fltp")
|
||||||
|
enc.bit_rate = 192000
|
||||||
|
enc.open()
|
||||||
|
|
||||||
|
frame = av.AudioFrame(
|
||||||
|
format=enc.format.name,
|
||||||
|
layout=enc.layout.name,
|
||||||
|
samples=enc.frame_size or 1024,
|
||||||
|
)
|
||||||
|
frame.sample_rate = enc.sample_rate
|
||||||
|
frame.pts = 0
|
||||||
|
|
||||||
|
for pkt in enc.encode(frame):
|
||||||
|
return bytes(pkt)
|
||||||
|
# AAC priming delay: first encode buffered; flush to retrieve
|
||||||
|
for pkt in enc.encode(None):
|
||||||
|
return bytes(pkt)
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level silence frame -- generated once, reused by every transcoder.
|
||||||
|
_SILENCE_AAC_FRAME: bytes | None = _generate_silence_aac_frame()
|
||||||
|
|
||||||
|
# Map MKV codec IDs to PyAV/FFmpeg codec names
|
||||||
|
_MKV_TO_FFMPEG_CODEC = {
|
||||||
|
CODEC_ID_EAC3: "eac3",
|
||||||
|
CODEC_ID_AC3: "ac3",
|
||||||
|
CODEC_ID_AAC: "aac",
|
||||||
|
CODEC_ID_OPUS: "opus",
|
||||||
|
CODEC_ID_VORBIS: "vorbis",
|
||||||
|
CODEC_ID_FLAC: "flac",
|
||||||
|
"A_DTS": "dts",
|
||||||
|
"A_MP3": "mp3",
|
||||||
|
"A_MPEG/L3": "mp3",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Codecs that need transcoding to AAC for browser playback
|
||||||
|
NEEDS_TRANSCODE = frozenset(
|
||||||
|
{
|
||||||
|
CODEC_ID_EAC3,
|
||||||
|
CODEC_ID_AC3,
|
||||||
|
CODEC_ID_OPUS,
|
||||||
|
CODEC_ID_VORBIS,
|
||||||
|
CODEC_ID_FLAC,
|
||||||
|
"A_DTS",
|
||||||
|
"A_MP3",
|
||||||
|
"A_MPEG/L3",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Output AAC settings
|
||||||
|
_OUTPUT_CODEC = "aac"
|
||||||
|
_OUTPUT_SAMPLE_FORMAT = "fltp" # AAC requires float planar
|
||||||
|
_OUTPUT_LAYOUT = "stereo"
|
||||||
|
|
||||||
|
# Map channel count -> FFmpeg layout name
|
||||||
|
_CHANNEL_LAYOUT_MAP = {
|
||||||
|
1: "mono",
|
||||||
|
2: "stereo",
|
||||||
|
3: "2.1",
|
||||||
|
4: "quad",
|
||||||
|
6: "5.1",
|
||||||
|
8: "7.1",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def needs_transcode(codec_id: str) -> bool:
|
||||||
|
"""Check if an MKV audio codec needs transcoding for browser playback."""
|
||||||
|
return codec_id in NEEDS_TRANSCODE
|
||||||
|
|
||||||
|
|
||||||
|
def get_ffmpeg_codec_name(mkv_codec_id: str) -> str | None:
|
||||||
|
"""Map an MKV CodecID to an FFmpeg codec name."""
|
||||||
|
return _MKV_TO_FFMPEG_CODEC.get(mkv_codec_id)
|
||||||
|
|
||||||
|
|
||||||
|
class AudioTranscoder:
|
||||||
|
"""
|
||||||
|
In-process audio transcoder using PyAV's CodecContext API.
|
||||||
|
|
||||||
|
Decodes raw audio frames from one codec and encodes them to AAC-LC
|
||||||
|
stereo, suitable for MP4 container and browser playback. No container
|
||||||
|
I/O or subprocess involved -- operates directly on raw frame bytes.
|
||||||
|
|
||||||
|
The transcoder handles sample format conversion and resampling
|
||||||
|
automatically via AudioResampler.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
input_codec: str,
|
||||||
|
input_sample_rate: int = 48000,
|
||||||
|
input_channels: int = 6,
|
||||||
|
output_sample_rate: int = 48000,
|
||||||
|
output_channels: int = 2,
|
||||||
|
output_bitrate: int = 192000,
|
||||||
|
) -> None:
|
||||||
|
"""
|
||||||
|
Initialize the transcoder.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
input_codec: FFmpeg codec name (e.g., "eac3", "ac3", "aac").
|
||||||
|
input_sample_rate: Input sample rate in Hz.
|
||||||
|
input_channels: Input channel count.
|
||||||
|
output_sample_rate: Output sample rate in Hz (default 48000).
|
||||||
|
output_channels: Output channel count (default 2 = stereo).
|
||||||
|
output_bitrate: Output bitrate in bits/s (default 192000).
|
||||||
|
"""
|
||||||
|
# Set up decoder -- use layout to configure channel count
|
||||||
|
# (PyAV's channels property is read-only; layout drives it)
|
||||||
|
self._decoder = av.CodecContext.create(input_codec, "r")
|
||||||
|
self._decoder.sample_rate = input_sample_rate
|
||||||
|
input_layout = _CHANNEL_LAYOUT_MAP.get(input_channels, "stereo")
|
||||||
|
self._decoder.layout = input_layout
|
||||||
|
|
||||||
|
# Set up encoder
|
||||||
|
self._encoder = av.CodecContext.create(_OUTPUT_CODEC, "w")
|
||||||
|
self._encoder.sample_rate = output_sample_rate
|
||||||
|
self._encoder.layout = _OUTPUT_LAYOUT
|
||||||
|
self._encoder.format = av.AudioFormat(_OUTPUT_SAMPLE_FORMAT)
|
||||||
|
self._encoder.bit_rate = output_bitrate
|
||||||
|
self._encoder.open()
|
||||||
|
|
||||||
|
# Set up resampler for format/rate/channel conversion
|
||||||
|
self._resampler = AudioResampler(
|
||||||
|
format=_OUTPUT_SAMPLE_FORMAT,
|
||||||
|
layout=_OUTPUT_LAYOUT,
|
||||||
|
rate=output_sample_rate,
|
||||||
|
)
|
||||||
|
|
||||||
|
self._input_codec = input_codec
|
||||||
|
self._frames_decoded = 0
|
||||||
|
self._frames_encoded = 0
|
||||||
|
self._audio_specific_config: bytes | None = None
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"[audio_transcoder] Initialized: %s %dHz %dch -> aac %dHz %dch @%dk",
|
||||||
|
input_codec,
|
||||||
|
input_sample_rate,
|
||||||
|
input_channels,
|
||||||
|
output_sample_rate,
|
||||||
|
output_channels,
|
||||||
|
output_bitrate // 1000,
|
||||||
|
)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def audio_specific_config(self) -> bytes | None:
|
||||||
|
"""
|
||||||
|
AAC AudioSpecificConfig from the encoder (available after first encode).
|
||||||
|
|
||||||
|
This is needed for the MP4 esds box.
|
||||||
|
"""
|
||||||
|
if self._audio_specific_config is not None:
|
||||||
|
return self._audio_specific_config
|
||||||
|
|
||||||
|
# PyAV exposes extradata after the encoder is opened
|
||||||
|
if self._encoder.extradata:
|
||||||
|
self._audio_specific_config = bytes(self._encoder.extradata)
|
||||||
|
return self._audio_specific_config
|
||||||
|
return None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def output_sample_rate(self) -> int:
|
||||||
|
return self._encoder.sample_rate
|
||||||
|
|
||||||
|
@property
|
||||||
|
def output_channels(self) -> int:
|
||||||
|
return self._encoder.channels
|
||||||
|
|
||||||
|
@property
|
||||||
|
def frame_size(self) -> int:
|
||||||
|
"""AAC frame size (samples per frame), typically 1024."""
|
||||||
|
return self._encoder.frame_size or 1024
|
||||||
|
|
||||||
|
def transcode(self, raw_frame_data: bytes) -> list[bytes]:
|
||||||
|
"""
|
||||||
|
Transcode a raw audio frame from the input codec to AAC.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
raw_frame_data: Raw audio frame bytes (one codec frame, e.g.,
|
||||||
|
one EAC3 sync frame).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of raw AAC frame bytes. May return 0, 1, or more frames
|
||||||
|
depending on codec frame sizes and buffering.
|
||||||
|
"""
|
||||||
|
output = []
|
||||||
|
|
||||||
|
# Parse raw bytes into packets
|
||||||
|
packets = self._decoder.parse(raw_frame_data)
|
||||||
|
|
||||||
|
for packet in packets:
|
||||||
|
# Decode to PCM frames
|
||||||
|
try:
|
||||||
|
decoded_frames = self._decoder.decode(packet)
|
||||||
|
except av.error.InvalidDataError as e:
|
||||||
|
logger.debug("[audio_transcoder] Decode error (skipping frame): %s", e)
|
||||||
|
continue
|
||||||
|
|
||||||
|
for frame in decoded_frames:
|
||||||
|
self._frames_decoded += 1
|
||||||
|
|
||||||
|
# Resample to match encoder format
|
||||||
|
resampled = self._resampler.resample(frame)
|
||||||
|
if resampled is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# resampled can be a single frame or list of frames
|
||||||
|
if not isinstance(resampled, list):
|
||||||
|
resampled = [resampled]
|
||||||
|
|
||||||
|
for rs_frame in resampled:
|
||||||
|
# Encode to AAC
|
||||||
|
try:
|
||||||
|
encoded_packets = self._encoder.encode(rs_frame)
|
||||||
|
except av.error.InvalidDataError as e:
|
||||||
|
logger.debug("[audio_transcoder] Encode error: %s", e)
|
||||||
|
continue
|
||||||
|
|
||||||
|
for enc_packet in encoded_packets:
|
||||||
|
self._frames_encoded += 1
|
||||||
|
output.append(bytes(enc_packet))
|
||||||
|
|
||||||
|
return output
|
||||||
|
|
||||||
|
def flush(self) -> list[bytes]:
|
||||||
|
"""
|
||||||
|
Flush the decoder and encoder buffers.
|
||||||
|
|
||||||
|
Call this when the input stream ends to get remaining frames.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of remaining raw AAC frame bytes.
|
||||||
|
"""
|
||||||
|
output = []
|
||||||
|
|
||||||
|
# Flush decoder
|
||||||
|
try:
|
||||||
|
for frame in self._decoder.decode(None):
|
||||||
|
self._frames_decoded += 1
|
||||||
|
resampled = self._resampler.resample(frame)
|
||||||
|
if resampled is None:
|
||||||
|
continue
|
||||||
|
if not isinstance(resampled, list):
|
||||||
|
resampled = [resampled]
|
||||||
|
for rs_frame in resampled:
|
||||||
|
for enc_packet in self._encoder.encode(rs_frame):
|
||||||
|
self._frames_encoded += 1
|
||||||
|
output.append(bytes(enc_packet))
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug("[audio_transcoder] Decoder flush error: %s", e)
|
||||||
|
|
||||||
|
# Flush resampler
|
||||||
|
try:
|
||||||
|
resampled = self._resampler.resample(None)
|
||||||
|
if resampled is not None:
|
||||||
|
if not isinstance(resampled, list):
|
||||||
|
resampled = [resampled]
|
||||||
|
for rs_frame in resampled:
|
||||||
|
for enc_packet in self._encoder.encode(rs_frame):
|
||||||
|
self._frames_encoded += 1
|
||||||
|
output.append(bytes(enc_packet))
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug("[audio_transcoder] Resampler flush error: %s", e)
|
||||||
|
|
||||||
|
# Flush encoder
|
||||||
|
try:
|
||||||
|
for enc_packet in self._encoder.encode(None):
|
||||||
|
self._frames_encoded += 1
|
||||||
|
output.append(bytes(enc_packet))
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug("[audio_transcoder] Encoder flush error: %s", e)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"[audio_transcoder] Flushed: %d decoded, %d encoded total",
|
||||||
|
self._frames_decoded,
|
||||||
|
self._frames_encoded,
|
||||||
|
)
|
||||||
|
return output
|
||||||
|
|
||||||
|
def generate_silence_frame(self) -> bytes | None:
|
||||||
|
"""Return a pre-encoded silent AAC frame (module-level singleton)."""
|
||||||
|
return _SILENCE_AAC_FRAME
|
||||||
|
|
||||||
|
def close(self) -> None:
|
||||||
|
"""Release codec contexts (best-effort; PyAV AudioCodecContext may not have close())."""
|
||||||
|
for ctx in (self._decoder, self._encoder):
|
||||||
|
try:
|
||||||
|
if hasattr(ctx, "close"):
|
||||||
|
ctx.close()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def __del__(self) -> None:
|
||||||
|
self.close()
|
||||||
515
mediaflow_proxy/remuxer/codec_utils.py
Normal file
515
mediaflow_proxy/remuxer/codec_utils.py
Normal file
@@ -0,0 +1,515 @@
|
|||||||
|
"""
|
||||||
|
Codec decision engine for browser compatibility detection.
|
||||||
|
|
||||||
|
Determines whether video/audio streams need transcoding for browser
|
||||||
|
playback and selects appropriate output codecs.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import struct
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# ────────────────────────────────────────────────────────────────────
|
||||||
|
# Browser-compatible codecs (work natively in HTML5 <video>)
|
||||||
|
# ────────────────────────────────────────────────────────────────────
|
||||||
|
BROWSER_VIDEO_CODECS = frozenset(
|
||||||
|
{
|
||||||
|
"V_MPEG4/ISO/AVC", # H.264/AVC -- universal
|
||||||
|
"h264",
|
||||||
|
"avc1", # FFmpeg/PyAV names
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
BROWSER_AUDIO_CODECS = frozenset(
|
||||||
|
{
|
||||||
|
"A_AAC", # AAC-LC -- universal
|
||||||
|
"A_AAC/MPEG2/LC",
|
||||||
|
"A_AAC/MPEG4/LC",
|
||||||
|
"aac", # FFmpeg/PyAV name
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# ────────────────────────────────────────────────────────────────────
|
||||||
|
# Video codecs that need re-encoding to H.264
|
||||||
|
# ────────────────────────────────────────────────────────────────────
|
||||||
|
VIDEO_NEEDS_REENCODE = frozenset(
|
||||||
|
{
|
||||||
|
"V_MPEGH/ISO/HEVC", # H.265/HEVC (Chrome/Firefox don't support)
|
||||||
|
"V_MPEG2", # MPEG-2 (DVD-era)
|
||||||
|
"V_MPEG4/ISO/SP", # MPEG-4 Part 2 Simple Profile
|
||||||
|
"V_MPEG4/ISO/ASP", # MPEG-4 Part 2 Advanced Simple (DivX/Xvid)
|
||||||
|
"V_MPEG4/ISO/AP", # MPEG-4 Part 2 Advanced Profile
|
||||||
|
"V_MPEG4/MS/V3", # MS MPEG-4 v3 (WMV)
|
||||||
|
"V_MS/VFW/FOURCC", # Generic VFW (VC-1, etc.)
|
||||||
|
"V_REAL/RV10",
|
||||||
|
"V_REAL/RV20",
|
||||||
|
"V_REAL/RV30",
|
||||||
|
"V_REAL/RV40",
|
||||||
|
"V_THEORA",
|
||||||
|
"V_VP8",
|
||||||
|
"V_VP9", # VP9 in MKV (needs WebM container for browser)
|
||||||
|
"V_AV1", # AV1 (partial support, safer to reencode)
|
||||||
|
# PyAV / FFmpeg codec names
|
||||||
|
"hevc",
|
||||||
|
"h265",
|
||||||
|
"mpeg2video",
|
||||||
|
"mpeg4",
|
||||||
|
"vc1",
|
||||||
|
"vp8",
|
||||||
|
"vp9",
|
||||||
|
"av1",
|
||||||
|
"theora",
|
||||||
|
"wmv3",
|
||||||
|
"rv30",
|
||||||
|
"rv40",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# ────────────────────────────────────────────────────────────────────
|
||||||
|
# Audio codecs that need transcoding to AAC
|
||||||
|
# (superset of the list in audio_transcoder.py, uses both MKV and
|
||||||
|
# PyAV codec names for universal lookup)
|
||||||
|
# ────────────────────────────────────────────────────────────────────
|
||||||
|
AUDIO_NEEDS_TRANSCODE = frozenset(
|
||||||
|
{
|
||||||
|
# MKV CodecIDs
|
||||||
|
"A_EAC3",
|
||||||
|
"A_AC3",
|
||||||
|
"A_DTS",
|
||||||
|
"A_DTS/EXPRESS",
|
||||||
|
"A_DTS/LOSSLESS",
|
||||||
|
"A_OPUS",
|
||||||
|
"A_VORBIS",
|
||||||
|
"A_FLAC",
|
||||||
|
"A_TRUEHD",
|
||||||
|
"A_MLP",
|
||||||
|
"A_PCM/INT/LIT",
|
||||||
|
"A_PCM/INT/BIG",
|
||||||
|
"A_PCM/FLOAT/IEEE",
|
||||||
|
"A_REAL/28_8",
|
||||||
|
"A_REAL/COOK",
|
||||||
|
"A_REAL/SIPR",
|
||||||
|
"A_REAL/ATRC",
|
||||||
|
"A_MS/ACM", # Generic Windows audio
|
||||||
|
"A_MP3",
|
||||||
|
"A_MPEG/L3",
|
||||||
|
# PyAV / FFmpeg names
|
||||||
|
"eac3",
|
||||||
|
"ac3",
|
||||||
|
"dts",
|
||||||
|
"dca",
|
||||||
|
"truehd",
|
||||||
|
"mlp",
|
||||||
|
"mp3",
|
||||||
|
"opus",
|
||||||
|
"vorbis",
|
||||||
|
"flac",
|
||||||
|
"pcm_s16le",
|
||||||
|
"pcm_s24le",
|
||||||
|
"pcm_f32le",
|
||||||
|
"wmav2",
|
||||||
|
"wmavoice",
|
||||||
|
"wmapro",
|
||||||
|
"cook",
|
||||||
|
"sipr",
|
||||||
|
"atrac3",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Map PyAV codec names to MKV CodecIDs (for the MKV fast-path)
|
||||||
|
_PYAV_TO_MKV_VIDEO = {
|
||||||
|
"h264": "V_MPEG4/ISO/AVC",
|
||||||
|
"hevc": "V_MPEGH/ISO/HEVC",
|
||||||
|
"h265": "V_MPEGH/ISO/HEVC",
|
||||||
|
"mpeg2video": "V_MPEG2",
|
||||||
|
"vp8": "V_VP8",
|
||||||
|
"vp9": "V_VP9",
|
||||||
|
"av1": "V_AV1",
|
||||||
|
}
|
||||||
|
|
||||||
|
_PYAV_TO_MKV_AUDIO = {
|
||||||
|
"aac": "A_AAC",
|
||||||
|
"eac3": "A_EAC3",
|
||||||
|
"ac3": "A_AC3",
|
||||||
|
"dts": "A_DTS",
|
||||||
|
"opus": "A_OPUS",
|
||||||
|
"vorbis": "A_VORBIS",
|
||||||
|
"flac": "A_FLAC",
|
||||||
|
"mp3": "A_MPEG/L3",
|
||||||
|
"truehd": "A_TRUEHD",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ────────────────────────────────────────────────────────────────────
|
||||||
|
# NAL unit format conversion (Annex B ↔ AVCC)
|
||||||
|
# ────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
# H.264 NAL types that belong in the init segment (avcC), not in samples
|
||||||
|
_H264_PARAM_NAL_TYPES = frozenset({7, 8, 9}) # SPS, PPS, AUD
|
||||||
|
|
||||||
|
|
||||||
|
def _find_annexb_nals(data: bytes) -> list[tuple[int, int]]:
|
||||||
|
"""
|
||||||
|
Find all NAL unit [start, end) byte ranges in Annex B formatted data.
|
||||||
|
|
||||||
|
Handles both 3-byte (00 00 01) and 4-byte (00 00 00 01) start codes.
|
||||||
|
Returns a list of (start, end) tuples pointing into *data*.
|
||||||
|
"""
|
||||||
|
size = len(data)
|
||||||
|
nals: list[tuple[int, int]] = []
|
||||||
|
i = 0
|
||||||
|
|
||||||
|
while i < size - 2:
|
||||||
|
# Scan for 0x000001 or 0x00000001
|
||||||
|
if data[i] != 0:
|
||||||
|
i += 1
|
||||||
|
continue
|
||||||
|
if data[i + 1] != 0:
|
||||||
|
i += 2
|
||||||
|
continue
|
||||||
|
if data[i + 2] == 1:
|
||||||
|
nal_start = i + 3
|
||||||
|
elif data[i + 2] == 0 and i + 3 < size and data[i + 3] == 1:
|
||||||
|
nal_start = i + 4
|
||||||
|
else:
|
||||||
|
i += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Record end of previous NAL
|
||||||
|
if nals:
|
||||||
|
nals[-1] = (nals[-1][0], i)
|
||||||
|
nals.append((nal_start, size))
|
||||||
|
i = nal_start
|
||||||
|
|
||||||
|
return nals
|
||||||
|
|
||||||
|
|
||||||
|
def is_annexb(data: bytes) -> bool:
    """
    Heuristically decide whether *data* is Annex B formatted.

    A 4-byte start code (00 00 00 01) is unambiguous. A 3-byte start
    code (00 00 01) can collide with an AVCC 4-byte length prefix in
    the range 256..511; in that case the AVCC reading wins when the
    big-endian length fits inside the buffer and the following byte is
    a plausible H.264 NAL header (forbidden_zero_bit clear,
    nal_unit_type 1..12).
    """
    if len(data) < 5:
        return False

    prefix = data[:4]
    if prefix == b"\x00\x00\x00\x01":
        # Unambiguous 4-byte Annex B start code.
        return True

    if prefix[:3] == b"\x00\x00\x01":
        # Could be AVCC: the prefix read as a 32-bit length is 256..511.
        candidate_len = int.from_bytes(prefix, "big")
        if 0 < candidate_len <= len(data) - 4:
            header = data[4]
            if (header & 0x80) == 0 and 1 <= (header & 0x1F) <= 12:
                # Consistent AVCC interpretation -- not Annex B.
                return False
        return True

    return False
|
||||||
|
|
||||||
|
|
||||||
|
def annexb_to_avcc(data: bytes, filter_ps: bool = True) -> bytes:
    """
    Repackage an Annex B access unit as AVCC (length-prefixed) NALs.

    Args:
        data: H.264 access unit in Annex B format.
        filter_ps: When True, drop SPS/PPS/AUD units; those live in the
            init segment's avcC box rather than in media samples.

    Returns:
        The surviving NAL units, each preceded by a 4-byte big-endian
        length. May be empty if every unit was filtered out -- callers
        should drop such samples, since emitting Annex B bytes into an
        fMP4 sample (which expects AVCC length prefixes) would corrupt it.
    """
    if not data or not is_annexb(data):
        # Empty input, or already AVCC: nothing to convert.
        return data

    ranges = _find_annexb_nals(data)
    if not ranges:
        return data

    pieces: list[bytes] = []
    for begin, stop in ranges:
        # Drop zero padding that precedes the next start code.
        while stop > begin and data[stop - 1] == 0:
            stop -= 1
        if stop <= begin:
            continue

        if filter_ps and (data[begin] & 0x1F) in _H264_PARAM_NAL_TYPES:
            continue

        pieces.append((stop - begin).to_bytes(4, "big"))
        pieces.append(data[begin:stop])

    return b"".join(pieces)
|
||||||
|
|
||||||
|
|
||||||
|
# H.264 profiles that require the avcC High Profile extension fields
# (chroma_format_idc, bit_depth_luma/chroma, numSpsExt).
# Includes High (100), High 10 (110), High 4:2:2 (122), High 4:4:4
# Predictive (244) and the other profile_idc values that use the
# extended avcC layout per ISO/IEC 14496-15.
_HIGH_PROFILE_IDCS = frozenset({100, 110, 122, 244, 44, 83, 86, 118, 128, 138, 139, 134})
|
||||||
|
|
||||||
|
|
||||||
|
def _fix_avcc_high_profile(avcc: bytes) -> bytes:
|
||||||
|
"""
|
||||||
|
Ensure an avcC record includes High Profile extension bytes.
|
||||||
|
|
||||||
|
The ISO/IEC 14496-15 spec requires additional fields after the PPS
|
||||||
|
section when ``AVCProfileIndication`` is 100 (High), 110, 122, or 244.
|
||||||
|
Some MKV muxers omit these, causing decoders to not know the chroma
|
||||||
|
format or bit depth, which leads to widespread decode errors.
|
||||||
|
|
||||||
|
If the extensions are missing, appends the defaults for 4:2:0 / 8-bit
|
||||||
|
with zero extended SPS sets.
|
||||||
|
"""
|
||||||
|
if len(avcc) < 7:
|
||||||
|
return avcc
|
||||||
|
if avcc[0] != 1:
|
||||||
|
return avcc # Not an avcC record
|
||||||
|
|
||||||
|
profile_idc = avcc[1]
|
||||||
|
if profile_idc not in _HIGH_PROFILE_IDCS:
|
||||||
|
return avcc # Not a High Profile variant, no extensions needed
|
||||||
|
|
||||||
|
# Walk past SPS and PPS sections to find where extensions should be
|
||||||
|
off = 5
|
||||||
|
num_sps = avcc[off] & 0x1F
|
||||||
|
off += 1
|
||||||
|
for _ in range(num_sps):
|
||||||
|
if off + 2 > len(avcc):
|
||||||
|
return avcc
|
||||||
|
sps_len = struct.unpack(">H", avcc[off : off + 2])[0]
|
||||||
|
off += 2 + sps_len
|
||||||
|
|
||||||
|
if off >= len(avcc):
|
||||||
|
return avcc
|
||||||
|
num_pps = avcc[off]
|
||||||
|
off += 1
|
||||||
|
for _ in range(num_pps):
|
||||||
|
if off + 2 > len(avcc):
|
||||||
|
return avcc
|
||||||
|
pps_len = struct.unpack(">H", avcc[off : off + 2])[0]
|
||||||
|
off += 2 + pps_len
|
||||||
|
|
||||||
|
# If there are already bytes after the PPS section, extensions exist
|
||||||
|
if off < len(avcc):
|
||||||
|
return avcc
|
||||||
|
|
||||||
|
# Append default High Profile extensions:
|
||||||
|
# chroma_format_idc = 1 (4:2:0) -> 0xFC | 0x01 = 0xFD (reserved 111111 + 01)
|
||||||
|
# bit_depth_luma_minus8 = 0 -> 0xF8 | 0x00 = 0xF8 (reserved 11111 + 000)
|
||||||
|
# bit_depth_chroma_minus8 = 0 -> 0xF8 | 0x00 = 0xF8 (reserved 11111 + 000)
|
||||||
|
# numOfSequenceParameterSetExt = 0
|
||||||
|
ext = bytearray(avcc)
|
||||||
|
ext.append(0xFD) # 111111_01 : chroma_format_idc = 1
|
||||||
|
ext.append(0xF8) # 11111_000 : bit_depth_luma_minus8 = 0
|
||||||
|
ext.append(0xF8) # 11111_000 : bit_depth_chroma_minus8 = 0
|
||||||
|
ext.append(0x00) # numOfSequenceParameterSetExt = 0
|
||||||
|
return bytes(ext)
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_avcc_extradata(extradata: bytes) -> bytes:
    """
    Ensure h264 extradata is in avcC format for the fMP4 init segment.

    PyAV returns extradata in the container's native format:
    - MKV/MP4: avcC format (starts with 0x01)
    - MPEG-TS: Annex B format (starts with 0x00 0x00)

    If Annex B, parses SPS/PPS NAL units and builds proper avcC.
    If already avcC, validates and fixes High Profile extension fields.

    Returns the original bytes unchanged when no SPS can be found or
    the input is too short to be meaningful.
    """
    if not extradata or len(extradata) < 4:
        return extradata

    # Already avcC format (configurationVersion == 1)
    if extradata[0] == 0x01:
        return _fix_avcc_high_profile(extradata)

    # Parse Annex B NAL units to extract SPS and PPS
    nals = _find_annexb_nals(extradata)
    if not nals:
        return extradata

    sps_list: list[bytes] = []
    pps_list: list[bytes] = []

    for start, end in nals:
        # Strip trailing zero-padding before the next start code.
        while end > start and extradata[end - 1] == 0:
            end -= 1
        if end <= start:
            continue
        nal_type = extradata[start] & 0x1F
        nal_data = extradata[start:end]
        if nal_type == 7:  # SPS
            sps_list.append(nal_data)
        elif nal_type == 8:  # PPS
            pps_list.append(nal_data)

    if not sps_list:
        return extradata  # Can't build avcC without SPS

    # Profile/level bytes are read from the first SPS, so it must hold
    # at least the NAL header plus profile/compat/level.
    sps = sps_list[0]
    if len(sps) < 4:
        return extradata

    # Build avcC box content
    avcc = bytearray()
    avcc.append(1)  # configurationVersion
    avcc.append(sps[1])  # AVCProfileIndication
    avcc.append(sps[2])  # profile_compatibility
    avcc.append(sps[3])  # AVCLevelIndication
    avcc.append(0xFF)  # 6 bits reserved (0x3F) + lengthSizeMinusOne=3 -> 4-byte NAL lengths
    avcc.append(0xE0 | len(sps_list))  # 3 bits reserved (0x07) + numOfSPS

    for s in sps_list:
        avcc.extend(struct.pack(">H", len(s)))
        avcc.extend(s)

    avcc.append(len(pps_list))  # numOfPPS
    for p in pps_list:
        avcc.extend(struct.pack(">H", len(p)))
        avcc.extend(p)

    # High Profile records need the extension fields appended too.
    return _fix_avcc_high_profile(bytes(avcc))
|
||||||
|
|
||||||
|
|
||||||
|
def extract_sps_pps_from_annexb(data: bytes) -> bytes:
    """
    Extract SPS and PPS NAL units from Annex B encoded data and build
    an avcC-format extradata blob.

    Hardware encoders like VideoToolbox embed SPS/PPS as in-band NAL
    units in their first keyframe output rather than setting extradata
    on the codec context. This function finds those parameter sets
    and returns proper avcC bytes suitable for the fMP4 init segment.

    Returns:
        avcC bytes if SPS/PPS were found, empty bytes otherwise.
    """
    if not data or not is_annexb(data):
        return b""

    nals = _find_annexb_nals(data)
    if not nals:
        return b""

    sps_list: list[bytes] = []
    pps_list: list[bytes] = []

    for start, end in nals:
        # Strip trailing zero-padding
        while end > start and data[end - 1] == 0:
            end -= 1
        if end <= start:
            continue

        nal_type = data[start] & 0x1F
        if nal_type == 7:  # SPS
            sps_list.append(data[start:end])
        elif nal_type == 8:  # PPS
            pps_list.append(data[start:end])

    if not sps_list:
        return b""

    # Profile/level bytes come from the first SPS; require header + 3 bytes.
    sps = sps_list[0]
    if len(sps) < 4:
        return b""

    # Build avcC box content
    avcc = bytearray()
    avcc.append(1)  # configurationVersion
    avcc.append(sps[1])  # AVCProfileIndication
    avcc.append(sps[2])  # profile_compatibility
    avcc.append(sps[3])  # AVCLevelIndication
    avcc.append(0xFF)  # 6 bits reserved + lengthSizeMinusOne=3
    avcc.append(0xE0 | len(sps_list))  # 3 bits reserved + numOfSPS

    for s in sps_list:
        avcc.extend(struct.pack(">H", len(s)))
        avcc.extend(s)

    avcc.append(len(pps_list))  # numOfPPS
    for p in pps_list:
        avcc.extend(struct.pack(">H", len(p)))
        avcc.extend(p)

    return bytes(avcc)
|
||||||
|
|
||||||
|
|
||||||
|
def video_needs_reencode(codec_id: str) -> bool:
    """Check if a video codec requires re-encoding for browser playback."""
    # An empty/unknown codec id is treated as not needing re-encode.
    return bool(codec_id) and codec_id in VIDEO_NEEDS_REENCODE
|
||||||
|
|
||||||
|
|
||||||
|
def audio_needs_transcode(codec_id: str) -> bool:
    """Check if an audio codec requires transcoding for browser playback."""
    # An empty/unknown codec id is treated as not needing transcode.
    return bool(codec_id) and codec_id in AUDIO_NEEDS_TRANSCODE
|
||||||
|
|
||||||
|
|
||||||
|
def is_browser_compatible(video_codec: str, audio_codec: str) -> bool:
    """
    Check if a video+audio combination is fully browser-compatible.

    Returns True only if BOTH video and audio can be played natively in
    an HTML5 <video> element inside an MP4 container. A missing (falsy)
    codec counts as compatible, since there is no track to transcode.
    """
    if video_codec and video_codec not in BROWSER_VIDEO_CODECS:
        return False
    if audio_codec and audio_codec not in BROWSER_AUDIO_CODECS:
        return False
    return True
|
||||||
|
|
||||||
|
|
||||||
|
class TranscodeDecision:
    """Result of analyzing a stream's codec compatibility."""

    __slots__ = ("transcode_video", "transcode_audio", "video_codec", "audio_codec")

    def __init__(self, video_codec: str = "", audio_codec: str = "") -> None:
        # Record the source codecs and decide, per track, whether they
        # must be converted for native browser playback.
        self.video_codec = video_codec
        self.audio_codec = audio_codec
        self.transcode_video = video_needs_reencode(video_codec)
        self.transcode_audio = audio_needs_transcode(audio_codec)

    @property
    def needs_transcode(self) -> bool:
        """True if any stream needs transcoding."""
        return self.transcode_video or self.transcode_audio

    @property
    def passthrough_ok(self) -> bool:
        """True if the stream can be served as-is to a browser."""
        return not self.needs_transcode

    def __repr__(self) -> str:
        actions = []
        if self.transcode_video:
            actions.append(f"video:{self.video_codec}->h264")
        if self.transcode_audio:
            actions.append(f"audio:{self.audio_codec}->aac")
        summary = ", ".join(actions) if actions else "passthrough"
        return f"TranscodeDecision({summary})"
|
||||||
614
mediaflow_proxy/remuxer/container_probe.py
Normal file
614
mediaflow_proxy/remuxer/container_probe.py
Normal file
@@ -0,0 +1,614 @@
|
|||||||
|
"""
|
||||||
|
Container format probing -- MKV Cues and MP4 moov.
|
||||||
|
|
||||||
|
Pure Python probing using EBML parsing (MKV) and struct-based atom
|
||||||
|
scanning (MP4). No FFmpeg dependency.
|
||||||
|
|
||||||
|
Source-agnostic: accepts any MediaSource protocol implementation
|
||||||
|
(Telegram, HTTP, etc.) for byte-range reads.
|
||||||
|
|
||||||
|
Provides:
|
||||||
|
- probe_mkv_cues: probe MKV file to extract seek index (MKVCueIndex)
|
||||||
|
- probe_mp4_moov: probe MP4 file to extract moov atom and build seek index (MP4Index)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import struct
|
||||||
|
|
||||||
|
from mediaflow_proxy.utils import redis_utils
|
||||||
|
from mediaflow_proxy.remuxer.ebml_parser import (
|
||||||
|
MKVCueIndex,
|
||||||
|
build_cue_index,
|
||||||
|
parse_ebml_header,
|
||||||
|
parse_seek_head,
|
||||||
|
CUES,
|
||||||
|
INFO,
|
||||||
|
)
|
||||||
|
from mediaflow_proxy.remuxer.mp4_parser import (
|
||||||
|
MP4Index,
|
||||||
|
build_cue_points_from_moov,
|
||||||
|
is_mp4_header,
|
||||||
|
rewrite_moov_offsets,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# How much of the MKV header to fetch for SeekHead + Info parsing
_HEADER_PROBE_SIZE = 64 * 1024  # 64 KB

# Max Cues element size we'll attempt to fetch
_MAX_CUES_SIZE = 2 * 1024 * 1024  # 2 MB

# Redis cache for MKV Cue indexes; keys are prefix + per-file cache key
_CUE_INDEX_CACHE_PREFIX = "mfp:cue_index:"
_CUE_INDEX_CACHE_TTL = 3600  # 1 hour
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# MKV Cues probing
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def derive_cue_cache_key(
|
||||||
|
source_key: str = "",
|
||||||
|
*,
|
||||||
|
chat_id: str | int | None = None,
|
||||||
|
message_id: int | None = None,
|
||||||
|
file_id: str | None = None,
|
||||||
|
) -> str:
|
||||||
|
"""
|
||||||
|
Derive a deterministic cache key for a file's cue index.
|
||||||
|
|
||||||
|
Accepts either a pre-computed source_key (from MediaSource.cache_key)
|
||||||
|
or legacy Telegram-style parameters for backwards compatibility.
|
||||||
|
"""
|
||||||
|
if source_key:
|
||||||
|
return source_key
|
||||||
|
if file_id:
|
||||||
|
raw = f"file_id:{file_id}"
|
||||||
|
elif chat_id is not None and message_id is not None:
|
||||||
|
raw = f"chat:{chat_id}:msg:{message_id}"
|
||||||
|
else:
|
||||||
|
return ""
|
||||||
|
return hashlib.sha256(raw.encode()).hexdigest()[:16]
|
||||||
|
|
||||||
|
|
||||||
|
async def _get_cached_cue_index(cache_key: str) -> MKVCueIndex | None:
    """Try to load a MKVCueIndex from Redis cache.

    Returns None (best effort) when the key is empty, Redis is
    unavailable, the entry is missing, or the cached JSON is invalid.
    """
    if not cache_key:
        return None
    r = await redis_utils.get_redis()
    if r is None:
        # Redis not configured/reachable -- caching is optional.
        return None
    redis_key = f"{_CUE_INDEX_CACHE_PREFIX}{cache_key}"
    data = await r.get(redis_key)
    if not data:
        return None
    try:
        d = json.loads(data)
        # Binary fields are stored base64-encoded in the JSON payload.
        seek_header = b""
        if d.get("seek_header_b64"):
            seek_header = base64.b64decode(d["seek_header_b64"])
        video_codec_private = b""
        if d.get("video_codec_private_b64"):
            video_codec_private = base64.b64decode(d["video_codec_private_b64"])
        index = MKVCueIndex(
            duration_ms=d["duration_ms"],
            timestamp_scale=d["timestamp_scale"],
            # JSON turns tuples into lists; rebuild (time, offset) tuples.
            cue_points=[(cp[0], cp[1]) for cp in d["cue_points"]],
            segment_data_offset=d["segment_data_offset"],
            first_cluster_offset=d.get("first_cluster_offset", 0),
            seek_header=seek_header,
            audio_codec_id=d.get("audio_codec_id", ""),
            audio_bitrate=d.get("audio_bitrate", 0),
            audio_channels=d.get("audio_channels", 0),
            audio_sample_rate=d.get("audio_sample_rate", 0.0),
            video_codec_id=d.get("video_codec_id", ""),
            video_codec_private=video_codec_private,
            video_width=d.get("video_width", 0),
            video_height=d.get("video_height", 0),
            video_fps=d.get("video_fps", 0.0),
            video_default_duration_ns=d.get("video_default_duration_ns", 0),
        )
        logger.debug("[container_probe] Loaded cue index from cache: %s", cache_key)
        return index
    except (KeyError, TypeError, json.JSONDecodeError) as e:
        # Malformed/stale cache entry: log and fall back to re-probing.
        logger.warning("[container_probe] Invalid cached cue index: %s", e)
        return None
|
||||||
|
|
||||||
|
|
||||||
|
async def _set_cached_cue_index(cache_key: str, index: MKVCueIndex) -> None:
    """Cache a MKVCueIndex in Redis.

    No-op when the key is empty or Redis is unavailable. Binary fields
    are base64-encoded so the payload is valid JSON; entries expire
    after _CUE_INDEX_CACHE_TTL seconds.
    """
    if not cache_key:
        return
    r = await redis_utils.get_redis()
    if r is None:
        # Redis not configured/reachable -- caching is optional.
        return
    redis_key = f"{_CUE_INDEX_CACHE_PREFIX}{cache_key}"
    data = json.dumps(
        {
            "duration_ms": index.duration_ms,
            "timestamp_scale": index.timestamp_scale,
            "cue_points": index.cue_points,
            "segment_data_offset": index.segment_data_offset,
            "first_cluster_offset": index.first_cluster_offset,
            "seek_header_b64": base64.b64encode(index.seek_header).decode() if index.seek_header else "",
            "audio_codec_id": index.audio_codec_id,
            "audio_bitrate": index.audio_bitrate,
            "audio_channels": index.audio_channels,
            "audio_sample_rate": index.audio_sample_rate,
            "video_codec_id": index.video_codec_id,
            "video_codec_private_b64": base64.b64encode(index.video_codec_private).decode()
            if index.video_codec_private
            else "",
            "video_width": index.video_width,
            "video_height": index.video_height,
            "video_fps": index.video_fps,
            "video_default_duration_ns": index.video_default_duration_ns,
        }
    )
    await r.set(redis_key, data, ex=_CUE_INDEX_CACHE_TTL)
    logger.debug("[container_probe] Cached cue index: %s", cache_key)
|
||||||
|
|
||||||
|
|
||||||
|
async def probe_mkv_cues(
    source,
    file_size: int = 0,
    cache_key: str = "",
    header_data: bytes | None = None,
) -> MKVCueIndex | None:
    """
    Probe an MKV file's EBML header and Cues to build a seek index.

    Pure Python -- parses EBML structures directly, no FFmpeg involved.

    Makes up to two small byte-range reads via the provided source:
    1. First ~64KB: EBML header + SeekHead + Info (skipped if header_data provided)
    2. Cues section: byte range from SeekHead's Cues position

    Results are cached in Redis (when available) under ``cache_key``.

    Args:
        source: A MediaSource protocol implementation, or any object with
            a ``stream(offset, limit)`` async generator method.
        file_size: Total file size in bytes. If 0, tries ``source.file_size``.
        cache_key: Optional cache key for Redis caching. If empty, tries
            ``source.cache_key``.
        header_data: Pre-fetched header bytes (first ~64KB). If provided,
            skips the initial header fetch from source.

    Returns:
        MKVCueIndex if successful, None if the file has no Cues or parsing fails.
    """
    # Resolve file_size and cache_key from source if not provided
    if file_size <= 0:
        file_size = getattr(source, "file_size", 0)
    if not cache_key:
        cache_key = getattr(source, "cache_key", "")

    # Check cache first
    if cache_key:
        cached = await _get_cached_cue_index(cache_key)
        if cached:
            return cached

    try:
        # Step 1: Use pre-fetched header or fetch from source
        if header_data is None:
            header_size = min(_HEADER_PROBE_SIZE, file_size) if file_size > 0 else _HEADER_PROBE_SIZE
            header_data = b""
            async for chunk in source.stream(offset=0, limit=header_size):
                header_data += chunk

        if len(header_data) < 64:
            logger.warning("[container_probe] Header too small (%d bytes), cannot probe", len(header_data))
            return None

        # Step 2: Parse EBML header to find Segment data offset
        segment_data_offset = parse_ebml_header(header_data)

        # Step 3: Parse SeekHead to find Cues and Info positions
        # (SeekHead positions are relative to the Segment data start).
        seek_positions = parse_seek_head(header_data, segment_data_offset)

        if CUES not in seek_positions:
            logger.info("[container_probe] No Cues position in SeekHead, seeking not available")
            return None

        cues_relative_offset = seek_positions[CUES]
        cues_absolute_offset = segment_data_offset + cues_relative_offset

        logger.info(
            "[container_probe] SeekHead: Cues at offset %d (absolute %d), Info at %s",
            cues_relative_offset,
            cues_absolute_offset,
            seek_positions.get(INFO, "not found"),
        )

        # Step 4: Fetch the Cues element (capped at _MAX_CUES_SIZE)
        cues_max = file_size - cues_absolute_offset if file_size > 0 else _MAX_CUES_SIZE
        cues_fetch_size = min(_MAX_CUES_SIZE, cues_max)
        if cues_fetch_size <= 0:
            logger.warning("[container_probe] Cues offset %d beyond file size %d", cues_absolute_offset, file_size)
            return None

        cues_data = b""
        async for chunk in source.stream(offset=cues_absolute_offset, limit=cues_fetch_size):
            cues_data += chunk

        if len(cues_data) < 16:
            logger.warning("[container_probe] Cues data too small (%d bytes)", len(cues_data))
            return None

        # Step 5: Build the cue index
        index = build_cue_index(
            header_data=header_data,
            cues_data=cues_data,
            cues_file_offset=cues_absolute_offset,
            segment_data_offset=segment_data_offset,
        )

        # Cache the result
        if cache_key:
            await _set_cached_cue_index(cache_key, index)

        return index

    except Exception as e:
        # Broad catch is deliberate: probing is best-effort and must not
        # break playback; any failure just disables fast seeking.
        logger.warning("[container_probe] Failed to probe MKV cues: %s", e)
        return None
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# MP4 Moov probing
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Redis cache for MP4 indexes; keys are prefix + per-file cache key
_MP4_INDEX_CACHE_PREFIX = "mfp:mp4_index:"
_MP4_INDEX_CACHE_TTL = 3600  # 1 hour

# How much to read from the start for ftyp + initial atom scanning
_MP4_HEADER_PROBE_SIZE = 64 * 1024  # 64 KB

# Max moov size we'll accept
_MAX_MOOV_SIZE = 50 * 1024 * 1024  # 50 MB

# How much to read from the end of the file to find moov
# (non-faststart files place moov after mdat, near EOF)
_MP4_TAIL_PROBE_SIZE = 512 * 1024  # 512 KB
|
||||||
|
|
||||||
|
|
||||||
|
async def _get_cached_mp4_index(cache_key: str) -> MP4Index | None:
    """Try to load an MP4Index from Redis cache.

    Returns None (best effort) when the key is empty, Redis is
    unavailable, the entry is missing, or the cached JSON is invalid.
    The returned index has no ``moov_data``; callers re-fetch it.
    """
    if not cache_key:
        return None
    r = await redis_utils.get_redis()
    if r is None:
        # Redis not configured/reachable -- caching is optional.
        return None
    redis_key = f"{_MP4_INDEX_CACHE_PREFIX}{cache_key}"
    data = await r.get(redis_key)
    if not data:
        return None
    try:
        d = json.loads(data)
        # ftyp is small and stored base64-encoded in the JSON payload.
        ftyp_data = b""
        if d.get("ftyp_data_b64"):
            ftyp_data = base64.b64decode(d["ftyp_data_b64"])
        index = MP4Index(
            duration_ms=d["duration_ms"],
            timescale=d["timescale"],
            # JSON turns tuples into lists; rebuild (time, offset) tuples.
            cue_points=[(cp[0], cp[1]) for cp in d["cue_points"]],
            moov_offset=d["moov_offset"],
            moov_size=d["moov_size"],
            ftyp_data=ftyp_data,
            mdat_offset=d["mdat_offset"],
            mdat_size=d["mdat_size"],
            video_codec=d.get("video_codec", ""),
            audio_codec=d.get("audio_codec", ""),
            # moov_data is NOT cached (too large), it will be re-fetched
        )
        logger.debug("[container_probe] Loaded MP4 index from cache: %s", cache_key)
        return index
    except (KeyError, TypeError, json.JSONDecodeError) as e:
        # Malformed/stale cache entry: log and fall back to re-probing.
        logger.warning("[container_probe] Invalid cached MP4 index: %s", e)
        return None
|
||||||
|
|
||||||
|
|
||||||
|
async def _set_cached_mp4_index(cache_key: str, index: MP4Index) -> None:
    """Cache an MP4Index in Redis (without moov_data).

    No-op when the key is empty or Redis is unavailable. ``moov_data``
    is deliberately excluded (it can be tens of MB); only the small
    ftyp blob is stored, base64-encoded. Entries expire after
    _MP4_INDEX_CACHE_TTL seconds.
    """
    if not cache_key:
        return
    r = await redis_utils.get_redis()
    if r is None:
        # Redis not configured/reachable -- caching is optional.
        return
    redis_key = f"{_MP4_INDEX_CACHE_PREFIX}{cache_key}"
    data = json.dumps(
        {
            "duration_ms": index.duration_ms,
            "timescale": index.timescale,
            "cue_points": index.cue_points,
            "moov_offset": index.moov_offset,
            "moov_size": index.moov_size,
            "ftyp_data_b64": base64.b64encode(index.ftyp_data).decode() if index.ftyp_data else "",
            "mdat_offset": index.mdat_offset,
            "mdat_size": index.mdat_size,
            "video_codec": index.video_codec,
            "audio_codec": index.audio_codec,
        }
    )
    await r.set(redis_key, data, ex=_MP4_INDEX_CACHE_TTL)
    logger.debug("[container_probe] Cached MP4 index: %s", cache_key)
|
||||||
|
|
||||||
|
|
||||||
|
def _scan_top_level_atoms(data: bytes) -> list[tuple[bytes, int, int]]:
|
||||||
|
"""
|
||||||
|
Scan top-level atom headers from raw file bytes.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of (box_type, absolute_offset, total_size) for each atom found.
|
||||||
|
"""
|
||||||
|
atoms = []
|
||||||
|
offset = 0
|
||||||
|
while offset + 8 <= len(data):
|
||||||
|
size = struct.unpack_from(">I", data, offset)[0]
|
||||||
|
box_type = data[offset + 4 : offset + 8]
|
||||||
|
|
||||||
|
if size == 1: # Extended size
|
||||||
|
if offset + 16 > len(data):
|
||||||
|
break
|
||||||
|
size = struct.unpack_from(">Q", data, offset + 8)[0]
|
||||||
|
elif size == 0:
|
||||||
|
# Extends to end of file - we can't know the real size from
|
||||||
|
# a partial read, but record what we have
|
||||||
|
atoms.append((box_type, offset, 0))
|
||||||
|
break
|
||||||
|
|
||||||
|
if size < 8:
|
||||||
|
break
|
||||||
|
|
||||||
|
atoms.append((box_type, offset, size))
|
||||||
|
offset += size
|
||||||
|
|
||||||
|
return atoms
|
||||||
|
|
||||||
|
|
||||||
|
async def probe_mp4_moov(
|
||||||
|
source,
|
||||||
|
file_size: int = 0,
|
||||||
|
cache_key: str = "",
|
||||||
|
header_data: bytes | None = None,
|
||||||
|
) -> MP4Index | None:
|
||||||
|
"""
|
||||||
|
Probe an MP4 file's moov atom to build a seek index.
|
||||||
|
|
||||||
|
Pure Python -- scans MP4 box headers with struct, no FFmpeg involved.
|
||||||
|
|
||||||
|
Strategy:
|
||||||
|
1. Read first ~64KB to check for ftyp (MP4 signature).
|
||||||
|
2. Scan top-level atoms to find moov and mdat.
|
||||||
|
3. If moov is at the start (faststart), read it from the header data.
|
||||||
|
4. If moov is not in the header, read from the tail of the file.
|
||||||
|
5. Parse moov sample tables to build cue points.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
source: A MediaSource protocol implementation with stream(offset, limit).
|
||||||
|
file_size: Total file size in bytes.
|
||||||
|
cache_key: Optional cache key for Redis caching.
|
||||||
|
header_data: Pre-fetched header bytes (first ~64KB). If provided,
|
||||||
|
skips the initial header fetch from source.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
MP4Index if successful, None if not an MP4 or parsing fails.
|
||||||
|
"""
|
||||||
|
if file_size <= 0:
|
||||||
|
file_size = getattr(source, "file_size", 0)
|
||||||
|
if not cache_key:
|
||||||
|
cache_key = getattr(source, "cache_key", "")
|
||||||
|
|
||||||
|
# Check cache first
|
||||||
|
if cache_key:
|
||||||
|
cached = await _get_cached_mp4_index(cache_key)
|
||||||
|
if cached:
|
||||||
|
# Re-fetch moov_data (not cached due to size) and rewrite offsets
|
||||||
|
if cached.moov_size > 0 and cached.moov_size <= _MAX_MOOV_SIZE:
|
||||||
|
moov_data = b""
|
||||||
|
async for chunk in source.stream(offset=cached.moov_offset, limit=cached.moov_size):
|
||||||
|
moov_data += chunk
|
||||||
|
if cached.mdat_offset >= 0:
|
||||||
|
new_mdat_start = len(cached.ftyp_data) + cached.moov_size
|
||||||
|
offset_delta = new_mdat_start - cached.mdat_offset
|
||||||
|
if offset_delta != 0:
|
||||||
|
moov_data = rewrite_moov_offsets(moov_data, offset_delta)
|
||||||
|
cached.moov_data = moov_data
|
||||||
|
return cached
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Step 1: Use pre-fetched header or fetch from source
|
||||||
|
if header_data is None:
|
||||||
|
header_size = min(_MP4_HEADER_PROBE_SIZE, file_size) if file_size > 0 else _MP4_HEADER_PROBE_SIZE
|
||||||
|
header_data = b""
|
||||||
|
async for chunk in source.stream(offset=0, limit=header_size):
|
||||||
|
header_data += chunk
|
||||||
|
|
||||||
|
if len(header_data) < 12:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Step 2: Check for ftyp
|
||||||
|
if not is_mp4_header(header_data):
|
||||||
|
return None
|
||||||
|
|
||||||
|
logger.info("[container_probe] MP4 detected, scanning atoms (header=%d bytes)", len(header_data))
|
||||||
|
|
||||||
|
# Step 3: Scan top-level atoms from header
|
||||||
|
atoms = _scan_top_level_atoms(header_data)
|
||||||
|
|
||||||
|
ftyp_offset = -1
|
||||||
|
ftyp_size = 0
|
||||||
|
moov_offset = -1
|
||||||
|
moov_size = 0
|
||||||
|
mdat_offset = -1
|
||||||
|
mdat_size = 0
|
||||||
|
|
||||||
|
for box_type, atom_offset, atom_size in atoms:
|
||||||
|
if box_type == b"ftyp":
|
||||||
|
ftyp_offset = atom_offset
|
||||||
|
ftyp_size = atom_size
|
||||||
|
elif box_type == b"moov":
|
||||||
|
moov_offset = atom_offset
|
||||||
|
moov_size = atom_size
|
||||||
|
elif box_type == b"mdat":
|
||||||
|
mdat_offset = atom_offset
|
||||||
|
mdat_size = atom_size
|
||||||
|
|
||||||
|
# Step 4: If moov not found in header, scan from tail
|
||||||
|
if moov_offset < 0 and file_size > 0:
|
||||||
|
tail_start = max(0, file_size - _MP4_TAIL_PROBE_SIZE)
|
||||||
|
tail_data = b""
|
||||||
|
async for chunk in source.stream(offset=tail_start, limit=file_size - tail_start):
|
||||||
|
tail_data += chunk
|
||||||
|
|
||||||
|
if tail_data:
|
||||||
|
tail_atoms = _scan_top_level_atoms(tail_data)
|
||||||
|
for box_type, rel_offset, atom_size in tail_atoms:
|
||||||
|
abs_offset = tail_start + rel_offset
|
||||||
|
if box_type == b"moov":
|
||||||
|
moov_offset = abs_offset
|
||||||
|
moov_size = atom_size
|
||||||
|
elif box_type == b"mdat" and mdat_offset < 0:
|
||||||
|
mdat_offset = abs_offset
|
||||||
|
mdat_size = atom_size
|
||||||
|
|
||||||
|
# If the initial scan yielded no moov (tail_start may land
|
||||||
|
# inside a large mdat payload producing garbage atom headers),
|
||||||
|
# resync by scanning 8-byte aligned windows for b"moov".
|
||||||
|
if moov_offset < 0:
|
||||||
|
needle = b"moov"
|
||||||
|
search_pos = 0
|
||||||
|
while search_pos + 8 <= len(tail_data):
|
||||||
|
idx = tail_data.find(needle, search_pos)
|
||||||
|
if idx < 0 or idx < 4:
|
||||||
|
break
|
||||||
|
candidate_size = struct.unpack_from(">I", tail_data, idx - 4)[0]
|
||||||
|
if 8 < candidate_size <= _MAX_MOOV_SIZE:
|
||||||
|
moov_offset = tail_start + idx - 4
|
||||||
|
moov_size = candidate_size
|
||||||
|
break
|
||||||
|
search_pos = idx + 4
|
||||||
|
|
||||||
|
if moov_offset < 0:
|
||||||
|
logger.info("[container_probe] No moov atom found in MP4")
|
||||||
|
return None
|
||||||
|
|
||||||
|
if moov_size <= 0 or moov_size > _MAX_MOOV_SIZE:
|
||||||
|
logger.warning("[container_probe] moov size %d is invalid or too large", moov_size)
|
||||||
|
return None
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"[container_probe] MP4 atoms: moov at %d (%d bytes), mdat at %d (%d bytes)",
|
||||||
|
moov_offset,
|
||||||
|
moov_size,
|
||||||
|
mdat_offset,
|
||||||
|
mdat_size,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Step 5: Fetch full moov atom
|
||||||
|
# Check if moov is already contained in the header data we read
|
||||||
|
if moov_offset + moov_size <= len(header_data):
|
||||||
|
moov_data = header_data[moov_offset : moov_offset + moov_size]
|
||||||
|
else:
|
||||||
|
moov_data = b""
|
||||||
|
async for chunk in source.stream(offset=moov_offset, limit=moov_size):
|
||||||
|
moov_data += chunk
|
||||||
|
|
||||||
|
if len(moov_data) < moov_size:
|
||||||
|
logger.warning(
|
||||||
|
"[container_probe] Incomplete moov: got %d of %d bytes",
|
||||||
|
len(moov_data),
|
||||||
|
moov_size,
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Step 6: Parse moov body (skip box header)
|
||||||
|
# Determine header size
|
||||||
|
raw_size = struct.unpack_from(">I", moov_data, 0)[0]
|
||||||
|
hdr_size = 16 if raw_size == 1 else 8
|
||||||
|
moov_body = moov_data[hdr_size:]
|
||||||
|
|
||||||
|
cue_points, duration_ms, timescale, video_codec, audio_codec = build_cue_points_from_moov(moov_body)
|
||||||
|
|
||||||
|
# If mdat wasn't found via header scan, it's likely right after ftyp
|
||||||
|
# or right after moov. Common layouts:
|
||||||
|
# ftyp + moov + mdat (faststart) or ftyp + mdat + moov
|
||||||
|
if mdat_offset < 0:
|
||||||
|
# Walk atoms to find mdat by scanning just enough from the file
|
||||||
|
# In most cases, mdat is either before or after moov
|
||||||
|
if moov_offset < file_size // 2:
|
||||||
|
# moov is early -> mdat likely follows
|
||||||
|
mdat_search_offset = moov_offset + moov_size
|
||||||
|
else:
|
||||||
|
# moov is late -> mdat likely right after ftyp
|
||||||
|
ftyp_size = struct.unpack_from(">I", header_data, 0)[0]
|
||||||
|
if ftyp_size == 1:
|
||||||
|
ftyp_size = struct.unpack_from(">Q", header_data, 8)[0]
|
||||||
|
mdat_search_offset = ftyp_size
|
||||||
|
|
||||||
|
# Read a small amount to find the mdat header
|
||||||
|
mdat_header = b""
|
||||||
|
async for chunk in source.stream(offset=mdat_search_offset, limit=16):
|
||||||
|
mdat_header += chunk
|
||||||
|
if len(mdat_header) >= 8:
|
||||||
|
box_type = mdat_header[4:8]
|
||||||
|
if box_type == b"mdat":
|
||||||
|
mdat_offset = mdat_search_offset
|
||||||
|
raw_sz = struct.unpack_from(">I", mdat_header, 0)[0]
|
||||||
|
if raw_sz == 1 and len(mdat_header) >= 16:
|
||||||
|
mdat_size = struct.unpack_from(">Q", mdat_header, 8)[0]
|
||||||
|
else:
|
||||||
|
mdat_size = raw_sz
|
||||||
|
|
||||||
|
# Step 7: Extract ftyp data (always in the header since it's the first atom)
|
||||||
|
ftyp_data = b""
|
||||||
|
if ftyp_offset >= 0 and ftyp_size > 0 and ftyp_offset + ftyp_size <= len(header_data):
|
||||||
|
ftyp_data = header_data[ftyp_offset : ftyp_offset + ftyp_size]
|
||||||
|
|
||||||
|
# Step 8: Rewrite moov chunk offsets for faststart pipe layout.
|
||||||
|
# The pipe stream will be: ftyp + moov + mdat. The stco/co64
|
||||||
|
# offsets in the original moov point to positions in the original
|
||||||
|
# file. We need to shift them to account for the new layout.
|
||||||
|
# New mdat position = ftyp_size + moov_size
|
||||||
|
# Delta = new_mdat_position - original_mdat_offset
|
||||||
|
if mdat_offset >= 0:
|
||||||
|
new_mdat_start = len(ftyp_data) + moov_size
|
||||||
|
offset_delta = new_mdat_start - mdat_offset
|
||||||
|
if offset_delta != 0:
|
||||||
|
moov_data = rewrite_moov_offsets(moov_data, offset_delta)
|
||||||
|
|
||||||
|
index = MP4Index(
|
||||||
|
duration_ms=duration_ms,
|
||||||
|
timescale=timescale,
|
||||||
|
cue_points=cue_points,
|
||||||
|
moov_offset=moov_offset,
|
||||||
|
moov_size=moov_size,
|
||||||
|
moov_data=moov_data,
|
||||||
|
ftyp_data=ftyp_data,
|
||||||
|
mdat_offset=mdat_offset,
|
||||||
|
mdat_size=mdat_size,
|
||||||
|
video_codec=video_codec,
|
||||||
|
audio_codec=audio_codec,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"[container_probe] MP4 index: duration=%.1fs, %d cue points, video=%s, audio=%s",
|
||||||
|
duration_ms / 1000.0,
|
||||||
|
len(cue_points),
|
||||||
|
video_codec,
|
||||||
|
audio_codec,
|
||||||
|
)
|
||||||
|
|
||||||
|
if cache_key:
|
||||||
|
await _set_cached_mp4_index(cache_key, index)
|
||||||
|
|
||||||
|
return index
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("[container_probe] Failed to probe MP4 moov: %s", e)
|
||||||
|
return None
|
||||||
1228
mediaflow_proxy/remuxer/ebml_parser.py
Normal file
1228
mediaflow_proxy/remuxer/ebml_parser.py
Normal file
File diff suppressed because it is too large
Load Diff
151
mediaflow_proxy/remuxer/hls_manifest.py
Normal file
151
mediaflow_proxy/remuxer/hls_manifest.py
Normal file
@@ -0,0 +1,151 @@
|
|||||||
|
"""
|
||||||
|
HLS VOD playlist generator for on-the-fly fMP4 transcoding.
|
||||||
|
|
||||||
|
Produces an M3U8 VOD playlist from an ``MKVCueIndex`` or ``MP4Index``.
|
||||||
|
Consecutive keyframes that are closer together than the target segment
|
||||||
|
duration are merged into a single HLS segment, matching the behaviour
|
||||||
|
of ``ffmpeg -hls_time``.
|
||||||
|
|
||||||
|
The init segment is referenced via ``#EXT-X-MAP``.
|
||||||
|
|
||||||
|
Requires ``#EXT-X-VERSION:7`` for fMP4 (CMAF) segments.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import math
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def merge_cue_points(
    cue_points: list[tuple[float, int]],
    target_duration_ms: float = 5000.0,
) -> list[tuple[float, int]]:
    """Collapse keyframe cue points into segment boundaries spaced >= *target_duration_ms*.

    Mirrors ``ffmpeg -hls_time`` behaviour: a keyframe only starts a new
    segment when it falls at least ``target_duration_ms`` after the start
    of the current one; closer keyframes are absorbed. Duplicate byte
    offsets are removed first, keeping the earliest timestamp per offset
    (deterministic via a (time, offset) sort), which also eliminates runt
    segments and timestamp regressions seen in some MKV cue tables.

    Args:
        cue_points: Sorted ``(time_ms, byte_offset)`` pairs.
        target_duration_ms: Minimum segment duration in milliseconds.

    Returns:
        Reduced list of ``(time_ms, byte_offset)`` segment boundaries.
    """
    if not cue_points:
        return []

    # Deduplicate by byte offset; earliest timestamp wins because we walk
    # the points in (time, offset) order.
    unique: list[tuple[float, int]] = []
    taken_offsets: set[int] = set()
    for ts, off in sorted(cue_points, key=lambda p: (p[0], p[1])):
        if off in taken_offsets:
            continue
        taken_offsets.add(off)
        unique.append((ts, off))

    if not unique:
        return []

    # Greedy merge: open a boundary only once the target window has elapsed.
    boundaries: list[tuple[float, int]] = [unique[0]]
    for point in unique[1:]:
        if point[0] - boundaries[-1][0] >= target_duration_ms:
            boundaries.append(point)
    return boundaries
|
||||||
|
|
||||||
|
|
||||||
|
def generate_vod_playlist(
    cue_points: list[tuple[float, int]],
    duration_ms: float,
    init_url: str,
    segment_url_template: str,
    target_segment_duration_ms: float = 5000.0,
) -> str:
    """Render an HLS VOD M3U8 playlist from keyframe cue points.

    Keyframes closer together than *target_segment_duration_ms* are merged
    into one segment (``ffmpeg -hls_time`` semantics). The fMP4 init
    segment is referenced via ``#EXT-X-MAP``; version 7 is declared as
    required for CMAF segments.

    Args:
        cue_points: Sorted ``(time_ms, byte_offset)`` pairs.
        duration_ms: Total media duration in milliseconds.
        init_url: URI for the init segment (``#EXT-X-MAP``).
        segment_url_template: URL template with ``{seg}``, ``{start_ms}``
            and ``{end_ms}`` placeholders.
        target_segment_duration_ms: Target minimum segment duration.

    Returns:
        Complete M3U8 playlist text, or "" when there are no cue points.
    """
    if not cue_points:
        return ""

    boundaries = merge_cue_points(cue_points, target_segment_duration_ms)

    # Derive (start_ms, end_ms, duration_s) per segment; the final segment
    # runs to the end of the media. Durations are floored at 1 ms.
    segments: list[tuple[float, float, float]] = []
    for idx, (start_ms, _offset) in enumerate(boundaries):
        end_ms = boundaries[idx + 1][0] if idx + 1 < len(boundaries) else duration_ms
        segments.append((start_ms, end_ms, max((end_ms - start_ms) / 1000.0, 0.001)))

    if not segments:
        return ""

    longest = max(seg[2] for seg in segments)
    target_duration = max(math.ceil(longest), 1)

    out: list[str] = [
        "#EXTM3U",
        "#EXT-X-VERSION:7",
        f"#EXT-X-TARGETDURATION:{target_duration}",
        "#EXT-X-PLAYLIST-TYPE:VOD",
        "#EXT-X-MEDIA-SEQUENCE:0",
        f'#EXT-X-MAP:URI="{init_url}"',
    ]

    for seg_num, (start_ms, end_ms, dur_s) in enumerate(segments):
        out.append(f"#EXTINF:{dur_s:.3f},")
        out.append(
            segment_url_template.replace("{seg}", str(seg_num))
            .replace("{start_ms}", str(int(start_ms)))
            .replace("{end_ms}", str(int(end_ms)))
        )

    out.append("#EXT-X-ENDLIST")
    out.append("")  # ensure trailing newline
    return "\n".join(out)
|
||||||
234
mediaflow_proxy/remuxer/media_source.py
Normal file
234
mediaflow_proxy/remuxer/media_source.py
Normal file
@@ -0,0 +1,234 @@
|
|||||||
|
"""
|
||||||
|
Abstract media source protocol for source-agnostic transcode pipeline.
|
||||||
|
|
||||||
|
Decouples the transcode pipeline, MKV cue probing, and seeking logic
|
||||||
|
from any specific transport (Telegram, HTTP, etc.). Each transport
|
||||||
|
implements the MediaSource protocol to provide byte-range streaming.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import logging
|
||||||
|
from collections.abc import AsyncIterator
|
||||||
|
from typing import Protocol, runtime_checkable
|
||||||
|
from urllib.parse import urlparse, unquote
|
||||||
|
|
||||||
|
from mediaflow_proxy.utils.http_client import create_aiohttp_session
|
||||||
|
from mediaflow_proxy.utils.telegram import telegram_manager
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Extensions mapped to container format hints used by transcode_handler
|
||||||
|
_MKV_EXTENSIONS = frozenset({".mkv", ".webm"})
|
||||||
|
_MP4_EXTENSIONS = frozenset({".mp4", ".m4v", ".mov", ".m4a", ".3gp"})
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_extension(path: str) -> str:
|
||||||
|
"""Extract lowercase file extension (e.g. '.mkv') from a path or URL."""
|
||||||
|
# Strip query/fragment first for URL paths
|
||||||
|
dot_pos = path.rfind(".")
|
||||||
|
if dot_pos < 0:
|
||||||
|
return ""
|
||||||
|
ext = path[dot_pos:].lower()
|
||||||
|
# Trim anything after the extension (query params from raw paths)
|
||||||
|
for ch in ("?", "#", "&"):
|
||||||
|
idx = ext.find(ch)
|
||||||
|
if idx > 0:
|
||||||
|
ext = ext[:idx]
|
||||||
|
return ext
|
||||||
|
|
||||||
|
|
||||||
|
def filename_hint_from_url(url: str) -> str:
    """Derive a filename hint from a URL path (e.g. '.mkv', '.mp4').

    Any parsing failure degrades to "" rather than raising.
    """
    try:
        decoded_path = unquote(urlparse(url).path)
        return _extract_extension(decoded_path)
    except Exception:
        return ""
|
||||||
|
|
||||||
|
|
||||||
|
def filename_hint_from_name(filename: str) -> str:
    """Derive a filename hint (extension) from a filename string; "" if empty."""
    if not filename:
        return ""
    return _extract_extension(filename)
|
||||||
|
|
||||||
|
|
||||||
|
@runtime_checkable
class MediaSource(Protocol):
    """
    Protocol for streaming media byte ranges, independent of transport.

    Implemented by TelegramMediaSource and HTTPMediaSource below so the
    transcode/probe pipeline can work against any byte-range-capable
    backend. Implementations must provide:
    - stream(): async iterator of bytes starting at offset, up to limit
    - file_size: total file size in bytes
    - cache_key: deterministic key for caching derived data (cue index, etc.)
    - filename_hint: optional file extension hint (e.g. '.mkv', '.mp4')

    Decorated with @runtime_checkable so isinstance() checks work.
    """

    @property
    def file_size(self) -> int:
        """Total file size in bytes (0 may mean "unknown" for some sources)."""
        ...

    @property
    def cache_key(self) -> str:
        """Deterministic cache key derived from the source identity ("" if none)."""
        ...

    @property
    def filename_hint(self) -> str:
        """Optional file extension hint (e.g. '.mkv', '.mp4') for format detection."""
        ...

    async def stream(self, offset: int = 0, limit: int | None = None) -> AsyncIterator[bytes]:
        """
        Stream bytes from the source.

        Args:
            offset: Byte offset to start from.
            limit: Number of bytes to read. None = read to end.

        Yields:
            Chunks of bytes (chunk sizes are implementation-defined).
        """
        ...
|
||||||
|
|
||||||
|
|
||||||
|
class TelegramMediaSource:
    """
    MediaSource backed by Telegram MTProto downloads.

    Supports two download modes:

    * **parallel** (default): Uses ``ParallelTransferrer`` with multiple
      MTProtoSender connections for maximum throughput. Best for full-file
      streaming (e.g. ``/proxy/telegram/stream``).

    * **single** (``use_single_client=True``): Uses Telethon's built-in
      ``iter_download`` over the existing client connection. Avoids the
      overhead of creating/destroying extra connections for each request,
      ideal for small byte-range fetches like HLS segments and probe
      headers.
    """

    def __init__(
        self,
        telegram_ref,
        file_size: int,
        file_name: str = "",
        *,
        use_single_client: bool = False,
    ) -> None:
        self._ref = telegram_ref
        self._file_size = file_size
        # Pre-compute the extension hint once; file_name may be empty.
        self._filename_hint = filename_hint_from_name(file_name)
        self._use_single_client = use_single_client

    @property
    def file_size(self) -> int:
        return self._file_size

    @property
    def cache_key(self) -> str:
        # Prefer the stable file_id; fall back to chat/message identity.
        ref = self._ref
        if ref.file_id:
            identity = f"file_id:{ref.file_id}"
        else:
            if ref.chat_id is None or ref.message_id is None:
                return ""
            identity = f"chat:{ref.chat_id}:msg:{ref.message_id}"
        return hashlib.sha256(identity.encode()).hexdigest()[:16]

    @property
    def filename_hint(self) -> str:
        return self._filename_hint

    async def stream(self, offset: int = 0, limit: int | None = None) -> AsyncIterator[bytes]:
        # A falsy limit (None or 0) means "read to end of file".
        byte_budget = limit or self._file_size
        reader = (
            telegram_manager.stream_media_single
            if self._use_single_client
            else telegram_manager.stream_media
        )
        async for chunk in reader(
            self._ref,
            offset=offset,
            limit=byte_budget,
            file_size=self._file_size,
        ):
            yield chunk
|
||||||
|
|
||||||
|
|
||||||
|
class HTTPMediaSource:
    """MediaSource backed by HTTP byte-range requests via aiohttp."""

    def __init__(self, url: str, headers: dict | None = None, file_size: int = 0) -> None:
        self._url = url
        self._headers = headers or {}
        self._file_size = file_size
        self._filename_hint = filename_hint_from_url(url)

    @property
    def file_size(self) -> int:
        return self._file_size

    @property
    def cache_key(self) -> str:
        # URL identity is the cache key; 16 hex chars of SHA-256 suffice.
        return hashlib.sha256(self._url.encode()).hexdigest()[:16]

    @property
    def filename_hint(self) -> str:
        return self._filename_hint

    async def resolve_file_size(self) -> int:
        """Determine the file size via HEAD (cached after the first call).

        Falls back to a 1-byte ranged GET and parses Content-Range when the
        server omits Content-Length from the HEAD response.
        """
        if self._file_size > 0:
            return self._file_size

        async with create_aiohttp_session(self._url, headers=self._headers) as (session, proxy_url):
            async with session.head(
                self._url,
                headers=self._headers,
                proxy=proxy_url,
                allow_redirects=True,
            ) as head_resp:
                declared = head_resp.headers.get("content-length")
                if declared:
                    self._file_size = int(declared)
                else:
                    # Fallback: ranged GET exposes the total after '/' in
                    # Content-Range ("bytes 0-0/12345").
                    probe_headers = {**self._headers, "range": "bytes=0-0"}
                    async with session.get(
                        self._url,
                        headers=probe_headers,
                        proxy=proxy_url,
                        allow_redirects=True,
                    ) as probe_resp:
                        content_range = probe_resp.headers.get("content-range", "")
                        if "/" in content_range:
                            try:
                                self._file_size = int(content_range.split("/")[-1])
                            except ValueError:
                                pass
        return self._file_size

    async def stream(self, offset: int = 0, limit: int | None = None) -> AsyncIterator[bytes]:
        request_headers = dict(self._headers)

        # Only attach a Range header when an actual sub-range is requested.
        if offset > 0 or limit is not None:
            range_end = "" if limit is None else str(offset + limit - 1)
            request_headers["range"] = f"bytes={offset}-{range_end}"

        async with create_aiohttp_session(self._url, headers=request_headers) as (session, proxy_url):
            async with session.get(
                self._url,
                headers=request_headers,
                proxy=proxy_url,
                allow_redirects=True,
            ) as resp:
                resp.raise_for_status()
                async for chunk in resp.content.iter_any():
                    yield chunk
|
||||||
469
mediaflow_proxy/remuxer/mkv_demuxer.py
Normal file
469
mediaflow_proxy/remuxer/mkv_demuxer.py
Normal file
@@ -0,0 +1,469 @@
|
|||||||
|
"""
|
||||||
|
Streaming MKV demuxer.
|
||||||
|
|
||||||
|
Reads an MKV byte stream via an async iterator and yields individual media
|
||||||
|
frames (MKVFrame) with absolute timestamps. Designed for on-the-fly remuxing
|
||||||
|
without buffering the entire file.
|
||||||
|
|
||||||
|
Architecture:
|
||||||
|
AsyncIterator[bytes] -> StreamBuffer -> EBML parsing -> MKVFrame yields
|
||||||
|
|
||||||
|
The demuxer works in two phases:
|
||||||
|
1. read_header(): Consume bytes until Tracks is fully parsed, returning
|
||||||
|
a list of MKVTrack with codec metadata.
|
||||||
|
2. iter_frames(): Yield MKVFrame objects from Cluster/SimpleBlock data
|
||||||
|
as clusters arrive.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from collections.abc import AsyncIterator
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
from mediaflow_proxy.remuxer.ebml_parser import (
|
||||||
|
CLUSTER,
|
||||||
|
CLUSTER_TIMESTAMP,
|
||||||
|
EBML_HEADER,
|
||||||
|
INFO,
|
||||||
|
MKVFrame,
|
||||||
|
MKVTrack,
|
||||||
|
SEGMENT,
|
||||||
|
SIMPLE_BLOCK,
|
||||||
|
BLOCK_GROUP,
|
||||||
|
TRACKS,
|
||||||
|
TIMESTAMP_SCALE,
|
||||||
|
DURATION,
|
||||||
|
UNKNOWN_SIZE,
|
||||||
|
extract_block_frames,
|
||||||
|
parse_tracks,
|
||||||
|
read_element_id,
|
||||||
|
read_element_size,
|
||||||
|
read_float,
|
||||||
|
read_uint,
|
||||||
|
_parse_block_group,
|
||||||
|
iter_elements,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class StreamBuffer:
    """
    Accumulating byte buffer for streaming EBML parsing.

    Chunks from an async byte source are appended at the back and later
    peeked, consumed, or skipped from the front. A running count of
    logically consumed bytes is kept so callers can track absolute file
    offsets while memory usage stays bounded.
    """

    def __init__(self) -> None:
        self._chunks: list[bytes] = []
        self._total: int = 0
        self._consumed: int = 0  # running count of logically consumed bytes

    @property
    def available(self) -> int:
        """Number of buffered bytes available for reading."""
        return self._total

    @property
    def consumed(self) -> int:
        """Total bytes consumed so far (for absolute offset tracking)."""
        return self._consumed

    def append(self, data: bytes) -> None:
        """Add bytes to the back of the buffer (empty chunks are ignored)."""
        if not data:
            return
        self._chunks.append(data)
        self._total += len(data)

    def peek(self, size: int) -> bytes:
        """Return up to *size* leading bytes without consuming them."""
        if size <= 0:
            return b""
        pieces: list[bytes] = []
        needed = size
        for chunk in self._chunks:
            if needed <= 0:
                break
            take = chunk[:needed]
            pieces.append(take)
            needed -= len(take)
        return b"".join(pieces)

    def get_all(self) -> bytes:
        """Return all buffered data as one bytes object (no consumption).

        Coalesces the internal chunk list so repeated calls are cheap.
        """
        if len(self._chunks) != 1:
            self._chunks = [b"".join(self._chunks)]
        return self._chunks[0]

    def consume(self, size: int) -> bytes:
        """Remove and return up to *size* bytes from the front of the buffer."""
        if size <= 0:
            return b""
        size = min(size, self._total)

        out = bytearray()
        need = size
        while need > 0 and self._chunks:
            head = self._chunks[0]
            if len(head) <= need:
                out += head
                need -= len(head)
                del self._chunks[0]
            else:
                out += head[:need]
                self._chunks[0] = head[need:]
                need = 0

        taken = len(out)
        self._total -= taken
        self._consumed += taken
        return bytes(out)

    def skip(self, size: int) -> int:
        """Discard up to *size* bytes from the front; return bytes skipped."""
        if size <= 0:
            return 0
        to_drop = min(size, self._total)
        left = to_drop
        while left > 0 and self._chunks:
            head = self._chunks[0]
            if len(head) <= left:
                left -= len(head)
                del self._chunks[0]
            else:
                self._chunks[0] = head[left:]
                left = 0
        self._total -= to_drop
        self._consumed += to_drop
        return to_drop
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class MKVHeader:
    """Parsed MKV header metadata produced by MKVDemuxer.read_header()."""

    # Track entries parsed from the Tracks element (codec id, track number, ...).
    tracks: list[MKVTrack] = field(default_factory=list)
    # MKV TimestampScale in nanoseconds; default 1_000_000 ns = 1 ms.
    timestamp_scale_ns: int = 1_000_000  # Default 1ms
    # Duration in milliseconds; 0.0 until populated from the Info element
    # (presumably by _parse_info_element — not visible here, confirm).
    duration_ms: float = 0.0
    segment_data_offset: int = 0  # Absolute byte offset of Segment children
|
||||||
|
|
||||||
|
|
||||||
|
class MKVDemuxer:
|
||||||
|
"""
|
||||||
|
Streaming async MKV demuxer.
|
||||||
|
|
||||||
|
Reads an MKV byte stream from an async iterator and provides:
|
||||||
|
- read_header(): Parse EBML header + Segment metadata + Tracks
|
||||||
|
- iter_frames(): Yield MKVFrame objects from Clusters
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
demuxer = MKVDemuxer()
|
||||||
|
header = await demuxer.read_header(source)
|
||||||
|
async for frame in demuxer.iter_frames(source):
|
||||||
|
process(frame)
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Minimum bytes to try parsing an element header (ID + size)
|
||||||
|
_MIN_ELEMENT_HEADER = 12
|
||||||
|
|
||||||
|
    def __init__(self) -> None:
        # Rolling byte buffer fed from the async source iterator.
        self._buf = StreamBuffer()
        # Populated by read_header(); None until the header has been parsed.
        self._header: MKVHeader | None = None
        self._scale_ms: float = 1.0  # timestamp_scale / 1_000_000
|
||||||
|
|
||||||
|
    @property
    def header(self) -> MKVHeader | None:
        """Parsed MKVHeader, or None if read_header() has not run yet."""
        return self._header
|
||||||
|
|
||||||
|
    async def read_header(self, source: AsyncIterator[bytes]) -> MKVHeader:
        """
        Read and parse the MKV header (EBML header, Segment, Info, Tracks).

        Consumes bytes from source until Tracks is fully parsed. Any leftover
        bytes (start of first Cluster) remain in the internal buffer for
        iter_frames().

        Args:
            source: Async iterator of raw MKV bytes, starting at file offset 0.

        Returns:
            MKVHeader with track info and timing metadata.

        Raises:
            ValueError: If the stream is not MKV, ends prematurely, or the
                EBML header declares an unknown size.
        """
        header = MKVHeader()

        # Phase 1: Accumulate enough data for EBML header + Segment header
        await self._ensure_bytes(source, 64)

        data = self._buf.get_all()
        if len(data) < 4:
            raise ValueError(
                f"Source ended prematurely: got {len(data)} bytes, need at least an EBML header (source disconnected?)"
            )
        pos = 0

        # Parse EBML Header: the first element must be the EBML magic.
        eid, pos = read_element_id(data, pos)
        if eid != EBML_HEADER:
            raise ValueError(f"Not an MKV file: expected EBML header, got 0x{eid:X}")
        size, pos = read_element_size(data, pos)
        if size == UNKNOWN_SIZE:
            raise ValueError("EBML header has unknown size")
        pos += size  # Skip EBML header content

        # Parse Segment element header; its size is ignored (often unknown),
        # only the offset where its children begin is recorded.
        eid, pos = read_element_id(data, pos)
        if eid != SEGMENT:
            raise ValueError(f"Expected Segment, got 0x{eid:X}")
        _seg_size, pos = read_element_size(data, pos)
        header.segment_data_offset = self._buf.consumed + pos

        # Phase 2: Parse Segment children until we have Tracks
        # We need to iterate top-level Segment children: SeekHead, Info, Tracks
        # Stop when we hit the first Cluster (media data).
        tracks_found = False

        while not tracks_found:
            # Ensure we have enough for element header
            await self._ensure_bytes(source, pos + self._MIN_ELEMENT_HEADER)
            data = self._buf.get_all()

            if pos >= len(data):
                break

            try:
                eid, pos2 = read_element_id(data, pos)
                size, pos3 = read_element_size(data, pos2)
            except (ValueError, IndexError):
                # Header may be split across chunk boundaries: fetch a little
                # more and retry once before giving up.
                await self._ensure_bytes(source, pos + 32)
                data = self._buf.get_all()
                try:
                    eid, pos2 = read_element_id(data, pos)
                    size, pos3 = read_element_size(data, pos2)
                except (ValueError, IndexError):
                    break

            if eid == CLUSTER:
                # Reached media data; header parsing is done.
                # Don't consume the Cluster -- leave it for iter_frames.
                break

            if size == UNKNOWN_SIZE:
                # Can't handle unknown-size elements in header
                logger.warning("[mkv_demuxer] Unknown-size element 0x%X in header at pos %d", eid, pos)
                break

            # Ensure we have the full element body buffered before parsing it.
            elem_end = pos3 + size
            await self._ensure_bytes(source, elem_end)
            data = self._buf.get_all()

            if eid == INFO:
                self._parse_info_element(data, pos3, pos3 + size, header)
            elif eid == TRACKS:
                header.tracks = parse_tracks(data, pos3, pos3 + size)
                tracks_found = True
                logger.info(
                    "[mkv_demuxer] Parsed %d tracks: %s",
                    len(header.tracks),
                    ", ".join(f"#{t.track_number}={t.codec_id}" for t in header.tracks),
                )

            # Skip any other child (SeekHead, Tags, ...) by jumping past it.
            pos = elem_end

        # Consume everything up to the current position (Cluster boundary)
        self._buf.consume(pos)

        # Set timing scale: ns-per-tick converted to ms-per-tick.
        self._scale_ms = header.timestamp_scale_ns / 1_000_000.0
        self._header = header
        return header
|
||||||
|
|
||||||
|
async def iter_frames(self, source: AsyncIterator[bytes]) -> AsyncIterator[MKVFrame]:
    """
    Yield MKVFrame objects from Cluster/SimpleBlock data.

    Must be called after read_header(). Continues consuming bytes from
    source, parsing Clusters and yielding individual frames.

    Top-level elements that are not Clusters are skipped; an unknown-size
    non-Cluster element terminates iteration, since its end cannot be found.
    """
    if self._header is None:
        raise RuntimeError("read_header() must be called before iter_frames()")

    while True:
        # Try to read the next element header
        if not await self._ensure_bytes_soft(source, self._MIN_ELEMENT_HEADER):
            break

        data = self._buf.get_all()
        pos = 0

        try:
            eid, pos2 = read_element_id(data, pos)
            size, pos3 = read_element_size(data, pos2)
        except (ValueError, IndexError):
            # Try to get more data — the ID/size varint may be split
            # across chunk boundaries; retry once with a larger buffer.
            if not await self._ensure_bytes_soft(source, len(data) + 4096):
                break
            data = self._buf.get_all()
            try:
                eid, pos2 = read_element_id(data, pos)
                size, pos3 = read_element_size(data, pos2)
            except (ValueError, IndexError):
                # Still unparseable: treat as end of usable stream.
                break

        if eid == CLUSTER:
            if size == UNKNOWN_SIZE:
                # Unknown-size Cluster: parse children until we hit the next
                # Cluster or run out of data
                self._buf.consume(pos3)  # consume Cluster header
                async for frame in self._parse_unknown_size_cluster(source):
                    yield frame
            else:
                # Known-size Cluster: ensure we have all data
                elem_end = pos3 + size
                await self._ensure_bytes(source, elem_end)
                data = self._buf.get_all()

                for frame in self._parse_cluster_data(data, pos3, pos3 + size):
                    yield frame

                self._buf.consume(elem_end)
        else:
            # Skip non-Cluster top-level elements
            if size == UNKNOWN_SIZE:
                break
            elem_end = pos3 + size
            if elem_end > len(data):
                # Need to skip bytes we don't have yet
                self._buf.consume(len(data))
                skip_remaining = elem_end - len(data)
                await self._skip_bytes(source, skip_remaining)
            else:
                self._buf.consume(elem_end)
|
||||||
|
|
||||||
|
def _parse_info_element(self, data: bytes, start: int, end: int, header: MKVHeader) -> None:
    """Scan the Segment Info element and record timing metadata on header.

    Captures the TimestampScale (ns per tick) and, when present, the
    Duration converted from ticks to milliseconds.
    """
    for child_id, body_off, body_len, _ in iter_elements(data, start, end):
        if child_id == TIMESTAMP_SCALE:
            header.timestamp_scale_ns = read_uint(data, body_off, body_len)
        elif child_id == DURATION:
            # Duration is stored in timestamp-scale units; convert to ms.
            ms_per_tick = header.timestamp_scale_ns / 1_000_000.0
            header.duration_ms = read_float(data, body_off, body_len) * ms_per_tick
|
||||||
|
|
||||||
|
def _parse_cluster_data(self, data: bytes, start: int, end: int) -> list[MKVFrame]:
    """Collect every frame contained in a fully-buffered (known-size) Cluster.

    Walks the Cluster's children, tracking the Cluster timecode and
    expanding SimpleBlocks and BlockGroups into MKVFrame objects with
    absolute millisecond timestamps.
    """
    timecode_base = 0
    collected: list[MKVFrame] = []

    for child_id, body_off, body_len, _ in iter_elements(data, start, end):
        if child_id == CLUSTER_TIMESTAMP:
            timecode_base = read_uint(data, body_off, body_len)
        elif child_id == SIMPLE_BLOCK:
            for trk, block_tc, block_flags, payloads in extract_block_frames(data, body_off, body_len):
                # Bit 7 of the SimpleBlock flags marks a keyframe.
                keyframe = bool(block_flags & 0x80)
                stamp_ms = (timecode_base + block_tc) * self._scale_ms
                collected.extend(
                    MKVFrame(
                        track_number=trk,
                        timestamp_ms=stamp_ms,
                        is_keyframe=keyframe,
                        data=payload,
                    )
                    for payload in payloads
                )
        elif child_id == BLOCK_GROUP:
            # BlockGroup parsing appends directly into the result list.
            _parse_block_group(data, body_off, body_off + body_len, timecode_base, self._scale_ms, collected)

    return collected
|
||||||
|
|
||||||
|
async def _parse_unknown_size_cluster(self, source: AsyncIterator[bytes]) -> AsyncIterator[MKVFrame]:
    """Parse an unknown-size Cluster by reading children until next Cluster.

    The caller has already consumed the Cluster's own ID/size header, so
    the buffer starts at the first child element. Children are parsed one
    at a time and consumed from the buffer; the loop ends when the next
    Cluster/Segment ID appears (left unconsumed for the caller) or the
    source is exhausted.
    """
    cluster_timecode = 0

    while True:
        if not await self._ensure_bytes_soft(source, self._MIN_ELEMENT_HEADER):
            break

        data = self._buf.get_all()
        pos = 0

        try:
            eid, pos2 = read_element_id(data, pos)
            size, pos3 = read_element_size(data, pos2)
        except (ValueError, IndexError):
            # Header may straddle a chunk boundary — retry once with more data.
            if not await self._ensure_bytes_soft(source, len(data) + 4096):
                break
            data = self._buf.get_all()
            try:
                eid, pos2 = read_element_id(data, pos)
                size, pos3 = read_element_size(data, pos2)
            except (ValueError, IndexError):
                break

        # A new Cluster or top-level element signals end of current Cluster
        if eid == CLUSTER or eid == SEGMENT:
            break

        if size == UNKNOWN_SIZE:
            # Nested unknown-size children are not supported; stop here.
            break

        elem_end = pos3 + size
        await self._ensure_bytes(source, elem_end)
        data = self._buf.get_all()

        if eid == CLUSTER_TIMESTAMP:
            cluster_timecode = read_uint(data, pos3, size)
        elif eid == SIMPLE_BLOCK:
            for track_num, rel_tc, flags, frame_list in extract_block_frames(data, pos3, size):
                # Bit 7 of the SimpleBlock flags marks a keyframe.
                is_kf = bool(flags & 0x80)
                abs_ts_ms = (cluster_timecode + rel_tc) * self._scale_ms
                for frame_data in frame_list:
                    yield MKVFrame(
                        track_number=track_num,
                        timestamp_ms=abs_ts_ms,
                        is_keyframe=is_kf,
                        data=frame_data,
                    )
        elif eid == BLOCK_GROUP:
            # BlockGroup parsing appends into a temp list, then re-yield.
            bg_frames = []
            _parse_block_group(data, pos3, pos3 + size, cluster_timecode, self._scale_ms, bg_frames)
            for frame in bg_frames:
                yield frame

        self._buf.consume(elem_end)
|
||||||
|
|
||||||
|
async def _ensure_bytes(self, source: AsyncIterator[bytes], needed: int) -> None:
    """Pull chunks from source until the buffer holds at least `needed` bytes.

    Returns silently (without raising) if the source ends first; callers
    must re-check buffer availability when that matters.
    """
    while True:
        if self._buf.available >= needed:
            return
        try:
            piece = await source.__anext__()
        except StopAsyncIteration:
            # Source exhausted before the target was reached.
            return
        self._buf.append(piece)
|
||||||
|
|
||||||
|
async def _ensure_bytes_soft(self, source: AsyncIterator[bytes], needed: int) -> bool:
    """Best-effort variant of _ensure_bytes.

    Returns True when `needed` bytes are buffered; on source exhaustion
    (or an empty chunk) returns whether ANY bytes remain buffered, so the
    caller can decide to keep parsing partial data.
    """
    while self._buf.available < needed:
        try:
            piece = await source.__anext__()
        except StopAsyncIteration:
            return self._buf.available > 0
        if not piece:
            # Empty chunk is treated as end-of-stream.
            return self._buf.available > 0
        self._buf.append(piece)
    return True
|
||||||
|
|
||||||
|
async def _skip_bytes(self, source: AsyncIterator[bytes], count: int) -> None:
    """Discard `count` bytes from source without buffering them.

    If the final chunk overshoots the skip target, the surplus tail is
    pushed into the buffer for subsequent parsing. Stops early if the
    source runs out.
    """
    left = count
    while left > 0:
        try:
            piece = await source.__anext__()
        except StopAsyncIteration:
            break
        if len(piece) <= left:
            left -= len(piece)
            continue
        # Chunk extends past the skip target: keep the excess.
        self._buf.append(piece[left:])
        left = 0
|
||||||
1376
mediaflow_proxy/remuxer/mp4_muxer.py
Normal file
1376
mediaflow_proxy/remuxer/mp4_muxer.py
Normal file
File diff suppressed because it is too large
Load Diff
834
mediaflow_proxy/remuxer/mp4_parser.py
Normal file
834
mediaflow_proxy/remuxer/mp4_parser.py
Normal file
@@ -0,0 +1,834 @@
|
|||||||
|
"""
|
||||||
|
MP4 container parser for moov atom probing.
|
||||||
|
|
||||||
|
Provides:
|
||||||
|
- MP4Index: seek index extracted from MP4 moov atom (parallel to MKVCueIndex)
|
||||||
|
- Top-level atom scanning
|
||||||
|
- Sample table parsers (stco, co64, stss, stsz, stts, stsc)
|
||||||
|
- Moov-to-cue-point builder
|
||||||
|
- rewrite_moov_offsets: adjust stco/co64 in moov for file rearrangement
|
||||||
|
|
||||||
|
The parsers are the inverse of the builder functions in mp4_muxer.py.
|
||||||
|
Box navigation reuses the pattern from ts_muxer.py's read_box/find_box/iter_boxes.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import bisect
|
||||||
|
import logging
|
||||||
|
import struct
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# MP4 Box Utilities
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Minimum bytes needed to read a standard box header
|
||||||
|
_BOX_HEADER_SIZE = 8
|
||||||
|
|
||||||
|
# ftyp brands that identify MP4/MOV containers
|
||||||
|
_MP4_BRANDS = {
|
||||||
|
b"isom",
|
||||||
|
b"iso2",
|
||||||
|
b"iso3",
|
||||||
|
b"iso4",
|
||||||
|
b"iso5",
|
||||||
|
b"iso6",
|
||||||
|
b"mp41",
|
||||||
|
b"mp42",
|
||||||
|
b"M4V ",
|
||||||
|
b"M4A ",
|
||||||
|
b"f4v ",
|
||||||
|
b"kddi",
|
||||||
|
b"avc1",
|
||||||
|
b"qt ",
|
||||||
|
b"MSNV",
|
||||||
|
b"dash",
|
||||||
|
b"3gp4",
|
||||||
|
b"3gp5",
|
||||||
|
b"3gp6",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def is_mp4_header(data: bytes) -> bool:
    """Return True if data begins with an MP4/MOV 'ftyp' box.

    A box whose declared size exceeds the available bytes is accepted as
    plausibly-valid-but-truncated; otherwise the major brand must be one
    of the known MP4 brands.
    """
    if len(data) < 8 or data[4:8] != b"ftyp":
        return False
    box_size = int.from_bytes(data[:4], "big")
    if box_size < 12:
        # Too small to even carry a major brand.
        return False
    if box_size > len(data):
        # Truncated buffer: can't check the brand, assume valid.
        return True
    return data[8:12] in _MP4_BRANDS
|
||||||
|
|
||||||
|
|
||||||
|
def read_box_header(data: bytes, offset: int) -> tuple[bytes, int, int] | None:
|
||||||
|
"""
|
||||||
|
Read a box header at the given offset.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
(box_type, header_size, total_box_size) or None if not enough data.
|
||||||
|
"""
|
||||||
|
if offset + 8 > len(data):
|
||||||
|
return None
|
||||||
|
|
||||||
|
size, box_type = struct.unpack_from(">I4s", data, offset)
|
||||||
|
header_size = 8
|
||||||
|
|
||||||
|
if size == 1: # Extended size (64-bit)
|
||||||
|
if offset + 16 > len(data):
|
||||||
|
return None
|
||||||
|
size = struct.unpack_from(">Q", data, offset + 8)[0]
|
||||||
|
header_size = 16
|
||||||
|
elif size == 0: # Box extends to end of data
|
||||||
|
size = len(data) - offset
|
||||||
|
|
||||||
|
return box_type, header_size, size
|
||||||
|
|
||||||
|
|
||||||
|
def iter_top_level_boxes(data: bytes):
    """Iterate over the top-level box headers in `data`.

    Yields:
        (box_type, header_size, total_size, data_offset) for each box,
        stopping at the first malformed/truncated header.
    """
    pos = 0
    while pos < len(data):
        header = read_box_header(data, pos)
        if header is None:
            return
        kind, hdr_len, box_len = header
        yield kind, hdr_len, box_len, pos + hdr_len
        if box_len == 0:
            # Defensive: a zero-size box would loop forever.
            return
        pos += box_len
|
||||||
|
|
||||||
|
|
||||||
|
def find_box(data: bytes, target: bytes) -> bytes | None:
    """Return the body (bytes after the header) of the first top-level box of type `target`, else None."""
    for kind, hdr_len, box_len, body_off in iter_top_level_boxes(data):
        if kind == target:
            # Body length is the declared size minus the header we skipped.
            return data[body_off : body_off + box_len - hdr_len]
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def iter_boxes(data: bytes):
    """Iterate child boxes, yielding (box_type, box_body_bytes) pairs."""
    for kind, hdr_len, box_len, body_off in iter_top_level_boxes(data):
        yield kind, data[body_off : body_off + box_len - hdr_len]
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Sample Table Parsers (inverse of mp4_muxer.py builders)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def parse_full_box_header(data: bytes) -> tuple[int, int, int]:
    """Parse an ISO full-box header: 1 version byte + 3 flag bytes.

    Returns:
        (version, flags, header_size); header_size is always 4, or the
        triple (0, 0, 0) when fewer than 4 bytes are available.
    """
    if len(data) < 4:
        return 0, 0, 0
    # The 24-bit flags field is big-endian.
    return data[0], int.from_bytes(data[1:4], "big"), 4
|
||||||
|
|
||||||
|
|
||||||
|
def parse_stco(data: bytes) -> list[int]:
    """Parse a Chunk Offset box body (stco): 32-bit file offsets.

    Layout: version(1) + flags(3) + entry_count(4) + [offset(4)]...
    Returns [] when the body is truncated.
    """
    if len(data) < 8:
        return []
    _, _, hdr = parse_full_box_header(data)
    count = struct.unpack_from(">I", data, hdr)[0]
    table_at = hdr + 4
    if len(data) < table_at + count * 4:
        return []
    # Unpack the whole offset table in one C-level call.
    return list(struct.unpack_from(f">{count}I", data, table_at))
|
||||||
|
|
||||||
|
|
||||||
|
def parse_co64(data: bytes) -> list[int]:
    """Parse a Chunk Offset box body (co64): 64-bit file offsets.

    Layout: version(1) + flags(3) + entry_count(4) + [offset(8)]...
    Returns [] when the body is truncated.
    """
    if len(data) < 8:
        return []
    _, _, hdr = parse_full_box_header(data)
    count = struct.unpack_from(">I", data, hdr)[0]
    table_at = hdr + 4
    if len(data) < table_at + count * 8:
        return []
    # Unpack the whole 64-bit offset table in one C-level call.
    return list(struct.unpack_from(f">{count}Q", data, table_at))
|
||||||
|
|
||||||
|
|
||||||
|
def parse_stss(data: bytes) -> list[int]:
    """Parse a Sync Sample box body (stss): 1-based keyframe sample numbers.

    Layout: version(1) + flags(3) + entry_count(4) + [sample_number(4)]...
    Returns [] when the body is truncated.
    """
    if len(data) < 8:
        return []
    _, _, hdr = parse_full_box_header(data)
    count = struct.unpack_from(">I", data, hdr)[0]
    table_at = hdr + 4
    if len(data) < table_at + count * 4:
        return []
    return list(struct.unpack_from(f">{count}I", data, table_at))
|
||||||
|
|
||||||
|
|
||||||
|
def parse_stsz(data: bytes) -> tuple[int, list[int]]:
    """Parse a Sample Size box body (stsz).

    Layout: version(1) + flags(3) + sample_size(4) + sample_count(4) + [size(4)]...

    Returns:
        (uniform_size, sizes_list). A non-zero uniform_size means every
        sample has that size and sizes_list is empty; otherwise the list
        holds per-sample sizes. Truncated bodies yield (0, []).
    """
    if len(data) < 12:
        return 0, []
    _, _, hdr = parse_full_box_header(data)
    uniform, count = struct.unpack_from(">II", data, hdr)
    if uniform > 0:
        # Constant-size samples: no per-sample table follows.
        return uniform, []
    table_at = hdr + 8
    if len(data) < table_at + count * 4:
        return 0, []
    return 0, list(struct.unpack_from(f">{count}I", data, table_at))
|
||||||
|
|
||||||
|
|
||||||
|
def parse_stts(data: bytes) -> list[tuple[int, int]]:
    """Parse a Time-to-Sample box body (stts): run-length encoded durations.

    Layout: version(1) + flags(3) + entry_count(4) + [sample_count(4) + sample_delta(4)]...

    Returns:
        List of (sample_count, sample_delta) pairs; [] when truncated.
    """
    if len(data) < 8:
        return []
    _, _, hdr = parse_full_box_header(data)
    count = struct.unpack_from(">I", data, hdr)[0]
    table_at = hdr + 4
    if len(data) < table_at + count * 8:
        return []
    # Read all 32-bit words at once, then pair them up.
    words = struct.unpack_from(f">{count * 2}I", data, table_at)
    return [(words[i], words[i + 1]) for i in range(0, len(words), 2)]
|
||||||
|
|
||||||
|
|
||||||
|
def parse_stsc(data: bytes) -> list[tuple[int, int, int]]:
    """Parse a Sample-to-Chunk box body (stsc).

    Layout: version(1) + flags(3) + entry_count(4) +
            [first_chunk(4) + samples_per_chunk(4) + sample_desc_index(4)]...

    Returns:
        List of (first_chunk, samples_per_chunk, sample_desc_index)
        triples (first_chunk is 1-based); [] when truncated.
    """
    if len(data) < 8:
        return []
    _, _, hdr = parse_full_box_header(data)
    count = struct.unpack_from(">I", data, hdr)[0]
    table_at = hdr + 4
    if len(data) < table_at + count * 12:
        return []
    # Read all 32-bit words at once, then group into triples.
    words = struct.unpack_from(f">{count * 3}I", data, table_at)
    return [(words[i], words[i + 1], words[i + 2]) for i in range(0, len(words), 3)]
|
||||||
|
|
||||||
|
|
||||||
|
def parse_mdhd(data: bytes) -> tuple[int, int]:
    """Parse a Media Header box body (mdhd) for timescale and duration.

    Version 1 uses 64-bit creation/modification/duration fields; version 0
    uses 32-bit fields. Returns (0, 0) when the body is too short.

    Returns:
        (timescale, duration) in media timescale units.
    """
    if len(data) < 4:
        return 0, 0
    if data[0] == 1:
        # v1 layout: version(1)+flags(3)+creation(8)+modification(8)
        # then timescale(4)+duration(8).
        if len(data) < 32:
            return 0, 0
        return (
            struct.unpack_from(">I", data, 20)[0],
            struct.unpack_from(">Q", data, 24)[0],
        )
    # v0 layout: version(1)+flags(3)+creation(4)+modification(4)
    # then timescale(4)+duration(4).
    if len(data) < 20:
        return 0, 0
    return (
        struct.unpack_from(">I", data, 12)[0],
        struct.unpack_from(">I", data, 16)[0],
    )
|
||||||
|
|
||||||
|
|
||||||
|
def parse_stsd_codec(data: bytes) -> str:
    """Extract the first sample-entry FourCC from an stsd box body.

    Returns the codec name (e.g. "avc1", "hvc1", "mp4a"), with trailing
    spaces stripped, or "" when the body is too short or not ASCII.
    """
    if len(data) < 16:
        return ""
    # Body layout: version(1)+flags(3)+entry_count(4), then the first
    # sample entry as size(4)+type(4) — the type sits at bytes 12..16.
    fourcc = data[12:16]
    try:
        return fourcc.decode("ascii").strip()
    except (UnicodeDecodeError, ValueError):
        return ""
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# MP4 Index (parallel to MKVCueIndex)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class MP4Index:
    """
    Seek index extracted from an MP4 file's moov atom.

    Parallel to ``MKVCueIndex`` for MKV files. Provides keyframe-indexed
    cue points for time-based seeking and the raw moov bytes needed to
    reconstruct a streamable (faststart) MP4 for on-the-fly demuxing.
    """

    duration_ms: float = 0.0
    timescale: int = 0
    cue_points: list[tuple[float, int]] = field(default_factory=list)  # [(time_ms, byte_offset), ...]
    moov_offset: int = 0  # Absolute file offset where moov atom starts
    moov_size: int = 0  # Total size of the moov atom (header + body)
    moov_data: bytes = b""  # Raw moov atom bytes (for prepending to mdat pipe)
    ftyp_data: bytes = b""  # Raw ftyp atom bytes (for prepending before moov)
    mdat_offset: int = 0  # Absolute file offset where mdat atom starts
    mdat_size: int = 0  # Total size of the mdat atom
    video_codec: str = ""  # e.g. "avc1", "hvc1", "mp4v"
    audio_codec: str = ""  # e.g. "mp4a", "ac-3"

    def byte_offset_for_time(self, time_ms: float) -> tuple[int, float]:
        """Locate the nearest keyframe at or before `time_ms`.

        Returns:
            (absolute_byte_offset, actual_keyframe_time_ms); (0, 0.0) when
            no cue points exist. Times earlier than the first cue clamp to
            the first keyframe.
        """
        if not self.cue_points:
            return 0, 0.0

        keyframe_times = [t for t, _ in self.cue_points]
        slot = max(bisect.bisect_right(keyframe_times, time_ms) - 1, 0)
        kf_time, kf_offset = self.cue_points[slot]
        return kf_offset, kf_time
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Moov -> Cue Points Builder
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def _find_nested_box(data: bytes, *path: bytes) -> bytes | None:
    """Descend through nested container boxes following `path`.

    Example: _find_nested_box(trak_body, b"mdia", b"minf", b"stbl").
    Returns the innermost box body, or None if any level is missing.
    """
    node = data
    for name in path:
        child = find_box(node, name)
        if child is None:
            return None
        node = child
    return node
|
||||||
|
|
||||||
|
|
||||||
|
def build_cue_points_from_moov(moov_body: bytes) -> tuple[list[tuple[float, int]], float, int, str, str]:
    """
    Parse a moov body to build keyframe-indexed cue points.

    Walks the first video trak's stbl to extract:
    - Chunk offsets (stco/co64)
    - Keyframe sample indices (stss)
    - Sample sizes (stsz)
    - Sample durations (stts)
    - Sample-to-chunk mapping (stsc)
    - Timescale and duration from mdhd

    Returns:
        (cue_points, duration_ms, timescale, video_codec, audio_codec)
        where cue_points is [(time_ms, absolute_byte_offset), ...]; the
        lists are empty (and codecs "") for whatever could not be found.
    """
    cue_points: list[tuple[float, int]] = []
    duration_ms = 0.0
    timescale = 0
    video_codec = ""
    audio_codec = ""

    # Find all traks
    video_stbl = None
    video_mdhd = None

    offset = 0
    data = moov_body
    while offset < len(data):
        result = read_box_header(data, offset)
        if result is None:
            break
        box_type, hdr_size, total_size = result

        if box_type == b"trak":
            trak_body = data[offset + hdr_size : offset + total_size]

            # Check handler type to identify video/audio
            hdlr_data = _find_nested_box(trak_body, b"mdia", b"hdlr")
            handler_type = b""
            if hdlr_data and len(hdlr_data) >= 12:
                # hdlr: version(1)+flags(3)+pre_defined(4)+handler_type(4)
                handler_type = hdlr_data[8:12]

            if handler_type == b"vide" and video_stbl is None:
                # First video track wins; later video traks are ignored.
                video_stbl = _find_nested_box(trak_body, b"mdia", b"minf", b"stbl")
                video_mdhd_data = _find_nested_box(trak_body, b"mdia", b"mdhd")
                if video_mdhd_data:
                    video_mdhd = video_mdhd_data

                stsd_data = _find_nested_box(trak_body, b"mdia", b"minf", b"stbl", b"stsd")
                if stsd_data:
                    video_codec = parse_stsd_codec(stsd_data)

            elif handler_type == b"soun" and not audio_codec:
                stsd_data = _find_nested_box(trak_body, b"mdia", b"minf", b"stbl", b"stsd")
                if stsd_data:
                    audio_codec = parse_stsd_codec(stsd_data)

        elif box_type == b"mvhd":
            # Fallback: parse mvhd for timescale/duration if no mdhd
            mvhd_body = data[offset + hdr_size : offset + total_size]
            if len(mvhd_body) >= 20:
                version = mvhd_body[0]
                if version == 1:
                    # v1: creation/modification are 8 bytes each.
                    if len(mvhd_body) >= 28:
                        ts = struct.unpack_from(">I", mvhd_body, 20)[0]
                        dur = struct.unpack_from(">Q", mvhd_body, 24)[0]
                        if timescale == 0:
                            timescale = ts
                            duration_ms = dur / ts * 1000.0 if ts else 0.0
                else:
                    # v0: creation/modification are 4 bytes each.
                    ts = struct.unpack_from(">I", mvhd_body, 12)[0]
                    dur = struct.unpack_from(">I", mvhd_body, 16)[0]
                    if timescale == 0:
                        timescale = ts
                        duration_ms = dur / ts * 1000.0 if ts else 0.0

        if total_size == 0:
            break
        offset += total_size

    # Parse mdhd for video timescale (more precise than mvhd)
    if video_mdhd:
        ts, dur = parse_mdhd(video_mdhd)
        if ts > 0:
            timescale = ts
            duration_ms = dur / ts * 1000.0

    if video_stbl is None:
        logger.warning("[mp4_parser] No video stbl found in moov")
        return cue_points, duration_ms, timescale, video_codec, audio_codec

    # Parse sample tables from video stbl
    stco_data = find_box(video_stbl, b"stco")
    co64_data = find_box(video_stbl, b"co64")
    stss_data = find_box(video_stbl, b"stss")
    stsz_data = find_box(video_stbl, b"stsz")
    stts_data = find_box(video_stbl, b"stts")
    stsc_data = find_box(video_stbl, b"stsc")

    # Chunk offsets (co64 takes precedence when both exist)
    chunk_offsets = parse_co64(co64_data) if co64_data else (parse_stco(stco_data) if stco_data else [])

    # Keyframe sample numbers (1-based)
    keyframe_samples = set(parse_stss(stss_data)) if stss_data else set()
    all_are_keyframes = not stss_data  # No stss means all samples are sync

    # Sample sizes
    uniform_size, size_list = parse_stsz(stsz_data) if stsz_data else (0, [])

    # Sample durations (run-length encoded)
    stts_entries = parse_stts(stts_data) if stts_data else []

    # Sample-to-chunk mapping
    stsc_entries = parse_stsc(stsc_data) if stsc_data else []

    if not chunk_offsets or timescale == 0:
        logger.warning(
            "[mp4_parser] Missing data: chunks=%d, timescale=%d",
            len(chunk_offsets),
            timescale,
        )
        return cue_points, duration_ms, timescale, video_codec, audio_codec

    # Expand stts to per-sample durations
    sample_durations: list[int] = []
    for count, delta in stts_entries:
        sample_durations.extend([delta] * count)

    # Expand stsc to determine which samples belong to which chunk
    # Build a mapping: chunk_index (0-based) -> samples_per_chunk
    total_chunks = len(chunk_offsets)
    chunk_sample_counts: list[int] = [0] * total_chunks

    if stsc_entries:
        for i, (first_chunk, spc, _sdi) in enumerate(stsc_entries):
            # first_chunk is 1-based; each entry applies until the next
            # entry's first_chunk (or the last chunk for the final entry).
            start = first_chunk - 1
            if i + 1 < len(stsc_entries):
                end = stsc_entries[i + 1][0] - 1
            else:
                end = total_chunks
            for c in range(start, end):
                if c < total_chunks:
                    chunk_sample_counts[c] = spc
    else:
        # Default: 1 sample per chunk
        chunk_sample_counts = [1] * total_chunks

    # Count total samples
    total_samples = sum(chunk_sample_counts)

    # Get per-sample sizes
    if uniform_size > 0:
        sample_sizes = [uniform_size] * total_samples
    else:
        sample_sizes = size_list

    # Build cumulative timestamp for each sample and map keyframes to byte offsets
    current_sample = 0  # 0-based sample index
    current_time = 0  # in timescale units

    for chunk_idx, chunk_offset in enumerate(chunk_offsets):
        spc = chunk_sample_counts[chunk_idx] if chunk_idx < len(chunk_sample_counts) else 1
        byte_pos = chunk_offset

        for s in range(spc):
            sample_num = current_sample + 1  # 1-based for stss comparison
            is_keyframe = all_are_keyframes or sample_num in keyframe_samples

            if is_keyframe:
                time_ms = current_time / timescale * 1000.0
                cue_points.append((time_ms, byte_pos))

            # Advance byte position by this sample's size
            if current_sample < len(sample_sizes):
                byte_pos += sample_sizes[current_sample]

            # Advance timestamp
            if current_sample < len(sample_durations):
                current_time += sample_durations[current_sample]

            current_sample += 1

    logger.info(
        "[mp4_parser] Built %d cue points from %d samples, duration=%.1fs, video=%s, audio=%s",
        len(cue_points),
        total_samples,
        duration_ms / 1000.0,
        video_codec,
        audio_codec,
    )

    return cue_points, duration_ms, timescale, video_codec, audio_codec
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Moov Offset Rewriting (for faststart pipe construction)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def _rewrite_stco_in_place(data: bytearray, box_start: int, box_size: int, delta: int) -> int:
|
||||||
|
"""Rewrite stco chunk offsets by adding delta. Returns number of entries fixed."""
|
||||||
|
# FullBox header: version(1) + flags(3) = 4 bytes
|
||||||
|
body_start = box_start + 4
|
||||||
|
if body_start + 4 > box_start + box_size:
|
||||||
|
return 0
|
||||||
|
entry_count = struct.unpack_from(">I", data, body_start)[0]
|
||||||
|
pos = body_start + 4
|
||||||
|
for _ in range(entry_count):
|
||||||
|
if pos + 4 > box_start + box_size:
|
||||||
|
break
|
||||||
|
old_val = struct.unpack_from(">I", data, pos)[0]
|
||||||
|
struct.pack_into(">I", data, pos, old_val + delta)
|
||||||
|
pos += 4
|
||||||
|
return entry_count
|
||||||
|
|
||||||
|
|
||||||
|
def _rewrite_co64_in_place(data: bytearray, box_start: int, box_size: int, delta: int) -> int:
|
||||||
|
"""Rewrite co64 chunk offsets by adding delta. Returns number of entries fixed."""
|
||||||
|
body_start = box_start + 4
|
||||||
|
if body_start + 4 > box_start + box_size:
|
||||||
|
return 0
|
||||||
|
entry_count = struct.unpack_from(">I", data, body_start)[0]
|
||||||
|
pos = body_start + 4
|
||||||
|
for _ in range(entry_count):
|
||||||
|
if pos + 8 > box_start + box_size:
|
||||||
|
break
|
||||||
|
old_val = struct.unpack_from(">Q", data, pos)[0]
|
||||||
|
struct.pack_into(">Q", data, pos, old_val + delta)
|
||||||
|
pos += 8
|
||||||
|
return entry_count
|
||||||
|
|
||||||
|
|
||||||
|
def _walk_and_rewrite(data: bytearray, start: int, end: int, delta: int) -> int:
    """
    Recursively walk boxes within [start, end) looking for stco/co64 boxes
    and rewriting their offsets.

    Returns total number of offset entries rewritten.
    """
    total = 0
    offset = start
    while offset + 8 <= end:
        # Standard ISO BMFF box header: 32-bit size followed by a 4-byte type.
        size = struct.unpack_from(">I", data, offset)[0]
        box_type = data[offset + 4 : offset + 8]
        hdr_size = 8

        if size == 1:
            # size == 1 signals a 64-bit "largesize" field after the type code.
            if offset + 16 > end:
                break
            size = struct.unpack_from(">Q", data, offset + 8)[0]
            hdr_size = 16
        elif size == 0:
            # size == 0 means the box extends to the end of the enclosing span.
            size = end - offset

        if size < 8 or offset + size > end:
            # Malformed or truncated box -- stop walking this level.
            break

        body_start = offset + hdr_size
        body_end = offset + size

        if box_type == b"stco":
            total += _rewrite_stco_in_place(data, body_start, size - hdr_size, delta)
        elif box_type == b"co64":
            total += _rewrite_co64_in_place(data, body_start, size - hdr_size, delta)
        elif box_type in (b"moov", b"trak", b"mdia", b"minf", b"stbl"):
            # Container box -- recurse into children
            total += _walk_and_rewrite(data, body_start, body_end, delta)

        offset += size

    return total
|
||||||
|
|
||||||
|
|
||||||
|
def extract_video_track_from_moov(moov_data: bytes):
    """
    Extract video codec configuration from an MP4 moov atom.

    Walks the moov box tree to find the first video trak, extracts its
    resolution and codec-private data (avcC/hvcC), and returns a synthetic
    ``MKVTrack`` suitable for building an fMP4 init segment.

    Returns:
        An ``MKVTrack`` with video metadata, or ``None`` if no video track
        is found.
    """
    # Local import avoids a module-level import cycle with the ebml parser.
    from mediaflow_proxy.remuxer.ebml_parser import (
        CODEC_ID_H264,
        CODEC_ID_H265,
        MKVTrack,
    )

    # Strip the moov box header to get the body
    if len(moov_data) < 8:
        return None
    raw_size = struct.unpack_from(">I", moov_data, 0)[0]
    # raw_size == 1 means an extended 16-byte header (64-bit largesize).
    hdr_size = 16 if raw_size == 1 else 8
    moov_body = moov_data[hdr_size:]

    # Walk traks looking for video handler
    offset = 0
    while offset < len(moov_body):
        result = read_box_header(moov_body, offset)
        if result is None:
            break
        box_type, box_hdr_size, total_size = result

        if box_type == b"trak":
            trak_body = moov_body[offset + box_hdr_size : offset + total_size]

            # Check handler type
            hdlr_data = _find_nested_box(trak_body, b"mdia", b"hdlr")
            handler_type = b""
            if hdlr_data and len(hdlr_data) >= 12:
                # hdlr FullBox: version/flags(4) + pre_defined(4) + handler_type(4)
                handler_type = hdlr_data[8:12]

            if handler_type == b"vide":
                # Found video trak -- extract stsd for codec config
                stsd_data = _find_nested_box(trak_body, b"mdia", b"minf", b"stbl", b"stsd")
                if not stsd_data or len(stsd_data) < 16:
                    offset += total_size
                    continue

                codec_name = parse_stsd_codec(stsd_data)

                # Map MP4 codec names to MKV codec IDs
                if codec_name in ("avc1", "avc3"):
                    mkv_codec_id = CODEC_ID_H264
                elif codec_name in ("hvc1", "hev1"):
                    mkv_codec_id = CODEC_ID_H265
                else:
                    mkv_codec_id = f"V_MP4/{codec_name}"

                # Extract codec private (avcC or hvcC box) from inside the
                # sample entry. The stsd structure is:
                #   version(1) + flags(3) + entry_count(4)
                #   then entry: size(4) + type(4) + ... + nested boxes
                # The avcC/hvcC is a child box of the sample entry.
                codec_private = b""
                width = 0
                height = 0

                # Parse sample entry to get width/height and codec config
                entry_start = 8  # skip version+flags+entry_count
                if entry_start + 8 <= len(stsd_data):
                    entry_size = struct.unpack_from(">I", stsd_data, entry_start)[0]
                    entry_body_start = entry_start + 8  # skip size+type
                    entry_end = min(entry_start + entry_size, len(stsd_data))

                    # Visual sample entry: 6 reserved + 2 data_ref_idx + ...
                    # At offset 24 from entry body start: width(2) + height(2)
                    vis_offset = entry_body_start + 24
                    if vis_offset + 4 <= entry_end:
                        width = struct.unpack_from(">H", stsd_data, vis_offset)[0]
                        height = struct.unpack_from(">H", stsd_data, vis_offset + 2)[0]

                    # Scan nested boxes for avcC or hvcC
                    # Visual sample entry fixed fields = 70 bytes from entry body
                    nested_start = entry_body_start + 70
                    if nested_start < entry_end:
                        nested_data = stsd_data[nested_start:entry_end]
                        for target in (b"avcC", b"hvcC"):
                            found = find_box(nested_data, target)
                            if found:
                                codec_private = found
                                break

                # Get duration from mdhd if available
                default_duration_ns = 0
                mdhd_data = _find_nested_box(trak_body, b"mdia", b"mdhd")
                if mdhd_data and len(mdhd_data) >= 20:
                    version = mdhd_data[0]
                    if version == 1 and len(mdhd_data) >= 28:
                        # v1 mdhd: creation(8) + modification(8), then timescale at 20.
                        ts = struct.unpack_from(">I", mdhd_data, 20)[0]
                        dur = struct.unpack_from(">Q", mdhd_data, 24)[0]
                    else:
                        # v0 mdhd: creation(4) + modification(4), then timescale at 12.
                        ts = struct.unpack_from(">I", mdhd_data, 12)[0]
                        dur = struct.unpack_from(">I", mdhd_data, 16)[0]
                    if ts > 0 and dur > 0:
                        # Rough estimate: assume 24fps if we can't determine.
                        # NOTE(review): frame rate is not derived from ts/dur here,
                        # so the per-frame duration is always the 24fps constant.
                        default_duration_ns = int(1_000_000_000 / 24)

                return MKVTrack(
                    track_number=1,
                    track_type=1,  # video
                    codec_id=mkv_codec_id,
                    codec_private=codec_private,
                    pixel_width=width,
                    pixel_height=height,
                    default_duration_ns=default_duration_ns,
                )

        offset += total_size

    return None
|
||||||
|
|
||||||
|
|
||||||
|
def rewrite_moov_offsets(moov_data: bytes, delta: int) -> bytes:
    """
    Rewrite all stco/co64 chunk offsets in a moov atom by adding ``delta``.

    This is needed when rearranging an MP4 file for pipe streaming:
    the original moov's chunk offsets reference positions in the original
    file layout. When we prepend moov before mdat, the offsets must be
    shifted by ``delta = moov_size - original_mdat_offset``.

    Args:
        moov_data: Raw bytes of the complete moov box (header + body).
        delta: Offset adjustment to add to every chunk offset.

    Returns:
        Modified moov bytes with updated chunk offsets.
    """
    mutable = bytearray(moov_data)

    # A declared 32-bit size of 1 signals an extended (16-byte) box header.
    declared_size = struct.unpack_from(">I", mutable, 0)[0]
    header_len = 16 if declared_size == 1 else 8

    # Walk the moov body only (skip the header) and patch every offset table.
    rewritten = _walk_and_rewrite(mutable, header_len, len(mutable), delta)
    logger.info("[mp4_parser] Rewrote %d chunk offset entries (delta=%+d)", rewritten, delta)

    return bytes(mutable)
|
||||||
608
mediaflow_proxy/remuxer/pyav_demuxer.py
Normal file
608
mediaflow_proxy/remuxer/pyav_demuxer.py
Normal file
@@ -0,0 +1,608 @@
|
|||||||
|
"""
|
||||||
|
Universal PyAV-based streaming demuxer.
|
||||||
|
|
||||||
|
Bridges async byte streams to PyAV's synchronous I/O using an OS pipe,
|
||||||
|
allowing on-the-fly demuxing of any container format (MKV, MP4, TS,
|
||||||
|
FLV, WebM, etc.) from an async source.
|
||||||
|
|
||||||
|
Architecture:
|
||||||
|
AsyncIterator[bytes] --> async feeder task --> queue.Queue --> writer thread (pipe)
|
||||||
|
|
|
||||||
|
OS pipe (kernel buffer)
|
||||||
|
|
|
||||||
|
demux thread: av.open + discover + demux
|
||||||
|
|
|
||||||
|
queue.Queue --> run_in_executor consumer
|
||||||
|
|
||||||
|
Performance: Uses plain threading.Queue on both sides (writer input and
|
||||||
|
packet output) to avoid per-item ``run_coroutine_threadsafe`` overhead.
|
||||||
|
The async/thread bridge is done via ``run_in_executor`` on the consumer
|
||||||
|
side and a dedicated asyncio task on the producer side.
|
||||||
|
|
||||||
|
For MP4 inputs, the caller (transcode_handler) prepends the moov atom
|
||||||
|
to the stream so PyAV receives a "faststart"-style MP4 through the pipe.
|
||||||
|
This allows true on-the-fly demuxing for all container formats.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import queue
|
||||||
|
import threading
|
||||||
|
from collections.abc import AsyncIterator
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
import av
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Sentinel object to signal end-of-stream in queues
|
||||||
|
_SENTINEL = object()
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
class DemuxedStream:
    """Metadata about a demuxed stream."""

    # Index of the stream within the source container.
    index: int
    # Codec short name as reported by PyAV (e.g. "h264", "aac").
    codec_name: str
    codec_type: str  # "video" or "audio"
    # Video-specific
    width: int = 0
    height: int = 0
    fps: float = 0.0
    pixel_format: str = ""
    # Audio-specific
    sample_rate: int = 0
    channels: int = 0
    # Timing
    time_base_num: int = 1
    time_base_den: int = 1000
    duration_seconds: float = 0.0
    # Raw codec extradata (e.g. SPS/PPS for H.264, AudioSpecificConfig for AAC)
    extradata: bytes = b""
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class DemuxedPacket:
|
||||||
|
"""A demuxed packet with timing info."""
|
||||||
|
|
||||||
|
stream_index: int
|
||||||
|
codec_type: str # "video" or "audio"
|
||||||
|
data: bytes
|
||||||
|
pts: int # Presentation timestamp in stream time_base units
|
||||||
|
dts: int # Decode timestamp in stream time_base units
|
||||||
|
duration: int # Duration in stream time_base units
|
||||||
|
is_keyframe: bool
|
||||||
|
time_base_num: int
|
||||||
|
time_base_den: int
|
||||||
|
# Optional decoded frame when decode_video/decode_audio is True
|
||||||
|
# av.VideoFrame for video, av.AudioFrame for audio
|
||||||
|
decoded_frame: object = None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def pts_seconds(self) -> float:
|
||||||
|
if self.time_base_den == 0:
|
||||||
|
return 0.0
|
||||||
|
return self.pts * self.time_base_num / self.time_base_den
|
||||||
|
|
||||||
|
@property
|
||||||
|
def dts_seconds(self) -> float:
|
||||||
|
if self.time_base_den == 0:
|
||||||
|
return 0.0
|
||||||
|
return self.dts * self.time_base_num / self.time_base_den
|
||||||
|
|
||||||
|
@property
|
||||||
|
def duration_seconds(self) -> float:
|
||||||
|
if self.time_base_den == 0:
|
||||||
|
return 0.0
|
||||||
|
return self.duration * self.time_base_num / self.time_base_den
|
||||||
|
|
||||||
|
|
||||||
|
class PyAVDemuxer:
|
||||||
|
"""
|
||||||
|
Streaming demuxer using PyAV with pipe-based I/O.
|
||||||
|
|
||||||
|
All container I/O happens in background threads. The writer thread
|
||||||
|
feeds source bytes into a pipe; a single demux thread opens the
|
||||||
|
container, discovers streams, and demuxes packets -- all on the
|
||||||
|
same file object, ensuring the pipe's read cursor is never lost.
|
||||||
|
|
||||||
|
Performance optimisation: both the writer-input side and the
|
||||||
|
packet-output side use plain ``queue.Queue`` (no event-loop
|
||||||
|
involvement per item). The async/thread bridge is done via
|
||||||
|
``run_in_executor`` on the consumer and an asyncio task on the
|
||||||
|
producer, eliminating ~1700 ``run_coroutine_threadsafe`` round-trips
|
||||||
|
per 30 s of 4K content.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
demuxer = PyAVDemuxer()
|
||||||
|
await demuxer.start(source_async_iter)
|
||||||
|
# demuxer.video_stream / audio_stream are now available
|
||||||
|
async for packet in demuxer.iter_packets():
|
||||||
|
if packet.codec_type == "video":
|
||||||
|
...
|
||||||
|
"""
|
||||||
|
|
||||||
|
    def __init__(self, decode_video: bool = False, decode_audio: bool = False) -> None:
        """
        Args:
            decode_video: If True, the demux thread will decode video packets
                using the container's codec context and attach decoded frames
                to DemuxedPacket.decoded_frame. This avoids format conversion
                issues with standalone decoders (HVCC vs Annex B).
            decode_audio: If True, the demux thread will decode audio packets
                using the container's codec context and attach decoded frames
                to DemuxedPacket.decoded_frame. This is needed for codecs like
                Vorbis/Opus where the standalone decoder requires codec headers
                that are only available in the container context. Can also be
                set after start() returns (before packets are consumed) via
                the ``enable_audio_decode()`` method.
        """
        self._decode_video = decode_video
        self._decode_audio = decode_audio
        # Events the demux thread waits on until the caller has decided
        # whether packets should be decoded in-thread (see start()).
        self._video_decode_decided = threading.Event()
        self._audio_decode_decided = threading.Event()
        # If decode flags were set at construction time, mark decided immediately
        if decode_video:
            self._video_decode_decided.set()
        if decode_audio:
            self._audio_decode_decided.set()
        self._container: av.InputContainer | None = None
        self._video_stream: DemuxedStream | None = None
        self._audio_stream: DemuxedStream | None = None
        # Thread-safe queues (no event-loop involvement per put/get)
        self._packet_queue: queue.Queue | None = None
        self._source_queue: queue.Queue | None = None
        self._demux_thread: threading.Thread | None = None
        self._writer_thread: threading.Thread | None = None
        self._feeder_task: asyncio.Task | None = None
        # OS pipe file descriptors; ownership moves to the threads in start().
        self._write_fd: int | None = None
        self._read_fd: int | None = None
|
||||||
|
|
||||||
|
    @property
    def video_stream(self) -> DemuxedStream | None:
        """Metadata for the selected video stream, or ``None`` if absent / before start()."""
        return self._video_stream
|
||||||
|
|
||||||
|
    @property
    def audio_stream(self) -> DemuxedStream | None:
        """Metadata for the selected audio stream, or ``None`` if absent / before start()."""
        return self._audio_stream
|
||||||
|
|
||||||
|
    def enable_video_decode(self, enable: bool = True) -> None:
        """
        Enable or disable in-thread video decoding.

        Call this after ``start()`` returns (stream metadata is available)
        but before consuming packets via ``iter_packets()``. The demux
        thread waits for this signal before processing video packets.
        """
        # Order matters: set the flag before the event so the demux thread
        # observes the final value once woken.
        self._decode_video = enable
        self._video_decode_decided.set()
|
||||||
|
|
||||||
|
    def enable_audio_decode(self, enable: bool = True) -> None:
        """
        Enable or disable in-thread audio decoding.

        Call this after ``start()`` returns (stream metadata is available)
        but before consuming packets via ``iter_packets()``. The demux
        thread waits for this signal before processing audio packets.
        """
        # Order matters: set the flag before the event so the demux thread
        # observes the final value once woken.
        self._decode_audio = enable
        self._audio_decode_decided.set()
|
||||||
|
|
||||||
|
# ── Writer side ──────────────────────────────────────────────────
|
||||||
|
|
||||||
|
    async def _async_feeder(self, source: AsyncIterator[bytes]) -> None:
        """
        Async task: pull chunks from the async source and push them
        into a plain ``queue.Queue`` for the writer thread.

        This replaces the old per-chunk ``run_coroutine_threadsafe``
        pattern, batching the async-to-sync bridge into one task.

        ``queue.Queue.put()`` is a blocking call, so we use
        ``run_in_executor`` to avoid blocking the event loop when the
        queue is full.
        """
        loop = asyncio.get_running_loop()
        sq = self._source_queue
        try:
            async for chunk in source:
                # Blocking put runs on an executor thread; this yields
                # back-pressure when the writer falls behind.
                await loop.run_in_executor(None, sq.put, chunk)
        except (asyncio.CancelledError, GeneratorExit):
            pass
        except Exception:
            # NOTE(review): source errors are swallowed here; downstream only
            # sees EOF via the sentinel. Presumably best-effort by design.
            pass
        finally:
            # Always signal end-of-stream so the writer thread can exit.
            sq.put(_SENTINEL)
|
||||||
|
|
||||||
|
    def _write_chunks_sync(self) -> None:
        """
        Writer thread: pull pre-buffered chunks from ``_source_queue``
        and write to the OS pipe. No event-loop interaction.
        """
        write_fd = self._write_fd
        sq = self._source_queue
        try:
            while True:
                # queue.Empty after 30 s idle lands in the except below and
                # ends the writer, which closes the pipe (reader sees EOF).
                chunk = sq.get(timeout=30.0)
                if chunk is _SENTINEL:
                    break
                os.write(write_fd, chunk)
        except Exception:
            # BrokenPipeError (reader gone) or queue timeout: treat as EOF.
            pass
        finally:
            # Closing the write end makes the demux side observe EOF.
            try:
                os.close(write_fd)
            except OSError:
                pass
            self._write_fd = None
|
||||||
|
|
||||||
|
# ── Demux side ───────────────────────────────────────────────────
|
||||||
|
|
||||||
|
    async def start(self, source: AsyncIterator[bytes]) -> None:
        """
        Start pipe-based streaming: writer thread feeds the pipe, a single
        demux thread opens the container, discovers streams, and begins
        enqueuing packets.

        After this returns, ``video_stream`` and ``audio_stream`` are
        populated and packets are being enqueued for ``iter_packets()``.
        """
        loop = asyncio.get_running_loop()

        # Create OS pipe
        self._read_fd, self._write_fd = os.pipe()

        # Source buffer queue (async feeder task -> writer thread)
        self._source_queue = queue.Queue(maxsize=256)

        # Kick off the async feeder task
        self._feeder_task = asyncio.create_task(self._async_feeder(source))

        # Start writer thread (drains source_queue into the pipe)
        self._writer_thread = threading.Thread(
            target=self._write_chunks_sync,
            daemon=True,
            name="pyav-writer",
        )
        self._writer_thread.start()

        # Packet queue for demux-thread -> async consumer bridge
        self._packet_queue = queue.Queue(maxsize=128)
        streams_ready = threading.Event()

        def _open_and_demux():
            """
            Single background thread: open container, discover streams,
            demux all packets.

            Critical: av.open(), _discover_streams(), and container.demux()
            all happen on the same file object in the same thread. This
            ensures the pipe read cursor is never lost between open and demux.
            """
            pkt_count = 0
            pq = self._packet_queue
            try:
                # Open container from read end of pipe
                read_file = os.fdopen(self._read_fd, "rb")
                self._read_fd = None  # ownership transferred

                self._container = av.open(
                    read_file,
                    mode="r",
                    options={
                        # Tolerate mid-stream joins / broken data in live TS
                        "err_detect": "ignore_err",
                        "fflags": "+discardcorrupt+genpts",
                    },
                )
                self._discover_streams()

                # Signal stream metadata is available
                streams_ready.set()

                if self._video_stream is None and self._audio_stream is None:
                    logger.warning("[pyav_demuxer] No video or audio streams found")
                    return

                # Select streams to demux
                streams_to_demux = []
                if self._video_stream is not None:
                    streams_to_demux.append(self._container.streams[self._video_stream.index])
                if self._audio_stream is not None:
                    streams_to_demux.append(self._container.streams[self._audio_stream.index])

                # Wait for the caller to decide on video/audio decoding
                # (if not already decided at construction time).
                if not self._video_decode_decided.is_set():
                    self._video_decode_decided.wait(timeout=10.0)
                if not self._audio_decode_decided.is_set():
                    self._audio_decode_decided.wait(timeout=10.0)

                # Cache stream objects and time_base for the hot loop
                video_stream_obj = (
                    self._container.streams[self._video_stream.index] if self._video_stream is not None else None
                )
                audio_stream_obj = (
                    self._container.streams[self._audio_stream.index] if self._audio_stream is not None else None
                )

                video_tb_num = video_stream_obj.time_base.numerator if video_stream_obj else 1
                video_tb_den = video_stream_obj.time_base.denominator if video_stream_obj else 1
                audio_tb_num = audio_stream_obj.time_base.numerator if audio_stream_obj else 1
                audio_tb_den = audio_stream_obj.time_base.denominator if audio_stream_obj else 1

                # Snapshot the decode flags: the events above guarantee the
                # caller's final values are visible by now.
                decode_video = self._decode_video
                decode_audio = self._decode_audio

                # Demux and enqueue packets -- plain queue.put(), no event loop
                for packet in self._container.demux(*streams_to_demux):
                    if packet.size == 0:
                        continue

                    stream = self._container.streams[packet.stream_index]
                    is_video = stream.type == "video"
                    is_audio = stream.type == "audio"

                    # Optionally decode video packets in-thread
                    if decode_video and is_video and video_stream_obj is not None:
                        try:
                            frames = video_stream_obj.codec_context.decode(packet)
                        except Exception:
                            # Corrupt packet: skip, decoder keeps its state.
                            frames = []
                        for frame in frames:
                            pq.put(
                                DemuxedPacket(
                                    stream_index=packet.stream_index,
                                    codec_type="video",
                                    data=b"",
                                    pts=int(frame.pts) if frame.pts is not None else 0,
                                    dts=int(frame.pts) if frame.pts is not None else 0,
                                    duration=int(packet.duration) if packet.duration is not None else 0,
                                    is_keyframe=frame.key_frame,
                                    time_base_num=video_tb_num,
                                    time_base_den=video_tb_den,
                                    decoded_frame=frame,
                                )
                            )
                            pkt_count += 1

                    # Optionally decode audio packets in-thread
                    elif decode_audio and is_audio and audio_stream_obj is not None:
                        try:
                            frames = audio_stream_obj.codec_context.decode(packet)
                        except Exception:
                            frames = []
                        for frame in frames:
                            pq.put(
                                DemuxedPacket(
                                    stream_index=packet.stream_index,
                                    codec_type="audio",
                                    data=b"",
                                    pts=int(frame.pts) if frame.pts is not None else 0,
                                    dts=int(frame.pts) if frame.pts is not None else 0,
                                    duration=int(packet.duration) if packet.duration is not None else 0,
                                    is_keyframe=False,
                                    time_base_num=audio_tb_num,
                                    time_base_den=audio_tb_den,
                                    decoded_frame=frame,
                                )
                            )
                            pkt_count += 1

                    else:
                        # Pass-through path: forward the compressed packet as-is.
                        tb_num = video_tb_num if is_video else audio_tb_num
                        tb_den = video_tb_den if is_video else audio_tb_den
                        pq.put(
                            DemuxedPacket(
                                stream_index=packet.stream_index,
                                codec_type=stream.type,
                                data=bytes(packet),
                                pts=int(packet.pts) if packet.pts is not None else 0,
                                dts=int(packet.dts) if packet.dts is not None else 0,
                                duration=int(packet.duration) if packet.duration is not None else 0,
                                is_keyframe=packet.is_keyframe,
                                time_base_num=tb_num,
                                time_base_den=tb_den,
                            )
                        )
                        pkt_count += 1

                # Flush the video decoder if we were decoding
                if decode_video and video_stream_obj is not None:
                    try:
                        for frame in video_stream_obj.codec_context.decode(None):
                            pq.put(
                                DemuxedPacket(
                                    stream_index=video_stream_obj.index,
                                    codec_type="video",
                                    data=b"",
                                    pts=int(frame.pts) if frame.pts is not None else 0,
                                    dts=int(frame.pts) if frame.pts is not None else 0,
                                    duration=0,
                                    is_keyframe=frame.key_frame,
                                    time_base_num=video_tb_num,
                                    time_base_den=video_tb_den,
                                    decoded_frame=frame,
                                )
                            )
                            pkt_count += 1
                    except Exception:
                        pass

                # Flush the audio decoder if we were decoding
                if decode_audio and audio_stream_obj is not None:
                    try:
                        for frame in audio_stream_obj.codec_context.decode(None):
                            pq.put(
                                DemuxedPacket(
                                    stream_index=audio_stream_obj.index,
                                    codec_type="audio",
                                    data=b"",
                                    pts=int(frame.pts) if frame.pts is not None else 0,
                                    dts=int(frame.pts) if frame.pts is not None else 0,
                                    duration=0,
                                    is_keyframe=False,
                                    time_base_num=audio_tb_num,
                                    time_base_den=audio_tb_den,
                                    decoded_frame=frame,
                                )
                            )
                            pkt_count += 1
                    except Exception:
                        pass

                logger.info("[pyav_demuxer] Demux complete: %d packets", pkt_count)

            except Exception as e:
                # "Invalid data" is routine on aborted/partial streams; log others.
                if "Invalid data" not in str(e):
                    logger.debug("[pyav_demuxer] Demux thread error: %s", e)
                # Ensure streams_ready is set even on error
                streams_ready.set()
            finally:
                # Always wake the consumer so iter_packets() can terminate.
                pq.put(_SENTINEL)

        self._demux_thread = threading.Thread(target=_open_and_demux, daemon=True, name="pyav-demux")
        self._demux_thread.start()

        # Wait for stream discovery before returning.
        # Use run_in_executor to avoid blocking the event loop.
        await loop.run_in_executor(None, streams_ready.wait)
|
||||||
|
|
||||||
|
    async def iter_packets(self) -> AsyncIterator[DemuxedPacket]:
        """
        Yield demuxed packets from the background thread.

        Uses ``run_in_executor`` for the blocking ``queue.get()`` call,
        avoiding per-packet ``run_coroutine_threadsafe`` overhead.

        ``start()`` must be called first.

        Raises:
            RuntimeError: if called before ``start()``.
        """
        if self._packet_queue is None:
            raise RuntimeError("Call start() before iter_packets()")

        loop = asyncio.get_running_loop()
        pq = self._packet_queue

        try:
            while True:
                packet = await loop.run_in_executor(None, pq.get)
                if packet is _SENTINEL:
                    # Demux thread finished (or failed); stop the stream.
                    break
                yield packet

            # Normal exhaustion: give the demux thread a chance to finish.
            if self._demux_thread is not None:
                self._demux_thread.join(timeout=5.0)

        except GeneratorExit:
            logger.debug("[pyav_demuxer] Generator closed")
        except asyncio.CancelledError:
            logger.debug("[pyav_demuxer] Cancelled")
        finally:
            # Tear down threads, container, and pipe FDs regardless of how
            # the iteration ended.
            self._cleanup()
|
||||||
|
|
||||||
|
    def _discover_streams(self) -> None:
        """Inspect the opened container and record stream metadata.

        Picks the FIRST video stream and the FIRST audio stream only;
        additional streams of either type are ignored.
        """
        if self._container is None:
            return

        for stream in self._container.streams:
            if stream.type == "video" and self._video_stream is None:
                codec_ctx = stream.codec_context
                # Fall back to 24 fps when the container reports no rate.
                fps = float(stream.average_rate) if stream.average_rate else 24.0
                self._video_stream = DemuxedStream(
                    index=stream.index,
                    codec_name=codec_ctx.name if codec_ctx else stream.codec.name,
                    codec_type="video",
                    width=codec_ctx.width if codec_ctx else 0,
                    height=codec_ctx.height if codec_ctx else 0,
                    fps=fps,
                    pixel_format=str(codec_ctx.pix_fmt) if codec_ctx and codec_ctx.pix_fmt else "yuv420p",
                    time_base_num=stream.time_base.numerator,
                    time_base_den=stream.time_base.denominator,
                    duration_seconds=float(stream.duration * stream.time_base) if stream.duration else 0.0,
                    extradata=bytes(codec_ctx.extradata) if codec_ctx and codec_ctx.extradata else b"",
                )
                logger.info(
                    "[pyav_demuxer] Video: %s %dx%d @%.1ffps",
                    self._video_stream.codec_name,
                    self._video_stream.width,
                    self._video_stream.height,
                    self._video_stream.fps,
                )

            elif stream.type == "audio" and self._audio_stream is None:
                codec_ctx = stream.codec_context
                self._audio_stream = DemuxedStream(
                    index=stream.index,
                    codec_name=codec_ctx.name if codec_ctx else stream.codec.name,
                    codec_type="audio",
                    sample_rate=codec_ctx.sample_rate if codec_ctx else 0,
                    channels=codec_ctx.channels if codec_ctx else 0,
                    time_base_num=stream.time_base.numerator,
                    time_base_den=stream.time_base.denominator,
                    duration_seconds=float(stream.duration * stream.time_base) if stream.duration else 0.0,
                    extradata=bytes(codec_ctx.extradata) if codec_ctx and codec_ctx.extradata else b"",
                )
                logger.info(
                    "[pyav_demuxer] Audio: %s %dHz %dch",
                    self._audio_stream.codec_name,
                    self._audio_stream.sample_rate,
                    self._audio_stream.channels,
                )
|
||||||
|
|
||||||
|
    def _cleanup(self) -> None:
        """Stop threads and release all resources safely.

        Idempotent: every step nulls the attribute it releases, so repeated
        calls are harmless.

        The order is critical to avoid SIGSEGV from closing the container
        while the demux thread is still calling container.demux():

        1. Cancel the feeder task (stops new bytes being queued).
        2. Put a sentinel into the source queue so the writer thread
           unblocks and exits. The writer's ``finally`` closes the pipe
           write-end, which causes the demux thread to see EOF.
        3. Join the writer thread (wait for it to drain and exit).
        4. Join the demux thread (it finishes after pipe EOF).
        5. ONLY THEN close the container (no thread is using it).
        6. Close any remaining pipe FDs (read end, if still open).
        """
        # 1. Cancel feeder task
        if self._feeder_task is not None:
            self._feeder_task.cancel()
            self._feeder_task = None

        # 2. Unblock writer thread so it exits and closes the pipe
        if self._source_queue is not None:
            try:
                # put_nowait: never block cleanup on a full queue.
                self._source_queue.put_nowait(_SENTINEL)
            except Exception:
                pass

        # 3. Join writer thread (it closes _write_fd in its finally block)
        if self._writer_thread is not None:
            self._writer_thread.join(timeout=5.0)
            self._writer_thread = None

        # 4. Join demux thread -- must finish before we close the container
        if self._demux_thread is not None:
            self._demux_thread.join(timeout=5.0)
            self._demux_thread = None

        # 5. Now safe to close the container (no thread is using it)
        if self._container is not None:
            try:
                self._container.close()
            except Exception:
                pass
            self._container = None

        # 6. Close any remaining pipe FDs
        for fd_name in ("_read_fd", "_write_fd"):
            fd = getattr(self, fd_name, None)
            if fd is not None:
                try:
                    os.close(fd)
                except OSError:
                    pass
                setattr(self, fd_name, None)
|
||||||
1121
mediaflow_proxy/remuxer/transcode_handler.py
Normal file
1121
mediaflow_proxy/remuxer/transcode_handler.py
Normal file
File diff suppressed because it is too large
Load Diff
1268
mediaflow_proxy/remuxer/transcode_pipeline.py
Normal file
1268
mediaflow_proxy/remuxer/transcode_pipeline.py
Normal file
File diff suppressed because it is too large
Load Diff
1728
mediaflow_proxy/remuxer/ts_muxer.py
Normal file
1728
mediaflow_proxy/remuxer/ts_muxer.py
Normal file
File diff suppressed because it is too large
Load Diff
403
mediaflow_proxy/remuxer/video_transcoder.py
Normal file
403
mediaflow_proxy/remuxer/video_transcoder.py
Normal file
@@ -0,0 +1,403 @@
|
|||||||
|
"""
|
||||||
|
GPU-accelerated video transcoder with runtime detection.
|
||||||
|
|
||||||
|
Detects available hardware encoders/decoders at first use and selects
|
||||||
|
the best available backend:
|
||||||
|
- NVIDIA: h264_nvenc / hevc_cuvid (NVENC + CUDA)
|
||||||
|
- Apple macOS: h264_videotoolbox / hevc_videotoolbox
|
||||||
|
- Intel Linux: h264_vaapi / h264_qsv
|
||||||
|
- Fallback: libx264 (CPU)
|
||||||
|
|
||||||
|
The transcoder operates at the packet/frame level via PyAV, suitable
|
||||||
|
for integration into the streaming pipeline.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from enum import Enum
|
||||||
|
from fractions import Fraction
|
||||||
|
|
||||||
|
import av
|
||||||
|
|
||||||
|
from mediaflow_proxy.configs import settings
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class HWAccelType(Enum):
    """Hardware-acceleration backends this module can select, by probe priority."""

    NONE = "none"  # CPU-only fallback (libx264)
    NVIDIA = "nvidia"  # NVENC encode / CUVID decode
    VIDEOTOOLBOX = "videotoolbox"  # Apple macOS
    VAAPI = "vaapi"  # Intel/AMD on Linux
    QSV = "qsv"  # Intel Quick Sync
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class HWCapability:
    """Detected hardware acceleration capability."""

    # Selected backend; NONE means CPU-only libx264 encoding.
    accel_type: HWAccelType = HWAccelType.NONE
    # Encoder codec name used for H.264 output.
    h264_encoder: str = "libx264"
    h264_decoder: str | None = None  # None = use default software decoder
    hevc_decoder: str | None = None  # None = use default software decoder
    # Every HW codec that probed successfully (kept for logging/diagnostics).
    available_encoders: list[str] = field(default_factory=list)
    available_decoders: list[str] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level singleton -- populated on first call to get_hw_capability()
|
||||||
|
_hw_capability: HWCapability | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def _probe_codec(name: str, mode: str = "w") -> bool:
    """
    Return True if PyAV exposes a codec with the given name.

    Args:
        name: Codec name (e.g. 'h264_videotoolbox').
        mode: 'w' to probe an encoder, 'r' to probe a decoder.
    """
    try:
        av.Codec(name, mode)
    except Exception:
        return False
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_hw_capability() -> HWCapability:
    """
    Probe the runtime environment for hardware encoder/decoder availability.

    Checks NVIDIA, Apple VideoToolbox, Intel VAAPI/QSV in priority order
    and falls back to libx264 CPU encoding when no HW encoder is found.
    """
    cap = HWCapability()

    # Probe the full candidate set up front so the result can be logged later.
    candidate_encoders = (
        "h264_nvenc",
        "hevc_nvenc",
        "h264_videotoolbox",
        "hevc_videotoolbox",
        "h264_vaapi",
        "hevc_vaapi",
        "h264_qsv",
        "hevc_qsv",
    )
    candidate_decoders = ("h264_cuvid", "hevc_cuvid", "h264_qsv", "hevc_qsv")

    cap.available_encoders = [name for name in candidate_encoders if _probe_codec(name, "w")]
    cap.available_decoders = [name for name in candidate_decoders if _probe_codec(name, "r")]

    encoders = set(cap.available_encoders)
    decoders = set(cap.available_decoders)

    if "h264_nvenc" in encoders:
        # Priority 1: NVIDIA NVENC (+ CUVID decoders when present)
        cap.accel_type = HWAccelType.NVIDIA
        cap.h264_encoder = "h264_nvenc"
        if "h264_cuvid" in decoders:
            cap.h264_decoder = "h264_cuvid"
        if "hevc_cuvid" in decoders:
            cap.hevc_decoder = "hevc_cuvid"
    elif "h264_videotoolbox" in encoders:
        # Priority 2: Apple VideoToolbox (decoders engage automatically via hwaccel)
        cap.accel_type = HWAccelType.VIDEOTOOLBOX
        cap.h264_encoder = "h264_videotoolbox"
    elif "h264_vaapi" in encoders:
        # Priority 3: Intel VAAPI (Linux)
        cap.accel_type = HWAccelType.VAAPI
        cap.h264_encoder = "h264_vaapi"
    elif "h264_qsv" in encoders:
        # Priority 4: Intel Quick Sync
        cap.accel_type = HWAccelType.QSV
        cap.h264_encoder = "h264_qsv"
        if "h264_qsv" in decoders:
            cap.h264_decoder = "h264_qsv"
        if "hevc_qsv" in decoders:
            cap.hevc_decoder = "hevc_qsv"
    else:
        # Fallback: CPU
        cap.accel_type = HWAccelType.NONE
        cap.h264_encoder = "libx264"

    return cap
|
||||||
|
|
||||||
|
|
||||||
|
def get_hw_capability() -> HWCapability:
    """Get the detected hardware acceleration capability (cached singleton)."""
    global _hw_capability
    if _hw_capability is not None:
        return _hw_capability

    # First call: detect once and log what was selected.
    _hw_capability = _detect_hw_capability()
    gpu_selected = settings.transcode_prefer_gpu and _hw_capability.accel_type != HWAccelType.NONE
    if gpu_selected:
        logger.info(
            "[video_transcoder] GPU acceleration: %s (encoder=%s, decoders=%s)",
            _hw_capability.accel_type.value,
            _hw_capability.h264_encoder,
            _hw_capability.available_decoders or "software",
        )
    else:
        logger.info(
            "[video_transcoder] Using CPU encoder: %s (available HW: encoders=%s, decoders=%s)",
            _hw_capability.h264_encoder,
            _hw_capability.available_encoders or "none",
            _hw_capability.available_decoders or "none",
        )
    return _hw_capability
|
||||||
|
|
||||||
|
|
||||||
|
class VideoTranscoder:
    """
    In-process video transcoder using PyAV.

    Decodes input video packets and re-encodes to H.264 using the best
    available hardware encoder (or CPU libx264 fallback).

    Operates at the frame level: caller provides raw video packets (from
    PyAV demuxer), transcoder returns encoded H.264 NAL data suitable
    for the fMP4 muxer.
    """

    def __init__(
        self,
        input_codec_name: str,
        width: int,
        height: int,
        fps: float = 24.0,
        pixel_format: str = "yuv420p",
        force_software: bool = False,
    ) -> None:
        """
        Create decoder/encoder contexts for one video stream.

        Args:
            input_codec_name: Source codec name (e.g. 'h264', 'hevc').
            width: Source frame width in pixels.
            height: Source frame height in pixels.
            fps: Nominal frame rate used for time_base / framerate / GOP sizing.
            pixel_format: Declared source pixel format.
                NOTE(review): appears unused here -- frames are reformatted to
                the encoder's pix_fmt on the fly; confirm before removing.
            force_software: If True, ignore detected HW and use libx264.
        """
        hw = get_hw_capability()
        use_gpu = settings.transcode_prefer_gpu and hw.accel_type != HWAccelType.NONE and not force_software

        # --- Decoder ---
        # Pick a HW decoder matching the input codec family, if one was detected.
        hw_decoder = None
        if use_gpu:
            if "hevc" in input_codec_name or "h265" in input_codec_name:
                hw_decoder = hw.hevc_decoder
            else:
                hw_decoder = hw.h264_decoder

        decoder_name = hw_decoder or input_codec_name
        self._decoder = av.CodecContext.create(decoder_name, "r")

        # --- Encoder ---
        encoder_name = hw.h264_encoder if use_gpu else "libx264"

        # H.264 requires even dimensions
        enc_width = width if width % 2 == 0 else width + 1
        enc_height = height if height % 2 == 0 else height + 1

        self._encoder = av.CodecContext.create(encoder_name, "w")
        self._encoder.width = enc_width
        self._encoder.height = enc_height
        self._encoder.pix_fmt = "yuv420p"  # H.264 requires yuv420p
        self._encoder.time_base = Fraction(1, int(fps * 1000))
        self._encoder.framerate = Fraction(int(fps * 1000), 1000)
        self._encoder.bit_rate = _parse_bitrate(settings.transcode_video_bitrate)
        self._encoder.gop_size = int(fps * 2)  # Keyframe every ~2 seconds

        # Encoder options based on backend
        opts = {}
        if encoder_name == "libx264":
            opts["preset"] = settings.transcode_video_preset
            opts["tune"] = "zerolatency"
            opts["profile"] = "high"
        elif "nvenc" in encoder_name:
            opts["preset"] = "p4"  # NVENC preset (p1=fastest .. p7=slowest)
            opts["tune"] = "ll"  # Low latency
            opts["rc"] = "vbr"
        elif "videotoolbox" in encoder_name:
            opts["realtime"] = "1"
            opts["allow_sw"] = "1"  # Fallback to software if HW busy
        elif "vaapi" in encoder_name:
            opts["rc_mode"] = "VBR"
        elif "qsv" in encoder_name:
            opts["preset"] = "medium"

        self._encoder.options = opts
        self._encoder.open()

        # Report the (possibly padded-to-even) encode dimensions to callers.
        width = enc_width
        height = enc_height

        self._input_codec = input_codec_name
        self._encoder_name = encoder_name
        self._frames_decoded = 0
        self._frames_encoded = 0
        self._width = width
        self._height = height
        # Tracks whether the standalone decoder was actually used (via decode_packet).
        # When the demux thread decodes frames in-thread (decode_video=True),
        # the standalone decoder is never fed packets and flushing it is wasted work.
        self._decoder_used = False
        self._flushed = False  # Prevents double-flush which causes SIGSEGV

        logger.info(
            "[video_transcoder] Initialized: %s -> %s (%s), %dx%d @%.1ffps %dk",
            input_codec_name,
            encoder_name,
            hw.accel_type.value,
            width,
            height,
            fps,
            self._encoder.bit_rate // 1000 if self._encoder.bit_rate else 0,
        )

    @property
    def codec_private_data(self) -> bytes | None:
        """H.264 extradata (SPS/PPS) from the encoder, for the fMP4 init segment."""
        if self._encoder.extradata:
            return bytes(self._encoder.extradata)
        return None

    @property
    def width(self) -> int:
        # Output width (even-padded); may differ from the input width by 1.
        return self._width

    @property
    def height(self) -> int:
        # Output height (even-padded); may differ from the input height by 1.
        return self._height

    def transcode_frame(self, frame: av.VideoFrame) -> list[tuple[bytes, bool, int, int]]:
        """
        Encode a decoded video frame to H.264.

        Args:
            frame: A decoded av.VideoFrame.

        Returns:
            List of (nal_data, is_keyframe, pts, dts) tuples.
            May be empty while the encoder buffers frames; pts/dts default to 0
            when the encoder did not set them.
        """
        self._frames_decoded += 1
        output = []

        # Ensure correct pixel format for encoder
        if frame.format.name != self._encoder.pix_fmt:
            frame = frame.reformat(format=self._encoder.pix_fmt)

        try:
            for packet in self._encoder.encode(frame):
                self._frames_encoded += 1
                output.append(
                    (
                        bytes(packet),
                        packet.is_keyframe,
                        int(packet.pts) if packet.pts is not None else 0,
                        int(packet.dts) if packet.dts is not None else 0,
                    )
                )
        except av.error.InvalidDataError as e:
            # Drop the bad frame rather than failing the whole stream.
            logger.debug("[video_transcoder] Encode error: %s", e)

        return output

    def decode_packet(self, packet: av.Packet) -> list[av.VideoFrame]:
        """Decode a video packet into frames; returns [] on invalid data."""
        # Marks the standalone decoder as used so flush() drains it later.
        self._decoder_used = True
        try:
            return list(self._decoder.decode(packet))
        except av.error.InvalidDataError as e:
            logger.debug("[video_transcoder] Decode error: %s", e)
            return []

    def flush(self) -> list[tuple[bytes, bool, int, int]]:
        """
        Flush encoder (and decoder, if it was used) buffers.

        When ``decode_video=True`` is used in PyAVDemuxer, the demux thread
        decodes frames using the container's codec context. In that case the
        standalone ``_decoder`` here is never fed any packets, so flushing
        it is skipped -- avoiding a stall that added ~5 s on some backends.

        Safe to call multiple times -- subsequent calls return an empty list.

        Returns:
            List of (nal_data, is_keyframe, pts, dts) tuples drained from
            the codec buffers.
        """
        if self._flushed:
            return []
        self._flushed = True

        output = []

        # Flush decoder only if it was actually used (via decode_packet)
        if self._decoder_used:
            try:
                # decode(None) signals end-of-stream and drains buffered frames.
                for frame in self._decoder.decode(None):
                    self._frames_decoded += 1
                    if frame.format.name != self._encoder.pix_fmt:
                        frame = frame.reformat(format=self._encoder.pix_fmt)
                    for packet in self._encoder.encode(frame):
                        self._frames_encoded += 1
                        output.append(
                            (
                                bytes(packet),
                                packet.is_keyframe,
                                int(packet.pts) if packet.pts is not None else 0,
                                int(packet.dts) if packet.dts is not None else 0,
                            )
                        )
            except Exception as e:
                logger.debug("[video_transcoder] Decoder flush error: %s", e)
        else:
            logger.debug("[video_transcoder] Skipping decoder flush (decoder not used)")

        # Flush encoder
        try:
            # encode(None) drains the encoder's buffered packets.
            for packet in self._encoder.encode(None):
                self._frames_encoded += 1
                output.append(
                    (
                        bytes(packet),
                        packet.is_keyframe,
                        int(packet.pts) if packet.pts is not None else 0,
                        int(packet.dts) if packet.dts is not None else 0,
                    )
                )
        except Exception as e:
            logger.debug("[video_transcoder] Encoder flush error: %s", e)

        logger.info(
            "[video_transcoder] Flushed: %d decoded, %d encoded total (decoder_used=%s)",
            self._frames_decoded,
            self._frames_encoded,
            self._decoder_used,
        )
        return output

    def close(self) -> None:
        """Release codec contexts.

        Flushes the encoder (if not already flushed) before releasing to avoid
        SIGSEGV when libx264 or hardware encoders have buffered frames at
        teardown time. Double-flushing is the most common cause of SIGSEGV
        in the transcode pipeline.

        PyAV codec contexts are released via garbage collection (no explicit
        close method), so we flush first to ensure native buffers are drained
        before the C-level codec is freed.
        """
        # flush() is idempotent -- safe to call even if already flushed
        self.flush()
        # Release references -- GC will free the native codec contexts
        self._encoder = None
        self._decoder = None

    def __del__(self) -> None:
        # NOTE(review): if __init__ raised before attributes were set, this
        # close() could hit AttributeError during interpreter teardown -- confirm.
        self.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_bitrate(bitrate_str: str) -> int:
|
||||||
|
"""Parse a bitrate string like '4M', '2000k', '5000000' to int bits/s."""
|
||||||
|
s = bitrate_str.strip().lower()
|
||||||
|
if s.endswith("m"):
|
||||||
|
return int(float(s[:-1]) * 1_000_000)
|
||||||
|
if s.endswith("k"):
|
||||||
|
return int(float(s[:-1]) * 1_000)
|
||||||
|
return int(s)
|
||||||
@@ -2,5 +2,16 @@ from .proxy import proxy_router
|
|||||||
from .extractor import extractor_router
|
from .extractor import extractor_router
|
||||||
from .speedtest import speedtest_router
|
from .speedtest import speedtest_router
|
||||||
from .playlist_builder import playlist_builder_router
|
from .playlist_builder import playlist_builder_router
|
||||||
|
from .xtream import xtream_root_router
|
||||||
|
from .acestream import acestream_router
|
||||||
|
from .telegram import telegram_router
|
||||||
|
|
||||||
__all__ = ["proxy_router", "extractor_router", "speedtest_router", "playlist_builder_router"]
|
__all__ = [
|
||||||
|
"proxy_router",
|
||||||
|
"extractor_router",
|
||||||
|
"speedtest_router",
|
||||||
|
"playlist_builder_router",
|
||||||
|
"xtream_root_router",
|
||||||
|
"acestream_router",
|
||||||
|
"telegram_router",
|
||||||
|
]
|
||||||
|
|||||||
BIN
mediaflow_proxy/routes/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/routes/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/routes/__pycache__/acestream.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/routes/__pycache__/acestream.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/routes/__pycache__/extractor.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/routes/__pycache__/extractor.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
mediaflow_proxy/routes/__pycache__/proxy.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/routes/__pycache__/proxy.cpython-313.pyc
Normal file
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user