import asyncio import base64 import logging import time from typing import Optional, AsyncGenerator from urllib.parse import urlparse, parse_qs import aiohttp from aiohttp import ClientTimeout import tenacity from fastapi import Request, Response, HTTPException from fastapi.responses import StreamingResponse from starlette.background import BackgroundTask from .const import SUPPORTED_RESPONSE_HEADERS from .mpd_processor import process_manifest, process_playlist, process_segment, process_init_segment from .schemas import HLSManifestParams, MPDManifestParams, MPDPlaylistParams, MPDSegmentParams, MPDInitParams from .utils.cache_utils import ( get_cached_mpd, get_cached_init_segment, get_cached_segment, set_cached_segment, get_cached_processed_segment, set_cached_processed_segment, ) from .utils.dash_prebuffer import dash_prebuffer from .utils.http_utils import ( Streamer, DownloadError, download_file_with_retry, request_with_retry, EnhancedStreamingResponse, ProxyRequestHeaders, create_streamer, apply_header_manipulation, ) from .utils.m3u8_processor import M3U8Processor, generate_graceful_end_playlist from .utils.mpd_utils import pad_base64 from .utils.redis_utils import ( acquire_stream_gate, release_stream_gate, get_cached_head, set_cached_head, check_and_set_cooldown, is_redis_configured, ) from .utils.stream_transformers import StreamTransformer, get_transformer from .utils.rate_limit_handlers import get_rate_limit_handler from .configs import settings logger = logging.getLogger(__name__) def handle_exceptions(exception: Exception, context: str = "") -> Response: """ Handle exceptions and return appropriate HTTP responses. Uses appropriate log levels based on exception type: - DEBUG: Expected errors like 404 Not Found - WARNING: Transient errors like timeouts, connection issues - ERROR: Only for truly unexpected errors Args: exception (Exception): The exception that was raised. context (str): Optional context string for better error messages. Returns: Response: An HTTP response corresponding to the exception type. """ ctx = f" [{context}]" if context else "" if isinstance(exception, aiohttp.ClientResponseError): if exception.status == 404: logger.debug(f"Upstream 404{ctx}: {exception.request_info.url if exception.request_info else 'unknown'}") return Response(status_code=404, content="Upstream resource not found") elif exception.status in (429, 509): # Rate limited by upstream - pass through so the player can retry on its own logger.warning(f"Upstream rate limited ({exception.status}){ctx}") return Response(status_code=exception.status, content=f"Upstream rate limited: {exception.status}") elif exception.status in (502, 503, 504): # Upstream server errors - log at warning level as these are often transient logger.warning(f"Upstream server error{ctx}: {exception.status}") return Response(status_code=exception.status, content=f"Upstream server error: {exception.status}") else: logger.warning(f"Upstream HTTP error{ctx}: {exception}") return Response(status_code=exception.status, content=f"Upstream service error: {exception}") elif isinstance(exception, DownloadError): # DownloadError is expected for various upstream issues logger.warning(f"Download error{ctx}: {exception}") return Response(status_code=exception.status_code, content=str(exception)) elif isinstance(exception, tenacity.RetryError): logger.warning(f"Max retries exceeded{ctx}") return Response(status_code=502, content="Max retries exceeded while downloading content") elif isinstance(exception, asyncio.TimeoutError): logger.warning(f"Timeout error{ctx}: upstream did not respond in time") return Response(status_code=504, content="Gateway timeout") elif isinstance(exception, aiohttp.ClientError): # Client errors are often network issues - warning level logger.warning(f"Client error{ctx}: {exception}") return Response(status_code=502, content=f"Upstream connection error: {exception}") elif isinstance(exception, HTTPException): # HTTPException is intentionally raised (e.g. segment unavailable) - not unexpected if exception.status_code >= 500: logger.warning(f"HTTP exception{ctx}: {exception.status_code}: {exception.detail}") else: logger.debug(f"HTTP exception{ctx}: {exception.status_code}: {exception.detail}") return Response(status_code=exception.status_code, content=exception.detail) elif isinstance(exception, ValueError) and "HTML instead of m3u8" in str(exception): # Expected error when upstream returns error page instead of playlist logger.warning(f"Upstream returned HTML{ctx}: stream may be offline or unavailable") return Response(status_code=502, content=str(exception)) else: # Only use exception() (with traceback) for truly unexpected errors logger.exception(f"Unexpected error{ctx}: {exception}") return Response(status_code=502, content=f"Internal server error: {exception}") async def handle_hls_stream_proxy( request: Request, hls_params: HLSManifestParams, proxy_headers: ProxyRequestHeaders, transformer_id: Optional[str] = None, ) -> Response: """ Handle HLS stream proxy requests. This function processes HLS manifest files and streams content based on the request parameters. Args: request (Request): The incoming FastAPI request object. hls_params (HLSManifestParams): Parameters for the HLS manifest. proxy_headers (ProxyRequestHeaders): Headers to be used in the proxy request. transformer_id (str, optional): ID of the stream transformer to use for segment streaming. Returns: Union[Response, EnhancedStreamingResponse]: Either a processed m3u8 playlist or a streaming response. """ streamer = await create_streamer(hls_params.destination) # Handle range requests content_range = proxy_headers.request.get("range", "bytes=0-") if "nan" in content_range.casefold(): # Handle invalid range requests "bytes=NaN-NaN" raise HTTPException(status_code=416, detail="Invalid Range Header") proxy_headers.request.update({"range": content_range}) try: # Auto-detect and resolve Vavoo links if "vavoo.to" in hls_params.destination: try: from mediaflow_proxy.extractors.vavoo import VavooExtractor vavoo_extractor = VavooExtractor(proxy_headers.request) resolved_data = await vavoo_extractor.extract(hls_params.destination) resolved_url = resolved_data["destination_url"] logger.info(f"Auto-resolved Vavoo URL: {hls_params.destination} -> {resolved_url}") # Update destination with resolved URL hls_params.destination = resolved_url except Exception as e: logger.warning(f"Failed to auto-resolve Vavoo URL: {e}") # Continue with original URL if resolution fails # Parse skip_segments from JSON string to list skip_segments_list = hls_params.get_skip_segments() # Get transformer instance if specified transformer = get_transformer(transformer_id) # If force_playlist_proxy is enabled, skip detection and directly process as m3u8 if hls_params.force_playlist_proxy: return await fetch_and_process_m3u8( streamer, hls_params.destination, proxy_headers, request, hls_params.key_url, hls_params.force_playlist_proxy, hls_params.key_only_proxy, hls_params.no_proxy, skip_segments_list, transformer, hls_params.start_offset, ) parsed_url = urlparse(hls_params.destination) # Check if the URL is a valid m3u8 playlist or m3u file if parsed_url.path.endswith((".m3u", ".m3u8", ".m3u_plus")) or parse_qs(parsed_url.query).get("type", [""])[ 0 ] in ["m3u", "m3u8", "m3u_plus"]: return await fetch_and_process_m3u8( streamer, hls_params.destination, proxy_headers, request, hls_params.key_url, hls_params.force_playlist_proxy, hls_params.key_only_proxy, hls_params.no_proxy, skip_segments_list, transformer, hls_params.start_offset, ) # Create initial streaming response to check content type await streamer.create_streaming_response(hls_params.destination, proxy_headers.request) response_headers = prepare_response_headers( streamer.response.headers, proxy_headers.response, proxy_headers.remove, proxy_headers.propagate ) if "mpegurl" in response_headers.get("content-type", "").lower(): return await fetch_and_process_m3u8( streamer, hls_params.destination, proxy_headers, request, hls_params.key_url, hls_params.force_playlist_proxy, hls_params.key_only_proxy, hls_params.no_proxy, skip_segments_list, transformer, hls_params.start_offset, ) # If we're removing content-range but upstream returned 206, change to 200 # (206 Partial Content requires Content-Range header per HTTP spec) status_code = streamer.response.status if status_code == 206 and "content-range" in [h.lower() for h in proxy_headers.remove]: status_code = 200 return EnhancedStreamingResponse( streamer.stream_content(transformer), status_code=status_code, headers=response_headers, background=BackgroundTask(streamer.close), ) except Exception as e: await streamer.close() return handle_exceptions(e) async def handle_stream_request( method: str, video_url: str, proxy_headers: ProxyRequestHeaders, transformer_id: Optional[str] = None, rate_limit_handler_id: Optional[str] = None, ) -> Response: """ Handle general stream requests. This function processes both HEAD and GET requests for video streams. Uses Redis for cross-worker coordination to prevent CDN rate-limiting (e.g., Vidoza 509). Rate limiting behavior is controlled by the rate_limit_handler parameter: - If specified, uses that handler's settings - If not specified, auto-detects based on video URL hostname - If no handler matches, no rate limiting is applied (fast path) The coordination strategy (when rate limiting is enabled): 1. HEAD requests: Check/use Redis cache, skip upstream entirely if cached 2. GET requests: Check cooldown FIRST, return 503 if in cooldown 3. Only ONE request proceeds to upstream at a time via gate 4. After upstream responds, set cooldown to prevent rapid follow-up requests Args: method (str): The HTTP method (e.g., 'GET' or 'HEAD'). video_url (str): The URL of the video to stream. proxy_headers (ProxyRequestHeaders): Headers to be used in the proxy request. transformer_id (str, optional): ID of the stream transformer to use for content manipulation. rate_limit_handler_id (str, optional): ID of the rate limit handler to use (e.g., "vidoza", "aggressive"). If not specified, auto-detects based on video URL hostname. Returns: Union[Response, EnhancedStreamingResponse]: Either a HEAD response with headers or a streaming response. """ host = urlparse(video_url).hostname or "unknown" gate_acquired = False # Get rate limit handler (explicit ID, auto-detect from URL, or default no-op) rate_handler = get_rate_limit_handler(rate_limit_handler_id, video_url) # Check if rate limiting features are needed and Redis is available needs_rate_limiting = ( rate_handler.cooldown_seconds > 0 or rate_handler.use_head_cache or rate_handler.use_stream_gate ) redis_available = is_redis_configured() if needs_rate_limiting: logger.info( f"[handle_stream] Rate limiting ENABLED for {host}: " f"cooldown={rate_handler.cooldown_seconds}s, gate={rate_handler.use_stream_gate}, " f"head_cache={rate_handler.use_head_cache}, redis={redis_available}" ) if needs_rate_limiting and not redis_available: logger.warning(f"[handle_stream] Rate limiting requested for {host} but Redis not configured - skipping") needs_rate_limiting = False # Cooldown key - prevents rapid-fire requests to same CDN URL cooldown_key = f"stream_cooldown:{video_url}" # 1. Check Redis HEAD cache first (if enabled by handler) cached = None if needs_rate_limiting and rate_handler.use_head_cache: cached = await get_cached_head(video_url) if method == "HEAD": if cached: logger.info(f"[handle_stream] Serving cached HEAD response for {host}") response_headers = prepare_response_headers( cached["headers"], proxy_headers.response, proxy_headers.remove, proxy_headers.propagate ) return Response(headers=response_headers, status_code=cached["status"]) # No cached HEAD - for rate-limited hosts, wait for cache via gate instead of hitting upstream if needs_rate_limiting and rate_handler.use_stream_gate: # Try to acquire gate - if we get it, we make the upstream HEAD request # If another request holds the gate, we wait and then check cache again gate_acquired = await acquire_stream_gate(video_url, timeout=30.0) if gate_acquired: # We got the gate - check cache again (another request may have populated it while we waited) cached = await get_cached_head(video_url) if cached: await release_stream_gate(video_url) logger.info(f"[handle_stream] Serving cached HEAD response after gate wait for {host}") response_headers = prepare_response_headers( cached["headers"], proxy_headers.response, proxy_headers.remove, proxy_headers.propagate ) return Response(headers=response_headers, status_code=cached["status"]) # Cache still empty - we'll make the upstream request (gate is held) else: # Gate timeout - check cache one more time before giving up cached = await get_cached_head(video_url) if cached: logger.info(f"[handle_stream] Serving cached HEAD after gate timeout for {host}") response_headers = prepare_response_headers( cached["headers"], proxy_headers.response, proxy_headers.remove, proxy_headers.propagate ) return Response(headers=response_headers, status_code=cached["status"]) logger.warning(f"[handle_stream] HEAD gate timeout for {host}, no cached headers available") return Response(status_code=503, content="Upstream host is busy, try again later") # No rate limiting - proceed to upstream without gate else: # For GET requests with rate limiting: wait for cooldown and acquire gate if needs_rate_limiting and rate_handler.use_stream_gate: # Wait for gate - this serializes all requests to the same URL gate_acquired = await acquire_stream_gate(video_url, timeout=30.0) if not gate_acquired: logger.warning(f"[handle_stream] Gate timeout for {host}, upstream may be slow") return Response(status_code=503, content="Upstream host is busy, try again later") # Got the gate - now check/set cooldown # If in cooldown, wait for it to expire before proceeding if rate_handler.cooldown_seconds > 0: max_wait = rate_handler.cooldown_seconds + 1 # Wait slightly longer than cooldown wait_start = time.time() while not await check_and_set_cooldown(cooldown_key, rate_handler.cooldown_seconds): # Still in cooldown - wait a bit and retry elapsed = time.time() - wait_start if elapsed >= max_wait: # Cooldown still active after max wait - give up await release_stream_gate(video_url) logger.warning(f"[handle_stream] Cooldown wait timeout for {host}") return Response( status_code=503, content="Stream busy, try again later", headers={"Retry-After": str(rate_handler.retry_after_seconds)}, ) logger.debug(f"[handle_stream] Waiting for cooldown to expire for {host}...") await asyncio.sleep(0.5) # Poll every 500ms # Cooldown acquired - we can proceed to upstream logger.info(f"[handle_stream] Cooldown acquired for {host} after {time.time() - wait_start:.1f}s wait") streamer = await create_streamer(video_url) try: # Auto-detect and resolve Vavoo links if "vavoo.to" in video_url: try: from mediaflow_proxy.extractors.vavoo import VavooExtractor vavoo_extractor = VavooExtractor(proxy_headers.request) resolved_data = await vavoo_extractor.extract(video_url) resolved_url = resolved_data["destination_url"] logger.info(f"Auto-resolved Vavoo URL: {video_url} -> {resolved_url}") # Update video_url with resolved URL video_url = resolved_url except Exception as e: logger.warning(f"Failed to auto-resolve Vavoo URL: {e}") # Continue with original URL if resolution fails # Log timing for debugging seek performance start_time = time.time() range_header = proxy_headers.request.get("range", "not set") logger.info(f"[handle_stream] Starting upstream {method} request - range: {range_header}") # Check if the range header was auto-added by proxy (not from client) # This is set in proxy.py when we add bytes=0- because client didn't send a range auto_added_range = getattr(proxy_headers, "auto_added_range", False) # Use the same HTTP method for upstream request (HEAD for HEAD, GET for GET) # This prevents unnecessary data download when client just wants headers await streamer.create_streaming_response(video_url, proxy_headers.request, method=method) elapsed = time.time() - start_time logger.info(f"[handle_stream] Upstream responded in {elapsed:.2f}s - status: {streamer.response.status}") logger.debug(f"Upstream response headers: {dict(streamer.response.headers)}") response_headers = prepare_response_headers( streamer.response.headers, proxy_headers.response, proxy_headers.remove, proxy_headers.propagate ) logger.debug(f"Prepared response headers: {response_headers}") # When client didn't send a Range header but upstream returns 206 Partial Content: # - Convert status to 200 (full content, not partial) # - Remove content-range header to avoid confusing the client # - BUT keep Accept-Ranges: bytes so client knows seeking is supported # This handles cases where we added bytes=0- range but upstream still treats it as a range request status_code = streamer.response.status if status_code == 206: # Always ensure Accept-Ranges: bytes is present for 206 responses # This tells clients like ExoPlayer that they can seek by sending Range requests # Some upstream servers (like TorBox) don't send this header even though they support ranges if "accept-ranges" not in [h.lower() for h in response_headers.keys()]: response_headers["accept-ranges"] = "bytes" if "content-range" in [h.lower() for h in proxy_headers.remove]: # Explicitly requested to remove content-range status_code = 200 # Also remove content-range from response headers if present response_headers.pop("content-range", None) elif auto_added_range: # We auto-added bytes=0- range but got 206 - convert to 200 # This happens when client didn't send a range but upstream responds with 206 status_code = 200 # Remove content-range to avoid confusing client response_headers.pop("content-range", None) # Update content-length to total size (remove range suffix if present) content_range = streamer.response.headers.get("Content-Range", "") if "/" in content_range: # Extract total size from "bytes X-Y/total" total_size = content_range.split("/")[-1].strip() response_headers["content-length"] = total_size # Get transformer instance if specified transformer = get_transformer(transformer_id) # Cache headers in Redis for future HEAD probes (if rate limiting enabled) if needs_rate_limiting and rate_handler.use_head_cache and status_code in (200, 206): await set_cached_head(video_url, dict(streamer.response.headers), status_code) if method == "HEAD": # HEAD requests always release gate immediately if gate_acquired: await release_stream_gate(video_url) gate_acquired = False await streamer.close() return Response(headers=response_headers, status_code=status_code) else: # For GET requests: check if we need exclusive streaming if gate_acquired and needs_rate_limiting and rate_handler.exclusive_stream: # EXCLUSIVE MODE: Keep gate held during entire stream # Release gate in background task when stream ends logger.info(f"[handle_stream] Exclusive stream mode - gate held during stream for {host}") async def cleanup_exclusive(): await streamer.close() await release_stream_gate(video_url) logger.info(f"[handle_stream] Exclusive stream ended - gate released for {host}") gate_acquired = False # Background task will release it return EnhancedStreamingResponse( streamer.stream_content(transformer), headers=response_headers, status_code=status_code, background=BackgroundTask(cleanup_exclusive), ) else: # NORMAL MODE: Release gate after headers, stream continues freely if gate_acquired: await release_stream_gate(video_url) gate_acquired = False return EnhancedStreamingResponse( streamer.stream_content(transformer), headers=response_headers, status_code=status_code, background=BackgroundTask(streamer.close), ) except Exception as e: await streamer.close() return handle_exceptions(e) finally: # Safety: release gate if not already released (error path before headers received) if gate_acquired: await release_stream_gate(video_url) def prepare_response_headers( original_headers, proxy_response_headers, remove_headers=None, propagate_headers=None ) -> dict: """ Prepare response headers for the proxy response. This function filters the original headers, ensures proper transfer encoding, and merges them with the proxy response headers. Args: original_headers: The original headers from the upstream response (aiohttp CIMultiDictProxy). proxy_response_headers (dict): Additional headers to be included in the proxy response. remove_headers (list, optional): List of header names to remove from the response. Defaults to None. propagate_headers (dict, optional): Headers that propagate to segments (rp_ prefix). Defaults to None. Returns: dict: The prepared headers for the proxy response. """ remove_set = set(h.lower() for h in (remove_headers or [])) response_headers = {} # aiohttp transparently decompresses gzip/deflate/br responses. When it does, # the original Content-Length (which was the *compressed* size) is no longer # valid for the decompressed body we are forwarding. Forwarding that stale # Content-Length causes h11 to raise "Too much data for declared Content-Length" # when the decompressed body is larger than the declared length. upstream_content_encoding = any(k.lower() == "content-encoding" for k in original_headers.keys()) # Handle aiohttp CIMultiDictProxy for k, v in original_headers.items(): k_lower = k.lower() if k_lower in SUPPORTED_RESPONSE_HEADERS and k_lower not in remove_set: # Drop Content-Length when the upstream compressed the body — the # decompressed size differs and the header would be wrong. if k_lower == "content-length" and upstream_content_encoding: continue response_headers[k_lower] = v # Apply propagate headers first (for segments), then response headers (response takes precedence) if propagate_headers: response_headers.update(propagate_headers) response_headers.update(proxy_response_headers) return response_headers async def proxy_stream( method: str, destination: str, proxy_headers: ProxyRequestHeaders, transformer_id: Optional[str] = None, rate_limit_handler_id: Optional[str] = None, ): """ Proxies the stream request to the given video URL. Args: method (str): The HTTP method (e.g., GET, HEAD). destination (str): The URL of the stream to be proxied. proxy_headers (ProxyRequestHeaders): The headers to include in the request. transformer_id (str, optional): ID of the stream transformer to use. rate_limit_handler_id (str, optional): ID of the rate limit handler to use (e.g., "vidoza"). If not specified, auto-detects based on destination URL hostname. Returns: Response: The HTTP response with the streamed content. """ return await handle_stream_request(method, destination, proxy_headers, transformer_id, rate_limit_handler_id) async def fetch_and_process_m3u8( streamer: Streamer, url: str, proxy_headers: ProxyRequestHeaders, request: Request, key_url: str = None, force_playlist_proxy: bool = None, key_only_proxy: bool = False, no_proxy: bool = False, skip_segments: list = None, transformer: Optional[StreamTransformer] = None, start_offset: float = None, ): """ Fetches and processes the m3u8 playlist on-the-fly, converting it to an HLS playlist. Args: streamer (Streamer): The HTTP client to use for streaming. url (str): The URL of the m3u8 playlist. proxy_headers (ProxyRequestHeaders): The headers to include in the request. request (Request): The incoming HTTP request. key_url (str, optional): The HLS Key URL to replace the original key URL. Defaults to None. force_playlist_proxy (bool, optional): Force all playlist URLs to be proxied through MediaFlow. Defaults to None. key_only_proxy (bool, optional): Only proxy the key URL, leaving segment URLs direct. Defaults to False. no_proxy (bool, optional): If True, returns the manifest without proxying any URLs. Defaults to False. skip_segments (list, optional): List of time segments to skip. Each item should have 'start', 'end' (in seconds), and optionally 'type'. transformer (StreamTransformer, optional): Transformer to apply to the stream content. start_offset (float, optional): Time offset in seconds for EXT-X-START tag. Use negative values for live streams to start behind the live edge. Returns: Response: The HTTP response with the processed m3u8 playlist. """ try: # Create streaming response if not already created if not streamer.response: await streamer.create_streaming_response(url, proxy_headers.request) # Initialize processor and response headers # skip_segments is already a list of dicts with 'start' and 'end' keys processor = M3U8Processor( request, key_url, force_playlist_proxy, key_only_proxy, no_proxy, skip_segments, start_offset ) base_headers = { "content-disposition": "inline", "accept-ranges": "none", "content-type": "application/vnd.apple.mpegurl", } # Don't include propagate headers for manifests - they should only apply to segments response_headers = apply_header_manipulation(base_headers, proxy_headers, include_propagate=False) # Get the generator for processing m3u8_generator = processor.process_m3u8_streaming( streamer.stream_content(transformer), str(streamer.response.url) ) # Pre-fetch the first chunk to validate the content before starting the response # This allows us to return a proper HTTP error if the upstream returns HTML first_chunk = None try: first_chunk = await m3u8_generator.__anext__() except ValueError as e: # Upstream returned HTML instead of m3u8 - expected error, log at warning level logger.warning(f"Upstream error for {url}: {e}") await streamer.close() # Return graceful end playlist if enabled, otherwise raise error if settings.graceful_stream_end: graceful_content = generate_graceful_end_playlist("Stream offline or unavailable") return Response( content=graceful_content, media_type="application/vnd.apple.mpegurl", headers=response_headers, ) raise HTTPException(status_code=502, detail=str(e)) except StopAsyncIteration: # Empty response - this shouldn't happen for valid m3u8 logger.warning(f"Upstream returned empty m3u8 playlist: {url}") await streamer.close() # Return graceful end playlist if enabled, otherwise raise error if settings.graceful_stream_end: graceful_content = generate_graceful_end_playlist("Empty upstream response") return Response( content=graceful_content, media_type="application/vnd.apple.mpegurl", headers=response_headers, ) raise HTTPException(status_code=502, detail="Upstream returned empty m3u8 playlist") # Create a wrapper that yields the first chunk then continues with the rest async def prefetched_generator(): yield first_chunk try: async for chunk in m3u8_generator: yield chunk except ValueError as e: # This shouldn't happen since we already validated the first chunk, # but handle it gracefully if it does logger.warning(f"ValueError during m3u8 streaming (after initial validation): {e}") # Create streaming response with on-the-fly processing return EnhancedStreamingResponse( prefetched_generator(), headers=response_headers, background=BackgroundTask(streamer.close), ) except HTTPException: raise except Exception as e: await streamer.close() return handle_exceptions(e) def _parse_combined_key_param(key_id: str | None, key: str | None) -> tuple[str | None, str | None]: """ Support the combined ``kid:key,kid:key`` format commonly passed as a single ``key`` param. When ``key_id`` is absent and ``key`` looks like ``kid1:key1,kid2:key2`` (every comma-separated part contains exactly one colon), split it into separate ``key_id`` and ``key`` values so the rest of the pipeline receives the canonical ``key_id=kid1,kid2`` / ``key=key1,key2`` format. This format is convenient when using tools like mpv that express ClearKey pairs as ``kid:key`` strings. """ if key and not key_id: pairs = [p.strip() for p in key.split(",") if p.strip()] if pairs and all(":" in p for p in pairs): key_id = ",".join(p.split(":", 1)[0] for p in pairs) key = ",".join(p.split(":", 1)[1] for p in pairs) return key_id, key def _normalize_drm_key_value(value: str) -> str: """ Normalize a DRM key_id or key value to lowercase hex. Accepts either: - A 32-char hex string (returned as-is, lowercased). - A base64url-encoded value (decoded to hex). - A comma-separated list of the above, for multi-key DRM scenarios. Commas are treated as list separators, NOT as characters to pass into base64 decoding. Previously the ``len() != 32`` check was applied to the entire comma-joined string, and ``urlsafe_b64decode`` silently strips non-alphabet characters (including commas), causing adjacent keys to be concatenated into an oversized byte string. Args: value: The key value string, or None. Returns: Normalized hex string (or comma-joined hex strings for multi-key), or the original value unchanged if it is falsy. """ if not value: return value if "," in value: parts = [_normalize_single_key(p.strip()) for p in value.split(",") if p.strip()] return ",".join(parts) return _normalize_single_key(value) def _normalize_single_key(value: str) -> str: """Convert a single key_id or key to a 32-char lowercase hex string.""" if len(value) != 32: return base64.urlsafe_b64decode(pad_base64(value)).hex() return value.lower() async def handle_drm_key_data(key_id, key, drm_info): """ Handles the DRM key data, retrieving the key ID and key from the DRM info if not provided. Args: key_id (str): The DRM key ID. key (str): The DRM key. drm_info (dict): The DRM information from the MPD manifest. Returns: tuple: The key ID and key. """ if drm_info and not drm_info.get("isDrmProtected"): return None, None if not key_id or not key: if "keyId" in drm_info and "key" in drm_info: key_id = drm_info["keyId"] key = drm_info["key"] elif "laUrl" in drm_info and "keyId" in drm_info: # License URL with keyId - license acquisition should have been attempted already # If we still don't have a key, it means acquisition failed pass else: # Try to use extracted KID if available (from MPD/init segment analysis) if not key_id and drm_info.get("extracted_kids"): # Use the first extracted KID extracted_kids = drm_info["extracted_kids"] if extracted_kids: key_id = extracted_kids[0] logger.info(f"Using extracted KID from MPD/init segment: {key_id}") # Still require the actual decryption key if not key: if drm_info.get("extracted_kids"): license_urls = drm_info.get("license_urls", []) license_url_msg = f" License server: {license_urls[0]}" if license_urls else "" raise HTTPException( status_code=400, detail=( f"Key ID (KID) was automatically extracted: {key_id}. " f"However, the actual decryption key must be provided via the 'key' parameter. " f"The key cannot be extracted from the MPD or init segment and must be obtained " f"from the license server or source website.{license_url_msg}" ), ) else: raise HTTPException( status_code=400, detail="Unable to determine key_id and key, and they were not provided" ) return key_id, key async def get_manifest( request: Request, manifest_params: MPDManifestParams, proxy_headers: ProxyRequestHeaders, ): """ Retrieves and processes the MPD manifest, converting it to an HLS manifest. Args: request (Request): The incoming HTTP request. manifest_params (MPDManifestParams): The parameters for the manifest request. proxy_headers (ProxyRequestHeaders): The headers to include in the request. Returns: Response: The HTTP response with the HLS manifest. """ try: mpd_dict = await get_cached_mpd( manifest_params.destination, headers=proxy_headers.request, parse_drm=not manifest_params.key_id and not manifest_params.key, ) except DownloadError as e: raise HTTPException(status_code=e.status_code, detail=f"Failed to download MPD: {e.message}") drm_info = mpd_dict.get("drmInfo", {}) # Get skip segments if provided skip_segments = manifest_params.get_skip_segments() if drm_info and not drm_info.get("isDrmProtected"): # For non-DRM protected MPD, we still create an HLS manifest return await process_manifest( request, mpd_dict, proxy_headers, None, None, manifest_params.resolution, skip_segments ) # Support combined kid:key,kid:key format passed as a single key= param key_id_param, key_param = _parse_combined_key_param(manifest_params.key_id, manifest_params.key) key_id, key = await handle_drm_key_data(key_id_param, key_param, drm_info) # Normalize key_id and key: convert from base64 to hex when needed. # Each value may be a comma-separated list for multi-key DRM; each part is # normalized independently so that commas are never passed to urlsafe_b64decode # (base64 silently ignores non-alphabet characters, stripping commas and # concatenating the keys into a single oversized value). key_id = _normalize_drm_key_value(key_id) key = _normalize_drm_key_value(key) return await process_manifest( request, mpd_dict, proxy_headers, key_id, key, manifest_params.resolution, skip_segments ) async def get_playlist( request: Request, playlist_params: MPDPlaylistParams, proxy_headers: ProxyRequestHeaders, ): """ Retrieves and processes the MPD manifest, converting it to an HLS playlist for a specific profile. Args: request (Request): The incoming HTTP request. playlist_params (MPDPlaylistParams): The parameters for the playlist request. proxy_headers (ProxyRequestHeaders): The headers to include in the request. Returns: Response: The HTTP response with the HLS playlist. """ try: mpd_dict = await get_cached_mpd( playlist_params.destination, headers=proxy_headers.request, parse_drm=not playlist_params.key_id and not playlist_params.key, parse_segment_profile_id=playlist_params.profile_id, ) except DownloadError as e: raise HTTPException(status_code=e.status_code, detail=f"Failed to download MPD: {e.message}") # Get skip segments if provided skip_segments = playlist_params.get_skip_segments() return await process_playlist( request, mpd_dict, playlist_params.profile_id, proxy_headers, skip_segments, playlist_params.start_offset ) async def _stream_segment_base_drm( init_url: str, init_range: Optional[str], init_headers: dict, init_cache_token: str, init_cache_ttl: Optional[int], segment_url: str, segment_headers: dict, key_id: str, key: str, use_map: bool, ) -> AsyncGenerator[bytes, None]: """ Async generator for streaming a DRM-protected SegmentBase segment. The generator is started AFTER FastAPI has already sent HTTP 200 response headers to the player (StreamingResponse sends headers before iterating the body). All blocking CDN I/O — both the init-segment fetch and the large media-segment fetch — lives here, so zero network work happens before the player receives its 200 OK. Stream layout (use_map=False): [yield] processed moov (~600 B) — sent after init fetch, player gets codec info [yield …] decrypted moof + mdat per fragment — streamed as they arrive """ import struct from mediaflow_proxy.drm.decrypter import MP4Decrypter, MP4Atom, _build_key_map from mediaflow_proxy.utils.http_client import create_aiohttp_session from mediaflow_proxy.utils.http_utils import fetch_with_retry # Fetch the init segment inside the generator — this runs after HTTP 200 # headers are already in-flight, so the player never times out waiting for # the response line. try: init_content = await get_cached_init_segment( init_url, init_headers, cache_token=init_cache_token, ttl=init_cache_ttl, byte_range=init_range, ) except Exception as e: logger.warning(f"SegmentBase init fetch failed in stream: {e}") return if not init_content: logger.warning(f"SegmentBase init segment empty for {init_url}") return # Build a single decrypter instance primed with the init segment so that # track_encryption_settings / iv_size / encryption_scheme are all populated # before we start processing media boxes. key_map = _build_key_map(key_id, key) decrypter = MP4Decrypter(key_map) # Yield cleaned init immediately. Calling process_init_only() also primes # the decrypter. if not use_map: try: processed_init = decrypter.process_init_only(init_content) yield processed_init except Exception as e: logger.warning(f"SegmentBase init processing failed: {e}") return else: # EXT-X-MAP: init served separately; still prime the decrypter try: decrypter.process_init_only(init_content) except Exception as e: logger.warning(f"SegmentBase init priming failed: {e}") return # Stream the media range from the CDN and process each MP4 box as it # arrives. We maintain a rolling byte buffer and parse 8-byte box headers # (4-byte big-endian size + 4-byte type) to know when a complete box has # accumulated, then hand it to the decrypter immediately. seg_timeout = ClientTimeout(connect=10, sock_read=60, total=None) buf = bytearray() try: async with create_aiohttp_session(segment_url, timeout=seg_timeout) as (session, proxy_url): response = await fetch_with_retry(session, "GET", segment_url, segment_headers, proxy=proxy_url) async for chunk in response.content.iter_chunked(65536): buf.extend(chunk) # Drain all complete boxes from the front of the buffer while len(buf) >= 8: box_size = struct.unpack_from(">I", buf, 0)[0] # box_size == 1 means a 64-bit extended size follows (rare in # streaming segments); box_size == 0 means "to end of file". # Neither is expected here — fall through to flush at the end. if box_size < 8: break if len(buf) < box_size: break # Incomplete box — wait for more data atom_type = bytes(buf[4:8]) atom_data = bytearray(buf[8:box_size]) del buf[:box_size] # Drop sidx: its byte-offset references point to the original # encrypted stream and become incorrect after senc/saiz/saio # stripping. Omitting it lets the demuxer fall back to scanning # moof boxes sequentially, which is always correct. if atom_type == b"sidx": continue # When use_map=True the moov is served separately via EXT-X-MAP. # If the CDN ignores the Range header and returns the full file, # the stream would contain a moov we must not re-emit. if use_map and atom_type == b"moov": continue atom = MP4Atom(atom_type, box_size, atom_data) try: processed = decrypter._process_atom(atom_type, atom) yield processed.pack() except Exception as e: logger.warning(f"Box processing error ({atom_type!r}): {e}") # Yield the original box so the stream isn't truncated yield struct.pack(">I", box_size) + atom_type + bytes(atom_data) # Flush any trailing bytes (should not happen for a well-formed MP4) if buf: yield bytes(buf) except Exception as e: logger.warning(f"SegmentBase media streaming failed for {segment_url}: {e}") async def get_segment( segment_params: MPDSegmentParams, proxy_headers: ProxyRequestHeaders, force_remux_ts: bool = None, ): """ Retrieves and processes a media segment, decrypting it if necessary. Uses event-based coordination with the DASH prebuffer to prevent duplicate downloads. The prebuffer's get_or_download() handles cache checks, waiting for existing downloads, and starting new downloads as needed. Args: segment_params (MPDSegmentParams): The parameters for the segment request. proxy_headers (ProxyRequestHeaders): The headers to include in the request. force_remux_ts (bool, optional): If True, force remuxing to MPEG-TS regardless of global settings. Used by /mpd/segment.ts endpoint. Defaults to None. Returns: Response: The HTTP response with the processed segment. """ try: live_cache_ttl = settings.mpd_live_init_cache_ttl if segment_params.is_live else None segment_url = segment_params.segment_url should_remux = force_remux_ts if force_remux_ts is not None else settings.remux_to_ts # For SegmentBase MPDs, segment_range specifies the byte range of media data # (e.g. "658-" meaning everything after the init segment). Apply it as a # Range header so we never try to download a whole large file. segment_headers = dict(proxy_headers.request) if segment_params.segment_range: segment_headers["Range"] = f"bytes={segment_params.segment_range}" # Check processed segment cache first (avoids re-decrypting/re-remuxing) is_processed = bool(segment_params.key_id or should_remux) if is_processed: cache_key = f"{segment_url}|{segment_params.segment_range or ''}" processed_content = await get_cached_processed_segment(cache_key, segment_params.key_id, should_remux) if processed_content: logger.info(f"Serving processed segment from cache: {segment_url}") mimetype = "video/mp2t" if should_remux else segment_params.mime_type response_headers = apply_header_manipulation({}, proxy_headers) return Response(content=processed_content, media_type=mimetype, headers=response_headers) # SegmentBase + DRM (non-remux): streaming path. # Return StreamingResponse with zero blocking — all CDN I/O (init fetch # + media stream) happens inside the generator, AFTER FastAPI has already # sent HTTP 200 + headers to the player. This prevents ffmpeg/mpv from # reporting "Immediate exit requested" / "Operation timed out" when the # CDN connection is slow. Connection: close tells the player not to # reuse this socket after the long-lived streaming response finishes. if segment_params.segment_range and segment_params.key_id and not should_remux: response_headers = apply_header_manipulation({}, proxy_headers) response_headers["connection"] = "close" return StreamingResponse( _stream_segment_base_drm( init_url=segment_params.init_url, init_range=segment_params.init_range, init_headers=dict(proxy_headers.request), init_cache_token=segment_params.key_id, init_cache_ttl=live_cache_ttl, segment_url=segment_url, segment_headers=segment_headers, key_id=segment_params.key_id, key=segment_params.key, use_map=segment_params.use_map, ), media_type=segment_params.mime_type, headers=response_headers, ) # SegmentBase without DRM, or with TS remux: buffer then process. # Use sock_read timeout so large files don't hit the 60 s total cap. if segment_params.segment_range: try: seg_timeout = ClientTimeout(connect=10, sock_read=60, total=None) segment_content = await download_file_with_retry(segment_url, segment_headers, timeout=seg_timeout) except Exception as dl_err: logger.warning(f"SegmentBase range download failed for {segment_url}: {dl_err}") segment_content = None # Use event-based coordination for segment download # get_or_download() handles: # - Cache check # - Waiting for existing downloads (via asyncio.Event) # - Starting new download if needed # - Caching the result # Player requests should get priority over background prebuffer activity. # Use a configurable lock timeout to balance responsiveness and cache reuse. elif settings.enable_dash_prebuffer: segment_content = await dash_prebuffer.get_or_download( segment_url, segment_headers, timeout=settings.dash_player_lock_timeout ) else: # Prebuffer disabled - check cache then download directly segment_content = await get_cached_segment(segment_url) if not segment_content: try: segment_content = await download_file_with_retry(segment_url, segment_headers) # Cache for future requests (synchronous to ensure it's cached before returning) if segment_content and segment_params.is_live: # Use create_task for non-blocking cache write, but segment is already downloaded asyncio.create_task( set_cached_segment(segment_url, segment_content, ttl=settings.dash_segment_cache_ttl) ) except Exception as dl_err: logger.warning(f"Direct download failed when prebuffer disabled: {dl_err}") segment_content = None # If prebuffer returned None (lock timeout or coordination failure), # check cache one more time - the download may have completed while we waited # Then fall back to a direct download if still not cached. # This is critical for live streams where the prebuffer may be busy # downloading other segments/profiles. if not segment_content and not segment_params.segment_range: # Final cache check - download may have completed during lock wait segment_content = await get_cached_segment(segment_url) if segment_content: logger.info(f"Segment found in cache after prebuffer timeout: {segment_url}") else: logger.info(f"Prebuffer returned no content, falling back to direct download: {segment_url}") try: segment_content = await download_file_with_retry(segment_url, segment_headers) # Cache on success for future requests if segment_content and segment_params.is_live: asyncio.create_task( set_cached_segment(segment_url, segment_content, ttl=settings.dash_segment_cache_ttl) ) except Exception as dl_err: logger.warning(f"Direct download fallback also failed: {dl_err}") if not segment_content: # Return 404 instead of 502 so players can skip and continue # Most video players handle 404s gracefully by skipping the segment raise HTTPException(status_code=404, detail="Segment unavailable") # Fetch init segment (uses its own cache) init_content = await get_cached_init_segment( segment_params.init_url, proxy_headers.request, cache_token=segment_params.key_id, ttl=live_cache_ttl, byte_range=segment_params.init_range, ) # Trigger continuous prefetch for live streams if settings.enable_dash_prebuffer and segment_params.is_live: for mpd_url in dash_prebuffer.active_streams: asyncio.create_task( dash_prebuffer.prefetch_upcoming_segments( mpd_url, segment_url, proxy_headers.request, ) ) break # Only need to trigger once except Exception as e: return handle_exceptions(e) try: response = await process_segment( init_content, segment_content, segment_params.mime_type, proxy_headers, segment_params.key_id, segment_params.key, use_map=segment_params.use_map, remux_ts=force_remux_ts, ) except Exception as e: return handle_exceptions(e) # Cache processed segment for future requests (avoids re-decrypting/re-remuxing) if is_processed and response.status_code == 200: asyncio.create_task( set_cached_processed_segment( segment_url, response.body, segment_params.key_id, should_remux, ttl=settings.processed_segment_cache_ttl, ) ) return response async def get_init_segment( init_params: MPDInitParams, proxy_headers: ProxyRequestHeaders, ): """ Retrieves and processes an initialization segment for EXT-X-MAP. Args: init_params (MPDInitParams): The parameters for the init segment request. proxy_headers (ProxyRequestHeaders): The headers to include in the request. Returns: Response: The HTTP response with the processed init segment. """ try: live_cache_ttl = settings.mpd_live_init_cache_ttl if init_params.is_live else None init_content = await get_cached_init_segment( init_params.init_url, proxy_headers.request, cache_token=init_params.key_id, ttl=live_cache_ttl, byte_range=init_params.init_range, ) except Exception as e: return handle_exceptions(e) return await process_init_segment( init_content, init_params.mime_type, proxy_headers, init_params.key_id, init_params.key, init_params.init_url, ) IP_LOOKUP_SERVICES = [ {"url": "https://api.ipify.org?format=json", "key": "ip"}, {"url": "https://ipinfo.io/json", "key": "ip"}, {"url": "https://httpbin.org/ip", "key": "origin"}, ] async def get_public_ip(): """ Retrieves the public IP address of the MediaFlow proxy. Tries multiple services for reliability. Returns: dict: A dictionary with the public IP address {"ip": "x.x.x.x"}. Raises: DownloadError: If all IP lookup services fail. """ for service in IP_LOOKUP_SERVICES: try: response = await request_with_retry("GET", service["url"], {}) content = await response.text() import json data = json.loads(content) ip = data.get(service["key"]) if ip: return {"ip": ip.strip()} except Exception: continue raise DownloadError(503, "Failed to retrieve public IP from all services")