new version

2026-06-10 09:10:23 +00:00 · 2026-01-11 14:29:22 +01:00
parent b8a40b5afc
commit 7785e8c604
45 changed files with 5463 additions and 832 deletions
@@ -1,10 +1,13 @@
+import asyncio
 from typing import Annotated
 from urllib.parse import quote, unquote
 import re
 import logging
+import httpx
+import time

 from fastapi import Request, Depends, APIRouter, Query, HTTPException
-from fastapi.responses import Response, RedirectResponse
+from fastapi.responses import Response

 from mediaflow_proxy.handlers import (
    handle_hls_stream_proxy,
@@ -21,11 +24,22 @@ from mediaflow_proxy.schemas import (
    HLSManifestParams,
    MPDManifestParams,
 )
-from mediaflow_proxy.utils.http_utils import get_proxy_headers, ProxyRequestHeaders
+from mediaflow_proxy.utils.http_utils import (
+    get_proxy_headers,
+    ProxyRequestHeaders,
+    create_httpx_client,
+)
 from mediaflow_proxy.utils.base64_utils import process_potential_base64_url

 proxy_router = APIRouter()

+# DLHD extraction cache: {original_url: {"data": extraction_result, "timestamp": time.time()}}
+_dlhd_extraction_cache = {}
+_dlhd_cache_duration = 600  # 10 minutes in seconds
+
+_sportsonline_extraction_cache = {}
+_sportsonline_cache_duration = 600  # 10 minutes in seconds
+

 def sanitize_url(url: str) -> str:
    """
@@ -122,41 +136,146 @@ def extract_drm_params_from_url(url: str) -> tuple[str, str, str]:
    return clean_url, key_id, key


-def _check_and_redirect_dlhd_stream(request: Request, destination: str) -> RedirectResponse | None:
+def _invalidate_dlhd_cache(destination: str):
+    """Invalidate DLHD cache for a specific destination URL."""
+    if destination in _dlhd_extraction_cache:
+        del _dlhd_extraction_cache[destination]
+        logger = logging.getLogger(__name__)
+        logger.info(f"DLHD cache invalidated for: {destination}")
+
+
+async def _check_and_extract_dlhd_stream(
+    request: Request, 
+    destination: str, 
+    proxy_headers: ProxyRequestHeaders,
+    force_refresh: bool = False
+) -> dict | None:
    """
-    Check if destination contains stream-{numero} pattern and redirect to extractor if needed.
+    Check if destination contains DLHD/DaddyLive patterns and extract stream directly.
+    Uses caching to avoid repeated extractions (10 minute cache).
    
    Args:
        request (Request): The incoming HTTP request.
        destination (str): The destination URL to check.
+        proxy_headers (ProxyRequestHeaders): The headers to include in the request.
+        force_refresh (bool): Force re-extraction even if cached data exists.
        
    Returns:
-        RedirectResponse | None: RedirectResponse if redirect is needed, None otherwise.
+        dict | None: Extracted stream data if DLHD link detected, None otherwise.
    """
    import re
+    from urllib.parse import urlparse
+    from mediaflow_proxy.extractors.factory import ExtractorFactory
+    from mediaflow_proxy.extractors.base import ExtractorError
+    from mediaflow_proxy.utils.http_utils import DownloadError
    
-    # Check for stream-{numero} pattern (e.g., stream-1, stream-123, etc.)
-    if re.search(r'stream-\d+', destination):
-        from urllib.parse import urlencode
+    # Check for common DLHD/DaddyLive patterns in the URL
+    # This includes stream-XXX pattern and domain names like dlhd.dad or daddylive.sx
+    is_dlhd_link = (
+        re.search(r'stream-\d+', destination) or
+        "dlhd.dad" in urlparse(destination).netloc or
+        "daddylive.sx" in urlparse(destination).netloc
+    )
+    
+    if not is_dlhd_link:
+        return None
+    
+    logger = logging.getLogger(__name__)
+    logger.info(f"DLHD link detected: {destination}")
+    
+    # Check cache first (unless force_refresh is True)
+    current_time = time.time()
+    if not force_refresh and destination in _dlhd_extraction_cache:
+        cached_entry = _dlhd_extraction_cache[destination]
+        cache_age = current_time - cached_entry["timestamp"]
        
-        # Build redirect URL to extractor
-        redirect_params = {
-            "host": "DLHD",
-            "redirect_stream": "true",
-            "d": destination
+        if cache_age < _dlhd_cache_duration:
+            logger.info(f"Using cached DLHD data (age: {cache_age:.1f}s)")
+            return cached_entry["data"]
+        else:
+            logger.info(f"DLHD cache expired (age: {cache_age:.1f}s), re-extracting...")
+            del _dlhd_extraction_cache[destination]
+    
+    # Extract stream data
+    try:
+        logger.info(f"Extracting DLHD stream data from: {destination}")
+        extractor = ExtractorFactory.get_extractor("DLHD", proxy_headers.request)
+        result = await extractor.extract(destination)
+        
+        logger.info(f"DLHD extraction successful. Stream URL: {result.get('destination_url')}")
+        
+        # Cache the result
+        _dlhd_extraction_cache[destination] = {
+            "data": result,
+            "timestamp": current_time
        }
+        logger.info(f"DLHD data cached for {_dlhd_cache_duration}s")
        
-        # Preserve api_password if present
-        if "api_password" in request.query_params:
-            redirect_params["api_password"] = request.query_params["api_password"]
+        return result
        
-        # Build the redirect URL
-        base_url = str(request.url_for("extract_url"))
-        redirect_url = f"{base_url}?{urlencode(redirect_params)}"
-        
-        return RedirectResponse(url=redirect_url, status_code=302)
-    
-    return None
+    except (ExtractorError, DownloadError) as e:
+        logger.error(f"DLHD extraction failed: {str(e)}")
+        raise HTTPException(status_code=400, detail=f"DLHD extraction failed: {str(e)}")
+    except Exception as e:
+        logger.exception(f"Unexpected error during DLHD extraction: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"DLHD extraction failed: {str(e)}")
+
+
+async def _check_and_extract_sportsonline_stream(
+    request: Request,
+    destination: str,
+    proxy_headers: ProxyRequestHeaders,
+    force_refresh: bool = False
+) -> dict | None:
+    """
+    Check if destination contains Sportsonline/Sportzonline patterns and extract stream directly.
+    Uses caching to avoid repeated extractions (10 minute cache).
+
+    Args:
+        request (Request): The incoming HTTP request.
+        destination (str): The destination URL to check.
+        proxy_headers (ProxyRequestHeaders): The headers to include in the request.
+        force_refresh (bool): Force re-extraction even if cached data exists.
+
+    Returns:
+        dict | None: Extracted stream data if Sportsonline link detected, None otherwise.
+    """
+    import re
+    from urllib.parse import urlparse
+    from mediaflow_proxy.extractors.factory import ExtractorFactory
+    from mediaflow_proxy.extractors.base import ExtractorError
+    from mediaflow_proxy.utils.http_utils import DownloadError
+
+    parsed_netloc = urlparse(destination).netloc
+    is_sportsonline_link = "sportzonline." in parsed_netloc or "sportsonline." in parsed_netloc
+
+    if not is_sportsonline_link:
+        return None
+
+    logger = logging.getLogger(__name__)
+    logger.info(f"Sportsonline link detected: {destination}")
+
+    current_time = time.time()
+    if not force_refresh and destination in _sportsonline_extraction_cache:
+        cached_entry = _sportsonline_extraction_cache[destination]
+        if current_time - cached_entry["timestamp"] < _sportsonline_cache_duration:
+            logger.info(f"Using cached Sportsonline data (age: {current_time - cached_entry['timestamp']:.1f}s)")
+            return cached_entry["data"]
+        else:
+            logger.info("Sportsonline cache expired, re-extracting...")
+            del _sportsonline_extraction_cache[destination]
+
+    try:
+        logger.info(f"Extracting Sportsonline stream data from: {destination}")
+        extractor = ExtractorFactory.get_extractor("Sportsonline", proxy_headers.request)
+        result = await extractor.extract(destination)
+        logger.info(f"Sportsonline extraction successful. Stream URL: {result.get('destination_url')}")
+        _sportsonline_extraction_cache[destination] = {"data": result, "timestamp": current_time}
+        logger.info(f"Sportsonline data cached for {_sportsonline_cache_duration}s")
+        return result
+    except (ExtractorError, DownloadError, Exception) as e:
+        logger.error(f"Sportsonline extraction failed: {str(e)}")
+        raise HTTPException(status_code=400, detail=f"Sportsonline extraction failed: {str(e)}")


@proxy_router.head("/hls/manifest.m3u8")
@@ -179,12 +298,197 @@ async def hls_manifest_proxy(
        Response: The HTTP response with the processed m3u8 playlist or streamed content.
    """
    # Sanitize destination URL to fix common encoding issues
+    original_destination = hls_params.destination
    hls_params.destination = sanitize_url(hls_params.destination)
    
-    # Check if destination contains stream-{numero} pattern and redirect to extractor
-    redirect_response = _check_and_redirect_dlhd_stream(request, hls_params.destination)
-    if redirect_response:
-        return redirect_response
+    # Check if this is a retry after 403 error (dlhd_retry parameter)
+    force_refresh = request.query_params.get("dlhd_retry") == "1"
+    
+    # Check if destination contains DLHD pattern and extract stream directly
+    dlhd_result = await _check_and_extract_dlhd_stream(
+        request, hls_params.destination, proxy_headers, force_refresh=force_refresh
+    )
+    dlhd_original_url = None
+    if dlhd_result:
+        # Store original DLHD URL for cache invalidation on 403 errors
+        dlhd_original_url = hls_params.destination
+        
+        # Update destination and headers with extracted stream data
+        hls_params.destination = dlhd_result["destination_url"]
+        extracted_headers = dlhd_result.get("request_headers", {})
+        proxy_headers.request.update(extracted_headers)
+        
+        # Check if extractor wants key-only proxy (DLHD uses hls_key_proxy endpoint)
+        if dlhd_result.get("mediaflow_endpoint") == "hls_key_proxy":
+            hls_params.key_only_proxy = True
+        
+        # Also add headers to query params so they propagate to key/segment requests
+        # This is necessary because M3U8Processor encodes headers as h_* query params
+        from fastapi.datastructures import QueryParams
+        query_dict = dict(request.query_params)
+        for header_name, header_value in extracted_headers.items():
+            # Add header with h_ prefix to query params
+            query_dict[f"h_{header_name}"] = header_value
+        # Add DLHD original URL to track for cache invalidation
+        if dlhd_original_url:
+            query_dict["dlhd_original"] = dlhd_original_url
+        # Remove retry flag from subsequent requests
+        query_dict.pop("dlhd_retry", None)
+        # Update request query params
+        request._query_params = QueryParams(query_dict)
+
+    # Check if destination contains Sportsonline pattern and extract stream directly
+    sportsonline_result = await _check_and_extract_sportsonline_stream(
+        request, hls_params.destination, proxy_headers
+    )
+    if sportsonline_result:
+        # Update destination and headers with extracted stream data
+        hls_params.destination = sportsonline_result["destination_url"]
+        extracted_headers = sportsonline_result.get("request_headers", {})
+        proxy_headers.request.update(extracted_headers)
+
+        # Check if extractor wants key-only proxy
+        if sportsonline_result.get("mediaflow_endpoint") == "hls_key_proxy":
+            hls_params.key_only_proxy = True
+
+        # Also add headers to query params so they propagate to key/segment requests
+        from fastapi.datastructures import QueryParams
+        query_dict = dict(request.query_params)
+        for header_name, header_value in extracted_headers.items():
+            # Add header with h_ prefix to query params
+            query_dict[f"h_{header_name}"] = header_value
+        # Remove retry flag from subsequent requests
+        query_dict.pop("dlhd_retry", None)
+        # Update request query params
+        request._query_params = QueryParams(query_dict)
+
+    # Wrap the handler to catch 403 errors and retry with cache invalidation
+    try:
+        result = await _handle_hls_with_dlhd_retry(request, hls_params, proxy_headers, dlhd_original_url)
+        return result
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger = logging.getLogger(__name__)
+        logger.exception(f"Unexpected error in hls_manifest_proxy: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+async def _handle_hls_with_dlhd_retry(
+    request: Request,
+    hls_params: HLSManifestParams,
+    proxy_headers: ProxyRequestHeaders,
+    dlhd_original_url: str | None
+):
+    """
+    Handle HLS request with automatic retry on 403 errors for DLHD streams.
+    """
+    logger = logging.getLogger(__name__)
+    
+    if hls_params.max_res:
+        from mediaflow_proxy.utils.hls_utils import parse_hls_playlist
+        from mediaflow_proxy.utils.m3u8_processor import M3U8Processor
+
+        async with create_httpx_client(
+            headers=proxy_headers.request,
+            follow_redirects=True,
+        ) as client:
+            try:
+                response = await client.get(hls_params.destination)
+                response.raise_for_status()
+                playlist_content = response.text
+            except httpx.HTTPStatusError as e:
+                raise HTTPException(
+                    status_code=502,
+                    detail=f"Failed to fetch HLS manifest from origin: {e.response.status_code} {e.response.reason_phrase}",
+                ) from e
+            except httpx.TimeoutException as e:
+                raise HTTPException(
+                    status_code=504,
+                    detail=f"Timeout while fetching HLS manifest: {e}",
+                ) from e
+            except httpx.RequestError as e:
+                raise HTTPException(status_code=502, detail=f"Network error fetching HLS manifest: {e}") from e
+        
+        streams = parse_hls_playlist(playlist_content, base_url=hls_params.destination)
+        if not streams:
+            raise HTTPException(
+                status_code=404, detail="No streams found in the manifest."
+            )
+
+        highest_res_stream = max(
+            streams,
+            key=lambda s: s.get("resolution", (0, 0))[0]
+            * s.get("resolution", (0, 0))[1],
+        )
+
+        if highest_res_stream.get("resolution", (0, 0)) == (0, 0):
+            logging.warning("Selected stream has resolution (0, 0); resolution parsing may have failed or not be available in the manifest.")
+
+        # Rebuild the manifest preserving master-level directives
+        # but removing non-selected variant blocks
+        lines = playlist_content.splitlines()
+        highest_variant_index = streams.index(highest_res_stream)
+        
+        variant_index = -1
+        new_manifest_lines = []
+        i = 0
+        while i < len(lines):
+            line = lines[i]
+            if line.startswith("#EXT-X-STREAM-INF"):
+                variant_index += 1
+                next_line = ""
+                if i + 1 < len(lines) and not lines[i + 1].startswith("#"):
+                    next_line = lines[i + 1]
+                
+                # Only keep the selected variant
+                if variant_index == highest_variant_index:
+                    new_manifest_lines.append(line)
+                    if next_line:
+                        new_manifest_lines.append(next_line)
+                
+                # Skip variant block (stream-inf + optional url)
+                i += 2 if next_line else 1
+                continue
+            
+            # Preserve all other lines (master directives, media tags, etc.)
+            new_manifest_lines.append(line)
+            i += 1
+        
+        new_manifest = "\n".join(new_manifest_lines)
+
+        # Process the new manifest to proxy all URLs within it
+        processor = M3U8Processor(request, hls_params.key_url, hls_params.force_playlist_proxy, hls_params.key_only_proxy, hls_params.no_proxy)
+        processed_manifest = await processor.process_m3u8(new_manifest, base_url=hls_params.destination)
+        
+        return Response(content=processed_manifest, media_type="application/vnd.apple.mpegurl")
+    
+    return await handle_hls_stream_proxy(request, hls_params, proxy_headers)
+
+
+@proxy_router.head("/hls/key_proxy/manifest.m3u8", name="hls_key_proxy")
+@proxy_router.get("/hls/key_proxy/manifest.m3u8", name="hls_key_proxy")
+async def hls_key_proxy(
+    request: Request,
+    hls_params: Annotated[HLSManifestParams, Query()],
+    proxy_headers: Annotated[ProxyRequestHeaders, Depends(get_proxy_headers)],
+):
+    """
+    Proxify HLS stream requests, but only proxy the key URL, leaving segment URLs direct.
+
+    Args:
+        request (Request): The incoming HTTP request.
+        hls_params (HLSManifestParams): The parameters for the HLS stream request.
+        proxy_headers (ProxyRequestHeaders): The headers to include in the request.
+
+    Returns:
+        Response: The HTTP response with the processed m3u8 playlist.
+    """
+    # Sanitize destination URL to fix common encoding issues
+    hls_params.destination = sanitize_url(hls_params.destination)
+    
+    # Set the key_only_proxy flag to True
+    hls_params.key_only_proxy = True
    
    return await handle_hls_stream_proxy(request, hls_params, proxy_headers)

@@ -208,7 +512,7 @@ async def hls_segment_proxy(
    """
    from mediaflow_proxy.utils.hls_prebuffer import hls_prebuffer
    from mediaflow_proxy.configs import settings
-    
+
    # Sanitize segment URL to fix common encoding issues
    segment_url = sanitize_url(segment_url)
    
@@ -217,11 +521,13 @@ async def hls_segment_proxy(
    for key, value in request.query_params.items():
        if key.startswith("h_"):
            headers[key[2:]] = value
-    
+
    # Try to get segment from pre-buffer cache first
    if settings.enable_hls_prebuffer:
        cached_segment = await hls_prebuffer.get_segment(segment_url, headers)
        if cached_segment:
+            # Avvia prebuffer dei successivi in background
+            asyncio.create_task(hls_prebuffer.prebuffer_from_segment(segment_url, headers))
            return Response(
                content=cached_segment,
                media_type="video/mp2t",
@@ -231,8 +537,11 @@ async def hls_segment_proxy(
                    "Access-Control-Allow-Origin": "*"
                }
            )
-    
-    # Fallback to direct streaming if not in cache
+
+    # Fallback to direct streaming se non in cache:
+    # prima di restituire, prova comunque a far partire il prebuffer dei successivi
+    if settings.enable_hls_prebuffer:
+        asyncio.create_task(hls_prebuffer.prebuffer_from_segment(segment_url, headers))
    return await handle_stream_request("GET", segment_url, proxy_headers)


@@ -308,16 +617,21 @@ async def proxy_stream_endpoint(
    # Sanitize destination URL to fix common encoding issues
    destination = sanitize_url(destination)
    
-    # Check if destination contains stream-{numero} pattern and redirect to extractor
-    redirect_response = _check_and_redirect_dlhd_stream(request, destination)
-    if redirect_response:
-        return redirect_response
+    # Check if destination contains DLHD pattern and extract stream directly
+    dlhd_result = await _check_and_extract_dlhd_stream(request, destination, proxy_headers)
+    if dlhd_result:
+        # Update destination and headers with extracted stream data
+        destination = dlhd_result["destination_url"]
+        proxy_headers.request.update(dlhd_result.get("request_headers", {}))
+    if proxy_headers.request.get("range", "").strip() == "":
+        proxy_headers.request.pop("range", None)
+
+    if proxy_headers.request.get("if-range", "").strip() == "":
+        proxy_headers.request.pop("if-range", None)
+    
+    if "range" not in proxy_headers.request:
+        proxy_headers.request["range"] = "bytes=0-"
    
-    content_range = proxy_headers.request.get("range", "bytes=0-")
-    if "nan" in content_range.casefold():
-        # Handle invalid range requests "bytes=NaN-NaN"
-        raise HTTPException(status_code=416, detail="Invalid Range Header")
-    proxy_headers.request.update({"range": content_range})
    if filename:
        # If a filename is provided, set it in the headers using RFC 6266 format
        try:
@@ -430,4 +744,4 @@ async def get_mediaflow_proxy_public_ip():
    Returns:
        Response: The HTTP response with the public IP address in the form of a JSON object. {"ip": "xxx.xxx.xxx.xxx"}
    """
-    return await get_public_ip()
+    return await get_public_ip()