new version

This commit is contained in:
UrloMythus
2026-01-11 14:29:22 +01:00
parent b8a40b5afc
commit 7785e8c604
45 changed files with 5463 additions and 832 deletions

View File

@@ -1,10 +1,13 @@
import asyncio
from typing import Annotated
from urllib.parse import quote, unquote
import re
import logging
import httpx
import time
from fastapi import Request, Depends, APIRouter, Query, HTTPException
from fastapi.responses import Response, RedirectResponse
from fastapi.responses import Response
from mediaflow_proxy.handlers import (
handle_hls_stream_proxy,
@@ -21,11 +24,22 @@ from mediaflow_proxy.schemas import (
HLSManifestParams,
MPDManifestParams,
)
from mediaflow_proxy.utils.http_utils import get_proxy_headers, ProxyRequestHeaders
from mediaflow_proxy.utils.http_utils import (
get_proxy_headers,
ProxyRequestHeaders,
create_httpx_client,
)
from mediaflow_proxy.utils.base64_utils import process_potential_base64_url
proxy_router = APIRouter()
# DLHD extraction cache: {original_url: {"data": extraction_result, "timestamp": time.time()}}
_dlhd_extraction_cache = {}
_dlhd_cache_duration = 600 # 10 minutes in seconds
_sportsonline_extraction_cache = {}
_sportsonline_cache_duration = 600 # 10 minutes in seconds
def sanitize_url(url: str) -> str:
"""
@@ -122,41 +136,146 @@ def extract_drm_params_from_url(url: str) -> tuple[str, str, str]:
return clean_url, key_id, key
def _check_and_redirect_dlhd_stream(request: Request, destination: str) -> RedirectResponse | None:
def _invalidate_dlhd_cache(destination: str):
"""Invalidate DLHD cache for a specific destination URL."""
if destination in _dlhd_extraction_cache:
del _dlhd_extraction_cache[destination]
logger = logging.getLogger(__name__)
logger.info(f"DLHD cache invalidated for: {destination}")
async def _check_and_extract_dlhd_stream(
request: Request,
destination: str,
proxy_headers: ProxyRequestHeaders,
force_refresh: bool = False
) -> dict | None:
"""
Check if destination contains stream-{numero} pattern and redirect to extractor if needed.
Check if destination contains DLHD/DaddyLive patterns and extract stream directly.
Uses caching to avoid repeated extractions (10 minute cache).
Args:
request (Request): The incoming HTTP request.
destination (str): The destination URL to check.
proxy_headers (ProxyRequestHeaders): The headers to include in the request.
force_refresh (bool): Force re-extraction even if cached data exists.
Returns:
RedirectResponse | None: RedirectResponse if redirect is needed, None otherwise.
dict | None: Extracted stream data if DLHD link detected, None otherwise.
"""
import re
from urllib.parse import urlparse
from mediaflow_proxy.extractors.factory import ExtractorFactory
from mediaflow_proxy.extractors.base import ExtractorError
from mediaflow_proxy.utils.http_utils import DownloadError
# Check for stream-{numero} pattern (e.g., stream-1, stream-123, etc.)
if re.search(r'stream-\d+', destination):
from urllib.parse import urlencode
# Check for common DLHD/DaddyLive patterns in the URL
# This includes stream-XXX pattern and domain names like dlhd.dad or daddylive.sx
is_dlhd_link = (
re.search(r'stream-\d+', destination) or
"dlhd.dad" in urlparse(destination).netloc or
"daddylive.sx" in urlparse(destination).netloc
)
if not is_dlhd_link:
return None
logger = logging.getLogger(__name__)
logger.info(f"DLHD link detected: {destination}")
# Check cache first (unless force_refresh is True)
current_time = time.time()
if not force_refresh and destination in _dlhd_extraction_cache:
cached_entry = _dlhd_extraction_cache[destination]
cache_age = current_time - cached_entry["timestamp"]
# Build redirect URL to extractor
redirect_params = {
"host": "DLHD",
"redirect_stream": "true",
"d": destination
if cache_age < _dlhd_cache_duration:
logger.info(f"Using cached DLHD data (age: {cache_age:.1f}s)")
return cached_entry["data"]
else:
logger.info(f"DLHD cache expired (age: {cache_age:.1f}s), re-extracting...")
del _dlhd_extraction_cache[destination]
# Extract stream data
try:
logger.info(f"Extracting DLHD stream data from: {destination}")
extractor = ExtractorFactory.get_extractor("DLHD", proxy_headers.request)
result = await extractor.extract(destination)
logger.info(f"DLHD extraction successful. Stream URL: {result.get('destination_url')}")
# Cache the result
_dlhd_extraction_cache[destination] = {
"data": result,
"timestamp": current_time
}
logger.info(f"DLHD data cached for {_dlhd_cache_duration}s")
# Preserve api_password if present
if "api_password" in request.query_params:
redirect_params["api_password"] = request.query_params["api_password"]
return result
# Build the redirect URL
base_url = str(request.url_for("extract_url"))
redirect_url = f"{base_url}?{urlencode(redirect_params)}"
return RedirectResponse(url=redirect_url, status_code=302)
return None
except (ExtractorError, DownloadError) as e:
logger.error(f"DLHD extraction failed: {str(e)}")
raise HTTPException(status_code=400, detail=f"DLHD extraction failed: {str(e)}")
except Exception as e:
logger.exception(f"Unexpected error during DLHD extraction: {str(e)}")
raise HTTPException(status_code=500, detail=f"DLHD extraction failed: {str(e)}")
async def _check_and_extract_sportsonline_stream(
request: Request,
destination: str,
proxy_headers: ProxyRequestHeaders,
force_refresh: bool = False
) -> dict | None:
"""
Check if destination contains Sportsonline/Sportzonline patterns and extract stream directly.
Uses caching to avoid repeated extractions (10 minute cache).
Args:
request (Request): The incoming HTTP request.
destination (str): The destination URL to check.
proxy_headers (ProxyRequestHeaders): The headers to include in the request.
force_refresh (bool): Force re-extraction even if cached data exists.
Returns:
dict | None: Extracted stream data if Sportsonline link detected, None otherwise.
"""
import re
from urllib.parse import urlparse
from mediaflow_proxy.extractors.factory import ExtractorFactory
from mediaflow_proxy.extractors.base import ExtractorError
from mediaflow_proxy.utils.http_utils import DownloadError
parsed_netloc = urlparse(destination).netloc
is_sportsonline_link = "sportzonline." in parsed_netloc or "sportsonline." in parsed_netloc
if not is_sportsonline_link:
return None
logger = logging.getLogger(__name__)
logger.info(f"Sportsonline link detected: {destination}")
current_time = time.time()
if not force_refresh and destination in _sportsonline_extraction_cache:
cached_entry = _sportsonline_extraction_cache[destination]
if current_time - cached_entry["timestamp"] < _sportsonline_cache_duration:
logger.info(f"Using cached Sportsonline data (age: {current_time - cached_entry['timestamp']:.1f}s)")
return cached_entry["data"]
else:
logger.info("Sportsonline cache expired, re-extracting...")
del _sportsonline_extraction_cache[destination]
try:
logger.info(f"Extracting Sportsonline stream data from: {destination}")
extractor = ExtractorFactory.get_extractor("Sportsonline", proxy_headers.request)
result = await extractor.extract(destination)
logger.info(f"Sportsonline extraction successful. Stream URL: {result.get('destination_url')}")
_sportsonline_extraction_cache[destination] = {"data": result, "timestamp": current_time}
logger.info(f"Sportsonline data cached for {_sportsonline_cache_duration}s")
return result
except (ExtractorError, DownloadError, Exception) as e:
logger.error(f"Sportsonline extraction failed: {str(e)}")
raise HTTPException(status_code=400, detail=f"Sportsonline extraction failed: {str(e)}")
@proxy_router.head("/hls/manifest.m3u8")
@@ -179,12 +298,197 @@ async def hls_manifest_proxy(
Response: The HTTP response with the processed m3u8 playlist or streamed content.
"""
# Sanitize destination URL to fix common encoding issues
original_destination = hls_params.destination
hls_params.destination = sanitize_url(hls_params.destination)
# Check if destination contains stream-{numero} pattern and redirect to extractor
redirect_response = _check_and_redirect_dlhd_stream(request, hls_params.destination)
if redirect_response:
return redirect_response
# Check if this is a retry after 403 error (dlhd_retry parameter)
force_refresh = request.query_params.get("dlhd_retry") == "1"
# Check if destination contains DLHD pattern and extract stream directly
dlhd_result = await _check_and_extract_dlhd_stream(
request, hls_params.destination, proxy_headers, force_refresh=force_refresh
)
dlhd_original_url = None
if dlhd_result:
# Store original DLHD URL for cache invalidation on 403 errors
dlhd_original_url = hls_params.destination
# Update destination and headers with extracted stream data
hls_params.destination = dlhd_result["destination_url"]
extracted_headers = dlhd_result.get("request_headers", {})
proxy_headers.request.update(extracted_headers)
# Check if extractor wants key-only proxy (DLHD uses hls_key_proxy endpoint)
if dlhd_result.get("mediaflow_endpoint") == "hls_key_proxy":
hls_params.key_only_proxy = True
# Also add headers to query params so they propagate to key/segment requests
# This is necessary because M3U8Processor encodes headers as h_* query params
from fastapi.datastructures import QueryParams
query_dict = dict(request.query_params)
for header_name, header_value in extracted_headers.items():
# Add header with h_ prefix to query params
query_dict[f"h_{header_name}"] = header_value
# Add DLHD original URL to track for cache invalidation
if dlhd_original_url:
query_dict["dlhd_original"] = dlhd_original_url
# Remove retry flag from subsequent requests
query_dict.pop("dlhd_retry", None)
# Update request query params
request._query_params = QueryParams(query_dict)
# Check if destination contains Sportsonline pattern and extract stream directly
sportsonline_result = await _check_and_extract_sportsonline_stream(
request, hls_params.destination, proxy_headers
)
if sportsonline_result:
# Update destination and headers with extracted stream data
hls_params.destination = sportsonline_result["destination_url"]
extracted_headers = sportsonline_result.get("request_headers", {})
proxy_headers.request.update(extracted_headers)
# Check if extractor wants key-only proxy
if sportsonline_result.get("mediaflow_endpoint") == "hls_key_proxy":
hls_params.key_only_proxy = True
# Also add headers to query params so they propagate to key/segment requests
from fastapi.datastructures import QueryParams
query_dict = dict(request.query_params)
for header_name, header_value in extracted_headers.items():
# Add header with h_ prefix to query params
query_dict[f"h_{header_name}"] = header_value
# Remove retry flag from subsequent requests
query_dict.pop("dlhd_retry", None)
# Update request query params
request._query_params = QueryParams(query_dict)
# Wrap the handler to catch 403 errors and retry with cache invalidation
try:
result = await _handle_hls_with_dlhd_retry(request, hls_params, proxy_headers, dlhd_original_url)
return result
except HTTPException:
raise
except Exception as e:
logger = logging.getLogger(__name__)
logger.exception(f"Unexpected error in hls_manifest_proxy: {e}")
raise HTTPException(status_code=500, detail=str(e))
async def _handle_hls_with_dlhd_retry(
request: Request,
hls_params: HLSManifestParams,
proxy_headers: ProxyRequestHeaders,
dlhd_original_url: str | None
):
"""
Handle HLS request with automatic retry on 403 errors for DLHD streams.
"""
logger = logging.getLogger(__name__)
if hls_params.max_res:
from mediaflow_proxy.utils.hls_utils import parse_hls_playlist
from mediaflow_proxy.utils.m3u8_processor import M3U8Processor
async with create_httpx_client(
headers=proxy_headers.request,
follow_redirects=True,
) as client:
try:
response = await client.get(hls_params.destination)
response.raise_for_status()
playlist_content = response.text
except httpx.HTTPStatusError as e:
raise HTTPException(
status_code=502,
detail=f"Failed to fetch HLS manifest from origin: {e.response.status_code} {e.response.reason_phrase}",
) from e
except httpx.TimeoutException as e:
raise HTTPException(
status_code=504,
detail=f"Timeout while fetching HLS manifest: {e}",
) from e
except httpx.RequestError as e:
raise HTTPException(status_code=502, detail=f"Network error fetching HLS manifest: {e}") from e
streams = parse_hls_playlist(playlist_content, base_url=hls_params.destination)
if not streams:
raise HTTPException(
status_code=404, detail="No streams found in the manifest."
)
highest_res_stream = max(
streams,
key=lambda s: s.get("resolution", (0, 0))[0]
* s.get("resolution", (0, 0))[1],
)
if highest_res_stream.get("resolution", (0, 0)) == (0, 0):
logging.warning("Selected stream has resolution (0, 0); resolution parsing may have failed or not be available in the manifest.")
# Rebuild the manifest preserving master-level directives
# but removing non-selected variant blocks
lines = playlist_content.splitlines()
highest_variant_index = streams.index(highest_res_stream)
variant_index = -1
new_manifest_lines = []
i = 0
while i < len(lines):
line = lines[i]
if line.startswith("#EXT-X-STREAM-INF"):
variant_index += 1
next_line = ""
if i + 1 < len(lines) and not lines[i + 1].startswith("#"):
next_line = lines[i + 1]
# Only keep the selected variant
if variant_index == highest_variant_index:
new_manifest_lines.append(line)
if next_line:
new_manifest_lines.append(next_line)
# Skip variant block (stream-inf + optional url)
i += 2 if next_line else 1
continue
# Preserve all other lines (master directives, media tags, etc.)
new_manifest_lines.append(line)
i += 1
new_manifest = "\n".join(new_manifest_lines)
# Process the new manifest to proxy all URLs within it
processor = M3U8Processor(request, hls_params.key_url, hls_params.force_playlist_proxy, hls_params.key_only_proxy, hls_params.no_proxy)
processed_manifest = await processor.process_m3u8(new_manifest, base_url=hls_params.destination)
return Response(content=processed_manifest, media_type="application/vnd.apple.mpegurl")
return await handle_hls_stream_proxy(request, hls_params, proxy_headers)
@proxy_router.head("/hls/key_proxy/manifest.m3u8", name="hls_key_proxy")
@proxy_router.get("/hls/key_proxy/manifest.m3u8", name="hls_key_proxy")
async def hls_key_proxy(
request: Request,
hls_params: Annotated[HLSManifestParams, Query()],
proxy_headers: Annotated[ProxyRequestHeaders, Depends(get_proxy_headers)],
):
"""
Proxify HLS stream requests, but only proxy the key URL, leaving segment URLs direct.
Args:
request (Request): The incoming HTTP request.
hls_params (HLSManifestParams): The parameters for the HLS stream request.
proxy_headers (ProxyRequestHeaders): The headers to include in the request.
Returns:
Response: The HTTP response with the processed m3u8 playlist.
"""
# Sanitize destination URL to fix common encoding issues
hls_params.destination = sanitize_url(hls_params.destination)
# Set the key_only_proxy flag to True
hls_params.key_only_proxy = True
return await handle_hls_stream_proxy(request, hls_params, proxy_headers)
@@ -208,7 +512,7 @@ async def hls_segment_proxy(
"""
from mediaflow_proxy.utils.hls_prebuffer import hls_prebuffer
from mediaflow_proxy.configs import settings
# Sanitize segment URL to fix common encoding issues
segment_url = sanitize_url(segment_url)
@@ -217,11 +521,13 @@ async def hls_segment_proxy(
for key, value in request.query_params.items():
if key.startswith("h_"):
headers[key[2:]] = value
# Try to get segment from pre-buffer cache first
if settings.enable_hls_prebuffer:
cached_segment = await hls_prebuffer.get_segment(segment_url, headers)
if cached_segment:
# Avvia prebuffer dei successivi in background
asyncio.create_task(hls_prebuffer.prebuffer_from_segment(segment_url, headers))
return Response(
content=cached_segment,
media_type="video/mp2t",
@@ -231,8 +537,11 @@ async def hls_segment_proxy(
"Access-Control-Allow-Origin": "*"
}
)
# Fallback to direct streaming if not in cache
# Fallback to direct streaming se non in cache:
# prima di restituire, prova comunque a far partire il prebuffer dei successivi
if settings.enable_hls_prebuffer:
asyncio.create_task(hls_prebuffer.prebuffer_from_segment(segment_url, headers))
return await handle_stream_request("GET", segment_url, proxy_headers)
@@ -308,16 +617,21 @@ async def proxy_stream_endpoint(
# Sanitize destination URL to fix common encoding issues
destination = sanitize_url(destination)
# Check if destination contains stream-{numero} pattern and redirect to extractor
redirect_response = _check_and_redirect_dlhd_stream(request, destination)
if redirect_response:
return redirect_response
# Check if destination contains DLHD pattern and extract stream directly
dlhd_result = await _check_and_extract_dlhd_stream(request, destination, proxy_headers)
if dlhd_result:
# Update destination and headers with extracted stream data
destination = dlhd_result["destination_url"]
proxy_headers.request.update(dlhd_result.get("request_headers", {}))
if proxy_headers.request.get("range", "").strip() == "":
proxy_headers.request.pop("range", None)
if proxy_headers.request.get("if-range", "").strip() == "":
proxy_headers.request.pop("if-range", None)
if "range" not in proxy_headers.request:
proxy_headers.request["range"] = "bytes=0-"
content_range = proxy_headers.request.get("range", "bytes=0-")
if "nan" in content_range.casefold():
# Handle invalid range requests "bytes=NaN-NaN"
raise HTTPException(status_code=416, detail="Invalid Range Header")
proxy_headers.request.update({"range": content_range})
if filename:
# If a filename is provided, set it in the headers using RFC 6266 format
try:
@@ -430,4 +744,4 @@ async def get_mediaflow_proxy_public_ip():
Returns:
Response: The HTTP response with the public IP address in the form of a JSON object. {"ip": "xxx.xxx.xxx.xxx"}
"""
return await get_public_ip()
return await get_public_ip()