updated to latest version

This commit is contained in:
UrloMythus
2025-09-01 18:41:27 +02:00
parent bc41be6194
commit 8f8c3b195e
21 changed files with 2389 additions and 390 deletions

View File

@@ -1,5 +1,6 @@
from .proxy import proxy_router
from .extractor import extractor_router
from .speedtest import speedtest_router
from .playlist_builder import playlist_builder_router
__all__ = ["proxy_router", "extractor_router", "speedtest_router"]
__all__ = ["proxy_router", "extractor_router", "speedtest_router", "playlist_builder_router"]

View File

@@ -14,6 +14,7 @@ from mediaflow_proxy.utils.http_utils import (
ProxyRequestHeaders,
get_proxy_headers,
)
from mediaflow_proxy.utils.base64_utils import process_potential_base64_url
extractor_router = APIRouter()
logger = logging.getLogger(__name__)
@@ -28,6 +29,10 @@ async def extract_url(
):
"""Extract clean links from various video hosting services."""
try:
# Process potential base64 encoded destination URL
processed_destination = process_potential_base64_url(extractor_params.destination)
extractor_params.destination = processed_destination
cache_key = f"{extractor_params.host}_{extractor_params.model_dump_json()}"
response = await get_cached_extractor_result(cache_key)
if not response:

View File

@@ -0,0 +1,270 @@
import json
import logging
import urllib.parse
from typing import Iterator, Dict, Optional
from fastapi import APIRouter, Request, HTTPException, Query
from fastapi.responses import StreamingResponse
from starlette.responses import RedirectResponse
import httpx
from mediaflow_proxy.configs import settings
from mediaflow_proxy.utils.http_utils import get_original_scheme
import asyncio
logger = logging.getLogger(__name__)
playlist_builder_router = APIRouter()
def rewrite_m3u_links_streaming(m3u_lines_iterator: Iterator[str], base_url: str, api_password: Optional[str]) -> Iterator[str]:
    """
    Rewrite links from an iterator of M3U lines according to host-specific rules,
    applying any headers collected from #EXTVLCOPT and #EXTHTTP directives.

    Args:
        m3u_lines_iterator: Iterator over raw M3U lines (newline-terminated or not).
        base_url: Base URL of this MediaFlow proxy instance.
        api_password: Optional API password appended to every rewritten URL.

    Yields:
        str: The rewritten (or passed-through) lines.
    """
    # Headers accumulated from directive lines; applied to the next URL line.
    current_ext_headers: Dict[str, str] = {}
    for line_with_newline in m3u_lines_iterator:
        line_content = line_with_newline.rstrip('\n')
        logical_line = line_content.strip()
        is_header_tag = False
        if logical_line.startswith('#EXTVLCOPT:'):
            is_header_tag = True
            try:
                option_str = logical_line.split(':', 1)[1]
                if '=' in option_str:
                    key_vlc, value_vlc = option_str.split('=', 1)
                    key_vlc = key_vlc.strip()
                    value_vlc = value_vlc.strip()
                    # Special case: http-header carries a literal "Key: Value" pair.
                    if key_vlc == 'http-header' and ':' in value_vlc:
                        header_key, header_value = value_vlc.split(':', 1)
                        header_key = header_key.strip()
                        header_value = header_value.strip()
                        current_ext_headers[header_key] = header_value
                    elif key_vlc.startswith('http-'):
                        # Handles http-user-agent, http-referer, etc.
                        header_key = '-'.join(word.capitalize() for word in key_vlc[len('http-'):].split('-'))
                        current_ext_headers[header_key] = value_vlc
            except Exception as e:
                logger.error(f"⚠️ Error parsing #EXTVLCOPT '{logical_line}': {e}")
        elif logical_line.startswith('#EXTHTTP:'):
            is_header_tag = True
            try:
                json_str = logical_line.split(':', 1)[1]
                # Replace all current headers with the ones from the JSON payload.
                current_ext_headers = json.loads(json_str)
            except Exception as e:
                logger.error(f"⚠️ Error parsing #EXTHTTP '{logical_line}': {e}")
                current_ext_headers = {}  # Reset on parse error
        if is_header_tag:
            yield line_with_newline
            continue
        if logical_line and not logical_line.startswith('#') and \
                ('http://' in logical_line or 'https://' in logical_line):
            processed_url_content = logical_line
            # Do not rewrite pluto.tv links.
            if 'pluto.tv' in logical_line:
                processed_url_content = logical_line
            elif 'vavoo.to' in logical_line:
                encoded_url = urllib.parse.quote(logical_line, safe='')
                processed_url_content = f"{base_url}/proxy/hls/manifest.m3u8?d={encoded_url}"
            elif 'vixsrc.to' in logical_line:
                encoded_url = urllib.parse.quote(logical_line, safe='')
                processed_url_content = f"{base_url}/extractor/video?host=VixCloud&redirect_stream=true&d={encoded_url}"
            elif '.m3u8' in logical_line:
                encoded_url = urllib.parse.quote(logical_line, safe='')
                processed_url_content = f"{base_url}/proxy/hls/manifest.m3u8?d={encoded_url}"
            elif '.mpd' in logical_line:
                # Extract DRM parameters (key_id/key) from the MPD URL if present.
                # NOTE: the previous per-iteration `from urllib.parse import ...`
                # was hoisted — the module-level `urllib.parse` import is used.
                parsed_url = urllib.parse.urlparse(logical_line)
                query_params = urllib.parse.parse_qs(parsed_url.query)
                key_id = query_params.get('key_id', [None])[0]
                key = query_params.get('key', [None])[0]
                # Drop the DRM params from the original query string.
                clean_params = {k: v for k, v in query_params.items() if k not in ('key_id', 'key')}
                clean_query = urllib.parse.urlencode(clean_params, doseq=True) if clean_params else ''
                clean_url = urllib.parse.urlunparse((
                    parsed_url.scheme,
                    parsed_url.netloc,
                    parsed_url.path,
                    parsed_url.params,
                    clean_query,
                    parsed_url.fragment,
                ))
                # Encode the MPD URL like the other URL types.
                clean_url_for_param = urllib.parse.quote(clean_url, safe='')
                processed_url_content = f"{base_url}/proxy/mpd/manifest.m3u8?d={clean_url_for_param}"
                # BUGFIX: percent-encode the DRM values so the rewritten URL stays
                # well-formed even when they contain reserved characters.
                if key_id:
                    processed_url_content += f"&key_id={urllib.parse.quote(key_id)}"
                if key:
                    processed_url_content += f"&key={urllib.parse.quote(key)}"
            elif '.php' in logical_line:
                encoded_url = urllib.parse.quote(logical_line, safe='')
                processed_url_content = f"{base_url}/proxy/hls/manifest.m3u8?d={encoded_url}"
            else:
                # Any other link without a recognized extension: treat as HLS.
                encoded_url = urllib.parse.quote(logical_line, safe='')
                processed_url_content = f"{base_url}/proxy/hls/manifest.m3u8?d={encoded_url}"
            # Apply the collected headers before api_password.
            if current_ext_headers:
                header_params_str = "".join([f"&h_{urllib.parse.quote(key)}={urllib.parse.quote(value)}" for key, value in current_ext_headers.items()])
                processed_url_content += header_params_str
                current_ext_headers = {}
            # Always append api_password last.
            if api_password:
                processed_url_content += f"&api_password={api_password}"
            yield processed_url_content + '\n'
        else:
            yield line_with_newline
async def async_download_m3u_playlist(url: str) -> list[str]:
    """
    Download an M3U playlist asynchronously and return its lines.

    Each non-empty line is newline-terminated; empty lines are kept as ''
    (matching the original behavior, which the rewriter passes through).

    Args:
        url: The playlist URL to download.

    Raises:
        Exception: re-raised after logging when the download fails.
    """
    headers = {
        'User-Agent': settings.user_agent,
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.9',
        'Accept-Encoding': 'gzip, deflate',
        'Connection': 'keep-alive'
    }
    lines: list[str] = []
    try:
        async with httpx.AsyncClient(verify=True, timeout=30) as client:
            async with client.stream('GET', url, headers=headers) as response:
                response.raise_for_status()
                # httpx's aiter_lines() always yields decoded `str` values, so
                # the former bytes-decoding branch was dead code and is removed.
                async for line in response.aiter_lines():
                    lines.append(line + '\n' if line else '')
    except Exception as e:
        logger.error(f"Error downloading playlist (async): {str(e)}")
        raise
    return lines
async def async_generate_combined_playlist(playlist_definitions: list[str], base_url: str, api_password: Optional[str]):
    """
    Generate a combined playlist from multiple definitions, downloading them in parallel.

    Each definition may be "prefix&url" (only the part after the first '&' is
    used as the URL) or a bare URL. Yields rewritten lines; download failures
    are surfaced inline as "# ERROR ..." comment lines instead of aborting.
    """
    # Prepare the URLs to download.
    playlist_urls = []
    for definition in playlist_definitions:
        if '&' in definition:
            parts = definition.split('&', 1)
            playlist_url_str = parts[1] if len(parts) > 1 else parts[0]
        else:
            playlist_url_str = definition
        playlist_urls.append(playlist_url_str)
    # Download all playlists concurrently; exceptions come back as result values.
    results = await asyncio.gather(*[async_download_m3u_playlist(url) for url in playlist_urls], return_exceptions=True)
    # Only the first #EXTM3U header encountered is emitted; subsequent playlists
    # have their leading header suppressed so the output stays one valid playlist.
    first_playlist_header_handled = False
    for idx, lines in enumerate(results):
        if isinstance(lines, Exception):
            yield f"# ERROR processing playlist {playlist_urls[idx]}: {str(lines)}\n"
            continue
        playlist_lines: list[str] = lines  # type: ignore
        current_playlist_had_lines = False
        first_line_of_this_segment = True
        lines_processed_for_current_playlist = 0
        rewritten_lines_iter = rewrite_m3u_links_streaming(iter(playlist_lines), base_url, api_password)
        for line in rewritten_lines_iter:
            current_playlist_had_lines = True
            is_extm3u_line = line.strip().startswith('#EXTM3U')
            lines_processed_for_current_playlist += 1
            if not first_playlist_header_handled:
                # Still emitting the first playlist's content, header included.
                yield line
                if is_extm3u_line:
                    first_playlist_header_handled = True
            else:
                # Drop a leading #EXTM3U on later playlists; pass everything else.
                if first_line_of_this_segment and is_extm3u_line:
                    pass
                else:
                    yield line
            first_line_of_this_segment = False
        # A header-less first playlist still counts as "handled" so later
        # playlists don't get their own header promoted.
        if current_playlist_had_lines and not first_playlist_header_handled:
            first_playlist_header_handled = True
@playlist_builder_router.get("/playlist")
async def proxy_handler(
    request: Request,
    d: str = Query(..., description="Query string con le definizioni delle playlist", alias="d"),
    api_password: Optional[str] = Query(None, description="Password API per MFP"),
):
    """
    Proxy endpoint for M3U playlists with MFP support.

    Query string format: playlist1&url1;playlist2&url2
    Example: https://mfp.com:pass123&http://provider.com/playlist.m3u
    """
    try:
        if not d:
            raise HTTPException(status_code=400, detail="Query string mancante")
        if not d.strip():
            raise HTTPException(status_code=400, detail="Query string cannot be empty")
        # Validate that we have at least one valid definition
        playlist_definitions = [def_.strip() for def_ in d.split(';') if def_.strip()]
        if not playlist_definitions:
            raise HTTPException(status_code=400, detail="No valid playlist definitions found")
        # Build base_url with the scheme the client actually used (proxy-aware).
        original_scheme = get_original_scheme(request)
        base_url = f"{original_scheme}://{request.url.netloc}"
        # Override base_url from the first definition when it embeds one.
        # NOTE(review): this branch looks unreachable — parts[0] must NOT start
        # with 'http' yet its rsplit prefix must; confirm the intended format.
        if playlist_definitions and '&' in playlist_definitions[0]:
            parts = playlist_definitions[0].split('&', 1)
            if ':' in parts[0] and not parts[0].startswith('http'):
                base_url_part = parts[0].rsplit(':', 1)[0]
                if base_url_part.startswith('http'):
                    base_url = base_url_part

        async def generate_response():
            async for line in async_generate_combined_playlist(playlist_definitions, base_url, api_password):
                yield line

        return StreamingResponse(
            generate_response(),
            media_type='application/vnd.apple.mpegurl',
            headers={
                'Content-Disposition': 'attachment; filename="playlist.m3u"',
                'Access-Control-Allow-Origin': '*'
            }
        )
    except HTTPException:
        # BUGFIX: re-raise the deliberate 4xx validation errors unchanged instead
        # of letting the blanket handler below convert them into 500s.
        raise
    except Exception as e:
        logger.error(f"General error in playlist handler: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error: {str(e)}") from e
@playlist_builder_router.get("/builder")
async def url_builder():
    """Serve the MFP URL-builder UI by redirecting to its static HTML page."""
    builder_page = "/playlist_builder.html"
    return RedirectResponse(url=builder_page)

View File

@@ -1,10 +1,14 @@
from typing import Annotated
from urllib.parse import quote
from urllib.parse import quote, unquote
import re
import logging
from fastapi import Request, Depends, APIRouter, Query, HTTPException
from fastapi.responses import Response, RedirectResponse
from mediaflow_proxy.handlers import (
handle_hls_stream_proxy,
handle_stream_request,
proxy_stream,
get_manifest,
get_playlist,
@@ -18,10 +22,144 @@ from mediaflow_proxy.schemas import (
MPDManifestParams,
)
from mediaflow_proxy.utils.http_utils import get_proxy_headers, ProxyRequestHeaders
from mediaflow_proxy.utils.base64_utils import process_potential_base64_url
proxy_router = APIRouter()
def sanitize_url(url: str) -> str:
    """
    Sanitize URL to fix common encoding issues and handle base64 encoded URLs.

    Args:
        url (str): The URL to sanitize.

    Returns:
        str: The sanitized URL.
    """
    logger = logging.getLogger(__name__)
    original_url = url

    # Step 1: decode the URL if it arrives base64-encoded.
    url = process_potential_base64_url(url)

    # Step 2: repair malformed scheme separators produced by broken encoders
    # (https%22//, https%3A%22//, https:"// and their http variants).
    scheme_fixes = (
        (r'https%22//', 'https://'),
        (r'http%22//', 'http://'),
        (r'https%3A%22//', 'https://'),
        (r'http%3A%22//', 'http://'),
        (r'https:"//', 'https://'),
        (r'http:"//', 'http://'),
    )
    for pattern, replacement in scheme_fixes:
        url = re.sub(pattern, replacement, url)

    # Step 3: drop key_id/key pairs wrongly glued onto the base URL — those
    # belong to the proxy's own query parameters, not the destination.
    if '&key_id=' in url and '&key=' in url:
        base_url = url.split('&key_id=')[0]
        logger.info(f"Removed incorrectly appended key parameters from URL: '{url}' -> '{base_url}'")
        url = base_url

    if url != original_url:
        logger.info(f"URL sanitized: '{original_url}' -> '{url}'")

    # Step 4: inspect the percent-decoded form; if the scheme separator is
    # still broken there, return the repaired decoded URL instead.
    try:
        decoded_url = unquote(url)
        if decoded_url != url:
            logger.info(f"URL after decoding: '{decoded_url}'")
            if ':"/' in decoded_url:
                fixed_decoded = re.sub(r'([a-z]+):"//', r'\1://', decoded_url)
                logger.info(f"Fixed decoded URL: '{fixed_decoded}'")
                return fixed_decoded
    except Exception as e:
        logger.warning(f"Error decoding URL '{url}': {e}")
    return url
def extract_drm_params_from_url(url: str) -> tuple[str, str, str]:
    """
    Extract DRM parameters (key_id and key) from a URL if they are incorrectly appended.

    Args:
        url (str): The URL that may contain appended DRM parameters.

    Returns:
        tuple: (clean_url, key_id, key) where clean_url has the parameters removed,
               and key_id/key are the extracted values (or None if not found).
    """
    logger = logging.getLogger(__name__)

    # Guard clause: nothing to do unless BOTH markers are present.
    if '&key_id=' not in url or '&key=' not in url:
        return url, None, None

    id_match = re.search(r'&key_id=([^&]+)', url)
    key_id = id_match.group(1) if id_match else None
    value_match = re.search(r'&key=([^&]+)', url)
    key = value_match.group(1) if value_match else None

    # Strip both parameters out of the URL (key_id first, then key).
    clean_url = re.sub(r'&key_id=[^&]*', '', url)
    clean_url = re.sub(r'&key=[^&]*', '', clean_url)

    logger.info(f"Extracted DRM parameters from URL: key_id={key_id}, key={key}")
    logger.info(f"Cleaned URL: '{url}' -> '{clean_url}'")
    return clean_url, key_id, key
def _check_and_redirect_dlhd_stream(request: Request, destination: str) -> RedirectResponse | None:
    """
    Check if destination contains a stream-{number} pattern and redirect to the
    extractor if needed.

    Args:
        request (Request): The incoming HTTP request.
        destination (str): The destination URL to check.

    Returns:
        RedirectResponse | None: RedirectResponse if redirect is needed, None otherwise.
    """
    # NOTE: the redundant local `import re` was removed — `re` is already
    # imported at module level.
    # Check for stream-{number} pattern (e.g. stream-1, stream-123, etc.)
    if re.search(r'stream-\d+', destination):
        from urllib.parse import urlencode
        # Build redirect parameters for the DLHD extractor.
        redirect_params = {
            "host": "DLHD",
            "redirect_stream": "true",
            "d": destination
        }
        # Preserve api_password if present
        if "api_password" in request.query_params:
            redirect_params["api_password"] = request.query_params["api_password"]
        # Build the redirect URL to the extractor endpoint.
        base_url = str(request.url_for("extract_url"))
        redirect_url = f"{base_url}?{urlencode(redirect_params)}"
        return RedirectResponse(url=redirect_url, status_code=302)
    return None
@proxy_router.head("/hls/manifest.m3u8")
@proxy_router.head("/hls/manifest.m3u8")
@proxy_router.get("/hls/manifest.m3u8")
async def hls_manifest_proxy(
@@ -40,9 +178,111 @@ async def hls_manifest_proxy(
Returns:
Response: The HTTP response with the processed m3u8 playlist or streamed content.
"""
# Sanitize destination URL to fix common encoding issues
hls_params.destination = sanitize_url(hls_params.destination)
# Check if destination contains stream-{numero} pattern and redirect to extractor
redirect_response = _check_and_redirect_dlhd_stream(request, hls_params.destination)
if redirect_response:
return redirect_response
return await handle_hls_stream_proxy(request, hls_params, proxy_headers)
@proxy_router.get("/hls/segment")
async def hls_segment_proxy(
    request: Request,
    proxy_headers: Annotated[ProxyRequestHeaders, Depends(get_proxy_headers)],
    segment_url: str = Query(..., description="URL of the HLS segment"),
):
    """
    Proxy HLS segments, serving from the pre-buffer cache when available.

    Args:
        request (Request): The incoming HTTP request.
        proxy_headers (ProxyRequestHeaders): The headers to include in the request.
        segment_url (str): URL of the HLS segment to proxy.

    Returns:
        Response: The HTTP response with the segment content.
    """
    from mediaflow_proxy.utils.hls_prebuffer import hls_prebuffer
    from mediaflow_proxy.configs import settings

    # Repair common encoding issues before using the URL anywhere.
    segment_url = sanitize_url(segment_url)

    # Collect forwarded headers ("h_"-prefixed query params) for the cache lookup.
    forward_headers = {name[2:]: value for name, value in request.query_params.items() if name.startswith("h_")}

    # Serve straight from the pre-buffer cache when enabled and populated.
    if settings.enable_hls_prebuffer:
        buffered = await hls_prebuffer.get_segment(segment_url, forward_headers)
        if buffered:
            return Response(
                content=buffered,
                media_type="video/mp2t",
                headers={
                    "Content-Type": "video/mp2t",
                    "Cache-Control": "public, max-age=3600",
                    "Access-Control-Allow-Origin": "*"
                },
            )

    # Cache miss (or prebuffer disabled): stream the segment directly.
    return await handle_stream_request("GET", segment_url, proxy_headers)
@proxy_router.get("/dash/segment")
async def dash_segment_proxy(
    request: Request,
    proxy_headers: Annotated[ProxyRequestHeaders, Depends(get_proxy_headers)],
    segment_url: str = Query(..., description="URL of the DASH segment"),
):
    """
    Proxy DASH segments, serving from the pre-buffer cache when available.

    Args:
        request (Request): The incoming HTTP request.
        proxy_headers (ProxyRequestHeaders): The headers to include in the request.
        segment_url (str): URL of the DASH segment to proxy.

    Returns:
        Response: The HTTP response with the segment content.
    """
    from mediaflow_proxy.utils.dash_prebuffer import dash_prebuffer
    from mediaflow_proxy.configs import settings

    # Repair common encoding issues before using the URL anywhere.
    segment_url = sanitize_url(segment_url)

    # Collect forwarded headers ("h_"-prefixed query params) for the cache lookup.
    forward_headers = {name[2:]: value for name, value in request.query_params.items() if name.startswith("h_")}

    # Serve straight from the pre-buffer cache when enabled and populated.
    if settings.enable_dash_prebuffer:
        buffered = await dash_prebuffer.get_segment(segment_url, forward_headers)
        if buffered:
            return Response(
                content=buffered,
                media_type="video/mp4",
                headers={
                    "Content-Type": "video/mp4",
                    "Cache-Control": "public, max-age=3600",
                    "Access-Control-Allow-Origin": "*"
                },
            )

    # Cache miss (or prebuffer disabled): stream the segment directly.
    return await handle_stream_request("GET", segment_url, proxy_headers)
@proxy_router.head("/stream")
@proxy_router.get("/stream")
@proxy_router.head("/stream/{filename:path}")
@@ -65,6 +305,14 @@ async def proxy_stream_endpoint(
Returns:
Response: The HTTP response with the streamed content.
"""
# Sanitize destination URL to fix common encoding issues
destination = sanitize_url(destination)
# Check if destination contains stream-{numero} pattern and redirect to extractor
redirect_response = _check_and_redirect_dlhd_stream(request, destination)
if redirect_response:
return redirect_response
content_range = proxy_headers.request.get("range", "bytes=0-")
if "nan" in content_range.casefold():
# Handle invalid range requests "bytes=NaN-NaN"
@@ -103,6 +351,21 @@ async def mpd_manifest_proxy(
Returns:
Response: The HTTP response with the HLS manifest.
"""
# Extract DRM parameters from destination URL if they are incorrectly appended
clean_url, extracted_key_id, extracted_key = extract_drm_params_from_url(manifest_params.destination)
# Update the destination with the cleaned URL
manifest_params.destination = clean_url
# Use extracted parameters if they exist and the manifest params don't already have them
if extracted_key_id and not manifest_params.key_id:
manifest_params.key_id = extracted_key_id
if extracted_key and not manifest_params.key:
manifest_params.key = extracted_key
# Sanitize destination URL to fix common encoding issues
manifest_params.destination = sanitize_url(manifest_params.destination)
return await get_manifest(request, manifest_params, proxy_headers)
@@ -123,6 +386,21 @@ async def playlist_endpoint(
Returns:
Response: The HTTP response with the HLS playlist.
"""
# Extract DRM parameters from destination URL if they are incorrectly appended
clean_url, extracted_key_id, extracted_key = extract_drm_params_from_url(playlist_params.destination)
# Update the destination with the cleaned URL
playlist_params.destination = clean_url
# Use extracted parameters if they exist and the playlist params don't already have them
if extracted_key_id and not playlist_params.key_id:
playlist_params.key_id = extracted_key_id
if extracted_key and not playlist_params.key:
playlist_params.key = extracted_key
# Sanitize destination URL to fix common encoding issues
playlist_params.destination = sanitize_url(playlist_params.destination)
return await get_playlist(request, playlist_params, proxy_headers)
@@ -152,4 +430,4 @@ async def get_mediaflow_proxy_public_ip():
Returns:
Response: The HTTP response with the public IP address in the form of a JSON object. {"ip": "xxx.xxx.xxx.xxx"}
"""
return await get_public_ip()
return await get_public_ip()