mirror of
https://github.com/UrloMythus/UnHided.git
synced 2026-04-11 11:50:51 +00:00
Compare commits
17 Commits
9431837e6c
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5120b19d0b | ||
|
|
cfc6bbabc9 | ||
|
|
7785e8c604 | ||
|
|
b8a40b5afc | ||
|
|
f11bc22a01 | ||
|
|
29a9c01418 | ||
|
|
8f8c3b195e | ||
|
|
bc41be6194 | ||
|
|
2d7058c669 | ||
|
|
abf282c770 | ||
|
|
655b62ab30 | ||
|
|
bdd4e4a3a3 | ||
|
|
e1b7376b8b | ||
|
|
7206754014 | ||
|
|
abd330e83f | ||
|
|
7a1e96d8b1 | ||
|
|
8fa0250155 |
19
Dockerfile
19
Dockerfile
@@ -1,21 +1,8 @@
|
|||||||
FROM python:3.10-slim-buster
|
FROM python:3.11-slim-bullseye
|
||||||
|
|
||||||
# Set the working directory in the container to /app
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
# Install git
|
|
||||||
RUN apt-get update && apt-get install -y git
|
RUN apt-get update && apt-get install -y git
|
||||||
|
|
||||||
# Clone the repository
|
|
||||||
RUN git clone https://github.com/YourUsername/YourRepoName.git .
|
RUN git clone https://github.com/YourUsername/YourRepoName.git .
|
||||||
|
|
||||||
# Copy the local config.json file to the container
|
|
||||||
|
|
||||||
# Install any needed packages specified in requirements.txt
|
|
||||||
RUN pip install --no-cache-dir -r requirements.txt
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
EXPOSE 7860
|
||||||
|
CMD ["uvicorn", "run:main_app", "--host", "0.0.0.0", "--port", "7860", "--workers", "4"]
|
||||||
|
|
||||||
|
|
||||||
EXPOSE 8888
|
|
||||||
|
|
||||||
# Run run.py when the container launches
|
|
||||||
CMD ["uvicorn", "run:main_app", "--host", "0.0.0.0", "--port", "8888", "--workers", "4"]
|
|
||||||
|
|||||||
BIN
mediaflow_proxy/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/__pycache__/configs.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/__pycache__/configs.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/__pycache__/const.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/__pycache__/const.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/__pycache__/handlers.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/__pycache__/handlers.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/__pycache__/main.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/__pycache__/main.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/__pycache__/middleware.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/__pycache__/middleware.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/__pycache__/mpd_processor.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/__pycache__/mpd_processor.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/__pycache__/schemas.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/__pycache__/schemas.cpython-313.pyc
Normal file
Binary file not shown.
@@ -1,7 +1,6 @@
|
|||||||
from typing import Dict, Literal, Optional, Union
|
from typing import Dict, Literal, Optional
|
||||||
|
|
||||||
import httpx
|
from pydantic import BaseModel, Field, SecretStr
|
||||||
from pydantic import BaseModel, Field
|
|
||||||
from pydantic_settings import BaseSettings
|
from pydantic_settings import BaseSettings
|
||||||
|
|
||||||
|
|
||||||
@@ -19,32 +18,14 @@ class TransportConfig(BaseSettings):
|
|||||||
proxy_url: Optional[str] = Field(
|
proxy_url: Optional[str] = Field(
|
||||||
None, description="Primary proxy URL. Example: socks5://user:pass@proxy:1080 or http://proxy:8080"
|
None, description="Primary proxy URL. Example: socks5://user:pass@proxy:1080 or http://proxy:8080"
|
||||||
)
|
)
|
||||||
|
disable_ssl_verification_globally: bool = Field(
|
||||||
|
False, description="Disable SSL verification for all requests globally."
|
||||||
|
)
|
||||||
all_proxy: bool = Field(False, description="Enable proxy for all routes by default")
|
all_proxy: bool = Field(False, description="Enable proxy for all routes by default")
|
||||||
transport_routes: Dict[str, RouteConfig] = Field(
|
transport_routes: Dict[str, RouteConfig] = Field(
|
||||||
default_factory=dict, description="Pattern-based route configuration"
|
default_factory=dict, description="Pattern-based route configuration"
|
||||||
)
|
)
|
||||||
timeout: int = Field(30, description="Timeout for HTTP requests in seconds")
|
timeout: int = Field(60, description="Timeout for HTTP requests in seconds")
|
||||||
|
|
||||||
def get_mounts(
|
|
||||||
self, async_http: bool = True
|
|
||||||
) -> Dict[str, Optional[Union[httpx.HTTPTransport, httpx.AsyncHTTPTransport]]]:
|
|
||||||
"""
|
|
||||||
Get a dictionary of httpx mount points to transport instances.
|
|
||||||
"""
|
|
||||||
mounts = {}
|
|
||||||
transport_cls = httpx.AsyncHTTPTransport if async_http else httpx.HTTPTransport
|
|
||||||
|
|
||||||
# Configure specific routes
|
|
||||||
for pattern, route in self.transport_routes.items():
|
|
||||||
mounts[pattern] = transport_cls(
|
|
||||||
verify=route.verify_ssl, proxy=route.proxy_url or self.proxy_url if route.proxy else None
|
|
||||||
)
|
|
||||||
|
|
||||||
# Set default proxy for all routes if enabled
|
|
||||||
if self.all_proxy:
|
|
||||||
mounts["all://"] = transport_cls(proxy=self.proxy_url)
|
|
||||||
|
|
||||||
return mounts
|
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
env_file = ".env"
|
env_file = ".env"
|
||||||
@@ -54,18 +35,80 @@ class TransportConfig(BaseSettings):
|
|||||||
class Settings(BaseSettings):
|
class Settings(BaseSettings):
|
||||||
api_password: str | None = None # The password for protecting the API endpoints.
|
api_password: str | None = None # The password for protecting the API endpoints.
|
||||||
log_level: str = "INFO" # The logging level to use.
|
log_level: str = "INFO" # The logging level to use.
|
||||||
transport_config: TransportConfig = Field(default_factory=TransportConfig) # Configuration for httpx transport.
|
transport_config: TransportConfig = Field(default_factory=TransportConfig) # Configuration for HTTP transport.
|
||||||
enable_streaming_progress: bool = False # Whether to enable streaming progress tracking.
|
enable_streaming_progress: bool = False # Whether to enable streaming progress tracking.
|
||||||
disable_home_page: bool = False # Whether to disable the home page UI.
|
disable_home_page: bool = False # Whether to disable the home page UI.
|
||||||
disable_docs: bool = False # Whether to disable the API documentation (Swagger UI).
|
disable_docs: bool = False # Whether to disable the API documentation (Swagger UI).
|
||||||
disable_speedtest: bool = False # Whether to disable the speedtest UI.
|
disable_speedtest: bool = False # Whether to disable the speedtest UI.
|
||||||
|
clear_cache_on_startup: bool = (
|
||||||
|
False # Whether to clear all caches (extractor, MPD, etc.) on startup. Useful for development.
|
||||||
|
)
|
||||||
stremio_proxy_url: str | None = None # The Stremio server URL for alternative content proxying.
|
stremio_proxy_url: str | None = None # The Stremio server URL for alternative content proxying.
|
||||||
m3u8_content_routing: Literal["mediaflow", "stremio", "direct"] = (
|
m3u8_content_routing: Literal["mediaflow", "stremio", "direct"] = (
|
||||||
"mediaflow" # Routing strategy for M3U8 content URLs: "mediaflow", "stremio", or "direct"
|
"mediaflow" # Routing strategy for M3U8 content URLs: "mediaflow", "stremio", or "direct"
|
||||||
)
|
)
|
||||||
|
enable_hls_prebuffer: bool = True # Whether to enable HLS pre-buffering for improved streaming performance.
|
||||||
|
livestream_start_offset: (
|
||||||
|
float | None
|
||||||
|
) = -18 # Default start offset for live streams (e.g., -18 to start 18 seconds behind live edge). Applies to HLS and MPD live playlists. Set to None to disable.
|
||||||
|
hls_prebuffer_segments: int = 5 # Number of segments to pre-buffer ahead.
|
||||||
|
hls_prebuffer_cache_size: int = 50 # Maximum number of segments to cache in memory.
|
||||||
|
hls_prebuffer_max_memory_percent: int = 80 # Maximum percentage of system memory to use for HLS pre-buffer cache.
|
||||||
|
hls_prebuffer_emergency_threshold: int = 90 # Emergency threshold percentage to trigger aggressive cache cleanup.
|
||||||
|
hls_prebuffer_inactivity_timeout: int = 60 # Seconds of inactivity before stopping playlist refresh loop.
|
||||||
|
hls_segment_cache_ttl: int = 300 # TTL (seconds) for cached HLS segments; 300s (5min) for VOD, lower for live.
|
||||||
|
enable_dash_prebuffer: bool = True # Whether to enable DASH pre-buffering for improved streaming performance.
|
||||||
|
dash_prebuffer_segments: int = 5 # Number of segments to pre-buffer ahead.
|
||||||
|
dash_prebuffer_cache_size: int = 50 # Maximum number of segments to cache in memory.
|
||||||
|
dash_prebuffer_max_memory_percent: int = 80 # Maximum percentage of system memory to use for DASH pre-buffer cache.
|
||||||
|
dash_prebuffer_emergency_threshold: int = 90 # Emergency threshold percentage to trigger aggressive cache cleanup.
|
||||||
|
dash_prebuffer_inactivity_timeout: int = 60 # Seconds of inactivity before cleaning up stream state.
|
||||||
|
dash_segment_cache_ttl: int = 60 # TTL (seconds) for cached media segments; longer = better for slow playback.
|
||||||
|
mpd_live_init_cache_ttl: int = 60 # TTL (seconds) for live init segment cache; 0 disables caching.
|
||||||
|
mpd_live_playlist_depth: int = 8 # Number of recent segments to expose per live playlist variant.
|
||||||
|
remux_to_ts: bool = False # Remux fMP4 segments to MPEG-TS for ExoPlayer/VLC compatibility.
|
||||||
|
processed_segment_cache_ttl: int = 60 # TTL (seconds) for caching processed (decrypted/remuxed) segments.
|
||||||
|
|
||||||
user_agent: str = (
|
# FlareSolverr settings (for Cloudflare bypass)
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36" # The user agent to use for HTTP requests.
|
flaresolverr_url: str | None = None # FlareSolverr service URL. Example: http://localhost:8191
|
||||||
|
flaresolverr_timeout: int = 60 # Timeout (seconds) for FlareSolverr requests.
|
||||||
|
|
||||||
|
# Acestream settings
|
||||||
|
enable_acestream: bool = False # Whether to enable Acestream proxy support.
|
||||||
|
acestream_host: str = "localhost" # Acestream engine host.
|
||||||
|
acestream_port: int = 6878 # Acestream engine port.
|
||||||
|
acestream_buffer_size: int = 4 * 1024 * 1024 # Buffer size for MPEG-TS streaming (4MB default, like acexy).
|
||||||
|
acestream_empty_timeout: int = 30 # Timeout (seconds) when no data is received from upstream.
|
||||||
|
acestream_session_timeout: int = 60 # Session timeout (seconds) for cleanup of inactive sessions.
|
||||||
|
acestream_keepalive_interval: int = 15 # Interval (seconds) for session keepalive polling.
|
||||||
|
|
||||||
|
# Telegram MTProto settings
|
||||||
|
enable_telegram: bool = False # Whether to enable Telegram MTProto proxy support.
|
||||||
|
telegram_api_id: int | None = None # Telegram API ID from https://my.telegram.org/apps
|
||||||
|
telegram_api_hash: SecretStr | None = None # Telegram API hash from https://my.telegram.org/apps
|
||||||
|
telegram_session_string: SecretStr | None = None # Persistent session string (avoids re-authentication).
|
||||||
|
telegram_max_connections: int = 8 # Max parallel DC connections for downloads (max 20, careful of floods).
|
||||||
|
telegram_request_timeout: int = 30 # Request timeout in seconds.
|
||||||
|
|
||||||
|
# Transcode settings
|
||||||
|
enable_transcode: bool = True # Whether to enable on-the-fly transcoding endpoints (MKV→fMP4, HLS VOD).
|
||||||
|
transcode_prefer_gpu: bool = True # Prefer GPU acceleration (NVENC/VideoToolbox/VAAPI) when available.
|
||||||
|
transcode_video_bitrate: str = "4M" # Target video bitrate for re-encoding (e.g. "4M", "2000k").
|
||||||
|
transcode_audio_bitrate: int = 192000 # AAC audio bitrate in bits/s for the Python transcode pipeline.
|
||||||
|
transcode_video_preset: str = "medium" # Encoding speed/quality tradeoff (libx264: ultrafast..veryslow).
|
||||||
|
|
||||||
|
user_agent: str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36" # The user agent to use for HTTP requests.
|
||||||
|
|
||||||
|
# Upstream error resilience settings
|
||||||
|
upstream_retry_on_disconnect: bool = True # Enable/disable retry when upstream disconnects mid-stream.
|
||||||
|
upstream_retry_attempts: int = 2 # Number of retry attempts when upstream disconnects during streaming.
|
||||||
|
upstream_retry_delay: float = 1.0 # Delay (seconds) between retry attempts.
|
||||||
|
graceful_stream_end: bool = True # Return valid empty playlist instead of error when upstream fails.
|
||||||
|
|
||||||
|
# Redis settings
|
||||||
|
redis_url: str | None = None # Redis URL for distributed locking and caching. None = disabled.
|
||||||
|
cache_namespace: str | None = (
|
||||||
|
None # Optional namespace for instance-specific caches (e.g. pod name or hostname). When set, extractor results and other IP-bound data are stored under this namespace so multiple pods sharing one Redis don't serve each other's IP-specific URLs.
|
||||||
)
|
)
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
|
|||||||
BIN
mediaflow_proxy/drm/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/drm/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/drm/__pycache__/decrypter.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/drm/__pycache__/decrypter.cpython-313.pyc
Normal file
Binary file not shown.
File diff suppressed because it is too large
Load Diff
104
mediaflow_proxy/extractors/F16Px.py
Normal file
104
mediaflow_proxy/extractors/F16Px.py
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
# https://github.com/Gujal00/ResolveURL/blob/55c7f66524ebd65bc1f88650614e627b00167fa0/script.module.resolveurl/lib/resolveurl/plugins/f16px.py
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
from typing import Dict, Any
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||||
|
from mediaflow_proxy.utils import python_aesgcm
|
||||||
|
|
||||||
|
|
||||||
|
class F16PxExtractor(BaseExtractor):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
self.mediaflow_endpoint = "hls_manifest_proxy"
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _b64url_decode(value: str) -> bytes:
|
||||||
|
# base64url -> base64
|
||||||
|
value = value.replace("-", "+").replace("_", "/")
|
||||||
|
padding = (-len(value)) % 4
|
||||||
|
if padding:
|
||||||
|
value += "=" * padding
|
||||||
|
return base64.b64decode(value)
|
||||||
|
|
||||||
|
def _join_key_parts(self, parts) -> bytes:
|
||||||
|
return b"".join(self._b64url_decode(p) for p in parts)
|
||||||
|
|
||||||
|
async def extract(self, url: str) -> Dict[str, Any]:
|
||||||
|
parsed = urlparse(url)
|
||||||
|
host = parsed.netloc
|
||||||
|
origin = f"{parsed.scheme}://{parsed.netloc}"
|
||||||
|
|
||||||
|
match = re.search(r"/e/([A-Za-z0-9]+)", parsed.path or "")
|
||||||
|
if not match:
|
||||||
|
raise ExtractorError("F16PX: Invalid embed URL")
|
||||||
|
|
||||||
|
media_id = match.group(1)
|
||||||
|
api_url = f"https://{host}/api/videos/{media_id}/embed/playback"
|
||||||
|
|
||||||
|
headers = self.base_headers.copy()
|
||||||
|
headers["referer"] = f"https://{host}/"
|
||||||
|
|
||||||
|
resp = await self._make_request(api_url, headers=headers)
|
||||||
|
try:
|
||||||
|
data = resp.json()
|
||||||
|
except Exception:
|
||||||
|
raise ExtractorError("F16PX: Invalid JSON response")
|
||||||
|
|
||||||
|
# Case 1: plain sources
|
||||||
|
if "sources" in data and data["sources"]:
|
||||||
|
src = data["sources"][0].get("url")
|
||||||
|
if not src:
|
||||||
|
raise ExtractorError("F16PX: Empty source URL")
|
||||||
|
return {
|
||||||
|
"destination_url": src,
|
||||||
|
"request_headers": headers,
|
||||||
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Case 2: encrypted playback
|
||||||
|
pb = data.get("playback")
|
||||||
|
if not pb:
|
||||||
|
raise ExtractorError("F16PX: No playback data")
|
||||||
|
|
||||||
|
try:
|
||||||
|
iv = self._b64url_decode(pb["iv"]) # nonce
|
||||||
|
key = self._join_key_parts(pb["key_parts"]) # AES key
|
||||||
|
payload = self._b64url_decode(pb["payload"]) # ciphertext + tag
|
||||||
|
|
||||||
|
cipher = python_aesgcm.new(key)
|
||||||
|
decrypted = cipher.open(iv, payload) # AAD = '' like ResolveURL
|
||||||
|
|
||||||
|
if decrypted is None:
|
||||||
|
raise ExtractorError("F16PX: GCM authentication failed")
|
||||||
|
|
||||||
|
decrypted_json = json.loads(decrypted.decode("utf-8", "ignore"))
|
||||||
|
|
||||||
|
except ExtractorError:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
raise ExtractorError(f"F16PX: Decryption failed ({e})")
|
||||||
|
|
||||||
|
sources = decrypted_json.get("sources") or []
|
||||||
|
if not sources:
|
||||||
|
raise ExtractorError("F16PX: No sources after decryption")
|
||||||
|
|
||||||
|
best = sources[0].get("url")
|
||||||
|
if not best:
|
||||||
|
raise ExtractorError("F16PX: Empty source URL after decryption")
|
||||||
|
|
||||||
|
self.base_headers.clear()
|
||||||
|
self.base_headers["referer"] = f"{origin}/"
|
||||||
|
self.base_headers["origin"] = origin
|
||||||
|
self.base_headers["Accept-Language"] = "en-US,en;q=0.5"
|
||||||
|
self.base_headers["Accept"] = "*/*"
|
||||||
|
self.base_headers["user-agent"] = "Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0"
|
||||||
|
|
||||||
|
return {
|
||||||
|
"destination_url": best,
|
||||||
|
"request_headers": self.base_headers,
|
||||||
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
|
}
|
||||||
BIN
mediaflow_proxy/extractors/__pycache__/F16Px.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/F16Px.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/base.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/base.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/dlhd.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/dlhd.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/factory.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/factory.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/fastream.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/fastream.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/filelions.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/filelions.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/filemoon.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/filemoon.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/gupload.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/gupload.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/livetv.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/livetv.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/maxstream.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/maxstream.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/mixdrop.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/mixdrop.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/okru.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/okru.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/uqload.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/uqload.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/vavoo.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/vavoo.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/vidmoly.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/vidmoly.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/vidoza.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/vidoza.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/vixcloud.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/vixcloud.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/voe.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/voe.cpython-313.pyc
Normal file
Binary file not shown.
@@ -1,10 +1,18 @@
|
|||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
from dataclasses import dataclass
|
||||||
from typing import Dict, Optional, Any
|
from typing import Dict, Optional, Any
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
import httpx
|
import asyncio
|
||||||
|
import aiohttp
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
|
||||||
from mediaflow_proxy.configs import settings
|
from mediaflow_proxy.configs import settings
|
||||||
from mediaflow_proxy.utils.http_utils import create_httpx_client
|
from mediaflow_proxy.utils.http_client import create_aiohttp_session
|
||||||
|
from mediaflow_proxy.utils.http_utils import DownloadError
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class ExtractorError(Exception):
|
class ExtractorError(Exception):
|
||||||
@@ -13,36 +21,158 @@ class ExtractorError(Exception):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class HttpResponse:
|
||||||
|
"""
|
||||||
|
Simple response container for extractor HTTP requests.
|
||||||
|
|
||||||
|
Uses aiohttp-style naming conventions:
|
||||||
|
- status (not status_code)
|
||||||
|
- text (pre-loaded content as string)
|
||||||
|
- content (pre-loaded content as bytes)
|
||||||
|
"""
|
||||||
|
|
||||||
|
status: int
|
||||||
|
headers: Dict[str, str]
|
||||||
|
text: str
|
||||||
|
content: bytes
|
||||||
|
url: str
|
||||||
|
|
||||||
|
def json(self) -> Any:
|
||||||
|
"""Parse response content as JSON."""
|
||||||
|
return json.loads(self.text)
|
||||||
|
|
||||||
|
def get_origin(self) -> str:
|
||||||
|
"""Get the origin (scheme + host) from the response URL."""
|
||||||
|
parsed = urlparse(self.url)
|
||||||
|
return f"{parsed.scheme}://{parsed.netloc}"
|
||||||
|
|
||||||
|
|
||||||
class BaseExtractor(ABC):
|
class BaseExtractor(ABC):
|
||||||
"""Base class for all URL extractors."""
|
"""Base class for all URL extractors.
|
||||||
|
|
||||||
|
Improvements:
|
||||||
|
- Built-in retry/backoff for transient network errors
|
||||||
|
- Configurable timeouts and per-request overrides
|
||||||
|
- Better logging of non-200 responses and body previews for debugging
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, request_headers: dict):
|
def __init__(self, request_headers: dict):
|
||||||
self.base_headers = {
|
self.base_headers = {
|
||||||
"user-agent": settings.user_agent,
|
"user-agent": settings.user_agent,
|
||||||
}
|
}
|
||||||
self.mediaflow_endpoint = "proxy_stream_endpoint"
|
self.mediaflow_endpoint = "proxy_stream_endpoint"
|
||||||
self.base_headers.update(request_headers)
|
# merge incoming headers (e.g. Accept-Language / Referer) with default base headers
|
||||||
|
self.base_headers.update(request_headers or {})
|
||||||
|
|
||||||
async def _make_request(
|
async def _make_request(
|
||||||
self, url: str, method: str = "GET", headers: Optional[Dict] = None, **kwargs
|
self,
|
||||||
) -> httpx.Response:
|
url: str,
|
||||||
"""Make HTTP request with error handling."""
|
method: str = "GET",
|
||||||
|
headers: Optional[Dict] = None,
|
||||||
|
timeout: Optional[float] = None,
|
||||||
|
retries: int = 3,
|
||||||
|
backoff_factor: float = 0.5,
|
||||||
|
raise_on_status: bool = True,
|
||||||
|
**kwargs,
|
||||||
|
) -> HttpResponse:
|
||||||
|
"""
|
||||||
|
Make HTTP request with retry and timeout support using aiohttp.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
url : str
|
||||||
|
The URL to request.
|
||||||
|
method : str
|
||||||
|
HTTP method (GET, POST, etc.). Defaults to GET.
|
||||||
|
headers : dict | None
|
||||||
|
Additional headers to merge with base headers.
|
||||||
|
timeout : float | None
|
||||||
|
Seconds to wait for the request. Defaults to 15s.
|
||||||
|
retries : int
|
||||||
|
Number of attempts for transient errors.
|
||||||
|
backoff_factor : float
|
||||||
|
Base for exponential backoff between retries.
|
||||||
|
raise_on_status : bool
|
||||||
|
If True, HTTP non-2xx raises DownloadError.
|
||||||
|
**kwargs
|
||||||
|
Additional arguments passed to aiohttp request (e.g., data, json).
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
HttpResponse
|
||||||
|
Response object with pre-loaded content.
|
||||||
|
"""
|
||||||
|
attempt = 0
|
||||||
|
last_exc = None
|
||||||
|
|
||||||
|
# Build request headers merging base and per-request
|
||||||
|
request_headers = self.base_headers.copy()
|
||||||
|
if headers:
|
||||||
|
request_headers.update(headers)
|
||||||
|
|
||||||
|
timeout_val = timeout or 15.0
|
||||||
|
|
||||||
|
while attempt < retries:
|
||||||
try:
|
try:
|
||||||
async with create_httpx_client() as client:
|
async with create_aiohttp_session(url, timeout=timeout_val) as (session, proxy_url):
|
||||||
request_headers = self.base_headers
|
async with session.request(
|
||||||
request_headers.update(headers or {})
|
|
||||||
response = await client.request(
|
|
||||||
method,
|
method,
|
||||||
url,
|
url,
|
||||||
headers=request_headers,
|
headers=request_headers,
|
||||||
|
proxy=proxy_url,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
|
) as response:
|
||||||
|
# Read content while session is still open
|
||||||
|
content = await response.read()
|
||||||
|
text = content.decode("utf-8", errors="replace")
|
||||||
|
final_url = str(response.url)
|
||||||
|
status = response.status
|
||||||
|
resp_headers = dict(response.headers)
|
||||||
|
|
||||||
|
if raise_on_status and status >= 400:
|
||||||
|
body_preview = text[:500]
|
||||||
|
logger.debug(
|
||||||
|
"HTTP error for %s (status=%s) -- body preview: %s",
|
||||||
|
url,
|
||||||
|
status,
|
||||||
|
body_preview,
|
||||||
)
|
)
|
||||||
response.raise_for_status()
|
raise DownloadError(status, f"HTTP error {status} while requesting {url}")
|
||||||
return response
|
|
||||||
except httpx.HTTPError as e:
|
return HttpResponse(
|
||||||
raise ExtractorError(f"HTTP request failed: {str(e)}")
|
status=status,
|
||||||
|
headers=resp_headers,
|
||||||
|
text=text,
|
||||||
|
content=content,
|
||||||
|
url=final_url,
|
||||||
|
)
|
||||||
|
|
||||||
|
except DownloadError:
|
||||||
|
# Do not retry on explicit HTTP status errors (they are intentional)
|
||||||
|
raise
|
||||||
|
except (asyncio.TimeoutError, aiohttp.ClientError) as e:
|
||||||
|
# Transient network error - retry with backoff
|
||||||
|
last_exc = e
|
||||||
|
attempt += 1
|
||||||
|
sleep_for = backoff_factor * (2 ** (attempt - 1))
|
||||||
|
logger.warning(
|
||||||
|
"Transient network error (attempt %s/%s) for %s: %s — retrying in %.1fs",
|
||||||
|
attempt,
|
||||||
|
retries,
|
||||||
|
url,
|
||||||
|
e,
|
||||||
|
sleep_for,
|
||||||
|
)
|
||||||
|
await asyncio.sleep(sleep_for)
|
||||||
|
continue
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise ExtractorError(f"Request failed: {str(e)}")
|
# Unexpected exception - wrap as ExtractorError to keep interface consistent
|
||||||
|
logger.exception("Unhandled exception while requesting %s: %s", url, e)
|
||||||
|
raise ExtractorError(f"Request failed for URL {url}: {str(e)}")
|
||||||
|
|
||||||
|
logger.error("All retries failed for %s: %s", url, last_exc)
|
||||||
|
raise ExtractorError(f"Request failed for URL {url}: {str(last_exc)}")
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||||
|
|||||||
@@ -1,348 +1,704 @@
|
|||||||
|
import hashlib
|
||||||
|
import hmac
|
||||||
import re
|
import re
|
||||||
from typing import Dict, Any, Optional
|
import time
|
||||||
from urllib.parse import urlparse, quote
|
import logging
|
||||||
|
|
||||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
from typing import Any, Dict, Optional
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
|
||||||
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError, HttpResponse
|
||||||
|
from mediaflow_proxy.utils.http_client import create_aiohttp_session
|
||||||
|
from mediaflow_proxy.configs import settings
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Silenzia l'errore ConnectionResetError su Windows
|
||||||
|
logging.getLogger("asyncio").setLevel(logging.CRITICAL)
|
||||||
|
|
||||||
|
# Default fingerprint parameters
|
||||||
|
DEFAULT_DLHD_USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0"
|
||||||
|
DEFAULT_DLHD_SCREEN_RESOLUTION = "1920x1080"
|
||||||
|
DEFAULT_DLHD_TIMEZONE = "UTC"
|
||||||
|
DEFAULT_DLHD_LANGUAGE = "en"
|
||||||
|
|
||||||
|
|
||||||
|
def compute_fingerprint(
|
||||||
|
user_agent: str = DEFAULT_DLHD_USER_AGENT,
|
||||||
|
screen_resolution: str = DEFAULT_DLHD_SCREEN_RESOLUTION,
|
||||||
|
timezone: str = DEFAULT_DLHD_TIMEZONE,
|
||||||
|
language: str = DEFAULT_DLHD_LANGUAGE,
|
||||||
|
) -> str:
|
||||||
|
"""
|
||||||
|
Compute the X-Fingerprint header value.
|
||||||
|
|
||||||
|
Algorithm:
|
||||||
|
fingerprint = SHA256(useragent + screen_resolution + timezone + language).hex()[:16]
|
||||||
|
|
||||||
|
Args:
|
||||||
|
user_agent: The user agent string
|
||||||
|
screen_resolution: The screen resolution (e.g., "1920x1080")
|
||||||
|
timezone: The timezone (e.g., "UTC")
|
||||||
|
language: The language code (e.g., "en")
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The 16-character fingerprint
|
||||||
|
"""
|
||||||
|
combined = f"{user_agent}{screen_resolution}{timezone}{language}"
|
||||||
|
return hashlib.sha256(combined.encode("utf-8")).hexdigest()[:16]
|
||||||
|
|
||||||
|
|
||||||
|
def compute_key_path(resource: str, number: str, timestamp: int, fingerprint: str, secret_key: str) -> str:
|
||||||
|
"""
|
||||||
|
Compute the X-Key-Path header value.
|
||||||
|
|
||||||
|
Algorithm:
|
||||||
|
key_path = HMAC-SHA256("resource|number|timestamp|fingerprint", secret_key).hex()[:16]
|
||||||
|
|
||||||
|
Args:
|
||||||
|
resource: The resource from the key URL
|
||||||
|
number: The number from the key URL
|
||||||
|
timestamp: The Unix timestamp
|
||||||
|
fingerprint: The fingerprint value
|
||||||
|
secret_key: The HMAC secret key (channel_salt)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The 16-character key path
|
||||||
|
"""
|
||||||
|
combined = f"{resource}|{number}|{timestamp}|{fingerprint}"
|
||||||
|
hmac_hash = hmac.new(secret_key.encode("utf-8"), combined.encode("utf-8"), hashlib.sha256).hexdigest()
|
||||||
|
return hmac_hash[:16]
|
||||||
|
|
||||||
|
|
||||||
|
def compute_key_headers(key_url: str, secret_key: str) -> tuple[int, int, str, str] | None:
|
||||||
|
"""
|
||||||
|
Compute X-Key-Timestamp, X-Key-Nonce, X-Key-Path, and X-Fingerprint for a /key/ URL.
|
||||||
|
|
||||||
|
Algorithm:
|
||||||
|
1. Extract resource and number from URL pattern /key/{resource}/{number}
|
||||||
|
2. ts = Unix timestamp in seconds
|
||||||
|
3. hmac_hash = HMAC-SHA256(resource, secret_key).hex()
|
||||||
|
4. nonce = proof-of-work: find i where MD5(hmac+resource+number+ts+i)[:4] < 0x1000
|
||||||
|
5. fingerprint = compute_fingerprint()
|
||||||
|
6. key_path = HMAC-SHA256("resource|number|ts|fingerprint", secret_key).hex()[:16]
|
||||||
|
|
||||||
|
Args:
|
||||||
|
key_url: The key URL containing /key/{resource}/{number}
|
||||||
|
secret_key: The HMAC secret key (channel_salt)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (timestamp, nonce, key_path, fingerprint) or None if URL doesn't match pattern
|
||||||
|
"""
|
||||||
|
# Extract resource and number from URL
|
||||||
|
pattern = r"/key/([^/]+)/(\d+)"
|
||||||
|
match = re.search(pattern, key_url)
|
||||||
|
|
||||||
|
if not match:
|
||||||
|
return None
|
||||||
|
|
||||||
|
resource = match.group(1)
|
||||||
|
number = match.group(2)
|
||||||
|
|
||||||
|
ts = int(time.time())
|
||||||
|
|
||||||
|
# Compute HMAC-SHA256
|
||||||
|
hmac_hash = hmac.new(secret_key.encode("utf-8"), resource.encode("utf-8"), hashlib.sha256).hexdigest()
|
||||||
|
|
||||||
|
# Proof-of-work loop
|
||||||
|
nonce = 0
|
||||||
|
for i in range(100000):
|
||||||
|
combined = f"{hmac_hash}{resource}{number}{ts}{i}"
|
||||||
|
md5_hash = hashlib.md5(combined.encode("utf-8")).hexdigest()
|
||||||
|
prefix_value = int(md5_hash[:4], 16)
|
||||||
|
|
||||||
|
if prefix_value < 0x1000: # < 4096
|
||||||
|
nonce = i
|
||||||
|
break
|
||||||
|
|
||||||
|
fingerprint = compute_fingerprint()
|
||||||
|
key_path = compute_key_path(resource, number, ts, fingerprint, secret_key)
|
||||||
|
|
||||||
|
return ts, nonce, key_path, fingerprint
|
||||||
|
|
||||||
|
|
||||||
class DLHDExtractor(BaseExtractor):
|
class DLHDExtractor(BaseExtractor):
|
||||||
"""DLHD (DaddyLive) URL extractor for M3U8 streams."""
|
"""DLHD (DaddyLive) URL extractor for M3U8 streams.
|
||||||
|
|
||||||
|
Supports the new authentication flow with:
|
||||||
|
- EPlayerAuth extraction (auth_token, channel_key, channel_salt)
|
||||||
|
- Server lookup for dynamic server selection
|
||||||
|
- Dynamic key header computation for AES-128 encrypted streams
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, request_headers: dict):
|
def __init__(self, request_headers: dict):
|
||||||
super().__init__(request_headers)
|
super().__init__(request_headers)
|
||||||
# Default to HLS proxy endpoint
|
self.mediaflow_endpoint = "hls_key_proxy"
|
||||||
self.mediaflow_endpoint = "hls_manifest_proxy"
|
self._iframe_context: Optional[str] = None
|
||||||
|
self._flaresolverr_cookies: Optional[str] = None
|
||||||
|
self._flaresolverr_user_agent: Optional[str] = None
|
||||||
|
|
||||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
async def _fetch_via_flaresolverr(self, url: str) -> HttpResponse:
|
||||||
"""Extract DLHD stream URL and required headers.
|
"""Fetch a URL using FlareSolverr to bypass Cloudflare protection."""
|
||||||
|
if not settings.flaresolverr_url:
|
||||||
|
raise ExtractorError("FlareSolverr URL not configured. Set FLARESOLVERR_URL in environment.")
|
||||||
|
|
||||||
|
flaresolverr_endpoint = f"{settings.flaresolverr_url.rstrip('/')}/v1"
|
||||||
|
payload = {
|
||||||
|
"cmd": "request.get",
|
||||||
|
"url": url,
|
||||||
|
"maxTimeout": settings.flaresolverr_timeout * 1000,
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info(f"Using FlareSolverr to fetch: {url}")
|
||||||
|
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
async with session.post(
|
||||||
|
flaresolverr_endpoint,
|
||||||
|
json=payload,
|
||||||
|
timeout=aiohttp.ClientTimeout(total=settings.flaresolverr_timeout + 10),
|
||||||
|
) as response:
|
||||||
|
if response.status != 200:
|
||||||
|
raise ExtractorError(f"FlareSolverr returned status {response.status}")
|
||||||
|
|
||||||
|
data = await response.json()
|
||||||
|
|
||||||
|
if data.get("status") != "ok":
|
||||||
|
raise ExtractorError(f"FlareSolverr failed: {data.get('message', 'Unknown error')}")
|
||||||
|
|
||||||
|
solution = data.get("solution", {})
|
||||||
|
html_content = solution.get("response", "")
|
||||||
|
final_url = solution.get("url", url)
|
||||||
|
status = solution.get("status", 200)
|
||||||
|
|
||||||
|
# Store cookies and user-agent for subsequent requests
|
||||||
|
cookies = solution.get("cookies", [])
|
||||||
|
if cookies:
|
||||||
|
cookie_str = "; ".join([f"{c['name']}={c['value']}" for c in cookies])
|
||||||
|
self._flaresolverr_cookies = cookie_str
|
||||||
|
logger.info(f"FlareSolverr provided {len(cookies)} cookies")
|
||||||
|
|
||||||
|
user_agent = solution.get("userAgent")
|
||||||
|
if user_agent:
|
||||||
|
self._flaresolverr_user_agent = user_agent
|
||||||
|
logger.info(f"FlareSolverr user-agent: {user_agent}")
|
||||||
|
|
||||||
|
logger.info(f"FlareSolverr successfully bypassed Cloudflare for: {url}")
|
||||||
|
|
||||||
|
return HttpResponse(
|
||||||
|
status=status,
|
||||||
|
headers={},
|
||||||
|
text=html_content,
|
||||||
|
content=html_content.encode("utf-8", errors="replace"),
|
||||||
|
url=final_url,
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _make_request(
|
||||||
|
self, url: str, method: str = "GET", headers: Optional[Dict] = None, use_flaresolverr: bool = False, **kwargs
|
||||||
|
) -> HttpResponse:
|
||||||
|
"""Override to disable SSL verification and optionally use FlareSolverr."""
|
||||||
|
# Use FlareSolverr for Cloudflare-protected pages
|
||||||
|
if use_flaresolverr and settings.flaresolverr_url:
|
||||||
|
return await self._fetch_via_flaresolverr(url)
|
||||||
|
|
||||||
|
timeout = kwargs.pop("timeout", 15)
|
||||||
|
kwargs.pop("retries", 3) # consumed but not used directly
|
||||||
|
kwargs.pop("backoff_factor", 0.5) # consumed but not used directly
|
||||||
|
|
||||||
|
# Merge headers
|
||||||
|
request_headers = self.base_headers.copy()
|
||||||
|
if headers:
|
||||||
|
request_headers.update(headers)
|
||||||
|
|
||||||
|
# Add FlareSolverr cookies if available
|
||||||
|
if self._flaresolverr_cookies:
|
||||||
|
existing_cookies = request_headers.get("Cookie", "")
|
||||||
|
if existing_cookies:
|
||||||
|
request_headers["Cookie"] = f"{existing_cookies}; {self._flaresolverr_cookies}"
|
||||||
|
else:
|
||||||
|
request_headers["Cookie"] = self._flaresolverr_cookies
|
||||||
|
|
||||||
|
# Use FlareSolverr user-agent if available
|
||||||
|
if self._flaresolverr_user_agent:
|
||||||
|
request_headers["User-Agent"] = self._flaresolverr_user_agent
|
||||||
|
|
||||||
|
# Use create_aiohttp_session with verify=False for SSL bypass
|
||||||
|
async with create_aiohttp_session(url, timeout=timeout, verify=False) as (session, proxy_url):
|
||||||
|
async with session.request(method, url, headers=request_headers, proxy=proxy_url, **kwargs) as response:
|
||||||
|
content = await response.read()
|
||||||
|
final_url = str(response.url)
|
||||||
|
status = response.status
|
||||||
|
resp_headers = dict(response.headers)
|
||||||
|
|
||||||
|
if status >= 400:
|
||||||
|
raise ExtractorError(f"HTTP error {status} while requesting {url}")
|
||||||
|
|
||||||
|
return HttpResponse(
|
||||||
|
status=status,
|
||||||
|
headers=resp_headers,
|
||||||
|
text=content.decode("utf-8", errors="replace"),
|
||||||
|
content=content,
|
||||||
|
url=final_url,
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _extract_session_data(self, iframe_url: str, main_url: str) -> dict | None:
|
||||||
|
"""
|
||||||
|
Fetch the iframe URL and extract auth_token, channel_key, and channel_salt.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
url: The DaddyLive channel URL (required)
|
iframe_url: The iframe URL to fetch
|
||||||
|
main_url: The main site domain for Referer header
|
||||||
Keyword Args:
|
|
||||||
player_url: Direct player URL (optional)
|
|
||||||
stream_url: The stream URL (optional)
|
|
||||||
auth_url_base: Base URL for auth requests (optional)
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dict containing stream URL and required headers
|
Dict with auth_token, channel_key, channel_salt, or None if not found
|
||||||
"""
|
"""
|
||||||
try:
|
headers = {
|
||||||
# Channel URL is required and serves as the referer
|
"User-Agent": self._flaresolverr_user_agent or DEFAULT_DLHD_USER_AGENT,
|
||||||
channel_url = url
|
"Referer": f"https://{main_url}/",
|
||||||
player_origin = self._get_origin(channel_url)
|
|
||||||
|
|
||||||
# Check for direct parameters
|
|
||||||
player_url = kwargs.get("player_url")
|
|
||||||
stream_url = kwargs.get("stream_url")
|
|
||||||
auth_url_base = kwargs.get("auth_url_base")
|
|
||||||
|
|
||||||
# If player URL not provided, extract it from channel page
|
|
||||||
if not player_url:
|
|
||||||
# Get the channel page to extract the player iframe URL
|
|
||||||
channel_headers = {
|
|
||||||
"referer": player_origin + "/",
|
|
||||||
"origin": player_origin,
|
|
||||||
"user-agent": self.base_headers["user-agent"],
|
|
||||||
}
|
}
|
||||||
|
|
||||||
channel_response = await self._make_request(channel_url, headers=channel_headers)
|
|
||||||
player_url = self._extract_player_url(channel_response.text)
|
|
||||||
|
|
||||||
if not player_url:
|
|
||||||
raise ExtractorError("Could not extract player URL from channel page")
|
|
||||||
|
|
||||||
if not re.search(r"/stream/([a-zA-Z0-9-]+)", player_url):
|
|
||||||
iframe_player_url = await self._handle_playnow(player_url, player_origin)
|
|
||||||
player_origin = self._get_origin(player_url)
|
|
||||||
player_url = iframe_player_url
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return await self._handle_vecloud(player_url, player_origin + "/")
|
resp = await self._make_request(iframe_url, headers=headers, timeout=12)
|
||||||
|
html = resp.text
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
pass
|
logger.warning(f"Error fetching iframe URL: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
# Get player page to extract authentication information
|
# Pattern to extract EPlayerAuth.init block with authToken, channelKey, channelSalt
|
||||||
player_headers = {
|
# Matches: EPlayerAuth.init({ authToken: '...', channelKey: '...', ..., channelSalt: '...' });
|
||||||
"referer": player_origin + "/",
|
auth_pattern = r"EPlayerAuth\.init\s*\(\s*\{\s*authToken:\s*'([^']+)'"
|
||||||
"origin": player_origin,
|
channel_key_pattern = r"channelKey:\s*'([^']+)'"
|
||||||
"user-agent": self.base_headers["user-agent"],
|
channel_salt_pattern = r"channelSalt:\s*'([^']+)'"
|
||||||
|
|
||||||
|
# Pattern to extract server lookup base URL from fetchWithRetry call
|
||||||
|
lookup_pattern = r"fetchWithRetry\s*\(\s*'([^']+server_lookup\?channel_id=)"
|
||||||
|
|
||||||
|
auth_match = re.search(auth_pattern, html)
|
||||||
|
channel_key_match = re.search(channel_key_pattern, html)
|
||||||
|
channel_salt_match = re.search(channel_salt_pattern, html)
|
||||||
|
lookup_match = re.search(lookup_pattern, html)
|
||||||
|
|
||||||
|
if auth_match and channel_key_match and channel_salt_match:
|
||||||
|
result = {
|
||||||
|
"auth_token": auth_match.group(1),
|
||||||
|
"channel_key": channel_key_match.group(1),
|
||||||
|
"channel_salt": channel_salt_match.group(1),
|
||||||
|
}
|
||||||
|
if lookup_match:
|
||||||
|
result["server_lookup_url"] = lookup_match.group(1) + result["channel_key"]
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
async def _get_server_key(self, server_lookup_url: str, iframe_url: str) -> str | None:
|
||||||
|
"""
|
||||||
|
Fetch the server lookup URL and extract the server_key.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
server_lookup_url: The server lookup URL
|
||||||
|
iframe_url: The iframe URL for extracting the host for headers
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The server_key or None if not found
|
||||||
|
"""
|
||||||
|
parsed = urlparse(iframe_url)
|
||||||
|
iframe_host = parsed.netloc
|
||||||
|
|
||||||
|
headers = {
|
||||||
|
"User-Agent": self._flaresolverr_user_agent or DEFAULT_DLHD_USER_AGENT,
|
||||||
|
"Referer": f"https://{iframe_host}/",
|
||||||
|
"Origin": f"https://{iframe_host}",
|
||||||
}
|
}
|
||||||
|
|
||||||
player_response = await self._make_request(player_url, headers=player_headers)
|
try:
|
||||||
player_content = player_response.text
|
resp = await self._make_request(server_lookup_url, headers=headers, timeout=10)
|
||||||
|
data = resp.json()
|
||||||
|
return data.get("server_key")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Error fetching server lookup: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
# Extract authentication details from script tag
|
def _build_m3u8_url(self, server_key: str, channel_key: str) -> str:
|
||||||
auth_data = self._extract_auth_data(player_content)
|
"""
|
||||||
if not auth_data:
|
Build the m3u8 URL based on the server_key.
|
||||||
raise ExtractorError("Failed to extract authentication data from player")
|
|
||||||
|
|
||||||
# Extract auth URL base if not provided
|
Args:
|
||||||
if not auth_url_base:
|
server_key: The server key from server lookup
|
||||||
auth_url_base = self._extract_auth_url_base(player_content)
|
channel_key: The channel key
|
||||||
|
|
||||||
# If still no auth URL base, try to derive from stream URL or player URL
|
Returns:
|
||||||
if not auth_url_base:
|
The m3u8 URL (with .css extension as per the original implementation)
|
||||||
if stream_url:
|
"""
|
||||||
auth_url_base = self._get_origin(stream_url)
|
if server_key == "top1/cdn":
|
||||||
|
return f"https://top1.dvalna.ru/top1/cdn/{channel_key}/mono.css"
|
||||||
else:
|
else:
|
||||||
# Try to extract from player URL structure
|
return f"https://{server_key}new.dvalna.ru/{server_key}/{channel_key}/mono.css"
|
||||||
player_domain = self._get_origin(player_url)
|
|
||||||
# Attempt to construct a standard auth domain
|
|
||||||
auth_url_base = self._derive_auth_url_base(player_domain)
|
|
||||||
|
|
||||||
if not auth_url_base:
|
async def _extract_new_auth_flow(self, iframe_url: str, iframe_content: str, headers: dict) -> Dict[str, Any]:
|
||||||
raise ExtractorError("Could not determine auth URL base")
|
"""Handles the new authentication flow found in recent updates."""
|
||||||
|
|
||||||
# Construct auth URL
|
def _extract_params(js: str) -> Dict[str, Optional[str]]:
|
||||||
auth_url = (
|
params = {}
|
||||||
f"{auth_url_base}/auth.php?channel_id={auth_data['channel_key']}"
|
patterns = {
|
||||||
f"&ts={auth_data['auth_ts']}&rnd={auth_data['auth_rnd']}"
|
"channel_key": r'(?:const|var|let)\s+(?:CHANNEL_KEY|channelKey)\s*=\s*["\']([^"\']+)["\']',
|
||||||
f"&sig={quote(auth_data['auth_sig'])}"
|
"auth_token": r'(?:const|var|let)\s+AUTH_TOKEN\s*=\s*["\']([^"\']+)["\']',
|
||||||
|
"auth_country": r'(?:const|var|let)\s+AUTH_COUNTRY\s*=\s*["\']([^"\']+)["\']',
|
||||||
|
"auth_ts": r'(?:const|var|let)\s+AUTH_TS\s*=\s*["\']([^"\']+)["\']',
|
||||||
|
"auth_expiry": r'(?:const|var|let)\s+AUTH_EXPIRY\s*=\s*["\']([^"\']+)["\']',
|
||||||
|
}
|
||||||
|
for key, pattern in patterns.items():
|
||||||
|
match = re.search(pattern, js)
|
||||||
|
params[key] = match.group(1) if match else None
|
||||||
|
return params
|
||||||
|
|
||||||
|
params = _extract_params(iframe_content)
|
||||||
|
|
||||||
|
missing_params = [k for k, v in params.items() if not v]
|
||||||
|
if missing_params:
|
||||||
|
# This is not an error, just means it's not the new flow
|
||||||
|
raise ExtractorError(f"Not the new auth flow: missing params {missing_params}")
|
||||||
|
|
||||||
|
logger.info("New auth flow detected. Proceeding with POST auth.")
|
||||||
|
|
||||||
|
# 1. Initial Auth POST
|
||||||
|
auth_url = "https://security.newkso.ru/auth2.php"
|
||||||
|
|
||||||
|
iframe_origin = f"https://{urlparse(iframe_url).netloc}"
|
||||||
|
auth_headers = headers.copy()
|
||||||
|
auth_headers.update(
|
||||||
|
{
|
||||||
|
"Accept": "*/*",
|
||||||
|
"Accept-Language": "en-US,en;q=0.9",
|
||||||
|
"Origin": iframe_origin,
|
||||||
|
"Referer": iframe_url,
|
||||||
|
"Sec-Fetch-Dest": "empty",
|
||||||
|
"Sec-Fetch-Mode": "cors",
|
||||||
|
"Sec-Fetch-Site": "cross-site",
|
||||||
|
"Priority": "u=1, i",
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
# Make auth request
|
# Build form data for multipart/form-data
|
||||||
player_origin = self._get_origin(player_url)
|
form_data = aiohttp.FormData()
|
||||||
auth_headers = {
|
form_data.add_field("channelKey", params["channel_key"])
|
||||||
"referer": player_origin + "/",
|
form_data.add_field("country", params["auth_country"])
|
||||||
"origin": player_origin,
|
form_data.add_field("timestamp", params["auth_ts"])
|
||||||
"user-agent": self.base_headers["user-agent"],
|
form_data.add_field("expiry", params["auth_expiry"])
|
||||||
}
|
form_data.add_field("token", params["auth_token"])
|
||||||
|
|
||||||
auth_response = await self._make_request(auth_url, headers=auth_headers)
|
try:
|
||||||
|
async with create_aiohttp_session(auth_url, timeout=12, verify=False) as (session, proxy_url):
|
||||||
# Check if authentication succeeded
|
async with session.post(
|
||||||
if auth_response.json().get("status") != "ok":
|
auth_url,
|
||||||
raise ExtractorError("Authentication failed")
|
|
||||||
|
|
||||||
# If no stream URL provided, look up the server and generate the stream URL
|
|
||||||
if not stream_url:
|
|
||||||
stream_url = await self._lookup_server(
|
|
||||||
lookup_url_base=player_origin,
|
|
||||||
auth_url_base=auth_url_base,
|
|
||||||
auth_data=auth_data,
|
|
||||||
headers=auth_headers,
|
headers=auth_headers,
|
||||||
)
|
data=form_data,
|
||||||
|
proxy=proxy_url,
|
||||||
|
) as response:
|
||||||
|
content = await response.read()
|
||||||
|
response.raise_for_status()
|
||||||
|
import json
|
||||||
|
|
||||||
|
auth_data = json.loads(content.decode("utf-8"))
|
||||||
|
if not (auth_data.get("valid") or auth_data.get("success")):
|
||||||
|
raise ExtractorError(f"Initial auth failed with response: {auth_data}")
|
||||||
|
logger.info("New auth flow: Initial auth successful.")
|
||||||
|
except ExtractorError:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
raise ExtractorError(f"New auth flow failed during initial auth POST: {e}")
|
||||||
|
|
||||||
|
# 2. Server Lookup
|
||||||
|
server_lookup_url = f"https://{urlparse(iframe_url).netloc}/server_lookup.js?channel_id={params['channel_key']}"
|
||||||
|
try:
|
||||||
|
# Use _make_request as it handles retries
|
||||||
|
lookup_resp = await self._make_request(server_lookup_url, headers=headers, timeout=10)
|
||||||
|
server_data = lookup_resp.json()
|
||||||
|
server_key = server_data.get("server_key")
|
||||||
|
if not server_key:
|
||||||
|
raise ExtractorError(f"No server_key in lookup response: {server_data}")
|
||||||
|
logger.info(f"New auth flow: Server lookup successful - Server key: {server_key}")
|
||||||
|
except ExtractorError:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
raise ExtractorError(f"New auth flow failed during server lookup: {e}")
|
||||||
|
|
||||||
|
# 3. Build final stream URL
|
||||||
|
channel_key = params["channel_key"]
|
||||||
|
auth_token = params["auth_token"]
|
||||||
|
# The JS logic uses .css, not .m3u8
|
||||||
|
if server_key == "top1/cdn":
|
||||||
|
stream_url = f"https://top1.newkso.ru/top1/cdn/{channel_key}/mono.css"
|
||||||
|
else:
|
||||||
|
stream_url = f"https://{server_key}new.newkso.ru/{server_key}/{channel_key}/mono.css"
|
||||||
|
|
||||||
|
logger.info(f"New auth flow: Constructed stream URL: {stream_url}")
|
||||||
|
|
||||||
# Set up the final stream headers
|
|
||||||
stream_headers = {
|
stream_headers = {
|
||||||
"referer": player_url,
|
"User-Agent": headers["User-Agent"],
|
||||||
"origin": player_origin,
|
"Referer": iframe_url,
|
||||||
"user-agent": self.base_headers["user-agent"],
|
"Origin": iframe_origin,
|
||||||
|
"Authorization": f"Bearer {auth_token}",
|
||||||
|
"X-Channel-Key": channel_key,
|
||||||
}
|
}
|
||||||
|
|
||||||
# Return the stream URL with headers
|
|
||||||
return {
|
return {
|
||||||
"destination_url": stream_url,
|
"destination_url": stream_url,
|
||||||
"request_headers": stream_headers,
|
"request_headers": stream_headers,
|
||||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
"mediaflow_endpoint": "hls_manifest_proxy",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async def _extract_lovecdn_stream(self, iframe_url: str, iframe_content: str, headers: dict) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Alternative extractor for lovecdn.ru iframe that uses a different format.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Look for direct stream URL patterns
|
||||||
|
m3u8_patterns = [
|
||||||
|
r'["\']([^"\']*\.m3u8[^"\']*)["\']',
|
||||||
|
r'source[:\s]+["\']([^"\']+)["\']',
|
||||||
|
r'file[:\s]+["\']([^"\']+\.m3u8[^"\']*)["\']',
|
||||||
|
r'hlsManifestUrl[:\s]*["\']([^"\']+)["\']',
|
||||||
|
]
|
||||||
|
|
||||||
|
stream_url = None
|
||||||
|
for pattern in m3u8_patterns:
|
||||||
|
matches = re.findall(pattern, iframe_content)
|
||||||
|
for match in matches:
|
||||||
|
if ".m3u8" in match and match.startswith("http"):
|
||||||
|
stream_url = match
|
||||||
|
logger.info(f"Found direct m3u8 URL: {stream_url}")
|
||||||
|
break
|
||||||
|
if stream_url:
|
||||||
|
break
|
||||||
|
|
||||||
|
# Pattern 2: Look for dynamic URL construction
|
||||||
|
if not stream_url:
|
||||||
|
channel_match = re.search(r'(?:stream|channel)["\s:=]+["\']([^"\']+)["\']', iframe_content)
|
||||||
|
server_match = re.search(r'(?:server|domain|host)["\s:=]+["\']([^"\']+)["\']', iframe_content)
|
||||||
|
|
||||||
|
if channel_match:
|
||||||
|
channel_name = channel_match.group(1)
|
||||||
|
server = server_match.group(1) if server_match else "newkso.ru"
|
||||||
|
stream_url = f"https://{server}/{channel_name}/mono.m3u8"
|
||||||
|
logger.info(f"Constructed stream URL: {stream_url}")
|
||||||
|
|
||||||
|
if not stream_url:
|
||||||
|
# Fallback: look for any URL that looks like a stream
|
||||||
|
url_pattern = r'https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*'
|
||||||
|
matches = re.findall(url_pattern, iframe_content)
|
||||||
|
if matches:
|
||||||
|
stream_url = matches[0]
|
||||||
|
logger.info(f"Found fallback stream URL: {stream_url}")
|
||||||
|
|
||||||
|
if not stream_url:
|
||||||
|
raise ExtractorError("Could not find stream URL in lovecdn.ru iframe")
|
||||||
|
|
||||||
|
# Use iframe URL as referer
|
||||||
|
iframe_origin = f"https://{urlparse(iframe_url).netloc}"
|
||||||
|
stream_headers = {"User-Agent": headers["User-Agent"], "Referer": iframe_url, "Origin": iframe_origin}
|
||||||
|
|
||||||
|
# Determine endpoint based on the stream domain
|
||||||
|
endpoint = "hls_key_proxy"
|
||||||
|
|
||||||
|
logger.info(f"Using lovecdn.ru stream with endpoint: {endpoint}")
|
||||||
|
|
||||||
|
return {
|
||||||
|
"destination_url": stream_url,
|
||||||
|
"request_headers": stream_headers,
|
||||||
|
"mediaflow_endpoint": endpoint,
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
raise ExtractorError(f"Failed to extract lovecdn.ru stream: {e}")
|
||||||
|
|
||||||
|
async def _extract_direct_stream(self, channel_id: str) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Direct stream extraction using server lookup API with the new auth flow.
|
||||||
|
This extracts auth_token, channel_key, channel_salt and computes key headers.
|
||||||
|
"""
|
||||||
|
# Common iframe domains for DLHD
|
||||||
|
iframe_domains = ["lefttoplay.xyz"]
|
||||||
|
|
||||||
|
for iframe_domain in iframe_domains:
|
||||||
|
try:
|
||||||
|
iframe_url = f"https://{iframe_domain}/premiumtv/daddyhd.php?id={channel_id}"
|
||||||
|
logger.info(f"Attempting extraction via {iframe_domain}")
|
||||||
|
|
||||||
|
session_data = await self._extract_session_data(iframe_url, "dlhd.link")
|
||||||
|
|
||||||
|
if not session_data:
|
||||||
|
logger.debug(f"No session data from {iframe_domain}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
logger.info(f"Got session data from {iframe_domain}: channel_key={session_data['channel_key']}")
|
||||||
|
|
||||||
|
# Get server key
|
||||||
|
if "server_lookup_url" not in session_data:
|
||||||
|
logger.debug(f"No server lookup URL from {iframe_domain}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
server_key = await self._get_server_key(session_data["server_lookup_url"], iframe_url)
|
||||||
|
|
||||||
|
if not server_key:
|
||||||
|
logger.debug(f"No server key from {iframe_domain}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
logger.info(f"Got server key: {server_key}")
|
||||||
|
|
||||||
|
# Build m3u8 URL
|
||||||
|
m3u8_url = self._build_m3u8_url(server_key, session_data["channel_key"])
|
||||||
|
logger.info(f"M3U8 URL: {m3u8_url}")
|
||||||
|
|
||||||
|
# Build stream headers with auth
|
||||||
|
iframe_origin = f"https://{iframe_domain}"
|
||||||
|
stream_headers = {
|
||||||
|
"User-Agent": self._flaresolverr_user_agent or DEFAULT_DLHD_USER_AGENT,
|
||||||
|
"Referer": iframe_url,
|
||||||
|
"Origin": iframe_origin,
|
||||||
|
"Authorization": f"Bearer {session_data['auth_token']}",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Return the result with key header parameters
|
||||||
|
# These will be used to compute headers when fetching keys
|
||||||
|
return {
|
||||||
|
"destination_url": m3u8_url,
|
||||||
|
"request_headers": stream_headers,
|
||||||
|
"mediaflow_endpoint": "hls_key_proxy",
|
||||||
|
# Force playlist processing since DLHD uses .css extension for m3u8
|
||||||
|
"force_playlist_proxy": True,
|
||||||
|
# Key header computation parameters
|
||||||
|
"dlhd_key_params": {
|
||||||
|
"channel_salt": session_data["channel_salt"],
|
||||||
|
"auth_token": session_data["auth_token"],
|
||||||
|
"iframe_url": iframe_url,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed extraction via {iframe_domain}: {e}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
raise ExtractorError(f"Failed to extract stream from all iframe domains for channel {channel_id}")
|
||||||
|
|
||||||
|
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||||
|
"""Main extraction flow - uses direct server lookup with new auth flow."""
|
||||||
|
|
||||||
|
def extract_channel_id(u: str) -> Optional[str]:
|
||||||
|
match_watch_id = re.search(r"watch\.php\?id=(\d+)", u)
|
||||||
|
if match_watch_id:
|
||||||
|
return match_watch_id.group(1)
|
||||||
|
# Also try stream-XXX pattern
|
||||||
|
match_stream = re.search(r"stream-(\d+)", u)
|
||||||
|
if match_stream:
|
||||||
|
return match_stream.group(1)
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
channel_id = extract_channel_id(url)
|
||||||
|
if not channel_id:
|
||||||
|
raise ExtractorError(f"Unable to extract channel ID from {url}")
|
||||||
|
|
||||||
|
logger.info(f"Extracting DLHD stream for channel ID: {channel_id}")
|
||||||
|
|
||||||
|
# Try direct stream extraction with new auth flow
|
||||||
|
try:
|
||||||
|
return await self._extract_direct_stream(channel_id)
|
||||||
|
except ExtractorError as e:
|
||||||
|
logger.warning(f"Direct stream extraction failed: {e}")
|
||||||
|
|
||||||
|
# Fallback to legacy iframe-based extraction if direct fails
|
||||||
|
logger.info("Falling back to iframe-based extraction...")
|
||||||
|
return await self._extract_via_iframe(url, channel_id)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise ExtractorError(f"Extraction failed: {str(e)}")
|
raise ExtractorError(f"Extraction failed: {str(e)}")
|
||||||
|
|
||||||
async def _handle_vecloud(self, player_url: str, channel_referer: str) -> Dict[str, Any]:
|
async def _extract_via_iframe(self, url: str, channel_id: str) -> Dict[str, Any]:
|
||||||
"""Handle vecloud URLs with their specific API.
|
"""Legacy iframe-based extraction flow - used as fallback."""
|
||||||
|
baseurl = "https://dlhd.dad/"
|
||||||
|
|
||||||
Args:
|
daddy_origin = urlparse(baseurl).scheme + "://" + urlparse(baseurl).netloc
|
||||||
player_url: The vecloud player URL
|
daddylive_headers = {
|
||||||
channel_referer: The referer of the channel page
|
"User-Agent": self._flaresolverr_user_agent
|
||||||
Returns:
|
or "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
|
||||||
Dict containing stream URL and required headers
|
"Referer": baseurl,
|
||||||
"""
|
"Origin": daddy_origin,
|
||||||
|
}
|
||||||
|
|
||||||
|
# 1. Request initial page - use FlareSolverr if available to bypass Cloudflare
|
||||||
|
use_flaresolverr = settings.flaresolverr_url is not None
|
||||||
|
resp1 = await self._make_request(url, headers=daddylive_headers, timeout=15, use_flaresolverr=use_flaresolverr)
|
||||||
|
resp1_text = resp1.text
|
||||||
|
|
||||||
|
# Update headers with FlareSolverr user-agent after initial request
|
||||||
|
if self._flaresolverr_user_agent:
|
||||||
|
daddylive_headers["User-Agent"] = self._flaresolverr_user_agent
|
||||||
|
|
||||||
|
player_links = re.findall(r'<button[^>]*data-url="([^"]+)"[^>]*>Player\s*\d+</button>', resp1_text)
|
||||||
|
if not player_links:
|
||||||
|
raise ExtractorError("No player links found on the page.")
|
||||||
|
|
||||||
|
# Try all players and collect all valid iframes
|
||||||
|
last_player_error = None
|
||||||
|
iframe_candidates = []
|
||||||
|
|
||||||
|
for player_url in player_links:
|
||||||
try:
|
try:
|
||||||
# Extract stream ID from vecloud URL
|
if not player_url.startswith("http"):
|
||||||
stream_id_match = re.search(r"/stream/([a-zA-Z0-9-]+)", player_url)
|
player_url = baseurl + player_url.lstrip("/")
|
||||||
if not stream_id_match:
|
|
||||||
raise ExtractorError("Could not extract stream ID from vecloud URL")
|
|
||||||
|
|
||||||
stream_id = stream_id_match.group(1)
|
daddylive_headers["Referer"] = player_url
|
||||||
|
daddylive_headers["Origin"] = player_url
|
||||||
|
resp2 = await self._make_request(player_url, headers=daddylive_headers, timeout=12)
|
||||||
|
resp2_text = resp2.text
|
||||||
|
iframes2 = re.findall(r'<iframe.*?src="([^"]*)"', resp2_text)
|
||||||
|
|
||||||
response = await self._make_request(
|
# Collect all found iframes
|
||||||
player_url, headers={"referer": channel_referer, "user-agent": self.base_headers["user-agent"]}
|
for iframe in iframes2:
|
||||||
)
|
if iframe not in iframe_candidates:
|
||||||
player_url = str(response.url)
|
iframe_candidates.append(iframe)
|
||||||
|
logger.info(f"Found iframe candidate: {iframe}")
|
||||||
# Construct API URL
|
|
||||||
player_parsed = urlparse(player_url)
|
|
||||||
player_domain = player_parsed.netloc
|
|
||||||
player_origin = f"{player_parsed.scheme}://{player_parsed.netloc}"
|
|
||||||
api_url = f"{player_origin}/api/source/{stream_id}?type=live"
|
|
||||||
|
|
||||||
# Set up headers for API request
|
|
||||||
api_headers = {
|
|
||||||
"referer": player_url,
|
|
||||||
"origin": player_origin,
|
|
||||||
"user-agent": self.base_headers["user-agent"],
|
|
||||||
"content-type": "application/json",
|
|
||||||
}
|
|
||||||
|
|
||||||
api_data = {"r": channel_referer, "d": player_domain}
|
|
||||||
|
|
||||||
# Make API request
|
|
||||||
api_response = await self._make_request(api_url, method="POST", headers=api_headers, json=api_data)
|
|
||||||
api_data = api_response.json()
|
|
||||||
|
|
||||||
# Check if request was successful
|
|
||||||
if not api_data.get("success"):
|
|
||||||
raise ExtractorError("Vecloud API request failed")
|
|
||||||
|
|
||||||
# Extract stream URL from response
|
|
||||||
stream_url = api_data.get("player", {}).get("source_file")
|
|
||||||
|
|
||||||
if not stream_url:
|
|
||||||
raise ExtractorError("Could not find stream URL in vecloud response")
|
|
||||||
|
|
||||||
# Set up stream headers
|
|
||||||
stream_headers = {
|
|
||||||
"referer": player_origin + "/",
|
|
||||||
"origin": player_origin,
|
|
||||||
"user-agent": self.base_headers["user-agent"],
|
|
||||||
}
|
|
||||||
|
|
||||||
# Return the stream URL with headers
|
|
||||||
return {
|
|
||||||
"destination_url": stream_url,
|
|
||||||
"request_headers": stream_headers,
|
|
||||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise ExtractorError(f"Vecloud extraction failed: {str(e)}")
|
last_player_error = e
|
||||||
|
logger.warning(f"Failed to process player link {player_url}: {e}")
|
||||||
|
continue
|
||||||
|
|
||||||
async def _handle_playnow(self, player_iframe: str, channel_origin: str) -> str:
|
if not iframe_candidates:
|
||||||
"""Handle playnow URLs."""
|
if last_player_error:
|
||||||
# Set up headers for the playnow request
|
raise ExtractorError(f"All player links failed. Last error: {last_player_error}")
|
||||||
playnow_headers = {"referer": channel_origin + "/", "user-agent": self.base_headers["user-agent"]}
|
raise ExtractorError("No valid iframe found in any player page")
|
||||||
|
|
||||||
# Make the playnow request
|
# Try each iframe until one works
|
||||||
playnow_response = await self._make_request(player_iframe, headers=playnow_headers)
|
last_iframe_error = None
|
||||||
player_url = self._extract_player_url(playnow_response.text)
|
|
||||||
if not player_url:
|
|
||||||
raise ExtractorError("Could not extract player URL from playnow response")
|
|
||||||
return player_url
|
|
||||||
|
|
||||||
def _extract_player_url(self, html_content: str) -> Optional[str]:
|
for iframe_candidate in iframe_candidates:
|
||||||
"""Extract player iframe URL from channel page HTML."""
|
|
||||||
try:
|
try:
|
||||||
# Look for iframe with allowfullscreen attribute
|
logger.info(f"Trying iframe: {iframe_candidate}")
|
||||||
iframe_match = re.search(
|
|
||||||
r'<iframe[^>]*src=["\']([^"\']+)["\'][^>]*allowfullscreen', html_content, re.IGNORECASE
|
|
||||||
)
|
|
||||||
|
|
||||||
if not iframe_match:
|
iframe_domain = urlparse(iframe_candidate).netloc
|
||||||
# Try alternative pattern without requiring allowfullscreen
|
if not iframe_domain:
|
||||||
iframe_match = re.search(
|
logger.warning(f"Invalid iframe URL format: {iframe_candidate}")
|
||||||
r'<iframe[^>]*src=["\']([^"\']+(?:premiumtv|daddylivehd|vecloud)[^"\']*)["\']',
|
continue
|
||||||
html_content,
|
|
||||||
re.IGNORECASE,
|
|
||||||
)
|
|
||||||
|
|
||||||
if iframe_match:
|
self._iframe_context = iframe_candidate
|
||||||
return iframe_match.group(1).strip()
|
resp3 = await self._make_request(iframe_candidate, headers=daddylive_headers, timeout=12)
|
||||||
|
iframe_content = resp3.text
|
||||||
|
logger.info(f"Successfully loaded iframe from: {iframe_domain}")
|
||||||
|
|
||||||
return None
|
if "lovecdn.ru" in iframe_domain:
|
||||||
except Exception:
|
logger.info("Detected lovecdn.ru iframe - using alternative extraction")
|
||||||
return None
|
return await self._extract_lovecdn_stream(iframe_candidate, iframe_content, daddylive_headers)
|
||||||
|
|
||||||
async def _lookup_server(
|
|
||||||
self, lookup_url_base: str, auth_url_base: str, auth_data: Dict[str, str], headers: Dict[str, str]
|
|
||||||
) -> str:
|
|
||||||
"""Lookup server information and generate stream URL."""
|
|
||||||
try:
|
|
||||||
# Construct server lookup URL
|
|
||||||
server_lookup_url = f"{lookup_url_base}/server_lookup.php?channel_id={quote(auth_data['channel_key'])}"
|
|
||||||
|
|
||||||
# Make server lookup request
|
|
||||||
server_response = await self._make_request(server_lookup_url, headers=headers)
|
|
||||||
|
|
||||||
server_data = server_response.json()
|
|
||||||
server_key = server_data.get("server_key")
|
|
||||||
|
|
||||||
if not server_key:
|
|
||||||
raise ExtractorError("Failed to get server key")
|
|
||||||
|
|
||||||
# Extract domain parts from auth URL for constructing stream URL
|
|
||||||
auth_domain_parts = urlparse(auth_url_base).netloc.split(".")
|
|
||||||
domain_suffix = ".".join(auth_domain_parts[1:]) if len(auth_domain_parts) > 1 else auth_domain_parts[0]
|
|
||||||
|
|
||||||
# Generate the m3u8 URL based on server response pattern
|
|
||||||
if "/" in server_key:
|
|
||||||
# Handle special case like "top1/cdn"
|
|
||||||
parts = server_key.split("/")
|
|
||||||
return f"https://{parts[0]}.{domain_suffix}/{server_key}/{auth_data['channel_key']}/mono.m3u8"
|
|
||||||
else:
|
else:
|
||||||
# Handle normal case
|
logger.info("Attempting new auth flow extraction.")
|
||||||
return f"https://{server_key}new.{domain_suffix}/{server_key}/{auth_data['channel_key']}/mono.m3u8"
|
return await self._extract_new_auth_flow(iframe_candidate, iframe_content, daddylive_headers)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise ExtractorError(f"Server lookup failed: {str(e)}")
|
logger.warning(f"Failed to process iframe {iframe_candidate}: {e}")
|
||||||
|
last_iframe_error = e
|
||||||
|
continue
|
||||||
|
|
||||||
def _extract_auth_data(self, html_content: str) -> Dict[str, str]:
|
raise ExtractorError(f"All iframe candidates failed. Last error: {last_iframe_error}")
|
||||||
"""Extract authentication data from player page."""
|
|
||||||
try:
|
|
||||||
# Extract channel key
|
|
||||||
channel_key_match = re.search(r'var\s+channelKey\s*=\s*["\']([^"\']+)["\']', html_content)
|
|
||||||
# Extract auth timestamp
|
|
||||||
auth_ts_match = re.search(r'var\s+authTs\s*=\s*["\']([^"\']+)["\']', html_content)
|
|
||||||
# Extract auth random value
|
|
||||||
auth_rnd_match = re.search(r'var\s+authRnd\s*=\s*["\']([^"\']+)["\']', html_content)
|
|
||||||
# Extract auth signature
|
|
||||||
auth_sig_match = re.search(r'var\s+authSig\s*=\s*["\']([^"\']+)["\']', html_content)
|
|
||||||
|
|
||||||
if not all([channel_key_match, auth_ts_match, auth_rnd_match, auth_sig_match]):
|
|
||||||
return {}
|
|
||||||
|
|
||||||
return {
|
|
||||||
"channel_key": channel_key_match.group(1),
|
|
||||||
"auth_ts": auth_ts_match.group(1),
|
|
||||||
"auth_rnd": auth_rnd_match.group(1),
|
|
||||||
"auth_sig": auth_sig_match.group(1),
|
|
||||||
}
|
|
||||||
except Exception:
|
|
||||||
return {}
|
|
||||||
|
|
||||||
def _extract_auth_url_base(self, html_content: str) -> Optional[str]:
|
|
||||||
"""Extract auth URL base from player page script content."""
|
|
||||||
try:
|
|
||||||
# Look for auth URL or domain in fetchWithRetry call or similar patterns
|
|
||||||
auth_url_match = re.search(r'fetchWithRetry\([\'"]([^\'"]*/auth\.php)', html_content)
|
|
||||||
|
|
||||||
if auth_url_match:
|
|
||||||
auth_url = auth_url_match.group(1)
|
|
||||||
# Extract base URL up to the auth.php part
|
|
||||||
return auth_url.split("/auth.php")[0]
|
|
||||||
|
|
||||||
# Try finding domain directly
|
|
||||||
domain_match = re.search(r'[\'"]https://([^/\'\"]+)(?:/[^\'\"]*)?/auth\.php', html_content)
|
|
||||||
|
|
||||||
if domain_match:
|
|
||||||
return f"https://{domain_match.group(1)}"
|
|
||||||
|
|
||||||
return None
|
|
||||||
except Exception:
|
|
||||||
return None
|
|
||||||
|
|
||||||
def _get_origin(self, url: str) -> str:
|
|
||||||
"""Extract origin from URL."""
|
|
||||||
parsed = urlparse(url)
|
|
||||||
return f"{parsed.scheme}://{parsed.netloc}"
|
|
||||||
|
|
||||||
def _derive_auth_url_base(self, player_domain: str) -> Optional[str]:
|
|
||||||
"""Attempt to derive auth URL base from player domain."""
|
|
||||||
try:
|
|
||||||
# Typical pattern is to use a subdomain for auth domain
|
|
||||||
parsed = urlparse(player_domain)
|
|
||||||
domain_parts = parsed.netloc.split(".")
|
|
||||||
|
|
||||||
# Get the top-level domain and second-level domain
|
|
||||||
if len(domain_parts) >= 2:
|
|
||||||
base_domain = ".".join(domain_parts[-2:])
|
|
||||||
# Try common subdomains for auth
|
|
||||||
for prefix in ["auth", "api", "cdn"]:
|
|
||||||
potential_auth_domain = f"https://{prefix}.{base_domain}"
|
|
||||||
return potential_auth_domain
|
|
||||||
|
|
||||||
return None
|
|
||||||
except Exception:
|
|
||||||
return None
|
|
||||||
|
|||||||
@@ -1,39 +1,52 @@
|
|||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
from typing import Dict
|
from urllib.parse import urlparse, urljoin
|
||||||
|
|
||||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||||
|
|
||||||
|
|
||||||
class DoodStreamExtractor(BaseExtractor):
|
class DoodStreamExtractor(BaseExtractor):
|
||||||
"""DoodStream URL extractor."""
|
"""
|
||||||
|
Dood / MyVidPlay extractor
|
||||||
|
Resolves to direct CDN MP4
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, request_headers: dict):
|
def __init__(self, request_headers: dict):
|
||||||
super().__init__(request_headers)
|
super().__init__(request_headers)
|
||||||
self.base_url = "https://d000d.com"
|
self.base_url = "https://myvidplay.com"
|
||||||
|
|
||||||
async def extract(self, url: str, **kwargs) -> Dict[str, str]:
|
async def extract(self, url: str, **kwargs):
|
||||||
"""Extract DoodStream URL."""
|
parsed = urlparse(url)
|
||||||
response = await self._make_request(url)
|
video_id = parsed.path.rstrip("/").split("/")[-1]
|
||||||
|
if not video_id:
|
||||||
|
raise ExtractorError("Invalid Dood URL")
|
||||||
|
|
||||||
# Extract URL pattern
|
headers = {
|
||||||
pattern = r"(\/pass_md5\/.*?)'.*(\?token=.*?expiry=)"
|
"User-Agent": self.base_headers.get("User-Agent") or "Mozilla/5.0",
|
||||||
match = re.search(pattern, response.text, re.DOTALL)
|
"Referer": f"{self.base_url}/",
|
||||||
|
}
|
||||||
|
|
||||||
|
embed_url = f"{self.base_url}/e/{video_id}"
|
||||||
|
html = (await self._make_request(embed_url, headers=headers)).text
|
||||||
|
|
||||||
|
match = re.search(r"(\/pass_md5\/[^']+)", html)
|
||||||
if not match:
|
if not match:
|
||||||
raise ExtractorError("Failed to extract URL pattern")
|
raise ExtractorError("Dood: pass_md5 not found")
|
||||||
|
|
||||||
# Build final URL
|
pass_url = urljoin(self.base_url, match.group(1))
|
||||||
pass_url = f"{self.base_url}{match[1]}"
|
|
||||||
referer = f"{self.base_url}/"
|
|
||||||
headers = {"range": "bytes=0-", "referer": referer}
|
|
||||||
|
|
||||||
response = await self._make_request(pass_url, headers=headers)
|
base_stream = (await self._make_request(pass_url, headers=headers)).text.strip()
|
||||||
timestamp = str(int(time.time()))
|
|
||||||
final_url = f"{response.text}123456789{match[2]}{timestamp}"
|
token_match = re.search(r"token=([^&]+)", html)
|
||||||
|
if not token_match:
|
||||||
|
raise ExtractorError("Dood: token missing")
|
||||||
|
|
||||||
|
token = token_match.group(1)
|
||||||
|
|
||||||
|
final_url = f"{base_stream}123456789?token={token}&expiry={int(time.time())}"
|
||||||
|
|
||||||
self.base_headers["referer"] = referer
|
|
||||||
return {
|
return {
|
||||||
"destination_url": final_url,
|
"destination_url": final_url,
|
||||||
"request_headers": self.base_headers,
|
"request_headers": headers,
|
||||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
"mediaflow_endpoint": "proxy_stream_endpoint",
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,14 +3,27 @@ from typing import Dict, Type
|
|||||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||||
from mediaflow_proxy.extractors.dlhd import DLHDExtractor
|
from mediaflow_proxy.extractors.dlhd import DLHDExtractor
|
||||||
from mediaflow_proxy.extractors.doodstream import DoodStreamExtractor
|
from mediaflow_proxy.extractors.doodstream import DoodStreamExtractor
|
||||||
|
from mediaflow_proxy.extractors.sportsonline import SportsonlineExtractor
|
||||||
|
from mediaflow_proxy.extractors.filelions import FileLionsExtractor
|
||||||
|
from mediaflow_proxy.extractors.filemoon import FileMoonExtractor
|
||||||
|
from mediaflow_proxy.extractors.F16Px import F16PxExtractor
|
||||||
|
from mediaflow_proxy.extractors.gupload import GuploadExtractor
|
||||||
from mediaflow_proxy.extractors.livetv import LiveTVExtractor
|
from mediaflow_proxy.extractors.livetv import LiveTVExtractor
|
||||||
|
from mediaflow_proxy.extractors.lulustream import LuluStreamExtractor
|
||||||
from mediaflow_proxy.extractors.maxstream import MaxstreamExtractor
|
from mediaflow_proxy.extractors.maxstream import MaxstreamExtractor
|
||||||
from mediaflow_proxy.extractors.mixdrop import MixdropExtractor
|
from mediaflow_proxy.extractors.mixdrop import MixdropExtractor
|
||||||
from mediaflow_proxy.extractors.okru import OkruExtractor
|
from mediaflow_proxy.extractors.okru import OkruExtractor
|
||||||
from mediaflow_proxy.extractors.streamtape import StreamtapeExtractor
|
from mediaflow_proxy.extractors.streamtape import StreamtapeExtractor
|
||||||
|
from mediaflow_proxy.extractors.streamwish import StreamWishExtractor
|
||||||
from mediaflow_proxy.extractors.supervideo import SupervideoExtractor
|
from mediaflow_proxy.extractors.supervideo import SupervideoExtractor
|
||||||
|
from mediaflow_proxy.extractors.turbovidplay import TurboVidPlayExtractor
|
||||||
from mediaflow_proxy.extractors.uqload import UqloadExtractor
|
from mediaflow_proxy.extractors.uqload import UqloadExtractor
|
||||||
|
from mediaflow_proxy.extractors.vavoo import VavooExtractor
|
||||||
|
from mediaflow_proxy.extractors.vidmoly import VidmolyExtractor
|
||||||
|
from mediaflow_proxy.extractors.vidoza import VidozaExtractor
|
||||||
from mediaflow_proxy.extractors.vixcloud import VixCloudExtractor
|
from mediaflow_proxy.extractors.vixcloud import VixCloudExtractor
|
||||||
|
from mediaflow_proxy.extractors.fastream import FastreamExtractor
|
||||||
|
from mediaflow_proxy.extractors.voe import VoeExtractor
|
||||||
|
|
||||||
|
|
||||||
class ExtractorFactory:
|
class ExtractorFactory:
|
||||||
@@ -18,15 +31,28 @@ class ExtractorFactory:
|
|||||||
|
|
||||||
_extractors: Dict[str, Type[BaseExtractor]] = {
|
_extractors: Dict[str, Type[BaseExtractor]] = {
|
||||||
"Doodstream": DoodStreamExtractor,
|
"Doodstream": DoodStreamExtractor,
|
||||||
|
"FileLions": FileLionsExtractor,
|
||||||
|
"FileMoon": FileMoonExtractor,
|
||||||
|
"F16Px": F16PxExtractor,
|
||||||
|
"Gupload": GuploadExtractor,
|
||||||
"Uqload": UqloadExtractor,
|
"Uqload": UqloadExtractor,
|
||||||
"Mixdrop": MixdropExtractor,
|
"Mixdrop": MixdropExtractor,
|
||||||
"Streamtape": StreamtapeExtractor,
|
"Streamtape": StreamtapeExtractor,
|
||||||
|
"StreamWish": StreamWishExtractor,
|
||||||
"Supervideo": SupervideoExtractor,
|
"Supervideo": SupervideoExtractor,
|
||||||
|
"TurboVidPlay": TurboVidPlayExtractor,
|
||||||
"VixCloud": VixCloudExtractor,
|
"VixCloud": VixCloudExtractor,
|
||||||
"Okru": OkruExtractor,
|
"Okru": OkruExtractor,
|
||||||
"Maxstream": MaxstreamExtractor,
|
"Maxstream": MaxstreamExtractor,
|
||||||
"LiveTV": LiveTVExtractor,
|
"LiveTV": LiveTVExtractor,
|
||||||
|
"LuluStream": LuluStreamExtractor,
|
||||||
"DLHD": DLHDExtractor,
|
"DLHD": DLHDExtractor,
|
||||||
|
"Vavoo": VavooExtractor,
|
||||||
|
"Vidmoly": VidmolyExtractor,
|
||||||
|
"Vidoza": VidozaExtractor,
|
||||||
|
"Fastream": FastreamExtractor,
|
||||||
|
"Voe": VoeExtractor,
|
||||||
|
"Sportsonline": SportsonlineExtractor,
|
||||||
}
|
}
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
35
mediaflow_proxy/extractors/fastream.py
Normal file
35
mediaflow_proxy/extractors/fastream.py
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
from typing import Dict, Any
|
||||||
|
|
||||||
|
from mediaflow_proxy.extractors.base import BaseExtractor
|
||||||
|
from mediaflow_proxy.utils.packed import eval_solver
|
||||||
|
|
||||||
|
|
||||||
|
class FastreamExtractor(BaseExtractor):
|
||||||
|
"""Fastream URL extractor."""
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
self.mediaflow_endpoint = "hls_manifest_proxy"
|
||||||
|
|
||||||
|
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||||
|
headers = {
|
||||||
|
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||||
|
"Connection": "keep-alive",
|
||||||
|
"Accept-Language": "en-US,en;q=0.5",
|
||||||
|
"user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0",
|
||||||
|
}
|
||||||
|
patterns = [r'file:"(.*?)"']
|
||||||
|
|
||||||
|
final_url = await eval_solver(self, url, headers, patterns)
|
||||||
|
|
||||||
|
self.base_headers["referer"] = f"https://{url.replace('https://', '').split('/')[0]}/"
|
||||||
|
self.base_headers["origin"] = f"https://{url.replace('https://', '').split('/')[0]}"
|
||||||
|
self.base_headers["Accept-Language"] = "en-US,en;q=0.5"
|
||||||
|
self.base_headers["Accept"] = "*/*"
|
||||||
|
self.base_headers["user-agent"] = "Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0"
|
||||||
|
|
||||||
|
return {
|
||||||
|
"destination_url": final_url,
|
||||||
|
"request_headers": self.base_headers,
|
||||||
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
|
}
|
||||||
28
mediaflow_proxy/extractors/filelions.py
Normal file
28
mediaflow_proxy/extractors/filelions.py
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
from typing import Dict, Any
|
||||||
|
|
||||||
|
from mediaflow_proxy.extractors.base import BaseExtractor
|
||||||
|
from mediaflow_proxy.utils.packed import eval_solver
|
||||||
|
|
||||||
|
|
||||||
|
class FileLionsExtractor(BaseExtractor):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
self.mediaflow_endpoint = "hls_manifest_proxy"
|
||||||
|
|
||||||
|
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||||
|
headers = {}
|
||||||
|
patterns = [ # See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/filelions.py
|
||||||
|
r"""sources:\s*\[{file:\s*["'](?P<url>[^"']+)""",
|
||||||
|
r"""["']hls4["']:\s*["'](?P<url>[^"']+)""",
|
||||||
|
r"""["']hls2["']:\s*["'](?P<url>[^"']+)""",
|
||||||
|
]
|
||||||
|
|
||||||
|
final_url = await eval_solver(self, url, headers, patterns)
|
||||||
|
|
||||||
|
self.base_headers["referer"] = url
|
||||||
|
return {
|
||||||
|
"destination_url": final_url,
|
||||||
|
"request_headers": self.base_headers,
|
||||||
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
|
"stream_transformer": "ts_stream",
|
||||||
|
}
|
||||||
52
mediaflow_proxy/extractors/filemoon.py
Normal file
52
mediaflow_proxy/extractors/filemoon.py
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
import re
|
||||||
|
from typing import Dict, Any
|
||||||
|
from urllib.parse import urlparse, urljoin
|
||||||
|
|
||||||
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||||
|
from mediaflow_proxy.utils.packed import eval_solver
|
||||||
|
|
||||||
|
|
||||||
|
class FileMoonExtractor(BaseExtractor):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
self.mediaflow_endpoint = "hls_manifest_proxy"
|
||||||
|
|
||||||
|
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||||
|
response = await self._make_request(url)
|
||||||
|
|
||||||
|
pattern = r'iframe.*?src=["\'](.*?)["\']'
|
||||||
|
match = re.search(pattern, response.text, re.DOTALL)
|
||||||
|
if not match:
|
||||||
|
raise ExtractorError("Failed to extract iframe URL")
|
||||||
|
|
||||||
|
iframe_url = match.group(1)
|
||||||
|
|
||||||
|
parsed = urlparse(str(response.url))
|
||||||
|
base_url = f"{parsed.scheme}://{parsed.netloc}"
|
||||||
|
|
||||||
|
if iframe_url.startswith("//"):
|
||||||
|
iframe_url = f"{parsed.scheme}:{iframe_url}"
|
||||||
|
elif not urlparse(iframe_url).scheme:
|
||||||
|
iframe_url = urljoin(base_url, iframe_url)
|
||||||
|
|
||||||
|
headers = {"Referer": url}
|
||||||
|
patterns = [r'file:"(.*?)"']
|
||||||
|
|
||||||
|
final_url = await eval_solver(
|
||||||
|
self,
|
||||||
|
iframe_url,
|
||||||
|
headers,
|
||||||
|
patterns,
|
||||||
|
)
|
||||||
|
|
||||||
|
test_resp = await self._make_request(final_url, headers=headers)
|
||||||
|
if test_resp.status == 404:
|
||||||
|
raise ExtractorError("Stream not found (404)")
|
||||||
|
|
||||||
|
self.base_headers["referer"] = url
|
||||||
|
|
||||||
|
return {
|
||||||
|
"destination_url": final_url,
|
||||||
|
"request_headers": self.base_headers,
|
||||||
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
|
}
|
||||||
65
mediaflow_proxy/extractors/gupload.py
Normal file
65
mediaflow_proxy/extractors/gupload.py
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
import re
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
|
from typing import Dict, Any
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class GuploadExtractor(BaseExtractor):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
self.mediaflow_endpoint = "hls_manifest_proxy"
|
||||||
|
|
||||||
|
async def extract(self, url: str) -> Dict[str, Any]:
|
||||||
|
parsed = urlparse(url)
|
||||||
|
if not parsed.hostname or "gupload.xyz" not in parsed.hostname:
|
||||||
|
raise ExtractorError("GUPLOAD: Invalid domain")
|
||||||
|
|
||||||
|
headers = {
|
||||||
|
"User-Agent": (
|
||||||
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||||
|
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||||
|
"Chrome/144 Safari/537.36"
|
||||||
|
),
|
||||||
|
"Referer": "https://gupload.xyz/",
|
||||||
|
"Origin": "https://gupload.xyz",
|
||||||
|
}
|
||||||
|
|
||||||
|
# --- Fetch embed page ---
|
||||||
|
response = await self._make_request(url, headers=headers)
|
||||||
|
html = response.text
|
||||||
|
|
||||||
|
# --- Extract base64 payload ---
|
||||||
|
match = re.search(r"decodePayload\('([^']+)'\)", html)
|
||||||
|
if not match:
|
||||||
|
raise ExtractorError("GUPLOAD: Payload not found")
|
||||||
|
|
||||||
|
encoded = match.group(1).strip()
|
||||||
|
|
||||||
|
# --- Decode payload ---
|
||||||
|
try:
|
||||||
|
decoded = base64.b64decode(encoded).decode("utf-8", "ignore")
|
||||||
|
# payload format: <junk>|{json}
|
||||||
|
json_part = decoded.split("|", 1)[1]
|
||||||
|
payload = json.loads(json_part)
|
||||||
|
except Exception:
|
||||||
|
raise ExtractorError("GUPLOAD: Payload decode failed")
|
||||||
|
|
||||||
|
# --- Extract HLS URL ---
|
||||||
|
hls_url = payload.get("videoUrl")
|
||||||
|
if not hls_url:
|
||||||
|
raise ExtractorError("GUPLOAD: videoUrl missing")
|
||||||
|
|
||||||
|
# --- Validate stream (prevents client timeout) ---
|
||||||
|
test = await self._make_request(hls_url, headers=headers, raise_on_status=False)
|
||||||
|
if test.status >= 400:
|
||||||
|
raise ExtractorError(f"GUPLOAD: Stream unavailable ({test.status})")
|
||||||
|
|
||||||
|
# Return MASTER playlist
|
||||||
|
return {
|
||||||
|
"destination_url": hls_url,
|
||||||
|
"request_headers": headers,
|
||||||
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
|
}
|
||||||
@@ -2,9 +2,9 @@ import re
|
|||||||
from typing import Dict, Tuple, Optional
|
from typing import Dict, Tuple, Optional
|
||||||
from urllib.parse import urljoin, urlparse, unquote
|
from urllib.parse import urljoin, urlparse, unquote
|
||||||
|
|
||||||
from httpx import Response
|
import aiohttp
|
||||||
|
|
||||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError, HttpResponse
|
||||||
|
|
||||||
|
|
||||||
class LiveTVExtractor(BaseExtractor):
|
class LiveTVExtractor(BaseExtractor):
|
||||||
@@ -33,20 +33,21 @@ class LiveTVExtractor(BaseExtractor):
|
|||||||
stream_title: Optional stream title to filter specific stream
|
stream_title: Optional stream title to filter specific stream
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple[str, Dict[str, str]]: Stream URL and required headers
|
Dict containing destination_url, request_headers, and mediaflow_endpoint
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# Get the channel page
|
# Get the channel page
|
||||||
response = await self._make_request(url)
|
response = await self._make_request(url)
|
||||||
|
response_text = response.text
|
||||||
self.base_headers["referer"] = urljoin(url, "/")
|
self.base_headers["referer"] = urljoin(url, "/")
|
||||||
|
|
||||||
# Extract player API details
|
# Extract player API details
|
||||||
player_api_base, method = await self._extract_player_api_base(response.text)
|
player_api_base, method = await self._extract_player_api_base(response_text)
|
||||||
if not player_api_base:
|
if not player_api_base:
|
||||||
raise ExtractorError("Failed to extract player API URL")
|
raise ExtractorError("Failed to extract player API URL")
|
||||||
|
|
||||||
# Get player options
|
# Get player options
|
||||||
options_data = await self._get_player_options(response.text)
|
options_data = await self._get_player_options(response_text)
|
||||||
if not options_data:
|
if not options_data:
|
||||||
raise ExtractorError("No player options found")
|
raise ExtractorError("No player options found")
|
||||||
|
|
||||||
@@ -66,7 +67,7 @@ class LiveTVExtractor(BaseExtractor):
|
|||||||
if not stream_url:
|
if not stream_url:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
response = {
|
result = {
|
||||||
"destination_url": stream_url,
|
"destination_url": stream_url,
|
||||||
"request_headers": self.base_headers,
|
"request_headers": self.base_headers,
|
||||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
@@ -75,7 +76,7 @@ class LiveTVExtractor(BaseExtractor):
|
|||||||
# Set endpoint based on stream type
|
# Set endpoint based on stream type
|
||||||
if stream_data.get("type") == "mpd":
|
if stream_data.get("type") == "mpd":
|
||||||
if stream_data.get("drm_key_id") and stream_data.get("drm_key"):
|
if stream_data.get("drm_key_id") and stream_data.get("drm_key"):
|
||||||
response.update(
|
result.update(
|
||||||
{
|
{
|
||||||
"query_params": {
|
"query_params": {
|
||||||
"key_id": stream_data["drm_key_id"],
|
"key_id": stream_data["drm_key_id"],
|
||||||
@@ -85,7 +86,7 @@ class LiveTVExtractor(BaseExtractor):
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
return response
|
return result
|
||||||
|
|
||||||
raise ExtractorError("No valid stream found")
|
raise ExtractorError("No valid stream found")
|
||||||
|
|
||||||
@@ -120,7 +121,12 @@ class LiveTVExtractor(BaseExtractor):
|
|||||||
api_url = f"{api_base}{post}/{type_}/{nume}"
|
api_url = f"{api_base}{post}/{type_}/{nume}"
|
||||||
response = await self._make_request(api_url)
|
response = await self._make_request(api_url)
|
||||||
else:
|
else:
|
||||||
form_data = {"action": "doo_player_ajax", "post": post, "nume": nume, "type": type_}
|
# Use aiohttp FormData for POST requests
|
||||||
|
form_data = aiohttp.FormData()
|
||||||
|
form_data.add_field("action", "doo_player_ajax")
|
||||||
|
form_data.add_field("post", post)
|
||||||
|
form_data.add_field("nume", nume)
|
||||||
|
form_data.add_field("type", type_)
|
||||||
response = await self._make_request(api_base, method="POST", data=form_data)
|
response = await self._make_request(api_base, method="POST", data=form_data)
|
||||||
|
|
||||||
# Get iframe URL from API response
|
# Get iframe URL from API response
|
||||||
@@ -136,7 +142,7 @@ class LiveTVExtractor(BaseExtractor):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise ExtractorError(f"Failed to process player option: {str(e)}")
|
raise ExtractorError(f"Failed to process player option: {str(e)}")
|
||||||
|
|
||||||
async def _extract_stream_url(self, iframe_response: Response, iframe_url: str) -> Dict:
|
async def _extract_stream_url(self, iframe_response: HttpResponse, iframe_url: str) -> Dict:
|
||||||
"""
|
"""
|
||||||
Extract final stream URL from iframe content.
|
Extract final stream URL from iframe content.
|
||||||
"""
|
"""
|
||||||
@@ -147,8 +153,9 @@ class LiveTVExtractor(BaseExtractor):
|
|||||||
|
|
||||||
# Check if content is already a direct M3U8 stream
|
# Check if content is already a direct M3U8 stream
|
||||||
content_types = ["application/x-mpegurl", "application/vnd.apple.mpegurl"]
|
content_types = ["application/x-mpegurl", "application/vnd.apple.mpegurl"]
|
||||||
|
content_type = iframe_response.headers.get("content-type", "")
|
||||||
|
|
||||||
if any(ext in iframe_response.headers["content-type"] for ext in content_types):
|
if any(ext in content_type for ext in content_types):
|
||||||
return {"url": iframe_url, "type": "m3u8"}
|
return {"url": iframe_url, "type": "m3u8"}
|
||||||
|
|
||||||
stream_data = {}
|
stream_data = {}
|
||||||
|
|||||||
27
mediaflow_proxy/extractors/lulustream.py
Normal file
27
mediaflow_proxy/extractors/lulustream.py
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
import re
|
||||||
|
from typing import Dict, Any
|
||||||
|
|
||||||
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class LuluStreamExtractor(BaseExtractor):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
self.mediaflow_endpoint = "hls_manifest_proxy"
|
||||||
|
|
||||||
|
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||||
|
response = await self._make_request(url)
|
||||||
|
|
||||||
|
# See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/lulustream.py
|
||||||
|
pattern = r"""sources:\s*\[{file:\s*["'](?P<url>[^"']+)"""
|
||||||
|
match = re.search(pattern, response.text, re.DOTALL)
|
||||||
|
if not match:
|
||||||
|
raise ExtractorError("Failed to extract source URL")
|
||||||
|
final_url = match.group(1)
|
||||||
|
|
||||||
|
self.base_headers["referer"] = url
|
||||||
|
return {
|
||||||
|
"destination_url": final_url,
|
||||||
|
"request_headers": self.base_headers,
|
||||||
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
|
}
|
||||||
@@ -1,8 +1,7 @@
|
|||||||
import re
|
|
||||||
import string
|
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
|
|
||||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
from mediaflow_proxy.extractors.base import BaseExtractor
|
||||||
|
from mediaflow_proxy.utils.packed import eval_solver
|
||||||
|
|
||||||
|
|
||||||
class MixdropExtractor(BaseExtractor):
|
class MixdropExtractor(BaseExtractor):
|
||||||
@@ -12,23 +11,11 @@ class MixdropExtractor(BaseExtractor):
|
|||||||
"""Extract Mixdrop URL."""
|
"""Extract Mixdrop URL."""
|
||||||
if "club" in url:
|
if "club" in url:
|
||||||
url = url.replace("club", "ps").split("/2")[0]
|
url = url.replace("club", "ps").split("/2")[0]
|
||||||
response = await self._make_request(url, headers={"accept-language": "en-US,en;q=0.5"})
|
|
||||||
|
|
||||||
# Extract and decode URL
|
headers = {"accept-language": "en-US,en;q=0.5"}
|
||||||
match = re.search(r"}\('(.+)',.+,'(.+)'\.split", response.text)
|
patterns = [r'MDCore.wurl ?= ?"(.*?)"']
|
||||||
if not match:
|
|
||||||
raise ExtractorError("Failed to extract URL components")
|
|
||||||
|
|
||||||
s1, s2 = match.group(1, 2)
|
final_url = await eval_solver(self, url, headers, patterns)
|
||||||
schema = s1.split(";")[2][5:-1]
|
|
||||||
terms = s2.split("|")
|
|
||||||
|
|
||||||
# Build character mapping
|
|
||||||
charset = string.digits + string.ascii_letters
|
|
||||||
char_map = {charset[i]: terms[i] or charset[i] for i in range(len(terms))}
|
|
||||||
|
|
||||||
# Construct final URL
|
|
||||||
final_url = "https:" + "".join(char_map.get(c, c) for c in schema)
|
|
||||||
|
|
||||||
self.base_headers["referer"] = url
|
self.base_headers["referer"] = url
|
||||||
return {
|
return {
|
||||||
|
|||||||
@@ -22,8 +22,9 @@ class OkruExtractor(BaseExtractor):
|
|||||||
data_options = div.get("data-options")
|
data_options = div.get("data-options")
|
||||||
data = json.loads(data_options)
|
data = json.loads(data_options)
|
||||||
metadata = json.loads(data["flashvars"]["metadata"])
|
metadata = json.loads(data["flashvars"]["metadata"])
|
||||||
final_url = metadata["hlsMasterPlaylistUrl"]
|
final_url = (
|
||||||
|
metadata.get("hlsMasterPlaylistUrl") or metadata.get("hlsManifestUrl") or metadata.get("ondemandHls")
|
||||||
|
)
|
||||||
self.base_headers["referer"] = url
|
self.base_headers["referer"] = url
|
||||||
return {
|
return {
|
||||||
"destination_url": final_url,
|
"destination_url": final_url,
|
||||||
|
|||||||
188
mediaflow_proxy/extractors/sportsonline.py
Normal file
188
mediaflow_proxy/extractors/sportsonline.py
Normal file
@@ -0,0 +1,188 @@
|
|||||||
|
import re
|
||||||
|
import logging
|
||||||
|
from typing import Any, Dict
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||||
|
from mediaflow_proxy.utils.packed import unpack
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class SportsonlineExtractor(BaseExtractor):
|
||||||
|
"""Sportsonline/Sportzonline URL extractor for M3U8 streams.
|
||||||
|
|
||||||
|
Strategy:
|
||||||
|
1. Fetch page -> find first <iframe src="...">
|
||||||
|
2. Fetch iframe with Referer=https://sportzonline.st/
|
||||||
|
3. Collect packed eval blocks; if >=2 use second (index 1) else first.
|
||||||
|
4. Unpack P.A.C.K.E.R. and search var src="...m3u8".
|
||||||
|
5. Return final m3u8 with referer header.
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
- Multi-domain support for sportzonline.(st|bz|cc|top) and sportsonline.(si|sn)
|
||||||
|
- Uses P.A.C.K.E.R. unpacking from utils.packed module
|
||||||
|
- Returns streams suitable for hls_manifest_proxy endpoint
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, request_headers: dict):
|
||||||
|
super().__init__(request_headers)
|
||||||
|
self.mediaflow_endpoint = "hls_manifest_proxy"
|
||||||
|
|
||||||
|
def _detect_packed_blocks(self, html: str) -> list[str]:
|
||||||
|
"""
|
||||||
|
Detect and extract packed eval blocks from HTML.
|
||||||
|
"""
|
||||||
|
# Find all eval(function...) blocks - more greedy to capture full packed code
|
||||||
|
pattern = re.compile(r"eval\(function\(p,a,c,k,e,.*?\)\)(?:\s*;|\s*<)", re.DOTALL)
|
||||||
|
raw_matches = pattern.findall(html)
|
||||||
|
|
||||||
|
# If no matches with the strict pattern, try a more relaxed one
|
||||||
|
if not raw_matches:
|
||||||
|
# Try to find eval(function and capture until we find the closing ))
|
||||||
|
pattern = re.compile(r"eval\(function\(p,a,c,k,e,[dr]\).*?\}\(.*?\)\)", re.DOTALL)
|
||||||
|
raw_matches = pattern.findall(html)
|
||||||
|
|
||||||
|
return raw_matches
|
||||||
|
|
||||||
|
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||||
|
"""Main extraction flow: fetch page, extract iframe, unpack and find m3u8."""
|
||||||
|
try:
|
||||||
|
# Step 1: Fetch main page
|
||||||
|
logger.info(f"Fetching main page: {url}")
|
||||||
|
main_response = await self._make_request(url, timeout=15)
|
||||||
|
main_html = main_response.text
|
||||||
|
|
||||||
|
# Extract first iframe
|
||||||
|
iframe_match = re.search(r'<iframe\s+src=["\']([^"\']+)["\']', main_html, re.IGNORECASE)
|
||||||
|
if not iframe_match:
|
||||||
|
raise ExtractorError("No iframe found on the page")
|
||||||
|
|
||||||
|
iframe_url = iframe_match.group(1)
|
||||||
|
|
||||||
|
# Normalize iframe URL
|
||||||
|
if iframe_url.startswith("//"):
|
||||||
|
iframe_url = "https:" + iframe_url
|
||||||
|
elif iframe_url.startswith("/"):
|
||||||
|
parsed_main = urlparse(url)
|
||||||
|
iframe_url = f"{parsed_main.scheme}://{parsed_main.netloc}{iframe_url}"
|
||||||
|
|
||||||
|
logger.info(f"Found iframe URL: {iframe_url}")
|
||||||
|
|
||||||
|
# Step 2: Fetch iframe with Referer
|
||||||
|
iframe_headers = {
|
||||||
|
"Referer": "https://sportzonline.st/",
|
||||||
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36",
|
||||||
|
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||||
|
"Accept-Language": "en-US,en;q=0.9,it;q=0.8",
|
||||||
|
"Cache-Control": "no-cache",
|
||||||
|
}
|
||||||
|
|
||||||
|
iframe_response = await self._make_request(iframe_url, headers=iframe_headers, timeout=15)
|
||||||
|
iframe_html = iframe_response.text
|
||||||
|
|
||||||
|
logger.debug(f"Iframe HTML length: {len(iframe_html)}")
|
||||||
|
|
||||||
|
# Step 3: Detect packed blocks
|
||||||
|
packed_blocks = self._detect_packed_blocks(iframe_html)
|
||||||
|
|
||||||
|
logger.info(f"Found {len(packed_blocks)} packed blocks")
|
||||||
|
|
||||||
|
if not packed_blocks:
|
||||||
|
logger.warning("No packed blocks found, trying direct m3u8 search")
|
||||||
|
# Fallback: try direct m3u8 search
|
||||||
|
direct_match = re.search(r'(https?://[^\s"\'>]+\.m3u8[^\s"\'>]*)', iframe_html)
|
||||||
|
if direct_match:
|
||||||
|
m3u8_url = direct_match.group(1)
|
||||||
|
logger.info(f"Found direct m3u8 URL: {m3u8_url}")
|
||||||
|
|
||||||
|
return {
|
||||||
|
"destination_url": m3u8_url,
|
||||||
|
"request_headers": {"Referer": iframe_url, "User-Agent": iframe_headers["User-Agent"]},
|
||||||
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
raise ExtractorError("No packed blocks or direct m3u8 URL found")
|
||||||
|
|
||||||
|
logger.info(f"Found {len(packed_blocks)} packed blocks")
|
||||||
|
|
||||||
|
# Choose block: if >=2 use second (index 1), else first (index 0)
|
||||||
|
chosen_idx = 1 if len(packed_blocks) > 1 else 0
|
||||||
|
m3u8_url = None
|
||||||
|
unpacked_code = None
|
||||||
|
|
||||||
|
logger.info(f"Chosen packed block index: {chosen_idx}")
|
||||||
|
|
||||||
|
# Try to unpack chosen block
|
||||||
|
try:
|
||||||
|
unpacked_code = unpack(packed_blocks[chosen_idx])
|
||||||
|
logger.info(f"Successfully unpacked block {chosen_idx}")
|
||||||
|
logger.debug(f"Unpacked code preview: {unpacked_code[:500] if unpacked_code else 'empty'}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to unpack block {chosen_idx}: {e}")
|
||||||
|
|
||||||
|
# Search for var src="...m3u8" with multiple patterns
|
||||||
|
if unpacked_code:
|
||||||
|
# Try multiple patterns as in the TypeScript version
|
||||||
|
patterns = [
|
||||||
|
r'var\s+src\s*=\s*["\']([^"\']+)["\']', # var src="..."
|
||||||
|
r'src\s*=\s*["\']([^"\']+\.m3u8[^"\']*)["\']', # src="...m3u8"
|
||||||
|
r'file\s*:\s*["\']([^"\']+\.m3u8[^"\']*)["\']', # file: "...m3u8"
|
||||||
|
r'["\']([^"\']*https?://[^"\']+\.m3u8[^"\']*)["\']', # any m3u8 URL
|
||||||
|
]
|
||||||
|
|
||||||
|
for pattern in patterns:
|
||||||
|
src_match = re.search(pattern, unpacked_code)
|
||||||
|
if src_match:
|
||||||
|
m3u8_url = src_match.group(1)
|
||||||
|
# Verify it looks like a valid m3u8 URL
|
||||||
|
if ".m3u8" in m3u8_url or "http" in m3u8_url:
|
||||||
|
break
|
||||||
|
m3u8_url = None
|
||||||
|
|
||||||
|
# If not found, try all other blocks
|
||||||
|
if not m3u8_url:
|
||||||
|
logger.info("m3u8 not found in chosen block, trying all blocks")
|
||||||
|
for i, block in enumerate(packed_blocks):
|
||||||
|
if i == chosen_idx:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
unpacked_code = unpack(block)
|
||||||
|
# Use the same patterns as above
|
||||||
|
for pattern in [
|
||||||
|
r'var\s+src\s*=\s*["\']([^"\']+)["\']',
|
||||||
|
r'src\s*=\s*["\']([^"\']+\.m3u8[^"\']*)["\']',
|
||||||
|
r'file\s*:\s*["\']([^"\']+\.m3u8[^"\']*)["\']',
|
||||||
|
r'["\']([^"\']*https?://[^"\']+\.m3u8[^"\']*)["\']',
|
||||||
|
]:
|
||||||
|
src_match = re.search(pattern, unpacked_code)
|
||||||
|
if src_match:
|
||||||
|
test_url = src_match.group(1)
|
||||||
|
if ".m3u8" in test_url or "http" in test_url:
|
||||||
|
m3u8_url = test_url
|
||||||
|
logger.info(f"Found m3u8 in block {i}")
|
||||||
|
break
|
||||||
|
|
||||||
|
if m3u8_url:
|
||||||
|
break
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"Failed to process block {i}: {e}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not m3u8_url:
|
||||||
|
raise ExtractorError("Could not extract m3u8 URL from packed code")
|
||||||
|
|
||||||
|
logger.info(f"Successfully extracted m3u8 URL: {m3u8_url}")
|
||||||
|
|
||||||
|
# Return stream configuration
|
||||||
|
return {
|
||||||
|
"destination_url": m3u8_url,
|
||||||
|
"request_headers": {"Referer": iframe_url, "User-Agent": iframe_headers["User-Agent"]},
|
||||||
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
|
}
|
||||||
|
|
||||||
|
except ExtractorError:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.exception(f"Sportsonline extraction failed for {url}")
|
||||||
|
raise ExtractorError(f"Extraction failed: {str(e)}")
|
||||||
@@ -1,6 +1,5 @@
|
|||||||
import re
|
import re
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
|
|
||||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||||
|
|
||||||
|
|
||||||
@@ -15,14 +14,10 @@ class StreamtapeExtractor(BaseExtractor):
|
|||||||
matches = re.findall(r"id=.*?(?=')", response.text)
|
matches = re.findall(r"id=.*?(?=')", response.text)
|
||||||
if not matches:
|
if not matches:
|
||||||
raise ExtractorError("Failed to extract URL components")
|
raise ExtractorError("Failed to extract URL components")
|
||||||
final_url = next(
|
i = 0
|
||||||
(
|
for i in range(len(matches)):
|
||||||
f"https://streamtape.com/get_video?{matches[i + 1]}"
|
if matches[i - 1] == matches[i] and "ip=" in matches[i]:
|
||||||
for i in range(len(matches) - 1)
|
final_url = f"https://streamtape.com/get_video?{matches[i]}"
|
||||||
if matches[i] == matches[i + 1]
|
|
||||||
),
|
|
||||||
None,
|
|
||||||
)
|
|
||||||
|
|
||||||
self.base_headers["referer"] = url
|
self.base_headers["referer"] = url
|
||||||
return {
|
return {
|
||||||
|
|||||||
74
mediaflow_proxy/extractors/streamwish.py
Normal file
74
mediaflow_proxy/extractors/streamwish.py
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
import re
|
||||||
|
from typing import Dict, Any
|
||||||
|
from urllib.parse import urljoin, urlparse
|
||||||
|
|
||||||
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||||
|
from mediaflow_proxy.utils.packed import eval_solver
|
||||||
|
|
||||||
|
|
||||||
|
class StreamWishExtractor(BaseExtractor):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
self.mediaflow_endpoint = "hls_manifest_proxy"
|
||||||
|
|
||||||
|
async def extract(self, url: str, **_kwargs: Any) -> Dict[str, Any]:
|
||||||
|
referer = self.base_headers.get("Referer")
|
||||||
|
if not referer:
|
||||||
|
parsed = urlparse(url)
|
||||||
|
referer = f"{parsed.scheme}://{parsed.netloc}/"
|
||||||
|
|
||||||
|
headers = {"Referer": referer}
|
||||||
|
response = await self._make_request(url, headers=headers)
|
||||||
|
|
||||||
|
iframe_match = re.search(r'<iframe[^>]+src=["\']([^"\']+)["\']', response.text, re.DOTALL)
|
||||||
|
iframe_url = urljoin(url, iframe_match.group(1)) if iframe_match else url
|
||||||
|
|
||||||
|
iframe_response = await self._make_request(iframe_url, headers=headers)
|
||||||
|
html = iframe_response.text
|
||||||
|
|
||||||
|
final_url = self._extract_m3u8(html)
|
||||||
|
|
||||||
|
if not final_url and "eval(function(p,a,c,k,e,d)" in html:
|
||||||
|
try:
|
||||||
|
final_url = await eval_solver(
|
||||||
|
self,
|
||||||
|
iframe_url,
|
||||||
|
headers,
|
||||||
|
[
|
||||||
|
# absolute m3u8
|
||||||
|
r'(https?://[^"\']+\.m3u8[^"\']*)',
|
||||||
|
# relative stream paths
|
||||||
|
r'(\/stream\/[^"\']+\.m3u8[^"\']*)',
|
||||||
|
],
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
final_url = None
|
||||||
|
|
||||||
|
if not final_url:
|
||||||
|
raise ExtractorError("StreamWish: Failed to extract m3u8")
|
||||||
|
|
||||||
|
if final_url.startswith("/"):
|
||||||
|
final_url = urljoin(iframe_url, final_url)
|
||||||
|
|
||||||
|
origin = f"{urlparse(referer).scheme}://{urlparse(referer).netloc}"
|
||||||
|
self.base_headers.update(
|
||||||
|
{
|
||||||
|
"Referer": referer,
|
||||||
|
"Origin": origin,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"destination_url": final_url,
|
||||||
|
"request_headers": self.base_headers,
|
||||||
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
|
"stream_transformer": "ts_stream",
|
||||||
|
}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_m3u8(text: str) -> str | None:
|
||||||
|
"""
|
||||||
|
Extract first absolute m3u8 URL from text
|
||||||
|
"""
|
||||||
|
match = re.search(r'https?://[^"\']+\.m3u8[^"\']*', text)
|
||||||
|
return match.group(0) if match else None
|
||||||
@@ -1,27 +1,64 @@
|
|||||||
import re
|
import re
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
|
from urllib.parse import urljoin, urlparse
|
||||||
|
|
||||||
from mediaflow_proxy.extractors.base import BaseExtractor
|
from bs4 import BeautifulSoup, SoupStrainer
|
||||||
|
from curl_cffi.requests import AsyncSession
|
||||||
|
|
||||||
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||||
|
from mediaflow_proxy.utils.packed import unpack, detect, UnpackingError
|
||||||
|
|
||||||
|
|
||||||
class SupervideoExtractor(BaseExtractor):
|
class SupervideoExtractor(BaseExtractor):
|
||||||
"""Supervideo URL extractor."""
|
"""Supervideo URL extractor.
|
||||||
|
|
||||||
|
Uses curl_cffi to bypass Cloudflare protection.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
self.mediaflow_endpoint = "hls_manifest_proxy"
|
||||||
|
|
||||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||||
"""Extract Supervideo URL."""
|
"""Extract video URL from Supervideo.
|
||||||
response = await self._make_request(url)
|
|
||||||
# Extract and decode URL
|
|
||||||
s2 = re.search(r"\}\('(.+)',.+,'(.+)'\.split", response.text).group(2)
|
|
||||||
terms = s2.split("|")
|
|
||||||
hfs = next(terms[i] for i in range(terms.index("file"), len(terms)) if "hfs" in terms[i])
|
|
||||||
result = terms[terms.index("urlset") + 1 : terms.index("hls")]
|
|
||||||
|
|
||||||
base_url = f"https://{hfs}.serversicuro.cc/hls/"
|
Uses curl_cffi with Chrome impersonation to bypass Cloudflare.
|
||||||
final_url = base_url + ",".join(reversed(result)) + (".urlset/master.m3u8" if result else "")
|
"""
|
||||||
|
|
||||||
|
patterns = [r'file:"(.*?)"']
|
||||||
|
|
||||||
|
try:
|
||||||
|
async with AsyncSession() as session:
|
||||||
|
response = await session.get(url, impersonate="chrome")
|
||||||
|
|
||||||
|
if response.status_code != 200:
|
||||||
|
raise ExtractorError(f"HTTP {response.status_code} while fetching {url}")
|
||||||
|
|
||||||
|
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("script"))
|
||||||
|
script_all = soup.find_all("script")
|
||||||
|
|
||||||
|
for script in script_all:
|
||||||
|
if script.text and detect(script.text):
|
||||||
|
unpacked_code = unpack(script.text)
|
||||||
|
for pattern in patterns:
|
||||||
|
match = re.search(pattern, unpacked_code)
|
||||||
|
if match:
|
||||||
|
extracted_url = match.group(1)
|
||||||
|
if not urlparse(extracted_url).scheme:
|
||||||
|
extracted_url = urljoin(url, extracted_url)
|
||||||
|
|
||||||
self.base_headers["referer"] = url
|
self.base_headers["referer"] = url
|
||||||
return {
|
return {
|
||||||
"destination_url": final_url,
|
"destination_url": extracted_url,
|
||||||
"request_headers": self.base_headers,
|
"request_headers": self.base_headers,
|
||||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
raise ExtractorError("No packed JS found or no file URL pattern matched")
|
||||||
|
|
||||||
|
except UnpackingError as e:
|
||||||
|
raise ExtractorError(f"Failed to unpack Supervideo JS: {e}")
|
||||||
|
except Exception as e:
|
||||||
|
if isinstance(e, ExtractorError):
|
||||||
|
raise
|
||||||
|
raise ExtractorError(f"Supervideo extraction failed: {e}")
|
||||||
|
|||||||
62
mediaflow_proxy/extractors/turbovidplay.py
Normal file
62
mediaflow_proxy/extractors/turbovidplay.py
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class TurboVidPlayExtractor(BaseExtractor):
|
||||||
|
domains = [
|
||||||
|
"turboviplay.com",
|
||||||
|
"emturbovid.com",
|
||||||
|
"tuborstb.co",
|
||||||
|
"javggvideo.xyz",
|
||||||
|
"stbturbo.xyz",
|
||||||
|
"turbovidhls.com",
|
||||||
|
]
|
||||||
|
|
||||||
|
mediaflow_endpoint = "hls_manifest_proxy"
|
||||||
|
|
||||||
|
async def extract(self, url: str, **kwargs):
|
||||||
|
#
|
||||||
|
# 1. Load embed
|
||||||
|
#
|
||||||
|
response = await self._make_request(url)
|
||||||
|
html = response.text
|
||||||
|
|
||||||
|
#
|
||||||
|
# 2. Extract urlPlay or data-hash
|
||||||
|
#
|
||||||
|
m = re.search(r'(?:urlPlay|data-hash)\s*=\s*[\'"]([^\'"]+)', html)
|
||||||
|
if not m:
|
||||||
|
raise ExtractorError("TurboViPlay: No media URL found")
|
||||||
|
|
||||||
|
media_url = m.group(1)
|
||||||
|
|
||||||
|
# Normalize protocol
|
||||||
|
if media_url.startswith("//"):
|
||||||
|
media_url = "https:" + media_url
|
||||||
|
elif media_url.startswith("/"):
|
||||||
|
media_url = response.get_origin() + media_url
|
||||||
|
|
||||||
|
#
|
||||||
|
# 3. Fetch the intermediate playlist
|
||||||
|
#
|
||||||
|
data_resp = await self._make_request(media_url, headers={"Referer": url})
|
||||||
|
playlist = data_resp.text
|
||||||
|
|
||||||
|
#
|
||||||
|
# 4. Extract real m3u8 URL
|
||||||
|
#
|
||||||
|
m2 = re.search(r'https?://[^\'"\s]+\.m3u8', playlist)
|
||||||
|
if not m2:
|
||||||
|
raise ExtractorError("TurboViPlay: Unable to extract playlist URL")
|
||||||
|
|
||||||
|
real_m3u8 = m2.group(0)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"destination_url": real_m3u8,
|
||||||
|
"request_headers": {"origin": response.get_origin()},
|
||||||
|
"propagate_response_headers": {"content-type": "video/mp2t"},
|
||||||
|
"remove_response_headers": ["content-length", "content-range"],
|
||||||
|
"mediaflow_endpoint": "hls_manifest_proxy",
|
||||||
|
"stream_transformer": "ts_stream", # Use TS transformer for PNG/padding stripping
|
||||||
|
}
|
||||||
252
mediaflow_proxy/extractors/vavoo.py
Normal file
252
mediaflow_proxy/extractors/vavoo.py
Normal file
@@ -0,0 +1,252 @@
|
|||||||
|
import logging
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class VavooExtractor(BaseExtractor):
|
||||||
|
"""Vavoo URL extractor for resolving vavoo.to links.
|
||||||
|
|
||||||
|
Supports two URL formats:
|
||||||
|
1. Web-VOD API links: https://vavoo.to/web-vod/api/get?link=...
|
||||||
|
These redirect (302) to external video hosts (Doodstream, etc.)
|
||||||
|
2. Legacy mediahubmx format (currently broken on Vavoo's end)
|
||||||
|
|
||||||
|
Features:
|
||||||
|
- Uses BaseExtractor's retry/timeouts
|
||||||
|
- Improved headers to mimic Android okhttp client
|
||||||
|
- Robust JSON handling and logging
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, request_headers: dict):
|
||||||
|
super().__init__(request_headers)
|
||||||
|
self.mediaflow_endpoint = "proxy_stream_endpoint"
|
||||||
|
|
||||||
|
async def _resolve_web_vod_link(self, url: str) -> str:
|
||||||
|
"""Resolve a web-vod API link by getting the redirect Location header."""
|
||||||
|
import aiohttp
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Use aiohttp directly with allow_redirects=False to get the Location header
|
||||||
|
timeout = aiohttp.ClientTimeout(total=10)
|
||||||
|
async with aiohttp.ClientSession(timeout=timeout) as session:
|
||||||
|
async with session.get(
|
||||||
|
url,
|
||||||
|
headers={"Accept": "application/json"},
|
||||||
|
allow_redirects=False,
|
||||||
|
) as resp:
|
||||||
|
# Check for redirect
|
||||||
|
if resp.status in (301, 302, 303, 307, 308):
|
||||||
|
location = resp.headers.get("Location") or resp.headers.get("location")
|
||||||
|
if location:
|
||||||
|
logger.info(f"Vavoo web-vod redirected to: {location}")
|
||||||
|
return location
|
||||||
|
|
||||||
|
# If we got a 200, the response might contain the URL
|
||||||
|
if resp.status == 200:
|
||||||
|
text = await resp.text()
|
||||||
|
if text and text.startswith("http"):
|
||||||
|
logger.info(f"Vavoo web-vod resolved to: {text.strip()}")
|
||||||
|
return text.strip()
|
||||||
|
|
||||||
|
raise ExtractorError(f"Vavoo web-vod API returned unexpected status {resp.status}")
|
||||||
|
|
||||||
|
except ExtractorError:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
raise ExtractorError(f"Failed to resolve Vavoo web-vod link: {e}")
|
||||||
|
|
||||||
|
async def get_auth_signature(self) -> Optional[str]:
|
||||||
|
"""Get authentication signature for Vavoo API (async)."""
|
||||||
|
headers = {
|
||||||
|
"user-agent": "okhttp/4.11.0",
|
||||||
|
"accept": "application/json",
|
||||||
|
"content-type": "application/json; charset=utf-8",
|
||||||
|
"accept-encoding": "gzip",
|
||||||
|
}
|
||||||
|
import time
|
||||||
|
|
||||||
|
current_time = int(time.time() * 1000)
|
||||||
|
|
||||||
|
data = {
|
||||||
|
"token": "",
|
||||||
|
"reason": "app-blur",
|
||||||
|
"locale": "de",
|
||||||
|
"theme": "dark",
|
||||||
|
"metadata": {
|
||||||
|
"device": {
|
||||||
|
"type": "Handset",
|
||||||
|
"brand": "google",
|
||||||
|
"model": "Pixel",
|
||||||
|
"name": "sdk_gphone64_arm64",
|
||||||
|
"uniqueId": "d10e5d99ab665233",
|
||||||
|
},
|
||||||
|
"os": {"name": "android", "version": "13"},
|
||||||
|
"app": {"platform": "android", "version": "3.1.21"},
|
||||||
|
"version": {"package": "tv.vavoo.app", "binary": "3.1.21", "js": "3.1.21"},
|
||||||
|
},
|
||||||
|
"appFocusTime": 0,
|
||||||
|
"playerActive": False,
|
||||||
|
"playDuration": 0,
|
||||||
|
"devMode": False,
|
||||||
|
"hasAddon": True,
|
||||||
|
"castConnected": False,
|
||||||
|
"package": "tv.vavoo.app",
|
||||||
|
"version": "3.1.21",
|
||||||
|
"process": "app",
|
||||||
|
"firstAppStart": current_time,
|
||||||
|
"lastAppStart": current_time,
|
||||||
|
"ipLocation": "",
|
||||||
|
"adblockEnabled": True,
|
||||||
|
"proxy": {
|
||||||
|
"supported": ["ss", "openvpn"],
|
||||||
|
"engine": "ss",
|
||||||
|
"ssVersion": 1,
|
||||||
|
"enabled": True,
|
||||||
|
"autoServer": True,
|
||||||
|
"id": "de-fra",
|
||||||
|
},
|
||||||
|
"iap": {"supported": False},
|
||||||
|
}
|
||||||
|
|
||||||
|
try:
|
||||||
|
resp = await self._make_request(
|
||||||
|
"https://www.vavoo.tv/api/app/ping",
|
||||||
|
method="POST",
|
||||||
|
json=data,
|
||||||
|
headers=headers,
|
||||||
|
timeout=10,
|
||||||
|
retries=2,
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
result = resp.json()
|
||||||
|
except Exception:
|
||||||
|
logger.warning("Vavoo ping returned non-json response (status=%s).", resp.status)
|
||||||
|
return None
|
||||||
|
|
||||||
|
addon_sig = result.get("addonSig") if isinstance(result, dict) else None
|
||||||
|
if addon_sig:
|
||||||
|
logger.info("Successfully obtained Vavoo authentication signature")
|
||||||
|
return addon_sig
|
||||||
|
else:
|
||||||
|
logger.warning("No addonSig in Vavoo API response: %s", result)
|
||||||
|
return None
|
||||||
|
except ExtractorError as e:
|
||||||
|
logger.warning("Failed to get Vavoo auth signature: %s", e)
|
||||||
|
return None
|
||||||
|
|
||||||
|
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||||
|
"""Extract Vavoo stream URL (async).
|
||||||
|
|
||||||
|
Supports:
|
||||||
|
- Direct play URLs: https://vavoo.to/play/{id}/index.m3u8 (Live TV)
|
||||||
|
- Web-VOD API links: https://vavoo.to/web-vod/api/get?link=...
|
||||||
|
- Legacy mediahubmx links (may not work due to Vavoo API changes)
|
||||||
|
"""
|
||||||
|
if "vavoo.to" not in url:
|
||||||
|
raise ExtractorError("Not a valid Vavoo URL")
|
||||||
|
|
||||||
|
# Check if this is a direct play URL (Live TV)
|
||||||
|
# These URLs are already m3u8 streams but need auth signature
|
||||||
|
if "/play/" in url and url.endswith(".m3u8"):
|
||||||
|
signature = await self.get_auth_signature()
|
||||||
|
if not signature:
|
||||||
|
raise ExtractorError("Failed to get Vavoo authentication signature for Live TV")
|
||||||
|
|
||||||
|
stream_headers = {
|
||||||
|
"user-agent": "okhttp/4.11.0",
|
||||||
|
"referer": "https://vavoo.to/",
|
||||||
|
"mediahubmx-signature": signature,
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
"destination_url": url,
|
||||||
|
"request_headers": stream_headers,
|
||||||
|
"mediaflow_endpoint": "hls_manifest_proxy",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check if this is a web-vod API link (new format)
|
||||||
|
if "/web-vod/api/get" in url:
|
||||||
|
resolved_url = await self._resolve_web_vod_link(url)
|
||||||
|
stream_headers = {
|
||||||
|
"user-agent": self.base_headers.get("user-agent", "Mozilla/5.0"),
|
||||||
|
"referer": "https://vavoo.to/",
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
"destination_url": resolved_url,
|
||||||
|
"request_headers": stream_headers,
|
||||||
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Legacy mediahubmx flow
|
||||||
|
signature = await self.get_auth_signature()
|
||||||
|
if not signature:
|
||||||
|
raise ExtractorError("Failed to get Vavoo authentication signature")
|
||||||
|
|
||||||
|
resolved_url = await self._resolve_vavoo_link(url, signature)
|
||||||
|
if not resolved_url:
|
||||||
|
raise ExtractorError("Failed to resolve Vavoo URL")
|
||||||
|
|
||||||
|
stream_headers = {
|
||||||
|
"user-agent": self.base_headers.get("user-agent", "okhttp/4.11.0"),
|
||||||
|
"referer": "https://vavoo.to/",
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
"destination_url": resolved_url,
|
||||||
|
"request_headers": stream_headers,
|
||||||
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
|
}
|
||||||
|
|
||||||
|
async def _resolve_vavoo_link(self, link: str, signature: str) -> Optional[str]:
|
||||||
|
"""Resolve a Vavoo link using the MediaHubMX API (async)."""
|
||||||
|
headers = {
|
||||||
|
"user-agent": "okhttp/4.11.0",
|
||||||
|
"accept": "application/json",
|
||||||
|
"content-type": "application/json; charset=utf-8",
|
||||||
|
"accept-encoding": "gzip",
|
||||||
|
"mediahubmx-signature": signature,
|
||||||
|
}
|
||||||
|
data = {"language": "de", "region": "AT", "url": link, "clientVersion": "3.1.21"}
|
||||||
|
try:
|
||||||
|
logger.info(f"Attempting to resolve Vavoo URL: {link}")
|
||||||
|
resp = await self._make_request(
|
||||||
|
"https://vavoo.to/mediahubmx-resolve.json",
|
||||||
|
method="POST",
|
||||||
|
json=data,
|
||||||
|
headers=headers,
|
||||||
|
timeout=12,
|
||||||
|
retries=3,
|
||||||
|
backoff_factor=0.6,
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
result = resp.json()
|
||||||
|
except Exception:
|
||||||
|
logger.warning(
|
||||||
|
"Vavoo resolve returned non-json response (status=%s). Body preview: %s",
|
||||||
|
resp.status,
|
||||||
|
getattr(resp, "text", "")[:500],
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
|
||||||
|
logger.debug("Vavoo API response: %s", result)
|
||||||
|
|
||||||
|
# Accept either list or dict with 'url'
|
||||||
|
if isinstance(result, list) and result and isinstance(result[0], dict) and result[0].get("url"):
|
||||||
|
resolved_url = result[0]["url"]
|
||||||
|
logger.info("Successfully resolved Vavoo URL to: %s", resolved_url)
|
||||||
|
return resolved_url
|
||||||
|
elif isinstance(result, dict) and result.get("url"):
|
||||||
|
resolved_url = result["url"]
|
||||||
|
logger.info("Successfully resolved Vavoo URL to: %s", resolved_url)
|
||||||
|
return resolved_url
|
||||||
|
else:
|
||||||
|
logger.warning("No URL found in Vavoo API response: %s", result)
|
||||||
|
return None
|
||||||
|
except ExtractorError as e:
|
||||||
|
logger.error(f"Vavoo resolution failed for URL {link}: {e}")
|
||||||
|
raise ExtractorError(f"Vavoo resolution failed: {str(e)}") from e
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Unexpected error while resolving Vavoo URL {link}: {e}")
|
||||||
|
raise ExtractorError(f"Vavoo resolution failed: {str(e)}") from e
|
||||||
57
mediaflow_proxy/extractors/vidmoly.py
Normal file
57
mediaflow_proxy/extractors/vidmoly.py
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
import re
|
||||||
|
from typing import Dict, Any
|
||||||
|
from urllib.parse import urljoin, urlparse
|
||||||
|
|
||||||
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class VidmolyExtractor(BaseExtractor):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
self.mediaflow_endpoint = "hls_manifest_proxy"
|
||||||
|
|
||||||
|
async def extract(self, url: str) -> Dict[str, Any]:
|
||||||
|
parsed = urlparse(url)
|
||||||
|
if not parsed.hostname or "vidmoly" not in parsed.hostname:
|
||||||
|
raise ExtractorError("VIDMOLY: Invalid domain")
|
||||||
|
|
||||||
|
headers = {
|
||||||
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||||
|
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||||
|
"Chrome/120 Safari/537.36",
|
||||||
|
"Referer": url,
|
||||||
|
"Sec-Fetch-Dest": "iframe",
|
||||||
|
}
|
||||||
|
|
||||||
|
# --- Fetch embed page ---
|
||||||
|
response = await self._make_request(url, headers=headers)
|
||||||
|
html = response.text
|
||||||
|
|
||||||
|
# --- Extract master m3u8 ---
|
||||||
|
match = re.search(r'sources\s*:\s*\[\s*\{\s*file\s*:\s*[\'"]([^\'"]+)', html)
|
||||||
|
if not match:
|
||||||
|
raise ExtractorError("VIDMOLY: Stream URL not found")
|
||||||
|
|
||||||
|
master_url = match.group(1)
|
||||||
|
|
||||||
|
if not master_url.startswith("http"):
|
||||||
|
master_url = urljoin(url, master_url)
|
||||||
|
|
||||||
|
# --- Validate stream (prevents Stremio timeout) ---
|
||||||
|
try:
|
||||||
|
test = await self._make_request(master_url, headers=headers)
|
||||||
|
except Exception as e:
|
||||||
|
if "timeout" in str(e).lower():
|
||||||
|
raise ExtractorError("VIDMOLY: Request timed out")
|
||||||
|
raise
|
||||||
|
|
||||||
|
if test.status >= 400:
|
||||||
|
raise ExtractorError(f"VIDMOLY: Stream unavailable ({test.status})")
|
||||||
|
|
||||||
|
# Return MASTER playlist, not variant
|
||||||
|
# Let MediaFlow Proxy handle variants
|
||||||
|
return {
|
||||||
|
"destination_url": master_url,
|
||||||
|
"request_headers": headers,
|
||||||
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
|
}
|
||||||
65
mediaflow_proxy/extractors/vidoza.py
Normal file
65
mediaflow_proxy/extractors/vidoza.py
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
import re
|
||||||
|
from typing import Dict, Any
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class VidozaExtractor(BaseExtractor):
|
||||||
|
def __init__(self, request_headers: dict):
|
||||||
|
super().__init__(request_headers)
|
||||||
|
self.mediaflow_endpoint = "proxy_stream_endpoint"
|
||||||
|
|
||||||
|
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||||
|
parsed = urlparse(url)
|
||||||
|
|
||||||
|
if not parsed.hostname or not (
|
||||||
|
parsed.hostname.endswith("vidoza.net") or parsed.hostname.endswith("videzz.net")
|
||||||
|
):
|
||||||
|
raise ExtractorError("VIDOZA: Invalid domain")
|
||||||
|
|
||||||
|
# Use the correct referer for clones
|
||||||
|
referer = f"https://{parsed.hostname}/"
|
||||||
|
|
||||||
|
headers = self.base_headers.copy()
|
||||||
|
headers.update(
|
||||||
|
{
|
||||||
|
"referer": referer,
|
||||||
|
"user-agent": (
|
||||||
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||||
|
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||||
|
"Chrome/120.0.0.0 Safari/537.36"
|
||||||
|
),
|
||||||
|
"accept": "*/*",
|
||||||
|
"accept-language": "en-US,en;q=0.9",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# 1) Fetch embed page
|
||||||
|
response = await self._make_request(url, headers=headers)
|
||||||
|
html = response.text or ""
|
||||||
|
|
||||||
|
if not html:
|
||||||
|
raise ExtractorError("VIDOZA: Empty HTML")
|
||||||
|
|
||||||
|
# 2) Extract video URL
|
||||||
|
pattern = re.compile(
|
||||||
|
r"""["']?\s*(?:file|src)\s*["']?\s*[:=,]?\s*["'](?P<url>[^"']+)"""
|
||||||
|
r"""(?:[^}>\]]+)["']?\s*res\s*["']?\s*[:=]\s*["']?(?P<label>[^"',]+)""",
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
|
||||||
|
match = pattern.search(html)
|
||||||
|
if not match:
|
||||||
|
raise ExtractorError("VIDOZA: Video URL not found")
|
||||||
|
|
||||||
|
video_url = match.group("url")
|
||||||
|
|
||||||
|
if video_url.startswith("//"):
|
||||||
|
video_url = "https:" + video_url
|
||||||
|
|
||||||
|
return {
|
||||||
|
"destination_url": video_url,
|
||||||
|
"request_headers": headers,
|
||||||
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
|
}
|
||||||
@@ -1,7 +1,6 @@
|
|||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from urllib.parse import urlparse, parse_qs
|
|
||||||
|
|
||||||
from bs4 import BeautifulSoup, SoupStrainer
|
from bs4 import BeautifulSoup, SoupStrainer
|
||||||
|
|
||||||
@@ -17,7 +16,7 @@ class VixCloudExtractor(BaseExtractor):
|
|||||||
|
|
||||||
async def version(self, site_url: str) -> str:
|
async def version(self, site_url: str) -> str:
|
||||||
"""Get version of VixCloud Parent Site."""
|
"""Get version of VixCloud Parent Site."""
|
||||||
base_url = f"{site_url}/richiedi-un-titolo"
|
base_url = f"{site_url}/request-a-title"
|
||||||
response = await self._make_request(
|
response = await self._make_request(
|
||||||
base_url,
|
base_url,
|
||||||
headers={
|
headers={
|
||||||
@@ -25,7 +24,7 @@ class VixCloudExtractor(BaseExtractor):
|
|||||||
"Origin": f"{site_url}",
|
"Origin": f"{site_url}",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
if response.status_code != 200:
|
if response.status != 200:
|
||||||
raise ExtractorError("Outdated Url")
|
raise ExtractorError("Outdated Url")
|
||||||
# Soup the response
|
# Soup the response
|
||||||
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("div", {"id": "app"}))
|
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("div", {"id": "app"}))
|
||||||
@@ -49,18 +48,16 @@ class VixCloudExtractor(BaseExtractor):
|
|||||||
elif "movie" in url or "tv" in url:
|
elif "movie" in url or "tv" in url:
|
||||||
response = await self._make_request(url)
|
response = await self._make_request(url)
|
||||||
|
|
||||||
if response.status_code != 200:
|
if response.status != 200:
|
||||||
raise ExtractorError("Failed to extract URL components, Invalid Request")
|
raise ExtractorError("Failed to extract URL components, Invalid Request")
|
||||||
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("body"))
|
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("body"))
|
||||||
if soup:
|
if soup:
|
||||||
script = soup.find("body").find("script").text
|
script = soup.find("body").find("script").text
|
||||||
token = re.search(r"'token':\s*'(\w+)'", script).group(1)
|
token = re.search(r"'token':\s*'(\w+)'", script).group(1)
|
||||||
expires = re.search(r"'expires':\s*'(\d+)'", script).group(1)
|
expires = re.search(r"'expires':\s*'(\d+)'", script).group(1)
|
||||||
canPlayFHD = re.search(r"window\.canPlayFHD\s*=\s*(\w+)", script).group(1)
|
|
||||||
print(script,"A")
|
|
||||||
server_url = re.search(r"url:\s*'([^']+)'", script).group(1)
|
server_url = re.search(r"url:\s*'([^']+)'", script).group(1)
|
||||||
if "?b=1" in server_url:
|
if "?b=1" in server_url:
|
||||||
final_url = f'{server_url}&token={token}&expires={expires}'
|
final_url = f"{server_url}&token={token}&expires={expires}"
|
||||||
else:
|
else:
|
||||||
final_url = f"{server_url}?token={token}&expires={expires}"
|
final_url = f"{server_url}?token={token}&expires={expires}"
|
||||||
if "window.canPlayFHD = true" in script:
|
if "window.canPlayFHD = true" in script:
|
||||||
|
|||||||
69
mediaflow_proxy/extractors/voe.py
Normal file
69
mediaflow_proxy/extractors/voe.py
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
import base64
|
||||||
|
import re
|
||||||
|
from typing import Dict, Any
|
||||||
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class VoeExtractor(BaseExtractor):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
self.mediaflow_endpoint = "hls_manifest_proxy"
|
||||||
|
|
||||||
|
async def extract(self, url: str, redirected: bool = False, **kwargs) -> Dict[str, Any]:
|
||||||
|
response = await self._make_request(url)
|
||||||
|
|
||||||
|
# See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/voesx.py
|
||||||
|
redirect_pattern = r"""window\.location\.href\s*=\s*'([^']+)"""
|
||||||
|
redirect_match = re.search(redirect_pattern, response.text, re.DOTALL)
|
||||||
|
if redirect_match:
|
||||||
|
if redirected:
|
||||||
|
raise ExtractorError("VOE: too many redirects")
|
||||||
|
|
||||||
|
return await self.extract(redirect_match.group(1))
|
||||||
|
|
||||||
|
code_and_script_pattern = r'json">\["([^"]+)"]</script>\s*<script\s*src="([^"]+)'
|
||||||
|
code_and_script_match = re.search(code_and_script_pattern, response.text, re.DOTALL)
|
||||||
|
if not code_and_script_match:
|
||||||
|
raise ExtractorError("VOE: unable to locate obfuscated payload or external script URL")
|
||||||
|
|
||||||
|
script_response = await self._make_request(urljoin(url, code_and_script_match.group(2)))
|
||||||
|
|
||||||
|
luts_pattern = r"(\[(?:'\W{2}'[,\]]){1,9})"
|
||||||
|
luts_match = re.search(luts_pattern, script_response.text, re.DOTALL)
|
||||||
|
if not luts_match:
|
||||||
|
raise ExtractorError("VOE: unable to locate LUTs in external script")
|
||||||
|
|
||||||
|
data = self.voe_decode(code_and_script_match.group(1), luts_match.group(1))
|
||||||
|
|
||||||
|
final_url = data.get("source")
|
||||||
|
if not final_url:
|
||||||
|
raise ExtractorError("VOE: failed to extract video URL")
|
||||||
|
|
||||||
|
self.base_headers["referer"] = url
|
||||||
|
return {
|
||||||
|
"destination_url": final_url,
|
||||||
|
"request_headers": self.base_headers,
|
||||||
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
|
}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def voe_decode(ct: str, luts: str) -> Dict[str, Any]:
|
||||||
|
import json
|
||||||
|
|
||||||
|
lut = ["".join([("\\" + x) if x in ".*+?^${}()|[]\\" else x for x in i]) for i in luts[2:-2].split("','")]
|
||||||
|
txt = ""
|
||||||
|
for i in ct:
|
||||||
|
x = ord(i)
|
||||||
|
if 64 < x < 91:
|
||||||
|
x = (x - 52) % 26 + 65
|
||||||
|
elif 96 < x < 123:
|
||||||
|
x = (x - 84) % 26 + 97
|
||||||
|
txt += chr(x)
|
||||||
|
for i in lut:
|
||||||
|
txt = re.sub(i, "", txt)
|
||||||
|
ct = base64.b64decode(txt).decode("utf-8")
|
||||||
|
txt = "".join([chr(ord(i) - 3) for i in ct])
|
||||||
|
txt = base64.b64decode(txt[::-1]).decode("utf-8")
|
||||||
|
return json.loads(txt)
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,7 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
|
import sys
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
from importlib import resources
|
from importlib import resources
|
||||||
|
|
||||||
from fastapi import FastAPI, Depends, Security, HTTPException
|
from fastapi import FastAPI, Depends, Security, HTTPException
|
||||||
@@ -10,13 +12,85 @@ from starlette.staticfiles import StaticFiles
|
|||||||
|
|
||||||
from mediaflow_proxy.configs import settings
|
from mediaflow_proxy.configs import settings
|
||||||
from mediaflow_proxy.middleware import UIAccessControlMiddleware
|
from mediaflow_proxy.middleware import UIAccessControlMiddleware
|
||||||
from mediaflow_proxy.routes import proxy_router, extractor_router, speedtest_router
|
from mediaflow_proxy.routes import (
|
||||||
|
proxy_router,
|
||||||
|
extractor_router,
|
||||||
|
speedtest_router,
|
||||||
|
playlist_builder_router,
|
||||||
|
xtream_root_router,
|
||||||
|
acestream_router,
|
||||||
|
telegram_router,
|
||||||
|
)
|
||||||
from mediaflow_proxy.schemas import GenerateUrlRequest, GenerateMultiUrlRequest, MultiUrlRequestItem
|
from mediaflow_proxy.schemas import GenerateUrlRequest, GenerateMultiUrlRequest, MultiUrlRequestItem
|
||||||
from mediaflow_proxy.utils.crypto_utils import EncryptionHandler, EncryptionMiddleware
|
from mediaflow_proxy.utils.crypto_utils import EncryptionHandler, EncryptionMiddleware
|
||||||
|
from mediaflow_proxy.utils import redis_utils
|
||||||
from mediaflow_proxy.utils.http_utils import encode_mediaflow_proxy_url
|
from mediaflow_proxy.utils.http_utils import encode_mediaflow_proxy_url
|
||||||
|
from mediaflow_proxy.utils.base64_utils import encode_url_to_base64, decode_base64_url, is_base64_url
|
||||||
|
from mediaflow_proxy.utils.acestream import acestream_manager
|
||||||
|
from mediaflow_proxy.remuxer.video_transcoder import get_hw_capability, HWAccelType
|
||||||
|
from mediaflow_proxy.utils.telegram import telegram_manager
|
||||||
|
|
||||||
logging.basicConfig(level=settings.log_level, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
logging.basicConfig(level=settings.log_level, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
||||||
app = FastAPI()
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Suppress Telethon's "RuntimeError: coroutine ignored GeneratorExit" warnings.
|
||||||
|
# These are harmless GC noise from Telethon's internal _recv_loop coroutines
|
||||||
|
# when parallel download connections are cleaned up after client disconnect.
|
||||||
|
_default_unraisable_hook = sys.unraisablehook
|
||||||
|
|
||||||
|
|
||||||
|
def _filtered_unraisable_hook(unraisable):
|
||||||
|
if isinstance(unraisable.exc_value, RuntimeError) and "coroutine ignored GeneratorExit" in str(
|
||||||
|
unraisable.exc_value
|
||||||
|
):
|
||||||
|
return # Suppress Telethon GC noise
|
||||||
|
_default_unraisable_hook(unraisable)
|
||||||
|
|
||||||
|
|
||||||
|
sys.unraisablehook = _filtered_unraisable_hook
|
||||||
|
|
||||||
|
|
||||||
|
@asynccontextmanager
|
||||||
|
async def lifespan(app: FastAPI):
|
||||||
|
"""Application lifespan handler for startup and shutdown events."""
|
||||||
|
# Startup
|
||||||
|
if settings.clear_cache_on_startup:
|
||||||
|
logger.info("Clearing caches on startup (CLEAR_CACHE_ON_STARTUP=true)")
|
||||||
|
# Note: Redis cache clearing would require FLUSHDB which is too aggressive.
|
||||||
|
# Individual cache entries will expire via TTL. If full clear is needed,
|
||||||
|
# use redis-cli KEYS "mfp:*" | xargs redis-cli DEL
|
||||||
|
logger.info("Cache clearing note: Redis entries will expire via TTL")
|
||||||
|
|
||||||
|
# Log transcoding capability
|
||||||
|
hw = get_hw_capability()
|
||||||
|
if hw.accel_type != HWAccelType.NONE and settings.transcode_prefer_gpu:
|
||||||
|
logger.info(
|
||||||
|
"Transcode ready: GPU %s (encoder=%s) | PyAV pipeline",
|
||||||
|
hw.accel_type.value,
|
||||||
|
hw.h264_encoder,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logger.info(
|
||||||
|
"Transcode ready: CPU (%s) | PyAV pipeline",
|
||||||
|
hw.h264_encoder,
|
||||||
|
)
|
||||||
|
|
||||||
|
yield
|
||||||
|
|
||||||
|
# Shutdown
|
||||||
|
logger.info("Shutting down...")
|
||||||
|
# Close acestream sessions
|
||||||
|
await acestream_manager.close()
|
||||||
|
logger.info("Acestream manager closed")
|
||||||
|
# Close telegram session
|
||||||
|
await telegram_manager.close()
|
||||||
|
logger.info("Telegram manager closed")
|
||||||
|
# Close Redis connections
|
||||||
|
await redis_utils.close_redis()
|
||||||
|
logger.info("Redis connections closed")
|
||||||
|
|
||||||
|
|
||||||
|
app = FastAPI(lifespan=lifespan)
|
||||||
api_password_query = APIKeyQuery(name="api_password", auto_error=False)
|
api_password_query = APIKeyQuery(name="api_password", auto_error=False)
|
||||||
api_password_header = APIKeyHeader(name="api_password", auto_error=False)
|
api_password_header = APIKeyHeader(name="api_password", auto_error=False)
|
||||||
app.add_middleware(
|
app.add_middleware(
|
||||||
@@ -65,6 +139,11 @@ async def show_speedtest_page():
|
|||||||
return RedirectResponse(url="/speedtest.html")
|
return RedirectResponse(url="/speedtest.html")
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/url-generator")
|
||||||
|
async def show_url_generator_page():
|
||||||
|
return RedirectResponse(url="/url_generator.html")
|
||||||
|
|
||||||
|
|
||||||
@app.post(
|
@app.post(
|
||||||
"/generate_encrypted_or_encoded_url",
|
"/generate_encrypted_or_encoded_url",
|
||||||
description="Generate a single encoded URL",
|
description="Generate a single encoded URL",
|
||||||
@@ -99,13 +178,20 @@ async def generate_url(request: GenerateUrlRequest):
|
|||||||
# Convert IP to string if provided
|
# Convert IP to string if provided
|
||||||
ip_str = str(request.ip) if request.ip else None
|
ip_str = str(request.ip) if request.ip else None
|
||||||
|
|
||||||
|
# Handle base64 encoding of destination URL if requested
|
||||||
|
destination_url = request.destination_url
|
||||||
|
if request.base64_encode_destination and destination_url:
|
||||||
|
destination_url = encode_url_to_base64(destination_url)
|
||||||
|
|
||||||
encoded_url = encode_mediaflow_proxy_url(
|
encoded_url = encode_mediaflow_proxy_url(
|
||||||
mediaflow_proxy_url=request.mediaflow_proxy_url,
|
mediaflow_proxy_url=request.mediaflow_proxy_url,
|
||||||
endpoint=request.endpoint,
|
endpoint=request.endpoint,
|
||||||
destination_url=request.destination_url,
|
destination_url=destination_url,
|
||||||
query_params=query_params,
|
query_params=query_params,
|
||||||
request_headers=request.request_headers,
|
request_headers=request.request_headers,
|
||||||
response_headers=request.response_headers,
|
response_headers=request.response_headers,
|
||||||
|
propagate_response_headers=request.propagate_response_headers,
|
||||||
|
remove_response_headers=request.remove_response_headers,
|
||||||
encryption_handler=encryption_handler,
|
encryption_handler=encryption_handler,
|
||||||
expiration=request.expiration,
|
expiration=request.expiration,
|
||||||
ip=ip_str,
|
ip=ip_str,
|
||||||
@@ -145,6 +231,8 @@ async def generate_urls(request: GenerateMultiUrlRequest):
|
|||||||
query_params=query_params,
|
query_params=query_params,
|
||||||
request_headers=url_item.request_headers,
|
request_headers=url_item.request_headers,
|
||||||
response_headers=url_item.response_headers,
|
response_headers=url_item.response_headers,
|
||||||
|
propagate_response_headers=url_item.propagate_response_headers,
|
||||||
|
remove_response_headers=url_item.remove_response_headers,
|
||||||
encryption_handler=encryption_handler,
|
encryption_handler=encryption_handler,
|
||||||
expiration=request.expiration,
|
expiration=request.expiration,
|
||||||
ip=ip_str,
|
ip=ip_str,
|
||||||
@@ -156,9 +244,87 @@ async def generate_urls(request: GenerateMultiUrlRequest):
|
|||||||
return {"urls": encoded_urls}
|
return {"urls": encoded_urls}
|
||||||
|
|
||||||
|
|
||||||
|
@app.post(
|
||||||
|
"/base64/encode",
|
||||||
|
description="Encode a URL to base64 format",
|
||||||
|
response_description="Returns the base64 encoded URL",
|
||||||
|
tags=["base64"],
|
||||||
|
)
|
||||||
|
async def encode_url_base64(url: str):
|
||||||
|
"""
|
||||||
|
Encode a URL to base64 format.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url (str): The URL to encode.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict: A dictionary containing the encoded URL.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
encoded_url = encode_url_to_base64(url)
|
||||||
|
return {"encoded_url": encoded_url, "original_url": url}
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(status_code=400, detail=f"Failed to encode URL: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
@app.post(
|
||||||
|
"/base64/decode",
|
||||||
|
description="Decode a base64 encoded URL",
|
||||||
|
response_description="Returns the decoded URL",
|
||||||
|
tags=["base64"],
|
||||||
|
)
|
||||||
|
async def decode_url_base64(encoded_url: str):
|
||||||
|
"""
|
||||||
|
Decode a base64 encoded URL.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
encoded_url (str): The base64 encoded URL to decode.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict: A dictionary containing the decoded URL.
|
||||||
|
"""
|
||||||
|
decoded_url = decode_base64_url(encoded_url)
|
||||||
|
if decoded_url is None:
|
||||||
|
raise HTTPException(status_code=400, detail="Invalid base64 encoded URL")
|
||||||
|
|
||||||
|
return {"decoded_url": decoded_url, "encoded_url": encoded_url}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get(
|
||||||
|
"/base64/check",
|
||||||
|
description="Check if a string appears to be a base64 encoded URL",
|
||||||
|
response_description="Returns whether the string is likely base64 encoded",
|
||||||
|
tags=["base64"],
|
||||||
|
)
|
||||||
|
async def check_base64_url(url: str):
|
||||||
|
"""
|
||||||
|
Check if a string appears to be a base64 encoded URL.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url (str): The string to check.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict: A dictionary indicating if the string is likely base64 encoded.
|
||||||
|
"""
|
||||||
|
is_base64 = is_base64_url(url)
|
||||||
|
result = {"url": url, "is_base64": is_base64}
|
||||||
|
|
||||||
|
if is_base64:
|
||||||
|
decoded_url = decode_base64_url(url)
|
||||||
|
if decoded_url:
|
||||||
|
result["decoded_url"] = decoded_url
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
app.include_router(proxy_router, prefix="/proxy", tags=["proxy"], dependencies=[Depends(verify_api_key)])
|
app.include_router(proxy_router, prefix="/proxy", tags=["proxy"], dependencies=[Depends(verify_api_key)])
|
||||||
|
app.include_router(acestream_router, prefix="/proxy", tags=["acestream"], dependencies=[Depends(verify_api_key)])
|
||||||
|
app.include_router(telegram_router, prefix="/proxy", tags=["telegram"], dependencies=[Depends(verify_api_key)])
|
||||||
app.include_router(extractor_router, prefix="/extractor", tags=["extractors"], dependencies=[Depends(verify_api_key)])
|
app.include_router(extractor_router, prefix="/extractor", tags=["extractors"], dependencies=[Depends(verify_api_key)])
|
||||||
app.include_router(speedtest_router, prefix="/speedtest", tags=["speedtest"], dependencies=[Depends(verify_api_key)])
|
app.include_router(speedtest_router, prefix="/speedtest", tags=["speedtest"], dependencies=[Depends(verify_api_key)])
|
||||||
|
app.include_router(playlist_builder_router, prefix="/playlist", tags=["playlist"])
|
||||||
|
# Root-level XC endpoints for IPTV player compatibility (handles its own API key verification)
|
||||||
|
app.include_router(xtream_root_router, tags=["xtream"])
|
||||||
|
|
||||||
static_path = resources.files("mediaflow_proxy").joinpath("static")
|
static_path = resources.files("mediaflow_proxy").joinpath("static")
|
||||||
app.mount("/", StaticFiles(directory=str(static_path), html=True), name="static")
|
app.mount("/", StaticFiles(directory=str(static_path), html=True), name="static")
|
||||||
|
|||||||
@@ -1,18 +1,43 @@
|
|||||||
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
import math
|
import math
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from fastapi import Request, Response, HTTPException
|
from fastapi import Request, Response, HTTPException
|
||||||
|
|
||||||
from mediaflow_proxy.drm.decrypter import decrypt_segment
|
from mediaflow_proxy.drm.decrypter import decrypt_segment, process_drm_init_segment
|
||||||
from mediaflow_proxy.utils.crypto_utils import encryption_handler
|
from mediaflow_proxy.utils.crypto_utils import encryption_handler
|
||||||
from mediaflow_proxy.utils.http_utils import encode_mediaflow_proxy_url, get_original_scheme, ProxyRequestHeaders
|
from mediaflow_proxy.utils.http_utils import (
|
||||||
|
encode_mediaflow_proxy_url,
|
||||||
|
get_original_scheme,
|
||||||
|
ProxyRequestHeaders,
|
||||||
|
apply_header_manipulation,
|
||||||
|
)
|
||||||
|
from mediaflow_proxy.utils.dash_prebuffer import dash_prebuffer
|
||||||
|
from mediaflow_proxy.utils.cache_utils import get_cached_processed_init, set_cached_processed_init
|
||||||
|
from mediaflow_proxy.utils.m3u8_processor import SkipSegmentFilter
|
||||||
|
from mediaflow_proxy.remuxer.ts_muxer import remux_fmp4_to_ts
|
||||||
|
from mediaflow_proxy.configs import settings
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_ts_mode(request: Request) -> bool:
|
||||||
|
"""Resolve the effective TS remux mode from the request query params, falling back to settings."""
|
||||||
|
override = request.query_params.get("remux_to_ts")
|
||||||
|
if override is not None:
|
||||||
|
return override.lower() in ("true", "1", "yes")
|
||||||
|
return settings.remux_to_ts
|
||||||
|
|
||||||
|
|
||||||
async def process_manifest(
|
async def process_manifest(
|
||||||
request: Request, mpd_dict: dict, proxy_headers: ProxyRequestHeaders, key_id: str = None, key: str = None
|
request: Request,
|
||||||
|
mpd_dict: dict,
|
||||||
|
proxy_headers: ProxyRequestHeaders,
|
||||||
|
key_id: str = None,
|
||||||
|
key: str = None,
|
||||||
|
resolution: str = None,
|
||||||
|
skip_segments: list = None,
|
||||||
) -> Response:
|
) -> Response:
|
||||||
"""
|
"""
|
||||||
Processes the MPD manifest and converts it to an HLS manifest.
|
Processes the MPD manifest and converts it to an HLS manifest.
|
||||||
@@ -23,16 +48,38 @@ async def process_manifest(
|
|||||||
proxy_headers (ProxyRequestHeaders): The headers to include in the request.
|
proxy_headers (ProxyRequestHeaders): The headers to include in the request.
|
||||||
key_id (str, optional): The DRM key ID. Defaults to None.
|
key_id (str, optional): The DRM key ID. Defaults to None.
|
||||||
key (str, optional): The DRM key. Defaults to None.
|
key (str, optional): The DRM key. Defaults to None.
|
||||||
|
resolution (str, optional): Target resolution (e.g., '1080p', '720p'). Defaults to None.
|
||||||
|
skip_segments (list, optional): List of time segments to skip. Each item should have 'start' and 'end' keys.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Response: The HLS manifest as an HTTP response.
|
Response: The HLS manifest as an HTTP response.
|
||||||
"""
|
"""
|
||||||
hls_content = build_hls(mpd_dict, request, key_id, key)
|
hls_content = build_hls(mpd_dict, request, key_id, key, resolution, skip_segments)
|
||||||
|
|
||||||
|
# Start DASH pre-buffering in background if enabled
|
||||||
|
if settings.enable_dash_prebuffer:
|
||||||
|
# Extract headers for pre-buffering
|
||||||
|
headers = {}
|
||||||
|
for key, value in request.query_params.items():
|
||||||
|
if key.startswith("h_"):
|
||||||
|
headers[key[2:]] = value
|
||||||
|
|
||||||
|
# Get the original MPD URL from the request
|
||||||
|
mpd_url = request.query_params.get("d", "")
|
||||||
|
if mpd_url:
|
||||||
|
# Start pre-buffering in background
|
||||||
|
asyncio.create_task(dash_prebuffer.prebuffer_dash_manifest(mpd_url, headers))
|
||||||
|
|
||||||
return Response(content=hls_content, media_type="application/vnd.apple.mpegurl", headers=proxy_headers.response)
|
return Response(content=hls_content, media_type="application/vnd.apple.mpegurl", headers=proxy_headers.response)
|
||||||
|
|
||||||
|
|
||||||
async def process_playlist(
|
async def process_playlist(
|
||||||
request: Request, mpd_dict: dict, profile_id: str, proxy_headers: ProxyRequestHeaders
|
request: Request,
|
||||||
|
mpd_dict: dict,
|
||||||
|
profile_id: str,
|
||||||
|
proxy_headers: ProxyRequestHeaders,
|
||||||
|
skip_segments: list = None,
|
||||||
|
start_offset: float = None,
|
||||||
) -> Response:
|
) -> Response:
|
||||||
"""
|
"""
|
||||||
Processes the MPD manifest and converts it to an HLS playlist for a specific profile.
|
Processes the MPD manifest and converts it to an HLS playlist for a specific profile.
|
||||||
@@ -42,6 +89,8 @@ async def process_playlist(
|
|||||||
mpd_dict (dict): The MPD manifest data.
|
mpd_dict (dict): The MPD manifest data.
|
||||||
profile_id (str): The profile ID to generate the playlist for.
|
profile_id (str): The profile ID to generate the playlist for.
|
||||||
proxy_headers (ProxyRequestHeaders): The headers to include in the request.
|
proxy_headers (ProxyRequestHeaders): The headers to include in the request.
|
||||||
|
skip_segments (list, optional): List of time segments to skip. Each item should have 'start' and 'end' keys.
|
||||||
|
start_offset (float, optional): Start offset in seconds for live streams.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Response: The HLS playlist as an HTTP response.
|
Response: The HLS playlist as an HTTP response.
|
||||||
@@ -53,8 +102,22 @@ async def process_playlist(
|
|||||||
if not matching_profiles:
|
if not matching_profiles:
|
||||||
raise HTTPException(status_code=404, detail="Profile not found")
|
raise HTTPException(status_code=404, detail="Profile not found")
|
||||||
|
|
||||||
hls_content = build_hls_playlist(mpd_dict, matching_profiles, request)
|
hls_content = build_hls_playlist(mpd_dict, matching_profiles, request, skip_segments, start_offset)
|
||||||
return Response(content=hls_content, media_type="application/vnd.apple.mpegurl", headers=proxy_headers.response)
|
|
||||||
|
# Trigger prebuffering of upcoming segments for live streams
|
||||||
|
if settings.enable_dash_prebuffer and mpd_dict.get("isLive", False):
|
||||||
|
# Extract headers for pre-buffering
|
||||||
|
headers = {}
|
||||||
|
for key, value in request.query_params.items():
|
||||||
|
if key.startswith("h_"):
|
||||||
|
headers[key[2:]] = value
|
||||||
|
|
||||||
|
# Use the new prefetch method for live playlists
|
||||||
|
asyncio.create_task(dash_prebuffer.prefetch_for_live_playlist(matching_profiles, headers))
|
||||||
|
|
||||||
|
# Don't include propagate headers for playlists - they should only apply to segments
|
||||||
|
response_headers = apply_header_manipulation({}, proxy_headers, include_propagate=False)
|
||||||
|
return Response(content=hls_content, media_type="application/vnd.apple.mpegurl", headers=response_headers)
|
||||||
|
|
||||||
|
|
||||||
async def process_segment(
|
async def process_segment(
|
||||||
@@ -64,9 +127,11 @@ async def process_segment(
|
|||||||
proxy_headers: ProxyRequestHeaders,
|
proxy_headers: ProxyRequestHeaders,
|
||||||
key_id: str = None,
|
key_id: str = None,
|
||||||
key: str = None,
|
key: str = None,
|
||||||
|
use_map: bool = False,
|
||||||
|
remux_ts: bool = None,
|
||||||
) -> Response:
|
) -> Response:
|
||||||
"""
|
"""
|
||||||
Processes and decrypts a media segment.
|
Processes and decrypts a media segment, optionally remuxing to MPEG-TS.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
init_content (bytes): The initialization segment content.
|
init_content (bytes): The initialization segment content.
|
||||||
@@ -75,23 +140,110 @@ async def process_segment(
|
|||||||
proxy_headers (ProxyRequestHeaders): The headers to include in the request.
|
proxy_headers (ProxyRequestHeaders): The headers to include in the request.
|
||||||
key_id (str, optional): The DRM key ID. Defaults to None.
|
key_id (str, optional): The DRM key ID. Defaults to None.
|
||||||
key (str, optional): The DRM key. Defaults to None.
|
key (str, optional): The DRM key. Defaults to None.
|
||||||
|
use_map (bool, optional): If True, init segment is served separately via EXT-X-MAP,
|
||||||
|
so don't concatenate init with segment. Defaults to False.
|
||||||
|
remux_ts (bool, optional): If True, remux fMP4 to MPEG-TS. Defaults to settings.remux_to_ts.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Response: The decrypted segment as an HTTP response.
|
Response: The processed segment as an HTTP response.
|
||||||
"""
|
"""
|
||||||
if key_id and key:
|
if key_id and key:
|
||||||
# For DRM protected content
|
# For DRM protected content
|
||||||
now = time.time()
|
now = time.time()
|
||||||
decrypted_content = decrypt_segment(init_content, segment_content, key_id, key)
|
decrypted_content = decrypt_segment(init_content, segment_content, key_id, key, include_init=not use_map)
|
||||||
logger.info(f"Decryption of {mimetype} segment took {time.time() - now:.4f} seconds")
|
logger.info(f"Decryption of {mimetype} segment took {time.time() - now:.4f} seconds")
|
||||||
else:
|
else:
|
||||||
# For non-DRM protected content, we just concatenate init and segment content
|
# For non-DRM protected content
|
||||||
|
if use_map:
|
||||||
|
# Init is served separately via EXT-X-MAP
|
||||||
|
decrypted_content = segment_content
|
||||||
|
else:
|
||||||
|
# Concatenate init and segment content
|
||||||
decrypted_content = init_content + segment_content
|
decrypted_content = init_content + segment_content
|
||||||
|
|
||||||
return Response(content=decrypted_content, media_type=mimetype, headers=proxy_headers.response)
|
# Check if we should remux to TS
|
||||||
|
should_remux = remux_ts if remux_ts is not None else settings.remux_to_ts
|
||||||
|
|
||||||
|
# Remux both video and audio to MPEG-TS for proper HLS TS playback
|
||||||
|
if should_remux and ("video" in mimetype or "audio" in mimetype):
|
||||||
|
# Remux fMP4 to MPEG-TS for ExoPlayer/VLC compatibility
|
||||||
|
now = time.time()
|
||||||
|
try:
|
||||||
|
# For TS remuxing, we always need init_content for codec config
|
||||||
|
# preserve_timestamps=True keeps the original tfdt timestamps from the
|
||||||
|
# fMP4 segment, ensuring continuous playback across HLS segments
|
||||||
|
ts_content = remux_fmp4_to_ts(
|
||||||
|
init_content,
|
||||||
|
decrypted_content,
|
||||||
|
preserve_timestamps=True,
|
||||||
|
)
|
||||||
|
decrypted_content = ts_content
|
||||||
|
mimetype = "video/mp2t" # Update MIME type for TS (same for audio-only TS)
|
||||||
|
logger.info(f"TS remuxing took {time.time() - now:.4f} seconds")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"TS remuxing failed, returning fMP4: {e}")
|
||||||
|
# Fall through to return original content
|
||||||
|
|
||||||
|
response_headers = apply_header_manipulation({}, proxy_headers)
|
||||||
|
return Response(content=decrypted_content, media_type=mimetype, headers=response_headers)
|
||||||
|
|
||||||
|
|
||||||
def build_hls(mpd_dict: dict, request: Request, key_id: str = None, key: str = None) -> str:
|
async def process_init_segment(
|
||||||
|
init_content: bytes,
|
||||||
|
mimetype: str,
|
||||||
|
proxy_headers: ProxyRequestHeaders,
|
||||||
|
key_id: str = None,
|
||||||
|
key: str = None,
|
||||||
|
init_url: str = None,
|
||||||
|
) -> Response:
|
||||||
|
"""
|
||||||
|
Processes an initialization segment for EXT-X-MAP.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
init_content (bytes): The initialization segment content.
|
||||||
|
mimetype (str): The MIME type of the segment.
|
||||||
|
proxy_headers (ProxyRequestHeaders): The headers to include in the request.
|
||||||
|
key_id (str, optional): The DRM key ID. Defaults to None.
|
||||||
|
key (str, optional): The DRM key. Defaults to None.
|
||||||
|
init_url (str, optional): The init URL for caching. Defaults to None.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Response: The processed init segment as an HTTP response.
|
||||||
|
"""
|
||||||
|
if key_id and key:
|
||||||
|
# Check if we have a cached processed version
|
||||||
|
if init_url:
|
||||||
|
cached_processed = await get_cached_processed_init(init_url, key_id)
|
||||||
|
if cached_processed:
|
||||||
|
logger.debug(f"Using cached processed init segment for {init_url}")
|
||||||
|
response_headers = apply_header_manipulation({}, proxy_headers)
|
||||||
|
return Response(content=cached_processed, media_type=mimetype, headers=response_headers)
|
||||||
|
|
||||||
|
# For DRM protected content, we need to process the init segment
|
||||||
|
# to remove encryption-related boxes but keep the moov structure
|
||||||
|
now = time.time()
|
||||||
|
processed_content = process_drm_init_segment(init_content, key_id, key)
|
||||||
|
logger.info(f"Processing of {mimetype} init segment took {time.time() - now:.4f} seconds")
|
||||||
|
|
||||||
|
# Cache the processed init segment
|
||||||
|
if init_url:
|
||||||
|
await set_cached_processed_init(init_url, key_id, processed_content, ttl=3600)
|
||||||
|
else:
|
||||||
|
# For non-DRM protected content, just return the init segment as-is
|
||||||
|
processed_content = init_content
|
||||||
|
|
||||||
|
response_headers = apply_header_manipulation({}, proxy_headers)
|
||||||
|
return Response(content=processed_content, media_type=mimetype, headers=response_headers)
|
||||||
|
|
||||||
|
|
||||||
|
def build_hls(
|
||||||
|
mpd_dict: dict,
|
||||||
|
request: Request,
|
||||||
|
key_id: str = None,
|
||||||
|
key: str = None,
|
||||||
|
resolution: str = None,
|
||||||
|
skip_segments: list = None,
|
||||||
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Builds an HLS manifest from the MPD manifest.
|
Builds an HLS manifest from the MPD manifest.
|
||||||
|
|
||||||
@@ -100,12 +252,23 @@ def build_hls(mpd_dict: dict, request: Request, key_id: str = None, key: str = N
|
|||||||
request (Request): The incoming HTTP request.
|
request (Request): The incoming HTTP request.
|
||||||
key_id (str, optional): The DRM key ID. Defaults to None.
|
key_id (str, optional): The DRM key ID. Defaults to None.
|
||||||
key (str, optional): The DRM key. Defaults to None.
|
key (str, optional): The DRM key. Defaults to None.
|
||||||
|
resolution (str, optional): Target resolution (e.g., '1080p', '720p'). Defaults to None.
|
||||||
|
skip_segments (list, optional): List of time segments to skip. Each item should have 'start' and 'end' keys.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: The HLS manifest as a string.
|
str: The HLS manifest as a string.
|
||||||
"""
|
"""
|
||||||
hls = ["#EXTM3U", "#EXT-X-VERSION:6"]
|
is_ts_mode = _resolve_ts_mode(request)
|
||||||
|
# Use HLS v3 for TS (ExoPlayer compatibility), v6 for fMP4
|
||||||
|
version = 3 if is_ts_mode else 6
|
||||||
|
hls = ["#EXTM3U", f"#EXT-X-VERSION:{version}"]
|
||||||
query_params = dict(request.query_params)
|
query_params = dict(request.query_params)
|
||||||
|
|
||||||
|
# Preserve skip parameter in query params so it propagates to playlists
|
||||||
|
if skip_segments:
|
||||||
|
# Convert back to compact format for URL
|
||||||
|
skip_str = ",".join(f"{s['start']}-{s['end']}" for s in skip_segments)
|
||||||
|
query_params["skip"] = skip_str
|
||||||
has_encrypted = query_params.pop("has_encrypted", False)
|
has_encrypted = query_params.pop("has_encrypted", False)
|
||||||
|
|
||||||
video_profiles = {}
|
video_profiles = {}
|
||||||
@@ -128,24 +291,113 @@ def build_hls(mpd_dict: dict, request: Request, key_id: str = None, key: str = N
|
|||||||
elif "audio" in profile["mimeType"]:
|
elif "audio" in profile["mimeType"]:
|
||||||
audio_profiles[profile["id"]] = (profile, playlist_url)
|
audio_profiles[profile["id"]] = (profile, playlist_url)
|
||||||
|
|
||||||
|
# Filter video profiles by resolution if specified
|
||||||
|
if resolution and video_profiles:
|
||||||
|
video_profiles = _filter_video_profiles_by_resolution(video_profiles, resolution)
|
||||||
|
|
||||||
|
# For TS mode, only expose the highest quality video variant
|
||||||
|
# ExoPlayer handles adaptive switching poorly with TS remuxing
|
||||||
|
if is_ts_mode and video_profiles:
|
||||||
|
max_height = max(p[0].get("height", 0) for p in video_profiles.values())
|
||||||
|
video_profiles = {k: v for k, v in video_profiles.items() if v[0].get("height", 0) >= max_height}
|
||||||
|
|
||||||
# Add audio streams
|
# Add audio streams
|
||||||
for i, (profile, playlist_url) in enumerate(audio_profiles.values()):
|
for i, (profile, playlist_url) in enumerate(audio_profiles.values()):
|
||||||
is_default = "YES" if i == 0 else "NO" # Set the first audio track as default
|
is_default = "YES" if i == 0 else "NO" # Set the first audio track as default
|
||||||
|
lang = profile.get("lang", "und")
|
||||||
|
bandwidth = profile.get("bandwidth", "128000")
|
||||||
|
name = f"Audio {lang} ({bandwidth})" if lang != "und" else f"Audio {i + 1} ({bandwidth})"
|
||||||
hls.append(
|
hls.append(
|
||||||
f'#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",NAME="{profile["id"]}",DEFAULT={is_default},AUTOSELECT={is_default},LANGUAGE="{profile.get("lang", "und")}",URI="{playlist_url}"'
|
f'#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",NAME="{name}",DEFAULT={is_default},AUTOSELECT=YES,LANGUAGE="{lang}",URI="{playlist_url}"'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Build combined codecs string (video + audio) for EXT-X-STREAM-INF
|
||||||
|
# ExoPlayer requires CODECS to list all codecs when AUDIO group is referenced
|
||||||
|
first_audio_codec = None
|
||||||
|
if audio_profiles:
|
||||||
|
first_audio_profile = next(iter(audio_profiles.values()))[0]
|
||||||
|
first_audio_codec = first_audio_profile.get("codecs", "")
|
||||||
|
|
||||||
# Add video streams
|
# Add video streams
|
||||||
for profile, playlist_url in video_profiles.values():
|
for profile, playlist_url in video_profiles.values():
|
||||||
|
# Only add AUDIO attribute if there are audio profiles available
|
||||||
|
audio_attr = ',AUDIO="audio"' if audio_profiles else ""
|
||||||
|
|
||||||
|
# Build combined codecs: video + audio
|
||||||
|
video_codec = profile["codecs"]
|
||||||
|
if first_audio_codec and audio_attr:
|
||||||
|
combined_codecs = f"{video_codec},{first_audio_codec}"
|
||||||
|
else:
|
||||||
|
combined_codecs = video_codec
|
||||||
|
|
||||||
|
# Keep full codec strings (e.g., avc1.42C01F, mp4a.40.2) for ALL modes.
|
||||||
|
# ExoPlayer's CodecSpecificDataUtil rejects simplified strings like "avc1" or "mp4a"
|
||||||
|
# as malformed, which prevents proper codec initialization.
|
||||||
|
|
||||||
|
# Omit FRAME-RATE for TS mode (ExoPlayer compatibility)
|
||||||
|
if is_ts_mode:
|
||||||
hls.append(
|
hls.append(
|
||||||
f'#EXT-X-STREAM-INF:BANDWIDTH={profile["bandwidth"]},RESOLUTION={profile["width"]}x{profile["height"]},CODECS="{profile["codecs"]}",FRAME-RATE={profile["frameRate"]},AUDIO="audio"'
|
f'#EXT-X-STREAM-INF:BANDWIDTH={profile["bandwidth"]},RESOLUTION={profile["width"]}x{profile["height"]},CODECS="{combined_codecs}"{audio_attr}'
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
hls.append(
|
||||||
|
f'#EXT-X-STREAM-INF:BANDWIDTH={profile["bandwidth"]},RESOLUTION={profile["width"]}x{profile["height"]},CODECS="{combined_codecs}",FRAME-RATE={profile["frameRate"]}{audio_attr}'
|
||||||
)
|
)
|
||||||
hls.append(playlist_url)
|
hls.append(playlist_url)
|
||||||
|
|
||||||
return "\n".join(hls)
|
return "\n".join(hls)
|
||||||
|
|
||||||
|
|
||||||
def build_hls_playlist(mpd_dict: dict, profiles: list[dict], request: Request) -> str:
|
def _filter_video_profiles_by_resolution(video_profiles: dict, target_resolution: str) -> dict:
|
||||||
|
"""
|
||||||
|
Filter video profiles to select the one matching the target resolution.
|
||||||
|
Falls back to closest lower resolution if exact match not found.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
video_profiles: Dictionary of profile_id -> (profile, playlist_url).
|
||||||
|
target_resolution: Target resolution string (e.g., '1080p', '720p').
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Filtered dictionary with only the selected profile.
|
||||||
|
"""
|
||||||
|
# Parse target height from "1080p" -> 1080
|
||||||
|
target_height = int(target_resolution.rstrip("p"))
|
||||||
|
|
||||||
|
# Convert to list and sort by height descending
|
||||||
|
profiles_list = [
|
||||||
|
(profile_id, profile, playlist_url)
|
||||||
|
for profile_id, (profile, playlist_url) in video_profiles.items()
|
||||||
|
if profile.get("height", 0) > 0
|
||||||
|
]
|
||||||
|
|
||||||
|
if not profiles_list:
|
||||||
|
logger.warning("No video profiles with valid height found, returning all profiles")
|
||||||
|
return video_profiles
|
||||||
|
|
||||||
|
sorted_profiles = sorted(profiles_list, key=lambda x: x[1]["height"], reverse=True)
|
||||||
|
|
||||||
|
# Find exact match or closest lower
|
||||||
|
selected = None
|
||||||
|
for profile_id, profile, playlist_url in sorted_profiles:
|
||||||
|
if profile["height"] <= target_height:
|
||||||
|
selected = (profile_id, profile, playlist_url)
|
||||||
|
break
|
||||||
|
|
||||||
|
# If all profiles are higher than target, use lowest available
|
||||||
|
if selected is None:
|
||||||
|
selected = sorted_profiles[-1]
|
||||||
|
|
||||||
|
profile_id, profile, playlist_url = selected
|
||||||
|
logger.info(
|
||||||
|
f"Selected MPD video profile with resolution {profile['width']}x{profile['height']} for target {target_resolution}"
|
||||||
|
)
|
||||||
|
|
||||||
|
return {profile_id: (profile, playlist_url)}
|
||||||
|
|
||||||
|
|
||||||
|
def build_hls_playlist(
|
||||||
|
mpd_dict: dict, profiles: list[dict], request: Request, skip_segments: list = None, start_offset: float = None
|
||||||
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Builds an HLS playlist from the MPD manifest for specific profiles.
|
Builds an HLS playlist from the MPD manifest for specific profiles.
|
||||||
|
|
||||||
@@ -153,47 +405,105 @@ def build_hls_playlist(mpd_dict: dict, profiles: list[dict], request: Request) -
|
|||||||
mpd_dict (dict): The MPD manifest data.
|
mpd_dict (dict): The MPD manifest data.
|
||||||
profiles (list[dict]): The profiles to include in the playlist.
|
profiles (list[dict]): The profiles to include in the playlist.
|
||||||
request (Request): The incoming HTTP request.
|
request (Request): The incoming HTTP request.
|
||||||
|
skip_segments (list, optional): List of time segments to skip. Each item should have 'start' and 'end' keys.
|
||||||
|
start_offset (float, optional): Start offset in seconds for live streams. Defaults to settings.livestream_start_offset for live.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: The HLS playlist as a string.
|
str: The HLS playlist as a string.
|
||||||
"""
|
"""
|
||||||
hls = ["#EXTM3U", "#EXT-X-VERSION:6"]
|
# Determine if we're in TS remux mode (per-request override > global setting)
|
||||||
|
is_ts_mode = _resolve_ts_mode(request)
|
||||||
|
|
||||||
|
# Use HLS v3 for TS (ExoPlayer compatibility), v6 for fMP4
|
||||||
|
version = 3 if is_ts_mode else 6
|
||||||
|
hls = ["#EXTM3U", f"#EXT-X-VERSION:{version}"]
|
||||||
|
|
||||||
added_segments = 0
|
added_segments = 0
|
||||||
|
skipped_segments = 0
|
||||||
|
is_live = mpd_dict.get("isLive", False)
|
||||||
|
|
||||||
proxy_url = request.url_for("segment_endpoint")
|
# Inject EXT-X-START for live streams (enables prebuffering by starting behind live edge)
|
||||||
|
# User-provided start_offset always takes precedence; otherwise use default for live streams only
|
||||||
|
if is_ts_mode and is_live and start_offset is None:
|
||||||
|
# TS mode needs a larger buffer for ExoPlayer
|
||||||
|
effective_start_offset = -30.0
|
||||||
|
else:
|
||||||
|
effective_start_offset = (
|
||||||
|
start_offset if start_offset is not None else (settings.livestream_start_offset if is_live else None)
|
||||||
|
)
|
||||||
|
if effective_start_offset is not None:
|
||||||
|
# ExoPlayer doesn't handle PRECISE=YES well with TS
|
||||||
|
precise = "NO" if is_ts_mode else "YES"
|
||||||
|
hls.append(f"#EXT-X-START:TIME-OFFSET={effective_start_offset:.1f},PRECISE={precise}")
|
||||||
|
|
||||||
|
# Initialize skip filter if skip_segments provided
|
||||||
|
skip_filter = SkipSegmentFilter(skip_segments) if skip_segments else None
|
||||||
|
|
||||||
|
# In TS mode, we don't use EXT-X-MAP because TS segments are self-contained
|
||||||
|
# (PAT/PMT/VPS/SPS/PPS are embedded in each segment)
|
||||||
|
# Use EXT-X-MAP for live streams, but only for fMP4 (not TS)
|
||||||
|
use_map = is_live and not is_ts_mode
|
||||||
|
|
||||||
|
# Select appropriate endpoint based on remux mode
|
||||||
|
if is_ts_mode:
|
||||||
|
proxy_url = request.url_for("segment_ts_endpoint") # /mpd/segment.ts
|
||||||
|
else:
|
||||||
|
proxy_url = request.url_for("segment_endpoint") # /mpd/segment.mp4
|
||||||
proxy_url = str(proxy_url.replace(scheme=get_original_scheme(request)))
|
proxy_url = str(proxy_url.replace(scheme=get_original_scheme(request)))
|
||||||
|
|
||||||
|
# Get init endpoint URL for EXT-X-MAP (only used for fMP4 mode)
|
||||||
|
init_proxy_url = request.url_for("init_endpoint")
|
||||||
|
init_proxy_url = str(init_proxy_url.replace(scheme=get_original_scheme(request)))
|
||||||
|
|
||||||
for index, profile in enumerate(profiles):
|
for index, profile in enumerate(profiles):
|
||||||
segments = profile["segments"]
|
segments = profile["segments"]
|
||||||
if not segments:
|
if not segments:
|
||||||
logger.warning(f"No segments found for profile {profile['id']}")
|
logger.warning(f"No segments found for profile {profile['id']}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
if is_live:
|
||||||
|
# TS mode uses deeper playlist for ExoPlayer buffering
|
||||||
|
depth = 20 if is_ts_mode else max(settings.mpd_live_playlist_depth, 1)
|
||||||
|
trimmed_segments = segments[-depth:]
|
||||||
|
else:
|
||||||
|
trimmed_segments = segments
|
||||||
|
|
||||||
# Add headers for only the first profile
|
# Add headers for only the first profile
|
||||||
if index == 0:
|
if index == 0:
|
||||||
first_segment = segments[0]
|
first_segment = trimmed_segments[0]
|
||||||
extinf_values = [f["extinf"] for f in segments if "extinf" in f]
|
extinf_values = [f["extinf"] for f in trimmed_segments if "extinf" in f]
|
||||||
|
|
||||||
|
# TS mode uses int(max)+1 to reduce buffer underruns in ExoPlayer
|
||||||
|
if is_ts_mode:
|
||||||
|
target_duration = int(max(extinf_values)) + 1 if extinf_values else 10
|
||||||
|
else:
|
||||||
target_duration = math.ceil(max(extinf_values)) if extinf_values else 3
|
target_duration = math.ceil(max(extinf_values)) if extinf_values else 3
|
||||||
|
|
||||||
# Calculate media sequence using adaptive logic for different MPD types
|
# Align HLS media sequence with MPD-provided numbering when available
|
||||||
mpd_start_number = profile.get("segment_template_start_number")
|
if is_ts_mode and is_live:
|
||||||
if mpd_start_number and mpd_start_number >= 1000:
|
# For live TS, derive sequence from timeline first for stable continuity
|
||||||
# Amazon-style: Use absolute segment numbering
|
|
||||||
sequence = first_segment.get("number", mpd_start_number)
|
|
||||||
else:
|
|
||||||
# Sky-style: Use time-based calculation if available
|
|
||||||
time_val = first_segment.get("time")
|
time_val = first_segment.get("time")
|
||||||
duration_val = first_segment.get("duration_mpd_timescale")
|
duration_val = first_segment.get("duration_mpd_timescale")
|
||||||
if time_val is not None and duration_val and duration_val > 0:
|
if time_val is not None and duration_val and duration_val > 0:
|
||||||
calculated_sequence = math.floor(time_val / duration_val)
|
sequence = math.floor(time_val / duration_val)
|
||||||
# For live streams with very large sequence numbers, use modulo to keep reasonable range
|
|
||||||
if mpd_dict.get("isLive", False) and calculated_sequence > 100000:
|
|
||||||
sequence = calculated_sequence % 100000
|
|
||||||
else:
|
else:
|
||||||
sequence = calculated_sequence
|
sequence = first_segment.get("number") or profile.get("segment_template_start_number") or 1
|
||||||
else:
|
else:
|
||||||
sequence = first_segment.get("number", 1)
|
mpd_start_number = profile.get("segment_template_start_number")
|
||||||
|
sequence = first_segment.get("number")
|
||||||
|
|
||||||
|
if sequence is None:
|
||||||
|
# Fallback to MPD template start number
|
||||||
|
if mpd_start_number is not None:
|
||||||
|
sequence = mpd_start_number
|
||||||
|
else:
|
||||||
|
# As a last resort, derive from timeline information
|
||||||
|
time_val = first_segment.get("time")
|
||||||
|
duration_val = first_segment.get("duration_mpd_timescale")
|
||||||
|
if time_val is not None and duration_val and duration_val > 0:
|
||||||
|
sequence = math.floor(time_val / duration_val)
|
||||||
|
else:
|
||||||
|
sequence = 1
|
||||||
|
|
||||||
hls.extend(
|
hls.extend(
|
||||||
[
|
[
|
||||||
@@ -201,23 +511,91 @@ def build_hls_playlist(mpd_dict: dict, profiles: list[dict], request: Request) -
|
|||||||
f"#EXT-X-MEDIA-SEQUENCE:{sequence}",
|
f"#EXT-X-MEDIA-SEQUENCE:{sequence}",
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
if mpd_dict["isLive"]:
|
# For live streams, don't set PLAYLIST-TYPE to allow sliding window
|
||||||
hls.append("#EXT-X-PLAYLIST-TYPE:EVENT")
|
if not is_live:
|
||||||
else:
|
|
||||||
hls.append("#EXT-X-PLAYLIST-TYPE:VOD")
|
hls.append("#EXT-X-PLAYLIST-TYPE:VOD")
|
||||||
|
|
||||||
init_url = profile["initUrl"]
|
init_url = profile["initUrl"]
|
||||||
|
# For SegmentBase profiles, we may have byte range for initialization segment
|
||||||
|
init_range = profile.get("initRange")
|
||||||
|
|
||||||
query_params = dict(request.query_params)
|
query_params = dict(request.query_params)
|
||||||
query_params.pop("profile_id", None)
|
query_params.pop("profile_id", None)
|
||||||
query_params.pop("d", None)
|
query_params.pop("d", None)
|
||||||
|
query_params.pop("remux_to_ts", None) # per-request override; already resolved into endpoint choice
|
||||||
has_encrypted = query_params.pop("has_encrypted", False)
|
has_encrypted = query_params.pop("has_encrypted", False)
|
||||||
|
|
||||||
for segment in segments:
|
# Add EXT-X-MAP for init segment (for live streams or when beneficial)
|
||||||
hls.append(f'#EXTINF:{segment["extinf"]:.3f},')
|
if use_map:
|
||||||
query_params.update(
|
init_query_params = {
|
||||||
{"init_url": init_url, "segment_url": segment["media"], "mime_type": profile["mimeType"]}
|
"init_url": init_url,
|
||||||
|
"mime_type": profile["mimeType"],
|
||||||
|
"is_live": "true" if is_live else "false",
|
||||||
|
}
|
||||||
|
if init_range:
|
||||||
|
init_query_params["init_range"] = init_range
|
||||||
|
# Add key parameters
|
||||||
|
if query_params.get("key_id"):
|
||||||
|
init_query_params["key_id"] = query_params["key_id"]
|
||||||
|
if query_params.get("key"):
|
||||||
|
init_query_params["key"] = query_params["key"]
|
||||||
|
# Add api_password for authentication
|
||||||
|
if query_params.get("api_password"):
|
||||||
|
init_query_params["api_password"] = query_params["api_password"]
|
||||||
|
|
||||||
|
init_map_url = encode_mediaflow_proxy_url(
|
||||||
|
init_proxy_url,
|
||||||
|
query_params=init_query_params,
|
||||||
|
encryption_handler=encryption_handler if has_encrypted else None,
|
||||||
)
|
)
|
||||||
|
hls.append(f'#EXT-X-MAP:URI="{init_map_url}"')
|
||||||
|
|
||||||
|
need_discontinuity = False
|
||||||
|
for segment in trimmed_segments:
|
||||||
|
duration = segment["extinf"]
|
||||||
|
|
||||||
|
# Check if this segment should be skipped
|
||||||
|
if skip_filter:
|
||||||
|
if skip_filter.should_skip_segment(duration):
|
||||||
|
skip_filter.advance_time(duration)
|
||||||
|
skipped_segments += 1
|
||||||
|
need_discontinuity = True
|
||||||
|
continue
|
||||||
|
skip_filter.advance_time(duration)
|
||||||
|
|
||||||
|
# Add discontinuity marker after skipped segments
|
||||||
|
if need_discontinuity:
|
||||||
|
hls.append("#EXT-X-DISCONTINUITY")
|
||||||
|
need_discontinuity = False
|
||||||
|
|
||||||
|
# Emit EXT-X-PROGRAM-DATE-TIME only for fMP4 (not TS)
|
||||||
|
program_date_time = segment.get("program_date_time")
|
||||||
|
if program_date_time and not is_ts_mode:
|
||||||
|
hls.append(f"#EXT-X-PROGRAM-DATE-TIME:{program_date_time}")
|
||||||
|
hls.append(f"#EXTINF:{duration:.3f},")
|
||||||
|
|
||||||
|
segment_query_params = {
|
||||||
|
"init_url": init_url,
|
||||||
|
"segment_url": segment["media"],
|
||||||
|
"mime_type": profile["mimeType"],
|
||||||
|
"is_live": "true" if is_live else "false",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Add use_map flag so segment endpoint knows not to include init
|
||||||
|
if use_map and not is_ts_mode:
|
||||||
|
segment_query_params["use_map"] = "true"
|
||||||
|
elif is_ts_mode:
|
||||||
|
# TS segments are self-contained; init is always embedded by remuxer
|
||||||
|
segment_query_params["use_map"] = "false"
|
||||||
|
|
||||||
|
# Add byte range parameters for SegmentBase
|
||||||
|
if init_range:
|
||||||
|
segment_query_params["init_range"] = init_range
|
||||||
|
# Segment may also have its own range (for SegmentBase)
|
||||||
|
if "initRange" in segment:
|
||||||
|
segment_query_params["init_range"] = segment["initRange"]
|
||||||
|
|
||||||
|
query_params.update(segment_query_params)
|
||||||
hls.append(
|
hls.append(
|
||||||
encode_mediaflow_proxy_url(
|
encode_mediaflow_proxy_url(
|
||||||
proxy_url,
|
proxy_url,
|
||||||
@@ -230,5 +608,8 @@ def build_hls_playlist(mpd_dict: dict, profiles: list[dict], request: Request) -
|
|||||||
if not mpd_dict["isLive"]:
|
if not mpd_dict["isLive"]:
|
||||||
hls.append("#EXT-X-ENDLIST")
|
hls.append("#EXT-X-ENDLIST")
|
||||||
|
|
||||||
|
if skip_filter and skipped_segments > 0:
|
||||||
|
logger.info(f"Added {added_segments} segments to HLS playlist (skipped {skipped_segments} segments)")
|
||||||
|
else:
|
||||||
logger.info(f"Added {added_segments} segments to HLS playlist")
|
logger.info(f"Added {added_segments} segments to HLS playlist")
|
||||||
return "\n".join(hls)
|
return "\n".join(hls)
|
||||||
|
|||||||
18
mediaflow_proxy/remuxer/__init__.py
Normal file
18
mediaflow_proxy/remuxer/__init__.py
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
"""
|
||||||
|
Media remuxer package.
|
||||||
|
|
||||||
|
Provides pure Python implementations for media container parsing, remuxing,
|
||||||
|
and transcoding:
|
||||||
|
|
||||||
|
- ebml_parser: Minimal EBML/MKV parser for seeking and demuxing
|
||||||
|
- ts_muxer: fMP4 -> MPEG-TS remuxer
|
||||||
|
- mkv_demuxer: Streaming MKV demuxer
|
||||||
|
- mp4_muxer: MP4 box builder for standard moov-first MP4
|
||||||
|
- audio_transcoder: PyAV-based audio frame transcoding
|
||||||
|
- video_transcoder: GPU-accelerated video transcoding via PyAV
|
||||||
|
- pyav_demuxer: Universal PyAV-based streaming demuxer (any container)
|
||||||
|
- codec_utils: Codec compatibility detection and decision engine
|
||||||
|
- media_source: Abstract MediaSource protocol (Telegram, HTTP, etc.)
|
||||||
|
- transcode_handler: Shared transcode request orchestrator
|
||||||
|
- transcode_pipeline: MKV fast-path and universal transcode pipelines
|
||||||
|
"""
|
||||||
BIN
mediaflow_proxy/remuxer/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
mediaflow_proxy/remuxer/__pycache__/codec_utils.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/codec_utils.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
mediaflow_proxy/remuxer/__pycache__/ebml_parser.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/ebml_parser.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/remuxer/__pycache__/hls_manifest.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/hls_manifest.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/remuxer/__pycache__/media_source.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/media_source.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/remuxer/__pycache__/mkv_demuxer.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/mkv_demuxer.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/remuxer/__pycache__/mp4_muxer.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/mp4_muxer.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/remuxer/__pycache__/mp4_parser.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/mp4_parser.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/remuxer/__pycache__/pyav_demuxer.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/pyav_demuxer.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
mediaflow_proxy/remuxer/__pycache__/ts_muxer.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/ts_muxer.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
351
mediaflow_proxy/remuxer/audio_transcoder.py
Normal file
351
mediaflow_proxy/remuxer/audio_transcoder.py
Normal file
@@ -0,0 +1,351 @@
|
|||||||
|
"""
|
||||||
|
PyAV-based audio transcoder for frame-level codec conversion.
|
||||||
|
|
||||||
|
Transcodes audio frames between codecs using PyAV's CodecContext API
|
||||||
|
(Python bindings for FFmpeg's libavcodec). This provides in-process
|
||||||
|
audio transcoding without subprocess management or pipe overhead.
|
||||||
|
|
||||||
|
Supported input codecs: EAC3, AC3, AAC, Opus, Vorbis, FLAC, MP3
|
||||||
|
Output codec: AAC-LC (stereo, configurable bitrate)
|
||||||
|
|
||||||
|
Architecture:
|
||||||
|
raw_frame_bytes -> parse() -> decode() -> resample() -> encode() -> raw_aac_bytes
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
transcoder = AudioTranscoder("eac3", sample_rate=48000, channels=6)
|
||||||
|
for raw_eac3_frame in frames:
|
||||||
|
aac_frames = transcoder.transcode(raw_eac3_frame)
|
||||||
|
for aac_data in aac_frames:
|
||||||
|
write(aac_data)
|
||||||
|
# Flush remaining frames
|
||||||
|
for aac_data in transcoder.flush():
|
||||||
|
write(aac_data)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
import av
|
||||||
|
from av.audio.resampler import AudioResampler
|
||||||
|
|
||||||
|
from mediaflow_proxy.remuxer.ebml_parser import (
|
||||||
|
CODEC_ID_AAC,
|
||||||
|
CODEC_ID_AC3,
|
||||||
|
CODEC_ID_EAC3,
|
||||||
|
CODEC_ID_FLAC,
|
||||||
|
CODEC_ID_OPUS,
|
||||||
|
CODEC_ID_VORBIS,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _generate_silence_aac_frame() -> bytes | None:
|
||||||
|
"""Pre-encode a single silent AAC frame (48 kHz stereo, 1024 samples).
|
||||||
|
|
||||||
|
PyAV's AAC encoder has an intermittent ``avcodec_send_frame`` bug when
|
||||||
|
rapidly creating/destroying codec contexts, so we retry a few times.
|
||||||
|
This function is called once at module load; the result is cached in
|
||||||
|
``_SILENCE_AAC_FRAME``.
|
||||||
|
"""
|
||||||
|
for _attempt in range(10):
|
||||||
|
try:
|
||||||
|
enc = av.CodecContext.create("aac", "w")
|
||||||
|
enc.sample_rate = 48000
|
||||||
|
enc.layout = "stereo"
|
||||||
|
enc.format = av.AudioFormat("fltp")
|
||||||
|
enc.bit_rate = 192000
|
||||||
|
enc.open()
|
||||||
|
|
||||||
|
frame = av.AudioFrame(
|
||||||
|
format=enc.format.name,
|
||||||
|
layout=enc.layout.name,
|
||||||
|
samples=enc.frame_size or 1024,
|
||||||
|
)
|
||||||
|
frame.sample_rate = enc.sample_rate
|
||||||
|
frame.pts = 0
|
||||||
|
|
||||||
|
for pkt in enc.encode(frame):
|
||||||
|
return bytes(pkt)
|
||||||
|
# AAC priming delay: first encode buffered; flush to retrieve
|
||||||
|
for pkt in enc.encode(None):
|
||||||
|
return bytes(pkt)
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level silence frame -- generated once, reused by every transcoder.
|
||||||
|
_SILENCE_AAC_FRAME: bytes | None = _generate_silence_aac_frame()
|
||||||
|
|
||||||
|
# Map MKV codec IDs to PyAV/FFmpeg codec names
|
||||||
|
_MKV_TO_FFMPEG_CODEC = {
|
||||||
|
CODEC_ID_EAC3: "eac3",
|
||||||
|
CODEC_ID_AC3: "ac3",
|
||||||
|
CODEC_ID_AAC: "aac",
|
||||||
|
CODEC_ID_OPUS: "opus",
|
||||||
|
CODEC_ID_VORBIS: "vorbis",
|
||||||
|
CODEC_ID_FLAC: "flac",
|
||||||
|
"A_DTS": "dts",
|
||||||
|
"A_MP3": "mp3",
|
||||||
|
"A_MPEG/L3": "mp3",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Codecs that need transcoding to AAC for browser playback
|
||||||
|
NEEDS_TRANSCODE = frozenset(
|
||||||
|
{
|
||||||
|
CODEC_ID_EAC3,
|
||||||
|
CODEC_ID_AC3,
|
||||||
|
CODEC_ID_OPUS,
|
||||||
|
CODEC_ID_VORBIS,
|
||||||
|
CODEC_ID_FLAC,
|
||||||
|
"A_DTS",
|
||||||
|
"A_MP3",
|
||||||
|
"A_MPEG/L3",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Output AAC settings
|
||||||
|
_OUTPUT_CODEC = "aac"
|
||||||
|
_OUTPUT_SAMPLE_FORMAT = "fltp" # AAC requires float planar
|
||||||
|
_OUTPUT_LAYOUT = "stereo"
|
||||||
|
|
||||||
|
# Map channel count -> FFmpeg layout name
|
||||||
|
_CHANNEL_LAYOUT_MAP = {
|
||||||
|
1: "mono",
|
||||||
|
2: "stereo",
|
||||||
|
3: "2.1",
|
||||||
|
4: "quad",
|
||||||
|
6: "5.1",
|
||||||
|
8: "7.1",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def needs_transcode(codec_id: str) -> bool:
|
||||||
|
"""Check if an MKV audio codec needs transcoding for browser playback."""
|
||||||
|
return codec_id in NEEDS_TRANSCODE
|
||||||
|
|
||||||
|
|
||||||
|
def get_ffmpeg_codec_name(mkv_codec_id: str) -> str | None:
|
||||||
|
"""Map an MKV CodecID to an FFmpeg codec name."""
|
||||||
|
return _MKV_TO_FFMPEG_CODEC.get(mkv_codec_id)
|
||||||
|
|
||||||
|
|
||||||
|
class AudioTranscoder:
|
||||||
|
"""
|
||||||
|
In-process audio transcoder using PyAV's CodecContext API.
|
||||||
|
|
||||||
|
Decodes raw audio frames from one codec and encodes them to AAC-LC
|
||||||
|
stereo, suitable for MP4 container and browser playback. No container
|
||||||
|
I/O or subprocess involved -- operates directly on raw frame bytes.
|
||||||
|
|
||||||
|
The transcoder handles sample format conversion and resampling
|
||||||
|
automatically via AudioResampler.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
input_codec: str,
|
||||||
|
input_sample_rate: int = 48000,
|
||||||
|
input_channels: int = 6,
|
||||||
|
output_sample_rate: int = 48000,
|
||||||
|
output_channels: int = 2,
|
||||||
|
output_bitrate: int = 192000,
|
||||||
|
) -> None:
|
||||||
|
"""
|
||||||
|
Initialize the transcoder.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
input_codec: FFmpeg codec name (e.g., "eac3", "ac3", "aac").
|
||||||
|
input_sample_rate: Input sample rate in Hz.
|
||||||
|
input_channels: Input channel count.
|
||||||
|
output_sample_rate: Output sample rate in Hz (default 48000).
|
||||||
|
output_channels: Output channel count (default 2 = stereo).
|
||||||
|
output_bitrate: Output bitrate in bits/s (default 192000).
|
||||||
|
"""
|
||||||
|
# Set up decoder -- use layout to configure channel count
|
||||||
|
# (PyAV's channels property is read-only; layout drives it)
|
||||||
|
self._decoder = av.CodecContext.create(input_codec, "r")
|
||||||
|
self._decoder.sample_rate = input_sample_rate
|
||||||
|
input_layout = _CHANNEL_LAYOUT_MAP.get(input_channels, "stereo")
|
||||||
|
self._decoder.layout = input_layout
|
||||||
|
|
||||||
|
# Set up encoder
|
||||||
|
self._encoder = av.CodecContext.create(_OUTPUT_CODEC, "w")
|
||||||
|
self._encoder.sample_rate = output_sample_rate
|
||||||
|
self._encoder.layout = _OUTPUT_LAYOUT
|
||||||
|
self._encoder.format = av.AudioFormat(_OUTPUT_SAMPLE_FORMAT)
|
||||||
|
self._encoder.bit_rate = output_bitrate
|
||||||
|
self._encoder.open()
|
||||||
|
|
||||||
|
# Set up resampler for format/rate/channel conversion
|
||||||
|
self._resampler = AudioResampler(
|
||||||
|
format=_OUTPUT_SAMPLE_FORMAT,
|
||||||
|
layout=_OUTPUT_LAYOUT,
|
||||||
|
rate=output_sample_rate,
|
||||||
|
)
|
||||||
|
|
||||||
|
self._input_codec = input_codec
|
||||||
|
self._frames_decoded = 0
|
||||||
|
self._frames_encoded = 0
|
||||||
|
self._audio_specific_config: bytes | None = None
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"[audio_transcoder] Initialized: %s %dHz %dch -> aac %dHz %dch @%dk",
|
||||||
|
input_codec,
|
||||||
|
input_sample_rate,
|
||||||
|
input_channels,
|
||||||
|
output_sample_rate,
|
||||||
|
output_channels,
|
||||||
|
output_bitrate // 1000,
|
||||||
|
)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def audio_specific_config(self) -> bytes | None:
|
||||||
|
"""
|
||||||
|
AAC AudioSpecificConfig from the encoder (available after first encode).
|
||||||
|
|
||||||
|
This is needed for the MP4 esds box.
|
||||||
|
"""
|
||||||
|
if self._audio_specific_config is not None:
|
||||||
|
return self._audio_specific_config
|
||||||
|
|
||||||
|
# PyAV exposes extradata after the encoder is opened
|
||||||
|
if self._encoder.extradata:
|
||||||
|
self._audio_specific_config = bytes(self._encoder.extradata)
|
||||||
|
return self._audio_specific_config
|
||||||
|
return None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def output_sample_rate(self) -> int:
|
||||||
|
return self._encoder.sample_rate
|
||||||
|
|
||||||
|
@property
|
||||||
|
def output_channels(self) -> int:
|
||||||
|
return self._encoder.channels
|
||||||
|
|
||||||
|
@property
|
||||||
|
def frame_size(self) -> int:
|
||||||
|
"""AAC frame size (samples per frame), typically 1024."""
|
||||||
|
return self._encoder.frame_size or 1024
|
||||||
|
|
||||||
|
    def transcode(self, raw_frame_data: bytes) -> list[bytes]:
        """
        Transcode a raw audio frame from the input codec to AAC.

        Pipeline: parse -> decode (input codec) -> resample (to the
        encoder's rate/layout/sample format) -> encode (AAC).

        Args:
            raw_frame_data: Raw audio frame bytes (one codec frame, e.g.,
                one EAC3 sync frame).

        Returns:
            List of raw AAC frame bytes. May return 0, 1, or more frames
            depending on codec frame sizes and buffering.
        """
        output: list[bytes] = []

        # Parse raw bytes into packets (the parser may buffer partial input,
        # so this can yield zero or several packets per call).
        packets = self._decoder.parse(raw_frame_data)

        for packet in packets:
            # Decode to PCM frames
            try:
                decoded_frames = self._decoder.decode(packet)
            except av.error.InvalidDataError as e:
                # Corrupt input frame: drop it and keep the stream alive.
                logger.debug("[audio_transcoder] Decode error (skipping frame): %s", e)
                continue

            for frame in decoded_frames:
                self._frames_decoded += 1

                # Resample to match encoder format
                resampled = self._resampler.resample(frame)
                if resampled is None:
                    continue

                # resampled can be a single frame or list of frames
                # (depends on the PyAV version).
                if not isinstance(resampled, list):
                    resampled = [resampled]

                for rs_frame in resampled:
                    # Encode to AAC
                    try:
                        encoded_packets = self._encoder.encode(rs_frame)
                    except av.error.InvalidDataError as e:
                        logger.debug("[audio_transcoder] Encode error: %s", e)
                        continue

                    for enc_packet in encoded_packets:
                        self._frames_encoded += 1
                        output.append(bytes(enc_packet))

        return output
|
||||||
|
|
||||||
|
    def flush(self) -> list[bytes]:
        """
        Flush the decoder and encoder buffers.

        Call this when the input stream ends to get remaining frames.
        Flushing proceeds in pipeline order (decoder -> resampler ->
        encoder); each stage is wrapped in its own try so a failure in
        one stage still lets the later stages drain.

        Returns:
            List of remaining raw AAC frame bytes.
        """
        output: list[bytes] = []

        # Flush decoder (decode(None) drains its internal buffer).
        try:
            for frame in self._decoder.decode(None):
                self._frames_decoded += 1
                resampled = self._resampler.resample(frame)
                if resampled is None:
                    continue
                if not isinstance(resampled, list):
                    resampled = [resampled]
                for rs_frame in resampled:
                    for enc_packet in self._encoder.encode(rs_frame):
                        self._frames_encoded += 1
                        output.append(bytes(enc_packet))
        except Exception as e:
            logger.debug("[audio_transcoder] Decoder flush error: %s", e)

        # Flush resampler (resample(None) emits any buffered samples).
        try:
            resampled = self._resampler.resample(None)
            if resampled is not None:
                if not isinstance(resampled, list):
                    resampled = [resampled]
                for rs_frame in resampled:
                    for enc_packet in self._encoder.encode(rs_frame):
                        self._frames_encoded += 1
                        output.append(bytes(enc_packet))
        except Exception as e:
            logger.debug("[audio_transcoder] Resampler flush error: %s", e)

        # Flush encoder (encode(None) drains delayed AAC packets).
        try:
            for enc_packet in self._encoder.encode(None):
                self._frames_encoded += 1
                output.append(bytes(enc_packet))
        except Exception as e:
            logger.debug("[audio_transcoder] Encoder flush error: %s", e)

        logger.info(
            "[audio_transcoder] Flushed: %d decoded, %d encoded total",
            self._frames_decoded,
            self._frames_encoded,
        )
        return output
|
||||||
|
|
||||||
|
    def generate_silence_frame(self) -> bytes | None:
        """Return a pre-encoded silent AAC frame (module-level singleton)."""
        # Shared constant encoded at module scope; may be None if that
        # one-time encoding failed.
        return _SILENCE_AAC_FRAME
|
||||||
|
|
||||||
|
def close(self) -> None:
|
||||||
|
"""Release codec contexts (best-effort; PyAV AudioCodecContext may not have close())."""
|
||||||
|
for ctx in (self._decoder, self._encoder):
|
||||||
|
try:
|
||||||
|
if hasattr(ctx, "close"):
|
||||||
|
ctx.close()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
    def __del__(self) -> None:
        # Safety net: release codec contexts even if the owner forgot close().
        self.close()
|
||||||
515
mediaflow_proxy/remuxer/codec_utils.py
Normal file
515
mediaflow_proxy/remuxer/codec_utils.py
Normal file
@@ -0,0 +1,515 @@
|
|||||||
|
"""
|
||||||
|
Codec decision engine for browser compatibility detection.
|
||||||
|
|
||||||
|
Determines whether video/audio streams need transcoding for browser
|
||||||
|
playback and selects appropriate output codecs.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import struct
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# ────────────────────────────────────────────────────────────────────
# Browser-compatible codecs (work natively in HTML5 <video>)
# ────────────────────────────────────────────────────────────────────
# NOTE: each table mixes MKV CodecID strings (e.g. "V_MPEG4/ISO/AVC")
# with FFmpeg/PyAV codec names (e.g. "h264") so lookups work no matter
# which layer supplied the identifier.
BROWSER_VIDEO_CODECS = frozenset(
    {
        "V_MPEG4/ISO/AVC",  # H.264/AVC -- universal
        "h264",
        "avc1",  # FFmpeg/PyAV names
    }
)

BROWSER_AUDIO_CODECS = frozenset(
    {
        "A_AAC",  # AAC-LC -- universal
        "A_AAC/MPEG2/LC",
        "A_AAC/MPEG4/LC",
        "aac",  # FFmpeg/PyAV name
    }
)

# ────────────────────────────────────────────────────────────────────
# Video codecs that need re-encoding to H.264
# ────────────────────────────────────────────────────────────────────
VIDEO_NEEDS_REENCODE = frozenset(
    {
        "V_MPEGH/ISO/HEVC",  # H.265/HEVC (Chrome/Firefox don't support)
        "V_MPEG2",  # MPEG-2 (DVD-era)
        "V_MPEG4/ISO/SP",  # MPEG-4 Part 2 Simple Profile
        "V_MPEG4/ISO/ASP",  # MPEG-4 Part 2 Advanced Simple (DivX/Xvid)
        "V_MPEG4/ISO/AP",  # MPEG-4 Part 2 Advanced Profile
        "V_MPEG4/MS/V3",  # MS MPEG-4 v3 (WMV)
        "V_MS/VFW/FOURCC",  # Generic VFW (VC-1, etc.)
        "V_REAL/RV10",
        "V_REAL/RV20",
        "V_REAL/RV30",
        "V_REAL/RV40",
        "V_THEORA",
        "V_VP8",
        "V_VP9",  # VP9 in MKV (needs WebM container for browser)
        "V_AV1",  # AV1 (partial support, safer to reencode)
        # PyAV / FFmpeg codec names
        "hevc",
        "h265",
        "mpeg2video",
        "mpeg4",
        "vc1",
        "vp8",
        "vp9",
        "av1",
        "theora",
        "wmv3",
        "rv30",
        "rv40",
    }
)

# ────────────────────────────────────────────────────────────────────
# Audio codecs that need transcoding to AAC
# (superset of the list in audio_transcoder.py, uses both MKV and
# PyAV codec names for universal lookup)
# ────────────────────────────────────────────────────────────────────
AUDIO_NEEDS_TRANSCODE = frozenset(
    {
        # MKV CodecIDs
        "A_EAC3",
        "A_AC3",
        "A_DTS",
        "A_DTS/EXPRESS",
        "A_DTS/LOSSLESS",
        "A_OPUS",
        "A_VORBIS",
        "A_FLAC",
        "A_TRUEHD",
        "A_MLP",
        "A_PCM/INT/LIT",
        "A_PCM/INT/BIG",
        "A_PCM/FLOAT/IEEE",
        "A_REAL/28_8",
        "A_REAL/COOK",
        "A_REAL/SIPR",
        "A_REAL/ATRC",
        "A_MS/ACM",  # Generic Windows audio
        "A_MP3",
        "A_MPEG/L3",
        # PyAV / FFmpeg names
        "eac3",
        "ac3",
        "dts",
        "dca",
        "truehd",
        "mlp",
        "mp3",
        "opus",
        "vorbis",
        "flac",
        "pcm_s16le",
        "pcm_s24le",
        "pcm_f32le",
        "wmav2",
        "wmavoice",
        "wmapro",
        "cook",
        "sipr",
        "atrac3",
    }
)

# Map PyAV codec names to MKV CodecIDs (for the MKV fast-path)
_PYAV_TO_MKV_VIDEO = {
    "h264": "V_MPEG4/ISO/AVC",
    "hevc": "V_MPEGH/ISO/HEVC",
    "h265": "V_MPEGH/ISO/HEVC",
    "mpeg2video": "V_MPEG2",
    "vp8": "V_VP8",
    "vp9": "V_VP9",
    "av1": "V_AV1",
}

_PYAV_TO_MKV_AUDIO = {
    "aac": "A_AAC",
    "eac3": "A_EAC3",
    "ac3": "A_AC3",
    "dts": "A_DTS",
    "opus": "A_OPUS",
    "vorbis": "A_VORBIS",
    "flac": "A_FLAC",
    "mp3": "A_MPEG/L3",
    "truehd": "A_TRUEHD",
}
|
||||||
|
|
||||||
|
|
||||||
|
# ────────────────────────────────────────────────────────────────────
# NAL unit format conversion (Annex B ↔ AVCC)
# ────────────────────────────────────────────────────────────────────

# H.264 NAL types that belong in the init segment (avcC), not in samples:
# 7 = SPS, 8 = PPS, 9 = AUD (access unit delimiter).
_H264_PARAM_NAL_TYPES = frozenset({7, 8, 9})  # SPS, PPS, AUD
|
||||||
|
|
||||||
|
|
||||||
|
def _find_annexb_nals(data: bytes) -> list[tuple[int, int]]:
|
||||||
|
"""
|
||||||
|
Find all NAL unit [start, end) byte ranges in Annex B formatted data.
|
||||||
|
|
||||||
|
Handles both 3-byte (00 00 01) and 4-byte (00 00 00 01) start codes.
|
||||||
|
Returns a list of (start, end) tuples pointing into *data*.
|
||||||
|
"""
|
||||||
|
size = len(data)
|
||||||
|
nals: list[tuple[int, int]] = []
|
||||||
|
i = 0
|
||||||
|
|
||||||
|
while i < size - 2:
|
||||||
|
# Scan for 0x000001 or 0x00000001
|
||||||
|
if data[i] != 0:
|
||||||
|
i += 1
|
||||||
|
continue
|
||||||
|
if data[i + 1] != 0:
|
||||||
|
i += 2
|
||||||
|
continue
|
||||||
|
if data[i + 2] == 1:
|
||||||
|
nal_start = i + 3
|
||||||
|
elif data[i + 2] == 0 and i + 3 < size and data[i + 3] == 1:
|
||||||
|
nal_start = i + 4
|
||||||
|
else:
|
||||||
|
i += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Record end of previous NAL
|
||||||
|
if nals:
|
||||||
|
nals[-1] = (nals[-1][0], i)
|
||||||
|
nals.append((nal_start, size))
|
||||||
|
i = nal_start
|
||||||
|
|
||||||
|
return nals
|
||||||
|
|
||||||
|
|
||||||
|
def is_annexb(data: bytes) -> bool:
    """
    Return True if *data* starts with an Annex B start code.

    Disambiguates AVCC (4-byte length prefix) from Annex B when the
    data begins with ``00 00 01 xx``: if interpreting the first four
    bytes as a big-endian NAL length gives a value that fits within
    *data* and the following byte is a plausible H.264 NAL header,
    the buffer is AVCC rather than Annex B.
    """
    if len(data) < 5:
        return False

    prefix = data[:4]

    # Unambiguous 4-byte start code: 00 00 00 01.
    if prefix == b"\x00\x00\x00\x01":
        return True

    # 3-byte start code 00 00 01 -- could also be an AVCC length of
    # the form 0x000001xx (256..511), so disambiguate.
    if prefix[:3] == b"\x00\x00\x01":
        candidate_len = int.from_bytes(prefix, "big")
        if 0 < candidate_len <= len(data) - 4:
            header = data[4]
            has_forbidden_bit = bool(header & 0x80)
            nal_unit_type = header & 0x1F
            if not has_forbidden_bit and 1 <= nal_unit_type <= 12:
                # Valid length + valid NAL header byte: treat as AVCC.
                return False
        # AVCC interpretation implausible: treat as Annex B.
        return True

    return False
|
||||||
|
|
||||||
|
|
||||||
|
def annexb_to_avcc(data: bytes, filter_ps: bool = True) -> bytes:
    """
    Convert Annex B (start-code-prefixed) NAL units to AVCC
    (4-byte length-prefixed) format suitable for fMP4 samples.

    Args:
        data: H.264 access unit in Annex B format.
        filter_ps: If True, strip SPS/PPS/AUD NAL units (they belong
            in the avcC box of the init segment, not in samples).

    Returns:
        The same NAL units with 4-byte big-endian length prefixes.
    """
    # Pass through anything empty or already in AVCC form.
    if not data or not is_annexb(data):
        return data

    nal_ranges = _find_annexb_nals(data)
    if not nal_ranges:
        return data

    converted = bytearray()
    for nal_start, nal_end in nal_ranges:
        # Drop trailing zero padding that precedes the next start code.
        while nal_end > nal_start and data[nal_end - 1] == 0:
            nal_end -= 1
        if nal_end <= nal_start:
            continue

        if filter_ps and (data[nal_start] & 0x1F) in _H264_PARAM_NAL_TYPES:
            continue

        nal_len = nal_end - nal_start
        converted += nal_len.to_bytes(4, "big")
        converted += data[nal_start:nal_end]

    # If every NAL was filtered out (e.g. packet only contains SPS/PPS/AUD),
    # return empty so callers can drop this sample. Returning original Annex-B
    # bytes here would corrupt fMP4 samples (expects AVCC length prefixes).
    return bytes(converted)
|
||||||
|
|
||||||
|
|
||||||
|
# H.264 profiles that require the avcC High Profile extension fields
# (chroma_format_idc, bit_depth_luma/chroma, numSpsExt).
# Values are AVCProfileIndication codes, e.g. 100 = High, 110 = High 10,
# 122 = High 4:2:2, 244 = High 4:4:4 Predictive.
_HIGH_PROFILE_IDCS = frozenset({100, 110, 122, 244, 44, 83, 86, 118, 128, 138, 139, 134})
|
||||||
|
|
||||||
|
|
||||||
|
def _fix_avcc_high_profile(avcc: bytes) -> bytes:
    """
    Ensure an avcC record includes High Profile extension bytes.

    ISO/IEC 14496-15 requires additional fields after the PPS section
    when ``AVCProfileIndication`` is a High Profile variant (100, 110,
    122, 244, ...). Some MKV muxers omit them, leaving decoders without
    the chroma format / bit depth and causing widespread decode errors.

    If the extensions are missing, appends the defaults for 4:2:0 /
    8-bit with zero extended SPS sets; otherwise returns *avcc* as-is.
    """
    if len(avcc) < 7:
        return avcc
    if avcc[0] != 1:
        # Not an avcC record (configurationVersion must be 1).
        return avcc
    if avcc[1] not in _HIGH_PROFILE_IDCS:
        # Baseline/Main etc. never carry the extension block.
        return avcc

    def _skip_param_sets(pos: int, count: int) -> int | None:
        """Advance *pos* past *count* length-prefixed parameter sets."""
        for _ in range(count):
            if pos + 2 > len(avcc):
                return None  # Truncated record
            (ps_len,) = struct.unpack(">H", avcc[pos : pos + 2])
            pos += 2 + ps_len
        return pos

    # Walk past the SPS section.
    cursor = _skip_param_sets(6, avcc[5] & 0x1F)
    if cursor is None or cursor >= len(avcc):
        return avcc

    # Walk past the PPS section.
    cursor = _skip_param_sets(cursor + 1, avcc[cursor])
    if cursor is None:
        return avcc

    if cursor < len(avcc):
        # Bytes remain after the PPS section: extensions already present.
        return avcc

    # Append default High Profile extensions:
    #   0xFD = reserved 111111 + chroma_format_idc = 1 (4:2:0)
    #   0xF8 = reserved 11111  + bit_depth_luma_minus8 = 0
    #   0xF8 = reserved 11111  + bit_depth_chroma_minus8 = 0
    #   0x00 = numOfSequenceParameterSetExt
    return avcc + b"\xfd\xf8\xf8\x00"
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_avcc_extradata(extradata: bytes) -> bytes:
    """
    Ensure h264 extradata is in avcC format for the fMP4 init segment.

    PyAV returns extradata in the container's native format:
    - MKV/MP4: avcC format (starts with 0x01)
    - MPEG-TS: Annex B format (starts with 0x00 0x00)

    If Annex B, parses SPS/PPS NAL units and builds a proper avcC.
    If already avcC, validates and fixes High Profile extension fields.
    """
    if not extradata or len(extradata) < 4:
        return extradata

    # configurationVersion == 1 marks data already in avcC form.
    if extradata[0] == 0x01:
        return _fix_avcc_high_profile(extradata)

    # Otherwise scan Annex B NAL units for parameter sets.
    nal_ranges = _find_annexb_nals(extradata)
    if not nal_ranges:
        return extradata

    sps_units: list[bytes] = []
    pps_units: list[bytes] = []
    for begin, stop in nal_ranges:
        # Trim zero padding before the following start code.
        while stop > begin and extradata[stop - 1] == 0:
            stop -= 1
        if stop <= begin:
            continue
        unit = extradata[begin:stop]
        kind = unit[0] & 0x1F
        if kind == 7:  # SPS
            sps_units.append(unit)
        elif kind == 8:  # PPS
            pps_units.append(unit)

    if not sps_units or len(sps_units[0]) < 4:
        # Cannot build an avcC record without a usable SPS.
        return extradata

    sps0 = sps_units[0]
    record = bytearray(
        (
            1,  # configurationVersion
            sps0[1],  # AVCProfileIndication
            sps0[2],  # profile_compatibility
            sps0[3],  # AVCLevelIndication
            0xFF,  # 6 bits reserved + lengthSizeMinusOne=3 -> 4-byte NAL lengths
            0xE0 | len(sps_units),  # 3 bits reserved + numOfSPS
        )
    )
    for unit in sps_units:
        record += struct.pack(">H", len(unit)) + unit
    record.append(len(pps_units))  # numOfPPS
    for unit in pps_units:
        record += struct.pack(">H", len(unit)) + unit

    return _fix_avcc_high_profile(bytes(record))
|
||||||
|
|
||||||
|
|
||||||
|
def extract_sps_pps_from_annexb(data: bytes) -> bytes:
    """
    Extract SPS and PPS NAL units from Annex B encoded data and build
    an avcC-format extradata blob.

    Hardware encoders like VideoToolbox embed SPS/PPS as in-band NAL
    units in their first keyframe output rather than setting extradata
    on the codec context. This function finds those parameter sets
    and returns proper avcC bytes suitable for the fMP4 init segment.

    Returns:
        avcC bytes if SPS/PPS were found, empty bytes otherwise.
    """
    if not data or not is_annexb(data):
        return b""

    nal_ranges = _find_annexb_nals(data)
    if not nal_ranges:
        return b""

    sps_units: list[bytes] = []
    pps_units: list[bytes] = []
    for begin, stop in nal_ranges:
        # Strip trailing zero padding before the next start code.
        while stop > begin and data[stop - 1] == 0:
            stop -= 1
        if stop <= begin:
            continue
        unit = data[begin:stop]
        kind = unit[0] & 0x1F
        if kind == 7:  # SPS
            sps_units.append(unit)
        elif kind == 8:  # PPS
            pps_units.append(unit)

    if not sps_units or len(sps_units[0]) < 4:
        return b""

    sps0 = sps_units[0]
    record = bytearray(
        (
            1,  # configurationVersion
            sps0[1],  # AVCProfileIndication
            sps0[2],  # profile_compatibility
            sps0[3],  # AVCLevelIndication
            0xFF,  # 6 bits reserved + lengthSizeMinusOne=3
            0xE0 | len(sps_units),  # 3 bits reserved + numOfSPS
        )
    )
    for unit in sps_units:
        record += struct.pack(">H", len(unit)) + unit
    record.append(len(pps_units))  # numOfPPS
    for unit in pps_units:
        record += struct.pack(">H", len(unit)) + unit

    return bytes(record)
|
||||||
|
|
||||||
|
|
||||||
|
def video_needs_reencode(codec_id: str) -> bool:
    """Check if a video codec requires re-encoding for browser playback."""
    # Empty/unknown codec ids are treated as not needing work.
    return bool(codec_id) and codec_id in VIDEO_NEEDS_REENCODE
|
||||||
|
|
||||||
|
|
||||||
|
def audio_needs_transcode(codec_id: str) -> bool:
    """Check if an audio codec requires transcoding for browser playback."""
    # Empty/unknown codec ids are treated as not needing work.
    return bool(codec_id) and codec_id in AUDIO_NEEDS_TRANSCODE
|
||||||
|
|
||||||
|
|
||||||
|
def is_browser_compatible(video_codec: str, audio_codec: str) -> bool:
    """
    Check if a video+audio combination is fully browser-compatible.

    Returns True only if BOTH video and audio can be played natively in
    an HTML5 <video> element inside an MP4 container. A missing (empty)
    codec on either side counts as compatible.
    """
    if video_codec and video_codec not in BROWSER_VIDEO_CODECS:
        return False
    if audio_codec and audio_codec not in BROWSER_AUDIO_CODECS:
        return False
    return True
|
||||||
|
|
||||||
|
|
||||||
|
class TranscodeDecision:
    """Result of analyzing a stream's codec compatibility."""

    __slots__ = ("transcode_video", "transcode_audio", "video_codec", "audio_codec")

    def __init__(self, video_codec: str = "", audio_codec: str = "") -> None:
        self.video_codec = video_codec
        self.audio_codec = audio_codec
        # Decide per-stream at construction time; the flags are then fixed.
        self.transcode_video = video_needs_reencode(video_codec)
        self.transcode_audio = audio_needs_transcode(audio_codec)

    @property
    def needs_transcode(self) -> bool:
        """True if any stream needs transcoding."""
        return self.transcode_video or self.transcode_audio

    @property
    def passthrough_ok(self) -> bool:
        """True if the stream can be served as-is to a browser."""
        return not self.needs_transcode

    def __repr__(self) -> str:
        conversions = [
            f"video:{self.video_codec}->h264" if self.transcode_video else None,
            f"audio:{self.audio_codec}->aac" if self.transcode_audio else None,
        ]
        described = [c for c in conversions if c] or ["passthrough"]
        return f"TranscodeDecision({', '.join(described)})"
|
||||||
614
mediaflow_proxy/remuxer/container_probe.py
Normal file
614
mediaflow_proxy/remuxer/container_probe.py
Normal file
@@ -0,0 +1,614 @@
|
|||||||
|
"""
|
||||||
|
Container format probing -- MKV Cues and MP4 moov.
|
||||||
|
|
||||||
|
Pure Python probing using EBML parsing (MKV) and struct-based atom
|
||||||
|
scanning (MP4). No FFmpeg dependency.
|
||||||
|
|
||||||
|
Source-agnostic: accepts any MediaSource protocol implementation
|
||||||
|
(Telegram, HTTP, etc.) for byte-range reads.
|
||||||
|
|
||||||
|
Provides:
|
||||||
|
- probe_mkv_cues: probe MKV file to extract seek index (MKVCueIndex)
|
||||||
|
- probe_mp4_moov: probe MP4 file to extract moov atom and build seek index (MP4Index)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import struct
|
||||||
|
|
||||||
|
from mediaflow_proxy.utils import redis_utils
|
||||||
|
from mediaflow_proxy.remuxer.ebml_parser import (
|
||||||
|
MKVCueIndex,
|
||||||
|
build_cue_index,
|
||||||
|
parse_ebml_header,
|
||||||
|
parse_seek_head,
|
||||||
|
CUES,
|
||||||
|
INFO,
|
||||||
|
)
|
||||||
|
from mediaflow_proxy.remuxer.mp4_parser import (
|
||||||
|
MP4Index,
|
||||||
|
build_cue_points_from_moov,
|
||||||
|
is_mp4_header,
|
||||||
|
rewrite_moov_offsets,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# How much of the MKV header to fetch for SeekHead + Info parsing
_HEADER_PROBE_SIZE = 64 * 1024  # 64 KB

# Max Cues element size we'll attempt to fetch
_MAX_CUES_SIZE = 2 * 1024 * 1024  # 2 MB

# Redis cache for MKV Cue indexes; keys are "mfp:cue_index:<cache_key>"
_CUE_INDEX_CACHE_PREFIX = "mfp:cue_index:"
_CUE_INDEX_CACHE_TTL = 3600  # 1 hour
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# MKV Cues probing
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def derive_cue_cache_key(
|
||||||
|
source_key: str = "",
|
||||||
|
*,
|
||||||
|
chat_id: str | int | None = None,
|
||||||
|
message_id: int | None = None,
|
||||||
|
file_id: str | None = None,
|
||||||
|
) -> str:
|
||||||
|
"""
|
||||||
|
Derive a deterministic cache key for a file's cue index.
|
||||||
|
|
||||||
|
Accepts either a pre-computed source_key (from MediaSource.cache_key)
|
||||||
|
or legacy Telegram-style parameters for backwards compatibility.
|
||||||
|
"""
|
||||||
|
if source_key:
|
||||||
|
return source_key
|
||||||
|
if file_id:
|
||||||
|
raw = f"file_id:{file_id}"
|
||||||
|
elif chat_id is not None and message_id is not None:
|
||||||
|
raw = f"chat:{chat_id}:msg:{message_id}"
|
||||||
|
else:
|
||||||
|
return ""
|
||||||
|
return hashlib.sha256(raw.encode()).hexdigest()[:16]
|
||||||
|
|
||||||
|
|
||||||
|
async def _get_cached_cue_index(cache_key: str) -> MKVCueIndex | None:
    """
    Try to load a MKVCueIndex from Redis cache.

    Returns None on any miss: empty key, Redis unavailable, key absent,
    or a cached payload that cannot be deserialized.
    """
    if not cache_key:
        return None
    r = await redis_utils.get_redis()
    if r is None:
        # Redis not configured/available -- behave like a cache miss.
        return None
    redis_key = f"{_CUE_INDEX_CACHE_PREFIX}{cache_key}"
    data = await r.get(redis_key)
    if not data:
        return None
    try:
        d = json.loads(data)
        # Binary fields are stored base64-encoded in the JSON payload
        # (see _set_cached_cue_index).
        seek_header = b""
        if d.get("seek_header_b64"):
            seek_header = base64.b64decode(d["seek_header_b64"])
        video_codec_private = b""
        if d.get("video_codec_private_b64"):
            video_codec_private = base64.b64decode(d["video_codec_private_b64"])
        index = MKVCueIndex(
            duration_ms=d["duration_ms"],
            timestamp_scale=d["timestamp_scale"],
            # JSON round-trips tuples as lists; rebuild (time, offset) pairs.
            cue_points=[(cp[0], cp[1]) for cp in d["cue_points"]],
            segment_data_offset=d["segment_data_offset"],
            first_cluster_offset=d.get("first_cluster_offset", 0),
            seek_header=seek_header,
            audio_codec_id=d.get("audio_codec_id", ""),
            audio_bitrate=d.get("audio_bitrate", 0),
            audio_channels=d.get("audio_channels", 0),
            audio_sample_rate=d.get("audio_sample_rate", 0.0),
            video_codec_id=d.get("video_codec_id", ""),
            video_codec_private=video_codec_private,
            video_width=d.get("video_width", 0),
            video_height=d.get("video_height", 0),
            video_fps=d.get("video_fps", 0.0),
            video_default_duration_ns=d.get("video_default_duration_ns", 0),
        )
        logger.debug("[container_probe] Loaded cue index from cache: %s", cache_key)
        return index
    except (KeyError, TypeError, json.JSONDecodeError) as e:
        # Stale or schema-incompatible payload: warn and fall back to re-probing.
        logger.warning("[container_probe] Invalid cached cue index: %s", e)
        return None
|
||||||
|
|
||||||
|
|
||||||
|
async def _set_cached_cue_index(cache_key: str, index: MKVCueIndex) -> None:
    """
    Cache a MKVCueIndex in Redis.

    No-op when the key is empty or Redis is unavailable. Binary fields
    are base64-encoded so the payload is valid JSON; the entry expires
    after _CUE_INDEX_CACHE_TTL seconds.
    """
    if not cache_key:
        return
    r = await redis_utils.get_redis()
    if r is None:
        return
    redis_key = f"{_CUE_INDEX_CACHE_PREFIX}{cache_key}"
    data = json.dumps(
        {
            "duration_ms": index.duration_ms,
            "timestamp_scale": index.timestamp_scale,
            "cue_points": index.cue_points,
            "segment_data_offset": index.segment_data_offset,
            "first_cluster_offset": index.first_cluster_offset,
            "seek_header_b64": base64.b64encode(index.seek_header).decode() if index.seek_header else "",
            "audio_codec_id": index.audio_codec_id,
            "audio_bitrate": index.audio_bitrate,
            "audio_channels": index.audio_channels,
            "audio_sample_rate": index.audio_sample_rate,
            "video_codec_id": index.video_codec_id,
            "video_codec_private_b64": base64.b64encode(index.video_codec_private).decode()
            if index.video_codec_private
            else "",
            "video_width": index.video_width,
            "video_height": index.video_height,
            "video_fps": index.video_fps,
            "video_default_duration_ns": index.video_default_duration_ns,
        }
    )
    await r.set(redis_key, data, ex=_CUE_INDEX_CACHE_TTL)
    logger.debug("[container_probe] Cached cue index: %s", cache_key)
|
||||||
|
|
||||||
|
|
||||||
|
async def probe_mkv_cues(
    source,
    file_size: int = 0,
    cache_key: str = "",
    header_data: bytes | None = None,
) -> MKVCueIndex | None:
    """
    Probe an MKV file's EBML header and Cues to build a seek index.

    Pure Python -- parses EBML structures directly, no FFmpeg involved.

    Makes up to two small byte-range reads via the provided source:
    1. First ~64KB: EBML header + SeekHead + Info (skipped if header_data provided)
    2. Cues section: byte range from SeekHead's Cues position

    Args:
        source: A MediaSource protocol implementation, or any object with
            a ``stream(offset, limit)`` async generator method.
        file_size: Total file size in bytes. If 0, tries ``source.file_size``.
        cache_key: Optional cache key for Redis caching. If empty, tries
            ``source.cache_key``.
        header_data: Pre-fetched header bytes (first ~64KB). If provided,
            skips the initial header fetch from source.

    Returns:
        MKVCueIndex if successful, None if the file has no Cues or parsing fails.
    """
    # Resolve file_size and cache_key from source if not provided
    if file_size <= 0:
        file_size = getattr(source, "file_size", 0)
    if not cache_key:
        cache_key = getattr(source, "cache_key", "")

    # Check cache first
    if cache_key:
        cached = await _get_cached_cue_index(cache_key)
        if cached:
            return cached

    try:
        # Step 1: Use pre-fetched header or fetch from source
        if header_data is None:
            # Clamp the probe to the file size so tiny files don't over-request.
            header_size = min(_HEADER_PROBE_SIZE, file_size) if file_size > 0 else _HEADER_PROBE_SIZE
            header_data = b""
            async for chunk in source.stream(offset=0, limit=header_size):
                header_data += chunk

        # A valid EBML header + SeekHead cannot fit in fewer bytes than this.
        if len(header_data) < 64:
            logger.warning("[container_probe] Header too small (%d bytes), cannot probe", len(header_data))
            return None

        # Step 2: Parse EBML header to find Segment data offset
        segment_data_offset = parse_ebml_header(header_data)

        # Step 3: Parse SeekHead to find Cues and Info positions
        seek_positions = parse_seek_head(header_data, segment_data_offset)

        if CUES not in seek_positions:
            logger.info("[container_probe] No Cues position in SeekHead, seeking not available")
            return None

        # SeekHead positions are relative to the start of the Segment data.
        cues_relative_offset = seek_positions[CUES]
        cues_absolute_offset = segment_data_offset + cues_relative_offset

        logger.info(
            "[container_probe] SeekHead: Cues at offset %d (absolute %d), Info at %s",
            cues_relative_offset,
            cues_absolute_offset,
            seek_positions.get(INFO, "not found"),
        )

        # Step 4: Fetch the Cues element
        cues_max = file_size - cues_absolute_offset if file_size > 0 else _MAX_CUES_SIZE
        cues_fetch_size = min(_MAX_CUES_SIZE, cues_max)
        if cues_fetch_size <= 0:
            logger.warning("[container_probe] Cues offset %d beyond file size %d", cues_absolute_offset, file_size)
            return None

        cues_data = b""
        async for chunk in source.stream(offset=cues_absolute_offset, limit=cues_fetch_size):
            cues_data += chunk

        if len(cues_data) < 16:
            logger.warning("[container_probe] Cues data too small (%d bytes)", len(cues_data))
            return None

        # Step 5: Build the cue index
        index = build_cue_index(
            header_data=header_data,
            cues_data=cues_data,
            cues_file_offset=cues_absolute_offset,
            segment_data_offset=segment_data_offset,
        )

        # Cache the result
        if cache_key:
            await _set_cached_cue_index(cache_key, index)

        return index

    except Exception as e:
        # Broad catch is deliberate: probing is best-effort and any parse or
        # transport error simply means "no seek index available".
        logger.warning("[container_probe] Failed to probe MKV cues: %s", e)
        return None
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# MP4 Moov probing
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Redis cache for MP4 indexes (moov_data itself is never cached -- too large).
_MP4_INDEX_CACHE_PREFIX = "mfp:mp4_index:"
_MP4_INDEX_CACHE_TTL = 3600  # 1 hour

# How much to read from the start for ftyp + initial atom scanning
_MP4_HEADER_PROBE_SIZE = 64 * 1024  # 64 KB

# Max moov size we'll accept; larger values are treated as corrupt/garbage.
_MAX_MOOV_SIZE = 50 * 1024 * 1024  # 50 MB

# How much to read from the end of the file to find moov (non-faststart files)
_MP4_TAIL_PROBE_SIZE = 512 * 1024  # 512 KB
|
||||||
|
|
||||||
|
|
||||||
|
async def _get_cached_mp4_index(cache_key: str) -> MP4Index | None:
    """Load a previously cached MP4Index from Redis, if one exists.

    Returns None when the key is empty, Redis is unavailable, the entry is
    missing, or the cached payload cannot be deserialized. The heavyweight
    ``moov_data`` blob is never stored in the cache, so the caller must
    re-fetch it from the source.
    """
    if not cache_key:
        return None
    client = await redis_utils.get_redis()
    if client is None:
        return None
    data = await client.get(f"{_MP4_INDEX_CACHE_PREFIX}{cache_key}")
    if not data:
        return None
    try:
        payload = json.loads(data)
        encoded_ftyp = payload.get("ftyp_data_b64")
        restored = MP4Index(
            duration_ms=payload["duration_ms"],
            timescale=payload["timescale"],
            cue_points=[(cp[0], cp[1]) for cp in payload["cue_points"]],
            moov_offset=payload["moov_offset"],
            moov_size=payload["moov_size"],
            ftyp_data=base64.b64decode(encoded_ftyp) if encoded_ftyp else b"",
            mdat_offset=payload["mdat_offset"],
            mdat_size=payload["mdat_size"],
            video_codec=payload.get("video_codec", ""),
            audio_codec=payload.get("audio_codec", ""),
        )
        logger.debug("[container_probe] Loaded MP4 index from cache: %s", cache_key)
        return restored
    except (KeyError, TypeError, json.JSONDecodeError) as e:
        logger.warning("[container_probe] Invalid cached MP4 index: %s", e)
        return None
|
||||||
|
|
||||||
|
|
||||||
|
async def _set_cached_mp4_index(cache_key: str, index: MP4Index) -> None:
    """Persist an MP4Index to Redis as JSON.

    The large ``moov_data`` blob is deliberately omitted; only the
    lightweight metadata needed to rebuild the index is stored, with a
    TTL of ``_MP4_INDEX_CACHE_TTL`` seconds. Silently no-ops when the
    key is empty or Redis is unavailable.
    """
    if not cache_key:
        return
    client = await redis_utils.get_redis()
    if client is None:
        return
    serializable = {
        "duration_ms": index.duration_ms,
        "timescale": index.timescale,
        "cue_points": index.cue_points,
        "moov_offset": index.moov_offset,
        "moov_size": index.moov_size,
        "ftyp_data_b64": base64.b64encode(index.ftyp_data).decode() if index.ftyp_data else "",
        "mdat_offset": index.mdat_offset,
        "mdat_size": index.mdat_size,
        "video_codec": index.video_codec,
        "audio_codec": index.audio_codec,
    }
    await client.set(f"{_MP4_INDEX_CACHE_PREFIX}{cache_key}", json.dumps(serializable), ex=_MP4_INDEX_CACHE_TTL)
    logger.debug("[container_probe] Cached MP4 index: %s", cache_key)
|
||||||
|
|
||||||
|
|
||||||
|
def _scan_top_level_atoms(data: bytes) -> list[tuple[bytes, int, int]]:
|
||||||
|
"""
|
||||||
|
Scan top-level atom headers from raw file bytes.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of (box_type, absolute_offset, total_size) for each atom found.
|
||||||
|
"""
|
||||||
|
atoms = []
|
||||||
|
offset = 0
|
||||||
|
while offset + 8 <= len(data):
|
||||||
|
size = struct.unpack_from(">I", data, offset)[0]
|
||||||
|
box_type = data[offset + 4 : offset + 8]
|
||||||
|
|
||||||
|
if size == 1: # Extended size
|
||||||
|
if offset + 16 > len(data):
|
||||||
|
break
|
||||||
|
size = struct.unpack_from(">Q", data, offset + 8)[0]
|
||||||
|
elif size == 0:
|
||||||
|
# Extends to end of file - we can't know the real size from
|
||||||
|
# a partial read, but record what we have
|
||||||
|
atoms.append((box_type, offset, 0))
|
||||||
|
break
|
||||||
|
|
||||||
|
if size < 8:
|
||||||
|
break
|
||||||
|
|
||||||
|
atoms.append((box_type, offset, size))
|
||||||
|
offset += size
|
||||||
|
|
||||||
|
return atoms
|
||||||
|
|
||||||
|
|
||||||
|
async def probe_mp4_moov(
    source,
    file_size: int = 0,
    cache_key: str = "",
    header_data: bytes | None = None,
) -> MP4Index | None:
    """
    Probe an MP4 file's moov atom to build a seek index.

    Pure Python -- scans MP4 box headers with struct, no FFmpeg involved.

    Strategy:
    1. Read first ~64KB to check for ftyp (MP4 signature).
    2. Scan top-level atoms to find moov and mdat.
    3. If moov is at the start (faststart), read it from the header data.
    4. If moov is not in the header, read from the tail of the file.
    5. Parse moov sample tables to build cue points.

    Args:
        source: A MediaSource protocol implementation with stream(offset, limit).
        file_size: Total file size in bytes.
        cache_key: Optional cache key for Redis caching.
        header_data: Pre-fetched header bytes (first ~64KB). If provided,
            skips the initial header fetch from source.

    Returns:
        MP4Index if successful, None if not an MP4 or parsing fails.
    """
    if file_size <= 0:
        file_size = getattr(source, "file_size", 0)
    if not cache_key:
        cache_key = getattr(source, "cache_key", "")

    # Check cache first
    if cache_key:
        cached = await _get_cached_mp4_index(cache_key)
        if cached:
            # Re-fetch moov_data (not cached due to size) and rewrite offsets
            # so the cached entry is usable for the faststart pipe layout.
            if cached.moov_size > 0 and cached.moov_size <= _MAX_MOOV_SIZE:
                moov_data = b""
                async for chunk in source.stream(offset=cached.moov_offset, limit=cached.moov_size):
                    moov_data += chunk
                if cached.mdat_offset >= 0:
                    # Same offset rewrite as Step 8 below: shift stco/co64 to
                    # the new ftyp+moov+mdat layout.
                    new_mdat_start = len(cached.ftyp_data) + cached.moov_size
                    offset_delta = new_mdat_start - cached.mdat_offset
                    if offset_delta != 0:
                        moov_data = rewrite_moov_offsets(moov_data, offset_delta)
                cached.moov_data = moov_data
            return cached

    try:
        # Step 1: Use pre-fetched header or fetch from source
        if header_data is None:
            header_size = min(_MP4_HEADER_PROBE_SIZE, file_size) if file_size > 0 else _MP4_HEADER_PROBE_SIZE
            header_data = b""
            async for chunk in source.stream(offset=0, limit=header_size):
                header_data += chunk

        # Smallest possible box header + brand check needs at least 12 bytes.
        if len(header_data) < 12:
            return None

        # Step 2: Check for ftyp
        if not is_mp4_header(header_data):
            return None

        logger.info("[container_probe] MP4 detected, scanning atoms (header=%d bytes)", len(header_data))

        # Step 3: Scan top-level atoms from header
        atoms = _scan_top_level_atoms(header_data)

        ftyp_offset = -1
        ftyp_size = 0
        moov_offset = -1
        moov_size = 0
        mdat_offset = -1
        mdat_size = 0

        for box_type, atom_offset, atom_size in atoms:
            if box_type == b"ftyp":
                ftyp_offset = atom_offset
                ftyp_size = atom_size
            elif box_type == b"moov":
                moov_offset = atom_offset
                moov_size = atom_size
            elif box_type == b"mdat":
                mdat_offset = atom_offset
                mdat_size = atom_size

        # Step 4: If moov not found in header, scan from tail
        if moov_offset < 0 and file_size > 0:
            tail_start = max(0, file_size - _MP4_TAIL_PROBE_SIZE)
            tail_data = b""
            async for chunk in source.stream(offset=tail_start, limit=file_size - tail_start):
                tail_data += chunk

            if tail_data:
                tail_atoms = _scan_top_level_atoms(tail_data)
                for box_type, rel_offset, atom_size in tail_atoms:
                    abs_offset = tail_start + rel_offset
                    if box_type == b"moov":
                        moov_offset = abs_offset
                        moov_size = atom_size
                    elif box_type == b"mdat" and mdat_offset < 0:
                        mdat_offset = abs_offset
                        mdat_size = atom_size

                # If the initial scan yielded no moov (tail_start may land
                # inside a large mdat payload producing garbage atom headers),
                # resync by scanning for the literal b"moov" fourcc and
                # validating the 32-bit size field preceding it.
                if moov_offset < 0:
                    needle = b"moov"
                    search_pos = 0
                    while search_pos + 8 <= len(tail_data):
                        idx = tail_data.find(needle, search_pos)
                        # idx < 4 means there is no room for the size field
                        # before the fourcc; give up rather than underflow.
                        if idx < 0 or idx < 4:
                            break
                        candidate_size = struct.unpack_from(">I", tail_data, idx - 4)[0]
                        if 8 < candidate_size <= _MAX_MOOV_SIZE:
                            moov_offset = tail_start + idx - 4
                            moov_size = candidate_size
                            break
                        search_pos = idx + 4

        if moov_offset < 0:
            logger.info("[container_probe] No moov atom found in MP4")
            return None

        if moov_size <= 0 or moov_size > _MAX_MOOV_SIZE:
            logger.warning("[container_probe] moov size %d is invalid or too large", moov_size)
            return None

        logger.info(
            "[container_probe] MP4 atoms: moov at %d (%d bytes), mdat at %d (%d bytes)",
            moov_offset,
            moov_size,
            mdat_offset,
            mdat_size,
        )

        # Step 5: Fetch full moov atom
        # Check if moov is already contained in the header data we read
        if moov_offset + moov_size <= len(header_data):
            moov_data = header_data[moov_offset : moov_offset + moov_size]
        else:
            moov_data = b""
            async for chunk in source.stream(offset=moov_offset, limit=moov_size):
                moov_data += chunk

        if len(moov_data) < moov_size:
            logger.warning(
                "[container_probe] Incomplete moov: got %d of %d bytes",
                len(moov_data),
                moov_size,
            )
            return None

        # Step 6: Parse moov body (skip box header)
        # Determine header size (16 bytes when the 64-bit extended size is used)
        raw_size = struct.unpack_from(">I", moov_data, 0)[0]
        hdr_size = 16 if raw_size == 1 else 8
        moov_body = moov_data[hdr_size:]

        cue_points, duration_ms, timescale, video_codec, audio_codec = build_cue_points_from_moov(moov_body)

        # If mdat wasn't found via header scan, it's likely right after ftyp
        # or right after moov. Common layouts:
        # ftyp + moov + mdat (faststart) or ftyp + mdat + moov
        if mdat_offset < 0:
            # Walk atoms to find mdat by scanning just enough from the file
            # In most cases, mdat is either before or after moov
            if moov_offset < file_size // 2:
                # moov is early -> mdat likely follows
                mdat_search_offset = moov_offset + moov_size
            else:
                # moov is late -> mdat likely right after ftyp
                # NOTE(review): this assumes ftyp is the very first atom in the
                # file (offset 0) -- true for compliant MP4s, but unverified here.
                ftyp_size = struct.unpack_from(">I", header_data, 0)[0]
                if ftyp_size == 1:
                    ftyp_size = struct.unpack_from(">Q", header_data, 8)[0]
                mdat_search_offset = ftyp_size

            # Read a small amount to find the mdat header
            mdat_header = b""
            async for chunk in source.stream(offset=mdat_search_offset, limit=16):
                mdat_header += chunk
            if len(mdat_header) >= 8:
                box_type = mdat_header[4:8]
                if box_type == b"mdat":
                    mdat_offset = mdat_search_offset
                    raw_sz = struct.unpack_from(">I", mdat_header, 0)[0]
                    if raw_sz == 1 and len(mdat_header) >= 16:
                        mdat_size = struct.unpack_from(">Q", mdat_header, 8)[0]
                    else:
                        mdat_size = raw_sz

        # Step 7: Extract ftyp data (always in the header since it's the first atom)
        ftyp_data = b""
        if ftyp_offset >= 0 and ftyp_size > 0 and ftyp_offset + ftyp_size <= len(header_data):
            ftyp_data = header_data[ftyp_offset : ftyp_offset + ftyp_size]

        # Step 8: Rewrite moov chunk offsets for faststart pipe layout.
        # The pipe stream will be: ftyp + moov + mdat. The stco/co64
        # offsets in the original moov point to positions in the original
        # file. We need to shift them to account for the new layout.
        # New mdat position = ftyp_size + moov_size
        # Delta = new_mdat_position - original_mdat_offset
        if mdat_offset >= 0:
            new_mdat_start = len(ftyp_data) + moov_size
            offset_delta = new_mdat_start - mdat_offset
            if offset_delta != 0:
                moov_data = rewrite_moov_offsets(moov_data, offset_delta)

        index = MP4Index(
            duration_ms=duration_ms,
            timescale=timescale,
            cue_points=cue_points,
            moov_offset=moov_offset,
            moov_size=moov_size,
            moov_data=moov_data,
            ftyp_data=ftyp_data,
            mdat_offset=mdat_offset,
            mdat_size=mdat_size,
            video_codec=video_codec,
            audio_codec=audio_codec,
        )

        logger.info(
            "[container_probe] MP4 index: duration=%.1fs, %d cue points, video=%s, audio=%s",
            duration_ms / 1000.0,
            len(cue_points),
            video_codec,
            audio_codec,
        )

        if cache_key:
            await _set_cached_mp4_index(cache_key, index)

        return index

    except Exception as e:
        # Best-effort probe: any parse/transport error means "no index".
        logger.warning("[container_probe] Failed to probe MP4 moov: %s", e)
        return None
|
||||||
1228
mediaflow_proxy/remuxer/ebml_parser.py
Normal file
1228
mediaflow_proxy/remuxer/ebml_parser.py
Normal file
File diff suppressed because it is too large
Load Diff
151
mediaflow_proxy/remuxer/hls_manifest.py
Normal file
151
mediaflow_proxy/remuxer/hls_manifest.py
Normal file
@@ -0,0 +1,151 @@
|
|||||||
|
"""
|
||||||
|
HLS VOD playlist generator for on-the-fly fMP4 transcoding.
|
||||||
|
|
||||||
|
Produces an M3U8 VOD playlist from an ``MKVCueIndex`` or ``MP4Index``.
|
||||||
|
Consecutive keyframes that are closer together than the target segment
|
||||||
|
duration are merged into a single HLS segment, matching the behaviour
|
||||||
|
of ``ffmpeg -hls_time``.
|
||||||
|
|
||||||
|
The init segment is referenced via ``#EXT-X-MAP``.
|
||||||
|
|
||||||
|
Requires ``#EXT-X-VERSION:7`` for fMP4 (CMAF) segments.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import math
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def merge_cue_points(
    cue_points: list[tuple[float, int]],
    target_duration_ms: float = 5000.0,
) -> list[tuple[float, int]]:
    """Collapse keyframes into segment boundaries at least *target_duration_ms* apart.

    Mirrors ``ffmpeg -hls_time`` behaviour: starting from the first cue,
    a keyframe only opens a new segment when it lies at least
    ``target_duration_ms`` after the current segment's start; closer
    keyframes are absorbed into the running segment. As a side effect,
    duplicate byte offsets are dropped (keeping the earliest timestamp
    for each offset, so segment start times match the actual bytes) and
    very short "runt" segments disappear.

    Args:
        cue_points: Sorted ``(time_ms, byte_offset)`` list.
        target_duration_ms: Minimum segment duration in milliseconds.

    Returns:
        The reduced ``(time_ms, byte_offset)`` boundary list.
    """
    if not cue_points:
        return []

    # Deduplicate by byte offset, keeping the earliest timestamp per offset.
    # Sorting by (time, offset) makes the "earliest wins" choice deterministic.
    unique: list[tuple[float, int]] = []
    offsets_seen: set[int] = set()
    for ts, pos in sorted(cue_points, key=lambda cp: (cp[0], cp[1])):
        if pos in offsets_seen:
            continue
        offsets_seen.add(pos)
        unique.append((ts, pos))

    if not unique:
        return []

    # Greedily keep only cues that are far enough from the last boundary.
    boundaries: list[tuple[float, int]] = [unique[0]]
    for candidate in unique[1:]:
        if candidate[0] - boundaries[-1][0] >= target_duration_ms:
            boundaries.append(candidate)
    return boundaries
|
||||||
|
|
||||||
|
|
||||||
|
def generate_vod_playlist(
    cue_points: list[tuple[float, int]],
    duration_ms: float,
    init_url: str,
    segment_url_template: str,
    target_segment_duration_ms: float = 5000.0,
) -> str:
    """Render an HLS VOD M3U8 playlist for fMP4 segments from cue points.

    Keyframes closer together than *target_segment_duration_ms* are merged
    into one segment via :func:`merge_cue_points` (matching
    ``ffmpeg -hls_time``). The ``segment_url_template`` may contain
    ``{seg}``, ``{start_ms}`` and ``{end_ms}`` placeholders, substituted
    per segment with the index and millisecond time range.

    Args:
        cue_points: Sorted list of ``(time_ms, byte_offset)`` tuples.
        duration_ms: Total media duration in milliseconds.
        init_url: URI for the ``#EXT-X-MAP`` init segment.
        segment_url_template: URL template with the placeholders above.
        target_segment_duration_ms: Target minimum segment duration.

    Returns:
        The complete M3U8 playlist text, or "" when there are no cues.
    """
    if not cue_points:
        return ""

    boundaries = merge_cue_points(cue_points, target_segment_duration_ms)

    # Derive (start_ms, end_ms, duration_s) per segment; the last segment
    # runs to the media's total duration. Durations are floored at 1 ms.
    spans: list[tuple[float, float, float]] = []
    for idx, (start_ms, _offset) in enumerate(boundaries):
        end_ms = boundaries[idx + 1][0] if idx + 1 < len(boundaries) else duration_ms
        spans.append((start_ms, end_ms, max((end_ms - start_ms) / 1000.0, 0.001)))

    if not spans:
        return ""

    # EXT-X-TARGETDURATION must cover the longest segment, and be >= 1.
    target_duration = max(math.ceil(max(span[2] for span in spans)), 1)

    out: list[str] = [
        "#EXTM3U",
        "#EXT-X-VERSION:7",
        f"#EXT-X-TARGETDURATION:{target_duration}",
        "#EXT-X-PLAYLIST-TYPE:VOD",
        "#EXT-X-MEDIA-SEQUENCE:0",
        f'#EXT-X-MAP:URI="{init_url}"',
    ]

    for number, (start_ms, end_ms, seconds) in enumerate(spans):
        out.append(f"#EXTINF:{seconds:.3f},")
        out.append(
            segment_url_template.replace("{seg}", str(number))
            .replace("{start_ms}", str(int(start_ms)))
            .replace("{end_ms}", str(int(end_ms)))
        )

    out.append("#EXT-X-ENDLIST")
    out.append("")  # trailing newline
    return "\n".join(out)
|
||||||
234
mediaflow_proxy/remuxer/media_source.py
Normal file
234
mediaflow_proxy/remuxer/media_source.py
Normal file
@@ -0,0 +1,234 @@
|
|||||||
|
"""
|
||||||
|
Abstract media source protocol for source-agnostic transcode pipeline.
|
||||||
|
|
||||||
|
Decouples the transcode pipeline, MKV cue probing, and seeking logic
|
||||||
|
from any specific transport (Telegram, HTTP, etc.). Each transport
|
||||||
|
implements the MediaSource protocol to provide byte-range streaming.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import logging
|
||||||
|
from collections.abc import AsyncIterator
|
||||||
|
from typing import Protocol, runtime_checkable
|
||||||
|
from urllib.parse import urlparse, unquote
|
||||||
|
|
||||||
|
from mediaflow_proxy.utils.http_client import create_aiohttp_session
|
||||||
|
from mediaflow_proxy.utils.telegram import telegram_manager
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Extensions mapped to container format hints used by transcode_handler
|
||||||
|
_MKV_EXTENSIONS = frozenset({".mkv", ".webm"})
|
||||||
|
_MP4_EXTENSIONS = frozenset({".mp4", ".m4v", ".mov", ".m4a", ".3gp"})
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_extension(path: str) -> str:
|
||||||
|
"""Extract lowercase file extension (e.g. '.mkv') from a path or URL."""
|
||||||
|
# Strip query/fragment first for URL paths
|
||||||
|
dot_pos = path.rfind(".")
|
||||||
|
if dot_pos < 0:
|
||||||
|
return ""
|
||||||
|
ext = path[dot_pos:].lower()
|
||||||
|
# Trim anything after the extension (query params from raw paths)
|
||||||
|
for ch in ("?", "#", "&"):
|
||||||
|
idx = ext.find(ch)
|
||||||
|
if idx > 0:
|
||||||
|
ext = ext[:idx]
|
||||||
|
return ext
|
||||||
|
|
||||||
|
|
||||||
|
def filename_hint_from_url(url: str) -> str:
    """Derive a file-extension hint (e.g. '.mkv', '.mp4') from a URL's path.

    Returns '' when the URL cannot be parsed or carries no extension.
    """
    try:
        return _extract_extension(unquote(urlparse(url).path))
    except Exception:
        # Malformed URLs are simply "no hint", never an error.
        return ""
|
||||||
|
|
||||||
|
|
||||||
|
def filename_hint_from_name(filename: str) -> str:
    """Derive a file-extension hint from a plain filename ('' if none)."""
    if not filename:
        return ""
    return _extract_extension(filename)
|
||||||
|
|
||||||
|
|
||||||
|
@runtime_checkable
class MediaSource(Protocol):
    """
    Protocol for streaming media byte ranges.

    Implementations must provide:
    - stream(): async iterator of bytes from offset/limit
    - file_size: total file size in bytes
    - cache_key: deterministic key for caching (cue index, etc.)
    - filename_hint: optional file extension hint (e.g. '.mkv', '.mp4')

    Decorated with @runtime_checkable so isinstance() checks verify the
    presence of these members (signatures are not checked at runtime).
    """

    @property
    def file_size(self) -> int:
        """Total file size in bytes."""
        ...

    @property
    def cache_key(self) -> str:
        """Deterministic cache key derived from the source identity."""
        ...

    @property
    def filename_hint(self) -> str:
        """Optional file extension hint (e.g. '.mkv', '.mp4') for format detection."""
        ...

    async def stream(self, offset: int = 0, limit: int | None = None) -> AsyncIterator[bytes]:
        """
        Stream bytes from the source.

        Args:
            offset: Byte offset to start from.
            limit: Number of bytes to read. None = read to end.

        Yields:
            Chunks of bytes.
        """
        ...
|
||||||
|
|
||||||
|
|
||||||
|
class TelegramMediaSource:
|
||||||
|
"""
|
||||||
|
MediaSource backed by Telegram MTProto downloads.
|
||||||
|
|
||||||
|
Supports two download modes:
|
||||||
|
|
||||||
|
* **parallel** (default): Uses ``ParallelTransferrer`` with multiple
|
||||||
|
MTProtoSender connections for maximum throughput. Best for full-file
|
||||||
|
streaming (e.g. ``/proxy/telegram/stream``).
|
||||||
|
|
||||||
|
* **single** (``use_single_client=True``): Uses Telethon's built-in
|
||||||
|
``iter_download`` over the existing client connection. Avoids the
|
||||||
|
overhead of creating/destroying extra connections for each request,
|
||||||
|
ideal for small byte-range fetches like HLS segments and probe
|
||||||
|
headers.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
telegram_ref,
|
||||||
|
file_size: int,
|
||||||
|
file_name: str = "",
|
||||||
|
*,
|
||||||
|
use_single_client: bool = False,
|
||||||
|
) -> None:
|
||||||
|
self._ref = telegram_ref
|
||||||
|
self._file_size = file_size
|
||||||
|
self._filename_hint = filename_hint_from_name(file_name)
|
||||||
|
self._use_single_client = use_single_client
|
||||||
|
|
||||||
|
@property
|
||||||
|
def file_size(self) -> int:
|
||||||
|
return self._file_size
|
||||||
|
|
||||||
|
@property
|
||||||
|
def cache_key(self) -> str:
|
||||||
|
ref = self._ref
|
||||||
|
if ref.file_id:
|
||||||
|
raw = f"file_id:{ref.file_id}"
|
||||||
|
elif ref.chat_id is not None and ref.message_id is not None:
|
||||||
|
raw = f"chat:{ref.chat_id}:msg:{ref.message_id}"
|
||||||
|
else:
|
||||||
|
return ""
|
||||||
|
return hashlib.sha256(raw.encode()).hexdigest()[:16]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def filename_hint(self) -> str:
|
||||||
|
return self._filename_hint
|
||||||
|
|
||||||
|
async def stream(self, offset: int = 0, limit: int | None = None) -> AsyncIterator[bytes]:
|
||||||
|
effective_limit = limit or self._file_size
|
||||||
|
if self._use_single_client:
|
||||||
|
async for chunk in telegram_manager.stream_media_single(
|
||||||
|
self._ref,
|
||||||
|
offset=offset,
|
||||||
|
limit=effective_limit,
|
||||||
|
file_size=self._file_size,
|
||||||
|
):
|
||||||
|
yield chunk
|
||||||
|
else:
|
||||||
|
async for chunk in telegram_manager.stream_media(
|
||||||
|
self._ref,
|
||||||
|
offset=offset,
|
||||||
|
limit=effective_limit,
|
||||||
|
file_size=self._file_size,
|
||||||
|
):
|
||||||
|
yield chunk
|
||||||
|
|
||||||
|
|
||||||
|
class HTTPMediaSource:
|
||||||
|
"""MediaSource backed by HTTP byte-range requests via aiohttp."""
|
||||||
|
|
||||||
|
def __init__(self, url: str, headers: dict | None = None, file_size: int = 0) -> None:
|
||||||
|
self._url = url
|
||||||
|
self._headers = headers or {}
|
||||||
|
self._file_size = file_size
|
||||||
|
self._filename_hint = filename_hint_from_url(url)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def file_size(self) -> int:
|
||||||
|
return self._file_size
|
||||||
|
|
||||||
|
@property
|
||||||
|
def cache_key(self) -> str:
|
||||||
|
return hashlib.sha256(self._url.encode()).hexdigest()[:16]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def filename_hint(self) -> str:
|
||||||
|
return self._filename_hint
|
||||||
|
|
||||||
|
async def resolve_file_size(self) -> int:
|
||||||
|
"""Perform a HEAD request to determine file size if not already known."""
|
||||||
|
if self._file_size > 0:
|
||||||
|
return self._file_size
|
||||||
|
|
||||||
|
async with create_aiohttp_session(self._url, headers=self._headers) as (session, proxy_url):
|
||||||
|
async with session.head(
|
||||||
|
self._url,
|
||||||
|
headers=self._headers,
|
||||||
|
proxy=proxy_url,
|
||||||
|
allow_redirects=True,
|
||||||
|
) as resp:
|
||||||
|
cl = resp.headers.get("content-length")
|
||||||
|
if cl:
|
||||||
|
self._file_size = int(cl)
|
||||||
|
else:
|
||||||
|
# Try GET with range to get content-range
|
||||||
|
async with session.get(
|
||||||
|
self._url,
|
||||||
|
headers={**self._headers, "range": "bytes=0-0"},
|
||||||
|
proxy=proxy_url,
|
||||||
|
allow_redirects=True,
|
||||||
|
) as range_resp:
|
||||||
|
cr = range_resp.headers.get("content-range", "")
|
||||||
|
if "/" in cr:
|
||||||
|
try:
|
||||||
|
self._file_size = int(cr.split("/")[-1])
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
return self._file_size
|
||||||
|
|
||||||
|
async def stream(self, offset: int = 0, limit: int | None = None) -> AsyncIterator[bytes]:
|
||||||
|
headers = dict(self._headers)
|
||||||
|
|
||||||
|
if offset > 0 or limit is not None:
|
||||||
|
end = ""
|
||||||
|
if limit is not None:
|
||||||
|
end = str(offset + limit - 1)
|
||||||
|
headers["range"] = f"bytes={offset}-{end}"
|
||||||
|
|
||||||
|
async with create_aiohttp_session(self._url, headers=headers) as (session, proxy_url):
|
||||||
|
async with session.get(
|
||||||
|
self._url,
|
||||||
|
headers=headers,
|
||||||
|
proxy=proxy_url,
|
||||||
|
allow_redirects=True,
|
||||||
|
) as resp:
|
||||||
|
resp.raise_for_status()
|
||||||
|
async for chunk in resp.content.iter_any():
|
||||||
|
yield chunk
|
||||||
469
mediaflow_proxy/remuxer/mkv_demuxer.py
Normal file
469
mediaflow_proxy/remuxer/mkv_demuxer.py
Normal file
@@ -0,0 +1,469 @@
|
|||||||
|
"""
|
||||||
|
Streaming MKV demuxer.
|
||||||
|
|
||||||
|
Reads an MKV byte stream via an async iterator and yields individual media
|
||||||
|
frames (MKVFrame) with absolute timestamps. Designed for on-the-fly remuxing
|
||||||
|
without buffering the entire file.
|
||||||
|
|
||||||
|
Architecture:
|
||||||
|
AsyncIterator[bytes] -> StreamBuffer -> EBML parsing -> MKVFrame yields
|
||||||
|
|
||||||
|
The demuxer works in two phases:
|
||||||
|
1. read_header(): Consume bytes until Tracks is fully parsed, returning
|
||||||
|
a list of MKVTrack with codec metadata.
|
||||||
|
2. iter_frames(): Yield MKVFrame objects from Cluster/SimpleBlock data
|
||||||
|
as clusters arrive.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from collections.abc import AsyncIterator
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
from mediaflow_proxy.remuxer.ebml_parser import (
|
||||||
|
CLUSTER,
|
||||||
|
CLUSTER_TIMESTAMP,
|
||||||
|
EBML_HEADER,
|
||||||
|
INFO,
|
||||||
|
MKVFrame,
|
||||||
|
MKVTrack,
|
||||||
|
SEGMENT,
|
||||||
|
SIMPLE_BLOCK,
|
||||||
|
BLOCK_GROUP,
|
||||||
|
TRACKS,
|
||||||
|
TIMESTAMP_SCALE,
|
||||||
|
DURATION,
|
||||||
|
UNKNOWN_SIZE,
|
||||||
|
extract_block_frames,
|
||||||
|
parse_tracks,
|
||||||
|
read_element_id,
|
||||||
|
read_element_size,
|
||||||
|
read_float,
|
||||||
|
read_uint,
|
||||||
|
_parse_block_group,
|
||||||
|
iter_elements,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class StreamBuffer:
    """
    Accumulating byte buffer for streaming EBML parsing.

    Gathers chunks from an async byte source and offers read-ahead
    (peek/get_all) plus front-consumption (consume/skip) so parsed
    bytes can be released and memory stays bounded.
    """

    def __init__(self) -> None:
        self._chunks: list[bytes] = []  # pending chunks, oldest first
        self._total: int = 0  # sum of len() over self._chunks
        self._consumed: int = 0  # logical bytes consumed (absolute offset tracking)

    @property
    def available(self) -> int:
        """Number of buffered bytes available for reading."""
        return self._total

    @property
    def consumed(self) -> int:
        """Total bytes consumed so far (for absolute offset tracking)."""
        return self._consumed

    def append(self, data: bytes) -> None:
        """Add bytes to the buffer; empty chunks are ignored."""
        if not data:
            return
        self._chunks.append(data)
        self._total += len(data)

    def peek(self, size: int) -> bytes:
        """Read up to ``size`` bytes without consuming them."""
        if size <= 0:
            return b""
        pieces: list[bytes] = []
        needed = size
        for chunk in self._chunks:
            if needed <= 0:
                break
            piece = chunk[:needed]
            pieces.append(piece)
            needed -= len(piece)
        return b"".join(pieces)

    def get_all(self) -> bytes:
        """Return all buffered data as one bytes object (without consuming).

        Coalesces the internal chunk list so repeated calls are cheap.
        """
        if len(self._chunks) != 1:
            merged = b"".join(self._chunks)
            self._chunks = [merged]
        return self._chunks[0]

    def consume(self, size: int) -> bytes:
        """Remove and return up to ``size`` bytes from the front of the buffer."""
        if size <= 0:
            return b""
        size = min(size, self._total)

        taken = bytearray()
        left = size
        while left > 0 and self._chunks:
            head = self._chunks[0]
            if len(head) <= left:
                # Whole chunk fits -- take it and drop it from the list.
                taken += head
                left -= len(head)
                del self._chunks[0]
            else:
                # Split the chunk: take the prefix, keep the tail.
                taken += head[:left]
                self._chunks[0] = head[left:]
                left = 0

        n = len(taken)
        self._total -= n
        self._consumed += n
        return bytes(taken)

    def skip(self, size: int) -> int:
        """Discard up to ``size`` bytes from the front. Returns bytes actually skipped."""
        if size <= 0:
            return 0
        to_skip = min(size, self._total)
        left = to_skip
        while left > 0 and self._chunks:
            head = self._chunks[0]
            if len(head) <= left:
                left -= len(head)
                del self._chunks[0]
            else:
                self._chunks[0] = head[left:]
                left = 0
        self._total -= to_skip
        self._consumed += to_skip
        return to_skip
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class MKVHeader:
    """Parsed MKV header metadata (Info + Tracks), produced by MKVDemuxer.read_header()."""

    # Track metadata parsed from the Tracks element.
    tracks: list[MKVTrack] = field(default_factory=list)
    # Matroska TimestampScale: nanoseconds per timestamp tick.
    timestamp_scale_ns: int = 1_000_000  # Default 1ms
    # Segment duration converted to milliseconds (0.0 if the file omits it).
    duration_ms: float = 0.0
    segment_data_offset: int = 0  # Absolute byte offset of Segment children
|
||||||
|
|
||||||
|
|
||||||
|
class MKVDemuxer:
    """
    Streaming async MKV demuxer.

    Reads an MKV byte stream from an async iterator and provides:
    - read_header(): Parse EBML header + Segment metadata + Tracks
    - iter_frames(): Yield MKVFrame objects from Clusters

    Usage:
        demuxer = MKVDemuxer()
        header = await demuxer.read_header(source)
        async for frame in demuxer.iter_frames(source):
            process(frame)
    """

    # Minimum bytes to try parsing an element header (ID + size)
    _MIN_ELEMENT_HEADER = 12

    def __init__(self) -> None:
        self._buf = StreamBuffer()  # carries leftover bytes between parsing phases
        self._header: MKVHeader | None = None
        self._scale_ms: float = 1.0  # timestamp_scale / 1_000_000 (ticks -> milliseconds)

    @property
    def header(self) -> MKVHeader | None:
        """Header parsed by read_header(), or None if not parsed yet."""
        return self._header

    async def read_header(self, source: AsyncIterator[bytes]) -> MKVHeader:
        """
        Read and parse the MKV header (EBML header, Segment, Info, Tracks).

        Consumes bytes from source until Tracks is fully parsed. Any leftover
        bytes (start of first Cluster) remain in the internal buffer for
        iter_frames().

        Raises:
            ValueError: if the stream is not a valid MKV file.

        Returns:
            MKVHeader with track info and timing metadata.
        """
        header = MKVHeader()

        # Phase 1: Accumulate enough data for EBML header + Segment header
        await self._ensure_bytes(source, 64)

        data = self._buf.get_all()
        if len(data) < 4:
            raise ValueError(
                f"Source ended prematurely: got {len(data)} bytes, need at least an EBML header (source disconnected?)"
            )
        pos = 0

        # Parse EBML Header
        eid, pos = read_element_id(data, pos)
        if eid != EBML_HEADER:
            raise ValueError(f"Not an MKV file: expected EBML header, got 0x{eid:X}")
        size, pos = read_element_size(data, pos)
        if size == UNKNOWN_SIZE:
            raise ValueError("EBML header has unknown size")
        pos += size  # Skip EBML header content

        # Parse Segment element header
        eid, pos = read_element_id(data, pos)
        if eid != SEGMENT:
            raise ValueError(f"Expected Segment, got 0x{eid:X}")
        _seg_size, pos = read_element_size(data, pos)
        # consumed is 0 here unless a caller pre-consumed; pos is relative to the buffer.
        header.segment_data_offset = self._buf.consumed + pos

        # Phase 2: Parse Segment children until we have Tracks
        # We need to iterate top-level Segment children: SeekHead, Info, Tracks
        # Stop when we hit the first Cluster (media data).
        tracks_found = False

        while not tracks_found:
            # Ensure we have enough for element header
            await self._ensure_bytes(source, pos + self._MIN_ELEMENT_HEADER)
            data = self._buf.get_all()

            if pos >= len(data):
                break

            try:
                eid, pos2 = read_element_id(data, pos)
                size, pos3 = read_element_size(data, pos2)
            except (ValueError, IndexError):
                # Element header may straddle a chunk boundary: pull more and retry once.
                await self._ensure_bytes(source, pos + 32)
                data = self._buf.get_all()
                try:
                    eid, pos2 = read_element_id(data, pos)
                    size, pos3 = read_element_size(data, pos2)
                except (ValueError, IndexError):
                    break

            if eid == CLUSTER:
                # Reached media data; header parsing is done.
                # Don't consume the Cluster -- leave it for iter_frames.
                break

            if size == UNKNOWN_SIZE:
                # Can't handle unknown-size elements in header
                logger.warning("[mkv_demuxer] Unknown-size element 0x%X in header at pos %d", eid, pos)
                break

            # Ensure we have the full element
            elem_end = pos3 + size
            await self._ensure_bytes(source, elem_end)
            data = self._buf.get_all()

            if eid == INFO:
                self._parse_info_element(data, pos3, pos3 + size, header)
            elif eid == TRACKS:
                header.tracks = parse_tracks(data, pos3, pos3 + size)
                tracks_found = True
                logger.info(
                    "[mkv_demuxer] Parsed %d tracks: %s",
                    len(header.tracks),
                    ", ".join(f"#{t.track_number}={t.codec_id}" for t in header.tracks),
                )

            pos = elem_end

        # Consume everything up to the current position (Cluster boundary)
        self._buf.consume(pos)

        # Set timing scale (nanoseconds-per-tick -> milliseconds-per-tick)
        self._scale_ms = header.timestamp_scale_ns / 1_000_000.0
        self._header = header
        return header

    async def iter_frames(self, source: AsyncIterator[bytes]) -> AsyncIterator[MKVFrame]:
        """
        Yield MKVFrame objects from Cluster/SimpleBlock data.

        Must be called after read_header(). Continues consuming bytes from
        source, parsing Clusters and yielding individual frames.

        Raises:
            RuntimeError: if read_header() has not been called first.
        """
        if self._header is None:
            raise RuntimeError("read_header() must be called before iter_frames()")

        while True:
            # Try to read the next element header
            if not await self._ensure_bytes_soft(source, self._MIN_ELEMENT_HEADER):
                break

            data = self._buf.get_all()
            pos = 0

            try:
                eid, pos2 = read_element_id(data, pos)
                size, pos3 = read_element_size(data, pos2)
            except (ValueError, IndexError):
                # Try to get more data
                if not await self._ensure_bytes_soft(source, len(data) + 4096):
                    break
                data = self._buf.get_all()
                try:
                    eid, pos2 = read_element_id(data, pos)
                    size, pos3 = read_element_size(data, pos2)
                except (ValueError, IndexError):
                    break

            if eid == CLUSTER:
                if size == UNKNOWN_SIZE:
                    # Unknown-size Cluster: parse children until we hit the next
                    # Cluster or run out of data
                    self._buf.consume(pos3)  # consume Cluster header
                    async for frame in self._parse_unknown_size_cluster(source):
                        yield frame
                else:
                    # Known-size Cluster: ensure we have all data
                    elem_end = pos3 + size
                    await self._ensure_bytes(source, elem_end)
                    data = self._buf.get_all()

                    for frame in self._parse_cluster_data(data, pos3, pos3 + size):
                        yield frame

                    self._buf.consume(elem_end)
            else:
                # Skip non-Cluster top-level elements
                if size == UNKNOWN_SIZE:
                    break
                elem_end = pos3 + size
                if elem_end > len(data):
                    # Need to skip bytes we don't have yet
                    self._buf.consume(len(data))
                    skip_remaining = elem_end - len(data)
                    await self._skip_bytes(source, skip_remaining)
                else:
                    self._buf.consume(elem_end)

    def _parse_info_element(self, data: bytes, start: int, end: int, header: MKVHeader) -> None:
        """Parse Info element children for timestamp scale and duration."""
        for eid, off, size, _ in iter_elements(data, start, end):
            if eid == TIMESTAMP_SCALE:
                header.timestamp_scale_ns = read_uint(data, off, size)
            elif eid == DURATION:
                # Duration is stored in timestamp ticks; convert to milliseconds.
                scale = header.timestamp_scale_ns / 1_000_000.0
                header.duration_ms = read_float(data, off, size) * scale

    def _parse_cluster_data(self, data: bytes, start: int, end: int) -> list[MKVFrame]:
        """Parse a known-size Cluster and return its frames."""
        cluster_timecode = 0
        frames = []

        for eid, data_off, size, _ in iter_elements(data, start, end):
            if eid == CLUSTER_TIMESTAMP:
                cluster_timecode = read_uint(data, data_off, size)
            elif eid == SIMPLE_BLOCK:
                for track_num, rel_tc, flags, frame_list in extract_block_frames(data, data_off, size):
                    # SimpleBlock flag bit 0x80 marks a keyframe.
                    is_kf = bool(flags & 0x80)
                    # Block timecode is relative to the Cluster's timecode.
                    abs_ts_ms = (cluster_timecode + rel_tc) * self._scale_ms
                    for frame_data in frame_list:
                        frames.append(
                            MKVFrame(
                                track_number=track_num,
                                timestamp_ms=abs_ts_ms,
                                is_keyframe=is_kf,
                                data=frame_data,
                            )
                        )
            elif eid == BLOCK_GROUP:
                _parse_block_group(data, data_off, data_off + size, cluster_timecode, self._scale_ms, frames)

        return frames

    async def _parse_unknown_size_cluster(self, source: AsyncIterator[bytes]) -> AsyncIterator[MKVFrame]:
        """Parse an unknown-size Cluster by reading children until next Cluster."""
        cluster_timecode = 0

        while True:
            if not await self._ensure_bytes_soft(source, self._MIN_ELEMENT_HEADER):
                break

            data = self._buf.get_all()
            pos = 0

            try:
                eid, pos2 = read_element_id(data, pos)
                size, pos3 = read_element_size(data, pos2)
            except (ValueError, IndexError):
                if not await self._ensure_bytes_soft(source, len(data) + 4096):
                    break
                data = self._buf.get_all()
                try:
                    eid, pos2 = read_element_id(data, pos)
                    size, pos3 = read_element_size(data, pos2)
                except (ValueError, IndexError):
                    break

            # A new Cluster or top-level element signals end of current Cluster
            # (element is left unconsumed for the caller, iter_frames).
            if eid == CLUSTER or eid == SEGMENT:
                break

            if size == UNKNOWN_SIZE:
                break

            elem_end = pos3 + size
            await self._ensure_bytes(source, elem_end)
            data = self._buf.get_all()

            if eid == CLUSTER_TIMESTAMP:
                cluster_timecode = read_uint(data, pos3, size)
            elif eid == SIMPLE_BLOCK:
                for track_num, rel_tc, flags, frame_list in extract_block_frames(data, pos3, size):
                    is_kf = bool(flags & 0x80)
                    abs_ts_ms = (cluster_timecode + rel_tc) * self._scale_ms
                    for frame_data in frame_list:
                        yield MKVFrame(
                            track_number=track_num,
                            timestamp_ms=abs_ts_ms,
                            is_keyframe=is_kf,
                            data=frame_data,
                        )
            elif eid == BLOCK_GROUP:
                bg_frames = []
                _parse_block_group(data, pos3, pos3 + size, cluster_timecode, self._scale_ms, bg_frames)
                for frame in bg_frames:
                    yield frame

            self._buf.consume(elem_end)

    async def _ensure_bytes(self, source: AsyncIterator[bytes], needed: int) -> None:
        """Fill the buffer to at least 'needed' bytes.

        Returns silently if the source is exhausted first -- the buffer may
        then hold fewer than 'needed' bytes, and callers must tolerate that.
        """
        while self._buf.available < needed:
            try:
                chunk = await source.__anext__()
                self._buf.append(chunk)
            except StopAsyncIteration:
                return

    async def _ensure_bytes_soft(self, source: AsyncIterator[bytes], needed: int) -> bool:
        """Like _ensure_bytes but reports success/failure as a bool.

        Returns True when 'needed' bytes are buffered; on an empty chunk or
        exhausted source, returns whether ANY bytes remain buffered.
        """
        while self._buf.available < needed:
            try:
                chunk = await source.__anext__()
                if not chunk:
                    return self._buf.available > 0
                self._buf.append(chunk)
            except StopAsyncIteration:
                return self._buf.available > 0
        return True

    async def _skip_bytes(self, source: AsyncIterator[bytes], count: int) -> None:
        """Skip count bytes from the source without buffering them."""
        remaining = count
        while remaining > 0:
            try:
                chunk = await source.__anext__()
                if len(chunk) <= remaining:
                    remaining -= len(chunk)
                else:
                    # Put the excess back
                    self._buf.append(chunk[remaining:])
                    remaining = 0
            except StopAsyncIteration:
                break
|
||||||
1376
mediaflow_proxy/remuxer/mp4_muxer.py
Normal file
1376
mediaflow_proxy/remuxer/mp4_muxer.py
Normal file
File diff suppressed because it is too large
Load Diff
834
mediaflow_proxy/remuxer/mp4_parser.py
Normal file
834
mediaflow_proxy/remuxer/mp4_parser.py
Normal file
@@ -0,0 +1,834 @@
|
|||||||
|
"""
|
||||||
|
MP4 container parser for moov atom probing.
|
||||||
|
|
||||||
|
Provides:
|
||||||
|
- MP4Index: seek index extracted from MP4 moov atom (parallel to MKVCueIndex)
|
||||||
|
- Top-level atom scanning
|
||||||
|
- Sample table parsers (stco, co64, stss, stsz, stts, stsc)
|
||||||
|
- Moov-to-cue-point builder
|
||||||
|
- rewrite_moov_offsets: adjust stco/co64 in moov for file rearrangement
|
||||||
|
|
||||||
|
The parsers are the inverse of the builder functions in mp4_muxer.py.
|
||||||
|
Box navigation reuses the pattern from ts_muxer.py's read_box/find_box/iter_boxes.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import bisect
|
||||||
|
import logging
|
||||||
|
import struct
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# MP4 Box Utilities
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Minimum bytes needed to read a standard box header
|
||||||
|
_BOX_HEADER_SIZE = 8
|
||||||
|
|
||||||
|
# ftyp brands that identify MP4/MOV containers
_MP4_BRANDS = {
    b"isom", b"iso2", b"iso3", b"iso4", b"iso5", b"iso6",
    b"mp41", b"mp42", b"M4V ", b"M4A ", b"f4v ", b"kddi",
    b"avc1", b"qt  ", b"MSNV", b"dash", b"3gp4", b"3gp5", b"3gp6",
}


def is_mp4_header(data: bytes) -> bool:
    """Check whether ``data`` begins with an ftyp box (MP4/MOV signature)."""
    if len(data) < 8:
        return False
    if data[4:8] != b"ftyp":
        return False
    declared_size = struct.unpack_from(">I", data, 0)[0]
    if declared_size < 12 or declared_size > len(data):
        # Too small to hold a major brand -> invalid; larger than the buffer
        # -> possibly valid but truncated, accept on plausible size alone.
        return declared_size >= 12
    return data[8:12] in _MP4_BRANDS
|
||||||
|
|
||||||
|
|
||||||
|
def read_box_header(data: bytes, offset: int) -> tuple[bytes, int, int] | None:
|
||||||
|
"""
|
||||||
|
Read a box header at the given offset.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
(box_type, header_size, total_box_size) or None if not enough data.
|
||||||
|
"""
|
||||||
|
if offset + 8 > len(data):
|
||||||
|
return None
|
||||||
|
|
||||||
|
size, box_type = struct.unpack_from(">I4s", data, offset)
|
||||||
|
header_size = 8
|
||||||
|
|
||||||
|
if size == 1: # Extended size (64-bit)
|
||||||
|
if offset + 16 > len(data):
|
||||||
|
return None
|
||||||
|
size = struct.unpack_from(">Q", data, offset + 8)[0]
|
||||||
|
header_size = 16
|
||||||
|
elif size == 0: # Box extends to end of data
|
||||||
|
size = len(data) - offset
|
||||||
|
|
||||||
|
return box_type, header_size, size
|
||||||
|
|
||||||
|
|
||||||
|
def iter_top_level_boxes(data: bytes):
|
||||||
|
"""
|
||||||
|
Iterate over top-level box headers.
|
||||||
|
|
||||||
|
Yields:
|
||||||
|
(box_type, header_size, total_size, data_offset)
|
||||||
|
"""
|
||||||
|
offset = 0
|
||||||
|
while offset < len(data):
|
||||||
|
result = read_box_header(data, offset)
|
||||||
|
if result is None:
|
||||||
|
break
|
||||||
|
box_type, header_size, total_size = result
|
||||||
|
yield box_type, header_size, total_size, offset + header_size
|
||||||
|
if total_size == 0:
|
||||||
|
break
|
||||||
|
offset += total_size
|
||||||
|
|
||||||
|
|
||||||
|
def find_box(data: bytes, target: bytes) -> bytes | None:
|
||||||
|
"""Find a box by type and return its body (data after header)."""
|
||||||
|
for box_type, header_size, total_size, data_offset in iter_top_level_boxes(data):
|
||||||
|
if box_type == target:
|
||||||
|
return data[data_offset : data_offset - header_size + total_size]
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def iter_boxes(data: bytes):
|
||||||
|
"""Iterate over child boxes: yields (box_type, box_body_bytes)."""
|
||||||
|
for box_type, header_size, total_size, data_offset in iter_top_level_boxes(data):
|
||||||
|
end = data_offset - header_size + total_size
|
||||||
|
yield box_type, data[data_offset:end]
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Sample Table Parsers (inverse of mp4_muxer.py builders)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def parse_full_box_header(data: bytes) -> tuple[int, int, int]:
    """
    Parse a full box header (version + flags).

    Returns:
        (version, flags, header_size) where header_size is 4 bytes,
        or (0, 0, 0) when fewer than 4 bytes are available.
    """
    if len(data) < 4:
        return 0, 0, 0
    version = data[0]
    flags = (data[1] << 16) | (data[2] << 8) | data[3]
    return version, flags, 4


def parse_stco(data: bytes) -> list[int]:
    """
    Parse Chunk Offset box (stco) - 32-bit offsets.

    Layout: version(1) + flags(3) + entry_count(4) + [offset(4)]...

    Returns:
        List of chunk offsets, or [] on truncated input.
    """
    if len(data) < 8:
        return []
    _, _, hdr = parse_full_box_header(data)
    entry_count = struct.unpack_from(">I", data, hdr)[0]
    pos = hdr + 4

    if len(data) < pos + entry_count * 4:
        return []

    # One C-level unpack of the whole table instead of a per-entry Python loop.
    return list(struct.unpack_from(f">{entry_count}I", data, pos))


def parse_co64(data: bytes) -> list[int]:
    """
    Parse Chunk Offset box (co64) - 64-bit offsets.

    Layout: version(1) + flags(3) + entry_count(4) + [offset(8)]...

    Returns:
        List of chunk offsets, or [] on truncated input.
    """
    if len(data) < 8:
        return []
    _, _, hdr = parse_full_box_header(data)
    entry_count = struct.unpack_from(">I", data, hdr)[0]
    pos = hdr + 4

    if len(data) < pos + entry_count * 8:
        return []

    return list(struct.unpack_from(f">{entry_count}Q", data, pos))


def parse_stss(data: bytes) -> list[int]:
    """
    Parse Sync Sample box (stss) - keyframe indices (1-based).

    Layout: version(1) + flags(3) + entry_count(4) + [sample_number(4)]...

    Returns:
        List of 1-based keyframe sample numbers, or [] on truncated input.
    """
    if len(data) < 8:
        return []
    _, _, hdr = parse_full_box_header(data)
    entry_count = struct.unpack_from(">I", data, hdr)[0]
    pos = hdr + 4

    if len(data) < pos + entry_count * 4:
        return []

    return list(struct.unpack_from(f">{entry_count}I", data, pos))


def parse_stsz(data: bytes) -> tuple[int, list[int]]:
    """
    Parse Sample Size box (stsz).

    Layout: version(1) + flags(3) + sample_size(4) + sample_count(4) + [size(4)]...

    Returns:
        (uniform_size, sizes_list).
        If uniform_size > 0, all samples have that size and sizes_list is empty.
        Otherwise, sizes_list contains per-sample sizes.
    """
    if len(data) < 12:
        return 0, []
    _, _, hdr = parse_full_box_header(data)
    sample_size, sample_count = struct.unpack_from(">II", data, hdr)
    pos = hdr + 8

    # Non-zero uniform size means the per-sample table is absent.
    if sample_size > 0:
        return sample_size, []

    if len(data) < pos + sample_count * 4:
        return 0, []

    return 0, list(struct.unpack_from(f">{sample_count}I", data, pos))


def parse_stts(data: bytes) -> list[tuple[int, int]]:
    """
    Parse Time-to-Sample box (stts) - run-length encoded durations.

    Layout: version(1) + flags(3) + entry_count(4) + [sample_count(4) + sample_delta(4)]...

    Returns:
        List of (sample_count, sample_delta) entries, or [] on truncated input.
    """
    if len(data) < 8:
        return []
    _, _, hdr = parse_full_box_header(data)
    entry_count = struct.unpack_from(">I", data, hdr)[0]
    pos = hdr + 4

    if len(data) < pos + entry_count * 8:
        return []

    # Unpack the whole table at once, then pair up (count, delta) values.
    flat = struct.unpack_from(f">{entry_count * 2}I", data, pos)
    return list(zip(flat[0::2], flat[1::2]))


def parse_stsc(data: bytes) -> list[tuple[int, int, int]]:
    """
    Parse Sample-to-Chunk box (stsc).

    Layout: version(1) + flags(3) + entry_count(4) +
            [first_chunk(4) + samples_per_chunk(4) + sample_desc_index(4)]...

    Returns:
        List of (first_chunk, samples_per_chunk, sample_desc_index) entries.
        first_chunk is 1-based. [] on truncated input.
    """
    if len(data) < 8:
        return []
    _, _, hdr = parse_full_box_header(data)
    entry_count = struct.unpack_from(">I", data, hdr)[0]
    pos = hdr + 4

    if len(data) < pos + entry_count * 12:
        return []

    flat = struct.unpack_from(f">{entry_count * 3}I", data, pos)
    return [tuple(flat[i : i + 3]) for i in range(0, len(flat), 3)]


def parse_mdhd(data: bytes) -> tuple[int, int]:
    """
    Parse Media Header box (mdhd) for timescale and duration.

    Version 1 boxes carry 64-bit creation/modification/duration fields;
    version 0 boxes carry 32-bit fields. The timescale is 32-bit in both.

    Returns:
        (timescale, duration) in media timescale units, or (0, 0) on
        truncated input.
    """
    if len(data) < 4:
        return 0, 0
    version = data[0]
    if version == 1:
        # 64-bit: skip version(1)+flags(3)+creation(8)+modification(8)
        if len(data) < 32:
            return 0, 0
        timescale = struct.unpack_from(">I", data, 20)[0]
        duration = struct.unpack_from(">Q", data, 24)[0]
    else:
        # 32-bit: skip version(1)+flags(3)+creation(4)+modification(4)
        if len(data) < 20:
            return 0, 0
        timescale = struct.unpack_from(">I", data, 12)[0]
        duration = struct.unpack_from(">I", data, 16)[0]
    return timescale, duration


def parse_stsd_codec(data: bytes) -> str:
    """
    Parse Sample Description box (stsd) to extract the codec FourCC.

    Returns:
        The codec name as a string (e.g. "avc1", "hvc1", "mp4a"),
        or "" when the box is truncated or the FourCC is not ASCII.
    """
    if len(data) < 16:
        return ""
    # version(1)+flags(3)+entry_count(4)
    pos = 8
    # First entry: size(4)+type(4)
    if pos + 8 > len(data):
        return ""
    codec_fourcc = data[pos + 4 : pos + 8]
    try:
        # FourCCs pad short names with spaces; strip them for readability.
        return codec_fourcc.decode("ascii").strip()
    except (UnicodeDecodeError, ValueError):
        return ""
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# MP4 Index (parallel to MKVCueIndex)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class MP4Index:
    """
    Seek index extracted from an MP4 file's moov atom.

    Parallel to ``MKVCueIndex`` for MKV files. Provides keyframe-indexed
    cue points for time-based seeking and the raw moov bytes needed to
    reconstruct a streamable (faststart) MP4 for on-the-fly demuxing.
    """

    duration_ms: float = 0.0
    timescale: int = 0
    cue_points: list[tuple[float, int]] = field(default_factory=list)  # [(time_ms, byte_offset), ...]
    moov_offset: int = 0  # Absolute file offset where moov atom starts
    moov_size: int = 0  # Total size of the moov atom (header + body)
    moov_data: bytes = b""  # Raw moov atom bytes (for prepending to mdat pipe)
    ftyp_data: bytes = b""  # Raw ftyp atom bytes (for prepending before moov)
    mdat_offset: int = 0  # Absolute file offset where mdat atom starts
    mdat_size: int = 0  # Total size of the mdat atom
    video_codec: str = ""  # e.g. "avc1", "hvc1", "mp4v"
    audio_codec: str = ""  # e.g. "mp4a", "ac-3"

    def byte_offset_for_time(self, time_ms: float) -> tuple[int, float]:
        """
        Find the byte offset of the nearest keyframe at or before ``time_ms``.

        Returns:
            (absolute_byte_offset, actual_keyframe_time_ms); (0, 0.0) when
            the index has no cue points.
        """
        if not self.cue_points:
            return 0, 0.0

        # Binary-search the sorted cue times; clamp to the first cue when
        # the requested time precedes every keyframe.
        cue_times = [t for t, _ in self.cue_points]
        idx = max(bisect.bisect_right(cue_times, time_ms) - 1, 0)

        cue_time_ms, byte_offset = self.cue_points[idx]
        return byte_offset, cue_time_ms
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Moov -> Cue Points Builder
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def _find_nested_box(data: bytes, *path: bytes) -> bytes | None:
    """Descend a box hierarchy, e.g. ``_find_nested_box(data, b"trak", b"mdia")``.

    Returns the body of the innermost box, or ``None`` if any step of
    the path is missing.
    """
    node = data
    for name in path:
        child = find_box(node, name)
        if child is None:
            return None
        node = child
    return node
|
||||||
|
|
||||||
|
|
||||||
|
def build_cue_points_from_moov(moov_body: bytes) -> tuple[list[tuple[float, int]], float, int, str, str]:
    """
    Parse a moov body to build keyframe-indexed cue points.

    Walks the first video trak's stbl to extract:
    - Chunk offsets (stco/co64)
    - Keyframe sample indices (stss)
    - Sample sizes (stsz)
    - Sample durations (stts)
    - Sample-to-chunk mapping (stsc)
    - Timescale and duration from mdhd

    Returns:
        (cue_points, duration_ms, timescale, video_codec, audio_codec)
        where cue_points is a list of (time_ms, absolute_byte_offset)
        pairs, one per video keyframe, in ascending time order.
    """
    cue_points: list[tuple[float, int]] = []
    duration_ms = 0.0
    timescale = 0
    video_codec = ""
    audio_codec = ""

    # First pass over the moov children: find the first video trak's
    # stbl/mdhd and record codec names for the first video/audio traks.
    video_stbl = None
    video_mdhd = None

    offset = 0
    data = moov_body
    while offset < len(data):
        result = read_box_header(data, offset)
        if result is None:
            break
        box_type, hdr_size, total_size = result

        if box_type == b"trak":
            trak_body = data[offset + hdr_size : offset + total_size]

            # Check handler type to identify video/audio
            hdlr_data = _find_nested_box(trak_body, b"mdia", b"hdlr")
            handler_type = b""
            if hdlr_data and len(hdlr_data) >= 12:
                # hdlr: version(1)+flags(3)+pre_defined(4)+handler_type(4)
                handler_type = hdlr_data[8:12]

            if handler_type == b"vide" and video_stbl is None:
                # Only the FIRST video trak is indexed; later ones are ignored.
                video_stbl = _find_nested_box(trak_body, b"mdia", b"minf", b"stbl")
                video_mdhd_data = _find_nested_box(trak_body, b"mdia", b"mdhd")
                if video_mdhd_data:
                    video_mdhd = video_mdhd_data

                stsd_data = _find_nested_box(trak_body, b"mdia", b"minf", b"stbl", b"stsd")
                if stsd_data:
                    video_codec = parse_stsd_codec(stsd_data)

            elif handler_type == b"soun" and not audio_codec:
                # First audio trak only: just record the codec name.
                stsd_data = _find_nested_box(trak_body, b"mdia", b"minf", b"stbl", b"stsd")
                if stsd_data:
                    audio_codec = parse_stsd_codec(stsd_data)

        elif box_type == b"mvhd":
            # Fallback: parse mvhd for timescale/duration if no mdhd.
            # mvhd v1 uses 64-bit creation/modification times, shifting
            # timescale to byte 20 and duration (64-bit) to byte 24 of the body.
            mvhd_body = data[offset + hdr_size : offset + total_size]
            if len(mvhd_body) >= 20:
                version = mvhd_body[0]
                if version == 1:
                    if len(mvhd_body) >= 28:
                        ts = struct.unpack_from(">I", mvhd_body, 20)[0]
                        dur = struct.unpack_from(">Q", mvhd_body, 24)[0]
                        if timescale == 0:
                            timescale = ts
                            duration_ms = dur / ts * 1000.0 if ts else 0.0
                else:
                    # v0: 32-bit fields; timescale at byte 12, duration at 16.
                    ts = struct.unpack_from(">I", mvhd_body, 12)[0]
                    dur = struct.unpack_from(">I", mvhd_body, 16)[0]
                    if timescale == 0:
                        timescale = ts
                        duration_ms = dur / ts * 1000.0 if ts else 0.0

        if total_size == 0:
            # size==0 means "extends to end of file"; nothing can follow.
            break
        offset += total_size

    # Parse mdhd for video timescale (more precise than mvhd; mdhd is
    # the media's own timescale, which the sample tables are expressed in).
    if video_mdhd:
        ts, dur = parse_mdhd(video_mdhd)
        if ts > 0:
            timescale = ts
            duration_ms = dur / ts * 1000.0

    if video_stbl is None:
        logger.warning("[mp4_parser] No video stbl found in moov")
        return cue_points, duration_ms, timescale, video_codec, audio_codec

    # Parse sample tables from video stbl
    stco_data = find_box(video_stbl, b"stco")
    co64_data = find_box(video_stbl, b"co64")
    stss_data = find_box(video_stbl, b"stss")
    stsz_data = find_box(video_stbl, b"stsz")
    stts_data = find_box(video_stbl, b"stts")
    stsc_data = find_box(video_stbl, b"stsc")

    # Chunk offsets: co64 (64-bit) takes precedence over stco (32-bit).
    chunk_offsets = parse_co64(co64_data) if co64_data else (parse_stco(stco_data) if stco_data else [])

    # Keyframe sample numbers (1-based)
    keyframe_samples = set(parse_stss(stss_data)) if stss_data else set()
    all_are_keyframes = not stss_data  # No stss means all samples are sync

    # Sample sizes
    uniform_size, size_list = parse_stsz(stsz_data) if stsz_data else (0, [])

    # Sample durations (run-length encoded)
    stts_entries = parse_stts(stts_data) if stts_data else []

    # Sample-to-chunk mapping
    stsc_entries = parse_stsc(stsc_data) if stsc_data else []

    if not chunk_offsets or timescale == 0:
        logger.warning(
            "[mp4_parser] Missing data: chunks=%d, timescale=%d",
            len(chunk_offsets),
            timescale,
        )
        return cue_points, duration_ms, timescale, video_codec, audio_codec

    # Expand stts to per-sample durations
    sample_durations: list[int] = []
    for count, delta in stts_entries:
        sample_durations.extend([delta] * count)

    # Expand stsc to determine which samples belong to which chunk.
    # Build a mapping: chunk_index (0-based) -> samples_per_chunk.
    # Each stsc entry applies from its first_chunk up to (but excluding)
    # the next entry's first_chunk.
    total_chunks = len(chunk_offsets)
    chunk_sample_counts: list[int] = [0] * total_chunks

    if stsc_entries:
        for i, (first_chunk, spc, _sdi) in enumerate(stsc_entries):
            # first_chunk is 1-based
            start = first_chunk - 1
            if i + 1 < len(stsc_entries):
                end = stsc_entries[i + 1][0] - 1
            else:
                end = total_chunks
            for c in range(start, end):
                if c < total_chunks:
                    chunk_sample_counts[c] = spc
    else:
        # Default: 1 sample per chunk
        chunk_sample_counts = [1] * total_chunks

    # Count total samples
    total_samples = sum(chunk_sample_counts)

    # Get per-sample sizes (stsz either gives a uniform size or a full list)
    if uniform_size > 0:
        sample_sizes = [uniform_size] * total_samples
    else:
        sample_sizes = size_list

    # Build cumulative timestamp for each sample and map keyframes to byte offsets.
    # Within a chunk, samples are stored back-to-back starting at the chunk offset.
    current_sample = 0  # 0-based sample index
    current_time = 0  # in timescale units

    for chunk_idx, chunk_offset in enumerate(chunk_offsets):
        spc = chunk_sample_counts[chunk_idx] if chunk_idx < len(chunk_sample_counts) else 1
        byte_pos = chunk_offset

        for s in range(spc):
            sample_num = current_sample + 1  # 1-based for stss comparison
            is_keyframe = all_are_keyframes or sample_num in keyframe_samples

            if is_keyframe:
                time_ms = current_time / timescale * 1000.0
                cue_points.append((time_ms, byte_pos))

            # Advance byte position by this sample's size
            if current_sample < len(sample_sizes):
                byte_pos += sample_sizes[current_sample]

            # Advance timestamp
            if current_sample < len(sample_durations):
                current_time += sample_durations[current_sample]

            current_sample += 1

    logger.info(
        "[mp4_parser] Built %d cue points from %d samples, duration=%.1fs, video=%s, audio=%s",
        len(cue_points),
        total_samples,
        duration_ms / 1000.0,
        video_codec,
        audio_codec,
    )

    return cue_points, duration_ms, timescale, video_codec, audio_codec
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Moov Offset Rewriting (for faststart pipe construction)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def _rewrite_stco_in_place(data: bytearray, box_start: int, box_size: int, delta: int) -> int:
    """Shift every 32-bit stco chunk offset by ``delta``; return the entry count.

    ``box_start``/``box_size`` describe the FullBox body: version(1) +
    flags(3), then a 32-bit entry count, then the offset table.
    """
    count_pos = box_start + 4  # skip version+flags
    limit = box_start + box_size
    if count_pos + 4 > limit:
        return 0
    n_entries = struct.unpack_from(">I", data, count_pos)[0]
    cursor = count_pos + 4
    for _ in range(n_entries):
        # Stop quietly on a truncated table rather than raising.
        if cursor + 4 > limit:
            break
        value = struct.unpack_from(">I", data, cursor)[0]
        struct.pack_into(">I", data, cursor, value + delta)
        cursor += 4
    return n_entries
|
||||||
|
|
||||||
|
|
||||||
|
def _rewrite_co64_in_place(data: bytearray, box_start: int, box_size: int, delta: int) -> int:
    """Shift every 64-bit co64 chunk offset by ``delta``; return the entry count.

    Layout is identical to stco except each table entry is 8 bytes.
    """
    count_pos = box_start + 4  # skip version+flags
    limit = box_start + box_size
    if count_pos + 4 > limit:
        return 0
    n_entries = struct.unpack_from(">I", data, count_pos)[0]
    cursor = count_pos + 4
    for _ in range(n_entries):
        # Stop quietly on a truncated table rather than raising.
        if cursor + 8 > limit:
            break
        value = struct.unpack_from(">Q", data, cursor)[0]
        struct.pack_into(">Q", data, cursor, value + delta)
        cursor += 8
    return n_entries
|
||||||
|
|
||||||
|
|
||||||
|
def _walk_and_rewrite(data: bytearray, start: int, end: int, delta: int) -> int:
    """
    Recursively walk boxes within [start, end) looking for stco/co64 boxes
    and rewriting their offsets.

    Only known container boxes (moov/trak/mdia/minf/stbl) are descended
    into; everything else is skipped opaquely. Malformed sizes terminate
    the walk early instead of raising.

    Returns total number of offset entries rewritten.
    """
    total = 0
    offset = start
    while offset + 8 <= end:
        size = struct.unpack_from(">I", data, offset)[0]
        box_type = data[offset + 4 : offset + 8]
        hdr_size = 8

        if size == 1:
            # 64-bit "largesize" box: the real size follows the type field.
            if offset + 16 > end:
                break
            size = struct.unpack_from(">Q", data, offset + 8)[0]
            hdr_size = 16
        elif size == 0:
            # size==0 means the box extends to the end of the enclosing span.
            size = end - offset

        # Bail out on impossible sizes (corrupt data / truncated buffer).
        if size < 8 or offset + size > end:
            break

        body_start = offset + hdr_size
        body_end = offset + size

        if box_type == b"stco":
            total += _rewrite_stco_in_place(data, body_start, size - hdr_size, delta)
        elif box_type == b"co64":
            total += _rewrite_co64_in_place(data, body_start, size - hdr_size, delta)
        elif box_type in (b"moov", b"trak", b"mdia", b"minf", b"stbl"):
            # Container box -- recurse into children
            total += _walk_and_rewrite(data, body_start, body_end, delta)

        offset += size

    return total
|
||||||
|
|
||||||
|
|
||||||
|
def extract_video_track_from_moov(moov_data: bytes):
    """
    Extract video codec configuration from an MP4 moov atom.

    Walks the moov box tree to find the first video trak, extracts its
    resolution and codec-private data (avcC/hvcC), and returns a synthetic
    ``MKVTrack`` suitable for building an fMP4 init segment.

    Args:
        moov_data: Complete moov box bytes, including the box header.

    Returns:
        An ``MKVTrack`` with video metadata, or ``None`` if no video track
        is found.
    """
    # Imported lazily to avoid a circular import with the EBML/MKV module.
    from mediaflow_proxy.remuxer.ebml_parser import (
        CODEC_ID_H264,
        CODEC_ID_H265,
        MKVTrack,
    )

    # Strip the moov box header to get the body
    if len(moov_data) < 8:
        return None
    raw_size = struct.unpack_from(">I", moov_data, 0)[0]
    # size==1 means a 16-byte header with 64-bit largesize.
    hdr_size = 16 if raw_size == 1 else 8
    moov_body = moov_data[hdr_size:]

    # Walk traks looking for video handler
    # NOTE(review): unlike build_cue_points_from_moov, there is no
    # ``total_size == 0`` guard here; a zero-sized box would loop forever.
    offset = 0
    while offset < len(moov_body):
        result = read_box_header(moov_body, offset)
        if result is None:
            break
        box_type, box_hdr_size, total_size = result

        if box_type == b"trak":
            trak_body = moov_body[offset + box_hdr_size : offset + total_size]

            # Check handler type (hdlr: version+flags+pre_defined, then 4cc)
            hdlr_data = _find_nested_box(trak_body, b"mdia", b"hdlr")
            handler_type = b""
            if hdlr_data and len(hdlr_data) >= 12:
                handler_type = hdlr_data[8:12]

            if handler_type == b"vide":
                # Found video trak -- extract stsd for codec config
                stsd_data = _find_nested_box(trak_body, b"mdia", b"minf", b"stbl", b"stsd")
                if not stsd_data or len(stsd_data) < 16:
                    # Unusable sample description; keep scanning later traks.
                    offset += total_size
                    continue

                codec_name = parse_stsd_codec(stsd_data)

                # Map MP4 codec names to MKV codec IDs
                if codec_name in ("avc1", "avc3"):
                    mkv_codec_id = CODEC_ID_H264
                elif codec_name in ("hvc1", "hev1"):
                    mkv_codec_id = CODEC_ID_H265
                else:
                    mkv_codec_id = f"V_MP4/{codec_name}"

                # Extract codec private (avcC or hvcC box) from inside the
                # sample entry. The stsd structure is:
                #   version(1) + flags(3) + entry_count(4)
                #   then entry: size(4) + type(4) + ... + nested boxes
                # The avcC/hvcC is a child box of the sample entry.
                codec_private = b""
                width = 0
                height = 0

                # Parse sample entry to get width/height and codec config
                entry_start = 8  # skip version+flags+entry_count
                if entry_start + 8 <= len(stsd_data):
                    entry_size = struct.unpack_from(">I", stsd_data, entry_start)[0]
                    entry_body_start = entry_start + 8  # skip size+type
                    # Clamp to the buffer so a lying entry_size can't overrun.
                    entry_end = min(entry_start + entry_size, len(stsd_data))

                    # Visual sample entry: 6 reserved + 2 data_ref_idx + ...
                    # At offset 24 from entry body start: width(2) + height(2)
                    vis_offset = entry_body_start + 24
                    if vis_offset + 4 <= entry_end:
                        width = struct.unpack_from(">H", stsd_data, vis_offset)[0]
                        height = struct.unpack_from(">H", stsd_data, vis_offset + 2)[0]

                    # Scan nested boxes for avcC or hvcC
                    # Visual sample entry fixed fields = 70 bytes from entry body
                    nested_start = entry_body_start + 70
                    if nested_start < entry_end:
                        nested_data = stsd_data[nested_start:entry_end]
                        for target in (b"avcC", b"hvcC"):
                            found = find_box(nested_data, target)
                            if found:
                                codec_private = found
                                break

                # Get duration from mdhd if available
                default_duration_ns = 0
                mdhd_data = _find_nested_box(trak_body, b"mdia", b"mdhd")
                if mdhd_data and len(mdhd_data) >= 20:
                    version = mdhd_data[0]
                    if version == 1 and len(mdhd_data) >= 28:
                        # v1 mdhd: 64-bit times; timescale at 20, duration at 24.
                        ts = struct.unpack_from(">I", mdhd_data, 20)[0]
                        dur = struct.unpack_from(">Q", mdhd_data, 24)[0]
                    else:
                        ts = struct.unpack_from(">I", mdhd_data, 12)[0]
                        dur = struct.unpack_from(">I", mdhd_data, 16)[0]
                    if ts > 0 and dur > 0:
                        # Rough estimate: assume 24fps if we can't determine.
                        # NOTE(review): ts/dur are parsed but never used here;
                        # the real per-frame duration could be derived from stts.
                        default_duration_ns = int(1_000_000_000 / 24)

                return MKVTrack(
                    track_number=1,
                    track_type=1,  # video
                    codec_id=mkv_codec_id,
                    codec_private=codec_private,
                    pixel_width=width,
                    pixel_height=height,
                    default_duration_ns=default_duration_ns,
                )

        offset += total_size

    return None
|
||||||
|
|
||||||
|
|
||||||
|
def rewrite_moov_offsets(moov_data: bytes, delta: int) -> bytes:
    """
    Return a copy of ``moov_data`` with every stco/co64 chunk offset
    shifted by ``delta``.

    Needed when rearranging an MP4 file for pipe streaming: the original
    moov's chunk offsets reference positions in the original file layout,
    so when moov is prepended before mdat every offset must be shifted by
    ``delta = moov_size - original_mdat_offset``.

    Args:
        moov_data: Raw bytes of the complete moov box (header + body).
        delta: Offset adjustment to add to every chunk offset.

    Returns:
        Modified moov bytes with updated chunk offsets.
    """
    buf = bytearray(moov_data)

    # Skip the moov header itself (16 bytes when a 64-bit largesize is used).
    header_len = 8
    if struct.unpack_from(">I", buf, 0)[0] == 1:
        header_len = 16

    rewritten = _walk_and_rewrite(buf, header_len, len(buf), delta)
    logger.info("[mp4_parser] Rewrote %d chunk offset entries (delta=%+d)", rewritten, delta)

    return bytes(buf)
|
||||||
608
mediaflow_proxy/remuxer/pyav_demuxer.py
Normal file
608
mediaflow_proxy/remuxer/pyav_demuxer.py
Normal file
@@ -0,0 +1,608 @@
|
|||||||
|
"""
|
||||||
|
Universal PyAV-based streaming demuxer.
|
||||||
|
|
||||||
|
Bridges async byte streams to PyAV's synchronous I/O using an OS pipe,
|
||||||
|
allowing on-the-fly demuxing of any container format (MKV, MP4, TS,
|
||||||
|
FLV, WebM, etc.) from an async source.
|
||||||
|
|
||||||
|
Architecture:
|
||||||
|
AsyncIterator[bytes] --> async feeder task --> queue.Queue --> writer thread (pipe)
|
||||||
|
|
|
||||||
|
OS pipe (kernel buffer)
|
||||||
|
|
|
||||||
|
demux thread: av.open + discover + demux
|
||||||
|
|
|
||||||
|
queue.Queue --> run_in_executor consumer
|
||||||
|
|
||||||
|
Performance: Uses plain threading.Queue on both sides (writer input and
|
||||||
|
packet output) to avoid per-item ``run_coroutine_threadsafe`` overhead.
|
||||||
|
The async/thread bridge is done via ``run_in_executor`` on the consumer
|
||||||
|
side and a dedicated asyncio task on the producer side.
|
||||||
|
|
||||||
|
For MP4 inputs, the caller (transcode_handler) prepends the moov atom
|
||||||
|
to the stream so PyAV receives a "faststart"-style MP4 through the pipe.
|
||||||
|
This allows true on-the-fly demuxing for all container formats.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import queue
|
||||||
|
import threading
|
||||||
|
from collections.abc import AsyncIterator
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
import av
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Sentinel object to signal end-of-stream in queues
|
||||||
|
_SENTINEL = object()
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
class DemuxedStream:
    """Metadata about a demuxed stream, captured once at discovery time."""

    # Stream index within the container (used to match DemuxedPacket.stream_index)
    index: int
    # Codec short name as reported by the demuxer — presumably FFmpeg names
    # like "h264"/"aac"; TODO confirm against _discover_streams
    codec_name: str
    codec_type: str  # "video" or "audio"
    # Video-specific
    width: int = 0
    height: int = 0
    fps: float = 0.0
    pixel_format: str = ""
    # Audio-specific
    sample_rate: int = 0
    channels: int = 0
    # Timing: stream time_base as a rational (num/den)
    time_base_num: int = 1
    time_base_den: int = 1000
    duration_seconds: float = 0.0
    # Raw codec extradata (e.g. SPS/PPS for H.264, AudioSpecificConfig for AAC)
    extradata: bytes = b""
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class DemuxedPacket:
|
||||||
|
"""A demuxed packet with timing info."""
|
||||||
|
|
||||||
|
stream_index: int
|
||||||
|
codec_type: str # "video" or "audio"
|
||||||
|
data: bytes
|
||||||
|
pts: int # Presentation timestamp in stream time_base units
|
||||||
|
dts: int # Decode timestamp in stream time_base units
|
||||||
|
duration: int # Duration in stream time_base units
|
||||||
|
is_keyframe: bool
|
||||||
|
time_base_num: int
|
||||||
|
time_base_den: int
|
||||||
|
# Optional decoded frame when decode_video/decode_audio is True
|
||||||
|
# av.VideoFrame for video, av.AudioFrame for audio
|
||||||
|
decoded_frame: object = None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def pts_seconds(self) -> float:
|
||||||
|
if self.time_base_den == 0:
|
||||||
|
return 0.0
|
||||||
|
return self.pts * self.time_base_num / self.time_base_den
|
||||||
|
|
||||||
|
@property
|
||||||
|
def dts_seconds(self) -> float:
|
||||||
|
if self.time_base_den == 0:
|
||||||
|
return 0.0
|
||||||
|
return self.dts * self.time_base_num / self.time_base_den
|
||||||
|
|
||||||
|
@property
|
||||||
|
def duration_seconds(self) -> float:
|
||||||
|
if self.time_base_den == 0:
|
||||||
|
return 0.0
|
||||||
|
return self.duration * self.time_base_num / self.time_base_den
|
||||||
|
|
||||||
|
|
||||||
|
class PyAVDemuxer:
|
||||||
|
"""
|
||||||
|
Streaming demuxer using PyAV with pipe-based I/O.
|
||||||
|
|
||||||
|
All container I/O happens in background threads. The writer thread
|
||||||
|
feeds source bytes into a pipe; a single demux thread opens the
|
||||||
|
container, discovers streams, and demuxes packets -- all on the
|
||||||
|
same file object, ensuring the pipe's read cursor is never lost.
|
||||||
|
|
||||||
|
Performance optimisation: both the writer-input side and the
|
||||||
|
packet-output side use plain ``queue.Queue`` (no event-loop
|
||||||
|
involvement per item). The async/thread bridge is done via
|
||||||
|
``run_in_executor`` on the consumer and an asyncio task on the
|
||||||
|
producer, eliminating ~1700 ``run_coroutine_threadsafe`` round-trips
|
||||||
|
per 30 s of 4K content.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
demuxer = PyAVDemuxer()
|
||||||
|
await demuxer.start(source_async_iter)
|
||||||
|
# demuxer.video_stream / audio_stream are now available
|
||||||
|
async for packet in demuxer.iter_packets():
|
||||||
|
if packet.codec_type == "video":
|
||||||
|
...
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, decode_video: bool = False, decode_audio: bool = False) -> None:
    """
    Args:
        decode_video: If True, the demux thread will decode video packets
            using the container's codec context and attach decoded frames
            to DemuxedPacket.decoded_frame. This avoids format conversion
            issues with standalone decoders (HVCC vs Annex B).
        decode_audio: If True, the demux thread will decode audio packets
            using the container's codec context and attach decoded frames
            to DemuxedPacket.decoded_frame. This is needed for codecs like
            Vorbis/Opus where the standalone decoder requires codec headers
            that are only available in the container context. Can also be
            set after start() returns (before packets are consumed) via
            the ``enable_audio_decode()`` method.
    """
    self._decode_video = decode_video
    self._decode_audio = decode_audio
    # Events the demux thread blocks on before processing packets; set once
    # the caller has committed to a decode-or-not decision.
    self._video_decode_decided = threading.Event()
    self._audio_decode_decided = threading.Event()
    # If decode flags were set at construction time, mark decided immediately
    if decode_video:
        self._video_decode_decided.set()
    if decode_audio:
        self._audio_decode_decided.set()
    self._container: av.InputContainer | None = None
    self._video_stream: DemuxedStream | None = None
    self._audio_stream: DemuxedStream | None = None
    # Thread-safe queues (no event-loop involvement per put/get);
    # created in start() — None until then.
    self._packet_queue: queue.Queue | None = None
    self._source_queue: queue.Queue | None = None
    self._demux_thread: threading.Thread | None = None
    self._writer_thread: threading.Thread | None = None
    self._feeder_task: asyncio.Task | None = None
    # Pipe file descriptors; ownership of _read_fd transfers to the demux
    # thread, and _write_fd is closed by the writer thread on exit.
    self._write_fd: int | None = None
    self._read_fd: int | None = None
|
||||||
|
|
||||||
|
@property
def video_stream(self) -> DemuxedStream | None:
    """Selected video stream metadata, or ``None`` before discovery / if absent."""
    return self._video_stream
|
||||||
|
|
||||||
|
@property
def audio_stream(self) -> DemuxedStream | None:
    """Selected audio stream metadata, or ``None`` before discovery / if absent."""
    return self._audio_stream
|
||||||
|
|
||||||
|
def enable_video_decode(self, enable: bool = True) -> None:
    """
    Turn in-thread video decoding on or off.

    Must be called after ``start()`` has returned (stream metadata is
    known) and before ``iter_packets()`` is consumed: the demux thread
    blocks on this decision before it processes any video packets.
    """
    self._decode_video = enable
    # Unblock the demux thread waiting on the decision event.
    self._video_decode_decided.set()
|
||||||
|
|
||||||
|
def enable_audio_decode(self, enable: bool = True) -> None:
    """
    Turn in-thread audio decoding on or off.

    Must be called after ``start()`` has returned (stream metadata is
    known) and before ``iter_packets()`` is consumed: the demux thread
    blocks on this decision before it processes any audio packets.
    """
    self._decode_audio = enable
    # Unblock the demux thread waiting on the decision event.
    self._audio_decode_decided.set()
|
||||||
|
|
||||||
|
# ── Writer side ──────────────────────────────────────────────────
|
||||||
|
|
||||||
|
async def _async_feeder(self, source: AsyncIterator[bytes]) -> None:
    """
    Async task: pull chunks from the async source and push them
    into a plain ``queue.Queue`` for the writer thread.

    This replaces the old per-chunk ``run_coroutine_threadsafe``
    pattern, batching the async-to-sync bridge into one task.

    ``queue.Queue.put()`` is a blocking call, so we use
    ``run_in_executor`` to avoid blocking the event loop when the
    queue is full.
    """
    loop = asyncio.get_running_loop()
    sq = self._source_queue
    try:
        async for chunk in source:
            # Blocking put runs on a worker thread so a full queue
            # back-pressures the source without stalling the event loop.
            await loop.run_in_executor(None, sq.put, chunk)
    except (asyncio.CancelledError, GeneratorExit):
        # Normal shutdown path when the consumer stops early.
        pass
    except Exception:
        # Best-effort: any source failure just ends the feed; the
        # sentinel below tells the writer thread to close the pipe.
        pass
    finally:
        # Always deliver the sentinel so the writer thread terminates
        # and the demuxer sees EOF.
        sq.put(_SENTINEL)
|
||||||
|
|
||||||
|
def _write_chunks_sync(self) -> None:
    """
    Writer thread: pull pre-buffered chunks from ``_source_queue``
    and write to the OS pipe. No event-loop interaction.

    Terminates — and closes the write end, delivering EOF to the demux
    side — when the feeder's sentinel arrives, the 30 s queue timeout
    expires, or the reader closes its end (BrokenPipeError).
    """
    write_fd = self._write_fd
    sq = self._source_queue
    try:
        while True:
            chunk = sq.get(timeout=30.0)
            if chunk is _SENTINEL:
                break
            # os.write() may perform a PARTIAL write for chunks larger
            # than the pipe buffer; loop until the whole chunk is written
            # so no bytes are silently dropped from the stream.
            view = memoryview(chunk)
            while view:
                written = os.write(write_fd, view)
                view = view[written:]
    except Exception:
        # Best-effort: queue.Empty (stalled source), BrokenPipeError
        # (reader gone), etc. all just end the writer; closing the fd
        # below propagates EOF to the demuxer.
        pass
    finally:
        try:
            os.close(write_fd)
        except OSError:
            pass
        self._write_fd = None
|
||||||
|
|
||||||
|
# ── Demux side ───────────────────────────────────────────────────
|
||||||
|
|
||||||
|
    async def start(self, source: AsyncIterator[bytes]) -> None:
        """
        Start pipe-based streaming: writer thread feeds the pipe, a single
        demux thread opens the container, discovers streams, and begins
        enqueuing packets.

        After this returns, ``video_stream`` and ``audio_stream`` are
        populated and packets are being enqueued for ``iter_packets()``.
        """
        loop = asyncio.get_running_loop()

        # Create OS pipe: write end is fed by the writer thread, read end
        # is handed to PyAV in the demux thread.
        self._read_fd, self._write_fd = os.pipe()

        # Source buffer queue (async feeder task -> writer thread)
        self._source_queue = queue.Queue(maxsize=256)

        # Kick off the async feeder task
        self._feeder_task = asyncio.create_task(self._async_feeder(source))

        # Start writer thread (drains source_queue into the pipe)
        self._writer_thread = threading.Thread(
            target=self._write_chunks_sync,
            daemon=True,
            name="pyav-writer",
        )
        self._writer_thread.start()

        # Packet queue for demux-thread -> async consumer bridge
        self._packet_queue = queue.Queue(maxsize=128)
        streams_ready = threading.Event()

        def _open_and_demux():
            """
            Single background thread: open container, discover streams,
            demux all packets.

            Critical: av.open(), _discover_streams(), and container.demux()
            all happen on the same file object in the same thread. This
            ensures the pipe read cursor is never lost between open and demux.
            """
            pkt_count = 0
            pq = self._packet_queue
            try:
                # Open container from read end of pipe
                read_file = os.fdopen(self._read_fd, "rb")
                self._read_fd = None  # ownership transferred to read_file

                self._container = av.open(
                    read_file,
                    mode="r",
                    options={
                        # Tolerate mid-stream joins / broken data in live TS
                        "err_detect": "ignore_err",
                        "fflags": "+discardcorrupt+genpts",
                    },
                )
                self._discover_streams()

                # Signal stream metadata is available (unblocks start())
                streams_ready.set()

                if self._video_stream is None and self._audio_stream is None:
                    logger.warning("[pyav_demuxer] No video or audio streams found")
                    return

                # Select streams to demux
                streams_to_demux = []
                if self._video_stream is not None:
                    streams_to_demux.append(self._container.streams[self._video_stream.index])
                if self._audio_stream is not None:
                    streams_to_demux.append(self._container.streams[self._audio_stream.index])

                # Wait for the caller to decide on video/audio decoding
                # (if not already decided at construction time).
                if not self._video_decode_decided.is_set():
                    self._video_decode_decided.wait(timeout=10.0)
                if not self._audio_decode_decided.is_set():
                    self._audio_decode_decided.wait(timeout=10.0)

                # Cache stream objects and time_base for the hot loop
                video_stream_obj = (
                    self._container.streams[self._video_stream.index] if self._video_stream is not None else None
                )
                audio_stream_obj = (
                    self._container.streams[self._audio_stream.index] if self._audio_stream is not None else None
                )

                video_tb_num = video_stream_obj.time_base.numerator if video_stream_obj else 1
                video_tb_den = video_stream_obj.time_base.denominator if video_stream_obj else 1
                audio_tb_num = audio_stream_obj.time_base.numerator if audio_stream_obj else 1
                audio_tb_den = audio_stream_obj.time_base.denominator if audio_stream_obj else 1

                decode_video = self._decode_video
                decode_audio = self._decode_audio

                # Demux and enqueue packets -- plain queue.put(), no event loop
                for packet in self._container.demux(*streams_to_demux):
                    if packet.size == 0:
                        continue

                    stream = self._container.streams[packet.stream_index]
                    is_video = stream.type == "video"
                    is_audio = stream.type == "audio"

                    # Optionally decode video packets in-thread
                    if decode_video and is_video and video_stream_obj is not None:
                        try:
                            frames = video_stream_obj.codec_context.decode(packet)
                        except Exception:
                            frames = []
                        for frame in frames:
                            # NOTE: dts mirrors frame.pts -- decoded frames
                            # have no separate decode timestamp.
                            pq.put(
                                DemuxedPacket(
                                    stream_index=packet.stream_index,
                                    codec_type="video",
                                    data=b"",
                                    pts=int(frame.pts) if frame.pts is not None else 0,
                                    dts=int(frame.pts) if frame.pts is not None else 0,
                                    duration=int(packet.duration) if packet.duration is not None else 0,
                                    is_keyframe=frame.key_frame,
                                    time_base_num=video_tb_num,
                                    time_base_den=video_tb_den,
                                    decoded_frame=frame,
                                )
                            )
                            pkt_count += 1

                    # Optionally decode audio packets in-thread
                    elif decode_audio and is_audio and audio_stream_obj is not None:
                        try:
                            frames = audio_stream_obj.codec_context.decode(packet)
                        except Exception:
                            frames = []
                        for frame in frames:
                            pq.put(
                                DemuxedPacket(
                                    stream_index=packet.stream_index,
                                    codec_type="audio",
                                    data=b"",
                                    pts=int(frame.pts) if frame.pts is not None else 0,
                                    dts=int(frame.pts) if frame.pts is not None else 0,
                                    duration=int(packet.duration) if packet.duration is not None else 0,
                                    is_keyframe=False,
                                    time_base_num=audio_tb_num,
                                    time_base_den=audio_tb_den,
                                    decoded_frame=frame,
                                )
                            )
                            pkt_count += 1

                    else:
                        # Pass-through: forward the compressed packet bytes
                        tb_num = video_tb_num if is_video else audio_tb_num
                        tb_den = video_tb_den if is_video else audio_tb_den
                        pq.put(
                            DemuxedPacket(
                                stream_index=packet.stream_index,
                                codec_type=stream.type,
                                data=bytes(packet),
                                pts=int(packet.pts) if packet.pts is not None else 0,
                                dts=int(packet.dts) if packet.dts is not None else 0,
                                duration=int(packet.duration) if packet.duration is not None else 0,
                                is_keyframe=packet.is_keyframe,
                                time_base_num=tb_num,
                                time_base_den=tb_den,
                            )
                        )
                        pkt_count += 1

                # Flush the video decoder if we were decoding
                if decode_video and video_stream_obj is not None:
                    try:
                        for frame in video_stream_obj.codec_context.decode(None):
                            pq.put(
                                DemuxedPacket(
                                    stream_index=video_stream_obj.index,
                                    codec_type="video",
                                    data=b"",
                                    pts=int(frame.pts) if frame.pts is not None else 0,
                                    dts=int(frame.pts) if frame.pts is not None else 0,
                                    duration=0,
                                    is_keyframe=frame.key_frame,
                                    time_base_num=video_tb_num,
                                    time_base_den=video_tb_den,
                                    decoded_frame=frame,
                                )
                            )
                            pkt_count += 1
                    except Exception:
                        pass

                # Flush the audio decoder if we were decoding
                if decode_audio and audio_stream_obj is not None:
                    try:
                        for frame in audio_stream_obj.codec_context.decode(None):
                            pq.put(
                                DemuxedPacket(
                                    stream_index=audio_stream_obj.index,
                                    codec_type="audio",
                                    data=b"",
                                    pts=int(frame.pts) if frame.pts is not None else 0,
                                    dts=int(frame.pts) if frame.pts is not None else 0,
                                    duration=0,
                                    is_keyframe=False,
                                    time_base_num=audio_tb_num,
                                    time_base_den=audio_tb_den,
                                    decoded_frame=frame,
                                )
                            )
                            pkt_count += 1
                    except Exception:
                        pass

                logger.info("[pyav_demuxer] Demux complete: %d packets", pkt_count)

            except Exception as e:
                # "Invalid data" is the normal end-of-pipe symptom for live
                # TS -- presumably not worth logging; verify against backend.
                if "Invalid data" not in str(e):
                    logger.debug("[pyav_demuxer] Demux thread error: %s", e)
                # Ensure streams_ready is set even on error so start() returns
                streams_ready.set()
            finally:
                # Always unblock iter_packets(), success or failure
                pq.put(_SENTINEL)

        self._demux_thread = threading.Thread(target=_open_and_demux, daemon=True, name="pyav-demux")
        self._demux_thread.start()

        # Wait for stream discovery before returning.
        # Use run_in_executor to avoid blocking the event loop.
        await loop.run_in_executor(None, streams_ready.wait)
|
||||||
|
|
||||||
|
    async def iter_packets(self) -> AsyncIterator[DemuxedPacket]:
        """
        Yield demuxed packets from the background thread.

        Uses ``run_in_executor`` for the blocking ``queue.get()`` call,
        avoiding per-packet ``run_coroutine_threadsafe`` overhead.

        ``start()`` must be called first.
        """
        if self._packet_queue is None:
            raise RuntimeError("Call start() before iter_packets()")

        loop = asyncio.get_running_loop()
        pq = self._packet_queue

        try:
            while True:
                # Blocking get runs on an executor thread; unblocked by the
                # sentinel the demux thread always enqueues in its finally.
                packet = await loop.run_in_executor(None, pq.get)
                if packet is _SENTINEL:
                    break
                yield packet

            # Normal end-of-stream: give the demux thread a chance to finish
            if self._demux_thread is not None:
                self._demux_thread.join(timeout=5.0)

        except GeneratorExit:
            # Consumer closed the generator early (e.g. client disconnect)
            logger.debug("[pyav_demuxer] Generator closed")
        except asyncio.CancelledError:
            logger.debug("[pyav_demuxer] Cancelled")
        finally:
            # Always release threads, container, and pipe FDs
            self._cleanup()
|
||||||
|
|
||||||
|
def _discover_streams(self) -> None:
|
||||||
|
"""Inspect the opened container and record stream metadata."""
|
||||||
|
if self._container is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
for stream in self._container.streams:
|
||||||
|
if stream.type == "video" and self._video_stream is None:
|
||||||
|
codec_ctx = stream.codec_context
|
||||||
|
fps = float(stream.average_rate) if stream.average_rate else 24.0
|
||||||
|
self._video_stream = DemuxedStream(
|
||||||
|
index=stream.index,
|
||||||
|
codec_name=codec_ctx.name if codec_ctx else stream.codec.name,
|
||||||
|
codec_type="video",
|
||||||
|
width=codec_ctx.width if codec_ctx else 0,
|
||||||
|
height=codec_ctx.height if codec_ctx else 0,
|
||||||
|
fps=fps,
|
||||||
|
pixel_format=str(codec_ctx.pix_fmt) if codec_ctx and codec_ctx.pix_fmt else "yuv420p",
|
||||||
|
time_base_num=stream.time_base.numerator,
|
||||||
|
time_base_den=stream.time_base.denominator,
|
||||||
|
duration_seconds=float(stream.duration * stream.time_base) if stream.duration else 0.0,
|
||||||
|
extradata=bytes(codec_ctx.extradata) if codec_ctx and codec_ctx.extradata else b"",
|
||||||
|
)
|
||||||
|
logger.info(
|
||||||
|
"[pyav_demuxer] Video: %s %dx%d @%.1ffps",
|
||||||
|
self._video_stream.codec_name,
|
||||||
|
self._video_stream.width,
|
||||||
|
self._video_stream.height,
|
||||||
|
self._video_stream.fps,
|
||||||
|
)
|
||||||
|
|
||||||
|
elif stream.type == "audio" and self._audio_stream is None:
|
||||||
|
codec_ctx = stream.codec_context
|
||||||
|
self._audio_stream = DemuxedStream(
|
||||||
|
index=stream.index,
|
||||||
|
codec_name=codec_ctx.name if codec_ctx else stream.codec.name,
|
||||||
|
codec_type="audio",
|
||||||
|
sample_rate=codec_ctx.sample_rate if codec_ctx else 0,
|
||||||
|
channels=codec_ctx.channels if codec_ctx else 0,
|
||||||
|
time_base_num=stream.time_base.numerator,
|
||||||
|
time_base_den=stream.time_base.denominator,
|
||||||
|
duration_seconds=float(stream.duration * stream.time_base) if stream.duration else 0.0,
|
||||||
|
extradata=bytes(codec_ctx.extradata) if codec_ctx and codec_ctx.extradata else b"",
|
||||||
|
)
|
||||||
|
logger.info(
|
||||||
|
"[pyav_demuxer] Audio: %s %dHz %dch",
|
||||||
|
self._audio_stream.codec_name,
|
||||||
|
self._audio_stream.sample_rate,
|
||||||
|
self._audio_stream.channels,
|
||||||
|
)
|
||||||
|
|
||||||
|
    def _cleanup(self) -> None:
        """Stop threads and release all resources safely.

        The order is critical to avoid SIGSEGV from closing the container
        while the demux thread is still calling container.demux():

        1. Cancel the feeder task (stops new bytes being queued).
        2. Put a sentinel into the source queue so the writer thread
           unblocks and exits. The writer's ``finally`` closes the pipe
           write-end, which causes the demux thread to see EOF.
        3. Join the writer thread (wait for it to drain and exit).
        4. Join the demux thread (it finishes after pipe EOF).
        5. ONLY THEN close the container (no thread is using it).
        6. Close any remaining pipe FDs (read end, if still open).
        """
        # 1. Cancel feeder task
        if self._feeder_task is not None:
            self._feeder_task.cancel()
            self._feeder_task = None

        # 2. Unblock writer thread so it exits and closes the pipe.
        # put_nowait: a full queue raises, but the writer will still exit
        # via its own timeout -- best-effort is intentional here.
        if self._source_queue is not None:
            try:
                self._source_queue.put_nowait(_SENTINEL)
            except Exception:
                pass

        # 3. Join writer thread (it closes _write_fd in its finally block)
        if self._writer_thread is not None:
            self._writer_thread.join(timeout=5.0)
            self._writer_thread = None

        # 4. Join demux thread -- must finish before we close the container
        if self._demux_thread is not None:
            self._demux_thread.join(timeout=5.0)
            self._demux_thread = None

        # 5. Now safe to close the container (no thread is using it)
        if self._container is not None:
            try:
                self._container.close()
            except Exception:
                pass
            self._container = None

        # 6. Close any remaining pipe FDs
        for fd_name in ("_read_fd", "_write_fd"):
            fd = getattr(self, fd_name, None)
            if fd is not None:
                try:
                    os.close(fd)
                except OSError:
                    pass
                setattr(self, fd_name, None)
|
||||||
1121
mediaflow_proxy/remuxer/transcode_handler.py
Normal file
1121
mediaflow_proxy/remuxer/transcode_handler.py
Normal file
File diff suppressed because it is too large
Load Diff
1268
mediaflow_proxy/remuxer/transcode_pipeline.py
Normal file
1268
mediaflow_proxy/remuxer/transcode_pipeline.py
Normal file
File diff suppressed because it is too large
Load Diff
1728
mediaflow_proxy/remuxer/ts_muxer.py
Normal file
1728
mediaflow_proxy/remuxer/ts_muxer.py
Normal file
File diff suppressed because it is too large
Load Diff
403
mediaflow_proxy/remuxer/video_transcoder.py
Normal file
403
mediaflow_proxy/remuxer/video_transcoder.py
Normal file
@@ -0,0 +1,403 @@
|
|||||||
|
"""
|
||||||
|
GPU-accelerated video transcoder with runtime detection.
|
||||||
|
|
||||||
|
Detects available hardware encoders/decoders at first use and selects
|
||||||
|
the best available backend:
|
||||||
|
- NVIDIA: h264_nvenc / hevc_cuvid (NVENC + CUDA)
|
||||||
|
- Apple macOS: h264_videotoolbox / hevc_videotoolbox
|
||||||
|
- Intel Linux: h264_vaapi / h264_qsv
|
||||||
|
- Fallback: libx264 (CPU)
|
||||||
|
|
||||||
|
The transcoder operates at the packet/frame level via PyAV, suitable
|
||||||
|
for integration into the streaming pipeline.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from enum import Enum
|
||||||
|
from fractions import Fraction
|
||||||
|
|
||||||
|
import av
|
||||||
|
|
||||||
|
from mediaflow_proxy.configs import settings
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class HWAccelType(Enum):
    """Hardware-acceleration backend families recognized by the detector."""

    NONE = "none"  # CPU only (libx264)
    NVIDIA = "nvidia"  # NVENC encoders / CUVID decoders
    VIDEOTOOLBOX = "videotoolbox"  # Apple macOS
    VAAPI = "vaapi"  # Intel/AMD Linux VA-API
    QSV = "qsv"  # Intel Quick Sync Video
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class HWCapability:
    """Detected hardware acceleration capability."""

    # Which backend family was selected (NONE = CPU fallback)
    accel_type: HWAccelType = HWAccelType.NONE
    # Encoder codec name to use for H.264 output
    h264_encoder: str = "libx264"
    h264_decoder: str | None = None  # None = use default software decoder
    hevc_decoder: str | None = None  # None = use default software decoder
    # All hardware codecs that probed successfully (for logging/diagnostics)
    available_encoders: list[str] = field(default_factory=list)
    available_decoders: list[str] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level singleton -- populated on first call to get_hw_capability()
|
||||||
|
_hw_capability: HWCapability | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def _probe_codec(name: str, mode: str = "w") -> bool:
    """
    Check if a PyAV codec is available by name.

    Args:
        name: Codec name (e.g. 'h264_videotoolbox').
        mode: 'w' for encoder, 'r' for decoder.

    Returns:
        True when the codec can be instantiated, False otherwise.
    """
    try:
        av.Codec(name, mode)
    except Exception:
        return False
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_hw_capability() -> HWCapability:
    """
    Probe the runtime environment for hardware encoder/decoder availability.

    Checks NVIDIA, Apple VideoToolbox, Intel VAAPI/QSV in priority order.
    Falls back to libx264 CPU encoding.
    """
    # Candidate hardware codecs, probed once; results kept for logging.
    candidate_encoders = (
        "h264_nvenc",
        "hevc_nvenc",
        "h264_videotoolbox",
        "hevc_videotoolbox",
        "h264_vaapi",
        "hevc_vaapi",
        "h264_qsv",
        "hevc_qsv",
    )
    candidate_decoders = (
        "h264_cuvid",
        "hevc_cuvid",
        "h264_qsv",
        "hevc_qsv",
    )

    cap = HWCapability()
    cap.available_encoders = [name for name in candidate_encoders if _probe_codec(name, "w")]
    cap.available_decoders = [name for name in candidate_decoders if _probe_codec(name, "r")]

    encoders = cap.available_encoders
    decoders = cap.available_decoders

    # Priority 1: NVIDIA NVENC
    if "h264_nvenc" in encoders:
        cap.accel_type = HWAccelType.NVIDIA
        cap.h264_encoder = "h264_nvenc"
        if "h264_cuvid" in decoders:
            cap.h264_decoder = "h264_cuvid"
        if "hevc_cuvid" in decoders:
            cap.hevc_decoder = "hevc_cuvid"
        return cap

    # Priority 2: Apple VideoToolbox
    if "h264_videotoolbox" in encoders:
        cap.accel_type = HWAccelType.VIDEOTOOLBOX
        cap.h264_encoder = "h264_videotoolbox"
        # VideoToolbox decoders are used automatically via hwaccel
        return cap

    # Priority 3: Intel VAAPI (Linux)
    if "h264_vaapi" in encoders:
        cap.accel_type = HWAccelType.VAAPI
        cap.h264_encoder = "h264_vaapi"
        return cap

    # Priority 4: Intel QSV
    if "h264_qsv" in encoders:
        cap.accel_type = HWAccelType.QSV
        cap.h264_encoder = "h264_qsv"
        if "h264_qsv" in decoders:
            cap.h264_decoder = "h264_qsv"
        if "hevc_qsv" in decoders:
            cap.hevc_decoder = "hevc_qsv"
        return cap

    # Fallback: CPU
    cap.accel_type = HWAccelType.NONE
    cap.h264_encoder = "libx264"
    return cap
|
||||||
|
|
||||||
|
|
||||||
|
def get_hw_capability() -> HWCapability:
    """Get the detected hardware acceleration capability (cached singleton)."""
    global _hw_capability

    # Fast path: already detected
    if _hw_capability is not None:
        return _hw_capability

    cap = _detect_hw_capability()
    _hw_capability = cap

    # Log the selection exactly once, on first detection
    if settings.transcode_prefer_gpu and cap.accel_type != HWAccelType.NONE:
        logger.info(
            "[video_transcoder] GPU acceleration: %s (encoder=%s, decoders=%s)",
            cap.accel_type.value,
            cap.h264_encoder,
            cap.available_decoders or "software",
        )
    else:
        logger.info(
            "[video_transcoder] Using CPU encoder: %s (available HW: encoders=%s, decoders=%s)",
            cap.h264_encoder,
            cap.available_encoders or "none",
            cap.available_decoders or "none",
        )
    return cap
|
||||||
|
|
||||||
|
|
||||||
|
class VideoTranscoder:
    """
    In-process video transcoder using PyAV.

    Decodes input video packets and re-encodes to H.264 using the best
    available hardware encoder (or CPU libx264 fallback).

    Operates at the frame level: caller provides raw video packets (from
    PyAV demuxer), transcoder returns encoded H.264 NAL data suitable
    for the fMP4 muxer.
    """

    def __init__(
        self,
        input_codec_name: str,
        width: int,
        height: int,
        fps: float = 24.0,
        pixel_format: str = "yuv420p",
        force_software: bool = False,
    ) -> None:
        """Create decoder + H.264 encoder pair.

        Args:
            input_codec_name: Source codec (e.g. 'h264', 'hevc').
            width: Source width in pixels (padded to even for H.264).
            height: Source height in pixels (padded to even for H.264).
            fps: Target frame rate for encoder time base / GOP sizing.
            pixel_format: Declared source pixel format (frames are
                reformatted to yuv420p at encode time regardless).
            force_software: Skip hardware backends even when detected.
        """
        hw = get_hw_capability()
        use_gpu = settings.transcode_prefer_gpu and hw.accel_type != HWAccelType.NONE and not force_software

        # --- Decoder ---
        hw_decoder = None
        if use_gpu:
            if "hevc" in input_codec_name or "h265" in input_codec_name:
                hw_decoder = hw.hevc_decoder
            else:
                hw_decoder = hw.h264_decoder

        decoder_name = hw_decoder or input_codec_name
        self._decoder = av.CodecContext.create(decoder_name, "r")

        # --- Encoder ---
        encoder_name = hw.h264_encoder if use_gpu else "libx264"

        # H.264 requires even dimensions
        enc_width = width if width % 2 == 0 else width + 1
        enc_height = height if height % 2 == 0 else height + 1

        self._encoder = av.CodecContext.create(encoder_name, "w")
        self._encoder.width = enc_width
        self._encoder.height = enc_height
        self._encoder.pix_fmt = "yuv420p"  # H.264 requires yuv420p
        self._encoder.time_base = Fraction(1, int(fps * 1000))
        self._encoder.framerate = Fraction(int(fps * 1000), 1000)
        self._encoder.bit_rate = _parse_bitrate(settings.transcode_video_bitrate)
        self._encoder.gop_size = int(fps * 2)  # Keyframe every ~2 seconds

        # Encoder options based on backend
        opts = {}
        if encoder_name == "libx264":
            opts["preset"] = settings.transcode_video_preset
            opts["tune"] = "zerolatency"
            opts["profile"] = "high"
        elif "nvenc" in encoder_name:
            opts["preset"] = "p4"  # NVENC preset (p1=fastest .. p7=slowest)
            opts["tune"] = "ll"  # Low latency
            opts["rc"] = "vbr"
        elif "videotoolbox" in encoder_name:
            opts["realtime"] = "1"
            opts["allow_sw"] = "1"  # Fallback to software if HW busy
        elif "vaapi" in encoder_name:
            opts["rc_mode"] = "VBR"
        elif "qsv" in encoder_name:
            opts["preset"] = "medium"

        self._encoder.options = opts
        self._encoder.open()

        # Report the (possibly padded) encode dimensions to callers
        width = enc_width
        height = enc_height

        self._input_codec = input_codec_name
        self._encoder_name = encoder_name
        self._frames_decoded = 0
        self._frames_encoded = 0
        self._width = width
        self._height = height
        # Tracks whether the standalone decoder was actually used (via decode_packet).
        # When the demux thread decodes frames in-thread (decode_video=True),
        # the standalone decoder is never fed packets and flushing it is wasted work.
        self._decoder_used = False
        self._flushed = False  # Prevents double-flush which causes SIGSEGV

        logger.info(
            "[video_transcoder] Initialized: %s -> %s (%s), %dx%d @%.1ffps %dk",
            input_codec_name,
            encoder_name,
            hw.accel_type.value,
            width,
            height,
            fps,
            self._encoder.bit_rate // 1000 if self._encoder.bit_rate else 0,
        )

    @property
    def codec_private_data(self) -> bytes | None:
        """H.264 extradata (SPS/PPS) from the encoder, for the fMP4 init segment."""
        if self._encoder.extradata:
            return bytes(self._encoder.extradata)
        return None

    @property
    def width(self) -> int:
        """Encoder output width (even-padded)."""
        return self._width

    @property
    def height(self) -> int:
        """Encoder output height (even-padded)."""
        return self._height

    def transcode_frame(self, frame: av.VideoFrame) -> list[tuple[bytes, bool, int, int]]:
        """
        Encode a decoded video frame to H.264.

        Args:
            frame: A decoded av.VideoFrame.

        Returns:
            List of (nal_data, is_keyframe, pts, dts) tuples.
        """
        self._frames_decoded += 1
        output = []

        # Ensure correct pixel format for encoder
        if frame.format.name != self._encoder.pix_fmt:
            frame = frame.reformat(format=self._encoder.pix_fmt)

        try:
            for packet in self._encoder.encode(frame):
                self._frames_encoded += 1
                output.append(
                    (
                        bytes(packet),
                        packet.is_keyframe,
                        int(packet.pts) if packet.pts is not None else 0,
                        int(packet.dts) if packet.dts is not None else 0,
                    )
                )
        except av.error.InvalidDataError as e:
            logger.debug("[video_transcoder] Encode error: %s", e)

        return output

    def decode_packet(self, packet: av.Packet) -> list[av.VideoFrame]:
        """Decode a video packet into frames (using the standalone decoder)."""
        self._decoder_used = True
        try:
            return list(self._decoder.decode(packet))
        except av.error.InvalidDataError as e:
            logger.debug("[video_transcoder] Decode error: %s", e)
            return []

    def flush(self) -> list[tuple[bytes, bool, int, int]]:
        """
        Flush encoder (and decoder, if it was used) buffers.

        When ``decode_video=True`` is used in PyAVDemuxer, the demux thread
        decodes frames using the container's codec context. In that case the
        standalone ``_decoder`` here is never fed any packets, so flushing
        it is skipped -- avoiding a stall that added ~5 s on some backends.

        Safe to call multiple times -- subsequent calls return an empty list.
        Also safe on a partially-constructed instance (``__init__`` raised
        before ``_flushed`` was set): the getattr default treats it as
        already flushed, so nothing is touched.
        """
        # Default True: if __init__ never reached attribute setup, there is
        # nothing to flush and touching _encoder/_decoder would raise.
        if getattr(self, "_flushed", True):
            return []
        self._flushed = True

        output = []

        # Flush decoder only if it was actually used (via decode_packet)
        if self._decoder_used:
            try:
                for frame in self._decoder.decode(None):
                    self._frames_decoded += 1
                    if frame.format.name != self._encoder.pix_fmt:
                        frame = frame.reformat(format=self._encoder.pix_fmt)
                    for packet in self._encoder.encode(frame):
                        self._frames_encoded += 1
                        output.append(
                            (
                                bytes(packet),
                                packet.is_keyframe,
                                int(packet.pts) if packet.pts is not None else 0,
                                int(packet.dts) if packet.dts is not None else 0,
                            )
                        )
            except Exception as e:
                logger.debug("[video_transcoder] Decoder flush error: %s", e)
        else:
            logger.debug("[video_transcoder] Skipping decoder flush (decoder not used)")

        # Flush encoder
        try:
            for packet in self._encoder.encode(None):
                self._frames_encoded += 1
                output.append(
                    (
                        bytes(packet),
                        packet.is_keyframe,
                        int(packet.pts) if packet.pts is not None else 0,
                        int(packet.dts) if packet.dts is not None else 0,
                    )
                )
        except Exception as e:
            logger.debug("[video_transcoder] Encoder flush error: %s", e)

        logger.info(
            "[video_transcoder] Flushed: %d decoded, %d encoded total (decoder_used=%s)",
            self._frames_decoded,
            self._frames_encoded,
            self._decoder_used,
        )
        return output

    def close(self) -> None:
        """Release codec contexts.

        Flushes the encoder (if not already flushed) before releasing to avoid
        SIGSEGV when libx264 or hardware encoders have buffered frames at
        teardown time. Double-flushing is the most common cause of SIGSEGV
        in the transcode pipeline.

        PyAV codec contexts are released via garbage collection (no explicit
        close method), so we flush first to ensure native buffers are drained
        before the C-level codec is freed.
        """
        # flush() is idempotent -- safe to call even if already flushed
        # or if __init__ failed before setup completed.
        self.flush()
        # Release references -- GC will free the native codec contexts
        self._encoder = None
        self._decoder = None

    def __del__(self) -> None:
        # Finalizer must never raise: __init__ may have failed partway,
        # and exceptions in __del__ are only printed to stderr anyway.
        try:
            self.close()
        except Exception:
            pass
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_bitrate(bitrate_str: str) -> int:
    """Parse a bitrate string like '4M', '2000k', '5000000' to int bits/s."""
    text = bitrate_str.strip().lower()
    # Suffix multipliers, checked in order; plain digits fall through.
    for suffix, factor in (("m", 1_000_000), ("k", 1_000)):
        if text.endswith(suffix):
            return int(float(text[:-1]) * factor)
    return int(text)
|
||||||
@@ -1,5 +1,17 @@
|
|||||||
from .proxy import proxy_router
|
from .proxy import proxy_router
|
||||||
from .extractor import extractor_router
|
from .extractor import extractor_router
|
||||||
from .speedtest import speedtest_router
|
from .speedtest import speedtest_router
|
||||||
|
from .playlist_builder import playlist_builder_router
|
||||||
|
from .xtream import xtream_root_router
|
||||||
|
from .acestream import acestream_router
|
||||||
|
from .telegram import telegram_router
|
||||||
|
|
||||||
__all__ = ["proxy_router", "extractor_router", "speedtest_router"]
|
# Public surface of the routes package; one router per feature area.
__all__ = [
    "proxy_router",
    "extractor_router",
    "speedtest_router",
    "playlist_builder_router",
    "xtream_root_router",
    "acestream_router",
    "telegram_router",
]
|
||||||
|
|||||||
BIN
mediaflow_proxy/routes/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/routes/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/routes/__pycache__/acestream.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/routes/__pycache__/acestream.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/routes/__pycache__/extractor.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/routes/__pycache__/extractor.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user