mirror of
https://github.com/UrloMythus/UnHided.git
synced 2026-04-09 02:40:47 +00:00
update
This commit is contained in:
BIN
mediaflow_proxy/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/__pycache__/configs.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/__pycache__/configs.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/__pycache__/const.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/__pycache__/const.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/__pycache__/handlers.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/__pycache__/handlers.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/__pycache__/main.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/__pycache__/main.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/__pycache__/middleware.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/__pycache__/middleware.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/__pycache__/mpd_processor.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/__pycache__/mpd_processor.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/__pycache__/schemas.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/__pycache__/schemas.cpython-313.pyc
Normal file
Binary file not shown.
@@ -1,7 +1,6 @@
|
|||||||
from typing import Dict, Literal, Optional, Union
|
from typing import Dict, Literal, Optional
|
||||||
|
|
||||||
import httpx
|
from pydantic import BaseModel, Field, SecretStr
|
||||||
from pydantic import BaseModel, Field
|
|
||||||
from pydantic_settings import BaseSettings
|
from pydantic_settings import BaseSettings
|
||||||
|
|
||||||
|
|
||||||
@@ -28,48 +27,6 @@ class TransportConfig(BaseSettings):
|
|||||||
)
|
)
|
||||||
timeout: int = Field(60, description="Timeout for HTTP requests in seconds")
|
timeout: int = Field(60, description="Timeout for HTTP requests in seconds")
|
||||||
|
|
||||||
def get_mounts(
|
|
||||||
self, async_http: bool = True
|
|
||||||
) -> Dict[str, Optional[Union[httpx.HTTPTransport, httpx.AsyncHTTPTransport]]]:
|
|
||||||
"""
|
|
||||||
Get a dictionary of httpx mount points to transport instances.
|
|
||||||
"""
|
|
||||||
mounts = {}
|
|
||||||
transport_cls = httpx.AsyncHTTPTransport if async_http else httpx.HTTPTransport
|
|
||||||
global_verify = not self.disable_ssl_verification_globally
|
|
||||||
|
|
||||||
# Configure specific routes
|
|
||||||
for pattern, route in self.transport_routes.items():
|
|
||||||
mounts[pattern] = transport_cls(
|
|
||||||
verify=route.verify_ssl if global_verify else False,
|
|
||||||
proxy=route.proxy_url or self.proxy_url if route.proxy else None,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Hardcoded configuration for jxoplay.xyz domain - SSL verification disabled
|
|
||||||
mounts["all://jxoplay.xyz"] = transport_cls(
|
|
||||||
verify=False, proxy=self.proxy_url if self.all_proxy else None
|
|
||||||
)
|
|
||||||
|
|
||||||
mounts["all://dlhd.dad"] = transport_cls(
|
|
||||||
verify=False, proxy=self.proxy_url if self.all_proxy else None
|
|
||||||
)
|
|
||||||
|
|
||||||
mounts["all://*.newkso.ru"] = transport_cls(
|
|
||||||
verify=False, proxy=self.proxy_url if self.all_proxy else None
|
|
||||||
)
|
|
||||||
|
|
||||||
# Apply global settings for proxy and SSL
|
|
||||||
default_proxy_url = self.proxy_url if self.all_proxy else None
|
|
||||||
if default_proxy_url or not global_verify:
|
|
||||||
mounts["all://"] = transport_cls(proxy=default_proxy_url, verify=global_verify)
|
|
||||||
|
|
||||||
# Set default proxy for all routes if enabled
|
|
||||||
# This part is now handled above to combine proxy and SSL settings
|
|
||||||
# if self.all_proxy:
|
|
||||||
# mounts["all://"] = transport_cls(proxy=self.proxy_url)
|
|
||||||
|
|
||||||
return mounts
|
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
env_file = ".env"
|
env_file = ".env"
|
||||||
extra = "ignore"
|
extra = "ignore"
|
||||||
@@ -78,30 +35,80 @@ class TransportConfig(BaseSettings):
|
|||||||
class Settings(BaseSettings):
|
class Settings(BaseSettings):
|
||||||
api_password: str | None = None # The password for protecting the API endpoints.
|
api_password: str | None = None # The password for protecting the API endpoints.
|
||||||
log_level: str = "INFO" # The logging level to use.
|
log_level: str = "INFO" # The logging level to use.
|
||||||
transport_config: TransportConfig = Field(default_factory=TransportConfig) # Configuration for httpx transport.
|
transport_config: TransportConfig = Field(default_factory=TransportConfig) # Configuration for HTTP transport.
|
||||||
enable_streaming_progress: bool = False # Whether to enable streaming progress tracking.
|
enable_streaming_progress: bool = False # Whether to enable streaming progress tracking.
|
||||||
disable_home_page: bool = False # Whether to disable the home page UI.
|
disable_home_page: bool = False # Whether to disable the home page UI.
|
||||||
disable_docs: bool = False # Whether to disable the API documentation (Swagger UI).
|
disable_docs: bool = False # Whether to disable the API documentation (Swagger UI).
|
||||||
disable_speedtest: bool = False # Whether to disable the speedtest UI.
|
disable_speedtest: bool = False # Whether to disable the speedtest UI.
|
||||||
|
clear_cache_on_startup: bool = (
|
||||||
|
False # Whether to clear all caches (extractor, MPD, etc.) on startup. Useful for development.
|
||||||
|
)
|
||||||
stremio_proxy_url: str | None = None # The Stremio server URL for alternative content proxying.
|
stremio_proxy_url: str | None = None # The Stremio server URL for alternative content proxying.
|
||||||
m3u8_content_routing: Literal["mediaflow", "stremio", "direct"] = (
|
m3u8_content_routing: Literal["mediaflow", "stremio", "direct"] = (
|
||||||
"mediaflow" # Routing strategy for M3U8 content URLs: "mediaflow", "stremio", or "direct"
|
"mediaflow" # Routing strategy for M3U8 content URLs: "mediaflow", "stremio", or "direct"
|
||||||
)
|
)
|
||||||
enable_hls_prebuffer: bool = False # Whether to enable HLS pre-buffering for improved streaming performance.
|
enable_hls_prebuffer: bool = True # Whether to enable HLS pre-buffering for improved streaming performance.
|
||||||
|
livestream_start_offset: (
|
||||||
|
float | None
|
||||||
|
) = -18 # Default start offset for live streams (e.g., -18 to start 18 seconds behind live edge). Applies to HLS and MPD live playlists. Set to None to disable.
|
||||||
hls_prebuffer_segments: int = 5 # Number of segments to pre-buffer ahead.
|
hls_prebuffer_segments: int = 5 # Number of segments to pre-buffer ahead.
|
||||||
hls_prebuffer_cache_size: int = 50 # Maximum number of segments to cache in memory.
|
hls_prebuffer_cache_size: int = 50 # Maximum number of segments to cache in memory.
|
||||||
hls_prebuffer_max_memory_percent: int = 80 # Maximum percentage of system memory to use for HLS pre-buffer cache.
|
hls_prebuffer_max_memory_percent: int = 80 # Maximum percentage of system memory to use for HLS pre-buffer cache.
|
||||||
hls_prebuffer_emergency_threshold: int = 90 # Emergency threshold percentage to trigger aggressive cache cleanup.
|
hls_prebuffer_emergency_threshold: int = 90 # Emergency threshold percentage to trigger aggressive cache cleanup.
|
||||||
enable_dash_prebuffer: bool = False # Whether to enable DASH pre-buffering for improved streaming performance.
|
hls_prebuffer_inactivity_timeout: int = 60 # Seconds of inactivity before stopping playlist refresh loop.
|
||||||
|
hls_segment_cache_ttl: int = 300 # TTL (seconds) for cached HLS segments; 300s (5min) for VOD, lower for live.
|
||||||
|
enable_dash_prebuffer: bool = True # Whether to enable DASH pre-buffering for improved streaming performance.
|
||||||
dash_prebuffer_segments: int = 5 # Number of segments to pre-buffer ahead.
|
dash_prebuffer_segments: int = 5 # Number of segments to pre-buffer ahead.
|
||||||
dash_prebuffer_cache_size: int = 50 # Maximum number of segments to cache in memory.
|
dash_prebuffer_cache_size: int = 50 # Maximum number of segments to cache in memory.
|
||||||
dash_prebuffer_max_memory_percent: int = 80 # Maximum percentage of system memory to use for DASH pre-buffer cache.
|
dash_prebuffer_max_memory_percent: int = 80 # Maximum percentage of system memory to use for DASH pre-buffer cache.
|
||||||
dash_prebuffer_emergency_threshold: int = 90 # Emergency threshold percentage to trigger aggressive cache cleanup.
|
dash_prebuffer_emergency_threshold: int = 90 # Emergency threshold percentage to trigger aggressive cache cleanup.
|
||||||
mpd_live_init_cache_ttl: int = 0 # TTL (seconds) for live init segment cache; 0 disables caching.
|
dash_prebuffer_inactivity_timeout: int = 60 # Seconds of inactivity before cleaning up stream state.
|
||||||
|
dash_segment_cache_ttl: int = 60 # TTL (seconds) for cached media segments; longer = better for slow playback.
|
||||||
|
mpd_live_init_cache_ttl: int = 60 # TTL (seconds) for live init segment cache; 0 disables caching.
|
||||||
mpd_live_playlist_depth: int = 8 # Number of recent segments to expose per live playlist variant.
|
mpd_live_playlist_depth: int = 8 # Number of recent segments to expose per live playlist variant.
|
||||||
|
remux_to_ts: bool = False # Remux fMP4 segments to MPEG-TS for ExoPlayer/VLC compatibility.
|
||||||
|
processed_segment_cache_ttl: int = 60 # TTL (seconds) for caching processed (decrypted/remuxed) segments.
|
||||||
|
|
||||||
user_agent: str = (
|
# FlareSolverr settings (for Cloudflare bypass)
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36" # The user agent to use for HTTP requests.
|
flaresolverr_url: str | None = None # FlareSolverr service URL. Example: http://localhost:8191
|
||||||
|
flaresolverr_timeout: int = 60 # Timeout (seconds) for FlareSolverr requests.
|
||||||
|
|
||||||
|
# Acestream settings
|
||||||
|
enable_acestream: bool = False # Whether to enable Acestream proxy support.
|
||||||
|
acestream_host: str = "localhost" # Acestream engine host.
|
||||||
|
acestream_port: int = 6878 # Acestream engine port.
|
||||||
|
acestream_buffer_size: int = 4 * 1024 * 1024 # Buffer size for MPEG-TS streaming (4MB default, like acexy).
|
||||||
|
acestream_empty_timeout: int = 30 # Timeout (seconds) when no data is received from upstream.
|
||||||
|
acestream_session_timeout: int = 60 # Session timeout (seconds) for cleanup of inactive sessions.
|
||||||
|
acestream_keepalive_interval: int = 15 # Interval (seconds) for session keepalive polling.
|
||||||
|
|
||||||
|
# Telegram MTProto settings
|
||||||
|
enable_telegram: bool = False # Whether to enable Telegram MTProto proxy support.
|
||||||
|
telegram_api_id: int | None = None # Telegram API ID from https://my.telegram.org/apps
|
||||||
|
telegram_api_hash: SecretStr | None = None # Telegram API hash from https://my.telegram.org/apps
|
||||||
|
telegram_session_string: SecretStr | None = None # Persistent session string (avoids re-authentication).
|
||||||
|
telegram_max_connections: int = 8 # Max parallel DC connections for downloads (max 20, careful of floods).
|
||||||
|
telegram_request_timeout: int = 30 # Request timeout in seconds.
|
||||||
|
|
||||||
|
# Transcode settings
|
||||||
|
enable_transcode: bool = True # Whether to enable on-the-fly transcoding endpoints (MKV→fMP4, HLS VOD).
|
||||||
|
transcode_prefer_gpu: bool = True # Prefer GPU acceleration (NVENC/VideoToolbox/VAAPI) when available.
|
||||||
|
transcode_video_bitrate: str = "4M" # Target video bitrate for re-encoding (e.g. "4M", "2000k").
|
||||||
|
transcode_audio_bitrate: int = 192000 # AAC audio bitrate in bits/s for the Python transcode pipeline.
|
||||||
|
transcode_video_preset: str = "medium" # Encoding speed/quality tradeoff (libx264: ultrafast..veryslow).
|
||||||
|
|
||||||
|
user_agent: str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36" # The user agent to use for HTTP requests.
|
||||||
|
|
||||||
|
# Upstream error resilience settings
|
||||||
|
upstream_retry_on_disconnect: bool = True # Enable/disable retry when upstream disconnects mid-stream.
|
||||||
|
upstream_retry_attempts: int = 2 # Number of retry attempts when upstream disconnects during streaming.
|
||||||
|
upstream_retry_delay: float = 1.0 # Delay (seconds) between retry attempts.
|
||||||
|
graceful_stream_end: bool = True # Return valid empty playlist instead of error when upstream fails.
|
||||||
|
|
||||||
|
# Redis settings
|
||||||
|
redis_url: str | None = None # Redis URL for distributed locking and caching. None = disabled.
|
||||||
|
cache_namespace: str | None = (
|
||||||
|
None # Optional namespace for instance-specific caches (e.g. pod name or hostname). When set, extractor results and other IP-bound data are stored under this namespace so multiple pods sharing one Redis don't serve each other's IP-specific URLs.
|
||||||
)
|
)
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
|
|||||||
BIN
mediaflow_proxy/drm/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/drm/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/drm/__pycache__/decrypter.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/drm/__pycache__/decrypter.cpython-313.pyc
Normal file
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -65,9 +65,9 @@ class F16PxExtractor(BaseExtractor):
|
|||||||
raise ExtractorError("F16PX: No playback data")
|
raise ExtractorError("F16PX: No playback data")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
iv = self._b64url_decode(pb["iv"]) # nonce
|
iv = self._b64url_decode(pb["iv"]) # nonce
|
||||||
key = self._join_key_parts(pb["key_parts"]) # AES key
|
key = self._join_key_parts(pb["key_parts"]) # AES key
|
||||||
payload = self._b64url_decode(pb["payload"]) # ciphertext + tag
|
payload = self._b64url_decode(pb["payload"]) # ciphertext + tag
|
||||||
|
|
||||||
cipher = python_aesgcm.new(key)
|
cipher = python_aesgcm.new(key)
|
||||||
decrypted = cipher.open(iv, payload) # AAD = '' like ResolveURL
|
decrypted = cipher.open(iv, payload) # AAD = '' like ResolveURL
|
||||||
@@ -95,7 +95,7 @@ class F16PxExtractor(BaseExtractor):
|
|||||||
self.base_headers["origin"] = origin
|
self.base_headers["origin"] = origin
|
||||||
self.base_headers["Accept-Language"] = "en-US,en;q=0.5"
|
self.base_headers["Accept-Language"] = "en-US,en;q=0.5"
|
||||||
self.base_headers["Accept"] = "*/*"
|
self.base_headers["Accept"] = "*/*"
|
||||||
self.base_headers['user-agent'] = 'Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0'
|
self.base_headers["user-agent"] = "Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0"
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"destination_url": best,
|
"destination_url": best,
|
||||||
|
|||||||
BIN
mediaflow_proxy/extractors/__pycache__/F16Px.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/F16Px.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/base.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/base.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/dlhd.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/dlhd.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/factory.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/factory.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/fastream.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/fastream.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/filelions.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/filelions.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/filemoon.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/filemoon.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/gupload.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/gupload.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/livetv.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/livetv.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/maxstream.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/maxstream.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/mixdrop.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/mixdrop.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/okru.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/okru.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/uqload.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/uqload.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/vavoo.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/vavoo.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/vidmoly.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/vidmoly.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/vidoza.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/vidoza.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/vixcloud.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/vixcloud.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/extractors/__pycache__/voe.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/extractors/__pycache__/voe.cpython-313.pyc
Normal file
Binary file not shown.
@@ -1,21 +1,53 @@
|
|||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
from dataclasses import dataclass
|
||||||
from typing import Dict, Optional, Any
|
from typing import Dict, Optional, Any
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import httpx
|
import aiohttp
|
||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from mediaflow_proxy.configs import settings
|
from mediaflow_proxy.configs import settings
|
||||||
from mediaflow_proxy.utils.http_utils import create_httpx_client, DownloadError
|
from mediaflow_proxy.utils.http_client import create_aiohttp_session
|
||||||
|
from mediaflow_proxy.utils.http_utils import DownloadError
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class ExtractorError(Exception):
|
class ExtractorError(Exception):
|
||||||
"""Base exception for all extractors."""
|
"""Base exception for all extractors."""
|
||||||
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class HttpResponse:
|
||||||
|
"""
|
||||||
|
Simple response container for extractor HTTP requests.
|
||||||
|
|
||||||
|
Uses aiohttp-style naming conventions:
|
||||||
|
- status (not status_code)
|
||||||
|
- text (pre-loaded content as string)
|
||||||
|
- content (pre-loaded content as bytes)
|
||||||
|
"""
|
||||||
|
|
||||||
|
status: int
|
||||||
|
headers: Dict[str, str]
|
||||||
|
text: str
|
||||||
|
content: bytes
|
||||||
|
url: str
|
||||||
|
|
||||||
|
def json(self) -> Any:
|
||||||
|
"""Parse response content as JSON."""
|
||||||
|
return json.loads(self.text)
|
||||||
|
|
||||||
|
def get_origin(self) -> str:
|
||||||
|
"""Get the origin (scheme + host) from the response URL."""
|
||||||
|
parsed = urlparse(self.url)
|
||||||
|
return f"{parsed.scheme}://{parsed.netloc}"
|
||||||
|
|
||||||
|
|
||||||
class BaseExtractor(ABC):
|
class BaseExtractor(ABC):
|
||||||
"""Base class for all URL extractors.
|
"""Base class for all URL extractors.
|
||||||
|
|
||||||
@@ -43,74 +75,99 @@ class BaseExtractor(ABC):
|
|||||||
backoff_factor: float = 0.5,
|
backoff_factor: float = 0.5,
|
||||||
raise_on_status: bool = True,
|
raise_on_status: bool = True,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
) -> httpx.Response:
|
) -> HttpResponse:
|
||||||
"""
|
"""
|
||||||
Make HTTP request with retry and timeout support.
|
Make HTTP request with retry and timeout support using aiohttp.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
|
url : str
|
||||||
|
The URL to request.
|
||||||
|
method : str
|
||||||
|
HTTP method (GET, POST, etc.). Defaults to GET.
|
||||||
|
headers : dict | None
|
||||||
|
Additional headers to merge with base headers.
|
||||||
timeout : float | None
|
timeout : float | None
|
||||||
Seconds to wait for the request (applied to httpx.Timeout). Defaults to 15s.
|
Seconds to wait for the request. Defaults to 15s.
|
||||||
retries : int
|
retries : int
|
||||||
Number of attempts for transient errors.
|
Number of attempts for transient errors.
|
||||||
backoff_factor : float
|
backoff_factor : float
|
||||||
Base for exponential backoff between retries.
|
Base for exponential backoff between retries.
|
||||||
raise_on_status : bool
|
raise_on_status : bool
|
||||||
If True, HTTP non-2xx raises DownloadError (preserves status code).
|
If True, HTTP non-2xx raises DownloadError.
|
||||||
|
**kwargs
|
||||||
|
Additional arguments passed to aiohttp request (e.g., data, json).
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
HttpResponse
|
||||||
|
Response object with pre-loaded content.
|
||||||
"""
|
"""
|
||||||
attempt = 0
|
attempt = 0
|
||||||
last_exc = None
|
last_exc = None
|
||||||
|
|
||||||
# build request headers merging base and per-request
|
# Build request headers merging base and per-request
|
||||||
request_headers = self.base_headers.copy()
|
request_headers = self.base_headers.copy()
|
||||||
if headers:
|
if headers:
|
||||||
request_headers.update(headers)
|
request_headers.update(headers)
|
||||||
|
|
||||||
timeout_cfg = httpx.Timeout(timeout or 15.0)
|
timeout_val = timeout or 15.0
|
||||||
|
|
||||||
while attempt < retries:
|
while attempt < retries:
|
||||||
try:
|
try:
|
||||||
async with create_httpx_client(timeout=timeout_cfg) as client:
|
async with create_aiohttp_session(url, timeout=timeout_val) as (session, proxy_url):
|
||||||
response = await client.request(
|
async with session.request(
|
||||||
method,
|
method,
|
||||||
url,
|
url,
|
||||||
headers=request_headers,
|
headers=request_headers,
|
||||||
|
proxy=proxy_url,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
)
|
) as response:
|
||||||
|
# Read content while session is still open
|
||||||
|
content = await response.read()
|
||||||
|
text = content.decode("utf-8", errors="replace")
|
||||||
|
final_url = str(response.url)
|
||||||
|
status = response.status
|
||||||
|
resp_headers = dict(response.headers)
|
||||||
|
|
||||||
if raise_on_status:
|
if raise_on_status and status >= 400:
|
||||||
try:
|
body_preview = text[:500]
|
||||||
response.raise_for_status()
|
|
||||||
except httpx.HTTPStatusError as e:
|
|
||||||
# Provide a short body preview for debugging
|
|
||||||
body_preview = ""
|
|
||||||
try:
|
|
||||||
body_preview = e.response.text[:500]
|
|
||||||
except Exception:
|
|
||||||
body_preview = "<unreadable body>"
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"HTTPStatusError for %s (status=%s) -- body preview: %s",
|
"HTTP error for %s (status=%s) -- body preview: %s",
|
||||||
url,
|
url,
|
||||||
e.response.status_code,
|
status,
|
||||||
body_preview,
|
body_preview,
|
||||||
)
|
)
|
||||||
raise DownloadError(e.response.status_code, f"HTTP error {e.response.status_code} while requesting {url}")
|
raise DownloadError(status, f"HTTP error {status} while requesting {url}")
|
||||||
return response
|
|
||||||
|
return HttpResponse(
|
||||||
|
status=status,
|
||||||
|
headers=resp_headers,
|
||||||
|
text=text,
|
||||||
|
content=content,
|
||||||
|
url=final_url,
|
||||||
|
)
|
||||||
|
|
||||||
except DownloadError:
|
except DownloadError:
|
||||||
# Do not retry on explicit HTTP status errors (they are intentional)
|
# Do not retry on explicit HTTP status errors (they are intentional)
|
||||||
raise
|
raise
|
||||||
except (httpx.ReadTimeout, httpx.ConnectTimeout, httpx.NetworkError, httpx.TransportError) as e:
|
except (asyncio.TimeoutError, aiohttp.ClientError) as e:
|
||||||
# Transient network error — retry with backoff
|
# Transient network error - retry with backoff
|
||||||
last_exc = e
|
last_exc = e
|
||||||
attempt += 1
|
attempt += 1
|
||||||
sleep_for = backoff_factor * (2 ** (attempt - 1))
|
sleep_for = backoff_factor * (2 ** (attempt - 1))
|
||||||
logger.warning("Transient network error (attempt %s/%s) for %s: %s — retrying in %.1fs",
|
logger.warning(
|
||||||
attempt, retries, url, e, sleep_for)
|
"Transient network error (attempt %s/%s) for %s: %s — retrying in %.1fs",
|
||||||
|
attempt,
|
||||||
|
retries,
|
||||||
|
url,
|
||||||
|
e,
|
||||||
|
sleep_for,
|
||||||
|
)
|
||||||
await asyncio.sleep(sleep_for)
|
await asyncio.sleep(sleep_for)
|
||||||
continue
|
continue
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Unexpected exception — wrap as ExtractorError to keep interface consistent
|
# Unexpected exception - wrap as ExtractorError to keep interface consistent
|
||||||
logger.exception("Unhandled exception while requesting %s: %s", url, e)
|
logger.exception("Unhandled exception while requesting %s: %s", url, e)
|
||||||
raise ExtractorError(f"Request failed for URL {url}: {str(e)}")
|
raise ExtractorError(f"Request failed for URL {url}: {str(e)}")
|
||||||
|
|
||||||
|
|||||||
@@ -1,133 +1,345 @@
|
|||||||
|
import hashlib
|
||||||
|
import hmac
|
||||||
import re
|
import re
|
||||||
import base64
|
import time
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from typing import Any, Dict, Optional, List
|
from typing import Any, Dict, Optional
|
||||||
from urllib.parse import urlparse, quote_plus, urljoin
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
|
||||||
import httpx
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError, HttpResponse
|
||||||
|
from mediaflow_proxy.utils.http_client import create_aiohttp_session
|
||||||
|
from mediaflow_proxy.configs import settings
|
||||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Silenzia l'errore ConnectionResetError su Windows
|
# Silenzia l'errore ConnectionResetError su Windows
|
||||||
logging.getLogger('asyncio').setLevel(logging.CRITICAL)
|
logging.getLogger("asyncio").setLevel(logging.CRITICAL)
|
||||||
|
|
||||||
|
# Default fingerprint parameters
|
||||||
|
DEFAULT_DLHD_USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0"
|
||||||
|
DEFAULT_DLHD_SCREEN_RESOLUTION = "1920x1080"
|
||||||
|
DEFAULT_DLHD_TIMEZONE = "UTC"
|
||||||
|
DEFAULT_DLHD_LANGUAGE = "en"
|
||||||
|
|
||||||
|
|
||||||
|
def compute_fingerprint(
|
||||||
|
user_agent: str = DEFAULT_DLHD_USER_AGENT,
|
||||||
|
screen_resolution: str = DEFAULT_DLHD_SCREEN_RESOLUTION,
|
||||||
|
timezone: str = DEFAULT_DLHD_TIMEZONE,
|
||||||
|
language: str = DEFAULT_DLHD_LANGUAGE,
|
||||||
|
) -> str:
|
||||||
|
"""
|
||||||
|
Compute the X-Fingerprint header value.
|
||||||
|
|
||||||
|
Algorithm:
|
||||||
|
fingerprint = SHA256(useragent + screen_resolution + timezone + language).hex()[:16]
|
||||||
|
|
||||||
|
Args:
|
||||||
|
user_agent: The user agent string
|
||||||
|
screen_resolution: The screen resolution (e.g., "1920x1080")
|
||||||
|
timezone: The timezone (e.g., "UTC")
|
||||||
|
language: The language code (e.g., "en")
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The 16-character fingerprint
|
||||||
|
"""
|
||||||
|
combined = f"{user_agent}{screen_resolution}{timezone}{language}"
|
||||||
|
return hashlib.sha256(combined.encode("utf-8")).hexdigest()[:16]
|
||||||
|
|
||||||
|
|
||||||
|
def compute_key_path(resource: str, number: str, timestamp: int, fingerprint: str, secret_key: str) -> str:
|
||||||
|
"""
|
||||||
|
Compute the X-Key-Path header value.
|
||||||
|
|
||||||
|
Algorithm:
|
||||||
|
key_path = HMAC-SHA256("resource|number|timestamp|fingerprint", secret_key).hex()[:16]
|
||||||
|
|
||||||
|
Args:
|
||||||
|
resource: The resource from the key URL
|
||||||
|
number: The number from the key URL
|
||||||
|
timestamp: The Unix timestamp
|
||||||
|
fingerprint: The fingerprint value
|
||||||
|
secret_key: The HMAC secret key (channel_salt)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The 16-character key path
|
||||||
|
"""
|
||||||
|
combined = f"{resource}|{number}|{timestamp}|{fingerprint}"
|
||||||
|
hmac_hash = hmac.new(secret_key.encode("utf-8"), combined.encode("utf-8"), hashlib.sha256).hexdigest()
|
||||||
|
return hmac_hash[:16]
|
||||||
|
|
||||||
|
|
||||||
|
def compute_key_headers(key_url: str, secret_key: str) -> tuple[int, int, str, str] | None:
|
||||||
|
"""
|
||||||
|
Compute X-Key-Timestamp, X-Key-Nonce, X-Key-Path, and X-Fingerprint for a /key/ URL.
|
||||||
|
|
||||||
|
Algorithm:
|
||||||
|
1. Extract resource and number from URL pattern /key/{resource}/{number}
|
||||||
|
2. ts = Unix timestamp in seconds
|
||||||
|
3. hmac_hash = HMAC-SHA256(resource, secret_key).hex()
|
||||||
|
4. nonce = proof-of-work: find i where MD5(hmac+resource+number+ts+i)[:4] < 0x1000
|
||||||
|
5. fingerprint = compute_fingerprint()
|
||||||
|
6. key_path = HMAC-SHA256("resource|number|ts|fingerprint", secret_key).hex()[:16]
|
||||||
|
|
||||||
|
Args:
|
||||||
|
key_url: The key URL containing /key/{resource}/{number}
|
||||||
|
secret_key: The HMAC secret key (channel_salt)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (timestamp, nonce, key_path, fingerprint) or None if URL doesn't match pattern
|
||||||
|
"""
|
||||||
|
# Extract resource and number from URL
|
||||||
|
pattern = r"/key/([^/]+)/(\d+)"
|
||||||
|
match = re.search(pattern, key_url)
|
||||||
|
|
||||||
|
if not match:
|
||||||
|
return None
|
||||||
|
|
||||||
|
resource = match.group(1)
|
||||||
|
number = match.group(2)
|
||||||
|
|
||||||
|
ts = int(time.time())
|
||||||
|
|
||||||
|
# Compute HMAC-SHA256
|
||||||
|
hmac_hash = hmac.new(secret_key.encode("utf-8"), resource.encode("utf-8"), hashlib.sha256).hexdigest()
|
||||||
|
|
||||||
|
# Proof-of-work loop
|
||||||
|
nonce = 0
|
||||||
|
for i in range(100000):
|
||||||
|
combined = f"{hmac_hash}{resource}{number}{ts}{i}"
|
||||||
|
md5_hash = hashlib.md5(combined.encode("utf-8")).hexdigest()
|
||||||
|
prefix_value = int(md5_hash[:4], 16)
|
||||||
|
|
||||||
|
if prefix_value < 0x1000: # < 4096
|
||||||
|
nonce = i
|
||||||
|
break
|
||||||
|
|
||||||
|
fingerprint = compute_fingerprint()
|
||||||
|
key_path = compute_key_path(resource, number, ts, fingerprint, secret_key)
|
||||||
|
|
||||||
|
return ts, nonce, key_path, fingerprint
|
||||||
|
|
||||||
|
|
||||||
class DLHDExtractor(BaseExtractor):
|
class DLHDExtractor(BaseExtractor):
|
||||||
"""DLHD (DaddyLive) URL extractor for M3U8 streams.
|
"""DLHD (DaddyLive) URL extractor for M3U8 streams.
|
||||||
|
|
||||||
|
Supports the new authentication flow with:
|
||||||
Notes:
|
- EPlayerAuth extraction (auth_token, channel_key, channel_salt)
|
||||||
- Multi-domain support for daddylive.sx / dlhd.dad
|
- Server lookup for dynamic server selection
|
||||||
- Robust extraction of auth parameters and server lookup
|
- Dynamic key header computation for AES-128 encrypted streams
|
||||||
- Uses retries/timeouts via BaseExtractor where possible
|
|
||||||
- Multi-iframe fallback for resilience
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
def __init__(self, request_headers: dict):
|
def __init__(self, request_headers: dict):
|
||||||
super().__init__(request_headers)
|
super().__init__(request_headers)
|
||||||
self.mediaflow_endpoint = "hls_manifest_proxy"
|
self.mediaflow_endpoint = "hls_key_proxy"
|
||||||
self._iframe_context: Optional[str] = None
|
self._iframe_context: Optional[str] = None
|
||||||
|
self._flaresolverr_cookies: Optional[str] = None
|
||||||
|
self._flaresolverr_user_agent: Optional[str] = None
|
||||||
|
|
||||||
|
async def _fetch_via_flaresolverr(self, url: str) -> HttpResponse:
|
||||||
|
"""Fetch a URL using FlareSolverr to bypass Cloudflare protection."""
|
||||||
|
if not settings.flaresolverr_url:
|
||||||
|
raise ExtractorError("FlareSolverr URL not configured. Set FLARESOLVERR_URL in environment.")
|
||||||
|
|
||||||
|
flaresolverr_endpoint = f"{settings.flaresolverr_url.rstrip('/')}/v1"
|
||||||
|
payload = {
|
||||||
|
"cmd": "request.get",
|
||||||
|
"url": url,
|
||||||
|
"maxTimeout": settings.flaresolverr_timeout * 1000,
|
||||||
|
}
|
||||||
|
|
||||||
async def _make_request(self, url: str, method: str = "GET", headers: Optional[Dict] = None, **kwargs) -> Any:
|
logger.info(f"Using FlareSolverr to fetch: {url}")
|
||||||
"""Override to disable SSL verification for this extractor and use fetch_with_retry if available."""
|
|
||||||
from mediaflow_proxy.utils.http_utils import create_httpx_client, fetch_with_retry
|
|
||||||
|
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
async with session.post(
|
||||||
|
flaresolverr_endpoint,
|
||||||
|
json=payload,
|
||||||
|
timeout=aiohttp.ClientTimeout(total=settings.flaresolverr_timeout + 10),
|
||||||
|
) as response:
|
||||||
|
if response.status != 200:
|
||||||
|
raise ExtractorError(f"FlareSolverr returned status {response.status}")
|
||||||
|
|
||||||
|
data = await response.json()
|
||||||
|
|
||||||
|
if data.get("status") != "ok":
|
||||||
|
raise ExtractorError(f"FlareSolverr failed: {data.get('message', 'Unknown error')}")
|
||||||
|
|
||||||
|
solution = data.get("solution", {})
|
||||||
|
html_content = solution.get("response", "")
|
||||||
|
final_url = solution.get("url", url)
|
||||||
|
status = solution.get("status", 200)
|
||||||
|
|
||||||
|
# Store cookies and user-agent for subsequent requests
|
||||||
|
cookies = solution.get("cookies", [])
|
||||||
|
if cookies:
|
||||||
|
cookie_str = "; ".join([f"{c['name']}={c['value']}" for c in cookies])
|
||||||
|
self._flaresolverr_cookies = cookie_str
|
||||||
|
logger.info(f"FlareSolverr provided {len(cookies)} cookies")
|
||||||
|
|
||||||
|
user_agent = solution.get("userAgent")
|
||||||
|
if user_agent:
|
||||||
|
self._flaresolverr_user_agent = user_agent
|
||||||
|
logger.info(f"FlareSolverr user-agent: {user_agent}")
|
||||||
|
|
||||||
|
logger.info(f"FlareSolverr successfully bypassed Cloudflare for: {url}")
|
||||||
|
|
||||||
|
return HttpResponse(
|
||||||
|
status=status,
|
||||||
|
headers={},
|
||||||
|
text=html_content,
|
||||||
|
content=html_content.encode("utf-8", errors="replace"),
|
||||||
|
url=final_url,
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _make_request(
|
||||||
|
self, url: str, method: str = "GET", headers: Optional[Dict] = None, use_flaresolverr: bool = False, **kwargs
|
||||||
|
) -> HttpResponse:
|
||||||
|
"""Override to disable SSL verification and optionally use FlareSolverr."""
|
||||||
|
# Use FlareSolverr for Cloudflare-protected pages
|
||||||
|
if use_flaresolverr and settings.flaresolverr_url:
|
||||||
|
return await self._fetch_via_flaresolverr(url)
|
||||||
|
|
||||||
timeout = kwargs.pop("timeout", 15)
|
timeout = kwargs.pop("timeout", 15)
|
||||||
retries = kwargs.pop("retries", 3)
|
kwargs.pop("retries", 3) # consumed but not used directly
|
||||||
backoff_factor = kwargs.pop("backoff_factor", 0.5)
|
kwargs.pop("backoff_factor", 0.5) # consumed but not used directly
|
||||||
|
|
||||||
|
# Merge headers
|
||||||
|
request_headers = self.base_headers.copy()
|
||||||
|
if headers:
|
||||||
|
request_headers.update(headers)
|
||||||
|
|
||||||
async with create_httpx_client(verify=False, timeout=httpx.Timeout(timeout)) as client:
|
# Add FlareSolverr cookies if available
|
||||||
try:
|
if self._flaresolverr_cookies:
|
||||||
return await fetch_with_retry(client, method, url, headers or {}, timeout=timeout)
|
existing_cookies = request_headers.get("Cookie", "")
|
||||||
except Exception:
|
if existing_cookies:
|
||||||
logger.debug("fetch_with_retry failed or unavailable; falling back to direct request for %s", url)
|
request_headers["Cookie"] = f"{existing_cookies}; {self._flaresolverr_cookies}"
|
||||||
response = await client.request(method, url, headers=headers or {}, timeout=timeout)
|
else:
|
||||||
response.raise_for_status()
|
request_headers["Cookie"] = self._flaresolverr_cookies
|
||||||
return response
|
|
||||||
|
|
||||||
|
# Use FlareSolverr user-agent if available
|
||||||
|
if self._flaresolverr_user_agent:
|
||||||
|
request_headers["User-Agent"] = self._flaresolverr_user_agent
|
||||||
|
|
||||||
async def _extract_lovecdn_stream(self, iframe_url: str, iframe_content: str, headers: dict) -> Dict[str, Any]:
|
# Use create_aiohttp_session with verify=False for SSL bypass
|
||||||
|
async with create_aiohttp_session(url, timeout=timeout, verify=False) as (session, proxy_url):
|
||||||
|
async with session.request(method, url, headers=request_headers, proxy=proxy_url, **kwargs) as response:
|
||||||
|
content = await response.read()
|
||||||
|
final_url = str(response.url)
|
||||||
|
status = response.status
|
||||||
|
resp_headers = dict(response.headers)
|
||||||
|
|
||||||
|
if status >= 400:
|
||||||
|
raise ExtractorError(f"HTTP error {status} while requesting {url}")
|
||||||
|
|
||||||
|
return HttpResponse(
|
||||||
|
status=status,
|
||||||
|
headers=resp_headers,
|
||||||
|
text=content.decode("utf-8", errors="replace"),
|
||||||
|
content=content,
|
||||||
|
url=final_url,
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _extract_session_data(self, iframe_url: str, main_url: str) -> dict | None:
|
||||||
"""
|
"""
|
||||||
Estrattore alternativo per iframe lovecdn.ru che usa un formato diverso.
|
Fetch the iframe URL and extract auth_token, channel_key, and channel_salt.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
iframe_url: The iframe URL to fetch
|
||||||
|
main_url: The main site domain for Referer header
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict with auth_token, channel_key, channel_salt, or None if not found
|
||||||
"""
|
"""
|
||||||
|
headers = {
|
||||||
|
"User-Agent": self._flaresolverr_user_agent or DEFAULT_DLHD_USER_AGENT,
|
||||||
|
"Referer": f"https://{main_url}/",
|
||||||
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Cerca pattern di stream URL diretto
|
resp = await self._make_request(iframe_url, headers=headers, timeout=12)
|
||||||
m3u8_patterns = [
|
html = resp.text
|
||||||
r'["\']([^"\']*\.m3u8[^"\']*)["\']',
|
|
||||||
r'source[:\s]+["\']([^"\']+)["\']',
|
|
||||||
r'file[:\s]+["\']([^"\']+\.m3u8[^"\']*)["\']',
|
|
||||||
r'hlsManifestUrl[:\s]*["\']([^"\']+)["\']',
|
|
||||||
]
|
|
||||||
|
|
||||||
stream_url = None
|
|
||||||
for pattern in m3u8_patterns:
|
|
||||||
matches = re.findall(pattern, iframe_content)
|
|
||||||
for match in matches:
|
|
||||||
if '.m3u8' in match and match.startswith('http'):
|
|
||||||
stream_url = match
|
|
||||||
logger.info(f"Found direct m3u8 URL: {stream_url}")
|
|
||||||
break
|
|
||||||
if stream_url:
|
|
||||||
break
|
|
||||||
|
|
||||||
# Pattern 2: Cerca costruzione dinamica URL
|
|
||||||
if not stream_url:
|
|
||||||
channel_match = re.search(r'(?:stream|channel)["\s:=]+["\']([^"\']+)["\']', iframe_content)
|
|
||||||
server_match = re.search(r'(?:server|domain|host)["\s:=]+["\']([^"\']+)["\']', iframe_content)
|
|
||||||
|
|
||||||
if channel_match:
|
|
||||||
channel_name = channel_match.group(1)
|
|
||||||
server = server_match.group(1) if server_match else 'newkso.ru'
|
|
||||||
stream_url = f"https://{server}/{channel_name}/mono.m3u8"
|
|
||||||
logger.info(f"Constructed stream URL: {stream_url}")
|
|
||||||
|
|
||||||
if not stream_url:
|
|
||||||
# Fallback: cerca qualsiasi URL che sembri uno stream
|
|
||||||
url_pattern = r'https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*'
|
|
||||||
matches = re.findall(url_pattern, iframe_content)
|
|
||||||
if matches:
|
|
||||||
stream_url = matches[0]
|
|
||||||
logger.info(f"Found fallback stream URL: {stream_url}")
|
|
||||||
|
|
||||||
if not stream_url:
|
|
||||||
raise ExtractorError(f"Could not find stream URL in lovecdn.ru iframe")
|
|
||||||
|
|
||||||
# Usa iframe URL come referer
|
|
||||||
iframe_origin = f"https://{urlparse(iframe_url).netloc}"
|
|
||||||
stream_headers = {
|
|
||||||
'User-Agent': headers['User-Agent'],
|
|
||||||
'Referer': iframe_url,
|
|
||||||
'Origin': iframe_origin
|
|
||||||
}
|
|
||||||
|
|
||||||
# Determina endpoint in base al dominio dello stream
|
|
||||||
endpoint = "hls_key_proxy"
|
|
||||||
|
|
||||||
logger.info(f"Using lovecdn.ru stream with endpoint: {endpoint}")
|
|
||||||
|
|
||||||
return {
|
|
||||||
"destination_url": stream_url,
|
|
||||||
"request_headers": stream_headers,
|
|
||||||
"mediaflow_endpoint": endpoint,
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise ExtractorError(f"Failed to extract lovecdn.ru stream: {e}")
|
logger.warning(f"Error fetching iframe URL: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Pattern to extract EPlayerAuth.init block with authToken, channelKey, channelSalt
|
||||||
|
# Matches: EPlayerAuth.init({ authToken: '...', channelKey: '...', ..., channelSalt: '...' });
|
||||||
|
auth_pattern = r"EPlayerAuth\.init\s*\(\s*\{\s*authToken:\s*'([^']+)'"
|
||||||
|
channel_key_pattern = r"channelKey:\s*'([^']+)'"
|
||||||
|
channel_salt_pattern = r"channelSalt:\s*'([^']+)'"
|
||||||
|
|
||||||
|
# Pattern to extract server lookup base URL from fetchWithRetry call
|
||||||
|
lookup_pattern = r"fetchWithRetry\s*\(\s*'([^']+server_lookup\?channel_id=)"
|
||||||
|
|
||||||
|
auth_match = re.search(auth_pattern, html)
|
||||||
|
channel_key_match = re.search(channel_key_pattern, html)
|
||||||
|
channel_salt_match = re.search(channel_salt_pattern, html)
|
||||||
|
lookup_match = re.search(lookup_pattern, html)
|
||||||
|
|
||||||
|
if auth_match and channel_key_match and channel_salt_match:
|
||||||
|
result = {
|
||||||
|
"auth_token": auth_match.group(1),
|
||||||
|
"channel_key": channel_key_match.group(1),
|
||||||
|
"channel_salt": channel_salt_match.group(1),
|
||||||
|
}
|
||||||
|
if lookup_match:
|
||||||
|
result["server_lookup_url"] = lookup_match.group(1) + result["channel_key"]
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
async def _get_server_key(self, server_lookup_url: str, iframe_url: str) -> str | None:
|
||||||
|
"""
|
||||||
|
Fetch the server lookup URL and extract the server_key.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
server_lookup_url: The server lookup URL
|
||||||
|
iframe_url: The iframe URL for extracting the host for headers
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The server_key or None if not found
|
||||||
|
"""
|
||||||
|
parsed = urlparse(iframe_url)
|
||||||
|
iframe_host = parsed.netloc
|
||||||
|
|
||||||
|
headers = {
|
||||||
|
"User-Agent": self._flaresolverr_user_agent or DEFAULT_DLHD_USER_AGENT,
|
||||||
|
"Referer": f"https://{iframe_host}/",
|
||||||
|
"Origin": f"https://{iframe_host}",
|
||||||
|
}
|
||||||
|
|
||||||
|
try:
|
||||||
|
resp = await self._make_request(server_lookup_url, headers=headers, timeout=10)
|
||||||
|
data = resp.json()
|
||||||
|
return data.get("server_key")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Error fetching server lookup: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _build_m3u8_url(self, server_key: str, channel_key: str) -> str:
|
||||||
|
"""
|
||||||
|
Build the m3u8 URL based on the server_key.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
server_key: The server key from server lookup
|
||||||
|
channel_key: The channel key
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The m3u8 URL (with .css extension as per the original implementation)
|
||||||
|
"""
|
||||||
|
if server_key == "top1/cdn":
|
||||||
|
return f"https://top1.dvalna.ru/top1/cdn/{channel_key}/mono.css"
|
||||||
|
else:
|
||||||
|
return f"https://{server_key}new.dvalna.ru/{server_key}/{channel_key}/mono.css"
|
||||||
|
|
||||||
async def _extract_new_auth_flow(self, iframe_url: str, iframe_content: str, headers: dict) -> Dict[str, Any]:
|
async def _extract_new_auth_flow(self, iframe_url: str, iframe_content: str, headers: dict) -> Dict[str, Any]:
|
||||||
"""Handles the new authentication flow found in recent updates."""
|
"""Handles the new authentication flow found in recent updates."""
|
||||||
|
|
||||||
def _extract_params(js: str) -> Dict[str, Optional[str]]:
|
def _extract_params(js: str) -> Dict[str, Optional[str]]:
|
||||||
params = {}
|
params = {}
|
||||||
patterns = {
|
patterns = {
|
||||||
@@ -143,82 +355,93 @@ class DLHDExtractor(BaseExtractor):
|
|||||||
return params
|
return params
|
||||||
|
|
||||||
params = _extract_params(iframe_content)
|
params = _extract_params(iframe_content)
|
||||||
|
|
||||||
missing_params = [k for k, v in params.items() if not v]
|
missing_params = [k for k, v in params.items() if not v]
|
||||||
if missing_params:
|
if missing_params:
|
||||||
# This is not an error, just means it's not the new flow
|
# This is not an error, just means it's not the new flow
|
||||||
raise ExtractorError(f"Not the new auth flow: missing params {missing_params}")
|
raise ExtractorError(f"Not the new auth flow: missing params {missing_params}")
|
||||||
|
|
||||||
logger.info("New auth flow detected. Proceeding with POST auth.")
|
logger.info("New auth flow detected. Proceeding with POST auth.")
|
||||||
|
|
||||||
# 1. Initial Auth POST
|
# 1. Initial Auth POST
|
||||||
auth_url = 'https://security.newkso.ru/auth2.php'
|
auth_url = "https://security.newkso.ru/auth2.php"
|
||||||
# Use files parameter to force multipart/form-data which is required by the server
|
|
||||||
# (None, value) tells httpx to send it as a form field, not a file upload
|
|
||||||
multipart_data = {
|
|
||||||
'channelKey': (None, params["channel_key"]),
|
|
||||||
'country': (None, params["auth_country"]),
|
|
||||||
'timestamp': (None, params["auth_ts"]),
|
|
||||||
'expiry': (None, params["auth_expiry"]),
|
|
||||||
'token': (None, params["auth_token"]),
|
|
||||||
}
|
|
||||||
|
|
||||||
iframe_origin = f"https://{urlparse(iframe_url).netloc}"
|
iframe_origin = f"https://{urlparse(iframe_url).netloc}"
|
||||||
auth_headers = headers.copy()
|
auth_headers = headers.copy()
|
||||||
auth_headers.update({
|
auth_headers.update(
|
||||||
'Accept': '*/*',
|
{
|
||||||
'Accept-Language': 'en-US,en;q=0.9',
|
"Accept": "*/*",
|
||||||
'Origin': iframe_origin,
|
"Accept-Language": "en-US,en;q=0.9",
|
||||||
'Referer': iframe_url,
|
"Origin": iframe_origin,
|
||||||
'Sec-Fetch-Dest': 'empty',
|
"Referer": iframe_url,
|
||||||
'Sec-Fetch-Mode': 'cors',
|
"Sec-Fetch-Dest": "empty",
|
||||||
'Sec-Fetch-Site': 'cross-site',
|
"Sec-Fetch-Mode": "cors",
|
||||||
'Priority': 'u=1, i',
|
"Sec-Fetch-Site": "cross-site",
|
||||||
})
|
"Priority": "u=1, i",
|
||||||
|
}
|
||||||
from mediaflow_proxy.utils.http_utils import create_httpx_client
|
)
|
||||||
|
|
||||||
|
# Build form data for multipart/form-data
|
||||||
|
form_data = aiohttp.FormData()
|
||||||
|
form_data.add_field("channelKey", params["channel_key"])
|
||||||
|
form_data.add_field("country", params["auth_country"])
|
||||||
|
form_data.add_field("timestamp", params["auth_ts"])
|
||||||
|
form_data.add_field("expiry", params["auth_expiry"])
|
||||||
|
form_data.add_field("token", params["auth_token"])
|
||||||
|
|
||||||
try:
|
try:
|
||||||
async with create_httpx_client(verify=False) as client:
|
async with create_aiohttp_session(auth_url, timeout=12, verify=False) as (session, proxy_url):
|
||||||
# Note: using 'files' instead of 'data' to ensure multipart/form-data Content-Type
|
async with session.post(
|
||||||
auth_resp = await client.post(auth_url, files=multipart_data, headers=auth_headers, timeout=12)
|
auth_url,
|
||||||
auth_resp.raise_for_status()
|
headers=auth_headers,
|
||||||
auth_data = auth_resp.json()
|
data=form_data,
|
||||||
if not (auth_data.get("valid") or auth_data.get("success")):
|
proxy=proxy_url,
|
||||||
raise ExtractorError(f"Initial auth failed with response: {auth_data}")
|
) as response:
|
||||||
|
content = await response.read()
|
||||||
|
response.raise_for_status()
|
||||||
|
import json
|
||||||
|
|
||||||
|
auth_data = json.loads(content.decode("utf-8"))
|
||||||
|
if not (auth_data.get("valid") or auth_data.get("success")):
|
||||||
|
raise ExtractorError(f"Initial auth failed with response: {auth_data}")
|
||||||
logger.info("New auth flow: Initial auth successful.")
|
logger.info("New auth flow: Initial auth successful.")
|
||||||
|
except ExtractorError:
|
||||||
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise ExtractorError(f"New auth flow failed during initial auth POST: {e}")
|
raise ExtractorError(f"New auth flow failed during initial auth POST: {e}")
|
||||||
|
|
||||||
# 2. Server Lookup
|
# 2. Server Lookup
|
||||||
server_lookup_url = f"https://{urlparse(iframe_url).netloc}/server_lookup.js?channel_id={params['channel_key']}"
|
server_lookup_url = f"https://{urlparse(iframe_url).netloc}/server_lookup.js?channel_id={params['channel_key']}"
|
||||||
try:
|
try:
|
||||||
# Use _make_request as it handles retries and expects JSON
|
# Use _make_request as it handles retries
|
||||||
lookup_resp = await self._make_request(server_lookup_url, headers=headers, timeout=10)
|
lookup_resp = await self._make_request(server_lookup_url, headers=headers, timeout=10)
|
||||||
server_data = lookup_resp.json()
|
server_data = lookup_resp.json()
|
||||||
server_key = server_data.get('server_key')
|
server_key = server_data.get("server_key")
|
||||||
if not server_key:
|
if not server_key:
|
||||||
raise ExtractorError(f"No server_key in lookup response: {server_data}")
|
raise ExtractorError(f"No server_key in lookup response: {server_data}")
|
||||||
logger.info(f"New auth flow: Server lookup successful - Server key: {server_key}")
|
logger.info(f"New auth flow: Server lookup successful - Server key: {server_key}")
|
||||||
|
except ExtractorError:
|
||||||
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise ExtractorError(f"New auth flow failed during server lookup: {e}")
|
raise ExtractorError(f"New auth flow failed during server lookup: {e}")
|
||||||
|
|
||||||
# 3. Build final stream URL
|
# 3. Build final stream URL
|
||||||
channel_key = params['channel_key']
|
channel_key = params["channel_key"]
|
||||||
auth_token = params['auth_token']
|
auth_token = params["auth_token"]
|
||||||
# The JS logic uses .css, not .m3u8
|
# The JS logic uses .css, not .m3u8
|
||||||
if server_key == 'top1/cdn':
|
if server_key == "top1/cdn":
|
||||||
stream_url = f'https://top1.newkso.ru/top1/cdn/{channel_key}/mono.css'
|
stream_url = f"https://top1.newkso.ru/top1/cdn/{channel_key}/mono.css"
|
||||||
else:
|
else:
|
||||||
stream_url = f'https://{server_key}new.newkso.ru/{server_key}/{channel_key}/mono.css'
|
stream_url = f"https://{server_key}new.newkso.ru/{server_key}/{channel_key}/mono.css"
|
||||||
|
|
||||||
logger.info(f'New auth flow: Constructed stream URL: {stream_url}')
|
logger.info(f"New auth flow: Constructed stream URL: {stream_url}")
|
||||||
|
|
||||||
stream_headers = {
|
stream_headers = {
|
||||||
'User-Agent': headers['User-Agent'],
|
"User-Agent": headers["User-Agent"],
|
||||||
'Referer': iframe_url,
|
"Referer": iframe_url,
|
||||||
'Origin': iframe_origin,
|
"Origin": iframe_origin,
|
||||||
'Authorization': f'Bearer {auth_token}',
|
"Authorization": f"Bearer {auth_token}",
|
||||||
'X-Channel-Key': channel_key
|
"X-Channel-Key": channel_key,
|
||||||
}
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@@ -227,106 +450,255 @@ class DLHDExtractor(BaseExtractor):
|
|||||||
"mediaflow_endpoint": "hls_manifest_proxy",
|
"mediaflow_endpoint": "hls_manifest_proxy",
|
||||||
}
|
}
|
||||||
|
|
||||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
async def _extract_lovecdn_stream(self, iframe_url: str, iframe_content: str, headers: dict) -> Dict[str, Any]:
|
||||||
"""Main extraction flow: resolve base, fetch players, extract iframe, auth and final m3u8."""
|
"""
|
||||||
baseurl = "https://dlhd.dad/"
|
Alternative extractor for lovecdn.ru iframe that uses a different format.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Look for direct stream URL patterns
|
||||||
|
m3u8_patterns = [
|
||||||
|
r'["\']([^"\']*\.m3u8[^"\']*)["\']',
|
||||||
|
r'source[:\s]+["\']([^"\']+)["\']',
|
||||||
|
r'file[:\s]+["\']([^"\']+\.m3u8[^"\']*)["\']',
|
||||||
|
r'hlsManifestUrl[:\s]*["\']([^"\']+)["\']',
|
||||||
|
]
|
||||||
|
|
||||||
def extract_channel_id(u: str) -> Optional[str]:
|
stream_url = None
|
||||||
match_watch_id = re.search(r'watch\.php\?id=(\d+)', u)
|
for pattern in m3u8_patterns:
|
||||||
if match_watch_id:
|
matches = re.findall(pattern, iframe_content)
|
||||||
return match_watch_id.group(1)
|
for match in matches:
|
||||||
return None
|
if ".m3u8" in match and match.startswith("http"):
|
||||||
|
stream_url = match
|
||||||
|
logger.info(f"Found direct m3u8 URL: {stream_url}")
|
||||||
|
break
|
||||||
|
if stream_url:
|
||||||
|
break
|
||||||
|
|
||||||
|
# Pattern 2: Look for dynamic URL construction
|
||||||
|
if not stream_url:
|
||||||
|
channel_match = re.search(r'(?:stream|channel)["\s:=]+["\']([^"\']+)["\']', iframe_content)
|
||||||
|
server_match = re.search(r'(?:server|domain|host)["\s:=]+["\']([^"\']+)["\']', iframe_content)
|
||||||
|
|
||||||
async def get_stream_data(initial_url: str):
|
if channel_match:
|
||||||
daddy_origin = urlparse(baseurl).scheme + "://" + urlparse(baseurl).netloc
|
channel_name = channel_match.group(1)
|
||||||
daddylive_headers = {
|
server = server_match.group(1) if server_match else "newkso.ru"
|
||||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36',
|
stream_url = f"https://{server}/{channel_name}/mono.m3u8"
|
||||||
'Referer': baseurl,
|
logger.info(f"Constructed stream URL: {stream_url}")
|
||||||
'Origin': daddy_origin
|
|
||||||
|
if not stream_url:
|
||||||
|
# Fallback: look for any URL that looks like a stream
|
||||||
|
url_pattern = r'https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*'
|
||||||
|
matches = re.findall(url_pattern, iframe_content)
|
||||||
|
if matches:
|
||||||
|
stream_url = matches[0]
|
||||||
|
logger.info(f"Found fallback stream URL: {stream_url}")
|
||||||
|
|
||||||
|
if not stream_url:
|
||||||
|
raise ExtractorError("Could not find stream URL in lovecdn.ru iframe")
|
||||||
|
|
||||||
|
# Use iframe URL as referer
|
||||||
|
iframe_origin = f"https://{urlparse(iframe_url).netloc}"
|
||||||
|
stream_headers = {"User-Agent": headers["User-Agent"], "Referer": iframe_url, "Origin": iframe_origin}
|
||||||
|
|
||||||
|
# Determine endpoint based on the stream domain
|
||||||
|
endpoint = "hls_key_proxy"
|
||||||
|
|
||||||
|
logger.info(f"Using lovecdn.ru stream with endpoint: {endpoint}")
|
||||||
|
|
||||||
|
return {
|
||||||
|
"destination_url": stream_url,
|
||||||
|
"request_headers": stream_headers,
|
||||||
|
"mediaflow_endpoint": endpoint,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
raise ExtractorError(f"Failed to extract lovecdn.ru stream: {e}")
|
||||||
|
|
||||||
# 1. Request initial page
|
async def _extract_direct_stream(self, channel_id: str) -> Dict[str, Any]:
|
||||||
resp1 = await self._make_request(initial_url, headers=daddylive_headers, timeout=15)
|
"""
|
||||||
player_links = re.findall(r'<button[^>]*data-url="([^"]+)"[^>]*>Player\s*\d+</button>', resp1.text)
|
Direct stream extraction using server lookup API with the new auth flow.
|
||||||
if not player_links:
|
This extracts auth_token, channel_key, channel_salt and computes key headers.
|
||||||
raise ExtractorError("No player links found on the page.")
|
"""
|
||||||
|
# Common iframe domains for DLHD
|
||||||
|
iframe_domains = ["lefttoplay.xyz"]
|
||||||
|
|
||||||
|
for iframe_domain in iframe_domains:
|
||||||
|
try:
|
||||||
|
iframe_url = f"https://{iframe_domain}/premiumtv/daddyhd.php?id={channel_id}"
|
||||||
|
logger.info(f"Attempting extraction via {iframe_domain}")
|
||||||
|
|
||||||
# Prova tutti i player e raccogli tutti gli iframe validi
|
session_data = await self._extract_session_data(iframe_url, "dlhd.link")
|
||||||
last_player_error = None
|
|
||||||
iframe_candidates = []
|
|
||||||
|
|
||||||
for player_url in player_links:
|
if not session_data:
|
||||||
try:
|
logger.debug(f"No session data from {iframe_domain}")
|
||||||
if not player_url.startswith('http'):
|
|
||||||
player_url = baseurl + player_url.lstrip('/')
|
|
||||||
|
|
||||||
|
|
||||||
daddylive_headers['Referer'] = player_url
|
|
||||||
daddylive_headers['Origin'] = player_url
|
|
||||||
resp2 = await self._make_request(player_url, headers=daddylive_headers, timeout=12)
|
|
||||||
iframes2 = re.findall(r'<iframe.*?src="([^"]*)"', resp2.text)
|
|
||||||
|
|
||||||
# Raccogli tutti gli iframe trovati
|
|
||||||
for iframe in iframes2:
|
|
||||||
if iframe not in iframe_candidates:
|
|
||||||
iframe_candidates.append(iframe)
|
|
||||||
logger.info(f"Found iframe candidate: {iframe}")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
last_player_error = e
|
|
||||||
logger.warning(f"Failed to process player link {player_url}: {e}")
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
logger.info(f"Got session data from {iframe_domain}: channel_key={session_data['channel_key']}")
|
||||||
|
|
||||||
if not iframe_candidates:
|
# Get server key
|
||||||
if last_player_error:
|
if "server_lookup_url" not in session_data:
|
||||||
raise ExtractorError(f"All player links failed. Last error: {last_player_error}")
|
logger.debug(f"No server lookup URL from {iframe_domain}")
|
||||||
raise ExtractorError("No valid iframe found in any player page")
|
|
||||||
|
|
||||||
|
|
||||||
# Prova ogni iframe finché uno non funziona
|
|
||||||
last_iframe_error = None
|
|
||||||
|
|
||||||
for iframe_candidate in iframe_candidates:
|
|
||||||
try:
|
|
||||||
logger.info(f"Trying iframe: {iframe_candidate}")
|
|
||||||
|
|
||||||
iframe_domain = urlparse(iframe_candidate).netloc
|
|
||||||
if not iframe_domain:
|
|
||||||
logger.warning(f"Invalid iframe URL format: {iframe_candidate}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
self._iframe_context = iframe_candidate
|
|
||||||
resp3 = await self._make_request(iframe_candidate, headers=daddylive_headers, timeout=12)
|
|
||||||
iframe_content = resp3.text
|
|
||||||
logger.info(f"Successfully loaded iframe from: {iframe_domain}")
|
|
||||||
|
|
||||||
if 'lovecdn.ru' in iframe_domain:
|
|
||||||
logger.info("Detected lovecdn.ru iframe - using alternative extraction")
|
|
||||||
return await self._extract_lovecdn_stream(iframe_candidate, iframe_content, daddylive_headers)
|
|
||||||
else:
|
|
||||||
logger.info("Attempting new auth flow extraction.")
|
|
||||||
return await self._extract_new_auth_flow(iframe_candidate, iframe_content, daddylive_headers)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Failed to process iframe {iframe_candidate}: {e}")
|
|
||||||
last_iframe_error = e
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
raise ExtractorError(f"All iframe candidates failed. Last error: {last_iframe_error}")
|
server_key = await self._get_server_key(session_data["server_lookup_url"], iframe_url)
|
||||||
|
|
||||||
|
if not server_key:
|
||||||
|
logger.debug(f"No server key from {iframe_domain}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
logger.info(f"Got server key: {server_key}")
|
||||||
|
|
||||||
|
# Build m3u8 URL
|
||||||
|
m3u8_url = self._build_m3u8_url(server_key, session_data["channel_key"])
|
||||||
|
logger.info(f"M3U8 URL: {m3u8_url}")
|
||||||
|
|
||||||
|
# Build stream headers with auth
|
||||||
|
iframe_origin = f"https://{iframe_domain}"
|
||||||
|
stream_headers = {
|
||||||
|
"User-Agent": self._flaresolverr_user_agent or DEFAULT_DLHD_USER_AGENT,
|
||||||
|
"Referer": iframe_url,
|
||||||
|
"Origin": iframe_origin,
|
||||||
|
"Authorization": f"Bearer {session_data['auth_token']}",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Return the result with key header parameters
|
||||||
|
# These will be used to compute headers when fetching keys
|
||||||
|
return {
|
||||||
|
"destination_url": m3u8_url,
|
||||||
|
"request_headers": stream_headers,
|
||||||
|
"mediaflow_endpoint": "hls_key_proxy",
|
||||||
|
# Force playlist processing since DLHD uses .css extension for m3u8
|
||||||
|
"force_playlist_proxy": True,
|
||||||
|
# Key header computation parameters
|
||||||
|
"dlhd_key_params": {
|
||||||
|
"channel_salt": session_data["channel_salt"],
|
||||||
|
"auth_token": session_data["auth_token"],
|
||||||
|
"iframe_url": iframe_url,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed extraction via {iframe_domain}: {e}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
raise ExtractorError(f"Failed to extract stream from all iframe domains for channel {channel_id}")
|
||||||
|
|
||||||
|
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||||
|
"""Main extraction flow - uses direct server lookup with new auth flow."""
|
||||||
|
|
||||||
|
def extract_channel_id(u: str) -> Optional[str]:
|
||||||
|
match_watch_id = re.search(r"watch\.php\?id=(\d+)", u)
|
||||||
|
if match_watch_id:
|
||||||
|
return match_watch_id.group(1)
|
||||||
|
# Also try stream-XXX pattern
|
||||||
|
match_stream = re.search(r"stream-(\d+)", u)
|
||||||
|
if match_stream:
|
||||||
|
return match_stream.group(1)
|
||||||
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
channel_id = extract_channel_id(url)
|
channel_id = extract_channel_id(url)
|
||||||
if not channel_id:
|
if not channel_id:
|
||||||
raise ExtractorError(f"Unable to extract channel ID from {url}")
|
raise ExtractorError(f"Unable to extract channel ID from {url}")
|
||||||
|
|
||||||
logger.info(f"Using base domain: {baseurl}")
|
logger.info(f"Extracting DLHD stream for channel ID: {channel_id}")
|
||||||
return await get_stream_data(url)
|
|
||||||
|
|
||||||
|
# Try direct stream extraction with new auth flow
|
||||||
|
try:
|
||||||
|
return await self._extract_direct_stream(channel_id)
|
||||||
|
except ExtractorError as e:
|
||||||
|
logger.warning(f"Direct stream extraction failed: {e}")
|
||||||
|
|
||||||
|
# Fallback to legacy iframe-based extraction if direct fails
|
||||||
|
logger.info("Falling back to iframe-based extraction...")
|
||||||
|
return await self._extract_via_iframe(url, channel_id)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise ExtractorError(f"Extraction failed: {str(e)}")
|
raise ExtractorError(f"Extraction failed: {str(e)}")
|
||||||
|
|
||||||
|
async def _extract_via_iframe(self, url: str, channel_id: str) -> Dict[str, Any]:
|
||||||
|
"""Legacy iframe-based extraction flow - used as fallback."""
|
||||||
|
baseurl = "https://dlhd.dad/"
|
||||||
|
|
||||||
|
daddy_origin = urlparse(baseurl).scheme + "://" + urlparse(baseurl).netloc
|
||||||
|
daddylive_headers = {
|
||||||
|
"User-Agent": self._flaresolverr_user_agent
|
||||||
|
or "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
|
||||||
|
"Referer": baseurl,
|
||||||
|
"Origin": daddy_origin,
|
||||||
|
}
|
||||||
|
|
||||||
|
# 1. Request initial page - use FlareSolverr if available to bypass Cloudflare
|
||||||
|
use_flaresolverr = settings.flaresolverr_url is not None
|
||||||
|
resp1 = await self._make_request(url, headers=daddylive_headers, timeout=15, use_flaresolverr=use_flaresolverr)
|
||||||
|
resp1_text = resp1.text
|
||||||
|
|
||||||
|
# Update headers with FlareSolverr user-agent after initial request
|
||||||
|
if self._flaresolverr_user_agent:
|
||||||
|
daddylive_headers["User-Agent"] = self._flaresolverr_user_agent
|
||||||
|
|
||||||
|
player_links = re.findall(r'<button[^>]*data-url="([^"]+)"[^>]*>Player\s*\d+</button>', resp1_text)
|
||||||
|
if not player_links:
|
||||||
|
raise ExtractorError("No player links found on the page.")
|
||||||
|
|
||||||
|
# Try all players and collect all valid iframes
|
||||||
|
last_player_error = None
|
||||||
|
iframe_candidates = []
|
||||||
|
|
||||||
|
for player_url in player_links:
|
||||||
|
try:
|
||||||
|
if not player_url.startswith("http"):
|
||||||
|
player_url = baseurl + player_url.lstrip("/")
|
||||||
|
|
||||||
|
daddylive_headers["Referer"] = player_url
|
||||||
|
daddylive_headers["Origin"] = player_url
|
||||||
|
resp2 = await self._make_request(player_url, headers=daddylive_headers, timeout=12)
|
||||||
|
resp2_text = resp2.text
|
||||||
|
iframes2 = re.findall(r'<iframe.*?src="([^"]*)"', resp2_text)
|
||||||
|
|
||||||
|
# Collect all found iframes
|
||||||
|
for iframe in iframes2:
|
||||||
|
if iframe not in iframe_candidates:
|
||||||
|
iframe_candidates.append(iframe)
|
||||||
|
logger.info(f"Found iframe candidate: {iframe}")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
last_player_error = e
|
||||||
|
logger.warning(f"Failed to process player link {player_url}: {e}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not iframe_candidates:
|
||||||
|
if last_player_error:
|
||||||
|
raise ExtractorError(f"All player links failed. Last error: {last_player_error}")
|
||||||
|
raise ExtractorError("No valid iframe found in any player page")
|
||||||
|
|
||||||
|
# Try each iframe until one works
|
||||||
|
last_iframe_error = None
|
||||||
|
|
||||||
|
for iframe_candidate in iframe_candidates:
|
||||||
|
try:
|
||||||
|
logger.info(f"Trying iframe: {iframe_candidate}")
|
||||||
|
|
||||||
|
iframe_domain = urlparse(iframe_candidate).netloc
|
||||||
|
if not iframe_domain:
|
||||||
|
logger.warning(f"Invalid iframe URL format: {iframe_candidate}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
self._iframe_context = iframe_candidate
|
||||||
|
resp3 = await self._make_request(iframe_candidate, headers=daddylive_headers, timeout=12)
|
||||||
|
iframe_content = resp3.text
|
||||||
|
logger.info(f"Successfully loaded iframe from: {iframe_domain}")
|
||||||
|
|
||||||
|
if "lovecdn.ru" in iframe_domain:
|
||||||
|
logger.info("Detected lovecdn.ru iframe - using alternative extraction")
|
||||||
|
return await self._extract_lovecdn_stream(iframe_candidate, iframe_content, daddylive_headers)
|
||||||
|
else:
|
||||||
|
logger.info("Attempting new auth flow extraction.")
|
||||||
|
return await self._extract_new_auth_flow(iframe_candidate, iframe_content, daddylive_headers)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to process iframe {iframe_candidate}: {e}")
|
||||||
|
last_iframe_error = e
|
||||||
|
continue
|
||||||
|
|
||||||
|
raise ExtractorError(f"All iframe candidates failed. Last error: {last_iframe_error}")
|
||||||
|
|||||||
@@ -1,39 +1,52 @@
|
|||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
from typing import Dict
|
from urllib.parse import urlparse, urljoin
|
||||||
|
|
||||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||||
|
|
||||||
|
|
||||||
class DoodStreamExtractor(BaseExtractor):
|
class DoodStreamExtractor(BaseExtractor):
|
||||||
"""DoodStream URL extractor."""
|
"""
|
||||||
|
Dood / MyVidPlay extractor
|
||||||
|
Resolves to direct CDN MP4
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, request_headers: dict):
|
def __init__(self, request_headers: dict):
|
||||||
super().__init__(request_headers)
|
super().__init__(request_headers)
|
||||||
self.base_url = "https://d000d.com"
|
self.base_url = "https://myvidplay.com"
|
||||||
|
|
||||||
async def extract(self, url: str, **kwargs) -> Dict[str, str]:
|
async def extract(self, url: str, **kwargs):
|
||||||
"""Extract DoodStream URL."""
|
parsed = urlparse(url)
|
||||||
response = await self._make_request(url)
|
video_id = parsed.path.rstrip("/").split("/")[-1]
|
||||||
|
if not video_id:
|
||||||
|
raise ExtractorError("Invalid Dood URL")
|
||||||
|
|
||||||
# Extract URL pattern
|
headers = {
|
||||||
pattern = r"(\/pass_md5\/.*?)'.*(\?token=.*?expiry=)"
|
"User-Agent": self.base_headers.get("User-Agent") or "Mozilla/5.0",
|
||||||
match = re.search(pattern, response.text, re.DOTALL)
|
"Referer": f"{self.base_url}/",
|
||||||
|
}
|
||||||
|
|
||||||
|
embed_url = f"{self.base_url}/e/{video_id}"
|
||||||
|
html = (await self._make_request(embed_url, headers=headers)).text
|
||||||
|
|
||||||
|
match = re.search(r"(\/pass_md5\/[^']+)", html)
|
||||||
if not match:
|
if not match:
|
||||||
raise ExtractorError("Failed to extract URL pattern")
|
raise ExtractorError("Dood: pass_md5 not found")
|
||||||
|
|
||||||
# Build final URL
|
pass_url = urljoin(self.base_url, match.group(1))
|
||||||
pass_url = f"{self.base_url}{match[1]}"
|
|
||||||
referer = f"{self.base_url}/"
|
|
||||||
headers = {"range": "bytes=0-", "referer": referer}
|
|
||||||
|
|
||||||
response = await self._make_request(pass_url, headers=headers)
|
base_stream = (await self._make_request(pass_url, headers=headers)).text.strip()
|
||||||
timestamp = str(int(time.time()))
|
|
||||||
final_url = f"{response.text}123456789{match[2]}{timestamp}"
|
token_match = re.search(r"token=([^&]+)", html)
|
||||||
|
if not token_match:
|
||||||
|
raise ExtractorError("Dood: token missing")
|
||||||
|
|
||||||
|
token = token_match.group(1)
|
||||||
|
|
||||||
|
final_url = f"{base_stream}123456789?token={token}&expiry={int(time.time())}"
|
||||||
|
|
||||||
self.base_headers["referer"] = referer
|
|
||||||
return {
|
return {
|
||||||
"destination_url": final_url,
|
"destination_url": final_url,
|
||||||
"request_headers": self.base_headers,
|
"request_headers": headers,
|
||||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
"mediaflow_endpoint": "proxy_stream_endpoint",
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ from mediaflow_proxy.extractors.sportsonline import SportsonlineExtractor
|
|||||||
from mediaflow_proxy.extractors.filelions import FileLionsExtractor
|
from mediaflow_proxy.extractors.filelions import FileLionsExtractor
|
||||||
from mediaflow_proxy.extractors.filemoon import FileMoonExtractor
|
from mediaflow_proxy.extractors.filemoon import FileMoonExtractor
|
||||||
from mediaflow_proxy.extractors.F16Px import F16PxExtractor
|
from mediaflow_proxy.extractors.F16Px import F16PxExtractor
|
||||||
|
from mediaflow_proxy.extractors.gupload import GuploadExtractor
|
||||||
from mediaflow_proxy.extractors.livetv import LiveTVExtractor
|
from mediaflow_proxy.extractors.livetv import LiveTVExtractor
|
||||||
from mediaflow_proxy.extractors.lulustream import LuluStreamExtractor
|
from mediaflow_proxy.extractors.lulustream import LuluStreamExtractor
|
||||||
from mediaflow_proxy.extractors.maxstream import MaxstreamExtractor
|
from mediaflow_proxy.extractors.maxstream import MaxstreamExtractor
|
||||||
@@ -33,6 +34,7 @@ class ExtractorFactory:
|
|||||||
"FileLions": FileLionsExtractor,
|
"FileLions": FileLionsExtractor,
|
||||||
"FileMoon": FileMoonExtractor,
|
"FileMoon": FileMoonExtractor,
|
||||||
"F16Px": F16PxExtractor,
|
"F16Px": F16PxExtractor,
|
||||||
|
"Gupload": GuploadExtractor,
|
||||||
"Uqload": UqloadExtractor,
|
"Uqload": UqloadExtractor,
|
||||||
"Mixdrop": MixdropExtractor,
|
"Mixdrop": MixdropExtractor,
|
||||||
"Streamtape": StreamtapeExtractor,
|
"Streamtape": StreamtapeExtractor,
|
||||||
|
|||||||
@@ -4,25 +4,29 @@ from mediaflow_proxy.extractors.base import BaseExtractor
|
|||||||
from mediaflow_proxy.utils.packed import eval_solver
|
from mediaflow_proxy.utils.packed import eval_solver
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class FastreamExtractor(BaseExtractor):
|
class FastreamExtractor(BaseExtractor):
|
||||||
"""Fastream URL extractor."""
|
"""Fastream URL extractor."""
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
self.mediaflow_endpoint = "hls_manifest_proxy"
|
self.mediaflow_endpoint = "hls_manifest_proxy"
|
||||||
|
|
||||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||||
headers = {'Accept': '*/*', 'Connection': 'keep-alive','Accept-Language': 'en-US,en;q=0.5','Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0'}
|
headers = {
|
||||||
|
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||||
|
"Connection": "keep-alive",
|
||||||
|
"Accept-Language": "en-US,en;q=0.5",
|
||||||
|
"user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0",
|
||||||
|
}
|
||||||
patterns = [r'file:"(.*?)"']
|
patterns = [r'file:"(.*?)"']
|
||||||
|
|
||||||
final_url = await eval_solver(self, url, headers, patterns)
|
final_url = await eval_solver(self, url, headers, patterns)
|
||||||
|
|
||||||
self.base_headers["referer"] = f'https://{url.replace("https://","").split("/")[0]}/'
|
self.base_headers["referer"] = f"https://{url.replace('https://', '').split('/')[0]}/"
|
||||||
self.base_headers["origin"] = f'https://{url.replace("https://","").split("/")[0]}'
|
self.base_headers["origin"] = f"https://{url.replace('https://', '').split('/')[0]}"
|
||||||
self.base_headers['Accept-Language'] = 'en-US,en;q=0.5'
|
self.base_headers["Accept-Language"] = "en-US,en;q=0.5"
|
||||||
self.base_headers['Accept'] = '*/*'
|
self.base_headers["Accept"] = "*/*"
|
||||||
self.base_headers['user-agent'] = 'Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0'
|
self.base_headers["user-agent"] = "Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0"
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"destination_url": final_url,
|
"destination_url": final_url,
|
||||||
|
|||||||
@@ -3,17 +3,18 @@ from typing import Dict, Any
|
|||||||
from mediaflow_proxy.extractors.base import BaseExtractor
|
from mediaflow_proxy.extractors.base import BaseExtractor
|
||||||
from mediaflow_proxy.utils.packed import eval_solver
|
from mediaflow_proxy.utils.packed import eval_solver
|
||||||
|
|
||||||
|
|
||||||
class FileLionsExtractor(BaseExtractor):
|
class FileLionsExtractor(BaseExtractor):
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
self.mediaflow_endpoint = "hls_manifest_proxy"
|
self.mediaflow_endpoint = "hls_manifest_proxy"
|
||||||
|
|
||||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||||
headers = {}
|
headers = {}
|
||||||
patterns = [ # See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/filelions.py
|
patterns = [ # See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/filelions.py
|
||||||
r'''sources:\s*\[{file:\s*["'](?P<url>[^"']+)''',
|
r"""sources:\s*\[{file:\s*["'](?P<url>[^"']+)""",
|
||||||
r'''["']hls4["']:\s*["'](?P<url>[^"']+)''',
|
r"""["']hls4["']:\s*["'](?P<url>[^"']+)""",
|
||||||
r'''["']hls2["']:\s*["'](?P<url>[^"']+)'''
|
r"""["']hls2["']:\s*["'](?P<url>[^"']+)""",
|
||||||
]
|
]
|
||||||
|
|
||||||
final_url = await eval_solver(self, url, headers, patterns)
|
final_url = await eval_solver(self, url, headers, patterns)
|
||||||
@@ -23,4 +24,5 @@ class FileLionsExtractor(BaseExtractor):
|
|||||||
"destination_url": final_url,
|
"destination_url": final_url,
|
||||||
"request_headers": self.base_headers,
|
"request_headers": self.base_headers,
|
||||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
|
"stream_transformer": "ts_stream",
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ class FileMoonExtractor(BaseExtractor):
|
|||||||
)
|
)
|
||||||
|
|
||||||
test_resp = await self._make_request(final_url, headers=headers)
|
test_resp = await self._make_request(final_url, headers=headers)
|
||||||
if test_resp.status_code == 404:
|
if test_resp.status == 404:
|
||||||
raise ExtractorError("Stream not found (404)")
|
raise ExtractorError("Stream not found (404)")
|
||||||
|
|
||||||
self.base_headers["referer"] = url
|
self.base_headers["referer"] = url
|
||||||
|
|||||||
65
mediaflow_proxy/extractors/gupload.py
Normal file
65
mediaflow_proxy/extractors/gupload.py
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
import re
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
|
from typing import Dict, Any
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class GuploadExtractor(BaseExtractor):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
self.mediaflow_endpoint = "hls_manifest_proxy"
|
||||||
|
|
||||||
|
async def extract(self, url: str) -> Dict[str, Any]:
|
||||||
|
parsed = urlparse(url)
|
||||||
|
if not parsed.hostname or "gupload.xyz" not in parsed.hostname:
|
||||||
|
raise ExtractorError("GUPLOAD: Invalid domain")
|
||||||
|
|
||||||
|
headers = {
|
||||||
|
"User-Agent": (
|
||||||
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||||
|
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||||
|
"Chrome/144 Safari/537.36"
|
||||||
|
),
|
||||||
|
"Referer": "https://gupload.xyz/",
|
||||||
|
"Origin": "https://gupload.xyz",
|
||||||
|
}
|
||||||
|
|
||||||
|
# --- Fetch embed page ---
|
||||||
|
response = await self._make_request(url, headers=headers)
|
||||||
|
html = response.text
|
||||||
|
|
||||||
|
# --- Extract base64 payload ---
|
||||||
|
match = re.search(r"decodePayload\('([^']+)'\)", html)
|
||||||
|
if not match:
|
||||||
|
raise ExtractorError("GUPLOAD: Payload not found")
|
||||||
|
|
||||||
|
encoded = match.group(1).strip()
|
||||||
|
|
||||||
|
# --- Decode payload ---
|
||||||
|
try:
|
||||||
|
decoded = base64.b64decode(encoded).decode("utf-8", "ignore")
|
||||||
|
# payload format: <junk>|{json}
|
||||||
|
json_part = decoded.split("|", 1)[1]
|
||||||
|
payload = json.loads(json_part)
|
||||||
|
except Exception:
|
||||||
|
raise ExtractorError("GUPLOAD: Payload decode failed")
|
||||||
|
|
||||||
|
# --- Extract HLS URL ---
|
||||||
|
hls_url = payload.get("videoUrl")
|
||||||
|
if not hls_url:
|
||||||
|
raise ExtractorError("GUPLOAD: videoUrl missing")
|
||||||
|
|
||||||
|
# --- Validate stream (prevents client timeout) ---
|
||||||
|
test = await self._make_request(hls_url, headers=headers, raise_on_status=False)
|
||||||
|
if test.status >= 400:
|
||||||
|
raise ExtractorError(f"GUPLOAD: Stream unavailable ({test.status})")
|
||||||
|
|
||||||
|
# Return MASTER playlist
|
||||||
|
return {
|
||||||
|
"destination_url": hls_url,
|
||||||
|
"request_headers": headers,
|
||||||
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
|
}
|
||||||
@@ -2,9 +2,9 @@ import re
|
|||||||
from typing import Dict, Tuple, Optional
|
from typing import Dict, Tuple, Optional
|
||||||
from urllib.parse import urljoin, urlparse, unquote
|
from urllib.parse import urljoin, urlparse, unquote
|
||||||
|
|
||||||
from httpx import Response
|
import aiohttp
|
||||||
|
|
||||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError, HttpResponse
|
||||||
|
|
||||||
|
|
||||||
class LiveTVExtractor(BaseExtractor):
|
class LiveTVExtractor(BaseExtractor):
|
||||||
@@ -33,20 +33,21 @@ class LiveTVExtractor(BaseExtractor):
|
|||||||
stream_title: Optional stream title to filter specific stream
|
stream_title: Optional stream title to filter specific stream
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple[str, Dict[str, str]]: Stream URL and required headers
|
Dict containing destination_url, request_headers, and mediaflow_endpoint
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# Get the channel page
|
# Get the channel page
|
||||||
response = await self._make_request(url)
|
response = await self._make_request(url)
|
||||||
|
response_text = response.text
|
||||||
self.base_headers["referer"] = urljoin(url, "/")
|
self.base_headers["referer"] = urljoin(url, "/")
|
||||||
|
|
||||||
# Extract player API details
|
# Extract player API details
|
||||||
player_api_base, method = await self._extract_player_api_base(response.text)
|
player_api_base, method = await self._extract_player_api_base(response_text)
|
||||||
if not player_api_base:
|
if not player_api_base:
|
||||||
raise ExtractorError("Failed to extract player API URL")
|
raise ExtractorError("Failed to extract player API URL")
|
||||||
|
|
||||||
# Get player options
|
# Get player options
|
||||||
options_data = await self._get_player_options(response.text)
|
options_data = await self._get_player_options(response_text)
|
||||||
if not options_data:
|
if not options_data:
|
||||||
raise ExtractorError("No player options found")
|
raise ExtractorError("No player options found")
|
||||||
|
|
||||||
@@ -66,7 +67,7 @@ class LiveTVExtractor(BaseExtractor):
|
|||||||
if not stream_url:
|
if not stream_url:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
response = {
|
result = {
|
||||||
"destination_url": stream_url,
|
"destination_url": stream_url,
|
||||||
"request_headers": self.base_headers,
|
"request_headers": self.base_headers,
|
||||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
@@ -75,7 +76,7 @@ class LiveTVExtractor(BaseExtractor):
|
|||||||
# Set endpoint based on stream type
|
# Set endpoint based on stream type
|
||||||
if stream_data.get("type") == "mpd":
|
if stream_data.get("type") == "mpd":
|
||||||
if stream_data.get("drm_key_id") and stream_data.get("drm_key"):
|
if stream_data.get("drm_key_id") and stream_data.get("drm_key"):
|
||||||
response.update(
|
result.update(
|
||||||
{
|
{
|
||||||
"query_params": {
|
"query_params": {
|
||||||
"key_id": stream_data["drm_key_id"],
|
"key_id": stream_data["drm_key_id"],
|
||||||
@@ -85,7 +86,7 @@ class LiveTVExtractor(BaseExtractor):
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
return response
|
return result
|
||||||
|
|
||||||
raise ExtractorError("No valid stream found")
|
raise ExtractorError("No valid stream found")
|
||||||
|
|
||||||
@@ -120,7 +121,12 @@ class LiveTVExtractor(BaseExtractor):
|
|||||||
api_url = f"{api_base}{post}/{type_}/{nume}"
|
api_url = f"{api_base}{post}/{type_}/{nume}"
|
||||||
response = await self._make_request(api_url)
|
response = await self._make_request(api_url)
|
||||||
else:
|
else:
|
||||||
form_data = {"action": "doo_player_ajax", "post": post, "nume": nume, "type": type_}
|
# Use aiohttp FormData for POST requests
|
||||||
|
form_data = aiohttp.FormData()
|
||||||
|
form_data.add_field("action", "doo_player_ajax")
|
||||||
|
form_data.add_field("post", post)
|
||||||
|
form_data.add_field("nume", nume)
|
||||||
|
form_data.add_field("type", type_)
|
||||||
response = await self._make_request(api_base, method="POST", data=form_data)
|
response = await self._make_request(api_base, method="POST", data=form_data)
|
||||||
|
|
||||||
# Get iframe URL from API response
|
# Get iframe URL from API response
|
||||||
@@ -136,7 +142,7 @@ class LiveTVExtractor(BaseExtractor):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise ExtractorError(f"Failed to process player option: {str(e)}")
|
raise ExtractorError(f"Failed to process player option: {str(e)}")
|
||||||
|
|
||||||
async def _extract_stream_url(self, iframe_response: Response, iframe_url: str) -> Dict:
|
async def _extract_stream_url(self, iframe_response: HttpResponse, iframe_url: str) -> Dict:
|
||||||
"""
|
"""
|
||||||
Extract final stream URL from iframe content.
|
Extract final stream URL from iframe content.
|
||||||
"""
|
"""
|
||||||
@@ -147,8 +153,9 @@ class LiveTVExtractor(BaseExtractor):
|
|||||||
|
|
||||||
# Check if content is already a direct M3U8 stream
|
# Check if content is already a direct M3U8 stream
|
||||||
content_types = ["application/x-mpegurl", "application/vnd.apple.mpegurl"]
|
content_types = ["application/x-mpegurl", "application/vnd.apple.mpegurl"]
|
||||||
|
content_type = iframe_response.headers.get("content-type", "")
|
||||||
|
|
||||||
if any(ext in iframe_response.headers["content-type"] for ext in content_types):
|
if any(ext in content_type for ext in content_types):
|
||||||
return {"url": iframe_url, "type": "m3u8"}
|
return {"url": iframe_url, "type": "m3u8"}
|
||||||
|
|
||||||
stream_data = {}
|
stream_data = {}
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ class LuluStreamExtractor(BaseExtractor):
|
|||||||
response = await self._make_request(url)
|
response = await self._make_request(url)
|
||||||
|
|
||||||
# See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/lulustream.py
|
# See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/lulustream.py
|
||||||
pattern = r'''sources:\s*\[{file:\s*["'](?P<url>[^"']+)'''
|
pattern = r"""sources:\s*\[{file:\s*["'](?P<url>[^"']+)"""
|
||||||
match = re.search(pattern, response.text, re.DOTALL)
|
match = re.search(pattern, response.text, re.DOTALL)
|
||||||
if not match:
|
if not match:
|
||||||
raise ExtractorError("Failed to extract source URL")
|
raise ExtractorError("Failed to extract source URL")
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
|
|
||||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
from mediaflow_proxy.extractors.base import BaseExtractor
|
||||||
from mediaflow_proxy.utils.packed import eval_solver
|
from mediaflow_proxy.utils.packed import eval_solver
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -22,7 +22,9 @@ class OkruExtractor(BaseExtractor):
|
|||||||
data_options = div.get("data-options")
|
data_options = div.get("data-options")
|
||||||
data = json.loads(data_options)
|
data = json.loads(data_options)
|
||||||
metadata = json.loads(data["flashvars"]["metadata"])
|
metadata = json.loads(data["flashvars"]["metadata"])
|
||||||
final_url = metadata.get("hlsMasterPlaylistUrl") or metadata.get("hlsManifestUrl")
|
final_url = (
|
||||||
|
metadata.get("hlsMasterPlaylistUrl") or metadata.get("hlsManifestUrl") or metadata.get("ondemandHls")
|
||||||
|
)
|
||||||
self.base_headers["referer"] = url
|
self.base_headers["referer"] = url
|
||||||
return {
|
return {
|
||||||
"destination_url": final_url,
|
"destination_url": final_url,
|
||||||
|
|||||||
@@ -1,10 +1,10 @@
|
|||||||
import re
|
import re
|
||||||
import logging
|
import logging
|
||||||
from typing import Any, Dict, Optional
|
from typing import Any, Dict
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||||
from mediaflow_proxy.utils.packed import detect, unpack
|
from mediaflow_proxy.utils.packed import unpack
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -32,18 +32,17 @@ class SportsonlineExtractor(BaseExtractor):
|
|||||||
def _detect_packed_blocks(self, html: str) -> list[str]:
|
def _detect_packed_blocks(self, html: str) -> list[str]:
|
||||||
"""
|
"""
|
||||||
Detect and extract packed eval blocks from HTML.
|
Detect and extract packed eval blocks from HTML.
|
||||||
Replicates the TypeScript logic: /eval\(function(.+?.+)/g
|
|
||||||
"""
|
"""
|
||||||
# Find all eval(function...) blocks - more greedy to capture full packed code
|
# Find all eval(function...) blocks - more greedy to capture full packed code
|
||||||
pattern = re.compile(r"eval\(function\(p,a,c,k,e,.*?\)\)(?:\s*;|\s*<)", re.DOTALL)
|
pattern = re.compile(r"eval\(function\(p,a,c,k,e,.*?\)\)(?:\s*;|\s*<)", re.DOTALL)
|
||||||
raw_matches = pattern.findall(html)
|
raw_matches = pattern.findall(html)
|
||||||
|
|
||||||
# If no matches with the strict pattern, try a more relaxed one
|
# If no matches with the strict pattern, try a more relaxed one
|
||||||
if not raw_matches:
|
if not raw_matches:
|
||||||
# Try to find eval(function and capture until we find the closing ))
|
# Try to find eval(function and capture until we find the closing ))
|
||||||
pattern = re.compile(r"eval\(function\(p,a,c,k,e,[dr]\).*?\}\(.*?\)\)", re.DOTALL)
|
pattern = re.compile(r"eval\(function\(p,a,c,k,e,[dr]\).*?\}\(.*?\)\)", re.DOTALL)
|
||||||
raw_matches = pattern.findall(html)
|
raw_matches = pattern.findall(html)
|
||||||
|
|
||||||
return raw_matches
|
return raw_matches
|
||||||
|
|
||||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||||
@@ -60,25 +59,25 @@ class SportsonlineExtractor(BaseExtractor):
|
|||||||
raise ExtractorError("No iframe found on the page")
|
raise ExtractorError("No iframe found on the page")
|
||||||
|
|
||||||
iframe_url = iframe_match.group(1)
|
iframe_url = iframe_match.group(1)
|
||||||
|
|
||||||
# Normalize iframe URL
|
# Normalize iframe URL
|
||||||
if iframe_url.startswith('//'):
|
if iframe_url.startswith("//"):
|
||||||
iframe_url = 'https:' + iframe_url
|
iframe_url = "https:" + iframe_url
|
||||||
elif iframe_url.startswith('/'):
|
elif iframe_url.startswith("/"):
|
||||||
parsed_main = urlparse(url)
|
parsed_main = urlparse(url)
|
||||||
iframe_url = f"{parsed_main.scheme}://{parsed_main.netloc}{iframe_url}"
|
iframe_url = f"{parsed_main.scheme}://{parsed_main.netloc}{iframe_url}"
|
||||||
|
|
||||||
logger.info(f"Found iframe URL: {iframe_url}")
|
logger.info(f"Found iframe URL: {iframe_url}")
|
||||||
|
|
||||||
# Step 2: Fetch iframe with Referer
|
# Step 2: Fetch iframe with Referer
|
||||||
iframe_headers = {
|
iframe_headers = {
|
||||||
'Referer': 'https://sportzonline.st/',
|
"Referer": "https://sportzonline.st/",
|
||||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36',
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36",
|
||||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||||
'Accept-Language': 'en-US,en;q=0.9,it;q=0.8',
|
"Accept-Language": "en-US,en;q=0.9,it;q=0.8",
|
||||||
'Cache-Control': 'no-cache'
|
"Cache-Control": "no-cache",
|
||||||
}
|
}
|
||||||
|
|
||||||
iframe_response = await self._make_request(iframe_url, headers=iframe_headers, timeout=15)
|
iframe_response = await self._make_request(iframe_url, headers=iframe_headers, timeout=15)
|
||||||
iframe_html = iframe_response.text
|
iframe_html = iframe_response.text
|
||||||
|
|
||||||
@@ -86,9 +85,9 @@ class SportsonlineExtractor(BaseExtractor):
|
|||||||
|
|
||||||
# Step 3: Detect packed blocks
|
# Step 3: Detect packed blocks
|
||||||
packed_blocks = self._detect_packed_blocks(iframe_html)
|
packed_blocks = self._detect_packed_blocks(iframe_html)
|
||||||
|
|
||||||
logger.info(f"Found {len(packed_blocks)} packed blocks")
|
logger.info(f"Found {len(packed_blocks)} packed blocks")
|
||||||
|
|
||||||
if not packed_blocks:
|
if not packed_blocks:
|
||||||
logger.warning("No packed blocks found, trying direct m3u8 search")
|
logger.warning("No packed blocks found, trying direct m3u8 search")
|
||||||
# Fallback: try direct m3u8 search
|
# Fallback: try direct m3u8 search
|
||||||
@@ -96,13 +95,10 @@ class SportsonlineExtractor(BaseExtractor):
|
|||||||
if direct_match:
|
if direct_match:
|
||||||
m3u8_url = direct_match.group(1)
|
m3u8_url = direct_match.group(1)
|
||||||
logger.info(f"Found direct m3u8 URL: {m3u8_url}")
|
logger.info(f"Found direct m3u8 URL: {m3u8_url}")
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"destination_url": m3u8_url,
|
"destination_url": m3u8_url,
|
||||||
"request_headers": {
|
"request_headers": {"Referer": iframe_url, "User-Agent": iframe_headers["User-Agent"]},
|
||||||
'Referer': iframe_url,
|
|
||||||
'User-Agent': iframe_headers['User-Agent']
|
|
||||||
},
|
|
||||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
@@ -134,13 +130,13 @@ class SportsonlineExtractor(BaseExtractor):
|
|||||||
r'file\s*:\s*["\']([^"\']+\.m3u8[^"\']*)["\']', # file: "...m3u8"
|
r'file\s*:\s*["\']([^"\']+\.m3u8[^"\']*)["\']', # file: "...m3u8"
|
||||||
r'["\']([^"\']*https?://[^"\']+\.m3u8[^"\']*)["\']', # any m3u8 URL
|
r'["\']([^"\']*https?://[^"\']+\.m3u8[^"\']*)["\']', # any m3u8 URL
|
||||||
]
|
]
|
||||||
|
|
||||||
for pattern in patterns:
|
for pattern in patterns:
|
||||||
src_match = re.search(pattern, unpacked_code)
|
src_match = re.search(pattern, unpacked_code)
|
||||||
if src_match:
|
if src_match:
|
||||||
m3u8_url = src_match.group(1)
|
m3u8_url = src_match.group(1)
|
||||||
# Verify it looks like a valid m3u8 URL
|
# Verify it looks like a valid m3u8 URL
|
||||||
if '.m3u8' in m3u8_url or 'http' in m3u8_url:
|
if ".m3u8" in m3u8_url or "http" in m3u8_url:
|
||||||
break
|
break
|
||||||
m3u8_url = None
|
m3u8_url = None
|
||||||
|
|
||||||
@@ -162,11 +158,11 @@ class SportsonlineExtractor(BaseExtractor):
|
|||||||
src_match = re.search(pattern, unpacked_code)
|
src_match = re.search(pattern, unpacked_code)
|
||||||
if src_match:
|
if src_match:
|
||||||
test_url = src_match.group(1)
|
test_url = src_match.group(1)
|
||||||
if '.m3u8' in test_url or 'http' in test_url:
|
if ".m3u8" in test_url or "http" in test_url:
|
||||||
m3u8_url = test_url
|
m3u8_url = test_url
|
||||||
logger.info(f"Found m3u8 in block {i}")
|
logger.info(f"Found m3u8 in block {i}")
|
||||||
break
|
break
|
||||||
|
|
||||||
if m3u8_url:
|
if m3u8_url:
|
||||||
break
|
break
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -181,10 +177,7 @@ class SportsonlineExtractor(BaseExtractor):
|
|||||||
# Return stream configuration
|
# Return stream configuration
|
||||||
return {
|
return {
|
||||||
"destination_url": m3u8_url,
|
"destination_url": m3u8_url,
|
||||||
"request_headers": {
|
"request_headers": {"Referer": iframe_url, "User-Agent": iframe_headers["User-Agent"]},
|
||||||
'Referer': iframe_url,
|
|
||||||
'User-Agent': iframe_headers['User-Agent']
|
|
||||||
},
|
|
||||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -15,8 +15,8 @@ class StreamtapeExtractor(BaseExtractor):
|
|||||||
if not matches:
|
if not matches:
|
||||||
raise ExtractorError("Failed to extract URL components")
|
raise ExtractorError("Failed to extract URL components")
|
||||||
i = 0
|
i = 0
|
||||||
for i in range(len(matches)):
|
for i in range(len(matches)):
|
||||||
if matches[i-1] == matches[i] and "ip=" in matches[i]:
|
if matches[i - 1] == matches[i] and "ip=" in matches[i]:
|
||||||
final_url = f"https://streamtape.com/get_video?{matches[i]}"
|
final_url = f"https://streamtape.com/get_video?{matches[i]}"
|
||||||
|
|
||||||
self.base_headers["referer"] = url
|
self.base_headers["referer"] = url
|
||||||
|
|||||||
@@ -19,18 +19,11 @@ class StreamWishExtractor(BaseExtractor):
|
|||||||
|
|
||||||
headers = {"Referer": referer}
|
headers = {"Referer": referer}
|
||||||
response = await self._make_request(url, headers=headers)
|
response = await self._make_request(url, headers=headers)
|
||||||
|
|
||||||
iframe_match = re.search(
|
iframe_match = re.search(r'<iframe[^>]+src=["\']([^"\']+)["\']', response.text, re.DOTALL)
|
||||||
r'<iframe[^>]+src=["\']([^"\']+)["\']',
|
|
||||||
response.text,
|
|
||||||
re.DOTALL
|
|
||||||
)
|
|
||||||
iframe_url = urljoin(url, iframe_match.group(1)) if iframe_match else url
|
iframe_url = urljoin(url, iframe_match.group(1)) if iframe_match else url
|
||||||
|
|
||||||
iframe_response = await self._make_request(
|
iframe_response = await self._make_request(iframe_url, headers=headers)
|
||||||
iframe_url,
|
|
||||||
headers=headers
|
|
||||||
)
|
|
||||||
html = iframe_response.text
|
html = iframe_response.text
|
||||||
|
|
||||||
final_url = self._extract_m3u8(html)
|
final_url = self._extract_m3u8(html)
|
||||||
@@ -58,15 +51,18 @@ class StreamWishExtractor(BaseExtractor):
|
|||||||
final_url = urljoin(iframe_url, final_url)
|
final_url = urljoin(iframe_url, final_url)
|
||||||
|
|
||||||
origin = f"{urlparse(referer).scheme}://{urlparse(referer).netloc}"
|
origin = f"{urlparse(referer).scheme}://{urlparse(referer).netloc}"
|
||||||
self.base_headers.update({
|
self.base_headers.update(
|
||||||
"Referer": referer,
|
{
|
||||||
"Origin": origin,
|
"Referer": referer,
|
||||||
})
|
"Origin": origin,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"destination_url": final_url,
|
"destination_url": final_url,
|
||||||
"request_headers": self.base_headers,
|
"request_headers": self.base_headers,
|
||||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
|
"stream_transformer": "ts_stream",
|
||||||
}
|
}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -74,8 +70,5 @@ class StreamWishExtractor(BaseExtractor):
|
|||||||
"""
|
"""
|
||||||
Extract first absolute m3u8 URL from text
|
Extract first absolute m3u8 URL from text
|
||||||
"""
|
"""
|
||||||
match = re.search(
|
match = re.search(r'https?://[^"\']+\.m3u8[^"\']*', text)
|
||||||
r'https?://[^"\']+\.m3u8[^"\']*',
|
|
||||||
text
|
|
||||||
)
|
|
||||||
return match.group(0) if match else None
|
return match.group(0) if match else None
|
||||||
|
|||||||
@@ -1,27 +1,64 @@
|
|||||||
import re
|
import re
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
|
from urllib.parse import urljoin, urlparse
|
||||||
|
|
||||||
from mediaflow_proxy.extractors.base import BaseExtractor
|
from bs4 import BeautifulSoup, SoupStrainer
|
||||||
from mediaflow_proxy.utils.packed import eval_solver
|
from curl_cffi.requests import AsyncSession
|
||||||
|
|
||||||
|
|
||||||
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||||
|
from mediaflow_proxy.utils.packed import unpack, detect, UnpackingError
|
||||||
|
|
||||||
|
|
||||||
class SupervideoExtractor(BaseExtractor):
|
class SupervideoExtractor(BaseExtractor):
|
||||||
"""Supervideo URL extractor."""
|
"""Supervideo URL extractor.
|
||||||
|
|
||||||
|
Uses curl_cffi to bypass Cloudflare protection.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
self.mediaflow_endpoint = "hls_manifest_proxy"
|
self.mediaflow_endpoint = "hls_manifest_proxy"
|
||||||
|
|
||||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||||
headers = {'Accept': '*/*', 'Connection': 'keep-alive', 'User-Agent': 'Mozilla/5.0 (Linux; Android 12) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.71 Mobile Safari/537.36', 'user-agent': 'Mozilla/5.0 (Linux; Android 12) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.71 Mobile Safari/537.36'}
|
"""Extract video URL from Supervideo.
|
||||||
|
|
||||||
|
Uses curl_cffi with Chrome impersonation to bypass Cloudflare.
|
||||||
|
"""
|
||||||
|
|
||||||
patterns = [r'file:"(.*?)"']
|
patterns = [r'file:"(.*?)"']
|
||||||
|
|
||||||
final_url = await eval_solver(self, url, headers, patterns)
|
try:
|
||||||
|
async with AsyncSession() as session:
|
||||||
|
response = await session.get(url, impersonate="chrome")
|
||||||
|
|
||||||
self.base_headers["referer"] = url
|
if response.status_code != 200:
|
||||||
return {
|
raise ExtractorError(f"HTTP {response.status_code} while fetching {url}")
|
||||||
"destination_url": final_url,
|
|
||||||
"request_headers": self.base_headers,
|
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("script"))
|
||||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
script_all = soup.find_all("script")
|
||||||
}
|
|
||||||
|
for script in script_all:
|
||||||
|
if script.text and detect(script.text):
|
||||||
|
unpacked_code = unpack(script.text)
|
||||||
|
for pattern in patterns:
|
||||||
|
match = re.search(pattern, unpacked_code)
|
||||||
|
if match:
|
||||||
|
extracted_url = match.group(1)
|
||||||
|
if not urlparse(extracted_url).scheme:
|
||||||
|
extracted_url = urljoin(url, extracted_url)
|
||||||
|
|
||||||
|
self.base_headers["referer"] = url
|
||||||
|
return {
|
||||||
|
"destination_url": extracted_url,
|
||||||
|
"request_headers": self.base_headers,
|
||||||
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
|
}
|
||||||
|
|
||||||
|
raise ExtractorError("No packed JS found or no file URL pattern matched")
|
||||||
|
|
||||||
|
except UnpackingError as e:
|
||||||
|
raise ExtractorError(f"Failed to unpack Supervideo JS: {e}")
|
||||||
|
except Exception as e:
|
||||||
|
if isinstance(e, ExtractorError):
|
||||||
|
raise
|
||||||
|
raise ExtractorError(f"Supervideo extraction failed: {e}")
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
import re
|
import re
|
||||||
from typing import Dict, Any
|
|
||||||
|
|
||||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||||
|
|
||||||
@@ -36,7 +35,7 @@ class TurboVidPlayExtractor(BaseExtractor):
|
|||||||
if media_url.startswith("//"):
|
if media_url.startswith("//"):
|
||||||
media_url = "https:" + media_url
|
media_url = "https:" + media_url
|
||||||
elif media_url.startswith("/"):
|
elif media_url.startswith("/"):
|
||||||
media_url = response.url.origin + media_url
|
media_url = response.get_origin() + media_url
|
||||||
|
|
||||||
#
|
#
|
||||||
# 3. Fetch the intermediate playlist
|
# 3. Fetch the intermediate playlist
|
||||||
@@ -53,16 +52,11 @@ class TurboVidPlayExtractor(BaseExtractor):
|
|||||||
|
|
||||||
real_m3u8 = m2.group(0)
|
real_m3u8 = m2.group(0)
|
||||||
|
|
||||||
#
|
|
||||||
# 5. Final headers
|
|
||||||
#
|
|
||||||
self.base_headers["referer"] = url
|
|
||||||
|
|
||||||
#
|
|
||||||
# 6. Always return master proxy (your MediaFlow only supports this)
|
|
||||||
#
|
|
||||||
return {
|
return {
|
||||||
"destination_url": real_m3u8,
|
"destination_url": real_m3u8,
|
||||||
"request_headers": self.base_headers,
|
"request_headers": {"origin": response.get_origin()},
|
||||||
|
"propagate_response_headers": {"content-type": "video/mp2t"},
|
||||||
|
"remove_response_headers": ["content-length", "content-range"],
|
||||||
"mediaflow_endpoint": "hls_manifest_proxy",
|
"mediaflow_endpoint": "hls_manifest_proxy",
|
||||||
|
"stream_transformer": "ts_stream", # Use TS transformer for PNG/padding stripping
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import logging
|
import logging
|
||||||
from typing import Any, Dict, Optional
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -8,6 +9,11 @@ logger = logging.getLogger(__name__)
|
|||||||
class VavooExtractor(BaseExtractor):
|
class VavooExtractor(BaseExtractor):
|
||||||
"""Vavoo URL extractor for resolving vavoo.to links.
|
"""Vavoo URL extractor for resolving vavoo.to links.
|
||||||
|
|
||||||
|
Supports two URL formats:
|
||||||
|
1. Web-VOD API links: https://vavoo.to/web-vod/api/get?link=...
|
||||||
|
These redirect (302) to external video hosts (Doodstream, etc.)
|
||||||
|
2. Legacy mediahubmx format (currently broken on Vavoo's end)
|
||||||
|
|
||||||
Features:
|
Features:
|
||||||
- Uses BaseExtractor's retry/timeouts
|
- Uses BaseExtractor's retry/timeouts
|
||||||
- Improved headers to mimic Android okhttp client
|
- Improved headers to mimic Android okhttp client
|
||||||
@@ -18,6 +24,40 @@ class VavooExtractor(BaseExtractor):
|
|||||||
super().__init__(request_headers)
|
super().__init__(request_headers)
|
||||||
self.mediaflow_endpoint = "proxy_stream_endpoint"
|
self.mediaflow_endpoint = "proxy_stream_endpoint"
|
||||||
|
|
||||||
|
async def _resolve_web_vod_link(self, url: str) -> str:
|
||||||
|
"""Resolve a web-vod API link by getting the redirect Location header."""
|
||||||
|
import aiohttp
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Use aiohttp directly with allow_redirects=False to get the Location header
|
||||||
|
timeout = aiohttp.ClientTimeout(total=10)
|
||||||
|
async with aiohttp.ClientSession(timeout=timeout) as session:
|
||||||
|
async with session.get(
|
||||||
|
url,
|
||||||
|
headers={"Accept": "application/json"},
|
||||||
|
allow_redirects=False,
|
||||||
|
) as resp:
|
||||||
|
# Check for redirect
|
||||||
|
if resp.status in (301, 302, 303, 307, 308):
|
||||||
|
location = resp.headers.get("Location") or resp.headers.get("location")
|
||||||
|
if location:
|
||||||
|
logger.info(f"Vavoo web-vod redirected to: {location}")
|
||||||
|
return location
|
||||||
|
|
||||||
|
# If we got a 200, the response might contain the URL
|
||||||
|
if resp.status == 200:
|
||||||
|
text = await resp.text()
|
||||||
|
if text and text.startswith("http"):
|
||||||
|
logger.info(f"Vavoo web-vod resolved to: {text.strip()}")
|
||||||
|
return text.strip()
|
||||||
|
|
||||||
|
raise ExtractorError(f"Vavoo web-vod API returned unexpected status {resp.status}")
|
||||||
|
|
||||||
|
except ExtractorError:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
raise ExtractorError(f"Failed to resolve Vavoo web-vod link: {e}")
|
||||||
|
|
||||||
async def get_auth_signature(self) -> Optional[str]:
|
async def get_auth_signature(self) -> Optional[str]:
|
||||||
"""Get authentication signature for Vavoo API (async)."""
|
"""Get authentication signature for Vavoo API (async)."""
|
||||||
headers = {
|
headers = {
|
||||||
@@ -27,10 +67,11 @@ class VavooExtractor(BaseExtractor):
|
|||||||
"accept-encoding": "gzip",
|
"accept-encoding": "gzip",
|
||||||
}
|
}
|
||||||
import time
|
import time
|
||||||
|
|
||||||
current_time = int(time.time() * 1000)
|
current_time = int(time.time() * 1000)
|
||||||
|
|
||||||
data = {
|
data = {
|
||||||
"token": "tosFwQCJMS8qrW_AjLoHPQ41646J5dRNha6ZWHnijoYQQQoADQoXYSo7ki7O5-CsgN4CH0uRk6EEoJ0728ar9scCRQW3ZkbfrPfeCXW2VgopSW2FWDqPOoVYIuVPAOnXCZ5g",
|
"token": "",
|
||||||
"reason": "app-blur",
|
"reason": "app-blur",
|
||||||
"locale": "de",
|
"locale": "de",
|
||||||
"theme": "dark",
|
"theme": "dark",
|
||||||
@@ -40,21 +81,11 @@ class VavooExtractor(BaseExtractor):
|
|||||||
"brand": "google",
|
"brand": "google",
|
||||||
"model": "Pixel",
|
"model": "Pixel",
|
||||||
"name": "sdk_gphone64_arm64",
|
"name": "sdk_gphone64_arm64",
|
||||||
"uniqueId": "d10e5d99ab665233"
|
"uniqueId": "d10e5d99ab665233",
|
||||||
},
|
|
||||||
"os": {
|
|
||||||
"name": "android",
|
|
||||||
"version": "13"
|
|
||||||
},
|
|
||||||
"app": {
|
|
||||||
"platform": "android",
|
|
||||||
"version": "3.1.21"
|
|
||||||
},
|
|
||||||
"version": {
|
|
||||||
"package": "tv.vavoo.app",
|
|
||||||
"binary": "3.1.21",
|
|
||||||
"js": "3.1.21"
|
|
||||||
},
|
},
|
||||||
|
"os": {"name": "android", "version": "13"},
|
||||||
|
"app": {"platform": "android", "version": "3.1.21"},
|
||||||
|
"version": {"package": "tv.vavoo.app", "binary": "3.1.21", "js": "3.1.21"},
|
||||||
},
|
},
|
||||||
"appFocusTime": 0,
|
"appFocusTime": 0,
|
||||||
"playerActive": False,
|
"playerActive": False,
|
||||||
@@ -75,11 +106,9 @@ class VavooExtractor(BaseExtractor):
|
|||||||
"ssVersion": 1,
|
"ssVersion": 1,
|
||||||
"enabled": True,
|
"enabled": True,
|
||||||
"autoServer": True,
|
"autoServer": True,
|
||||||
"id": "de-fra"
|
"id": "de-fra",
|
||||||
},
|
},
|
||||||
"iap": {
|
"iap": {"supported": False},
|
||||||
"supported": False
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -94,7 +123,7 @@ class VavooExtractor(BaseExtractor):
|
|||||||
try:
|
try:
|
||||||
result = resp.json()
|
result = resp.json()
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.warning("Vavoo ping returned non-json response (status=%s).", resp.status_code)
|
logger.warning("Vavoo ping returned non-json response (status=%s).", resp.status)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
addon_sig = result.get("addonSig") if isinstance(result, dict) else None
|
addon_sig = result.get("addonSig") if isinstance(result, dict) else None
|
||||||
@@ -109,10 +138,48 @@ class VavooExtractor(BaseExtractor):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||||
"""Extract Vavoo stream URL (async)."""
|
"""Extract Vavoo stream URL (async).
|
||||||
|
|
||||||
|
Supports:
|
||||||
|
- Direct play URLs: https://vavoo.to/play/{id}/index.m3u8 (Live TV)
|
||||||
|
- Web-VOD API links: https://vavoo.to/web-vod/api/get?link=...
|
||||||
|
- Legacy mediahubmx links (may not work due to Vavoo API changes)
|
||||||
|
"""
|
||||||
if "vavoo.to" not in url:
|
if "vavoo.to" not in url:
|
||||||
raise ExtractorError("Not a valid Vavoo URL")
|
raise ExtractorError("Not a valid Vavoo URL")
|
||||||
|
|
||||||
|
# Check if this is a direct play URL (Live TV)
|
||||||
|
# These URLs are already m3u8 streams but need auth signature
|
||||||
|
if "/play/" in url and url.endswith(".m3u8"):
|
||||||
|
signature = await self.get_auth_signature()
|
||||||
|
if not signature:
|
||||||
|
raise ExtractorError("Failed to get Vavoo authentication signature for Live TV")
|
||||||
|
|
||||||
|
stream_headers = {
|
||||||
|
"user-agent": "okhttp/4.11.0",
|
||||||
|
"referer": "https://vavoo.to/",
|
||||||
|
"mediahubmx-signature": signature,
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
"destination_url": url,
|
||||||
|
"request_headers": stream_headers,
|
||||||
|
"mediaflow_endpoint": "hls_manifest_proxy",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check if this is a web-vod API link (new format)
|
||||||
|
if "/web-vod/api/get" in url:
|
||||||
|
resolved_url = await self._resolve_web_vod_link(url)
|
||||||
|
stream_headers = {
|
||||||
|
"user-agent": self.base_headers.get("user-agent", "Mozilla/5.0"),
|
||||||
|
"referer": "https://vavoo.to/",
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
"destination_url": resolved_url,
|
||||||
|
"request_headers": stream_headers,
|
||||||
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Legacy mediahubmx flow
|
||||||
signature = await self.get_auth_signature()
|
signature = await self.get_auth_signature()
|
||||||
if not signature:
|
if not signature:
|
||||||
raise ExtractorError("Failed to get Vavoo authentication signature")
|
raise ExtractorError("Failed to get Vavoo authentication signature")
|
||||||
@@ -139,14 +206,9 @@ class VavooExtractor(BaseExtractor):
|
|||||||
"accept": "application/json",
|
"accept": "application/json",
|
||||||
"content-type": "application/json; charset=utf-8",
|
"content-type": "application/json; charset=utf-8",
|
||||||
"accept-encoding": "gzip",
|
"accept-encoding": "gzip",
|
||||||
"mediahubmx-signature": signature
|
"mediahubmx-signature": signature,
|
||||||
}
|
|
||||||
data = {
|
|
||||||
"language": "de",
|
|
||||||
"region": "AT",
|
|
||||||
"url": link,
|
|
||||||
"clientVersion": "3.1.21"
|
|
||||||
}
|
}
|
||||||
|
data = {"language": "de", "region": "AT", "url": link, "clientVersion": "3.1.21"}
|
||||||
try:
|
try:
|
||||||
logger.info(f"Attempting to resolve Vavoo URL: {link}")
|
logger.info(f"Attempting to resolve Vavoo URL: {link}")
|
||||||
resp = await self._make_request(
|
resp = await self._make_request(
|
||||||
@@ -161,7 +223,11 @@ class VavooExtractor(BaseExtractor):
|
|||||||
try:
|
try:
|
||||||
result = resp.json()
|
result = resp.json()
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.warning("Vavoo resolve returned non-json response (status=%s). Body preview: %s", resp.status_code, getattr(resp, "text", "")[:500])
|
logger.warning(
|
||||||
|
"Vavoo resolve returned non-json response (status=%s). Body preview: %s",
|
||||||
|
resp.status,
|
||||||
|
getattr(resp, "text", "")[:500],
|
||||||
|
)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
logger.debug("Vavoo API response: %s", result)
|
logger.debug("Vavoo API response: %s", result)
|
||||||
|
|||||||
@@ -16,10 +16,9 @@ class VidmolyExtractor(BaseExtractor):
|
|||||||
raise ExtractorError("VIDMOLY: Invalid domain")
|
raise ExtractorError("VIDMOLY: Invalid domain")
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
"User-Agent":
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
"Chrome/120 Safari/537.36",
|
||||||
"Chrome/120 Safari/537.36",
|
|
||||||
"Referer": url,
|
"Referer": url,
|
||||||
"Sec-Fetch-Dest": "iframe",
|
"Sec-Fetch-Dest": "iframe",
|
||||||
}
|
}
|
||||||
@@ -29,10 +28,7 @@ class VidmolyExtractor(BaseExtractor):
|
|||||||
html = response.text
|
html = response.text
|
||||||
|
|
||||||
# --- Extract master m3u8 ---
|
# --- Extract master m3u8 ---
|
||||||
match = re.search(
|
match = re.search(r'sources\s*:\s*\[\s*\{\s*file\s*:\s*[\'"]([^\'"]+)', html)
|
||||||
r'sources:\s*\[\{file:"([^"]+)',
|
|
||||||
html
|
|
||||||
)
|
|
||||||
if not match:
|
if not match:
|
||||||
raise ExtractorError("VIDMOLY: Stream URL not found")
|
raise ExtractorError("VIDMOLY: Stream URL not found")
|
||||||
|
|
||||||
@@ -49,10 +45,8 @@ class VidmolyExtractor(BaseExtractor):
|
|||||||
raise ExtractorError("VIDMOLY: Request timed out")
|
raise ExtractorError("VIDMOLY: Request timed out")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
if test.status_code >= 400:
|
if test.status >= 400:
|
||||||
raise ExtractorError(
|
raise ExtractorError(f"VIDMOLY: Stream unavailable ({test.status})")
|
||||||
f"VIDMOLY: Stream unavailable ({test.status_code})"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Return MASTER playlist, not variant
|
# Return MASTER playlist, not variant
|
||||||
# Let MediaFlow Proxy handle variants
|
# Let MediaFlow Proxy handle variants
|
||||||
|
|||||||
@@ -8,23 +8,23 @@ from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
|||||||
class VidozaExtractor(BaseExtractor):
|
class VidozaExtractor(BaseExtractor):
|
||||||
def __init__(self, request_headers: dict):
|
def __init__(self, request_headers: dict):
|
||||||
super().__init__(request_headers)
|
super().__init__(request_headers)
|
||||||
# if your base doesn’t set this, keep it; otherwise you can remove:
|
|
||||||
self.mediaflow_endpoint = "proxy_stream_endpoint"
|
self.mediaflow_endpoint = "proxy_stream_endpoint"
|
||||||
|
|
||||||
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
|
||||||
parsed = urlparse(url)
|
parsed = urlparse(url)
|
||||||
|
|
||||||
# Accept vidoza + videzz
|
|
||||||
if not parsed.hostname or not (
|
if not parsed.hostname or not (
|
||||||
parsed.hostname.endswith("vidoza.net")
|
parsed.hostname.endswith("vidoza.net") or parsed.hostname.endswith("videzz.net")
|
||||||
or parsed.hostname.endswith("videzz.net")
|
|
||||||
):
|
):
|
||||||
raise ExtractorError("VIDOZA: Invalid domain")
|
raise ExtractorError("VIDOZA: Invalid domain")
|
||||||
|
|
||||||
|
# Use the correct referer for clones
|
||||||
|
referer = f"https://{parsed.hostname}/"
|
||||||
|
|
||||||
headers = self.base_headers.copy()
|
headers = self.base_headers.copy()
|
||||||
headers.update(
|
headers.update(
|
||||||
{
|
{
|
||||||
"referer": "https://vidoza.net/",
|
"referer": referer,
|
||||||
"user-agent": (
|
"user-agent": (
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||||
@@ -35,16 +35,14 @@ class VidozaExtractor(BaseExtractor):
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
# 1) Fetch the embed page (or whatever URL you pass in)
|
# 1) Fetch embed page
|
||||||
response = await self._make_request(url, headers=headers)
|
response = await self._make_request(url, headers=headers)
|
||||||
html = response.text or ""
|
html = response.text or ""
|
||||||
|
|
||||||
if not html:
|
if not html:
|
||||||
raise ExtractorError("VIDOZA: Empty HTML from Vidoza")
|
raise ExtractorError("VIDOZA: Empty HTML")
|
||||||
|
|
||||||
cookies = response.cookies or {}
|
# 2) Extract video URL
|
||||||
|
|
||||||
# 2) Extract final link with REGEX
|
|
||||||
pattern = re.compile(
|
pattern = re.compile(
|
||||||
r"""["']?\s*(?:file|src)\s*["']?\s*[:=,]?\s*["'](?P<url>[^"']+)"""
|
r"""["']?\s*(?:file|src)\s*["']?\s*[:=,]?\s*["'](?P<url>[^"']+)"""
|
||||||
r"""(?:[^}>\]]+)["']?\s*res\s*["']?\s*[:=]\s*["']?(?P<label>[^"',]+)""",
|
r"""(?:[^}>\]]+)["']?\s*res\s*["']?\s*[:=]\s*["']?(?P<label>[^"',]+)""",
|
||||||
@@ -53,21 +51,15 @@ class VidozaExtractor(BaseExtractor):
|
|||||||
|
|
||||||
match = pattern.search(html)
|
match = pattern.search(html)
|
||||||
if not match:
|
if not match:
|
||||||
raise ExtractorError("VIDOZA: Unable to extract video + label from JS")
|
raise ExtractorError("VIDOZA: Video URL not found")
|
||||||
|
|
||||||
mp4_url = match.group("url")
|
video_url = match.group("url")
|
||||||
label = match.group("label").strip()
|
|
||||||
|
|
||||||
# Fix URLs like //str38.vidoza.net/...
|
if video_url.startswith("//"):
|
||||||
if mp4_url.startswith("//"):
|
video_url = "https:" + video_url
|
||||||
mp4_url = "https:" + mp4_url
|
|
||||||
|
|
||||||
# 3) Attach cookies (token may depend on these)
|
|
||||||
if cookies:
|
|
||||||
headers["cookie"] = "; ".join(f"{k}={v}" for k, v in cookies.items())
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"destination_url": mp4_url,
|
"destination_url": video_url,
|
||||||
"request_headers": headers,
|
"request_headers": headers,
|
||||||
"mediaflow_endpoint": self.mediaflow_endpoint,
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
from urllib.parse import urlparse, parse_qs
|
|
||||||
|
|
||||||
from bs4 import BeautifulSoup, SoupStrainer
|
from bs4 import BeautifulSoup, SoupStrainer
|
||||||
|
|
||||||
@@ -25,7 +24,7 @@ class VixCloudExtractor(BaseExtractor):
|
|||||||
"Origin": f"{site_url}",
|
"Origin": f"{site_url}",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
if response.status_code != 200:
|
if response.status != 200:
|
||||||
raise ExtractorError("Outdated Url")
|
raise ExtractorError("Outdated Url")
|
||||||
# Soup the response
|
# Soup the response
|
||||||
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("div", {"id": "app"}))
|
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("div", {"id": "app"}))
|
||||||
@@ -48,8 +47,8 @@ class VixCloudExtractor(BaseExtractor):
|
|||||||
response = await self._make_request(iframe, headers={"x-inertia": "true", "x-inertia-version": version})
|
response = await self._make_request(iframe, headers={"x-inertia": "true", "x-inertia-version": version})
|
||||||
elif "movie" in url or "tv" in url:
|
elif "movie" in url or "tv" in url:
|
||||||
response = await self._make_request(url)
|
response = await self._make_request(url)
|
||||||
|
|
||||||
if response.status_code != 200:
|
if response.status != 200:
|
||||||
raise ExtractorError("Failed to extract URL components, Invalid Request")
|
raise ExtractorError("Failed to extract URL components, Invalid Request")
|
||||||
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("body"))
|
soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("body"))
|
||||||
if soup:
|
if soup:
|
||||||
@@ -58,7 +57,7 @@ class VixCloudExtractor(BaseExtractor):
|
|||||||
expires = re.search(r"'expires':\s*'(\d+)'", script).group(1)
|
expires = re.search(r"'expires':\s*'(\d+)'", script).group(1)
|
||||||
server_url = re.search(r"url:\s*'([^']+)'", script).group(1)
|
server_url = re.search(r"url:\s*'([^']+)'", script).group(1)
|
||||||
if "?b=1" in server_url:
|
if "?b=1" in server_url:
|
||||||
final_url = f'{server_url}&token={token}&expires={expires}'
|
final_url = f"{server_url}&token={token}&expires={expires}"
|
||||||
else:
|
else:
|
||||||
final_url = f"{server_url}?token={token}&expires={expires}"
|
final_url = f"{server_url}?token={token}&expires={expires}"
|
||||||
if "window.canPlayFHD = true" in script:
|
if "window.canPlayFHD = true" in script:
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ class VoeExtractor(BaseExtractor):
|
|||||||
response = await self._make_request(url)
|
response = await self._make_request(url)
|
||||||
|
|
||||||
# See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/voesx.py
|
# See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/voesx.py
|
||||||
redirect_pattern = r'''window\.location\.href\s*=\s*'([^']+)'''
|
redirect_pattern = r"""window\.location\.href\s*=\s*'([^']+)"""
|
||||||
redirect_match = re.search(redirect_pattern, response.text, re.DOTALL)
|
redirect_match = re.search(redirect_pattern, response.text, re.DOTALL)
|
||||||
if redirect_match:
|
if redirect_match:
|
||||||
if redirected:
|
if redirected:
|
||||||
@@ -37,7 +37,7 @@ class VoeExtractor(BaseExtractor):
|
|||||||
|
|
||||||
data = self.voe_decode(code_and_script_match.group(1), luts_match.group(1))
|
data = self.voe_decode(code_and_script_match.group(1), luts_match.group(1))
|
||||||
|
|
||||||
final_url = data.get('source')
|
final_url = data.get("source")
|
||||||
if not final_url:
|
if not final_url:
|
||||||
raise ExtractorError("VOE: failed to extract video URL")
|
raise ExtractorError("VOE: failed to extract video URL")
|
||||||
|
|
||||||
@@ -51,8 +51,9 @@ class VoeExtractor(BaseExtractor):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def voe_decode(ct: str, luts: str) -> Dict[str, Any]:
|
def voe_decode(ct: str, luts: str) -> Dict[str, Any]:
|
||||||
import json
|
import json
|
||||||
lut = [''.join([('\\' + x) if x in '.*+?^${}()|[]\\' else x for x in i]) for i in luts[2:-2].split("','")]
|
|
||||||
txt = ''
|
lut = ["".join([("\\" + x) if x in ".*+?^${}()|[]\\" else x for x in i]) for i in luts[2:-2].split("','")]
|
||||||
|
txt = ""
|
||||||
for i in ct:
|
for i in ct:
|
||||||
x = ord(i)
|
x = ord(i)
|
||||||
if 64 < x < 91:
|
if 64 < x < 91:
|
||||||
@@ -61,8 +62,8 @@ class VoeExtractor(BaseExtractor):
|
|||||||
x = (x - 84) % 26 + 97
|
x = (x - 84) % 26 + 97
|
||||||
txt += chr(x)
|
txt += chr(x)
|
||||||
for i in lut:
|
for i in lut:
|
||||||
txt = re.sub(i, '', txt)
|
txt = re.sub(i, "", txt)
|
||||||
ct = base64.b64decode(txt).decode('utf-8')
|
ct = base64.b64decode(txt).decode("utf-8")
|
||||||
txt = ''.join([chr(ord(i) - 3) for i in ct])
|
txt = "".join([chr(ord(i) - 3) for i in ct])
|
||||||
txt = base64.b64decode(txt[::-1]).decode('utf-8')
|
txt = base64.b64decode(txt[::-1]).decode("utf-8")
|
||||||
return json.loads(txt)
|
return json.loads(txt)
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,7 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
|
import sys
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
from importlib import resources
|
from importlib import resources
|
||||||
|
|
||||||
from fastapi import FastAPI, Depends, Security, HTTPException
|
from fastapi import FastAPI, Depends, Security, HTTPException
|
||||||
@@ -10,14 +12,85 @@ from starlette.staticfiles import StaticFiles
|
|||||||
|
|
||||||
from mediaflow_proxy.configs import settings
|
from mediaflow_proxy.configs import settings
|
||||||
from mediaflow_proxy.middleware import UIAccessControlMiddleware
|
from mediaflow_proxy.middleware import UIAccessControlMiddleware
|
||||||
from mediaflow_proxy.routes import proxy_router, extractor_router, speedtest_router, playlist_builder_router
|
from mediaflow_proxy.routes import (
|
||||||
|
proxy_router,
|
||||||
|
extractor_router,
|
||||||
|
speedtest_router,
|
||||||
|
playlist_builder_router,
|
||||||
|
xtream_root_router,
|
||||||
|
acestream_router,
|
||||||
|
telegram_router,
|
||||||
|
)
|
||||||
from mediaflow_proxy.schemas import GenerateUrlRequest, GenerateMultiUrlRequest, MultiUrlRequestItem
|
from mediaflow_proxy.schemas import GenerateUrlRequest, GenerateMultiUrlRequest, MultiUrlRequestItem
|
||||||
from mediaflow_proxy.utils.crypto_utils import EncryptionHandler, EncryptionMiddleware
|
from mediaflow_proxy.utils.crypto_utils import EncryptionHandler, EncryptionMiddleware
|
||||||
|
from mediaflow_proxy.utils import redis_utils
|
||||||
from mediaflow_proxy.utils.http_utils import encode_mediaflow_proxy_url
|
from mediaflow_proxy.utils.http_utils import encode_mediaflow_proxy_url
|
||||||
from mediaflow_proxy.utils.base64_utils import encode_url_to_base64, decode_base64_url, is_base64_url
|
from mediaflow_proxy.utils.base64_utils import encode_url_to_base64, decode_base64_url, is_base64_url
|
||||||
|
from mediaflow_proxy.utils.acestream import acestream_manager
|
||||||
|
from mediaflow_proxy.remuxer.video_transcoder import get_hw_capability, HWAccelType
|
||||||
|
from mediaflow_proxy.utils.telegram import telegram_manager
|
||||||
|
|
||||||
logging.basicConfig(level=settings.log_level, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
logging.basicConfig(level=settings.log_level, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
||||||
app = FastAPI()
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Suppress Telethon's "RuntimeError: coroutine ignored GeneratorExit" warnings.
|
||||||
|
# These are harmless GC noise from Telethon's internal _recv_loop coroutines
|
||||||
|
# when parallel download connections are cleaned up after client disconnect.
|
||||||
|
_default_unraisable_hook = sys.unraisablehook
|
||||||
|
|
||||||
|
|
||||||
|
def _filtered_unraisable_hook(unraisable):
|
||||||
|
if isinstance(unraisable.exc_value, RuntimeError) and "coroutine ignored GeneratorExit" in str(
|
||||||
|
unraisable.exc_value
|
||||||
|
):
|
||||||
|
return # Suppress Telethon GC noise
|
||||||
|
_default_unraisable_hook(unraisable)
|
||||||
|
|
||||||
|
|
||||||
|
sys.unraisablehook = _filtered_unraisable_hook
|
||||||
|
|
||||||
|
|
||||||
|
@asynccontextmanager
|
||||||
|
async def lifespan(app: FastAPI):
|
||||||
|
"""Application lifespan handler for startup and shutdown events."""
|
||||||
|
# Startup
|
||||||
|
if settings.clear_cache_on_startup:
|
||||||
|
logger.info("Clearing caches on startup (CLEAR_CACHE_ON_STARTUP=true)")
|
||||||
|
# Note: Redis cache clearing would require FLUSHDB which is too aggressive.
|
||||||
|
# Individual cache entries will expire via TTL. If full clear is needed,
|
||||||
|
# use redis-cli KEYS "mfp:*" | xargs redis-cli DEL
|
||||||
|
logger.info("Cache clearing note: Redis entries will expire via TTL")
|
||||||
|
|
||||||
|
# Log transcoding capability
|
||||||
|
hw = get_hw_capability()
|
||||||
|
if hw.accel_type != HWAccelType.NONE and settings.transcode_prefer_gpu:
|
||||||
|
logger.info(
|
||||||
|
"Transcode ready: GPU %s (encoder=%s) | PyAV pipeline",
|
||||||
|
hw.accel_type.value,
|
||||||
|
hw.h264_encoder,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logger.info(
|
||||||
|
"Transcode ready: CPU (%s) | PyAV pipeline",
|
||||||
|
hw.h264_encoder,
|
||||||
|
)
|
||||||
|
|
||||||
|
yield
|
||||||
|
|
||||||
|
# Shutdown
|
||||||
|
logger.info("Shutting down...")
|
||||||
|
# Close acestream sessions
|
||||||
|
await acestream_manager.close()
|
||||||
|
logger.info("Acestream manager closed")
|
||||||
|
# Close telegram session
|
||||||
|
await telegram_manager.close()
|
||||||
|
logger.info("Telegram manager closed")
|
||||||
|
# Close Redis connections
|
||||||
|
await redis_utils.close_redis()
|
||||||
|
logger.info("Redis connections closed")
|
||||||
|
|
||||||
|
|
||||||
|
app = FastAPI(lifespan=lifespan)
|
||||||
api_password_query = APIKeyQuery(name="api_password", auto_error=False)
|
api_password_query = APIKeyQuery(name="api_password", auto_error=False)
|
||||||
api_password_header = APIKeyHeader(name="api_password", auto_error=False)
|
api_password_header = APIKeyHeader(name="api_password", auto_error=False)
|
||||||
app.add_middleware(
|
app.add_middleware(
|
||||||
@@ -66,6 +139,11 @@ async def show_speedtest_page():
|
|||||||
return RedirectResponse(url="/speedtest.html")
|
return RedirectResponse(url="/speedtest.html")
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/url-generator")
|
||||||
|
async def show_url_generator_page():
|
||||||
|
return RedirectResponse(url="/url_generator.html")
|
||||||
|
|
||||||
|
|
||||||
@app.post(
|
@app.post(
|
||||||
"/generate_encrypted_or_encoded_url",
|
"/generate_encrypted_or_encoded_url",
|
||||||
description="Generate a single encoded URL",
|
description="Generate a single encoded URL",
|
||||||
@@ -112,6 +190,8 @@ async def generate_url(request: GenerateUrlRequest):
|
|||||||
query_params=query_params,
|
query_params=query_params,
|
||||||
request_headers=request.request_headers,
|
request_headers=request.request_headers,
|
||||||
response_headers=request.response_headers,
|
response_headers=request.response_headers,
|
||||||
|
propagate_response_headers=request.propagate_response_headers,
|
||||||
|
remove_response_headers=request.remove_response_headers,
|
||||||
encryption_handler=encryption_handler,
|
encryption_handler=encryption_handler,
|
||||||
expiration=request.expiration,
|
expiration=request.expiration,
|
||||||
ip=ip_str,
|
ip=ip_str,
|
||||||
@@ -151,6 +231,8 @@ async def generate_urls(request: GenerateMultiUrlRequest):
|
|||||||
query_params=query_params,
|
query_params=query_params,
|
||||||
request_headers=url_item.request_headers,
|
request_headers=url_item.request_headers,
|
||||||
response_headers=url_item.response_headers,
|
response_headers=url_item.response_headers,
|
||||||
|
propagate_response_headers=url_item.propagate_response_headers,
|
||||||
|
remove_response_headers=url_item.remove_response_headers,
|
||||||
encryption_handler=encryption_handler,
|
encryption_handler=encryption_handler,
|
||||||
expiration=request.expiration,
|
expiration=request.expiration,
|
||||||
ip=ip_str,
|
ip=ip_str,
|
||||||
@@ -171,10 +253,10 @@ async def generate_urls(request: GenerateMultiUrlRequest):
|
|||||||
async def encode_url_base64(url: str):
|
async def encode_url_base64(url: str):
|
||||||
"""
|
"""
|
||||||
Encode a URL to base64 format.
|
Encode a URL to base64 format.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
url (str): The URL to encode.
|
url (str): The URL to encode.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict: A dictionary containing the encoded URL.
|
dict: A dictionary containing the encoded URL.
|
||||||
"""
|
"""
|
||||||
@@ -194,17 +276,17 @@ async def encode_url_base64(url: str):
|
|||||||
async def decode_url_base64(encoded_url: str):
|
async def decode_url_base64(encoded_url: str):
|
||||||
"""
|
"""
|
||||||
Decode a base64 encoded URL.
|
Decode a base64 encoded URL.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
encoded_url (str): The base64 encoded URL to decode.
|
encoded_url (str): The base64 encoded URL to decode.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict: A dictionary containing the decoded URL.
|
dict: A dictionary containing the decoded URL.
|
||||||
"""
|
"""
|
||||||
decoded_url = decode_base64_url(encoded_url)
|
decoded_url = decode_base64_url(encoded_url)
|
||||||
if decoded_url is None:
|
if decoded_url is None:
|
||||||
raise HTTPException(status_code=400, detail="Invalid base64 encoded URL")
|
raise HTTPException(status_code=400, detail="Invalid base64 encoded URL")
|
||||||
|
|
||||||
return {"decoded_url": decoded_url, "encoded_url": encoded_url}
|
return {"decoded_url": decoded_url, "encoded_url": encoded_url}
|
||||||
|
|
||||||
|
|
||||||
@@ -217,28 +299,32 @@ async def decode_url_base64(encoded_url: str):
|
|||||||
async def check_base64_url(url: str):
|
async def check_base64_url(url: str):
|
||||||
"""
|
"""
|
||||||
Check if a string appears to be a base64 encoded URL.
|
Check if a string appears to be a base64 encoded URL.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
url (str): The string to check.
|
url (str): The string to check.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict: A dictionary indicating if the string is likely base64 encoded.
|
dict: A dictionary indicating if the string is likely base64 encoded.
|
||||||
"""
|
"""
|
||||||
is_base64 = is_base64_url(url)
|
is_base64 = is_base64_url(url)
|
||||||
result = {"url": url, "is_base64": is_base64}
|
result = {"url": url, "is_base64": is_base64}
|
||||||
|
|
||||||
if is_base64:
|
if is_base64:
|
||||||
decoded_url = decode_base64_url(url)
|
decoded_url = decode_base64_url(url)
|
||||||
if decoded_url:
|
if decoded_url:
|
||||||
result["decoded_url"] = decoded_url
|
result["decoded_url"] = decoded_url
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
app.include_router(proxy_router, prefix="/proxy", tags=["proxy"], dependencies=[Depends(verify_api_key)])
|
app.include_router(proxy_router, prefix="/proxy", tags=["proxy"], dependencies=[Depends(verify_api_key)])
|
||||||
|
app.include_router(acestream_router, prefix="/proxy", tags=["acestream"], dependencies=[Depends(verify_api_key)])
|
||||||
|
app.include_router(telegram_router, prefix="/proxy", tags=["telegram"], dependencies=[Depends(verify_api_key)])
|
||||||
app.include_router(extractor_router, prefix="/extractor", tags=["extractors"], dependencies=[Depends(verify_api_key)])
|
app.include_router(extractor_router, prefix="/extractor", tags=["extractors"], dependencies=[Depends(verify_api_key)])
|
||||||
app.include_router(speedtest_router, prefix="/speedtest", tags=["speedtest"], dependencies=[Depends(verify_api_key)])
|
app.include_router(speedtest_router, prefix="/speedtest", tags=["speedtest"], dependencies=[Depends(verify_api_key)])
|
||||||
app.include_router(playlist_builder_router, prefix="/playlist", tags=["playlist"])
|
app.include_router(playlist_builder_router, prefix="/playlist", tags=["playlist"])
|
||||||
|
# Root-level XC endpoints for IPTV player compatibility (handles its own API key verification)
|
||||||
|
app.include_router(xtream_root_router, tags=["xtream"])
|
||||||
|
|
||||||
static_path = resources.files("mediaflow_proxy").joinpath("static")
|
static_path = resources.files("mediaflow_proxy").joinpath("static")
|
||||||
app.mount("/", StaticFiles(directory=str(static_path), html=True), name="static")
|
app.mount("/", StaticFiles(directory=str(static_path), html=True), name="static")
|
||||||
|
|||||||
@@ -5,17 +5,39 @@ import time
|
|||||||
|
|
||||||
from fastapi import Request, Response, HTTPException
|
from fastapi import Request, Response, HTTPException
|
||||||
|
|
||||||
from mediaflow_proxy.drm.decrypter import decrypt_segment
|
from mediaflow_proxy.drm.decrypter import decrypt_segment, process_drm_init_segment
|
||||||
from mediaflow_proxy.utils.crypto_utils import encryption_handler
|
from mediaflow_proxy.utils.crypto_utils import encryption_handler
|
||||||
from mediaflow_proxy.utils.http_utils import encode_mediaflow_proxy_url, get_original_scheme, ProxyRequestHeaders
|
from mediaflow_proxy.utils.http_utils import (
|
||||||
|
encode_mediaflow_proxy_url,
|
||||||
|
get_original_scheme,
|
||||||
|
ProxyRequestHeaders,
|
||||||
|
apply_header_manipulation,
|
||||||
|
)
|
||||||
from mediaflow_proxy.utils.dash_prebuffer import dash_prebuffer
|
from mediaflow_proxy.utils.dash_prebuffer import dash_prebuffer
|
||||||
|
from mediaflow_proxy.utils.cache_utils import get_cached_processed_init, set_cached_processed_init
|
||||||
|
from mediaflow_proxy.utils.m3u8_processor import SkipSegmentFilter
|
||||||
|
from mediaflow_proxy.remuxer.ts_muxer import remux_fmp4_to_ts
|
||||||
from mediaflow_proxy.configs import settings
|
from mediaflow_proxy.configs import settings
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_ts_mode(request: Request) -> bool:
|
||||||
|
"""Resolve the effective TS remux mode from the request query params, falling back to settings."""
|
||||||
|
override = request.query_params.get("remux_to_ts")
|
||||||
|
if override is not None:
|
||||||
|
return override.lower() in ("true", "1", "yes")
|
||||||
|
return settings.remux_to_ts
|
||||||
|
|
||||||
|
|
||||||
async def process_manifest(
|
async def process_manifest(
|
||||||
request: Request, mpd_dict: dict, proxy_headers: ProxyRequestHeaders, key_id: str = None, key: str = None
|
request: Request,
|
||||||
|
mpd_dict: dict,
|
||||||
|
proxy_headers: ProxyRequestHeaders,
|
||||||
|
key_id: str = None,
|
||||||
|
key: str = None,
|
||||||
|
resolution: str = None,
|
||||||
|
skip_segments: list = None,
|
||||||
) -> Response:
|
) -> Response:
|
||||||
"""
|
"""
|
||||||
Processes the MPD manifest and converts it to an HLS manifest.
|
Processes the MPD manifest and converts it to an HLS manifest.
|
||||||
@@ -26,12 +48,14 @@ async def process_manifest(
|
|||||||
proxy_headers (ProxyRequestHeaders): The headers to include in the request.
|
proxy_headers (ProxyRequestHeaders): The headers to include in the request.
|
||||||
key_id (str, optional): The DRM key ID. Defaults to None.
|
key_id (str, optional): The DRM key ID. Defaults to None.
|
||||||
key (str, optional): The DRM key. Defaults to None.
|
key (str, optional): The DRM key. Defaults to None.
|
||||||
|
resolution (str, optional): Target resolution (e.g., '1080p', '720p'). Defaults to None.
|
||||||
|
skip_segments (list, optional): List of time segments to skip. Each item should have 'start' and 'end' keys.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Response: The HLS manifest as an HTTP response.
|
Response: The HLS manifest as an HTTP response.
|
||||||
"""
|
"""
|
||||||
hls_content = build_hls(mpd_dict, request, key_id, key)
|
hls_content = build_hls(mpd_dict, request, key_id, key, resolution, skip_segments)
|
||||||
|
|
||||||
# Start DASH pre-buffering in background if enabled
|
# Start DASH pre-buffering in background if enabled
|
||||||
if settings.enable_dash_prebuffer:
|
if settings.enable_dash_prebuffer:
|
||||||
# Extract headers for pre-buffering
|
# Extract headers for pre-buffering
|
||||||
@@ -39,20 +63,23 @@ async def process_manifest(
|
|||||||
for key, value in request.query_params.items():
|
for key, value in request.query_params.items():
|
||||||
if key.startswith("h_"):
|
if key.startswith("h_"):
|
||||||
headers[key[2:]] = value
|
headers[key[2:]] = value
|
||||||
|
|
||||||
# Get the original MPD URL from the request
|
# Get the original MPD URL from the request
|
||||||
mpd_url = request.query_params.get("d", "")
|
mpd_url = request.query_params.get("d", "")
|
||||||
if mpd_url:
|
if mpd_url:
|
||||||
# Start pre-buffering in background
|
# Start pre-buffering in background
|
||||||
asyncio.create_task(
|
asyncio.create_task(dash_prebuffer.prebuffer_dash_manifest(mpd_url, headers))
|
||||||
dash_prebuffer.prebuffer_dash_manifest(mpd_url, headers)
|
|
||||||
)
|
|
||||||
|
|
||||||
return Response(content=hls_content, media_type="application/vnd.apple.mpegurl", headers=proxy_headers.response)
|
return Response(content=hls_content, media_type="application/vnd.apple.mpegurl", headers=proxy_headers.response)
|
||||||
|
|
||||||
|
|
||||||
async def process_playlist(
|
async def process_playlist(
|
||||||
request: Request, mpd_dict: dict, profile_id: str, proxy_headers: ProxyRequestHeaders
|
request: Request,
|
||||||
|
mpd_dict: dict,
|
||||||
|
profile_id: str,
|
||||||
|
proxy_headers: ProxyRequestHeaders,
|
||||||
|
skip_segments: list = None,
|
||||||
|
start_offset: float = None,
|
||||||
) -> Response:
|
) -> Response:
|
||||||
"""
|
"""
|
||||||
Processes the MPD manifest and converts it to an HLS playlist for a specific profile.
|
Processes the MPD manifest and converts it to an HLS playlist for a specific profile.
|
||||||
@@ -62,6 +89,8 @@ async def process_playlist(
|
|||||||
mpd_dict (dict): The MPD manifest data.
|
mpd_dict (dict): The MPD manifest data.
|
||||||
profile_id (str): The profile ID to generate the playlist for.
|
profile_id (str): The profile ID to generate the playlist for.
|
||||||
proxy_headers (ProxyRequestHeaders): The headers to include in the request.
|
proxy_headers (ProxyRequestHeaders): The headers to include in the request.
|
||||||
|
skip_segments (list, optional): List of time segments to skip. Each item should have 'start' and 'end' keys.
|
||||||
|
start_offset (float, optional): Start offset in seconds for live streams.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Response: The HLS playlist as an HTTP response.
|
Response: The HLS playlist as an HTTP response.
|
||||||
@@ -73,8 +102,22 @@ async def process_playlist(
|
|||||||
if not matching_profiles:
|
if not matching_profiles:
|
||||||
raise HTTPException(status_code=404, detail="Profile not found")
|
raise HTTPException(status_code=404, detail="Profile not found")
|
||||||
|
|
||||||
hls_content = build_hls_playlist(mpd_dict, matching_profiles, request)
|
hls_content = build_hls_playlist(mpd_dict, matching_profiles, request, skip_segments, start_offset)
|
||||||
return Response(content=hls_content, media_type="application/vnd.apple.mpegurl", headers=proxy_headers.response)
|
|
||||||
|
# Trigger prebuffering of upcoming segments for live streams
|
||||||
|
if settings.enable_dash_prebuffer and mpd_dict.get("isLive", False):
|
||||||
|
# Extract headers for pre-buffering
|
||||||
|
headers = {}
|
||||||
|
for key, value in request.query_params.items():
|
||||||
|
if key.startswith("h_"):
|
||||||
|
headers[key[2:]] = value
|
||||||
|
|
||||||
|
# Use the new prefetch method for live playlists
|
||||||
|
asyncio.create_task(dash_prebuffer.prefetch_for_live_playlist(matching_profiles, headers))
|
||||||
|
|
||||||
|
# Don't include propagate headers for playlists - they should only apply to segments
|
||||||
|
response_headers = apply_header_manipulation({}, proxy_headers, include_propagate=False)
|
||||||
|
return Response(content=hls_content, media_type="application/vnd.apple.mpegurl", headers=response_headers)
|
||||||
|
|
||||||
|
|
||||||
async def process_segment(
|
async def process_segment(
|
||||||
@@ -84,9 +127,11 @@ async def process_segment(
|
|||||||
proxy_headers: ProxyRequestHeaders,
|
proxy_headers: ProxyRequestHeaders,
|
||||||
key_id: str = None,
|
key_id: str = None,
|
||||||
key: str = None,
|
key: str = None,
|
||||||
|
use_map: bool = False,
|
||||||
|
remux_ts: bool = None,
|
||||||
) -> Response:
|
) -> Response:
|
||||||
"""
|
"""
|
||||||
Processes and decrypts a media segment.
|
Processes and decrypts a media segment, optionally remuxing to MPEG-TS.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
init_content (bytes): The initialization segment content.
|
init_content (bytes): The initialization segment content.
|
||||||
@@ -95,23 +140,110 @@ async def process_segment(
|
|||||||
proxy_headers (ProxyRequestHeaders): The headers to include in the request.
|
proxy_headers (ProxyRequestHeaders): The headers to include in the request.
|
||||||
key_id (str, optional): The DRM key ID. Defaults to None.
|
key_id (str, optional): The DRM key ID. Defaults to None.
|
||||||
key (str, optional): The DRM key. Defaults to None.
|
key (str, optional): The DRM key. Defaults to None.
|
||||||
|
use_map (bool, optional): If True, init segment is served separately via EXT-X-MAP,
|
||||||
|
so don't concatenate init with segment. Defaults to False.
|
||||||
|
remux_ts (bool, optional): If True, remux fMP4 to MPEG-TS. Defaults to settings.remux_to_ts.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Response: The decrypted segment as an HTTP response.
|
Response: The processed segment as an HTTP response.
|
||||||
"""
|
"""
|
||||||
if key_id and key:
|
if key_id and key:
|
||||||
# For DRM protected content
|
# For DRM protected content
|
||||||
now = time.time()
|
now = time.time()
|
||||||
decrypted_content = decrypt_segment(init_content, segment_content, key_id, key)
|
decrypted_content = decrypt_segment(init_content, segment_content, key_id, key, include_init=not use_map)
|
||||||
logger.info(f"Decryption of {mimetype} segment took {time.time() - now:.4f} seconds")
|
logger.info(f"Decryption of {mimetype} segment took {time.time() - now:.4f} seconds")
|
||||||
else:
|
else:
|
||||||
# For non-DRM protected content, we just concatenate init and segment content
|
# For non-DRM protected content
|
||||||
decrypted_content = init_content + segment_content
|
if use_map:
|
||||||
|
# Init is served separately via EXT-X-MAP
|
||||||
|
decrypted_content = segment_content
|
||||||
|
else:
|
||||||
|
# Concatenate init and segment content
|
||||||
|
decrypted_content = init_content + segment_content
|
||||||
|
|
||||||
return Response(content=decrypted_content, media_type=mimetype, headers=proxy_headers.response)
|
# Check if we should remux to TS
|
||||||
|
should_remux = remux_ts if remux_ts is not None else settings.remux_to_ts
|
||||||
|
|
||||||
|
# Remux both video and audio to MPEG-TS for proper HLS TS playback
|
||||||
|
if should_remux and ("video" in mimetype or "audio" in mimetype):
|
||||||
|
# Remux fMP4 to MPEG-TS for ExoPlayer/VLC compatibility
|
||||||
|
now = time.time()
|
||||||
|
try:
|
||||||
|
# For TS remuxing, we always need init_content for codec config
|
||||||
|
# preserve_timestamps=True keeps the original tfdt timestamps from the
|
||||||
|
# fMP4 segment, ensuring continuous playback across HLS segments
|
||||||
|
ts_content = remux_fmp4_to_ts(
|
||||||
|
init_content,
|
||||||
|
decrypted_content,
|
||||||
|
preserve_timestamps=True,
|
||||||
|
)
|
||||||
|
decrypted_content = ts_content
|
||||||
|
mimetype = "video/mp2t" # Update MIME type for TS (same for audio-only TS)
|
||||||
|
logger.info(f"TS remuxing took {time.time() - now:.4f} seconds")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"TS remuxing failed, returning fMP4: {e}")
|
||||||
|
# Fall through to return original content
|
||||||
|
|
||||||
|
response_headers = apply_header_manipulation({}, proxy_headers)
|
||||||
|
return Response(content=decrypted_content, media_type=mimetype, headers=response_headers)
|
||||||
|
|
||||||
|
|
||||||
def build_hls(mpd_dict: dict, request: Request, key_id: str = None, key: str = None) -> str:
|
async def process_init_segment(
|
||||||
|
init_content: bytes,
|
||||||
|
mimetype: str,
|
||||||
|
proxy_headers: ProxyRequestHeaders,
|
||||||
|
key_id: str = None,
|
||||||
|
key: str = None,
|
||||||
|
init_url: str = None,
|
||||||
|
) -> Response:
|
||||||
|
"""
|
||||||
|
Processes an initialization segment for EXT-X-MAP.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
init_content (bytes): The initialization segment content.
|
||||||
|
mimetype (str): The MIME type of the segment.
|
||||||
|
proxy_headers (ProxyRequestHeaders): The headers to include in the request.
|
||||||
|
key_id (str, optional): The DRM key ID. Defaults to None.
|
||||||
|
key (str, optional): The DRM key. Defaults to None.
|
||||||
|
init_url (str, optional): The init URL for caching. Defaults to None.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Response: The processed init segment as an HTTP response.
|
||||||
|
"""
|
||||||
|
if key_id and key:
|
||||||
|
# Check if we have a cached processed version
|
||||||
|
if init_url:
|
||||||
|
cached_processed = await get_cached_processed_init(init_url, key_id)
|
||||||
|
if cached_processed:
|
||||||
|
logger.debug(f"Using cached processed init segment for {init_url}")
|
||||||
|
response_headers = apply_header_manipulation({}, proxy_headers)
|
||||||
|
return Response(content=cached_processed, media_type=mimetype, headers=response_headers)
|
||||||
|
|
||||||
|
# For DRM protected content, we need to process the init segment
|
||||||
|
# to remove encryption-related boxes but keep the moov structure
|
||||||
|
now = time.time()
|
||||||
|
processed_content = process_drm_init_segment(init_content, key_id, key)
|
||||||
|
logger.info(f"Processing of {mimetype} init segment took {time.time() - now:.4f} seconds")
|
||||||
|
|
||||||
|
# Cache the processed init segment
|
||||||
|
if init_url:
|
||||||
|
await set_cached_processed_init(init_url, key_id, processed_content, ttl=3600)
|
||||||
|
else:
|
||||||
|
# For non-DRM protected content, just return the init segment as-is
|
||||||
|
processed_content = init_content
|
||||||
|
|
||||||
|
response_headers = apply_header_manipulation({}, proxy_headers)
|
||||||
|
return Response(content=processed_content, media_type=mimetype, headers=response_headers)
|
||||||
|
|
||||||
|
|
||||||
|
def build_hls(
|
||||||
|
mpd_dict: dict,
|
||||||
|
request: Request,
|
||||||
|
key_id: str = None,
|
||||||
|
key: str = None,
|
||||||
|
resolution: str = None,
|
||||||
|
skip_segments: list = None,
|
||||||
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Builds an HLS manifest from the MPD manifest.
|
Builds an HLS manifest from the MPD manifest.
|
||||||
|
|
||||||
@@ -120,12 +252,23 @@ def build_hls(mpd_dict: dict, request: Request, key_id: str = None, key: str = N
|
|||||||
request (Request): The incoming HTTP request.
|
request (Request): The incoming HTTP request.
|
||||||
key_id (str, optional): The DRM key ID. Defaults to None.
|
key_id (str, optional): The DRM key ID. Defaults to None.
|
||||||
key (str, optional): The DRM key. Defaults to None.
|
key (str, optional): The DRM key. Defaults to None.
|
||||||
|
resolution (str, optional): Target resolution (e.g., '1080p', '720p'). Defaults to None.
|
||||||
|
skip_segments (list, optional): List of time segments to skip. Each item should have 'start' and 'end' keys.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: The HLS manifest as a string.
|
str: The HLS manifest as a string.
|
||||||
"""
|
"""
|
||||||
hls = ["#EXTM3U", "#EXT-X-VERSION:6"]
|
is_ts_mode = _resolve_ts_mode(request)
|
||||||
|
# Use HLS v3 for TS (ExoPlayer compatibility), v6 for fMP4
|
||||||
|
version = 3 if is_ts_mode else 6
|
||||||
|
hls = ["#EXTM3U", f"#EXT-X-VERSION:{version}"]
|
||||||
query_params = dict(request.query_params)
|
query_params = dict(request.query_params)
|
||||||
|
|
||||||
|
# Preserve skip parameter in query params so it propagates to playlists
|
||||||
|
if skip_segments:
|
||||||
|
# Convert back to compact format for URL
|
||||||
|
skip_str = ",".join(f"{s['start']}-{s['end']}" for s in skip_segments)
|
||||||
|
query_params["skip"] = skip_str
|
||||||
has_encrypted = query_params.pop("has_encrypted", False)
|
has_encrypted = query_params.pop("has_encrypted", False)
|
||||||
|
|
||||||
video_profiles = {}
|
video_profiles = {}
|
||||||
@@ -148,26 +291,113 @@ def build_hls(mpd_dict: dict, request: Request, key_id: str = None, key: str = N
|
|||||||
elif "audio" in profile["mimeType"]:
|
elif "audio" in profile["mimeType"]:
|
||||||
audio_profiles[profile["id"]] = (profile, playlist_url)
|
audio_profiles[profile["id"]] = (profile, playlist_url)
|
||||||
|
|
||||||
|
# Filter video profiles by resolution if specified
|
||||||
|
if resolution and video_profiles:
|
||||||
|
video_profiles = _filter_video_profiles_by_resolution(video_profiles, resolution)
|
||||||
|
|
||||||
|
# For TS mode, only expose the highest quality video variant
|
||||||
|
# ExoPlayer handles adaptive switching poorly with TS remuxing
|
||||||
|
if is_ts_mode and video_profiles:
|
||||||
|
max_height = max(p[0].get("height", 0) for p in video_profiles.values())
|
||||||
|
video_profiles = {k: v for k, v in video_profiles.items() if v[0].get("height", 0) >= max_height}
|
||||||
|
|
||||||
# Add audio streams
|
# Add audio streams
|
||||||
for i, (profile, playlist_url) in enumerate(audio_profiles.values()):
|
for i, (profile, playlist_url) in enumerate(audio_profiles.values()):
|
||||||
is_default = "YES" if i == 0 else "NO" # Set the first audio track as default
|
is_default = "YES" if i == 0 else "NO" # Set the first audio track as default
|
||||||
|
lang = profile.get("lang", "und")
|
||||||
|
bandwidth = profile.get("bandwidth", "128000")
|
||||||
|
name = f"Audio {lang} ({bandwidth})" if lang != "und" else f"Audio {i + 1} ({bandwidth})"
|
||||||
hls.append(
|
hls.append(
|
||||||
f'#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",NAME="{profile["id"]}",DEFAULT={is_default},AUTOSELECT={is_default},LANGUAGE="{profile.get("lang", "und")}",URI="{playlist_url}"'
|
f'#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",NAME="{name}",DEFAULT={is_default},AUTOSELECT=YES,LANGUAGE="{lang}",URI="{playlist_url}"'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Build combined codecs string (video + audio) for EXT-X-STREAM-INF
|
||||||
|
# ExoPlayer requires CODECS to list all codecs when AUDIO group is referenced
|
||||||
|
first_audio_codec = None
|
||||||
|
if audio_profiles:
|
||||||
|
first_audio_profile = next(iter(audio_profiles.values()))[0]
|
||||||
|
first_audio_codec = first_audio_profile.get("codecs", "")
|
||||||
|
|
||||||
# Add video streams
|
# Add video streams
|
||||||
for profile, playlist_url in video_profiles.values():
|
for profile, playlist_url in video_profiles.values():
|
||||||
# Only add AUDIO attribute if there are audio profiles available
|
# Only add AUDIO attribute if there are audio profiles available
|
||||||
audio_attr = ',AUDIO="audio"' if audio_profiles else ""
|
audio_attr = ',AUDIO="audio"' if audio_profiles else ""
|
||||||
hls.append(
|
|
||||||
f'#EXT-X-STREAM-INF:BANDWIDTH={profile["bandwidth"]},RESOLUTION={profile["width"]}x{profile["height"]},CODECS="{profile["codecs"]}",FRAME-RATE={profile["frameRate"]}{audio_attr}'
|
# Build combined codecs: video + audio
|
||||||
)
|
video_codec = profile["codecs"]
|
||||||
|
if first_audio_codec and audio_attr:
|
||||||
|
combined_codecs = f"{video_codec},{first_audio_codec}"
|
||||||
|
else:
|
||||||
|
combined_codecs = video_codec
|
||||||
|
|
||||||
|
# Keep full codec strings (e.g., avc1.42C01F, mp4a.40.2) for ALL modes.
|
||||||
|
# ExoPlayer's CodecSpecificDataUtil rejects simplified strings like "avc1" or "mp4a"
|
||||||
|
# as malformed, which prevents proper codec initialization.
|
||||||
|
|
||||||
|
# Omit FRAME-RATE for TS mode (ExoPlayer compatibility)
|
||||||
|
if is_ts_mode:
|
||||||
|
hls.append(
|
||||||
|
f'#EXT-X-STREAM-INF:BANDWIDTH={profile["bandwidth"]},RESOLUTION={profile["width"]}x{profile["height"]},CODECS="{combined_codecs}"{audio_attr}'
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
hls.append(
|
||||||
|
f'#EXT-X-STREAM-INF:BANDWIDTH={profile["bandwidth"]},RESOLUTION={profile["width"]}x{profile["height"]},CODECS="{combined_codecs}",FRAME-RATE={profile["frameRate"]}{audio_attr}'
|
||||||
|
)
|
||||||
hls.append(playlist_url)
|
hls.append(playlist_url)
|
||||||
|
|
||||||
return "\n".join(hls)
|
return "\n".join(hls)
|
||||||
|
|
||||||
|
|
||||||
def build_hls_playlist(mpd_dict: dict, profiles: list[dict], request: Request) -> str:
|
def _filter_video_profiles_by_resolution(video_profiles: dict, target_resolution: str) -> dict:
|
||||||
|
"""
|
||||||
|
Filter video profiles to select the one matching the target resolution.
|
||||||
|
Falls back to closest lower resolution if exact match not found.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
video_profiles: Dictionary of profile_id -> (profile, playlist_url).
|
||||||
|
target_resolution: Target resolution string (e.g., '1080p', '720p').
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Filtered dictionary with only the selected profile.
|
||||||
|
"""
|
||||||
|
# Parse target height from "1080p" -> 1080
|
||||||
|
target_height = int(target_resolution.rstrip("p"))
|
||||||
|
|
||||||
|
# Convert to list and sort by height descending
|
||||||
|
profiles_list = [
|
||||||
|
(profile_id, profile, playlist_url)
|
||||||
|
for profile_id, (profile, playlist_url) in video_profiles.items()
|
||||||
|
if profile.get("height", 0) > 0
|
||||||
|
]
|
||||||
|
|
||||||
|
if not profiles_list:
|
||||||
|
logger.warning("No video profiles with valid height found, returning all profiles")
|
||||||
|
return video_profiles
|
||||||
|
|
||||||
|
sorted_profiles = sorted(profiles_list, key=lambda x: x[1]["height"], reverse=True)
|
||||||
|
|
||||||
|
# Find exact match or closest lower
|
||||||
|
selected = None
|
||||||
|
for profile_id, profile, playlist_url in sorted_profiles:
|
||||||
|
if profile["height"] <= target_height:
|
||||||
|
selected = (profile_id, profile, playlist_url)
|
||||||
|
break
|
||||||
|
|
||||||
|
# If all profiles are higher than target, use lowest available
|
||||||
|
if selected is None:
|
||||||
|
selected = sorted_profiles[-1]
|
||||||
|
|
||||||
|
profile_id, profile, playlist_url = selected
|
||||||
|
logger.info(
|
||||||
|
f"Selected MPD video profile with resolution {profile['width']}x{profile['height']} for target {target_resolution}"
|
||||||
|
)
|
||||||
|
|
||||||
|
return {profile_id: (profile, playlist_url)}
|
||||||
|
|
||||||
|
|
||||||
|
def build_hls_playlist(
|
||||||
|
mpd_dict: dict, profiles: list[dict], request: Request, skip_segments: list = None, start_offset: float = None
|
||||||
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Builds an HLS playlist from the MPD manifest for specific profiles.
|
Builds an HLS playlist from the MPD manifest for specific profiles.
|
||||||
|
|
||||||
@@ -175,25 +405,65 @@ def build_hls_playlist(mpd_dict: dict, profiles: list[dict], request: Request) -
|
|||||||
mpd_dict (dict): The MPD manifest data.
|
mpd_dict (dict): The MPD manifest data.
|
||||||
profiles (list[dict]): The profiles to include in the playlist.
|
profiles (list[dict]): The profiles to include in the playlist.
|
||||||
request (Request): The incoming HTTP request.
|
request (Request): The incoming HTTP request.
|
||||||
|
skip_segments (list, optional): List of time segments to skip. Each item should have 'start' and 'end' keys.
|
||||||
|
start_offset (float, optional): Start offset in seconds for live streams. Defaults to settings.livestream_start_offset for live.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: The HLS playlist as a string.
|
str: The HLS playlist as a string.
|
||||||
"""
|
"""
|
||||||
hls = ["#EXTM3U", "#EXT-X-VERSION:6"]
|
# Determine if we're in TS remux mode (per-request override > global setting)
|
||||||
|
is_ts_mode = _resolve_ts_mode(request)
|
||||||
|
|
||||||
|
# Use HLS v3 for TS (ExoPlayer compatibility), v6 for fMP4
|
||||||
|
version = 3 if is_ts_mode else 6
|
||||||
|
hls = ["#EXTM3U", f"#EXT-X-VERSION:{version}"]
|
||||||
|
|
||||||
added_segments = 0
|
added_segments = 0
|
||||||
|
skipped_segments = 0
|
||||||
|
is_live = mpd_dict.get("isLive", False)
|
||||||
|
|
||||||
proxy_url = request.url_for("segment_endpoint")
|
# Inject EXT-X-START for live streams (enables prebuffering by starting behind live edge)
|
||||||
|
# User-provided start_offset always takes precedence; otherwise use default for live streams only
|
||||||
|
if is_ts_mode and is_live and start_offset is None:
|
||||||
|
# TS mode needs a larger buffer for ExoPlayer
|
||||||
|
effective_start_offset = -30.0
|
||||||
|
else:
|
||||||
|
effective_start_offset = (
|
||||||
|
start_offset if start_offset is not None else (settings.livestream_start_offset if is_live else None)
|
||||||
|
)
|
||||||
|
if effective_start_offset is not None:
|
||||||
|
# ExoPlayer doesn't handle PRECISE=YES well with TS
|
||||||
|
precise = "NO" if is_ts_mode else "YES"
|
||||||
|
hls.append(f"#EXT-X-START:TIME-OFFSET={effective_start_offset:.1f},PRECISE={precise}")
|
||||||
|
|
||||||
|
# Initialize skip filter if skip_segments provided
|
||||||
|
skip_filter = SkipSegmentFilter(skip_segments) if skip_segments else None
|
||||||
|
|
||||||
|
# In TS mode, we don't use EXT-X-MAP because TS segments are self-contained
|
||||||
|
# (PAT/PMT/VPS/SPS/PPS are embedded in each segment)
|
||||||
|
# Use EXT-X-MAP for live streams, but only for fMP4 (not TS)
|
||||||
|
use_map = is_live and not is_ts_mode
|
||||||
|
|
||||||
|
# Select appropriate endpoint based on remux mode
|
||||||
|
if is_ts_mode:
|
||||||
|
proxy_url = request.url_for("segment_ts_endpoint") # /mpd/segment.ts
|
||||||
|
else:
|
||||||
|
proxy_url = request.url_for("segment_endpoint") # /mpd/segment.mp4
|
||||||
proxy_url = str(proxy_url.replace(scheme=get_original_scheme(request)))
|
proxy_url = str(proxy_url.replace(scheme=get_original_scheme(request)))
|
||||||
|
|
||||||
|
# Get init endpoint URL for EXT-X-MAP (only used for fMP4 mode)
|
||||||
|
init_proxy_url = request.url_for("init_endpoint")
|
||||||
|
init_proxy_url = str(init_proxy_url.replace(scheme=get_original_scheme(request)))
|
||||||
|
|
||||||
for index, profile in enumerate(profiles):
|
for index, profile in enumerate(profiles):
|
||||||
segments = profile["segments"]
|
segments = profile["segments"]
|
||||||
if not segments:
|
if not segments:
|
||||||
logger.warning(f"No segments found for profile {profile['id']}")
|
logger.warning(f"No segments found for profile {profile['id']}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if mpd_dict["isLive"]:
|
if is_live:
|
||||||
depth = max(settings.mpd_live_playlist_depth, 1)
|
# TS mode uses deeper playlist for ExoPlayer buffering
|
||||||
|
depth = 20 if is_ts_mode else max(settings.mpd_live_playlist_depth, 1)
|
||||||
trimmed_segments = segments[-depth:]
|
trimmed_segments = segments[-depth:]
|
||||||
else:
|
else:
|
||||||
trimmed_segments = segments
|
trimmed_segments = segments
|
||||||
@@ -202,24 +472,38 @@ def build_hls_playlist(mpd_dict: dict, profiles: list[dict], request: Request) -
|
|||||||
if index == 0:
|
if index == 0:
|
||||||
first_segment = trimmed_segments[0]
|
first_segment = trimmed_segments[0]
|
||||||
extinf_values = [f["extinf"] for f in trimmed_segments if "extinf" in f]
|
extinf_values = [f["extinf"] for f in trimmed_segments if "extinf" in f]
|
||||||
target_duration = math.ceil(max(extinf_values)) if extinf_values else 3
|
|
||||||
|
# TS mode uses int(max)+1 to reduce buffer underruns in ExoPlayer
|
||||||
|
if is_ts_mode:
|
||||||
|
target_duration = int(max(extinf_values)) + 1 if extinf_values else 10
|
||||||
|
else:
|
||||||
|
target_duration = math.ceil(max(extinf_values)) if extinf_values else 3
|
||||||
|
|
||||||
# Align HLS media sequence with MPD-provided numbering when available
|
# Align HLS media sequence with MPD-provided numbering when available
|
||||||
mpd_start_number = profile.get("segment_template_start_number")
|
if is_ts_mode and is_live:
|
||||||
sequence = first_segment.get("number")
|
# For live TS, derive sequence from timeline first for stable continuity
|
||||||
|
time_val = first_segment.get("time")
|
||||||
if sequence is None:
|
duration_val = first_segment.get("duration_mpd_timescale")
|
||||||
# Fallback to MPD template start number
|
if time_val is not None and duration_val and duration_val > 0:
|
||||||
if mpd_start_number is not None:
|
sequence = math.floor(time_val / duration_val)
|
||||||
sequence = mpd_start_number
|
|
||||||
else:
|
else:
|
||||||
# As a last resort, derive from timeline information
|
sequence = first_segment.get("number") or profile.get("segment_template_start_number") or 1
|
||||||
time_val = first_segment.get("time")
|
else:
|
||||||
duration_val = first_segment.get("duration_mpd_timescale")
|
mpd_start_number = profile.get("segment_template_start_number")
|
||||||
if time_val is not None and duration_val and duration_val > 0:
|
sequence = first_segment.get("number")
|
||||||
sequence = math.floor(time_val / duration_val)
|
|
||||||
|
if sequence is None:
|
||||||
|
# Fallback to MPD template start number
|
||||||
|
if mpd_start_number is not None:
|
||||||
|
sequence = mpd_start_number
|
||||||
else:
|
else:
|
||||||
sequence = 1
|
# As a last resort, derive from timeline information
|
||||||
|
time_val = first_segment.get("time")
|
||||||
|
duration_val = first_segment.get("duration_mpd_timescale")
|
||||||
|
if time_val is not None and duration_val and duration_val > 0:
|
||||||
|
sequence = math.floor(time_val / duration_val)
|
||||||
|
else:
|
||||||
|
sequence = 1
|
||||||
|
|
||||||
hls.extend(
|
hls.extend(
|
||||||
[
|
[
|
||||||
@@ -227,31 +511,91 @@ def build_hls_playlist(mpd_dict: dict, profiles: list[dict], request: Request) -
|
|||||||
f"#EXT-X-MEDIA-SEQUENCE:{sequence}",
|
f"#EXT-X-MEDIA-SEQUENCE:{sequence}",
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
if mpd_dict["isLive"]:
|
# For live streams, don't set PLAYLIST-TYPE to allow sliding window
|
||||||
hls.append("#EXT-X-PLAYLIST-TYPE:EVENT")
|
if not is_live:
|
||||||
else:
|
|
||||||
hls.append("#EXT-X-PLAYLIST-TYPE:VOD")
|
hls.append("#EXT-X-PLAYLIST-TYPE:VOD")
|
||||||
|
|
||||||
init_url = profile["initUrl"]
|
init_url = profile["initUrl"]
|
||||||
|
# For SegmentBase profiles, we may have byte range for initialization segment
|
||||||
|
init_range = profile.get("initRange")
|
||||||
|
|
||||||
query_params = dict(request.query_params)
|
query_params = dict(request.query_params)
|
||||||
query_params.pop("profile_id", None)
|
query_params.pop("profile_id", None)
|
||||||
query_params.pop("d", None)
|
query_params.pop("d", None)
|
||||||
|
query_params.pop("remux_to_ts", None) # per-request override; already resolved into endpoint choice
|
||||||
has_encrypted = query_params.pop("has_encrypted", False)
|
has_encrypted = query_params.pop("has_encrypted", False)
|
||||||
|
|
||||||
for segment in trimmed_segments:
|
# Add EXT-X-MAP for init segment (for live streams or when beneficial)
|
||||||
program_date_time = segment.get("program_date_time")
|
if use_map:
|
||||||
if program_date_time:
|
init_query_params = {
|
||||||
hls.append(f"#EXT-X-PROGRAM-DATE-TIME:{program_date_time}")
|
"init_url": init_url,
|
||||||
hls.append(f'#EXTINF:{segment["extinf"]:.3f},')
|
"mime_type": profile["mimeType"],
|
||||||
query_params.update(
|
"is_live": "true" if is_live else "false",
|
||||||
{
|
}
|
||||||
"init_url": init_url,
|
if init_range:
|
||||||
"segment_url": segment["media"],
|
init_query_params["init_range"] = init_range
|
||||||
"mime_type": profile["mimeType"],
|
# Add key parameters
|
||||||
"is_live": "true" if mpd_dict.get("isLive") else "false",
|
if query_params.get("key_id"):
|
||||||
}
|
init_query_params["key_id"] = query_params["key_id"]
|
||||||
|
if query_params.get("key"):
|
||||||
|
init_query_params["key"] = query_params["key"]
|
||||||
|
# Add api_password for authentication
|
||||||
|
if query_params.get("api_password"):
|
||||||
|
init_query_params["api_password"] = query_params["api_password"]
|
||||||
|
|
||||||
|
init_map_url = encode_mediaflow_proxy_url(
|
||||||
|
init_proxy_url,
|
||||||
|
query_params=init_query_params,
|
||||||
|
encryption_handler=encryption_handler if has_encrypted else None,
|
||||||
)
|
)
|
||||||
|
hls.append(f'#EXT-X-MAP:URI="{init_map_url}"')
|
||||||
|
|
||||||
|
need_discontinuity = False
|
||||||
|
for segment in trimmed_segments:
|
||||||
|
duration = segment["extinf"]
|
||||||
|
|
||||||
|
# Check if this segment should be skipped
|
||||||
|
if skip_filter:
|
||||||
|
if skip_filter.should_skip_segment(duration):
|
||||||
|
skip_filter.advance_time(duration)
|
||||||
|
skipped_segments += 1
|
||||||
|
need_discontinuity = True
|
||||||
|
continue
|
||||||
|
skip_filter.advance_time(duration)
|
||||||
|
|
||||||
|
# Add discontinuity marker after skipped segments
|
||||||
|
if need_discontinuity:
|
||||||
|
hls.append("#EXT-X-DISCONTINUITY")
|
||||||
|
need_discontinuity = False
|
||||||
|
|
||||||
|
# Emit EXT-X-PROGRAM-DATE-TIME only for fMP4 (not TS)
|
||||||
|
program_date_time = segment.get("program_date_time")
|
||||||
|
if program_date_time and not is_ts_mode:
|
||||||
|
hls.append(f"#EXT-X-PROGRAM-DATE-TIME:{program_date_time}")
|
||||||
|
hls.append(f"#EXTINF:{duration:.3f},")
|
||||||
|
|
||||||
|
segment_query_params = {
|
||||||
|
"init_url": init_url,
|
||||||
|
"segment_url": segment["media"],
|
||||||
|
"mime_type": profile["mimeType"],
|
||||||
|
"is_live": "true" if is_live else "false",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Add use_map flag so segment endpoint knows not to include init
|
||||||
|
if use_map and not is_ts_mode:
|
||||||
|
segment_query_params["use_map"] = "true"
|
||||||
|
elif is_ts_mode:
|
||||||
|
# TS segments are self-contained; init is always embedded by remuxer
|
||||||
|
segment_query_params["use_map"] = "false"
|
||||||
|
|
||||||
|
# Add byte range parameters for SegmentBase
|
||||||
|
if init_range:
|
||||||
|
segment_query_params["init_range"] = init_range
|
||||||
|
# Segment may also have its own range (for SegmentBase)
|
||||||
|
if "initRange" in segment:
|
||||||
|
segment_query_params["init_range"] = segment["initRange"]
|
||||||
|
|
||||||
|
query_params.update(segment_query_params)
|
||||||
hls.append(
|
hls.append(
|
||||||
encode_mediaflow_proxy_url(
|
encode_mediaflow_proxy_url(
|
||||||
proxy_url,
|
proxy_url,
|
||||||
@@ -264,5 +608,8 @@ def build_hls_playlist(mpd_dict: dict, profiles: list[dict], request: Request) -
|
|||||||
if not mpd_dict["isLive"]:
|
if not mpd_dict["isLive"]:
|
||||||
hls.append("#EXT-X-ENDLIST")
|
hls.append("#EXT-X-ENDLIST")
|
||||||
|
|
||||||
logger.info(f"Added {added_segments} segments to HLS playlist")
|
if skip_filter and skipped_segments > 0:
|
||||||
|
logger.info(f"Added {added_segments} segments to HLS playlist (skipped {skipped_segments} segments)")
|
||||||
|
else:
|
||||||
|
logger.info(f"Added {added_segments} segments to HLS playlist")
|
||||||
return "\n".join(hls)
|
return "\n".join(hls)
|
||||||
|
|||||||
18
mediaflow_proxy/remuxer/__init__.py
Normal file
18
mediaflow_proxy/remuxer/__init__.py
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
"""
|
||||||
|
Media remuxer package.
|
||||||
|
|
||||||
|
Provides pure Python implementations for media container parsing, remuxing,
|
||||||
|
and transcoding:
|
||||||
|
|
||||||
|
- ebml_parser: Minimal EBML/MKV parser for seeking and demuxing
|
||||||
|
- ts_muxer: fMP4 -> MPEG-TS remuxer
|
||||||
|
- mkv_demuxer: Streaming MKV demuxer
|
||||||
|
- mp4_muxer: MP4 box builder for standard moov-first MP4
|
||||||
|
- audio_transcoder: PyAV-based audio frame transcoding
|
||||||
|
- video_transcoder: GPU-accelerated video transcoding via PyAV
|
||||||
|
- pyav_demuxer: Universal PyAV-based streaming demuxer (any container)
|
||||||
|
- codec_utils: Codec compatibility detection and decision engine
|
||||||
|
- media_source: Abstract MediaSource protocol (Telegram, HTTP, etc.)
|
||||||
|
- transcode_handler: Shared transcode request orchestrator
|
||||||
|
- transcode_pipeline: MKV fast-path and universal transcode pipelines
|
||||||
|
"""
|
||||||
BIN
mediaflow_proxy/remuxer/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
mediaflow_proxy/remuxer/__pycache__/codec_utils.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/codec_utils.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
mediaflow_proxy/remuxer/__pycache__/ebml_parser.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/ebml_parser.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/remuxer/__pycache__/hls_manifest.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/hls_manifest.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/remuxer/__pycache__/media_source.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/media_source.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/remuxer/__pycache__/mkv_demuxer.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/mkv_demuxer.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/remuxer/__pycache__/mp4_muxer.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/mp4_muxer.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/remuxer/__pycache__/mp4_parser.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/mp4_parser.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/remuxer/__pycache__/pyav_demuxer.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/pyav_demuxer.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
mediaflow_proxy/remuxer/__pycache__/ts_muxer.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/remuxer/__pycache__/ts_muxer.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
351
mediaflow_proxy/remuxer/audio_transcoder.py
Normal file
351
mediaflow_proxy/remuxer/audio_transcoder.py
Normal file
@@ -0,0 +1,351 @@
|
|||||||
|
"""
|
||||||
|
PyAV-based audio transcoder for frame-level codec conversion.
|
||||||
|
|
||||||
|
Transcodes audio frames between codecs using PyAV's CodecContext API
|
||||||
|
(Python bindings for FFmpeg's libavcodec). This provides in-process
|
||||||
|
audio transcoding without subprocess management or pipe overhead.
|
||||||
|
|
||||||
|
Supported input codecs: EAC3, AC3, AAC, Opus, Vorbis, FLAC, MP3
|
||||||
|
Output codec: AAC-LC (stereo, configurable bitrate)
|
||||||
|
|
||||||
|
Architecture:
|
||||||
|
raw_frame_bytes -> parse() -> decode() -> resample() -> encode() -> raw_aac_bytes
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
transcoder = AudioTranscoder("eac3", sample_rate=48000, channels=6)
|
||||||
|
for raw_eac3_frame in frames:
|
||||||
|
aac_frames = transcoder.transcode(raw_eac3_frame)
|
||||||
|
for aac_data in aac_frames:
|
||||||
|
write(aac_data)
|
||||||
|
# Flush remaining frames
|
||||||
|
for aac_data in transcoder.flush():
|
||||||
|
write(aac_data)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
import av
|
||||||
|
from av.audio.resampler import AudioResampler
|
||||||
|
|
||||||
|
from mediaflow_proxy.remuxer.ebml_parser import (
|
||||||
|
CODEC_ID_AAC,
|
||||||
|
CODEC_ID_AC3,
|
||||||
|
CODEC_ID_EAC3,
|
||||||
|
CODEC_ID_FLAC,
|
||||||
|
CODEC_ID_OPUS,
|
||||||
|
CODEC_ID_VORBIS,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _generate_silence_aac_frame() -> bytes | None:
|
||||||
|
"""Pre-encode a single silent AAC frame (48 kHz stereo, 1024 samples).
|
||||||
|
|
||||||
|
PyAV's AAC encoder has an intermittent ``avcodec_send_frame`` bug when
|
||||||
|
rapidly creating/destroying codec contexts, so we retry a few times.
|
||||||
|
This function is called once at module load; the result is cached in
|
||||||
|
``_SILENCE_AAC_FRAME``.
|
||||||
|
"""
|
||||||
|
for _attempt in range(10):
|
||||||
|
try:
|
||||||
|
enc = av.CodecContext.create("aac", "w")
|
||||||
|
enc.sample_rate = 48000
|
||||||
|
enc.layout = "stereo"
|
||||||
|
enc.format = av.AudioFormat("fltp")
|
||||||
|
enc.bit_rate = 192000
|
||||||
|
enc.open()
|
||||||
|
|
||||||
|
frame = av.AudioFrame(
|
||||||
|
format=enc.format.name,
|
||||||
|
layout=enc.layout.name,
|
||||||
|
samples=enc.frame_size or 1024,
|
||||||
|
)
|
||||||
|
frame.sample_rate = enc.sample_rate
|
||||||
|
frame.pts = 0
|
||||||
|
|
||||||
|
for pkt in enc.encode(frame):
|
||||||
|
return bytes(pkt)
|
||||||
|
# AAC priming delay: first encode buffered; flush to retrieve
|
||||||
|
for pkt in enc.encode(None):
|
||||||
|
return bytes(pkt)
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level silence frame -- generated once, reused by every transcoder.
|
||||||
|
_SILENCE_AAC_FRAME: bytes | None = _generate_silence_aac_frame()
|
||||||
|
|
||||||
|
# Map MKV codec IDs to PyAV/FFmpeg codec names
|
||||||
|
_MKV_TO_FFMPEG_CODEC = {
|
||||||
|
CODEC_ID_EAC3: "eac3",
|
||||||
|
CODEC_ID_AC3: "ac3",
|
||||||
|
CODEC_ID_AAC: "aac",
|
||||||
|
CODEC_ID_OPUS: "opus",
|
||||||
|
CODEC_ID_VORBIS: "vorbis",
|
||||||
|
CODEC_ID_FLAC: "flac",
|
||||||
|
"A_DTS": "dts",
|
||||||
|
"A_MP3": "mp3",
|
||||||
|
"A_MPEG/L3": "mp3",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Codecs that need transcoding to AAC for browser playback
|
||||||
|
NEEDS_TRANSCODE = frozenset(
|
||||||
|
{
|
||||||
|
CODEC_ID_EAC3,
|
||||||
|
CODEC_ID_AC3,
|
||||||
|
CODEC_ID_OPUS,
|
||||||
|
CODEC_ID_VORBIS,
|
||||||
|
CODEC_ID_FLAC,
|
||||||
|
"A_DTS",
|
||||||
|
"A_MP3",
|
||||||
|
"A_MPEG/L3",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Output AAC settings
|
||||||
|
_OUTPUT_CODEC = "aac"
|
||||||
|
_OUTPUT_SAMPLE_FORMAT = "fltp" # AAC requires float planar
|
||||||
|
_OUTPUT_LAYOUT = "stereo"
|
||||||
|
|
||||||
|
# Map channel count -> FFmpeg layout name
|
||||||
|
_CHANNEL_LAYOUT_MAP = {
|
||||||
|
1: "mono",
|
||||||
|
2: "stereo",
|
||||||
|
3: "2.1",
|
||||||
|
4: "quad",
|
||||||
|
6: "5.1",
|
||||||
|
8: "7.1",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def needs_transcode(codec_id: str) -> bool:
|
||||||
|
"""Check if an MKV audio codec needs transcoding for browser playback."""
|
||||||
|
return codec_id in NEEDS_TRANSCODE
|
||||||
|
|
||||||
|
|
||||||
|
def get_ffmpeg_codec_name(mkv_codec_id: str) -> str | None:
|
||||||
|
"""Map an MKV CodecID to an FFmpeg codec name."""
|
||||||
|
return _MKV_TO_FFMPEG_CODEC.get(mkv_codec_id)
|
||||||
|
|
||||||
|
|
||||||
|
class AudioTranscoder:
|
||||||
|
"""
|
||||||
|
In-process audio transcoder using PyAV's CodecContext API.
|
||||||
|
|
||||||
|
Decodes raw audio frames from one codec and encodes them to AAC-LC
|
||||||
|
stereo, suitable for MP4 container and browser playback. No container
|
||||||
|
I/O or subprocess involved -- operates directly on raw frame bytes.
|
||||||
|
|
||||||
|
The transcoder handles sample format conversion and resampling
|
||||||
|
automatically via AudioResampler.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
input_codec: str,
|
||||||
|
input_sample_rate: int = 48000,
|
||||||
|
input_channels: int = 6,
|
||||||
|
output_sample_rate: int = 48000,
|
||||||
|
output_channels: int = 2,
|
||||||
|
output_bitrate: int = 192000,
|
||||||
|
) -> None:
|
||||||
|
"""
|
||||||
|
Initialize the transcoder.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
input_codec: FFmpeg codec name (e.g., "eac3", "ac3", "aac").
|
||||||
|
input_sample_rate: Input sample rate in Hz.
|
||||||
|
input_channels: Input channel count.
|
||||||
|
output_sample_rate: Output sample rate in Hz (default 48000).
|
||||||
|
output_channels: Output channel count (default 2 = stereo).
|
||||||
|
output_bitrate: Output bitrate in bits/s (default 192000).
|
||||||
|
"""
|
||||||
|
# Set up decoder -- use layout to configure channel count
|
||||||
|
# (PyAV's channels property is read-only; layout drives it)
|
||||||
|
self._decoder = av.CodecContext.create(input_codec, "r")
|
||||||
|
self._decoder.sample_rate = input_sample_rate
|
||||||
|
input_layout = _CHANNEL_LAYOUT_MAP.get(input_channels, "stereo")
|
||||||
|
self._decoder.layout = input_layout
|
||||||
|
|
||||||
|
# Set up encoder
|
||||||
|
self._encoder = av.CodecContext.create(_OUTPUT_CODEC, "w")
|
||||||
|
self._encoder.sample_rate = output_sample_rate
|
||||||
|
self._encoder.layout = _OUTPUT_LAYOUT
|
||||||
|
self._encoder.format = av.AudioFormat(_OUTPUT_SAMPLE_FORMAT)
|
||||||
|
self._encoder.bit_rate = output_bitrate
|
||||||
|
self._encoder.open()
|
||||||
|
|
||||||
|
# Set up resampler for format/rate/channel conversion
|
||||||
|
self._resampler = AudioResampler(
|
||||||
|
format=_OUTPUT_SAMPLE_FORMAT,
|
||||||
|
layout=_OUTPUT_LAYOUT,
|
||||||
|
rate=output_sample_rate,
|
||||||
|
)
|
||||||
|
|
||||||
|
self._input_codec = input_codec
|
||||||
|
self._frames_decoded = 0
|
||||||
|
self._frames_encoded = 0
|
||||||
|
self._audio_specific_config: bytes | None = None
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"[audio_transcoder] Initialized: %s %dHz %dch -> aac %dHz %dch @%dk",
|
||||||
|
input_codec,
|
||||||
|
input_sample_rate,
|
||||||
|
input_channels,
|
||||||
|
output_sample_rate,
|
||||||
|
output_channels,
|
||||||
|
output_bitrate // 1000,
|
||||||
|
)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def audio_specific_config(self) -> bytes | None:
|
||||||
|
"""
|
||||||
|
AAC AudioSpecificConfig from the encoder (available after first encode).
|
||||||
|
|
||||||
|
This is needed for the MP4 esds box.
|
||||||
|
"""
|
||||||
|
if self._audio_specific_config is not None:
|
||||||
|
return self._audio_specific_config
|
||||||
|
|
||||||
|
# PyAV exposes extradata after the encoder is opened
|
||||||
|
if self._encoder.extradata:
|
||||||
|
self._audio_specific_config = bytes(self._encoder.extradata)
|
||||||
|
return self._audio_specific_config
|
||||||
|
return None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def output_sample_rate(self) -> int:
|
||||||
|
return self._encoder.sample_rate
|
||||||
|
|
||||||
|
@property
|
||||||
|
def output_channels(self) -> int:
|
||||||
|
return self._encoder.channels
|
||||||
|
|
||||||
|
@property
|
||||||
|
def frame_size(self) -> int:
|
||||||
|
"""AAC frame size (samples per frame), typically 1024."""
|
||||||
|
return self._encoder.frame_size or 1024
|
||||||
|
|
||||||
|
def transcode(self, raw_frame_data: bytes) -> list[bytes]:
|
||||||
|
"""
|
||||||
|
Transcode a raw audio frame from the input codec to AAC.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
raw_frame_data: Raw audio frame bytes (one codec frame, e.g.,
|
||||||
|
one EAC3 sync frame).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of raw AAC frame bytes. May return 0, 1, or more frames
|
||||||
|
depending on codec frame sizes and buffering.
|
||||||
|
"""
|
||||||
|
output = []
|
||||||
|
|
||||||
|
# Parse raw bytes into packets
|
||||||
|
packets = self._decoder.parse(raw_frame_data)
|
||||||
|
|
||||||
|
for packet in packets:
|
||||||
|
# Decode to PCM frames
|
||||||
|
try:
|
||||||
|
decoded_frames = self._decoder.decode(packet)
|
||||||
|
except av.error.InvalidDataError as e:
|
||||||
|
logger.debug("[audio_transcoder] Decode error (skipping frame): %s", e)
|
||||||
|
continue
|
||||||
|
|
||||||
|
for frame in decoded_frames:
|
||||||
|
self._frames_decoded += 1
|
||||||
|
|
||||||
|
# Resample to match encoder format
|
||||||
|
resampled = self._resampler.resample(frame)
|
||||||
|
if resampled is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# resampled can be a single frame or list of frames
|
||||||
|
if not isinstance(resampled, list):
|
||||||
|
resampled = [resampled]
|
||||||
|
|
||||||
|
for rs_frame in resampled:
|
||||||
|
# Encode to AAC
|
||||||
|
try:
|
||||||
|
encoded_packets = self._encoder.encode(rs_frame)
|
||||||
|
except av.error.InvalidDataError as e:
|
||||||
|
logger.debug("[audio_transcoder] Encode error: %s", e)
|
||||||
|
continue
|
||||||
|
|
||||||
|
for enc_packet in encoded_packets:
|
||||||
|
self._frames_encoded += 1
|
||||||
|
output.append(bytes(enc_packet))
|
||||||
|
|
||||||
|
return output
|
||||||
|
|
||||||
|
def flush(self) -> list[bytes]:
|
||||||
|
"""
|
||||||
|
Flush the decoder and encoder buffers.
|
||||||
|
|
||||||
|
Call this when the input stream ends to get remaining frames.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of remaining raw AAC frame bytes.
|
||||||
|
"""
|
||||||
|
output = []
|
||||||
|
|
||||||
|
# Flush decoder
|
||||||
|
try:
|
||||||
|
for frame in self._decoder.decode(None):
|
||||||
|
self._frames_decoded += 1
|
||||||
|
resampled = self._resampler.resample(frame)
|
||||||
|
if resampled is None:
|
||||||
|
continue
|
||||||
|
if not isinstance(resampled, list):
|
||||||
|
resampled = [resampled]
|
||||||
|
for rs_frame in resampled:
|
||||||
|
for enc_packet in self._encoder.encode(rs_frame):
|
||||||
|
self._frames_encoded += 1
|
||||||
|
output.append(bytes(enc_packet))
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug("[audio_transcoder] Decoder flush error: %s", e)
|
||||||
|
|
||||||
|
# Flush resampler
|
||||||
|
try:
|
||||||
|
resampled = self._resampler.resample(None)
|
||||||
|
if resampled is not None:
|
||||||
|
if not isinstance(resampled, list):
|
||||||
|
resampled = [resampled]
|
||||||
|
for rs_frame in resampled:
|
||||||
|
for enc_packet in self._encoder.encode(rs_frame):
|
||||||
|
self._frames_encoded += 1
|
||||||
|
output.append(bytes(enc_packet))
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug("[audio_transcoder] Resampler flush error: %s", e)
|
||||||
|
|
||||||
|
# Flush encoder
|
||||||
|
try:
|
||||||
|
for enc_packet in self._encoder.encode(None):
|
||||||
|
self._frames_encoded += 1
|
||||||
|
output.append(bytes(enc_packet))
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug("[audio_transcoder] Encoder flush error: %s", e)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"[audio_transcoder] Flushed: %d decoded, %d encoded total",
|
||||||
|
self._frames_decoded,
|
||||||
|
self._frames_encoded,
|
||||||
|
)
|
||||||
|
return output
|
||||||
|
|
||||||
|
def generate_silence_frame(self) -> bytes | None:
|
||||||
|
"""Return a pre-encoded silent AAC frame (module-level singleton)."""
|
||||||
|
return _SILENCE_AAC_FRAME
|
||||||
|
|
||||||
|
def close(self) -> None:
|
||||||
|
"""Release codec contexts (best-effort; PyAV AudioCodecContext may not have close())."""
|
||||||
|
for ctx in (self._decoder, self._encoder):
|
||||||
|
try:
|
||||||
|
if hasattr(ctx, "close"):
|
||||||
|
ctx.close()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def __del__(self) -> None:
|
||||||
|
self.close()
|
||||||
515
mediaflow_proxy/remuxer/codec_utils.py
Normal file
515
mediaflow_proxy/remuxer/codec_utils.py
Normal file
@@ -0,0 +1,515 @@
|
|||||||
|
"""
|
||||||
|
Codec decision engine for browser compatibility detection.
|
||||||
|
|
||||||
|
Determines whether video/audio streams need transcoding for browser
|
||||||
|
playback and selects appropriate output codecs.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import struct
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# ────────────────────────────────────────────────────────────────────
|
||||||
|
# Browser-compatible codecs (work natively in HTML5 <video>)
|
||||||
|
# ────────────────────────────────────────────────────────────────────
|
||||||
|
BROWSER_VIDEO_CODECS = frozenset(
|
||||||
|
{
|
||||||
|
"V_MPEG4/ISO/AVC", # H.264/AVC -- universal
|
||||||
|
"h264",
|
||||||
|
"avc1", # FFmpeg/PyAV names
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
BROWSER_AUDIO_CODECS = frozenset(
|
||||||
|
{
|
||||||
|
"A_AAC", # AAC-LC -- universal
|
||||||
|
"A_AAC/MPEG2/LC",
|
||||||
|
"A_AAC/MPEG4/LC",
|
||||||
|
"aac", # FFmpeg/PyAV name
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# ────────────────────────────────────────────────────────────────────
|
||||||
|
# Video codecs that need re-encoding to H.264
|
||||||
|
# ────────────────────────────────────────────────────────────────────
|
||||||
|
VIDEO_NEEDS_REENCODE = frozenset(
|
||||||
|
{
|
||||||
|
"V_MPEGH/ISO/HEVC", # H.265/HEVC (Chrome/Firefox don't support)
|
||||||
|
"V_MPEG2", # MPEG-2 (DVD-era)
|
||||||
|
"V_MPEG4/ISO/SP", # MPEG-4 Part 2 Simple Profile
|
||||||
|
"V_MPEG4/ISO/ASP", # MPEG-4 Part 2 Advanced Simple (DivX/Xvid)
|
||||||
|
"V_MPEG4/ISO/AP", # MPEG-4 Part 2 Advanced Profile
|
||||||
|
"V_MPEG4/MS/V3", # MS MPEG-4 v3 (WMV)
|
||||||
|
"V_MS/VFW/FOURCC", # Generic VFW (VC-1, etc.)
|
||||||
|
"V_REAL/RV10",
|
||||||
|
"V_REAL/RV20",
|
||||||
|
"V_REAL/RV30",
|
||||||
|
"V_REAL/RV40",
|
||||||
|
"V_THEORA",
|
||||||
|
"V_VP8",
|
||||||
|
"V_VP9", # VP9 in MKV (needs WebM container for browser)
|
||||||
|
"V_AV1", # AV1 (partial support, safer to reencode)
|
||||||
|
# PyAV / FFmpeg codec names
|
||||||
|
"hevc",
|
||||||
|
"h265",
|
||||||
|
"mpeg2video",
|
||||||
|
"mpeg4",
|
||||||
|
"vc1",
|
||||||
|
"vp8",
|
||||||
|
"vp9",
|
||||||
|
"av1",
|
||||||
|
"theora",
|
||||||
|
"wmv3",
|
||||||
|
"rv30",
|
||||||
|
"rv40",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# ────────────────────────────────────────────────────────────────────
|
||||||
|
# Audio codecs that need transcoding to AAC
|
||||||
|
# (superset of the list in audio_transcoder.py, uses both MKV and
|
||||||
|
# PyAV codec names for universal lookup)
|
||||||
|
# ────────────────────────────────────────────────────────────────────
|
||||||
|
AUDIO_NEEDS_TRANSCODE = frozenset(
|
||||||
|
{
|
||||||
|
# MKV CodecIDs
|
||||||
|
"A_EAC3",
|
||||||
|
"A_AC3",
|
||||||
|
"A_DTS",
|
||||||
|
"A_DTS/EXPRESS",
|
||||||
|
"A_DTS/LOSSLESS",
|
||||||
|
"A_OPUS",
|
||||||
|
"A_VORBIS",
|
||||||
|
"A_FLAC",
|
||||||
|
"A_TRUEHD",
|
||||||
|
"A_MLP",
|
||||||
|
"A_PCM/INT/LIT",
|
||||||
|
"A_PCM/INT/BIG",
|
||||||
|
"A_PCM/FLOAT/IEEE",
|
||||||
|
"A_REAL/28_8",
|
||||||
|
"A_REAL/COOK",
|
||||||
|
"A_REAL/SIPR",
|
||||||
|
"A_REAL/ATRC",
|
||||||
|
"A_MS/ACM", # Generic Windows audio
|
||||||
|
"A_MP3",
|
||||||
|
"A_MPEG/L3",
|
||||||
|
# PyAV / FFmpeg names
|
||||||
|
"eac3",
|
||||||
|
"ac3",
|
||||||
|
"dts",
|
||||||
|
"dca",
|
||||||
|
"truehd",
|
||||||
|
"mlp",
|
||||||
|
"mp3",
|
||||||
|
"opus",
|
||||||
|
"vorbis",
|
||||||
|
"flac",
|
||||||
|
"pcm_s16le",
|
||||||
|
"pcm_s24le",
|
||||||
|
"pcm_f32le",
|
||||||
|
"wmav2",
|
||||||
|
"wmavoice",
|
||||||
|
"wmapro",
|
||||||
|
"cook",
|
||||||
|
"sipr",
|
||||||
|
"atrac3",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Map PyAV codec names to MKV CodecIDs (for the MKV fast-path)
|
||||||
|
_PYAV_TO_MKV_VIDEO = {
|
||||||
|
"h264": "V_MPEG4/ISO/AVC",
|
||||||
|
"hevc": "V_MPEGH/ISO/HEVC",
|
||||||
|
"h265": "V_MPEGH/ISO/HEVC",
|
||||||
|
"mpeg2video": "V_MPEG2",
|
||||||
|
"vp8": "V_VP8",
|
||||||
|
"vp9": "V_VP9",
|
||||||
|
"av1": "V_AV1",
|
||||||
|
}
|
||||||
|
|
||||||
|
_PYAV_TO_MKV_AUDIO = {
|
||||||
|
"aac": "A_AAC",
|
||||||
|
"eac3": "A_EAC3",
|
||||||
|
"ac3": "A_AC3",
|
||||||
|
"dts": "A_DTS",
|
||||||
|
"opus": "A_OPUS",
|
||||||
|
"vorbis": "A_VORBIS",
|
||||||
|
"flac": "A_FLAC",
|
||||||
|
"mp3": "A_MPEG/L3",
|
||||||
|
"truehd": "A_TRUEHD",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ────────────────────────────────────────────────────────────────────
|
||||||
|
# NAL unit format conversion (Annex B ↔ AVCC)
|
||||||
|
# ────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
# H.264 NAL types that belong in the init segment (avcC), not in samples
|
||||||
|
_H264_PARAM_NAL_TYPES = frozenset({7, 8, 9}) # SPS, PPS, AUD
|
||||||
|
|
||||||
|
|
||||||
|
def _find_annexb_nals(data: bytes) -> list[tuple[int, int]]:
|
||||||
|
"""
|
||||||
|
Find all NAL unit [start, end) byte ranges in Annex B formatted data.
|
||||||
|
|
||||||
|
Handles both 3-byte (00 00 01) and 4-byte (00 00 00 01) start codes.
|
||||||
|
Returns a list of (start, end) tuples pointing into *data*.
|
||||||
|
"""
|
||||||
|
size = len(data)
|
||||||
|
nals: list[tuple[int, int]] = []
|
||||||
|
i = 0
|
||||||
|
|
||||||
|
while i < size - 2:
|
||||||
|
# Scan for 0x000001 or 0x00000001
|
||||||
|
if data[i] != 0:
|
||||||
|
i += 1
|
||||||
|
continue
|
||||||
|
if data[i + 1] != 0:
|
||||||
|
i += 2
|
||||||
|
continue
|
||||||
|
if data[i + 2] == 1:
|
||||||
|
nal_start = i + 3
|
||||||
|
elif data[i + 2] == 0 and i + 3 < size and data[i + 3] == 1:
|
||||||
|
nal_start = i + 4
|
||||||
|
else:
|
||||||
|
i += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Record end of previous NAL
|
||||||
|
if nals:
|
||||||
|
nals[-1] = (nals[-1][0], i)
|
||||||
|
nals.append((nal_start, size))
|
||||||
|
i = nal_start
|
||||||
|
|
||||||
|
return nals
|
||||||
|
|
||||||
|
|
||||||
|
def is_annexb(data: bytes) -> bool:
    """
    Heuristically decide whether *data* is Annex B formatted.

    A 4-byte start code (00 00 00 01) is unambiguous. A 3-byte start
    code (00 00 01) can collide with an AVCC 4-byte length prefix in
    the range 256..511; in that case the AVCC reading wins when the
    big-endian length fits inside the buffer and the following byte is
    a plausible H.264 NAL header (forbidden_zero_bit clear,
    nal_unit_type 1..12).
    """
    if len(data) < 5:
        return False

    prefix = data[:4]
    if prefix == b"\x00\x00\x00\x01":
        # Unambiguous 4-byte Annex B start code.
        return True

    if prefix[:3] == b"\x00\x00\x01":
        # Could be AVCC: the prefix read as a 32-bit length is 256..511.
        candidate_len = int.from_bytes(prefix, "big")
        if 0 < candidate_len <= len(data) - 4:
            header = data[4]
            if (header & 0x80) == 0 and 1 <= (header & 0x1F) <= 12:
                # Consistent AVCC interpretation -- not Annex B.
                return False
        return True

    return False
|
||||||
|
|
||||||
|
|
||||||
|
def annexb_to_avcc(data: bytes, filter_ps: bool = True) -> bytes:
    """
    Repackage an Annex B access unit as AVCC (length-prefixed) NALs.

    Args:
        data: H.264 access unit in Annex B format.
        filter_ps: When True, drop SPS/PPS/AUD units; those live in the
            init segment's avcC box rather than in media samples.

    Returns:
        The surviving NAL units, each preceded by a 4-byte big-endian
        length. May be empty if every unit was filtered out -- callers
        should drop such samples, since emitting Annex B bytes into an
        fMP4 sample (which expects AVCC length prefixes) would corrupt it.
    """
    if not data or not is_annexb(data):
        # Empty input, or already AVCC: nothing to convert.
        return data

    ranges = _find_annexb_nals(data)
    if not ranges:
        return data

    pieces: list[bytes] = []
    for begin, stop in ranges:
        # Drop zero padding that precedes the next start code.
        while stop > begin and data[stop - 1] == 0:
            stop -= 1
        if stop <= begin:
            continue

        if filter_ps and (data[begin] & 0x1F) in _H264_PARAM_NAL_TYPES:
            continue

        pieces.append((stop - begin).to_bytes(4, "big"))
        pieces.append(data[begin:stop])

    return b"".join(pieces)
|
||||||
|
|
||||||
|
|
||||||
|
# H.264 profiles that require the avcC High Profile extension fields
# (chroma_format_idc, bit_depth_luma/chroma, numSpsExt).
# Includes High (100), High 10 (110), High 4:2:2 (122), High 4:4:4
# Predictive (244) and the other profile_idc values that use the
# extended avcC layout per ISO/IEC 14496-15.
_HIGH_PROFILE_IDCS = frozenset({100, 110, 122, 244, 44, 83, 86, 118, 128, 138, 139, 134})
|
||||||
|
|
||||||
|
|
||||||
|
def _fix_avcc_high_profile(avcc: bytes) -> bytes:
|
||||||
|
"""
|
||||||
|
Ensure an avcC record includes High Profile extension bytes.
|
||||||
|
|
||||||
|
The ISO/IEC 14496-15 spec requires additional fields after the PPS
|
||||||
|
section when ``AVCProfileIndication`` is 100 (High), 110, 122, or 244.
|
||||||
|
Some MKV muxers omit these, causing decoders to not know the chroma
|
||||||
|
format or bit depth, which leads to widespread decode errors.
|
||||||
|
|
||||||
|
If the extensions are missing, appends the defaults for 4:2:0 / 8-bit
|
||||||
|
with zero extended SPS sets.
|
||||||
|
"""
|
||||||
|
if len(avcc) < 7:
|
||||||
|
return avcc
|
||||||
|
if avcc[0] != 1:
|
||||||
|
return avcc # Not an avcC record
|
||||||
|
|
||||||
|
profile_idc = avcc[1]
|
||||||
|
if profile_idc not in _HIGH_PROFILE_IDCS:
|
||||||
|
return avcc # Not a High Profile variant, no extensions needed
|
||||||
|
|
||||||
|
# Walk past SPS and PPS sections to find where extensions should be
|
||||||
|
off = 5
|
||||||
|
num_sps = avcc[off] & 0x1F
|
||||||
|
off += 1
|
||||||
|
for _ in range(num_sps):
|
||||||
|
if off + 2 > len(avcc):
|
||||||
|
return avcc
|
||||||
|
sps_len = struct.unpack(">H", avcc[off : off + 2])[0]
|
||||||
|
off += 2 + sps_len
|
||||||
|
|
||||||
|
if off >= len(avcc):
|
||||||
|
return avcc
|
||||||
|
num_pps = avcc[off]
|
||||||
|
off += 1
|
||||||
|
for _ in range(num_pps):
|
||||||
|
if off + 2 > len(avcc):
|
||||||
|
return avcc
|
||||||
|
pps_len = struct.unpack(">H", avcc[off : off + 2])[0]
|
||||||
|
off += 2 + pps_len
|
||||||
|
|
||||||
|
# If there are already bytes after the PPS section, extensions exist
|
||||||
|
if off < len(avcc):
|
||||||
|
return avcc
|
||||||
|
|
||||||
|
# Append default High Profile extensions:
|
||||||
|
# chroma_format_idc = 1 (4:2:0) -> 0xFC | 0x01 = 0xFD (reserved 111111 + 01)
|
||||||
|
# bit_depth_luma_minus8 = 0 -> 0xF8 | 0x00 = 0xF8 (reserved 11111 + 000)
|
||||||
|
# bit_depth_chroma_minus8 = 0 -> 0xF8 | 0x00 = 0xF8 (reserved 11111 + 000)
|
||||||
|
# numOfSequenceParameterSetExt = 0
|
||||||
|
ext = bytearray(avcc)
|
||||||
|
ext.append(0xFD) # 111111_01 : chroma_format_idc = 1
|
||||||
|
ext.append(0xF8) # 11111_000 : bit_depth_luma_minus8 = 0
|
||||||
|
ext.append(0xF8) # 11111_000 : bit_depth_chroma_minus8 = 0
|
||||||
|
ext.append(0x00) # numOfSequenceParameterSetExt = 0
|
||||||
|
return bytes(ext)
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_avcc_extradata(extradata: bytes) -> bytes:
    """
    Ensure h264 extradata is in avcC format for the fMP4 init segment.

    PyAV returns extradata in the container's native format:
    - MKV/MP4: avcC format (starts with 0x01)
    - MPEG-TS: Annex B format (starts with 0x00 0x00)

    If Annex B, parses SPS/PPS NAL units and builds proper avcC.
    If already avcC, validates and fixes High Profile extension fields.

    Returns the original bytes unchanged when no SPS can be found or
    the input is too short to be meaningful.
    """
    if not extradata or len(extradata) < 4:
        return extradata

    # Already avcC format (configurationVersion == 1)
    if extradata[0] == 0x01:
        return _fix_avcc_high_profile(extradata)

    # Parse Annex B NAL units to extract SPS and PPS
    nals = _find_annexb_nals(extradata)
    if not nals:
        return extradata

    sps_list: list[bytes] = []
    pps_list: list[bytes] = []

    for start, end in nals:
        # Strip trailing zero-padding before the next start code.
        while end > start and extradata[end - 1] == 0:
            end -= 1
        if end <= start:
            continue
        nal_type = extradata[start] & 0x1F
        nal_data = extradata[start:end]
        if nal_type == 7:  # SPS
            sps_list.append(nal_data)
        elif nal_type == 8:  # PPS
            pps_list.append(nal_data)

    if not sps_list:
        return extradata  # Can't build avcC without SPS

    # Profile/level bytes are read from the first SPS, so it must hold
    # at least the NAL header plus profile/compat/level.
    sps = sps_list[0]
    if len(sps) < 4:
        return extradata

    # Build avcC box content
    avcc = bytearray()
    avcc.append(1)  # configurationVersion
    avcc.append(sps[1])  # AVCProfileIndication
    avcc.append(sps[2])  # profile_compatibility
    avcc.append(sps[3])  # AVCLevelIndication
    avcc.append(0xFF)  # 6 bits reserved (0x3F) + lengthSizeMinusOne=3 -> 4-byte NAL lengths
    avcc.append(0xE0 | len(sps_list))  # 3 bits reserved (0x07) + numOfSPS

    for s in sps_list:
        avcc.extend(struct.pack(">H", len(s)))
        avcc.extend(s)

    avcc.append(len(pps_list))  # numOfPPS
    for p in pps_list:
        avcc.extend(struct.pack(">H", len(p)))
        avcc.extend(p)

    # High Profile records need the extension fields appended too.
    return _fix_avcc_high_profile(bytes(avcc))
|
||||||
|
|
||||||
|
|
||||||
|
def extract_sps_pps_from_annexb(data: bytes) -> bytes:
    """
    Extract SPS and PPS NAL units from Annex B encoded data and build
    an avcC-format extradata blob.

    Hardware encoders like VideoToolbox embed SPS/PPS as in-band NAL
    units in their first keyframe output rather than setting extradata
    on the codec context. This function finds those parameter sets
    and returns proper avcC bytes suitable for the fMP4 init segment.

    Returns:
        avcC bytes if SPS/PPS were found, empty bytes otherwise.
    """
    if not data or not is_annexb(data):
        return b""

    nals = _find_annexb_nals(data)
    if not nals:
        return b""

    sps_list: list[bytes] = []
    pps_list: list[bytes] = []

    for start, end in nals:
        # Strip trailing zero-padding
        while end > start and data[end - 1] == 0:
            end -= 1
        if end <= start:
            continue

        nal_type = data[start] & 0x1F
        if nal_type == 7:  # SPS
            sps_list.append(data[start:end])
        elif nal_type == 8:  # PPS
            pps_list.append(data[start:end])

    if not sps_list:
        return b""

    # Profile/level bytes come from the first SPS; require header + 3 bytes.
    sps = sps_list[0]
    if len(sps) < 4:
        return b""

    # Build avcC box content
    avcc = bytearray()
    avcc.append(1)  # configurationVersion
    avcc.append(sps[1])  # AVCProfileIndication
    avcc.append(sps[2])  # profile_compatibility
    avcc.append(sps[3])  # AVCLevelIndication
    avcc.append(0xFF)  # 6 bits reserved + lengthSizeMinusOne=3
    avcc.append(0xE0 | len(sps_list))  # 3 bits reserved + numOfSPS

    for s in sps_list:
        avcc.extend(struct.pack(">H", len(s)))
        avcc.extend(s)

    avcc.append(len(pps_list))  # numOfPPS
    for p in pps_list:
        avcc.extend(struct.pack(">H", len(p)))
        avcc.extend(p)

    return bytes(avcc)
|
||||||
|
|
||||||
|
|
||||||
|
def video_needs_reencode(codec_id: str) -> bool:
    """Check if a video codec requires re-encoding for browser playback."""
    # An empty/unknown codec id is treated as not needing re-encode.
    return bool(codec_id) and codec_id in VIDEO_NEEDS_REENCODE
|
||||||
|
|
||||||
|
|
||||||
|
def audio_needs_transcode(codec_id: str) -> bool:
    """Check if an audio codec requires transcoding for browser playback."""
    # An empty/unknown codec id is treated as not needing transcode.
    return bool(codec_id) and codec_id in AUDIO_NEEDS_TRANSCODE
|
||||||
|
|
||||||
|
|
||||||
|
def is_browser_compatible(video_codec: str, audio_codec: str) -> bool:
    """
    Check if a video+audio combination is fully browser-compatible.

    Returns True only if BOTH video and audio can be played natively in
    an HTML5 <video> element inside an MP4 container. A missing (falsy)
    codec counts as compatible, since there is no track to transcode.
    """
    if video_codec and video_codec not in BROWSER_VIDEO_CODECS:
        return False
    if audio_codec and audio_codec not in BROWSER_AUDIO_CODECS:
        return False
    return True
|
||||||
|
|
||||||
|
|
||||||
|
class TranscodeDecision:
    """Result of analyzing a stream's codec compatibility."""

    __slots__ = ("transcode_video", "transcode_audio", "video_codec", "audio_codec")

    def __init__(self, video_codec: str = "", audio_codec: str = "") -> None:
        # Record the source codecs and decide, per track, whether they
        # must be converted for native browser playback.
        self.video_codec = video_codec
        self.audio_codec = audio_codec
        self.transcode_video = video_needs_reencode(video_codec)
        self.transcode_audio = audio_needs_transcode(audio_codec)

    @property
    def needs_transcode(self) -> bool:
        """True if any stream needs transcoding."""
        return self.transcode_video or self.transcode_audio

    @property
    def passthrough_ok(self) -> bool:
        """True if the stream can be served as-is to a browser."""
        return not self.needs_transcode

    def __repr__(self) -> str:
        actions = []
        if self.transcode_video:
            actions.append(f"video:{self.video_codec}->h264")
        if self.transcode_audio:
            actions.append(f"audio:{self.audio_codec}->aac")
        summary = ", ".join(actions) if actions else "passthrough"
        return f"TranscodeDecision({summary})"
|
||||||
614
mediaflow_proxy/remuxer/container_probe.py
Normal file
614
mediaflow_proxy/remuxer/container_probe.py
Normal file
@@ -0,0 +1,614 @@
|
|||||||
|
"""
|
||||||
|
Container format probing -- MKV Cues and MP4 moov.
|
||||||
|
|
||||||
|
Pure Python probing using EBML parsing (MKV) and struct-based atom
|
||||||
|
scanning (MP4). No FFmpeg dependency.
|
||||||
|
|
||||||
|
Source-agnostic: accepts any MediaSource protocol implementation
|
||||||
|
(Telegram, HTTP, etc.) for byte-range reads.
|
||||||
|
|
||||||
|
Provides:
|
||||||
|
- probe_mkv_cues: probe MKV file to extract seek index (MKVCueIndex)
|
||||||
|
- probe_mp4_moov: probe MP4 file to extract moov atom and build seek index (MP4Index)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import struct
|
||||||
|
|
||||||
|
from mediaflow_proxy.utils import redis_utils
|
||||||
|
from mediaflow_proxy.remuxer.ebml_parser import (
|
||||||
|
MKVCueIndex,
|
||||||
|
build_cue_index,
|
||||||
|
parse_ebml_header,
|
||||||
|
parse_seek_head,
|
||||||
|
CUES,
|
||||||
|
INFO,
|
||||||
|
)
|
||||||
|
from mediaflow_proxy.remuxer.mp4_parser import (
|
||||||
|
MP4Index,
|
||||||
|
build_cue_points_from_moov,
|
||||||
|
is_mp4_header,
|
||||||
|
rewrite_moov_offsets,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# How much of the MKV header to fetch for SeekHead + Info parsing
_HEADER_PROBE_SIZE = 64 * 1024  # 64 KB

# Max Cues element size we'll attempt to fetch
_MAX_CUES_SIZE = 2 * 1024 * 1024  # 2 MB

# Redis cache for MKV Cue indexes; keys are prefix + per-file cache key
_CUE_INDEX_CACHE_PREFIX = "mfp:cue_index:"
_CUE_INDEX_CACHE_TTL = 3600  # 1 hour
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# MKV Cues probing
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def derive_cue_cache_key(
|
||||||
|
source_key: str = "",
|
||||||
|
*,
|
||||||
|
chat_id: str | int | None = None,
|
||||||
|
message_id: int | None = None,
|
||||||
|
file_id: str | None = None,
|
||||||
|
) -> str:
|
||||||
|
"""
|
||||||
|
Derive a deterministic cache key for a file's cue index.
|
||||||
|
|
||||||
|
Accepts either a pre-computed source_key (from MediaSource.cache_key)
|
||||||
|
or legacy Telegram-style parameters for backwards compatibility.
|
||||||
|
"""
|
||||||
|
if source_key:
|
||||||
|
return source_key
|
||||||
|
if file_id:
|
||||||
|
raw = f"file_id:{file_id}"
|
||||||
|
elif chat_id is not None and message_id is not None:
|
||||||
|
raw = f"chat:{chat_id}:msg:{message_id}"
|
||||||
|
else:
|
||||||
|
return ""
|
||||||
|
return hashlib.sha256(raw.encode()).hexdigest()[:16]
|
||||||
|
|
||||||
|
|
||||||
|
async def _get_cached_cue_index(cache_key: str) -> MKVCueIndex | None:
    """Try to load a MKVCueIndex from Redis cache.

    Returns None (best effort) when the key is empty, Redis is
    unavailable, the entry is missing, or the cached JSON is invalid.
    """
    if not cache_key:
        return None
    r = await redis_utils.get_redis()
    if r is None:
        # Redis not configured/reachable -- caching is optional.
        return None
    redis_key = f"{_CUE_INDEX_CACHE_PREFIX}{cache_key}"
    data = await r.get(redis_key)
    if not data:
        return None
    try:
        d = json.loads(data)
        # Binary fields are stored base64-encoded in the JSON payload.
        seek_header = b""
        if d.get("seek_header_b64"):
            seek_header = base64.b64decode(d["seek_header_b64"])
        video_codec_private = b""
        if d.get("video_codec_private_b64"):
            video_codec_private = base64.b64decode(d["video_codec_private_b64"])
        index = MKVCueIndex(
            duration_ms=d["duration_ms"],
            timestamp_scale=d["timestamp_scale"],
            # JSON turns tuples into lists; rebuild (time, offset) tuples.
            cue_points=[(cp[0], cp[1]) for cp in d["cue_points"]],
            segment_data_offset=d["segment_data_offset"],
            first_cluster_offset=d.get("first_cluster_offset", 0),
            seek_header=seek_header,
            audio_codec_id=d.get("audio_codec_id", ""),
            audio_bitrate=d.get("audio_bitrate", 0),
            audio_channels=d.get("audio_channels", 0),
            audio_sample_rate=d.get("audio_sample_rate", 0.0),
            video_codec_id=d.get("video_codec_id", ""),
            video_codec_private=video_codec_private,
            video_width=d.get("video_width", 0),
            video_height=d.get("video_height", 0),
            video_fps=d.get("video_fps", 0.0),
            video_default_duration_ns=d.get("video_default_duration_ns", 0),
        )
        logger.debug("[container_probe] Loaded cue index from cache: %s", cache_key)
        return index
    except (KeyError, TypeError, json.JSONDecodeError) as e:
        # Malformed/stale cache entry: log and fall back to re-probing.
        logger.warning("[container_probe] Invalid cached cue index: %s", e)
        return None
|
||||||
|
|
||||||
|
|
||||||
|
async def _set_cached_cue_index(cache_key: str, index: MKVCueIndex) -> None:
    """Cache a MKVCueIndex in Redis.

    No-op when the key is empty or Redis is unavailable. Binary fields
    are base64-encoded so the payload is valid JSON; entries expire
    after _CUE_INDEX_CACHE_TTL seconds.
    """
    if not cache_key:
        return
    r = await redis_utils.get_redis()
    if r is None:
        # Redis not configured/reachable -- caching is optional.
        return
    redis_key = f"{_CUE_INDEX_CACHE_PREFIX}{cache_key}"
    data = json.dumps(
        {
            "duration_ms": index.duration_ms,
            "timestamp_scale": index.timestamp_scale,
            "cue_points": index.cue_points,
            "segment_data_offset": index.segment_data_offset,
            "first_cluster_offset": index.first_cluster_offset,
            "seek_header_b64": base64.b64encode(index.seek_header).decode() if index.seek_header else "",
            "audio_codec_id": index.audio_codec_id,
            "audio_bitrate": index.audio_bitrate,
            "audio_channels": index.audio_channels,
            "audio_sample_rate": index.audio_sample_rate,
            "video_codec_id": index.video_codec_id,
            "video_codec_private_b64": base64.b64encode(index.video_codec_private).decode()
            if index.video_codec_private
            else "",
            "video_width": index.video_width,
            "video_height": index.video_height,
            "video_fps": index.video_fps,
            "video_default_duration_ns": index.video_default_duration_ns,
        }
    )
    await r.set(redis_key, data, ex=_CUE_INDEX_CACHE_TTL)
    logger.debug("[container_probe] Cached cue index: %s", cache_key)
|
||||||
|
|
||||||
|
|
||||||
|
async def probe_mkv_cues(
    source,
    file_size: int = 0,
    cache_key: str = "",
    header_data: bytes | None = None,
) -> MKVCueIndex | None:
    """
    Probe an MKV file's EBML header and Cues to build a seek index.

    Pure Python -- parses EBML structures directly, no FFmpeg involved.

    Makes up to two small byte-range reads via the provided source:
    1. First ~64KB: EBML header + SeekHead + Info (skipped if header_data provided)
    2. Cues section: byte range from SeekHead's Cues position

    Results are cached in Redis (when available) under ``cache_key``.

    Args:
        source: A MediaSource protocol implementation, or any object with
            a ``stream(offset, limit)`` async generator method.
        file_size: Total file size in bytes. If 0, tries ``source.file_size``.
        cache_key: Optional cache key for Redis caching. If empty, tries
            ``source.cache_key``.
        header_data: Pre-fetched header bytes (first ~64KB). If provided,
            skips the initial header fetch from source.

    Returns:
        MKVCueIndex if successful, None if the file has no Cues or parsing fails.
    """
    # Resolve file_size and cache_key from source if not provided
    if file_size <= 0:
        file_size = getattr(source, "file_size", 0)
    if not cache_key:
        cache_key = getattr(source, "cache_key", "")

    # Check cache first
    if cache_key:
        cached = await _get_cached_cue_index(cache_key)
        if cached:
            return cached

    try:
        # Step 1: Use pre-fetched header or fetch from source
        if header_data is None:
            header_size = min(_HEADER_PROBE_SIZE, file_size) if file_size > 0 else _HEADER_PROBE_SIZE
            header_data = b""
            async for chunk in source.stream(offset=0, limit=header_size):
                header_data += chunk

        if len(header_data) < 64:
            logger.warning("[container_probe] Header too small (%d bytes), cannot probe", len(header_data))
            return None

        # Step 2: Parse EBML header to find Segment data offset
        segment_data_offset = parse_ebml_header(header_data)

        # Step 3: Parse SeekHead to find Cues and Info positions
        # (SeekHead positions are relative to the Segment data start).
        seek_positions = parse_seek_head(header_data, segment_data_offset)

        if CUES not in seek_positions:
            logger.info("[container_probe] No Cues position in SeekHead, seeking not available")
            return None

        cues_relative_offset = seek_positions[CUES]
        cues_absolute_offset = segment_data_offset + cues_relative_offset

        logger.info(
            "[container_probe] SeekHead: Cues at offset %d (absolute %d), Info at %s",
            cues_relative_offset,
            cues_absolute_offset,
            seek_positions.get(INFO, "not found"),
        )

        # Step 4: Fetch the Cues element (capped at _MAX_CUES_SIZE)
        cues_max = file_size - cues_absolute_offset if file_size > 0 else _MAX_CUES_SIZE
        cues_fetch_size = min(_MAX_CUES_SIZE, cues_max)
        if cues_fetch_size <= 0:
            logger.warning("[container_probe] Cues offset %d beyond file size %d", cues_absolute_offset, file_size)
            return None

        cues_data = b""
        async for chunk in source.stream(offset=cues_absolute_offset, limit=cues_fetch_size):
            cues_data += chunk

        if len(cues_data) < 16:
            logger.warning("[container_probe] Cues data too small (%d bytes)", len(cues_data))
            return None

        # Step 5: Build the cue index
        index = build_cue_index(
            header_data=header_data,
            cues_data=cues_data,
            cues_file_offset=cues_absolute_offset,
            segment_data_offset=segment_data_offset,
        )

        # Cache the result
        if cache_key:
            await _set_cached_cue_index(cache_key, index)

        return index

    except Exception as e:
        # Broad catch is deliberate: probing is best-effort and must not
        # break playback; any failure just disables fast seeking.
        logger.warning("[container_probe] Failed to probe MKV cues: %s", e)
        return None
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# MP4 Moov probing
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Redis cache for MP4 indexes; keys are prefix + per-file cache key
_MP4_INDEX_CACHE_PREFIX = "mfp:mp4_index:"
_MP4_INDEX_CACHE_TTL = 3600  # 1 hour

# How much to read from the start for ftyp + initial atom scanning
_MP4_HEADER_PROBE_SIZE = 64 * 1024  # 64 KB

# Max moov size we'll accept
_MAX_MOOV_SIZE = 50 * 1024 * 1024  # 50 MB

# How much to read from the end of the file to find moov
# (non-faststart files place moov after mdat, near EOF)
_MP4_TAIL_PROBE_SIZE = 512 * 1024  # 512 KB
|
||||||
|
|
||||||
|
|
||||||
|
async def _get_cached_mp4_index(cache_key: str) -> MP4Index | None:
    """Try to load an MP4Index from Redis cache.

    Returns None (best effort) when the key is empty, Redis is
    unavailable, the entry is missing, or the cached JSON is invalid.
    The returned index has no ``moov_data``; callers re-fetch it.
    """
    if not cache_key:
        return None
    r = await redis_utils.get_redis()
    if r is None:
        # Redis not configured/reachable -- caching is optional.
        return None
    redis_key = f"{_MP4_INDEX_CACHE_PREFIX}{cache_key}"
    data = await r.get(redis_key)
    if not data:
        return None
    try:
        d = json.loads(data)
        # ftyp is small and stored base64-encoded in the JSON payload.
        ftyp_data = b""
        if d.get("ftyp_data_b64"):
            ftyp_data = base64.b64decode(d["ftyp_data_b64"])
        index = MP4Index(
            duration_ms=d["duration_ms"],
            timescale=d["timescale"],
            # JSON turns tuples into lists; rebuild (time, offset) tuples.
            cue_points=[(cp[0], cp[1]) for cp in d["cue_points"]],
            moov_offset=d["moov_offset"],
            moov_size=d["moov_size"],
            ftyp_data=ftyp_data,
            mdat_offset=d["mdat_offset"],
            mdat_size=d["mdat_size"],
            video_codec=d.get("video_codec", ""),
            audio_codec=d.get("audio_codec", ""),
            # moov_data is NOT cached (too large), it will be re-fetched
        )
        logger.debug("[container_probe] Loaded MP4 index from cache: %s", cache_key)
        return index
    except (KeyError, TypeError, json.JSONDecodeError) as e:
        # Malformed/stale cache entry: log and fall back to re-probing.
        logger.warning("[container_probe] Invalid cached MP4 index: %s", e)
        return None
|
||||||
|
|
||||||
|
|
||||||
|
async def _set_cached_mp4_index(cache_key: str, index: MP4Index) -> None:
    """Cache an MP4Index in Redis (without moov_data).

    No-op when the key is empty or Redis is unavailable. ``moov_data``
    is deliberately excluded (it can be tens of MB); only the small
    ftyp blob is stored, base64-encoded. Entries expire after
    _MP4_INDEX_CACHE_TTL seconds.
    """
    if not cache_key:
        return
    r = await redis_utils.get_redis()
    if r is None:
        # Redis not configured/reachable -- caching is optional.
        return
    redis_key = f"{_MP4_INDEX_CACHE_PREFIX}{cache_key}"
    data = json.dumps(
        {
            "duration_ms": index.duration_ms,
            "timescale": index.timescale,
            "cue_points": index.cue_points,
            "moov_offset": index.moov_offset,
            "moov_size": index.moov_size,
            "ftyp_data_b64": base64.b64encode(index.ftyp_data).decode() if index.ftyp_data else "",
            "mdat_offset": index.mdat_offset,
            "mdat_size": index.mdat_size,
            "video_codec": index.video_codec,
            "audio_codec": index.audio_codec,
        }
    )
    await r.set(redis_key, data, ex=_MP4_INDEX_CACHE_TTL)
    logger.debug("[container_probe] Cached MP4 index: %s", cache_key)
|
||||||
|
|
||||||
|
|
||||||
|
def _scan_top_level_atoms(data: bytes) -> list[tuple[bytes, int, int]]:
|
||||||
|
"""
|
||||||
|
Scan top-level atom headers from raw file bytes.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of (box_type, absolute_offset, total_size) for each atom found.
|
||||||
|
"""
|
||||||
|
atoms = []
|
||||||
|
offset = 0
|
||||||
|
while offset + 8 <= len(data):
|
||||||
|
size = struct.unpack_from(">I", data, offset)[0]
|
||||||
|
box_type = data[offset + 4 : offset + 8]
|
||||||
|
|
||||||
|
if size == 1: # Extended size
|
||||||
|
if offset + 16 > len(data):
|
||||||
|
break
|
||||||
|
size = struct.unpack_from(">Q", data, offset + 8)[0]
|
||||||
|
elif size == 0:
|
||||||
|
# Extends to end of file - we can't know the real size from
|
||||||
|
# a partial read, but record what we have
|
||||||
|
atoms.append((box_type, offset, 0))
|
||||||
|
break
|
||||||
|
|
||||||
|
if size < 8:
|
||||||
|
break
|
||||||
|
|
||||||
|
atoms.append((box_type, offset, size))
|
||||||
|
offset += size
|
||||||
|
|
||||||
|
return atoms
|
||||||
|
|
||||||
|
|
||||||
|
async def probe_mp4_moov(
|
||||||
|
source,
|
||||||
|
file_size: int = 0,
|
||||||
|
cache_key: str = "",
|
||||||
|
header_data: bytes | None = None,
|
||||||
|
) -> MP4Index | None:
|
||||||
|
"""
|
||||||
|
Probe an MP4 file's moov atom to build a seek index.
|
||||||
|
|
||||||
|
Pure Python -- scans MP4 box headers with struct, no FFmpeg involved.
|
||||||
|
|
||||||
|
Strategy:
|
||||||
|
1. Read first ~64KB to check for ftyp (MP4 signature).
|
||||||
|
2. Scan top-level atoms to find moov and mdat.
|
||||||
|
3. If moov is at the start (faststart), read it from the header data.
|
||||||
|
4. If moov is not in the header, read from the tail of the file.
|
||||||
|
5. Parse moov sample tables to build cue points.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
source: A MediaSource protocol implementation with stream(offset, limit).
|
||||||
|
file_size: Total file size in bytes.
|
||||||
|
cache_key: Optional cache key for Redis caching.
|
||||||
|
header_data: Pre-fetched header bytes (first ~64KB). If provided,
|
||||||
|
skips the initial header fetch from source.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
MP4Index if successful, None if not an MP4 or parsing fails.
|
||||||
|
"""
|
||||||
|
if file_size <= 0:
|
||||||
|
file_size = getattr(source, "file_size", 0)
|
||||||
|
if not cache_key:
|
||||||
|
cache_key = getattr(source, "cache_key", "")
|
||||||
|
|
||||||
|
# Check cache first
|
||||||
|
if cache_key:
|
||||||
|
cached = await _get_cached_mp4_index(cache_key)
|
||||||
|
if cached:
|
||||||
|
# Re-fetch moov_data (not cached due to size) and rewrite offsets
|
||||||
|
if cached.moov_size > 0 and cached.moov_size <= _MAX_MOOV_SIZE:
|
||||||
|
moov_data = b""
|
||||||
|
async for chunk in source.stream(offset=cached.moov_offset, limit=cached.moov_size):
|
||||||
|
moov_data += chunk
|
||||||
|
if cached.mdat_offset >= 0:
|
||||||
|
new_mdat_start = len(cached.ftyp_data) + cached.moov_size
|
||||||
|
offset_delta = new_mdat_start - cached.mdat_offset
|
||||||
|
if offset_delta != 0:
|
||||||
|
moov_data = rewrite_moov_offsets(moov_data, offset_delta)
|
||||||
|
cached.moov_data = moov_data
|
||||||
|
return cached
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Step 1: Use pre-fetched header or fetch from source
|
||||||
|
if header_data is None:
|
||||||
|
header_size = min(_MP4_HEADER_PROBE_SIZE, file_size) if file_size > 0 else _MP4_HEADER_PROBE_SIZE
|
||||||
|
header_data = b""
|
||||||
|
async for chunk in source.stream(offset=0, limit=header_size):
|
||||||
|
header_data += chunk
|
||||||
|
|
||||||
|
if len(header_data) < 12:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Step 2: Check for ftyp
|
||||||
|
if not is_mp4_header(header_data):
|
||||||
|
return None
|
||||||
|
|
||||||
|
logger.info("[container_probe] MP4 detected, scanning atoms (header=%d bytes)", len(header_data))
|
||||||
|
|
||||||
|
# Step 3: Scan top-level atoms from header
|
||||||
|
atoms = _scan_top_level_atoms(header_data)
|
||||||
|
|
||||||
|
ftyp_offset = -1
|
||||||
|
ftyp_size = 0
|
||||||
|
moov_offset = -1
|
||||||
|
moov_size = 0
|
||||||
|
mdat_offset = -1
|
||||||
|
mdat_size = 0
|
||||||
|
|
||||||
|
for box_type, atom_offset, atom_size in atoms:
|
||||||
|
if box_type == b"ftyp":
|
||||||
|
ftyp_offset = atom_offset
|
||||||
|
ftyp_size = atom_size
|
||||||
|
elif box_type == b"moov":
|
||||||
|
moov_offset = atom_offset
|
||||||
|
moov_size = atom_size
|
||||||
|
elif box_type == b"mdat":
|
||||||
|
mdat_offset = atom_offset
|
||||||
|
mdat_size = atom_size
|
||||||
|
|
||||||
|
# Step 4: If moov not found in header, scan from tail
|
||||||
|
if moov_offset < 0 and file_size > 0:
|
||||||
|
tail_start = max(0, file_size - _MP4_TAIL_PROBE_SIZE)
|
||||||
|
tail_data = b""
|
||||||
|
async for chunk in source.stream(offset=tail_start, limit=file_size - tail_start):
|
||||||
|
tail_data += chunk
|
||||||
|
|
||||||
|
if tail_data:
|
||||||
|
tail_atoms = _scan_top_level_atoms(tail_data)
|
||||||
|
for box_type, rel_offset, atom_size in tail_atoms:
|
||||||
|
abs_offset = tail_start + rel_offset
|
||||||
|
if box_type == b"moov":
|
||||||
|
moov_offset = abs_offset
|
||||||
|
moov_size = atom_size
|
||||||
|
elif box_type == b"mdat" and mdat_offset < 0:
|
||||||
|
mdat_offset = abs_offset
|
||||||
|
mdat_size = atom_size
|
||||||
|
|
||||||
|
# If the initial scan yielded no moov (tail_start may land
|
||||||
|
# inside a large mdat payload producing garbage atom headers),
|
||||||
|
# resync by scanning 8-byte aligned windows for b"moov".
|
||||||
|
if moov_offset < 0:
|
||||||
|
needle = b"moov"
|
||||||
|
search_pos = 0
|
||||||
|
while search_pos + 8 <= len(tail_data):
|
||||||
|
idx = tail_data.find(needle, search_pos)
|
||||||
|
if idx < 0 or idx < 4:
|
||||||
|
break
|
||||||
|
candidate_size = struct.unpack_from(">I", tail_data, idx - 4)[0]
|
||||||
|
if 8 < candidate_size <= _MAX_MOOV_SIZE:
|
||||||
|
moov_offset = tail_start + idx - 4
|
||||||
|
moov_size = candidate_size
|
||||||
|
break
|
||||||
|
search_pos = idx + 4
|
||||||
|
|
||||||
|
if moov_offset < 0:
|
||||||
|
logger.info("[container_probe] No moov atom found in MP4")
|
||||||
|
return None
|
||||||
|
|
||||||
|
if moov_size <= 0 or moov_size > _MAX_MOOV_SIZE:
|
||||||
|
logger.warning("[container_probe] moov size %d is invalid or too large", moov_size)
|
||||||
|
return None
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"[container_probe] MP4 atoms: moov at %d (%d bytes), mdat at %d (%d bytes)",
|
||||||
|
moov_offset,
|
||||||
|
moov_size,
|
||||||
|
mdat_offset,
|
||||||
|
mdat_size,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Step 5: Fetch full moov atom
|
||||||
|
# Check if moov is already contained in the header data we read
|
||||||
|
if moov_offset + moov_size <= len(header_data):
|
||||||
|
moov_data = header_data[moov_offset : moov_offset + moov_size]
|
||||||
|
else:
|
||||||
|
moov_data = b""
|
||||||
|
async for chunk in source.stream(offset=moov_offset, limit=moov_size):
|
||||||
|
moov_data += chunk
|
||||||
|
|
||||||
|
if len(moov_data) < moov_size:
|
||||||
|
logger.warning(
|
||||||
|
"[container_probe] Incomplete moov: got %d of %d bytes",
|
||||||
|
len(moov_data),
|
||||||
|
moov_size,
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Step 6: Parse moov body (skip box header)
|
||||||
|
# Determine header size
|
||||||
|
raw_size = struct.unpack_from(">I", moov_data, 0)[0]
|
||||||
|
hdr_size = 16 if raw_size == 1 else 8
|
||||||
|
moov_body = moov_data[hdr_size:]
|
||||||
|
|
||||||
|
cue_points, duration_ms, timescale, video_codec, audio_codec = build_cue_points_from_moov(moov_body)
|
||||||
|
|
||||||
|
# If mdat wasn't found via header scan, it's likely right after ftyp
|
||||||
|
# or right after moov. Common layouts:
|
||||||
|
# ftyp + moov + mdat (faststart) or ftyp + mdat + moov
|
||||||
|
if mdat_offset < 0:
|
||||||
|
# Walk atoms to find mdat by scanning just enough from the file
|
||||||
|
# In most cases, mdat is either before or after moov
|
||||||
|
if moov_offset < file_size // 2:
|
||||||
|
# moov is early -> mdat likely follows
|
||||||
|
mdat_search_offset = moov_offset + moov_size
|
||||||
|
else:
|
||||||
|
# moov is late -> mdat likely right after ftyp
|
||||||
|
ftyp_size = struct.unpack_from(">I", header_data, 0)[0]
|
||||||
|
if ftyp_size == 1:
|
||||||
|
ftyp_size = struct.unpack_from(">Q", header_data, 8)[0]
|
||||||
|
mdat_search_offset = ftyp_size
|
||||||
|
|
||||||
|
# Read a small amount to find the mdat header
|
||||||
|
mdat_header = b""
|
||||||
|
async for chunk in source.stream(offset=mdat_search_offset, limit=16):
|
||||||
|
mdat_header += chunk
|
||||||
|
if len(mdat_header) >= 8:
|
||||||
|
box_type = mdat_header[4:8]
|
||||||
|
if box_type == b"mdat":
|
||||||
|
mdat_offset = mdat_search_offset
|
||||||
|
raw_sz = struct.unpack_from(">I", mdat_header, 0)[0]
|
||||||
|
if raw_sz == 1 and len(mdat_header) >= 16:
|
||||||
|
mdat_size = struct.unpack_from(">Q", mdat_header, 8)[0]
|
||||||
|
else:
|
||||||
|
mdat_size = raw_sz
|
||||||
|
|
||||||
|
# Step 7: Extract ftyp data (always in the header since it's the first atom)
|
||||||
|
ftyp_data = b""
|
||||||
|
if ftyp_offset >= 0 and ftyp_size > 0 and ftyp_offset + ftyp_size <= len(header_data):
|
||||||
|
ftyp_data = header_data[ftyp_offset : ftyp_offset + ftyp_size]
|
||||||
|
|
||||||
|
# Step 8: Rewrite moov chunk offsets for faststart pipe layout.
|
||||||
|
# The pipe stream will be: ftyp + moov + mdat. The stco/co64
|
||||||
|
# offsets in the original moov point to positions in the original
|
||||||
|
# file. We need to shift them to account for the new layout.
|
||||||
|
# New mdat position = ftyp_size + moov_size
|
||||||
|
# Delta = new_mdat_position - original_mdat_offset
|
||||||
|
if mdat_offset >= 0:
|
||||||
|
new_mdat_start = len(ftyp_data) + moov_size
|
||||||
|
offset_delta = new_mdat_start - mdat_offset
|
||||||
|
if offset_delta != 0:
|
||||||
|
moov_data = rewrite_moov_offsets(moov_data, offset_delta)
|
||||||
|
|
||||||
|
index = MP4Index(
|
||||||
|
duration_ms=duration_ms,
|
||||||
|
timescale=timescale,
|
||||||
|
cue_points=cue_points,
|
||||||
|
moov_offset=moov_offset,
|
||||||
|
moov_size=moov_size,
|
||||||
|
moov_data=moov_data,
|
||||||
|
ftyp_data=ftyp_data,
|
||||||
|
mdat_offset=mdat_offset,
|
||||||
|
mdat_size=mdat_size,
|
||||||
|
video_codec=video_codec,
|
||||||
|
audio_codec=audio_codec,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"[container_probe] MP4 index: duration=%.1fs, %d cue points, video=%s, audio=%s",
|
||||||
|
duration_ms / 1000.0,
|
||||||
|
len(cue_points),
|
||||||
|
video_codec,
|
||||||
|
audio_codec,
|
||||||
|
)
|
||||||
|
|
||||||
|
if cache_key:
|
||||||
|
await _set_cached_mp4_index(cache_key, index)
|
||||||
|
|
||||||
|
return index
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("[container_probe] Failed to probe MP4 moov: %s", e)
|
||||||
|
return None
|
||||||
1228
mediaflow_proxy/remuxer/ebml_parser.py
Normal file
1228
mediaflow_proxy/remuxer/ebml_parser.py
Normal file
File diff suppressed because it is too large
Load Diff
151
mediaflow_proxy/remuxer/hls_manifest.py
Normal file
151
mediaflow_proxy/remuxer/hls_manifest.py
Normal file
@@ -0,0 +1,151 @@
|
|||||||
|
"""
|
||||||
|
HLS VOD playlist generator for on-the-fly fMP4 transcoding.
|
||||||
|
|
||||||
|
Produces an M3U8 VOD playlist from an ``MKVCueIndex`` or ``MP4Index``.
|
||||||
|
Consecutive keyframes that are closer together than the target segment
|
||||||
|
duration are merged into a single HLS segment, matching the behaviour
|
||||||
|
of ``ffmpeg -hls_time``.
|
||||||
|
|
||||||
|
The init segment is referenced via ``#EXT-X-MAP``.
|
||||||
|
|
||||||
|
Requires ``#EXT-X-VERSION:7`` for fMP4 (CMAF) segments.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import math
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def merge_cue_points(
    cue_points: list[tuple[float, int]],
    target_duration_ms: float = 5000.0,
) -> list[tuple[float, int]]:
    """Collapse keyframe cue points into segment boundaries spaced >= *target_duration_ms*.

    Mirrors ``ffmpeg -hls_time`` behaviour: a keyframe only starts a new
    segment when it falls at least ``target_duration_ms`` after the start
    of the current one; closer keyframes are absorbed. Duplicate byte
    offsets are removed first, keeping the earliest timestamp per offset
    (deterministic via a (time, offset) sort), which also eliminates runt
    segments and timestamp regressions seen in some MKV cue tables.

    Args:
        cue_points: Sorted ``(time_ms, byte_offset)`` pairs.
        target_duration_ms: Minimum segment duration in milliseconds.

    Returns:
        Reduced list of ``(time_ms, byte_offset)`` segment boundaries.
    """
    if not cue_points:
        return []

    # Deduplicate by byte offset; earliest timestamp wins because we walk
    # the points in (time, offset) order.
    unique: list[tuple[float, int]] = []
    taken_offsets: set[int] = set()
    for ts, off in sorted(cue_points, key=lambda p: (p[0], p[1])):
        if off in taken_offsets:
            continue
        taken_offsets.add(off)
        unique.append((ts, off))

    if not unique:
        return []

    # Greedy merge: open a boundary only once the target window has elapsed.
    boundaries: list[tuple[float, int]] = [unique[0]]
    for point in unique[1:]:
        if point[0] - boundaries[-1][0] >= target_duration_ms:
            boundaries.append(point)
    return boundaries
|
||||||
|
|
||||||
|
|
||||||
|
def generate_vod_playlist(
    cue_points: list[tuple[float, int]],
    duration_ms: float,
    init_url: str,
    segment_url_template: str,
    target_segment_duration_ms: float = 5000.0,
) -> str:
    """Render an HLS VOD M3U8 playlist from keyframe cue points.

    Keyframes closer together than *target_segment_duration_ms* are merged
    into one segment (``ffmpeg -hls_time`` semantics). The fMP4 init
    segment is referenced via ``#EXT-X-MAP``; version 7 is declared as
    required for CMAF segments.

    Args:
        cue_points: Sorted ``(time_ms, byte_offset)`` pairs.
        duration_ms: Total media duration in milliseconds.
        init_url: URI for the init segment (``#EXT-X-MAP``).
        segment_url_template: URL template with ``{seg}``, ``{start_ms}``
            and ``{end_ms}`` placeholders.
        target_segment_duration_ms: Target minimum segment duration.

    Returns:
        Complete M3U8 playlist text, or "" when there are no cue points.
    """
    if not cue_points:
        return ""

    boundaries = merge_cue_points(cue_points, target_segment_duration_ms)

    # Derive (start_ms, end_ms, duration_s) per segment; the final segment
    # runs to the end of the media. Durations are floored at 1 ms.
    segments: list[tuple[float, float, float]] = []
    for idx, (start_ms, _offset) in enumerate(boundaries):
        end_ms = boundaries[idx + 1][0] if idx + 1 < len(boundaries) else duration_ms
        segments.append((start_ms, end_ms, max((end_ms - start_ms) / 1000.0, 0.001)))

    if not segments:
        return ""

    longest = max(seg[2] for seg in segments)
    target_duration = max(math.ceil(longest), 1)

    out: list[str] = [
        "#EXTM3U",
        "#EXT-X-VERSION:7",
        f"#EXT-X-TARGETDURATION:{target_duration}",
        "#EXT-X-PLAYLIST-TYPE:VOD",
        "#EXT-X-MEDIA-SEQUENCE:0",
        f'#EXT-X-MAP:URI="{init_url}"',
    ]

    for seg_num, (start_ms, end_ms, dur_s) in enumerate(segments):
        out.append(f"#EXTINF:{dur_s:.3f},")
        out.append(
            segment_url_template.replace("{seg}", str(seg_num))
            .replace("{start_ms}", str(int(start_ms)))
            .replace("{end_ms}", str(int(end_ms)))
        )

    out.append("#EXT-X-ENDLIST")
    out.append("")  # ensure trailing newline
    return "\n".join(out)
|
||||||
234
mediaflow_proxy/remuxer/media_source.py
Normal file
234
mediaflow_proxy/remuxer/media_source.py
Normal file
@@ -0,0 +1,234 @@
|
|||||||
|
"""
|
||||||
|
Abstract media source protocol for source-agnostic transcode pipeline.
|
||||||
|
|
||||||
|
Decouples the transcode pipeline, MKV cue probing, and seeking logic
|
||||||
|
from any specific transport (Telegram, HTTP, etc.). Each transport
|
||||||
|
implements the MediaSource protocol to provide byte-range streaming.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import logging
|
||||||
|
from collections.abc import AsyncIterator
|
||||||
|
from typing import Protocol, runtime_checkable
|
||||||
|
from urllib.parse import urlparse, unquote
|
||||||
|
|
||||||
|
from mediaflow_proxy.utils.http_client import create_aiohttp_session
|
||||||
|
from mediaflow_proxy.utils.telegram import telegram_manager
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Extensions mapped to container format hints used by transcode_handler
|
||||||
|
_MKV_EXTENSIONS = frozenset({".mkv", ".webm"})
|
||||||
|
_MP4_EXTENSIONS = frozenset({".mp4", ".m4v", ".mov", ".m4a", ".3gp"})
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_extension(path: str) -> str:
|
||||||
|
"""Extract lowercase file extension (e.g. '.mkv') from a path or URL."""
|
||||||
|
# Strip query/fragment first for URL paths
|
||||||
|
dot_pos = path.rfind(".")
|
||||||
|
if dot_pos < 0:
|
||||||
|
return ""
|
||||||
|
ext = path[dot_pos:].lower()
|
||||||
|
# Trim anything after the extension (query params from raw paths)
|
||||||
|
for ch in ("?", "#", "&"):
|
||||||
|
idx = ext.find(ch)
|
||||||
|
if idx > 0:
|
||||||
|
ext = ext[:idx]
|
||||||
|
return ext
|
||||||
|
|
||||||
|
|
||||||
|
def filename_hint_from_url(url: str) -> str:
    """Derive a filename hint from a URL path (e.g. '.mkv', '.mp4').

    Any parsing failure degrades to "" rather than raising.
    """
    try:
        decoded_path = unquote(urlparse(url).path)
        return _extract_extension(decoded_path)
    except Exception:
        return ""
|
||||||
|
|
||||||
|
|
||||||
|
def filename_hint_from_name(filename: str) -> str:
    """Derive a filename hint (extension) from a filename string; "" if empty."""
    if not filename:
        return ""
    return _extract_extension(filename)
|
||||||
|
|
||||||
|
|
||||||
|
@runtime_checkable
class MediaSource(Protocol):
    """
    Protocol for streaming media byte ranges, independent of transport.

    Implemented by TelegramMediaSource and HTTPMediaSource below so the
    transcode/probe pipeline can work against any byte-range-capable
    backend. Implementations must provide:
    - stream(): async iterator of bytes starting at offset, up to limit
    - file_size: total file size in bytes
    - cache_key: deterministic key for caching derived data (cue index, etc.)
    - filename_hint: optional file extension hint (e.g. '.mkv', '.mp4')

    Decorated with @runtime_checkable so isinstance() checks work.
    """

    @property
    def file_size(self) -> int:
        """Total file size in bytes (0 may mean "unknown" for some sources)."""
        ...

    @property
    def cache_key(self) -> str:
        """Deterministic cache key derived from the source identity ("" if none)."""
        ...

    @property
    def filename_hint(self) -> str:
        """Optional file extension hint (e.g. '.mkv', '.mp4') for format detection."""
        ...

    async def stream(self, offset: int = 0, limit: int | None = None) -> AsyncIterator[bytes]:
        """
        Stream bytes from the source.

        Args:
            offset: Byte offset to start from.
            limit: Number of bytes to read. None = read to end.

        Yields:
            Chunks of bytes (chunk sizes are implementation-defined).
        """
        ...
|
||||||
|
|
||||||
|
|
||||||
|
class TelegramMediaSource:
    """
    MediaSource backed by Telegram MTProto downloads.

    Supports two download modes:

    * **parallel** (default): Uses ``ParallelTransferrer`` with multiple
      MTProtoSender connections for maximum throughput. Best for full-file
      streaming (e.g. ``/proxy/telegram/stream``).

    * **single** (``use_single_client=True``): Uses Telethon's built-in
      ``iter_download`` over the existing client connection. Avoids the
      overhead of creating/destroying extra connections for each request,
      ideal for small byte-range fetches like HLS segments and probe
      headers.
    """

    def __init__(
        self,
        telegram_ref,
        file_size: int,
        file_name: str = "",
        *,
        use_single_client: bool = False,
    ) -> None:
        self._ref = telegram_ref
        self._file_size = file_size
        # Pre-compute the extension hint once; file_name may be empty.
        self._filename_hint = filename_hint_from_name(file_name)
        self._use_single_client = use_single_client

    @property
    def file_size(self) -> int:
        return self._file_size

    @property
    def cache_key(self) -> str:
        # Prefer the stable file_id; fall back to chat/message identity.
        ref = self._ref
        if ref.file_id:
            identity = f"file_id:{ref.file_id}"
        else:
            if ref.chat_id is None or ref.message_id is None:
                return ""
            identity = f"chat:{ref.chat_id}:msg:{ref.message_id}"
        return hashlib.sha256(identity.encode()).hexdigest()[:16]

    @property
    def filename_hint(self) -> str:
        return self._filename_hint

    async def stream(self, offset: int = 0, limit: int | None = None) -> AsyncIterator[bytes]:
        # A falsy limit (None or 0) means "read to end of file".
        byte_budget = limit or self._file_size
        reader = (
            telegram_manager.stream_media_single
            if self._use_single_client
            else telegram_manager.stream_media
        )
        async for chunk in reader(
            self._ref,
            offset=offset,
            limit=byte_budget,
            file_size=self._file_size,
        ):
            yield chunk
|
||||||
|
|
||||||
|
|
||||||
|
class HTTPMediaSource:
    """MediaSource backed by HTTP byte-range requests via aiohttp."""

    def __init__(self, url: str, headers: dict | None = None, file_size: int = 0) -> None:
        self._url = url
        self._headers = headers or {}
        self._file_size = file_size
        self._filename_hint = filename_hint_from_url(url)

    @property
    def file_size(self) -> int:
        return self._file_size

    @property
    def cache_key(self) -> str:
        # URL identity is the cache key; 16 hex chars of SHA-256 suffice.
        return hashlib.sha256(self._url.encode()).hexdigest()[:16]

    @property
    def filename_hint(self) -> str:
        return self._filename_hint

    async def resolve_file_size(self) -> int:
        """Determine the file size via HEAD (cached after the first call).

        Falls back to a 1-byte ranged GET and parses Content-Range when the
        server omits Content-Length from the HEAD response.
        """
        if self._file_size > 0:
            return self._file_size

        async with create_aiohttp_session(self._url, headers=self._headers) as (session, proxy_url):
            async with session.head(
                self._url,
                headers=self._headers,
                proxy=proxy_url,
                allow_redirects=True,
            ) as head_resp:
                declared = head_resp.headers.get("content-length")
                if declared:
                    self._file_size = int(declared)
                else:
                    # Fallback: ranged GET exposes the total after '/' in
                    # Content-Range ("bytes 0-0/12345").
                    probe_headers = {**self._headers, "range": "bytes=0-0"}
                    async with session.get(
                        self._url,
                        headers=probe_headers,
                        proxy=proxy_url,
                        allow_redirects=True,
                    ) as probe_resp:
                        content_range = probe_resp.headers.get("content-range", "")
                        if "/" in content_range:
                            try:
                                self._file_size = int(content_range.split("/")[-1])
                            except ValueError:
                                pass
        return self._file_size

    async def stream(self, offset: int = 0, limit: int | None = None) -> AsyncIterator[bytes]:
        request_headers = dict(self._headers)

        # Only attach a Range header when an actual sub-range is requested.
        if offset > 0 or limit is not None:
            range_end = "" if limit is None else str(offset + limit - 1)
            request_headers["range"] = f"bytes={offset}-{range_end}"

        async with create_aiohttp_session(self._url, headers=request_headers) as (session, proxy_url):
            async with session.get(
                self._url,
                headers=request_headers,
                proxy=proxy_url,
                allow_redirects=True,
            ) as resp:
                resp.raise_for_status()
                async for chunk in resp.content.iter_any():
                    yield chunk
|
||||||
469
mediaflow_proxy/remuxer/mkv_demuxer.py
Normal file
469
mediaflow_proxy/remuxer/mkv_demuxer.py
Normal file
@@ -0,0 +1,469 @@
|
|||||||
|
"""
|
||||||
|
Streaming MKV demuxer.
|
||||||
|
|
||||||
|
Reads an MKV byte stream via an async iterator and yields individual media
|
||||||
|
frames (MKVFrame) with absolute timestamps. Designed for on-the-fly remuxing
|
||||||
|
without buffering the entire file.
|
||||||
|
|
||||||
|
Architecture:
|
||||||
|
AsyncIterator[bytes] -> StreamBuffer -> EBML parsing -> MKVFrame yields
|
||||||
|
|
||||||
|
The demuxer works in two phases:
|
||||||
|
1. read_header(): Consume bytes until Tracks is fully parsed, returning
|
||||||
|
a list of MKVTrack with codec metadata.
|
||||||
|
2. iter_frames(): Yield MKVFrame objects from Cluster/SimpleBlock data
|
||||||
|
as clusters arrive.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from collections.abc import AsyncIterator
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
from mediaflow_proxy.remuxer.ebml_parser import (
|
||||||
|
CLUSTER,
|
||||||
|
CLUSTER_TIMESTAMP,
|
||||||
|
EBML_HEADER,
|
||||||
|
INFO,
|
||||||
|
MKVFrame,
|
||||||
|
MKVTrack,
|
||||||
|
SEGMENT,
|
||||||
|
SIMPLE_BLOCK,
|
||||||
|
BLOCK_GROUP,
|
||||||
|
TRACKS,
|
||||||
|
TIMESTAMP_SCALE,
|
||||||
|
DURATION,
|
||||||
|
UNKNOWN_SIZE,
|
||||||
|
extract_block_frames,
|
||||||
|
parse_tracks,
|
||||||
|
read_element_id,
|
||||||
|
read_element_size,
|
||||||
|
read_float,
|
||||||
|
read_uint,
|
||||||
|
_parse_block_group,
|
||||||
|
iter_elements,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class StreamBuffer:
    """
    Accumulating byte buffer for streaming EBML parsing.

    Chunks from an async byte source are appended at the back and later
    peeked, consumed, or skipped from the front. A running count of
    logically consumed bytes is kept so callers can track absolute file
    offsets while memory usage stays bounded.
    """

    def __init__(self) -> None:
        self._chunks: list[bytes] = []
        self._total: int = 0
        self._consumed: int = 0  # running count of logically consumed bytes

    @property
    def available(self) -> int:
        """Number of buffered bytes available for reading."""
        return self._total

    @property
    def consumed(self) -> int:
        """Total bytes consumed so far (for absolute offset tracking)."""
        return self._consumed

    def append(self, data: bytes) -> None:
        """Add bytes to the back of the buffer (empty chunks are ignored)."""
        if not data:
            return
        self._chunks.append(data)
        self._total += len(data)

    def peek(self, size: int) -> bytes:
        """Return up to *size* leading bytes without consuming them."""
        if size <= 0:
            return b""
        pieces: list[bytes] = []
        needed = size
        for chunk in self._chunks:
            if needed <= 0:
                break
            take = chunk[:needed]
            pieces.append(take)
            needed -= len(take)
        return b"".join(pieces)

    def get_all(self) -> bytes:
        """Return all buffered data as one bytes object (no consumption).

        Coalesces the internal chunk list so repeated calls are cheap.
        """
        if len(self._chunks) != 1:
            self._chunks = [b"".join(self._chunks)]
        return self._chunks[0]

    def consume(self, size: int) -> bytes:
        """Remove and return up to *size* bytes from the front of the buffer."""
        if size <= 0:
            return b""
        size = min(size, self._total)

        out = bytearray()
        need = size
        while need > 0 and self._chunks:
            head = self._chunks[0]
            if len(head) <= need:
                out += head
                need -= len(head)
                del self._chunks[0]
            else:
                out += head[:need]
                self._chunks[0] = head[need:]
                need = 0

        taken = len(out)
        self._total -= taken
        self._consumed += taken
        return bytes(out)

    def skip(self, size: int) -> int:
        """Discard up to *size* bytes from the front; return bytes skipped."""
        if size <= 0:
            return 0
        to_drop = min(size, self._total)
        left = to_drop
        while left > 0 and self._chunks:
            head = self._chunks[0]
            if len(head) <= left:
                left -= len(head)
                del self._chunks[0]
            else:
                self._chunks[0] = head[left:]
                left = 0
        self._total -= to_drop
        self._consumed += to_drop
        return to_drop
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class MKVHeader:
    """Parsed MKV header metadata produced by MKVDemuxer.read_header()."""

    # Track entries parsed from the Tracks element (codec id, track number, ...).
    tracks: list[MKVTrack] = field(default_factory=list)
    # MKV TimestampScale in nanoseconds; default 1_000_000 ns = 1 ms.
    timestamp_scale_ns: int = 1_000_000  # Default 1ms
    # Duration in milliseconds; 0.0 until populated from the Info element
    # (presumably by _parse_info_element — not visible here, confirm).
    duration_ms: float = 0.0
    segment_data_offset: int = 0  # Absolute byte offset of Segment children
|
||||||
|
|
||||||
|
|
||||||
|
class MKVDemuxer:
|
||||||
|
"""
|
||||||
|
Streaming async MKV demuxer.
|
||||||
|
|
||||||
|
Reads an MKV byte stream from an async iterator and provides:
|
||||||
|
- read_header(): Parse EBML header + Segment metadata + Tracks
|
||||||
|
- iter_frames(): Yield MKVFrame objects from Clusters
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
demuxer = MKVDemuxer()
|
||||||
|
header = await demuxer.read_header(source)
|
||||||
|
async for frame in demuxer.iter_frames(source):
|
||||||
|
process(frame)
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Minimum bytes to try parsing an element header (ID + size)
|
||||||
|
_MIN_ELEMENT_HEADER = 12
|
||||||
|
|
||||||
|
    def __init__(self) -> None:
        # Rolling byte buffer fed from the async source iterator.
        self._buf = StreamBuffer()
        # Populated by read_header(); None until the header has been parsed.
        self._header: MKVHeader | None = None
        self._scale_ms: float = 1.0  # timestamp_scale / 1_000_000
|
||||||
|
|
||||||
|
    @property
    def header(self) -> MKVHeader | None:
        """Parsed MKVHeader, or None if read_header() has not run yet."""
        return self._header
|
||||||
|
|
||||||
|
    async def read_header(self, source: AsyncIterator[bytes]) -> MKVHeader:
        """
        Read and parse the MKV header (EBML header, Segment, Info, Tracks).

        Consumes bytes from source until Tracks is fully parsed. Any leftover
        bytes (start of first Cluster) remain in the internal buffer for
        iter_frames().

        Args:
            source: Async iterator of raw MKV bytes, starting at file offset 0.

        Returns:
            MKVHeader with track info and timing metadata.

        Raises:
            ValueError: If the stream is not MKV, ends prematurely, or the
                EBML header declares an unknown size.
        """
        header = MKVHeader()

        # Phase 1: Accumulate enough data for EBML header + Segment header
        await self._ensure_bytes(source, 64)

        data = self._buf.get_all()
        if len(data) < 4:
            raise ValueError(
                f"Source ended prematurely: got {len(data)} bytes, need at least an EBML header (source disconnected?)"
            )
        pos = 0

        # Parse EBML Header: the first element must be the EBML magic.
        eid, pos = read_element_id(data, pos)
        if eid != EBML_HEADER:
            raise ValueError(f"Not an MKV file: expected EBML header, got 0x{eid:X}")
        size, pos = read_element_size(data, pos)
        if size == UNKNOWN_SIZE:
            raise ValueError("EBML header has unknown size")
        pos += size  # Skip EBML header content

        # Parse Segment element header; its size is ignored (often unknown),
        # only the offset where its children begin is recorded.
        eid, pos = read_element_id(data, pos)
        if eid != SEGMENT:
            raise ValueError(f"Expected Segment, got 0x{eid:X}")
        _seg_size, pos = read_element_size(data, pos)
        header.segment_data_offset = self._buf.consumed + pos

        # Phase 2: Parse Segment children until we have Tracks
        # We need to iterate top-level Segment children: SeekHead, Info, Tracks
        # Stop when we hit the first Cluster (media data).
        tracks_found = False

        while not tracks_found:
            # Ensure we have enough for element header
            await self._ensure_bytes(source, pos + self._MIN_ELEMENT_HEADER)
            data = self._buf.get_all()

            if pos >= len(data):
                break

            try:
                eid, pos2 = read_element_id(data, pos)
                size, pos3 = read_element_size(data, pos2)
            except (ValueError, IndexError):
                # Header may be split across chunk boundaries: fetch a little
                # more and retry once before giving up.
                await self._ensure_bytes(source, pos + 32)
                data = self._buf.get_all()
                try:
                    eid, pos2 = read_element_id(data, pos)
                    size, pos3 = read_element_size(data, pos2)
                except (ValueError, IndexError):
                    break

            if eid == CLUSTER:
                # Reached media data; header parsing is done.
                # Don't consume the Cluster -- leave it for iter_frames.
                break

            if size == UNKNOWN_SIZE:
                # Can't handle unknown-size elements in header
                logger.warning("[mkv_demuxer] Unknown-size element 0x%X in header at pos %d", eid, pos)
                break

            # Ensure we have the full element body buffered before parsing it.
            elem_end = pos3 + size
            await self._ensure_bytes(source, elem_end)
            data = self._buf.get_all()

            if eid == INFO:
                self._parse_info_element(data, pos3, pos3 + size, header)
            elif eid == TRACKS:
                header.tracks = parse_tracks(data, pos3, pos3 + size)
                tracks_found = True
                logger.info(
                    "[mkv_demuxer] Parsed %d tracks: %s",
                    len(header.tracks),
                    ", ".join(f"#{t.track_number}={t.codec_id}" for t in header.tracks),
                )

            # Skip any other child (SeekHead, Tags, ...) by jumping past it.
            pos = elem_end

        # Consume everything up to the current position (Cluster boundary)
        self._buf.consume(pos)

        # Set timing scale: ns-per-tick converted to ms-per-tick.
        self._scale_ms = header.timestamp_scale_ns / 1_000_000.0
        self._header = header
        return header
|
||||||
|
|
||||||
|
async def iter_frames(self, source: AsyncIterator[bytes]) -> AsyncIterator[MKVFrame]:
    """
    Yield MKVFrame objects from Cluster/SimpleBlock data.

    Must be called after read_header(). Continues consuming bytes from
    source, parsing Clusters and yielding individual frames.

    Top-level elements that are not Clusters are skipped; an unknown-size
    non-Cluster element terminates iteration, since its end cannot be found.
    """
    if self._header is None:
        raise RuntimeError("read_header() must be called before iter_frames()")

    while True:
        # Try to read the next element header
        if not await self._ensure_bytes_soft(source, self._MIN_ELEMENT_HEADER):
            break

        data = self._buf.get_all()
        pos = 0

        try:
            eid, pos2 = read_element_id(data, pos)
            size, pos3 = read_element_size(data, pos2)
        except (ValueError, IndexError):
            # Try to get more data — the ID/size varint may be split
            # across chunk boundaries; retry once with a larger buffer.
            if not await self._ensure_bytes_soft(source, len(data) + 4096):
                break
            data = self._buf.get_all()
            try:
                eid, pos2 = read_element_id(data, pos)
                size, pos3 = read_element_size(data, pos2)
            except (ValueError, IndexError):
                # Still unparseable: treat as end of usable stream.
                break

        if eid == CLUSTER:
            if size == UNKNOWN_SIZE:
                # Unknown-size Cluster: parse children until we hit the next
                # Cluster or run out of data
                self._buf.consume(pos3)  # consume Cluster header
                async for frame in self._parse_unknown_size_cluster(source):
                    yield frame
            else:
                # Known-size Cluster: ensure we have all data
                elem_end = pos3 + size
                await self._ensure_bytes(source, elem_end)
                data = self._buf.get_all()

                for frame in self._parse_cluster_data(data, pos3, pos3 + size):
                    yield frame

                self._buf.consume(elem_end)
        else:
            # Skip non-Cluster top-level elements
            if size == UNKNOWN_SIZE:
                break
            elem_end = pos3 + size
            if elem_end > len(data):
                # Need to skip bytes we don't have yet
                self._buf.consume(len(data))
                skip_remaining = elem_end - len(data)
                await self._skip_bytes(source, skip_remaining)
            else:
                self._buf.consume(elem_end)
|
||||||
|
|
||||||
|
def _parse_info_element(self, data: bytes, start: int, end: int, header: MKVHeader) -> None:
    """Scan the Segment Info element and record timing metadata on header.

    Captures the TimestampScale (ns per tick) and, when present, the
    Duration converted from ticks to milliseconds.
    """
    for child_id, body_off, body_len, _ in iter_elements(data, start, end):
        if child_id == TIMESTAMP_SCALE:
            header.timestamp_scale_ns = read_uint(data, body_off, body_len)
        elif child_id == DURATION:
            # Duration is stored in timestamp-scale units; convert to ms.
            ms_per_tick = header.timestamp_scale_ns / 1_000_000.0
            header.duration_ms = read_float(data, body_off, body_len) * ms_per_tick
|
||||||
|
|
||||||
|
def _parse_cluster_data(self, data: bytes, start: int, end: int) -> list[MKVFrame]:
    """Collect every frame contained in a fully-buffered (known-size) Cluster.

    Walks the Cluster's children, tracking the Cluster timecode and
    expanding SimpleBlocks and BlockGroups into MKVFrame objects with
    absolute millisecond timestamps.
    """
    timecode_base = 0
    collected: list[MKVFrame] = []

    for child_id, body_off, body_len, _ in iter_elements(data, start, end):
        if child_id == CLUSTER_TIMESTAMP:
            timecode_base = read_uint(data, body_off, body_len)
        elif child_id == SIMPLE_BLOCK:
            for trk, block_tc, block_flags, payloads in extract_block_frames(data, body_off, body_len):
                # Bit 7 of the SimpleBlock flags marks a keyframe.
                keyframe = bool(block_flags & 0x80)
                stamp_ms = (timecode_base + block_tc) * self._scale_ms
                collected.extend(
                    MKVFrame(
                        track_number=trk,
                        timestamp_ms=stamp_ms,
                        is_keyframe=keyframe,
                        data=payload,
                    )
                    for payload in payloads
                )
        elif child_id == BLOCK_GROUP:
            # BlockGroup parsing appends directly into the result list.
            _parse_block_group(data, body_off, body_off + body_len, timecode_base, self._scale_ms, collected)

    return collected
|
||||||
|
|
||||||
|
async def _parse_unknown_size_cluster(self, source: AsyncIterator[bytes]) -> AsyncIterator[MKVFrame]:
    """Parse an unknown-size Cluster by reading children until next Cluster.

    The caller has already consumed the Cluster's own ID/size header, so
    the buffer starts at the first child element. Children are parsed one
    at a time and consumed from the buffer; the loop ends when the next
    Cluster/Segment ID appears (left unconsumed for the caller) or the
    source is exhausted.
    """
    cluster_timecode = 0

    while True:
        if not await self._ensure_bytes_soft(source, self._MIN_ELEMENT_HEADER):
            break

        data = self._buf.get_all()
        pos = 0

        try:
            eid, pos2 = read_element_id(data, pos)
            size, pos3 = read_element_size(data, pos2)
        except (ValueError, IndexError):
            # Header may straddle a chunk boundary — retry once with more data.
            if not await self._ensure_bytes_soft(source, len(data) + 4096):
                break
            data = self._buf.get_all()
            try:
                eid, pos2 = read_element_id(data, pos)
                size, pos3 = read_element_size(data, pos2)
            except (ValueError, IndexError):
                break

        # A new Cluster or top-level element signals end of current Cluster
        if eid == CLUSTER or eid == SEGMENT:
            break

        if size == UNKNOWN_SIZE:
            # Nested unknown-size children are not supported; stop here.
            break

        elem_end = pos3 + size
        await self._ensure_bytes(source, elem_end)
        data = self._buf.get_all()

        if eid == CLUSTER_TIMESTAMP:
            cluster_timecode = read_uint(data, pos3, size)
        elif eid == SIMPLE_BLOCK:
            for track_num, rel_tc, flags, frame_list in extract_block_frames(data, pos3, size):
                # Bit 7 of the SimpleBlock flags marks a keyframe.
                is_kf = bool(flags & 0x80)
                abs_ts_ms = (cluster_timecode + rel_tc) * self._scale_ms
                for frame_data in frame_list:
                    yield MKVFrame(
                        track_number=track_num,
                        timestamp_ms=abs_ts_ms,
                        is_keyframe=is_kf,
                        data=frame_data,
                    )
        elif eid == BLOCK_GROUP:
            # BlockGroup parsing appends into a temp list, then re-yield.
            bg_frames = []
            _parse_block_group(data, pos3, pos3 + size, cluster_timecode, self._scale_ms, bg_frames)
            for frame in bg_frames:
                yield frame

        self._buf.consume(elem_end)
|
||||||
|
|
||||||
|
async def _ensure_bytes(self, source: AsyncIterator[bytes], needed: int) -> None:
    """Pull chunks from source until the buffer holds at least `needed` bytes.

    Returns silently (without raising) if the source ends first; callers
    must re-check buffer availability when that matters.
    """
    while True:
        if self._buf.available >= needed:
            return
        try:
            piece = await source.__anext__()
        except StopAsyncIteration:
            # Source exhausted before the target was reached.
            return
        self._buf.append(piece)
|
||||||
|
|
||||||
|
async def _ensure_bytes_soft(self, source: AsyncIterator[bytes], needed: int) -> bool:
    """Best-effort variant of _ensure_bytes.

    Returns True when `needed` bytes are buffered; on source exhaustion
    (or an empty chunk) returns whether ANY bytes remain buffered, so the
    caller can decide to keep parsing partial data.
    """
    while self._buf.available < needed:
        try:
            piece = await source.__anext__()
        except StopAsyncIteration:
            return self._buf.available > 0
        if not piece:
            # Empty chunk is treated as end-of-stream.
            return self._buf.available > 0
        self._buf.append(piece)
    return True
|
||||||
|
|
||||||
|
async def _skip_bytes(self, source: AsyncIterator[bytes], count: int) -> None:
    """Discard `count` bytes from source without buffering them.

    If the final chunk overshoots the skip target, the surplus tail is
    pushed into the buffer for subsequent parsing. Stops early if the
    source runs out.
    """
    left = count
    while left > 0:
        try:
            piece = await source.__anext__()
        except StopAsyncIteration:
            break
        if len(piece) <= left:
            left -= len(piece)
            continue
        # Chunk extends past the skip target: keep the excess.
        self._buf.append(piece[left:])
        left = 0
|
||||||
1376
mediaflow_proxy/remuxer/mp4_muxer.py
Normal file
1376
mediaflow_proxy/remuxer/mp4_muxer.py
Normal file
File diff suppressed because it is too large
Load Diff
834
mediaflow_proxy/remuxer/mp4_parser.py
Normal file
834
mediaflow_proxy/remuxer/mp4_parser.py
Normal file
@@ -0,0 +1,834 @@
|
|||||||
|
"""
|
||||||
|
MP4 container parser for moov atom probing.
|
||||||
|
|
||||||
|
Provides:
|
||||||
|
- MP4Index: seek index extracted from MP4 moov atom (parallel to MKVCueIndex)
|
||||||
|
- Top-level atom scanning
|
||||||
|
- Sample table parsers (stco, co64, stss, stsz, stts, stsc)
|
||||||
|
- Moov-to-cue-point builder
|
||||||
|
- rewrite_moov_offsets: adjust stco/co64 in moov for file rearrangement
|
||||||
|
|
||||||
|
The parsers are the inverse of the builder functions in mp4_muxer.py.
|
||||||
|
Box navigation reuses the pattern from ts_muxer.py's read_box/find_box/iter_boxes.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import bisect
|
||||||
|
import logging
|
||||||
|
import struct
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# MP4 Box Utilities
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Minimum bytes needed to read a standard box header
|
||||||
|
_BOX_HEADER_SIZE = 8
|
||||||
|
|
||||||
|
# ftyp brands that identify MP4/MOV containers
|
||||||
|
_MP4_BRANDS = {
|
||||||
|
b"isom",
|
||||||
|
b"iso2",
|
||||||
|
b"iso3",
|
||||||
|
b"iso4",
|
||||||
|
b"iso5",
|
||||||
|
b"iso6",
|
||||||
|
b"mp41",
|
||||||
|
b"mp42",
|
||||||
|
b"M4V ",
|
||||||
|
b"M4A ",
|
||||||
|
b"f4v ",
|
||||||
|
b"kddi",
|
||||||
|
b"avc1",
|
||||||
|
b"qt ",
|
||||||
|
b"MSNV",
|
||||||
|
b"dash",
|
||||||
|
b"3gp4",
|
||||||
|
b"3gp5",
|
||||||
|
b"3gp6",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def is_mp4_header(data: bytes) -> bool:
    """Return True if data begins with an MP4/MOV 'ftyp' box.

    A box whose declared size exceeds the available bytes is accepted as
    plausibly-valid-but-truncated; otherwise the major brand must be one
    of the known MP4 brands.
    """
    if len(data) < 8 or data[4:8] != b"ftyp":
        return False
    box_size = int.from_bytes(data[:4], "big")
    if box_size < 12:
        # Too small to even carry a major brand.
        return False
    if box_size > len(data):
        # Truncated buffer: can't check the brand, assume valid.
        return True
    return data[8:12] in _MP4_BRANDS
|
||||||
|
|
||||||
|
|
||||||
|
def read_box_header(data: bytes, offset: int) -> tuple[bytes, int, int] | None:
|
||||||
|
"""
|
||||||
|
Read a box header at the given offset.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
(box_type, header_size, total_box_size) or None if not enough data.
|
||||||
|
"""
|
||||||
|
if offset + 8 > len(data):
|
||||||
|
return None
|
||||||
|
|
||||||
|
size, box_type = struct.unpack_from(">I4s", data, offset)
|
||||||
|
header_size = 8
|
||||||
|
|
||||||
|
if size == 1: # Extended size (64-bit)
|
||||||
|
if offset + 16 > len(data):
|
||||||
|
return None
|
||||||
|
size = struct.unpack_from(">Q", data, offset + 8)[0]
|
||||||
|
header_size = 16
|
||||||
|
elif size == 0: # Box extends to end of data
|
||||||
|
size = len(data) - offset
|
||||||
|
|
||||||
|
return box_type, header_size, size
|
||||||
|
|
||||||
|
|
||||||
|
def iter_top_level_boxes(data: bytes):
    """Iterate over the top-level box headers in `data`.

    Yields:
        (box_type, header_size, total_size, data_offset) for each box,
        stopping at the first malformed/truncated header.
    """
    pos = 0
    while pos < len(data):
        header = read_box_header(data, pos)
        if header is None:
            return
        kind, hdr_len, box_len = header
        yield kind, hdr_len, box_len, pos + hdr_len
        if box_len == 0:
            # Defensive: a zero-size box would loop forever.
            return
        pos += box_len
|
||||||
|
|
||||||
|
|
||||||
|
def find_box(data: bytes, target: bytes) -> bytes | None:
    """Return the body (bytes after the header) of the first top-level box of type `target`, else None."""
    for kind, hdr_len, box_len, body_off in iter_top_level_boxes(data):
        if kind == target:
            # Body length is the declared size minus the header we skipped.
            return data[body_off : body_off + box_len - hdr_len]
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def iter_boxes(data: bytes):
    """Iterate child boxes, yielding (box_type, box_body_bytes) pairs."""
    for kind, hdr_len, box_len, body_off in iter_top_level_boxes(data):
        yield kind, data[body_off : body_off + box_len - hdr_len]
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Sample Table Parsers (inverse of mp4_muxer.py builders)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def parse_full_box_header(data: bytes) -> tuple[int, int, int]:
    """Parse an ISO full-box header: 1 version byte + 3 flag bytes.

    Returns:
        (version, flags, header_size); header_size is always 4, or the
        triple (0, 0, 0) when fewer than 4 bytes are available.
    """
    if len(data) < 4:
        return 0, 0, 0
    # The 24-bit flags field is big-endian.
    return data[0], int.from_bytes(data[1:4], "big"), 4
|
||||||
|
|
||||||
|
|
||||||
|
def parse_stco(data: bytes) -> list[int]:
    """Parse a Chunk Offset box body (stco): 32-bit file offsets.

    Layout: version(1) + flags(3) + entry_count(4) + [offset(4)]...
    Returns [] when the body is truncated.
    """
    if len(data) < 8:
        return []
    _, _, hdr = parse_full_box_header(data)
    count = struct.unpack_from(">I", data, hdr)[0]
    table_at = hdr + 4
    if len(data) < table_at + count * 4:
        return []
    # Unpack the whole offset table in one C-level call.
    return list(struct.unpack_from(f">{count}I", data, table_at))
|
||||||
|
|
||||||
|
|
||||||
|
def parse_co64(data: bytes) -> list[int]:
    """Parse a Chunk Offset box body (co64): 64-bit file offsets.

    Layout: version(1) + flags(3) + entry_count(4) + [offset(8)]...
    Returns [] when the body is truncated.
    """
    if len(data) < 8:
        return []
    _, _, hdr = parse_full_box_header(data)
    count = struct.unpack_from(">I", data, hdr)[0]
    table_at = hdr + 4
    if len(data) < table_at + count * 8:
        return []
    # Unpack the whole 64-bit offset table in one C-level call.
    return list(struct.unpack_from(f">{count}Q", data, table_at))
|
||||||
|
|
||||||
|
|
||||||
|
def parse_stss(data: bytes) -> list[int]:
    """Parse a Sync Sample box body (stss): 1-based keyframe sample numbers.

    Layout: version(1) + flags(3) + entry_count(4) + [sample_number(4)]...
    Returns [] when the body is truncated.
    """
    if len(data) < 8:
        return []
    _, _, hdr = parse_full_box_header(data)
    count = struct.unpack_from(">I", data, hdr)[0]
    table_at = hdr + 4
    if len(data) < table_at + count * 4:
        return []
    return list(struct.unpack_from(f">{count}I", data, table_at))
|
||||||
|
|
||||||
|
|
||||||
|
def parse_stsz(data: bytes) -> tuple[int, list[int]]:
    """Parse a Sample Size box body (stsz).

    Layout: version(1) + flags(3) + sample_size(4) + sample_count(4) + [size(4)]...

    Returns:
        (uniform_size, sizes_list). A non-zero uniform_size means every
        sample has that size and sizes_list is empty; otherwise the list
        holds per-sample sizes. Truncated bodies yield (0, []).
    """
    if len(data) < 12:
        return 0, []
    _, _, hdr = parse_full_box_header(data)
    uniform, count = struct.unpack_from(">II", data, hdr)
    if uniform > 0:
        # Constant-size samples: no per-sample table follows.
        return uniform, []
    table_at = hdr + 8
    if len(data) < table_at + count * 4:
        return 0, []
    return 0, list(struct.unpack_from(f">{count}I", data, table_at))
|
||||||
|
|
||||||
|
|
||||||
|
def parse_stts(data: bytes) -> list[tuple[int, int]]:
    """Parse a Time-to-Sample box body (stts): run-length encoded durations.

    Layout: version(1) + flags(3) + entry_count(4) + [sample_count(4) + sample_delta(4)]...

    Returns:
        List of (sample_count, sample_delta) pairs; [] when truncated.
    """
    if len(data) < 8:
        return []
    _, _, hdr = parse_full_box_header(data)
    count = struct.unpack_from(">I", data, hdr)[0]
    table_at = hdr + 4
    if len(data) < table_at + count * 8:
        return []
    # Read all 32-bit words at once, then pair them up.
    words = struct.unpack_from(f">{count * 2}I", data, table_at)
    return [(words[i], words[i + 1]) for i in range(0, len(words), 2)]
|
||||||
|
|
||||||
|
|
||||||
|
def parse_stsc(data: bytes) -> list[tuple[int, int, int]]:
    """Parse a Sample-to-Chunk box body (stsc).

    Layout: version(1) + flags(3) + entry_count(4) +
            [first_chunk(4) + samples_per_chunk(4) + sample_desc_index(4)]...

    Returns:
        List of (first_chunk, samples_per_chunk, sample_desc_index)
        triples (first_chunk is 1-based); [] when truncated.
    """
    if len(data) < 8:
        return []
    _, _, hdr = parse_full_box_header(data)
    count = struct.unpack_from(">I", data, hdr)[0]
    table_at = hdr + 4
    if len(data) < table_at + count * 12:
        return []
    # Read all 32-bit words at once, then group into triples.
    words = struct.unpack_from(f">{count * 3}I", data, table_at)
    return [(words[i], words[i + 1], words[i + 2]) for i in range(0, len(words), 3)]
|
||||||
|
|
||||||
|
|
||||||
|
def parse_mdhd(data: bytes) -> tuple[int, int]:
    """Parse a Media Header box body (mdhd) for timescale and duration.

    Version 1 uses 64-bit creation/modification/duration fields; version 0
    uses 32-bit fields. Returns (0, 0) when the body is too short.

    Returns:
        (timescale, duration) in media timescale units.
    """
    if len(data) < 4:
        return 0, 0
    if data[0] == 1:
        # v1 layout: version(1)+flags(3)+creation(8)+modification(8)
        # then timescale(4)+duration(8).
        if len(data) < 32:
            return 0, 0
        return (
            struct.unpack_from(">I", data, 20)[0],
            struct.unpack_from(">Q", data, 24)[0],
        )
    # v0 layout: version(1)+flags(3)+creation(4)+modification(4)
    # then timescale(4)+duration(4).
    if len(data) < 20:
        return 0, 0
    return (
        struct.unpack_from(">I", data, 12)[0],
        struct.unpack_from(">I", data, 16)[0],
    )
|
||||||
|
|
||||||
|
|
||||||
|
def parse_stsd_codec(data: bytes) -> str:
    """Extract the first sample-entry FourCC from an stsd box body.

    Returns the codec name (e.g. "avc1", "hvc1", "mp4a"), with trailing
    spaces stripped, or "" when the body is too short or not ASCII.
    """
    if len(data) < 16:
        return ""
    # Body layout: version(1)+flags(3)+entry_count(4), then the first
    # sample entry as size(4)+type(4) — the type sits at bytes 12..16.
    fourcc = data[12:16]
    try:
        return fourcc.decode("ascii").strip()
    except (UnicodeDecodeError, ValueError):
        return ""
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# MP4 Index (parallel to MKVCueIndex)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class MP4Index:
    """
    Seek index extracted from an MP4 file's moov atom.

    Parallel to ``MKVCueIndex`` for MKV files. Provides keyframe-indexed
    cue points for time-based seeking and the raw moov bytes needed to
    reconstruct a streamable (faststart) MP4 for on-the-fly demuxing.
    """

    duration_ms: float = 0.0
    timescale: int = 0
    cue_points: list[tuple[float, int]] = field(default_factory=list)  # [(time_ms, byte_offset), ...]
    moov_offset: int = 0  # Absolute file offset where moov atom starts
    moov_size: int = 0  # Total size of the moov atom (header + body)
    moov_data: bytes = b""  # Raw moov atom bytes (for prepending to mdat pipe)
    ftyp_data: bytes = b""  # Raw ftyp atom bytes (for prepending before moov)
    mdat_offset: int = 0  # Absolute file offset where mdat atom starts
    mdat_size: int = 0  # Total size of the mdat atom
    video_codec: str = ""  # e.g. "avc1", "hvc1", "mp4v"
    audio_codec: str = ""  # e.g. "mp4a", "ac-3"

    def byte_offset_for_time(self, time_ms: float) -> tuple[int, float]:
        """Locate the nearest keyframe at or before `time_ms`.

        Returns:
            (absolute_byte_offset, actual_keyframe_time_ms); (0, 0.0) when
            no cue points exist. Times earlier than the first cue clamp to
            the first keyframe.
        """
        if not self.cue_points:
            return 0, 0.0

        keyframe_times = [t for t, _ in self.cue_points]
        slot = max(bisect.bisect_right(keyframe_times, time_ms) - 1, 0)
        kf_time, kf_offset = self.cue_points[slot]
        return kf_offset, kf_time
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Moov -> Cue Points Builder
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def _find_nested_box(data: bytes, *path: bytes) -> bytes | None:
    """Descend through nested container boxes following `path`.

    Example: _find_nested_box(trak_body, b"mdia", b"minf", b"stbl").
    Returns the innermost box body, or None if any level is missing.
    """
    node = data
    for name in path:
        child = find_box(node, name)
        if child is None:
            return None
        node = child
    return node
|
||||||
|
|
||||||
|
|
||||||
|
def build_cue_points_from_moov(moov_body: bytes) -> tuple[list[tuple[float, int]], float, int, str, str]:
    """
    Parse a moov body to build keyframe-indexed cue points.

    Walks the first video trak's stbl to extract:
    - Chunk offsets (stco/co64)
    - Keyframe sample indices (stss)
    - Sample sizes (stsz)
    - Sample durations (stts)
    - Sample-to-chunk mapping (stsc)
    - Timescale and duration from mdhd

    Returns:
        (cue_points, duration_ms, timescale, video_codec, audio_codec)
        where cue_points is [(time_ms, absolute_byte_offset), ...]; the
        lists are empty (and codecs "") for whatever could not be found.
    """
    cue_points: list[tuple[float, int]] = []
    duration_ms = 0.0
    timescale = 0
    video_codec = ""
    audio_codec = ""

    # Find all traks
    video_stbl = None
    video_mdhd = None

    offset = 0
    data = moov_body
    while offset < len(data):
        result = read_box_header(data, offset)
        if result is None:
            break
        box_type, hdr_size, total_size = result

        if box_type == b"trak":
            trak_body = data[offset + hdr_size : offset + total_size]

            # Check handler type to identify video/audio
            hdlr_data = _find_nested_box(trak_body, b"mdia", b"hdlr")
            handler_type = b""
            if hdlr_data and len(hdlr_data) >= 12:
                # hdlr: version(1)+flags(3)+pre_defined(4)+handler_type(4)
                handler_type = hdlr_data[8:12]

            if handler_type == b"vide" and video_stbl is None:
                # First video track wins; later video traks are ignored.
                video_stbl = _find_nested_box(trak_body, b"mdia", b"minf", b"stbl")
                video_mdhd_data = _find_nested_box(trak_body, b"mdia", b"mdhd")
                if video_mdhd_data:
                    video_mdhd = video_mdhd_data

                stsd_data = _find_nested_box(trak_body, b"mdia", b"minf", b"stbl", b"stsd")
                if stsd_data:
                    video_codec = parse_stsd_codec(stsd_data)

            elif handler_type == b"soun" and not audio_codec:
                stsd_data = _find_nested_box(trak_body, b"mdia", b"minf", b"stbl", b"stsd")
                if stsd_data:
                    audio_codec = parse_stsd_codec(stsd_data)

        elif box_type == b"mvhd":
            # Fallback: parse mvhd for timescale/duration if no mdhd
            mvhd_body = data[offset + hdr_size : offset + total_size]
            if len(mvhd_body) >= 20:
                version = mvhd_body[0]
                if version == 1:
                    # v1: creation/modification are 8 bytes each.
                    if len(mvhd_body) >= 28:
                        ts = struct.unpack_from(">I", mvhd_body, 20)[0]
                        dur = struct.unpack_from(">Q", mvhd_body, 24)[0]
                        if timescale == 0:
                            timescale = ts
                            duration_ms = dur / ts * 1000.0 if ts else 0.0
                else:
                    # v0: creation/modification are 4 bytes each.
                    ts = struct.unpack_from(">I", mvhd_body, 12)[0]
                    dur = struct.unpack_from(">I", mvhd_body, 16)[0]
                    if timescale == 0:
                        timescale = ts
                        duration_ms = dur / ts * 1000.0 if ts else 0.0

        if total_size == 0:
            break
        offset += total_size

    # Parse mdhd for video timescale (more precise than mvhd)
    if video_mdhd:
        ts, dur = parse_mdhd(video_mdhd)
        if ts > 0:
            timescale = ts
            duration_ms = dur / ts * 1000.0

    if video_stbl is None:
        logger.warning("[mp4_parser] No video stbl found in moov")
        return cue_points, duration_ms, timescale, video_codec, audio_codec

    # Parse sample tables from video stbl
    stco_data = find_box(video_stbl, b"stco")
    co64_data = find_box(video_stbl, b"co64")
    stss_data = find_box(video_stbl, b"stss")
    stsz_data = find_box(video_stbl, b"stsz")
    stts_data = find_box(video_stbl, b"stts")
    stsc_data = find_box(video_stbl, b"stsc")

    # Chunk offsets (co64 takes precedence when both exist)
    chunk_offsets = parse_co64(co64_data) if co64_data else (parse_stco(stco_data) if stco_data else [])

    # Keyframe sample numbers (1-based)
    keyframe_samples = set(parse_stss(stss_data)) if stss_data else set()
    all_are_keyframes = not stss_data  # No stss means all samples are sync

    # Sample sizes
    uniform_size, size_list = parse_stsz(stsz_data) if stsz_data else (0, [])

    # Sample durations (run-length encoded)
    stts_entries = parse_stts(stts_data) if stts_data else []

    # Sample-to-chunk mapping
    stsc_entries = parse_stsc(stsc_data) if stsc_data else []

    if not chunk_offsets or timescale == 0:
        logger.warning(
            "[mp4_parser] Missing data: chunks=%d, timescale=%d",
            len(chunk_offsets),
            timescale,
        )
        return cue_points, duration_ms, timescale, video_codec, audio_codec

    # Expand stts to per-sample durations
    sample_durations: list[int] = []
    for count, delta in stts_entries:
        sample_durations.extend([delta] * count)

    # Expand stsc to determine which samples belong to which chunk
    # Build a mapping: chunk_index (0-based) -> samples_per_chunk
    total_chunks = len(chunk_offsets)
    chunk_sample_counts: list[int] = [0] * total_chunks

    if stsc_entries:
        for i, (first_chunk, spc, _sdi) in enumerate(stsc_entries):
            # first_chunk is 1-based; each entry applies until the next
            # entry's first_chunk (or the last chunk for the final entry).
            start = first_chunk - 1
            if i + 1 < len(stsc_entries):
                end = stsc_entries[i + 1][0] - 1
            else:
                end = total_chunks
            for c in range(start, end):
                if c < total_chunks:
                    chunk_sample_counts[c] = spc
    else:
        # Default: 1 sample per chunk
        chunk_sample_counts = [1] * total_chunks

    # Count total samples
    total_samples = sum(chunk_sample_counts)

    # Get per-sample sizes
    if uniform_size > 0:
        sample_sizes = [uniform_size] * total_samples
    else:
        sample_sizes = size_list

    # Build cumulative timestamp for each sample and map keyframes to byte offsets
    current_sample = 0  # 0-based sample index
    current_time = 0  # in timescale units

    for chunk_idx, chunk_offset in enumerate(chunk_offsets):
        spc = chunk_sample_counts[chunk_idx] if chunk_idx < len(chunk_sample_counts) else 1
        byte_pos = chunk_offset

        for s in range(spc):
            sample_num = current_sample + 1  # 1-based for stss comparison
            is_keyframe = all_are_keyframes or sample_num in keyframe_samples

            if is_keyframe:
                time_ms = current_time / timescale * 1000.0
                cue_points.append((time_ms, byte_pos))

            # Advance byte position by this sample's size
            if current_sample < len(sample_sizes):
                byte_pos += sample_sizes[current_sample]

            # Advance timestamp
            if current_sample < len(sample_durations):
                current_time += sample_durations[current_sample]

            current_sample += 1

    logger.info(
        "[mp4_parser] Built %d cue points from %d samples, duration=%.1fs, video=%s, audio=%s",
        len(cue_points),
        total_samples,
        duration_ms / 1000.0,
        video_codec,
        audio_codec,
    )

    return cue_points, duration_ms, timescale, video_codec, audio_codec
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Moov Offset Rewriting (for faststart pipe construction)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def _rewrite_stco_in_place(data: bytearray, box_start: int, box_size: int, delta: int) -> int:
|
||||||
|
"""Rewrite stco chunk offsets by adding delta. Returns number of entries fixed."""
|
||||||
|
# FullBox header: version(1) + flags(3) = 4 bytes
|
||||||
|
body_start = box_start + 4
|
||||||
|
if body_start + 4 > box_start + box_size:
|
||||||
|
return 0
|
||||||
|
entry_count = struct.unpack_from(">I", data, body_start)[0]
|
||||||
|
pos = body_start + 4
|
||||||
|
for _ in range(entry_count):
|
||||||
|
if pos + 4 > box_start + box_size:
|
||||||
|
break
|
||||||
|
old_val = struct.unpack_from(">I", data, pos)[0]
|
||||||
|
struct.pack_into(">I", data, pos, old_val + delta)
|
||||||
|
pos += 4
|
||||||
|
return entry_count
|
||||||
|
|
||||||
|
|
||||||
|
def _rewrite_co64_in_place(data: bytearray, box_start: int, box_size: int, delta: int) -> int:
|
||||||
|
"""Rewrite co64 chunk offsets by adding delta. Returns number of entries fixed."""
|
||||||
|
body_start = box_start + 4
|
||||||
|
if body_start + 4 > box_start + box_size:
|
||||||
|
return 0
|
||||||
|
entry_count = struct.unpack_from(">I", data, body_start)[0]
|
||||||
|
pos = body_start + 4
|
||||||
|
for _ in range(entry_count):
|
||||||
|
if pos + 8 > box_start + box_size:
|
||||||
|
break
|
||||||
|
old_val = struct.unpack_from(">Q", data, pos)[0]
|
||||||
|
struct.pack_into(">Q", data, pos, old_val + delta)
|
||||||
|
pos += 8
|
||||||
|
return entry_count
|
||||||
|
|
||||||
|
|
||||||
|
def _walk_and_rewrite(data: bytearray, start: int, end: int, delta: int) -> int:
    """
    Recursively walk boxes within [start, end) looking for stco/co64 boxes
    and rewriting their offsets.

    Returns total number of offset entries rewritten.
    """
    total = 0
    offset = start
    while offset + 8 <= end:
        # Standard ISO BMFF box header: 32-bit size followed by a 4-byte type.
        size = struct.unpack_from(">I", data, offset)[0]
        box_type = data[offset + 4 : offset + 8]
        hdr_size = 8

        if size == 1:
            # size == 1 signals a 64-bit "largesize" field after the type code.
            if offset + 16 > end:
                break
            size = struct.unpack_from(">Q", data, offset + 8)[0]
            hdr_size = 16
        elif size == 0:
            # size == 0 means the box extends to the end of the enclosing span.
            size = end - offset

        if size < 8 or offset + size > end:
            # Malformed or truncated box -- stop walking this level.
            break

        body_start = offset + hdr_size
        body_end = offset + size

        if box_type == b"stco":
            total += _rewrite_stco_in_place(data, body_start, size - hdr_size, delta)
        elif box_type == b"co64":
            total += _rewrite_co64_in_place(data, body_start, size - hdr_size, delta)
        elif box_type in (b"moov", b"trak", b"mdia", b"minf", b"stbl"):
            # Container box -- recurse into children
            total += _walk_and_rewrite(data, body_start, body_end, delta)

        offset += size

    return total
|
||||||
|
|
||||||
|
|
||||||
|
def extract_video_track_from_moov(moov_data: bytes):
    """
    Extract video codec configuration from an MP4 moov atom.

    Walks the moov box tree to find the first video trak, extracts its
    resolution and codec-private data (avcC/hvcC), and returns a synthetic
    ``MKVTrack`` suitable for building an fMP4 init segment.

    Returns:
        An ``MKVTrack`` with video metadata, or ``None`` if no video track
        is found.
    """
    # Local import avoids a module-level import cycle with the ebml parser.
    from mediaflow_proxy.remuxer.ebml_parser import (
        CODEC_ID_H264,
        CODEC_ID_H265,
        MKVTrack,
    )

    # Strip the moov box header to get the body
    if len(moov_data) < 8:
        return None
    raw_size = struct.unpack_from(">I", moov_data, 0)[0]
    # raw_size == 1 means an extended 16-byte header (64-bit largesize).
    hdr_size = 16 if raw_size == 1 else 8
    moov_body = moov_data[hdr_size:]

    # Walk traks looking for video handler
    offset = 0
    while offset < len(moov_body):
        result = read_box_header(moov_body, offset)
        if result is None:
            break
        box_type, box_hdr_size, total_size = result

        if box_type == b"trak":
            trak_body = moov_body[offset + box_hdr_size : offset + total_size]

            # Check handler type
            hdlr_data = _find_nested_box(trak_body, b"mdia", b"hdlr")
            handler_type = b""
            if hdlr_data and len(hdlr_data) >= 12:
                # hdlr FullBox: version/flags(4) + pre_defined(4) + handler_type(4)
                handler_type = hdlr_data[8:12]

            if handler_type == b"vide":
                # Found video trak -- extract stsd for codec config
                stsd_data = _find_nested_box(trak_body, b"mdia", b"minf", b"stbl", b"stsd")
                if not stsd_data or len(stsd_data) < 16:
                    offset += total_size
                    continue

                codec_name = parse_stsd_codec(stsd_data)

                # Map MP4 codec names to MKV codec IDs
                if codec_name in ("avc1", "avc3"):
                    mkv_codec_id = CODEC_ID_H264
                elif codec_name in ("hvc1", "hev1"):
                    mkv_codec_id = CODEC_ID_H265
                else:
                    mkv_codec_id = f"V_MP4/{codec_name}"

                # Extract codec private (avcC or hvcC box) from inside the
                # sample entry. The stsd structure is:
                #   version(1) + flags(3) + entry_count(4)
                #   then entry: size(4) + type(4) + ... + nested boxes
                # The avcC/hvcC is a child box of the sample entry.
                codec_private = b""
                width = 0
                height = 0

                # Parse sample entry to get width/height and codec config
                entry_start = 8  # skip version+flags+entry_count
                if entry_start + 8 <= len(stsd_data):
                    entry_size = struct.unpack_from(">I", stsd_data, entry_start)[0]
                    entry_body_start = entry_start + 8  # skip size+type
                    entry_end = min(entry_start + entry_size, len(stsd_data))

                    # Visual sample entry: 6 reserved + 2 data_ref_idx + ...
                    # At offset 24 from entry body start: width(2) + height(2)
                    vis_offset = entry_body_start + 24
                    if vis_offset + 4 <= entry_end:
                        width = struct.unpack_from(">H", stsd_data, vis_offset)[0]
                        height = struct.unpack_from(">H", stsd_data, vis_offset + 2)[0]

                    # Scan nested boxes for avcC or hvcC
                    # Visual sample entry fixed fields = 70 bytes from entry body
                    nested_start = entry_body_start + 70
                    if nested_start < entry_end:
                        nested_data = stsd_data[nested_start:entry_end]
                        for target in (b"avcC", b"hvcC"):
                            found = find_box(nested_data, target)
                            if found:
                                codec_private = found
                                break

                # Get duration from mdhd if available
                default_duration_ns = 0
                mdhd_data = _find_nested_box(trak_body, b"mdia", b"mdhd")
                if mdhd_data and len(mdhd_data) >= 20:
                    version = mdhd_data[0]
                    if version == 1 and len(mdhd_data) >= 28:
                        # v1 mdhd: creation(8) + modification(8), then timescale at 20.
                        ts = struct.unpack_from(">I", mdhd_data, 20)[0]
                        dur = struct.unpack_from(">Q", mdhd_data, 24)[0]
                    else:
                        # v0 mdhd: creation(4) + modification(4), then timescale at 12.
                        ts = struct.unpack_from(">I", mdhd_data, 12)[0]
                        dur = struct.unpack_from(">I", mdhd_data, 16)[0]
                    if ts > 0 and dur > 0:
                        # Rough estimate: assume 24fps if we can't determine.
                        # NOTE(review): frame rate is not derived from ts/dur here,
                        # so the per-frame duration is always the 24fps constant.
                        default_duration_ns = int(1_000_000_000 / 24)

                return MKVTrack(
                    track_number=1,
                    track_type=1,  # video
                    codec_id=mkv_codec_id,
                    codec_private=codec_private,
                    pixel_width=width,
                    pixel_height=height,
                    default_duration_ns=default_duration_ns,
                )

        offset += total_size

    return None
|
||||||
|
|
||||||
|
|
||||||
|
def rewrite_moov_offsets(moov_data: bytes, delta: int) -> bytes:
    """
    Rewrite all stco/co64 chunk offsets in a moov atom by adding ``delta``.

    This is needed when rearranging an MP4 file for pipe streaming:
    the original moov's chunk offsets reference positions in the original
    file layout. When we prepend moov before mdat, the offsets must be
    shifted by ``delta = moov_size - original_mdat_offset``.

    Args:
        moov_data: Raw bytes of the complete moov box (header + body).
        delta: Offset adjustment to add to every chunk offset.

    Returns:
        Modified moov bytes with updated chunk offsets.
    """
    mutable = bytearray(moov_data)

    # A declared 32-bit size of 1 signals an extended (16-byte) box header.
    declared_size = struct.unpack_from(">I", mutable, 0)[0]
    header_len = 16 if declared_size == 1 else 8

    # Walk the moov body only (skip the header) and patch every offset table.
    rewritten = _walk_and_rewrite(mutable, header_len, len(mutable), delta)
    logger.info("[mp4_parser] Rewrote %d chunk offset entries (delta=%+d)", rewritten, delta)

    return bytes(mutable)
|
||||||
608
mediaflow_proxy/remuxer/pyav_demuxer.py
Normal file
608
mediaflow_proxy/remuxer/pyav_demuxer.py
Normal file
@@ -0,0 +1,608 @@
|
|||||||
|
"""
|
||||||
|
Universal PyAV-based streaming demuxer.
|
||||||
|
|
||||||
|
Bridges async byte streams to PyAV's synchronous I/O using an OS pipe,
|
||||||
|
allowing on-the-fly demuxing of any container format (MKV, MP4, TS,
|
||||||
|
FLV, WebM, etc.) from an async source.
|
||||||
|
|
||||||
|
Architecture:
|
||||||
|
AsyncIterator[bytes] --> async feeder task --> queue.Queue --> writer thread (pipe)
|
||||||
|
|
|
||||||
|
OS pipe (kernel buffer)
|
||||||
|
|
|
||||||
|
demux thread: av.open + discover + demux
|
||||||
|
|
|
||||||
|
queue.Queue --> run_in_executor consumer
|
||||||
|
|
||||||
|
Performance: Uses plain threading.Queue on both sides (writer input and
|
||||||
|
packet output) to avoid per-item ``run_coroutine_threadsafe`` overhead.
|
||||||
|
The async/thread bridge is done via ``run_in_executor`` on the consumer
|
||||||
|
side and a dedicated asyncio task on the producer side.
|
||||||
|
|
||||||
|
For MP4 inputs, the caller (transcode_handler) prepends the moov atom
|
||||||
|
to the stream so PyAV receives a "faststart"-style MP4 through the pipe.
|
||||||
|
This allows true on-the-fly demuxing for all container formats.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import queue
|
||||||
|
import threading
|
||||||
|
from collections.abc import AsyncIterator
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
import av
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Sentinel object to signal end-of-stream in queues
|
||||||
|
_SENTINEL = object()
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
class DemuxedStream:
    """Metadata about a demuxed stream."""

    # Index of the stream within the source container.
    index: int
    # Codec short name as reported by PyAV (e.g. "h264", "aac").
    codec_name: str
    codec_type: str  # "video" or "audio"
    # Video-specific
    width: int = 0
    height: int = 0
    fps: float = 0.0
    pixel_format: str = ""
    # Audio-specific
    sample_rate: int = 0
    channels: int = 0
    # Timing
    time_base_num: int = 1
    time_base_den: int = 1000
    duration_seconds: float = 0.0
    # Raw codec extradata (e.g. SPS/PPS for H.264, AudioSpecificConfig for AAC)
    extradata: bytes = b""
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(slots=True)
|
||||||
|
class DemuxedPacket:
|
||||||
|
"""A demuxed packet with timing info."""
|
||||||
|
|
||||||
|
stream_index: int
|
||||||
|
codec_type: str # "video" or "audio"
|
||||||
|
data: bytes
|
||||||
|
pts: int # Presentation timestamp in stream time_base units
|
||||||
|
dts: int # Decode timestamp in stream time_base units
|
||||||
|
duration: int # Duration in stream time_base units
|
||||||
|
is_keyframe: bool
|
||||||
|
time_base_num: int
|
||||||
|
time_base_den: int
|
||||||
|
# Optional decoded frame when decode_video/decode_audio is True
|
||||||
|
# av.VideoFrame for video, av.AudioFrame for audio
|
||||||
|
decoded_frame: object = None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def pts_seconds(self) -> float:
|
||||||
|
if self.time_base_den == 0:
|
||||||
|
return 0.0
|
||||||
|
return self.pts * self.time_base_num / self.time_base_den
|
||||||
|
|
||||||
|
@property
|
||||||
|
def dts_seconds(self) -> float:
|
||||||
|
if self.time_base_den == 0:
|
||||||
|
return 0.0
|
||||||
|
return self.dts * self.time_base_num / self.time_base_den
|
||||||
|
|
||||||
|
@property
|
||||||
|
def duration_seconds(self) -> float:
|
||||||
|
if self.time_base_den == 0:
|
||||||
|
return 0.0
|
||||||
|
return self.duration * self.time_base_num / self.time_base_den
|
||||||
|
|
||||||
|
|
||||||
|
class PyAVDemuxer:
|
||||||
|
"""
|
||||||
|
Streaming demuxer using PyAV with pipe-based I/O.
|
||||||
|
|
||||||
|
All container I/O happens in background threads. The writer thread
|
||||||
|
feeds source bytes into a pipe; a single demux thread opens the
|
||||||
|
container, discovers streams, and demuxes packets -- all on the
|
||||||
|
same file object, ensuring the pipe's read cursor is never lost.
|
||||||
|
|
||||||
|
Performance optimisation: both the writer-input side and the
|
||||||
|
packet-output side use plain ``queue.Queue`` (no event-loop
|
||||||
|
involvement per item). The async/thread bridge is done via
|
||||||
|
``run_in_executor`` on the consumer and an asyncio task on the
|
||||||
|
producer, eliminating ~1700 ``run_coroutine_threadsafe`` round-trips
|
||||||
|
per 30 s of 4K content.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
demuxer = PyAVDemuxer()
|
||||||
|
await demuxer.start(source_async_iter)
|
||||||
|
# demuxer.video_stream / audio_stream are now available
|
||||||
|
async for packet in demuxer.iter_packets():
|
||||||
|
if packet.codec_type == "video":
|
||||||
|
...
|
||||||
|
"""
|
||||||
|
|
||||||
|
    def __init__(self, decode_video: bool = False, decode_audio: bool = False) -> None:
        """
        Args:
            decode_video: If True, the demux thread will decode video packets
                using the container's codec context and attach decoded frames
                to DemuxedPacket.decoded_frame. This avoids format conversion
                issues with standalone decoders (HVCC vs Annex B).
            decode_audio: If True, the demux thread will decode audio packets
                using the container's codec context and attach decoded frames
                to DemuxedPacket.decoded_frame. This is needed for codecs like
                Vorbis/Opus where the standalone decoder requires codec headers
                that are only available in the container context. Can also be
                set after start() returns (before packets are consumed) via
                the ``enable_audio_decode()`` method.
        """
        self._decode_video = decode_video
        self._decode_audio = decode_audio
        # Events the demux thread waits on until the caller has decided
        # whether packets should be decoded in-thread (see start()).
        self._video_decode_decided = threading.Event()
        self._audio_decode_decided = threading.Event()
        # If decode flags were set at construction time, mark decided immediately
        if decode_video:
            self._video_decode_decided.set()
        if decode_audio:
            self._audio_decode_decided.set()
        self._container: av.InputContainer | None = None
        self._video_stream: DemuxedStream | None = None
        self._audio_stream: DemuxedStream | None = None
        # Thread-safe queues (no event-loop involvement per put/get)
        self._packet_queue: queue.Queue | None = None
        self._source_queue: queue.Queue | None = None
        self._demux_thread: threading.Thread | None = None
        self._writer_thread: threading.Thread | None = None
        self._feeder_task: asyncio.Task | None = None
        # OS pipe file descriptors; ownership moves to the threads in start().
        self._write_fd: int | None = None
        self._read_fd: int | None = None
|
||||||
|
|
||||||
|
    @property
    def video_stream(self) -> DemuxedStream | None:
        """Metadata for the selected video stream, or ``None`` if absent / before start()."""
        return self._video_stream
|
||||||
|
|
||||||
|
    @property
    def audio_stream(self) -> DemuxedStream | None:
        """Metadata for the selected audio stream, or ``None`` if absent / before start()."""
        return self._audio_stream
|
||||||
|
|
||||||
|
    def enable_video_decode(self, enable: bool = True) -> None:
        """
        Enable or disable in-thread video decoding.

        Call this after ``start()`` returns (stream metadata is available)
        but before consuming packets via ``iter_packets()``. The demux
        thread waits for this signal before processing video packets.
        """
        # Order matters: set the flag before the event so the demux thread
        # observes the final value once woken.
        self._decode_video = enable
        self._video_decode_decided.set()
|
||||||
|
|
||||||
|
    def enable_audio_decode(self, enable: bool = True) -> None:
        """
        Enable or disable in-thread audio decoding.

        Call this after ``start()`` returns (stream metadata is available)
        but before consuming packets via ``iter_packets()``. The demux
        thread waits for this signal before processing audio packets.
        """
        # Order matters: set the flag before the event so the demux thread
        # observes the final value once woken.
        self._decode_audio = enable
        self._audio_decode_decided.set()
|
||||||
|
|
||||||
|
# ── Writer side ──────────────────────────────────────────────────
|
||||||
|
|
||||||
|
    async def _async_feeder(self, source: AsyncIterator[bytes]) -> None:
        """
        Async task: pull chunks from the async source and push them
        into a plain ``queue.Queue`` for the writer thread.

        This replaces the old per-chunk ``run_coroutine_threadsafe``
        pattern, batching the async-to-sync bridge into one task.

        ``queue.Queue.put()`` is a blocking call, so we use
        ``run_in_executor`` to avoid blocking the event loop when the
        queue is full.
        """
        loop = asyncio.get_running_loop()
        sq = self._source_queue
        try:
            async for chunk in source:
                # Blocking put runs on an executor thread; this yields
                # back-pressure when the writer falls behind.
                await loop.run_in_executor(None, sq.put, chunk)
        except (asyncio.CancelledError, GeneratorExit):
            pass
        except Exception:
            # NOTE(review): source errors are swallowed here; downstream only
            # sees EOF via the sentinel. Presumably best-effort by design.
            pass
        finally:
            # Always signal end-of-stream so the writer thread can exit.
            sq.put(_SENTINEL)
|
||||||
|
|
||||||
|
    def _write_chunks_sync(self) -> None:
        """
        Writer thread: pull pre-buffered chunks from ``_source_queue``
        and write to the OS pipe. No event-loop interaction.
        """
        write_fd = self._write_fd
        sq = self._source_queue
        try:
            while True:
                # queue.Empty after 30 s idle lands in the except below and
                # ends the writer, which closes the pipe (reader sees EOF).
                chunk = sq.get(timeout=30.0)
                if chunk is _SENTINEL:
                    break
                os.write(write_fd, chunk)
        except Exception:
            # BrokenPipeError (reader gone) or queue timeout: treat as EOF.
            pass
        finally:
            # Closing the write end makes the demux side observe EOF.
            try:
                os.close(write_fd)
            except OSError:
                pass
            self._write_fd = None
|
||||||
|
|
||||||
|
# ── Demux side ───────────────────────────────────────────────────
|
||||||
|
|
||||||
|
    async def start(self, source: AsyncIterator[bytes]) -> None:
        """
        Start pipe-based streaming: writer thread feeds the pipe, a single
        demux thread opens the container, discovers streams, and begins
        enqueuing packets.

        After this returns, ``video_stream`` and ``audio_stream`` are
        populated and packets are being enqueued for ``iter_packets()``.
        """
        loop = asyncio.get_running_loop()

        # Create OS pipe
        self._read_fd, self._write_fd = os.pipe()

        # Source buffer queue (async feeder task -> writer thread)
        self._source_queue = queue.Queue(maxsize=256)

        # Kick off the async feeder task
        self._feeder_task = asyncio.create_task(self._async_feeder(source))

        # Start writer thread (drains source_queue into the pipe)
        self._writer_thread = threading.Thread(
            target=self._write_chunks_sync,
            daemon=True,
            name="pyav-writer",
        )
        self._writer_thread.start()

        # Packet queue for demux-thread -> async consumer bridge
        self._packet_queue = queue.Queue(maxsize=128)
        streams_ready = threading.Event()

        def _open_and_demux():
            """
            Single background thread: open container, discover streams,
            demux all packets.

            Critical: av.open(), _discover_streams(), and container.demux()
            all happen on the same file object in the same thread. This
            ensures the pipe read cursor is never lost between open and demux.
            """
            pkt_count = 0
            pq = self._packet_queue
            try:
                # Open container from read end of pipe
                read_file = os.fdopen(self._read_fd, "rb")
                self._read_fd = None  # ownership transferred

                self._container = av.open(
                    read_file,
                    mode="r",
                    options={
                        # Tolerate mid-stream joins / broken data in live TS
                        "err_detect": "ignore_err",
                        "fflags": "+discardcorrupt+genpts",
                    },
                )
                self._discover_streams()

                # Signal stream metadata is available
                streams_ready.set()

                if self._video_stream is None and self._audio_stream is None:
                    logger.warning("[pyav_demuxer] No video or audio streams found")
                    return

                # Select streams to demux
                streams_to_demux = []
                if self._video_stream is not None:
                    streams_to_demux.append(self._container.streams[self._video_stream.index])
                if self._audio_stream is not None:
                    streams_to_demux.append(self._container.streams[self._audio_stream.index])

                # Wait for the caller to decide on video/audio decoding
                # (if not already decided at construction time).
                if not self._video_decode_decided.is_set():
                    self._video_decode_decided.wait(timeout=10.0)
                if not self._audio_decode_decided.is_set():
                    self._audio_decode_decided.wait(timeout=10.0)

                # Cache stream objects and time_base for the hot loop
                video_stream_obj = (
                    self._container.streams[self._video_stream.index] if self._video_stream is not None else None
                )
                audio_stream_obj = (
                    self._container.streams[self._audio_stream.index] if self._audio_stream is not None else None
                )

                video_tb_num = video_stream_obj.time_base.numerator if video_stream_obj else 1
                video_tb_den = video_stream_obj.time_base.denominator if video_stream_obj else 1
                audio_tb_num = audio_stream_obj.time_base.numerator if audio_stream_obj else 1
                audio_tb_den = audio_stream_obj.time_base.denominator if audio_stream_obj else 1

                # Snapshot the decode flags: the events above guarantee the
                # caller's final values are visible by now.
                decode_video = self._decode_video
                decode_audio = self._decode_audio

                # Demux and enqueue packets -- plain queue.put(), no event loop
                for packet in self._container.demux(*streams_to_demux):
                    if packet.size == 0:
                        continue

                    stream = self._container.streams[packet.stream_index]
                    is_video = stream.type == "video"
                    is_audio = stream.type == "audio"

                    # Optionally decode video packets in-thread
                    if decode_video and is_video and video_stream_obj is not None:
                        try:
                            frames = video_stream_obj.codec_context.decode(packet)
                        except Exception:
                            # Corrupt packet: skip, decoder keeps its state.
                            frames = []
                        for frame in frames:
                            pq.put(
                                DemuxedPacket(
                                    stream_index=packet.stream_index,
                                    codec_type="video",
                                    data=b"",
                                    pts=int(frame.pts) if frame.pts is not None else 0,
                                    dts=int(frame.pts) if frame.pts is not None else 0,
                                    duration=int(packet.duration) if packet.duration is not None else 0,
                                    is_keyframe=frame.key_frame,
                                    time_base_num=video_tb_num,
                                    time_base_den=video_tb_den,
                                    decoded_frame=frame,
                                )
                            )
                            pkt_count += 1

                    # Optionally decode audio packets in-thread
                    elif decode_audio and is_audio and audio_stream_obj is not None:
                        try:
                            frames = audio_stream_obj.codec_context.decode(packet)
                        except Exception:
                            frames = []
                        for frame in frames:
                            pq.put(
                                DemuxedPacket(
                                    stream_index=packet.stream_index,
                                    codec_type="audio",
                                    data=b"",
                                    pts=int(frame.pts) if frame.pts is not None else 0,
                                    dts=int(frame.pts) if frame.pts is not None else 0,
                                    duration=int(packet.duration) if packet.duration is not None else 0,
                                    is_keyframe=False,
                                    time_base_num=audio_tb_num,
                                    time_base_den=audio_tb_den,
                                    decoded_frame=frame,
                                )
                            )
                            pkt_count += 1

                    else:
                        # Pass-through path: forward the compressed packet as-is.
                        tb_num = video_tb_num if is_video else audio_tb_num
                        tb_den = video_tb_den if is_video else audio_tb_den
                        pq.put(
                            DemuxedPacket(
                                stream_index=packet.stream_index,
                                codec_type=stream.type,
                                data=bytes(packet),
                                pts=int(packet.pts) if packet.pts is not None else 0,
                                dts=int(packet.dts) if packet.dts is not None else 0,
                                duration=int(packet.duration) if packet.duration is not None else 0,
                                is_keyframe=packet.is_keyframe,
                                time_base_num=tb_num,
                                time_base_den=tb_den,
                            )
                        )
                        pkt_count += 1

                # Flush the video decoder if we were decoding
                if decode_video and video_stream_obj is not None:
                    try:
                        for frame in video_stream_obj.codec_context.decode(None):
                            pq.put(
                                DemuxedPacket(
                                    stream_index=video_stream_obj.index,
                                    codec_type="video",
                                    data=b"",
                                    pts=int(frame.pts) if frame.pts is not None else 0,
                                    dts=int(frame.pts) if frame.pts is not None else 0,
                                    duration=0,
                                    is_keyframe=frame.key_frame,
                                    time_base_num=video_tb_num,
                                    time_base_den=video_tb_den,
                                    decoded_frame=frame,
                                )
                            )
                            pkt_count += 1
                    except Exception:
                        pass

                # Flush the audio decoder if we were decoding
                if decode_audio and audio_stream_obj is not None:
                    try:
                        for frame in audio_stream_obj.codec_context.decode(None):
                            pq.put(
                                DemuxedPacket(
                                    stream_index=audio_stream_obj.index,
                                    codec_type="audio",
                                    data=b"",
                                    pts=int(frame.pts) if frame.pts is not None else 0,
                                    dts=int(frame.pts) if frame.pts is not None else 0,
                                    duration=0,
                                    is_keyframe=False,
                                    time_base_num=audio_tb_num,
                                    time_base_den=audio_tb_den,
                                    decoded_frame=frame,
                                )
                            )
                            pkt_count += 1
                    except Exception:
                        pass

                logger.info("[pyav_demuxer] Demux complete: %d packets", pkt_count)

            except Exception as e:
                # "Invalid data" is routine on aborted/partial streams; log others.
                if "Invalid data" not in str(e):
                    logger.debug("[pyav_demuxer] Demux thread error: %s", e)
                # Ensure streams_ready is set even on error
                streams_ready.set()
            finally:
                # Always wake the consumer so iter_packets() can terminate.
                pq.put(_SENTINEL)

        self._demux_thread = threading.Thread(target=_open_and_demux, daemon=True, name="pyav-demux")
        self._demux_thread.start()

        # Wait for stream discovery before returning.
        # Use run_in_executor to avoid blocking the event loop.
        await loop.run_in_executor(None, streams_ready.wait)
|
||||||
|
|
||||||
|
    async def iter_packets(self) -> AsyncIterator[DemuxedPacket]:
        """
        Yield demuxed packets from the background thread.

        Uses ``run_in_executor`` for the blocking ``queue.get()`` call,
        avoiding per-packet ``run_coroutine_threadsafe`` overhead.

        ``start()`` must be called first.

        Raises:
            RuntimeError: if called before ``start()``.
        """
        if self._packet_queue is None:
            raise RuntimeError("Call start() before iter_packets()")

        loop = asyncio.get_running_loop()
        pq = self._packet_queue

        try:
            while True:
                packet = await loop.run_in_executor(None, pq.get)
                if packet is _SENTINEL:
                    # Demux thread finished (or failed); stop the stream.
                    break
                yield packet

            # Normal exhaustion: give the demux thread a chance to finish.
            if self._demux_thread is not None:
                self._demux_thread.join(timeout=5.0)

        except GeneratorExit:
            logger.debug("[pyav_demuxer] Generator closed")
        except asyncio.CancelledError:
            logger.debug("[pyav_demuxer] Cancelled")
        finally:
            # Tear down threads, container, and pipe FDs regardless of how
            # the iteration ended.
            self._cleanup()
|
||||||
|
|
||||||
|
    def _discover_streams(self) -> None:
        """Inspect the opened container and record stream metadata.

        Picks the FIRST video stream and the FIRST audio stream only;
        additional streams of either type are ignored.
        """
        if self._container is None:
            return

        for stream in self._container.streams:
            if stream.type == "video" and self._video_stream is None:
                codec_ctx = stream.codec_context
                # Fall back to 24 fps when the container reports no rate.
                fps = float(stream.average_rate) if stream.average_rate else 24.0
                self._video_stream = DemuxedStream(
                    index=stream.index,
                    codec_name=codec_ctx.name if codec_ctx else stream.codec.name,
                    codec_type="video",
                    width=codec_ctx.width if codec_ctx else 0,
                    height=codec_ctx.height if codec_ctx else 0,
                    fps=fps,
                    pixel_format=str(codec_ctx.pix_fmt) if codec_ctx and codec_ctx.pix_fmt else "yuv420p",
                    time_base_num=stream.time_base.numerator,
                    time_base_den=stream.time_base.denominator,
                    duration_seconds=float(stream.duration * stream.time_base) if stream.duration else 0.0,
                    extradata=bytes(codec_ctx.extradata) if codec_ctx and codec_ctx.extradata else b"",
                )
                logger.info(
                    "[pyav_demuxer] Video: %s %dx%d @%.1ffps",
                    self._video_stream.codec_name,
                    self._video_stream.width,
                    self._video_stream.height,
                    self._video_stream.fps,
                )

            elif stream.type == "audio" and self._audio_stream is None:
                codec_ctx = stream.codec_context
                self._audio_stream = DemuxedStream(
                    index=stream.index,
                    codec_name=codec_ctx.name if codec_ctx else stream.codec.name,
                    codec_type="audio",
                    sample_rate=codec_ctx.sample_rate if codec_ctx else 0,
                    channels=codec_ctx.channels if codec_ctx else 0,
                    time_base_num=stream.time_base.numerator,
                    time_base_den=stream.time_base.denominator,
                    duration_seconds=float(stream.duration * stream.time_base) if stream.duration else 0.0,
                    extradata=bytes(codec_ctx.extradata) if codec_ctx and codec_ctx.extradata else b"",
                )
                logger.info(
                    "[pyav_demuxer] Audio: %s %dHz %dch",
                    self._audio_stream.codec_name,
                    self._audio_stream.sample_rate,
                    self._audio_stream.channels,
                )
|
||||||
|
|
||||||
|
    def _cleanup(self) -> None:
        """Stop threads and release all resources safely.

        Idempotent: every step nulls the attribute it releases, so repeated
        calls are harmless.

        The order is critical to avoid SIGSEGV from closing the container
        while the demux thread is still calling container.demux():

        1. Cancel the feeder task (stops new bytes being queued).
        2. Put a sentinel into the source queue so the writer thread
           unblocks and exits. The writer's ``finally`` closes the pipe
           write-end, which causes the demux thread to see EOF.
        3. Join the writer thread (wait for it to drain and exit).
        4. Join the demux thread (it finishes after pipe EOF).
        5. ONLY THEN close the container (no thread is using it).
        6. Close any remaining pipe FDs (read end, if still open).
        """
        # 1. Cancel feeder task
        if self._feeder_task is not None:
            self._feeder_task.cancel()
            self._feeder_task = None

        # 2. Unblock writer thread so it exits and closes the pipe
        if self._source_queue is not None:
            try:
                # put_nowait: never block cleanup on a full queue.
                self._source_queue.put_nowait(_SENTINEL)
            except Exception:
                pass

        # 3. Join writer thread (it closes _write_fd in its finally block)
        if self._writer_thread is not None:
            self._writer_thread.join(timeout=5.0)
            self._writer_thread = None

        # 4. Join demux thread -- must finish before we close the container
        if self._demux_thread is not None:
            self._demux_thread.join(timeout=5.0)
            self._demux_thread = None

        # 5. Now safe to close the container (no thread is using it)
        if self._container is not None:
            try:
                self._container.close()
            except Exception:
                pass
            self._container = None

        # 6. Close any remaining pipe FDs
        for fd_name in ("_read_fd", "_write_fd"):
            fd = getattr(self, fd_name, None)
            if fd is not None:
                try:
                    os.close(fd)
                except OSError:
                    pass
                setattr(self, fd_name, None)
|
||||||
1121
mediaflow_proxy/remuxer/transcode_handler.py
Normal file
1121
mediaflow_proxy/remuxer/transcode_handler.py
Normal file
File diff suppressed because it is too large
Load Diff
1268
mediaflow_proxy/remuxer/transcode_pipeline.py
Normal file
1268
mediaflow_proxy/remuxer/transcode_pipeline.py
Normal file
File diff suppressed because it is too large
Load Diff
1728
mediaflow_proxy/remuxer/ts_muxer.py
Normal file
1728
mediaflow_proxy/remuxer/ts_muxer.py
Normal file
File diff suppressed because it is too large
Load Diff
403
mediaflow_proxy/remuxer/video_transcoder.py
Normal file
403
mediaflow_proxy/remuxer/video_transcoder.py
Normal file
@@ -0,0 +1,403 @@
|
|||||||
|
"""
|
||||||
|
GPU-accelerated video transcoder with runtime detection.
|
||||||
|
|
||||||
|
Detects available hardware encoders/decoders at first use and selects
|
||||||
|
the best available backend:
|
||||||
|
- NVIDIA: h264_nvenc / hevc_cuvid (NVENC + CUDA)
|
||||||
|
- Apple macOS: h264_videotoolbox / hevc_videotoolbox
|
||||||
|
- Intel Linux: h264_vaapi / h264_qsv
|
||||||
|
- Fallback: libx264 (CPU)
|
||||||
|
|
||||||
|
The transcoder operates at the packet/frame level via PyAV, suitable
|
||||||
|
for integration into the streaming pipeline.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from enum import Enum
|
||||||
|
from fractions import Fraction
|
||||||
|
|
||||||
|
import av
|
||||||
|
|
||||||
|
from mediaflow_proxy.configs import settings
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class HWAccelType(Enum):
    """Hardware-acceleration backends this module can select, by probe priority."""

    NONE = "none"  # CPU-only fallback (libx264)
    NVIDIA = "nvidia"  # NVENC encode / CUVID decode
    VIDEOTOOLBOX = "videotoolbox"  # Apple macOS
    VAAPI = "vaapi"  # Intel/AMD on Linux
    QSV = "qsv"  # Intel Quick Sync
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class HWCapability:
    """Detected hardware acceleration capability."""

    # Selected backend; NONE means CPU-only libx264 encoding.
    accel_type: HWAccelType = HWAccelType.NONE
    # Encoder codec name used for H.264 output.
    h264_encoder: str = "libx264"
    h264_decoder: str | None = None  # None = use default software decoder
    hevc_decoder: str | None = None  # None = use default software decoder
    # Every HW codec that probed successfully (kept for logging/diagnostics).
    available_encoders: list[str] = field(default_factory=list)
    available_decoders: list[str] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level singleton -- populated on first call to get_hw_capability()
|
||||||
|
_hw_capability: HWCapability | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def _probe_codec(name: str, mode: str = "w") -> bool:
    """
    Return True if PyAV exposes a codec with the given name.

    Args:
        name: Codec name (e.g. 'h264_videotoolbox').
        mode: 'w' to probe an encoder, 'r' to probe a decoder.
    """
    try:
        av.Codec(name, mode)
    except Exception:
        return False
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_hw_capability() -> HWCapability:
    """
    Probe the runtime environment for hardware encoder/decoder availability.

    Checks NVIDIA, Apple VideoToolbox, Intel VAAPI/QSV in priority order
    and falls back to libx264 CPU encoding when no HW encoder is found.
    """
    cap = HWCapability()

    # Probe the full candidate set up front so the result can be logged later.
    candidate_encoders = (
        "h264_nvenc",
        "hevc_nvenc",
        "h264_videotoolbox",
        "hevc_videotoolbox",
        "h264_vaapi",
        "hevc_vaapi",
        "h264_qsv",
        "hevc_qsv",
    )
    candidate_decoders = ("h264_cuvid", "hevc_cuvid", "h264_qsv", "hevc_qsv")

    cap.available_encoders = [name for name in candidate_encoders if _probe_codec(name, "w")]
    cap.available_decoders = [name for name in candidate_decoders if _probe_codec(name, "r")]

    encoders = set(cap.available_encoders)
    decoders = set(cap.available_decoders)

    if "h264_nvenc" in encoders:
        # Priority 1: NVIDIA NVENC (+ CUVID decoders when present)
        cap.accel_type = HWAccelType.NVIDIA
        cap.h264_encoder = "h264_nvenc"
        if "h264_cuvid" in decoders:
            cap.h264_decoder = "h264_cuvid"
        if "hevc_cuvid" in decoders:
            cap.hevc_decoder = "hevc_cuvid"
    elif "h264_videotoolbox" in encoders:
        # Priority 2: Apple VideoToolbox (decoders engage automatically via hwaccel)
        cap.accel_type = HWAccelType.VIDEOTOOLBOX
        cap.h264_encoder = "h264_videotoolbox"
    elif "h264_vaapi" in encoders:
        # Priority 3: Intel VAAPI (Linux)
        cap.accel_type = HWAccelType.VAAPI
        cap.h264_encoder = "h264_vaapi"
    elif "h264_qsv" in encoders:
        # Priority 4: Intel Quick Sync
        cap.accel_type = HWAccelType.QSV
        cap.h264_encoder = "h264_qsv"
        if "h264_qsv" in decoders:
            cap.h264_decoder = "h264_qsv"
        if "hevc_qsv" in decoders:
            cap.hevc_decoder = "hevc_qsv"
    else:
        # Fallback: CPU
        cap.accel_type = HWAccelType.NONE
        cap.h264_encoder = "libx264"

    return cap
|
||||||
|
|
||||||
|
|
||||||
|
def get_hw_capability() -> HWCapability:
    """Get the detected hardware acceleration capability (cached singleton)."""
    global _hw_capability
    if _hw_capability is not None:
        return _hw_capability

    # First call: detect once and log what was selected.
    _hw_capability = _detect_hw_capability()
    gpu_selected = settings.transcode_prefer_gpu and _hw_capability.accel_type != HWAccelType.NONE
    if gpu_selected:
        logger.info(
            "[video_transcoder] GPU acceleration: %s (encoder=%s, decoders=%s)",
            _hw_capability.accel_type.value,
            _hw_capability.h264_encoder,
            _hw_capability.available_decoders or "software",
        )
    else:
        logger.info(
            "[video_transcoder] Using CPU encoder: %s (available HW: encoders=%s, decoders=%s)",
            _hw_capability.h264_encoder,
            _hw_capability.available_encoders or "none",
            _hw_capability.available_decoders or "none",
        )
    return _hw_capability
|
||||||
|
|
||||||
|
|
||||||
|
class VideoTranscoder:
    """
    In-process video transcoder using PyAV.

    Decodes input video packets and re-encodes to H.264 using the best
    available hardware encoder (or CPU libx264 fallback).

    Operates at the frame level: caller provides raw video packets (from
    PyAV demuxer), transcoder returns encoded H.264 NAL data suitable
    for the fMP4 muxer.
    """

    def __init__(
        self,
        input_codec_name: str,
        width: int,
        height: int,
        fps: float = 24.0,
        pixel_format: str = "yuv420p",
        force_software: bool = False,
    ) -> None:
        """
        Create decoder/encoder contexts for one video stream.

        Args:
            input_codec_name: Source codec name (e.g. 'h264', 'hevc').
            width: Source frame width in pixels.
            height: Source frame height in pixels.
            fps: Nominal frame rate used for time_base / framerate / GOP sizing.
            pixel_format: Declared source pixel format.
                NOTE(review): appears unused here -- frames are reformatted to
                the encoder's pix_fmt on the fly; confirm before removing.
            force_software: If True, ignore detected HW and use libx264.
        """
        hw = get_hw_capability()
        use_gpu = settings.transcode_prefer_gpu and hw.accel_type != HWAccelType.NONE and not force_software

        # --- Decoder ---
        # Pick a HW decoder matching the input codec family, if one was detected.
        hw_decoder = None
        if use_gpu:
            if "hevc" in input_codec_name or "h265" in input_codec_name:
                hw_decoder = hw.hevc_decoder
            else:
                hw_decoder = hw.h264_decoder

        decoder_name = hw_decoder or input_codec_name
        self._decoder = av.CodecContext.create(decoder_name, "r")

        # --- Encoder ---
        encoder_name = hw.h264_encoder if use_gpu else "libx264"

        # H.264 requires even dimensions
        enc_width = width if width % 2 == 0 else width + 1
        enc_height = height if height % 2 == 0 else height + 1

        self._encoder = av.CodecContext.create(encoder_name, "w")
        self._encoder.width = enc_width
        self._encoder.height = enc_height
        self._encoder.pix_fmt = "yuv420p"  # H.264 requires yuv420p
        self._encoder.time_base = Fraction(1, int(fps * 1000))
        self._encoder.framerate = Fraction(int(fps * 1000), 1000)
        self._encoder.bit_rate = _parse_bitrate(settings.transcode_video_bitrate)
        self._encoder.gop_size = int(fps * 2)  # Keyframe every ~2 seconds

        # Encoder options based on backend
        opts = {}
        if encoder_name == "libx264":
            opts["preset"] = settings.transcode_video_preset
            opts["tune"] = "zerolatency"
            opts["profile"] = "high"
        elif "nvenc" in encoder_name:
            opts["preset"] = "p4"  # NVENC preset (p1=fastest .. p7=slowest)
            opts["tune"] = "ll"  # Low latency
            opts["rc"] = "vbr"
        elif "videotoolbox" in encoder_name:
            opts["realtime"] = "1"
            opts["allow_sw"] = "1"  # Fallback to software if HW busy
        elif "vaapi" in encoder_name:
            opts["rc_mode"] = "VBR"
        elif "qsv" in encoder_name:
            opts["preset"] = "medium"

        self._encoder.options = opts
        self._encoder.open()

        # Report the (possibly padded-to-even) encode dimensions to callers.
        width = enc_width
        height = enc_height

        self._input_codec = input_codec_name
        self._encoder_name = encoder_name
        self._frames_decoded = 0
        self._frames_encoded = 0
        self._width = width
        self._height = height
        # Tracks whether the standalone decoder was actually used (via decode_packet).
        # When the demux thread decodes frames in-thread (decode_video=True),
        # the standalone decoder is never fed packets and flushing it is wasted work.
        self._decoder_used = False
        self._flushed = False  # Prevents double-flush which causes SIGSEGV

        logger.info(
            "[video_transcoder] Initialized: %s -> %s (%s), %dx%d @%.1ffps %dk",
            input_codec_name,
            encoder_name,
            hw.accel_type.value,
            width,
            height,
            fps,
            self._encoder.bit_rate // 1000 if self._encoder.bit_rate else 0,
        )

    @property
    def codec_private_data(self) -> bytes | None:
        """H.264 extradata (SPS/PPS) from the encoder, for the fMP4 init segment."""
        if self._encoder.extradata:
            return bytes(self._encoder.extradata)
        return None

    @property
    def width(self) -> int:
        # Output width (even-padded); may differ from the input width by 1.
        return self._width

    @property
    def height(self) -> int:
        # Output height (even-padded); may differ from the input height by 1.
        return self._height

    def transcode_frame(self, frame: av.VideoFrame) -> list[tuple[bytes, bool, int, int]]:
        """
        Encode a decoded video frame to H.264.

        Args:
            frame: A decoded av.VideoFrame.

        Returns:
            List of (nal_data, is_keyframe, pts, dts) tuples.
            May be empty while the encoder buffers frames; pts/dts default to 0
            when the encoder did not set them.
        """
        self._frames_decoded += 1
        output = []

        # Ensure correct pixel format for encoder
        if frame.format.name != self._encoder.pix_fmt:
            frame = frame.reformat(format=self._encoder.pix_fmt)

        try:
            for packet in self._encoder.encode(frame):
                self._frames_encoded += 1
                output.append(
                    (
                        bytes(packet),
                        packet.is_keyframe,
                        int(packet.pts) if packet.pts is not None else 0,
                        int(packet.dts) if packet.dts is not None else 0,
                    )
                )
        except av.error.InvalidDataError as e:
            # Drop the bad frame rather than failing the whole stream.
            logger.debug("[video_transcoder] Encode error: %s", e)

        return output

    def decode_packet(self, packet: av.Packet) -> list[av.VideoFrame]:
        """Decode a video packet into frames; returns [] on invalid data."""
        # Marks the standalone decoder as used so flush() drains it later.
        self._decoder_used = True
        try:
            return list(self._decoder.decode(packet))
        except av.error.InvalidDataError as e:
            logger.debug("[video_transcoder] Decode error: %s", e)
            return []

    def flush(self) -> list[tuple[bytes, bool, int, int]]:
        """
        Flush encoder (and decoder, if it was used) buffers.

        When ``decode_video=True`` is used in PyAVDemuxer, the demux thread
        decodes frames using the container's codec context. In that case the
        standalone ``_decoder`` here is never fed any packets, so flushing
        it is skipped -- avoiding a stall that added ~5 s on some backends.

        Safe to call multiple times -- subsequent calls return an empty list.

        Returns:
            List of (nal_data, is_keyframe, pts, dts) tuples drained from
            the codec buffers.
        """
        if self._flushed:
            return []
        self._flushed = True

        output = []

        # Flush decoder only if it was actually used (via decode_packet)
        if self._decoder_used:
            try:
                # decode(None) signals end-of-stream and drains buffered frames.
                for frame in self._decoder.decode(None):
                    self._frames_decoded += 1
                    if frame.format.name != self._encoder.pix_fmt:
                        frame = frame.reformat(format=self._encoder.pix_fmt)
                    for packet in self._encoder.encode(frame):
                        self._frames_encoded += 1
                        output.append(
                            (
                                bytes(packet),
                                packet.is_keyframe,
                                int(packet.pts) if packet.pts is not None else 0,
                                int(packet.dts) if packet.dts is not None else 0,
                            )
                        )
            except Exception as e:
                logger.debug("[video_transcoder] Decoder flush error: %s", e)
        else:
            logger.debug("[video_transcoder] Skipping decoder flush (decoder not used)")

        # Flush encoder
        try:
            # encode(None) drains the encoder's buffered packets.
            for packet in self._encoder.encode(None):
                self._frames_encoded += 1
                output.append(
                    (
                        bytes(packet),
                        packet.is_keyframe,
                        int(packet.pts) if packet.pts is not None else 0,
                        int(packet.dts) if packet.dts is not None else 0,
                    )
                )
        except Exception as e:
            logger.debug("[video_transcoder] Encoder flush error: %s", e)

        logger.info(
            "[video_transcoder] Flushed: %d decoded, %d encoded total (decoder_used=%s)",
            self._frames_decoded,
            self._frames_encoded,
            self._decoder_used,
        )
        return output

    def close(self) -> None:
        """Release codec contexts.

        Flushes the encoder (if not already flushed) before releasing to avoid
        SIGSEGV when libx264 or hardware encoders have buffered frames at
        teardown time. Double-flushing is the most common cause of SIGSEGV
        in the transcode pipeline.

        PyAV codec contexts are released via garbage collection (no explicit
        close method), so we flush first to ensure native buffers are drained
        before the C-level codec is freed.
        """
        # flush() is idempotent -- safe to call even if already flushed
        self.flush()
        # Release references -- GC will free the native codec contexts
        self._encoder = None
        self._decoder = None

    def __del__(self) -> None:
        # NOTE(review): if __init__ raised before attributes were set, this
        # close() could hit AttributeError during interpreter teardown -- confirm.
        self.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_bitrate(bitrate_str: str) -> int:
|
||||||
|
"""Parse a bitrate string like '4M', '2000k', '5000000' to int bits/s."""
|
||||||
|
s = bitrate_str.strip().lower()
|
||||||
|
if s.endswith("m"):
|
||||||
|
return int(float(s[:-1]) * 1_000_000)
|
||||||
|
if s.endswith("k"):
|
||||||
|
return int(float(s[:-1]) * 1_000)
|
||||||
|
return int(s)
|
||||||
@@ -2,5 +2,16 @@ from .proxy import proxy_router
|
|||||||
from .extractor import extractor_router
|
from .extractor import extractor_router
|
||||||
from .speedtest import speedtest_router
|
from .speedtest import speedtest_router
|
||||||
from .playlist_builder import playlist_builder_router
|
from .playlist_builder import playlist_builder_router
|
||||||
|
from .xtream import xtream_root_router
|
||||||
|
from .acestream import acestream_router
|
||||||
|
from .telegram import telegram_router
|
||||||
|
|
||||||
__all__ = ["proxy_router", "extractor_router", "speedtest_router", "playlist_builder_router"]
|
__all__ = [
|
||||||
|
"proxy_router",
|
||||||
|
"extractor_router",
|
||||||
|
"speedtest_router",
|
||||||
|
"playlist_builder_router",
|
||||||
|
"xtream_root_router",
|
||||||
|
"acestream_router",
|
||||||
|
"telegram_router",
|
||||||
|
]
|
||||||
|
|||||||
BIN
mediaflow_proxy/routes/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/routes/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/routes/__pycache__/acestream.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/routes/__pycache__/acestream.cpython-313.pyc
Normal file
Binary file not shown.
BIN
mediaflow_proxy/routes/__pycache__/extractor.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/routes/__pycache__/extractor.cpython-313.pyc
Normal file
Binary file not shown.
Binary file not shown.
BIN
mediaflow_proxy/routes/__pycache__/proxy.cpython-313.pyc
Normal file
BIN
mediaflow_proxy/routes/__pycache__/proxy.cpython-313.pyc
Normal file
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user