diff --git a/mediaflow_proxy/__pycache__/__init__.cpython-313.pyc b/mediaflow_proxy/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..c0093f8 Binary files /dev/null and b/mediaflow_proxy/__pycache__/__init__.cpython-313.pyc differ diff --git a/mediaflow_proxy/__pycache__/configs.cpython-313.pyc b/mediaflow_proxy/__pycache__/configs.cpython-313.pyc new file mode 100644 index 0000000..1da72df Binary files /dev/null and b/mediaflow_proxy/__pycache__/configs.cpython-313.pyc differ diff --git a/mediaflow_proxy/__pycache__/const.cpython-313.pyc b/mediaflow_proxy/__pycache__/const.cpython-313.pyc new file mode 100644 index 0000000..173fd13 Binary files /dev/null and b/mediaflow_proxy/__pycache__/const.cpython-313.pyc differ diff --git a/mediaflow_proxy/__pycache__/handlers.cpython-313.pyc b/mediaflow_proxy/__pycache__/handlers.cpython-313.pyc new file mode 100644 index 0000000..d3857cf Binary files /dev/null and b/mediaflow_proxy/__pycache__/handlers.cpython-313.pyc differ diff --git a/mediaflow_proxy/__pycache__/main.cpython-313.pyc b/mediaflow_proxy/__pycache__/main.cpython-313.pyc new file mode 100644 index 0000000..8e81be0 Binary files /dev/null and b/mediaflow_proxy/__pycache__/main.cpython-313.pyc differ diff --git a/mediaflow_proxy/__pycache__/middleware.cpython-313.pyc b/mediaflow_proxy/__pycache__/middleware.cpython-313.pyc new file mode 100644 index 0000000..5bb4a40 Binary files /dev/null and b/mediaflow_proxy/__pycache__/middleware.cpython-313.pyc differ diff --git a/mediaflow_proxy/__pycache__/mpd_processor.cpython-313.pyc b/mediaflow_proxy/__pycache__/mpd_processor.cpython-313.pyc new file mode 100644 index 0000000..925c50d Binary files /dev/null and b/mediaflow_proxy/__pycache__/mpd_processor.cpython-313.pyc differ diff --git a/mediaflow_proxy/__pycache__/schemas.cpython-313.pyc b/mediaflow_proxy/__pycache__/schemas.cpython-313.pyc new file mode 100644 index 0000000..e15268d Binary files /dev/null and b/mediaflow_proxy/__pycache__/schemas.cpython-313.pyc differ diff --git a/mediaflow_proxy/configs.py b/mediaflow_proxy/configs.py index bae1ddf..fbae17f 100644 --- a/mediaflow_proxy/configs.py +++ b/mediaflow_proxy/configs.py @@ -1,7 +1,6 @@ -from typing import Dict, Literal, Optional, Union +from typing import Dict, Literal, Optional -import httpx -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, SecretStr from pydantic_settings import BaseSettings @@ -28,48 +27,6 @@ class TransportConfig(BaseSettings): ) timeout: int = Field(60, description="Timeout for HTTP requests in seconds") - def get_mounts( - self, async_http: bool = True - ) -> Dict[str, Optional[Union[httpx.HTTPTransport, httpx.AsyncHTTPTransport]]]: - """ - Get a dictionary of httpx mount points to transport instances. - """ - mounts = {} - transport_cls = httpx.AsyncHTTPTransport if async_http else httpx.HTTPTransport - global_verify = not self.disable_ssl_verification_globally - - # Configure specific routes - for pattern, route in self.transport_routes.items(): - mounts[pattern] = transport_cls( - verify=route.verify_ssl if global_verify else False, - proxy=route.proxy_url or self.proxy_url if route.proxy else None, - ) - - # Hardcoded configuration for jxoplay.xyz domain - SSL verification disabled - mounts["all://jxoplay.xyz"] = transport_cls( - verify=False, proxy=self.proxy_url if self.all_proxy else None - ) - - mounts["all://dlhd.dad"] = transport_cls( - verify=False, proxy=self.proxy_url if self.all_proxy else None - ) - - mounts["all://*.newkso.ru"] = transport_cls( - verify=False, proxy=self.proxy_url if self.all_proxy else None - ) - - # Apply global settings for proxy and SSL - default_proxy_url = self.proxy_url if self.all_proxy else None - if default_proxy_url or not global_verify: - mounts["all://"] = transport_cls(proxy=default_proxy_url, verify=global_verify) - - # Set default proxy for all routes if enabled - # This part is now handled above to combine proxy and SSL settings - # if self.all_proxy: - # mounts["all://"] = transport_cls(proxy=self.proxy_url) - - return mounts - class Config: env_file = ".env" extra = "ignore" @@ -78,30 +35,80 @@ class TransportConfig(BaseSettings): class Settings(BaseSettings): api_password: str | None = None # The password for protecting the API endpoints. log_level: str = "INFO" # The logging level to use. - transport_config: TransportConfig = Field(default_factory=TransportConfig) # Configuration for httpx transport. + transport_config: TransportConfig = Field(default_factory=TransportConfig) # Configuration for HTTP transport. enable_streaming_progress: bool = False # Whether to enable streaming progress tracking. disable_home_page: bool = False # Whether to disable the home page UI. disable_docs: bool = False # Whether to disable the API documentation (Swagger UI). disable_speedtest: bool = False # Whether to disable the speedtest UI. + clear_cache_on_startup: bool = ( + False # Whether to clear all caches (extractor, MPD, etc.) on startup. Useful for development. + ) stremio_proxy_url: str | None = None # The Stremio server URL for alternative content proxying. m3u8_content_routing: Literal["mediaflow", "stremio", "direct"] = ( "mediaflow" # Routing strategy for M3U8 content URLs: "mediaflow", "stremio", or "direct" ) - enable_hls_prebuffer: bool = False # Whether to enable HLS pre-buffering for improved streaming performance. + enable_hls_prebuffer: bool = True # Whether to enable HLS pre-buffering for improved streaming performance. + livestream_start_offset: ( + float | None + ) = -18 # Default start offset for live streams (e.g., -18 to start 18 seconds behind live edge). Applies to HLS and MPD live playlists. Set to None to disable. hls_prebuffer_segments: int = 5 # Number of segments to pre-buffer ahead. hls_prebuffer_cache_size: int = 50 # Maximum number of segments to cache in memory. hls_prebuffer_max_memory_percent: int = 80 # Maximum percentage of system memory to use for HLS pre-buffer cache. hls_prebuffer_emergency_threshold: int = 90 # Emergency threshold percentage to trigger aggressive cache cleanup. - enable_dash_prebuffer: bool = False # Whether to enable DASH pre-buffering for improved streaming performance. + hls_prebuffer_inactivity_timeout: int = 60 # Seconds of inactivity before stopping playlist refresh loop. + hls_segment_cache_ttl: int = 300 # TTL (seconds) for cached HLS segments; 300s (5min) for VOD, lower for live. + enable_dash_prebuffer: bool = True # Whether to enable DASH pre-buffering for improved streaming performance. dash_prebuffer_segments: int = 5 # Number of segments to pre-buffer ahead. dash_prebuffer_cache_size: int = 50 # Maximum number of segments to cache in memory. dash_prebuffer_max_memory_percent: int = 80 # Maximum percentage of system memory to use for DASH pre-buffer cache. dash_prebuffer_emergency_threshold: int = 90 # Emergency threshold percentage to trigger aggressive cache cleanup. - mpd_live_init_cache_ttl: int = 0 # TTL (seconds) for live init segment cache; 0 disables caching. + dash_prebuffer_inactivity_timeout: int = 60 # Seconds of inactivity before cleaning up stream state. + dash_segment_cache_ttl: int = 60 # TTL (seconds) for cached media segments; longer = better for slow playback. + mpd_live_init_cache_ttl: int = 60 # TTL (seconds) for live init segment cache; 0 disables caching. mpd_live_playlist_depth: int = 8 # Number of recent segments to expose per live playlist variant. + remux_to_ts: bool = False # Remux fMP4 segments to MPEG-TS for ExoPlayer/VLC compatibility. + processed_segment_cache_ttl: int = 60 # TTL (seconds) for caching processed (decrypted/remuxed) segments. - user_agent: str = ( - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36" # The user agent to use for HTTP requests. + # FlareSolverr settings (for Cloudflare bypass) + flaresolverr_url: str | None = None # FlareSolverr service URL. Example: http://localhost:8191 + flaresolverr_timeout: int = 60 # Timeout (seconds) for FlareSolverr requests. + + # Acestream settings + enable_acestream: bool = False # Whether to enable Acestream proxy support. + acestream_host: str = "localhost" # Acestream engine host. + acestream_port: int = 6878 # Acestream engine port. + acestream_buffer_size: int = 4 * 1024 * 1024 # Buffer size for MPEG-TS streaming (4MB default, like acexy). + acestream_empty_timeout: int = 30 # Timeout (seconds) when no data is received from upstream. + acestream_session_timeout: int = 60 # Session timeout (seconds) for cleanup of inactive sessions. + acestream_keepalive_interval: int = 15 # Interval (seconds) for session keepalive polling. + + # Telegram MTProto settings + enable_telegram: bool = False # Whether to enable Telegram MTProto proxy support. + telegram_api_id: int | None = None # Telegram API ID from https://my.telegram.org/apps + telegram_api_hash: SecretStr | None = None # Telegram API hash from https://my.telegram.org/apps + telegram_session_string: SecretStr | None = None # Persistent session string (avoids re-authentication). + telegram_max_connections: int = 8 # Max parallel DC connections for downloads (max 20, careful of floods). + telegram_request_timeout: int = 30 # Request timeout in seconds. + + # Transcode settings + enable_transcode: bool = True # Whether to enable on-the-fly transcoding endpoints (MKV→fMP4, HLS VOD). + transcode_prefer_gpu: bool = True # Prefer GPU acceleration (NVENC/VideoToolbox/VAAPI) when available. + transcode_video_bitrate: str = "4M" # Target video bitrate for re-encoding (e.g. "4M", "2000k"). + transcode_audio_bitrate: int = 192000 # AAC audio bitrate in bits/s for the Python transcode pipeline. + transcode_video_preset: str = "medium" # Encoding speed/quality tradeoff (libx264: ultrafast..veryslow). + + user_agent: str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36" # The user agent to use for HTTP requests. + + # Upstream error resilience settings + upstream_retry_on_disconnect: bool = True # Enable/disable retry when upstream disconnects mid-stream. + upstream_retry_attempts: int = 2 # Number of retry attempts when upstream disconnects during streaming. + upstream_retry_delay: float = 1.0 # Delay (seconds) between retry attempts. + graceful_stream_end: bool = True # Return valid empty playlist instead of error when upstream fails. + + # Redis settings + redis_url: str | None = None # Redis URL for distributed locking and caching. None = disabled. + cache_namespace: str | None = ( + None # Optional namespace for instance-specific caches (e.g. pod name or hostname). When set, extractor results and other IP-bound data are stored under this namespace so multiple pods sharing one Redis don't serve each other's IP-specific URLs. ) class Config: diff --git a/mediaflow_proxy/drm/__pycache__/__init__.cpython-313.pyc b/mediaflow_proxy/drm/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..ab17842 Binary files /dev/null and b/mediaflow_proxy/drm/__pycache__/__init__.cpython-313.pyc differ diff --git a/mediaflow_proxy/drm/__pycache__/decrypter.cpython-313.pyc b/mediaflow_proxy/drm/__pycache__/decrypter.cpython-313.pyc new file mode 100644 index 0000000..f69ffd6 Binary files /dev/null and b/mediaflow_proxy/drm/__pycache__/decrypter.cpython-313.pyc differ diff --git a/mediaflow_proxy/drm/decrypter.py b/mediaflow_proxy/drm/decrypter.py index a6514a9..8b6a354 100644 --- a/mediaflow_proxy/drm/decrypter.py +++ b/mediaflow_proxy/drm/decrypter.py @@ -168,12 +168,20 @@ class MP4Decrypter: """ Class to handle the decryption of CENC encrypted MP4 segments. + Supports multi-track segments (e.g., video + audio) by properly handling + data offsets and encryption info for each track. + Attributes: - key_map (dict[bytes, bytes]): Mapping of track IDs to decryption keys. - current_key (Optional[bytes]): Current decryption key. + key_map (dict[bytes, bytes]): Mapping of KIDs to decryption keys. + current_key (Optional[bytes]): Current decryption key (for single-track compatibility). trun_sample_sizes (array.array): Array of sample sizes from the 'trun' box. current_sample_info (list): List of sample information from the 'senc' box. - encryption_overhead (int): Total size of encryption-related boxes. + total_encryption_overhead (int): Total size of encryption-related boxes (senc, saiz, saio) across all trafs. + default_sample_size (int): Default sample size from tfhd, used when trun doesn't specify sizes. + track_infos (list): List of track info dicts for multi-track mdat decryption. + encryption_scheme (bytes): Encryption scheme type (b"cenc", b"cens", b"cbc1", or b"cbcs"). + crypt_byte_block (int): Number of encrypted 16-byte blocks in pattern encryption (for cbcs). + skip_byte_block (int): Number of clear 16-byte blocks in pattern encryption (for cbcs). """ def __init__(self, key_map: dict[bytes, bytes]): @@ -187,14 +195,34 @@ class MP4Decrypter: self.current_key = None self.trun_sample_sizes = array.array("I") self.current_sample_info = [] - self.encryption_overhead = 0 + self.total_encryption_overhead = 0 # Total overhead from all trafs (senc, saiz, saio) + self.default_sample_size = 0 + # Track info for multi-track support: list of (data_offset, sample_sizes, sample_info, key, default_sample_size) + self.track_infos = [] + # IV size from tenc box (default 8 for GPAC, 16 for Bento4) + self.default_iv_size = 8 + # Encryption scheme: b"cenc" (AES-CTR), b"cens" (AES-CTR pattern), b"cbc1" (AES-CBC), b"cbcs" (AES-CBC pattern) + self.encryption_scheme = b"cenc" # Default to cenc (AES-CTR) + # Pattern encryption parameters (for cbcs/cens) - default values + self.crypt_byte_block = 1 # Default: encrypt 1 block + self.skip_byte_block = 9 # Default: skip 9 blocks + # Constant IV for CBCS (when default_Per_Sample_IV_Size is 0) + self.constant_iv: Optional[bytes] = None + # Per-track encryption settings (track_id -> {crypt, skip, iv}) + self.track_encryption_settings: dict[int, dict] = {} + # Extracted KIDs from tenc boxes (track_id -> kid) + self.extracted_kids: dict[int, bytes] = {} + # Current track ID being processed + self.current_track_id: int = 0 - def decrypt_segment(self, combined_segment: bytes) -> bytes: + def decrypt_segment(self, combined_segment: bytes, include_init: bool = True) -> bytes: """ Decrypts a combined MP4 segment. Args: combined_segment (bytes): Combined initialization and media segment. + include_init (bool): If True, include processed init atoms (ftyp, moov) in output. + If False, only return media atoms (moof, sidx, mdat) for use with EXT-X-MAP. Returns: bytes: Decrypted segment content. @@ -210,6 +238,44 @@ class MP4Decrypter: if atom := next((a for a in atoms if a.atom_type == atom_type), None): processed_atoms[atom_type] = self._process_atom(atom_type, atom) + result = bytearray() + # Init atoms to skip when include_init is False + # Note: styp is a segment type atom that should be kept in segments + init_atoms = {b"ftyp", b"moov"} + + for atom in atoms: + # Skip init atoms if not including init + if not include_init and atom.atom_type in init_atoms: + continue + + if atom.atom_type in processed_atoms: + processed_atom = processed_atoms[atom.atom_type] + result.extend(processed_atom.pack()) + else: + result.extend(atom.pack()) + + return bytes(result) + + def process_init_only(self, init_segment: bytes) -> bytes: + """ + Processes only the initialization segment, removing encryption-related boxes. + Used for EXT-X-MAP where init is served separately. + + Args: + init_segment (bytes): Initialization segment data. + + Returns: + bytes: Processed init segment with encryption boxes removed. + """ + data = memoryview(init_segment) + parser = MP4Parser(data) + atoms = parser.list_atoms() + + processed_atoms = {} + # Only process moov for init segments + if moov_atom := next((a for a in atoms if a.atom_type == b"moov"), None): + processed_atoms[b"moov"] = self._process_moov(moov_atom) + result = bytearray() for atom in atoms: if atom.atom_type in processed_atoms: @@ -268,19 +334,33 @@ class MP4Decrypter: def _process_moof(self, moof: MP4Atom) -> MP4Atom: """ - Processes the 'moov' (Movie) atom, which contains metadata about the entire presentation. - This includes information about tracks, media data, and other movie-level metadata. + Processes the 'moof' (Movie Fragment) atom, which contains metadata about a fragment. + This includes information about track fragments, sample information, and encryption data. Args: - moov (MP4Atom): The 'moov' atom to process. + moof (MP4Atom): The 'moof' atom to process. Returns: - MP4Atom: Processed 'moov' atom with updated track information. + MP4Atom: Processed 'moof' atom with updated track information. """ parser = MP4Parser(moof.data) - new_moof_data = bytearray() + atoms = parser.list_atoms() - for atom in iter(parser.read_atom, None): + # Reset track infos for this moof + self.track_infos = [] + + # First pass: calculate total encryption overhead from all trafs + self.total_encryption_overhead = 0 + for atom in atoms: + if atom.atom_type == b"traf": + traf_parser = MP4Parser(atom.data) + traf_atoms = traf_parser.list_atoms() + traf_overhead = sum(a.size for a in traf_atoms if a.atom_type in {b"senc", b"saiz", b"saio"}) + self.total_encryption_overhead += traf_overhead + + # Second pass: process atoms + new_moof_data = bytearray() + for atom in atoms: if atom.atom_type == b"traf": new_traf = self._process_traf(atom) new_moof_data.extend(new_traf.pack()) @@ -304,19 +384,21 @@ class MP4Decrypter: new_traf_data = bytearray() tfhd = None sample_count = 0 + trun_data_offset = 0 sample_info = [] + track_default_sample_size = 0 atoms = parser.list_atoms() - # calculate encryption_overhead earlier to avoid dependency on trun - self.encryption_overhead = sum(a.size for a in atoms if a.atom_type in {b"senc", b"saiz", b"saio"}) - for atom in atoms: if atom.atom_type == b"tfhd": tfhd = atom new_traf_data.extend(atom.pack()) + # Extract default_sample_size from tfhd if present + self._parse_tfhd(atom) + track_default_sample_size = self.default_sample_size elif atom.atom_type == b"trun": - sample_count = self._process_trun(atom) + sample_count, trun_data_offset = self._process_trun(atom) new_trun = self._modify_trun(atom) new_traf_data.extend(new_trun.pack()) elif atom.atom_type == b"senc": @@ -327,15 +409,60 @@ class MP4Decrypter: if tfhd: tfhd_track_id = struct.unpack_from(">I", tfhd.data, 4)[0] - self.current_key = self._get_key_for_track(tfhd_track_id) + track_key = self._get_key_for_track(tfhd_track_id) + # Get per-track encryption settings if available + track_enc_settings = self.track_encryption_settings.get(tfhd_track_id, {}) + # Store track info for multi-track mdat decryption + # Copy the sample sizes array since it gets overwritten for each track + track_sample_sizes = array.array("I", self.trun_sample_sizes) + self.track_infos.append( + { + "data_offset": trun_data_offset, + "sample_sizes": track_sample_sizes, + "sample_info": sample_info, + "key": track_key, + "default_sample_size": track_default_sample_size, + "track_id": tfhd_track_id, + "crypt_byte_block": track_enc_settings.get("crypt_byte_block", self.crypt_byte_block), + "skip_byte_block": track_enc_settings.get("skip_byte_block", self.skip_byte_block), + "constant_iv": track_enc_settings.get("constant_iv", self.constant_iv), + } + ) + # Keep backward compatibility for single-track case + self.current_key = track_key self.current_sample_info = sample_info return MP4Atom(b"traf", len(new_traf_data) + 8, new_traf_data) + def _parse_tfhd(self, tfhd: MP4Atom) -> None: + """ + Parses the 'tfhd' (Track Fragment Header) atom to extract default sample size. + + Args: + tfhd (MP4Atom): The 'tfhd' atom to parse. + """ + data = tfhd.data + flags = struct.unpack_from(">I", data, 0)[0] & 0xFFFFFF + offset = 8 # Skip version_flags (4) + track_id (4) + + # Skip optional fields based on flags + if flags & 0x000001: # base-data-offset-present + offset += 8 + if flags & 0x000002: # sample-description-index-present + offset += 4 + if flags & 0x000008: # default-sample-duration-present + offset += 4 + if flags & 0x000010: # default-sample-size-present + if offset + 4 <= len(data): + self.default_sample_size = struct.unpack_from(">I", data, offset)[0] + offset += 4 + # We don't need default-sample-flags (0x000020) + def _decrypt_mdat(self, mdat: MP4Atom) -> MP4Atom: """ Decrypts the 'mdat' (Media Data) atom, which contains the actual media data (audio, video, etc.). The decryption is performed using the current decryption key and sample information. + Supports multiple tracks by using track_infos collected during moof processing. Args: mdat (MP4Atom): The 'mdat' atom to decrypt. @@ -343,30 +470,130 @@ class MP4Decrypter: Returns: MP4Atom: Decrypted 'mdat' atom with decrypted media data. """ + mdat_data = mdat.data + + # Use multi-track decryption if we have track_infos + if self.track_infos: + return self._decrypt_mdat_multi_track(mdat) + + # Fallback to single-track decryption for backward compatibility if not self.current_key or not self.current_sample_info: return mdat # Return original mdat if we don't have decryption info decrypted_samples = bytearray() - mdat_data = mdat.data position = 0 for i, info in enumerate(self.current_sample_info): if position >= len(mdat_data): break # No more data to process - sample_size = self.trun_sample_sizes[i] if i < len(self.trun_sample_sizes) else len(mdat_data) - position + # Get sample size from trun, or use default_sample_size from tfhd, or remaining data + sample_size = 0 + if i < len(self.trun_sample_sizes): + sample_size = self.trun_sample_sizes[i] + + # If sample size is 0 (not specified in trun), use default from tfhd + if sample_size == 0: + sample_size = self.default_sample_size if self.default_sample_size > 0 else len(mdat_data) - position + sample = mdat_data[position : position + sample_size] position += sample_size - decrypted_sample = self._process_sample(sample, info, self.current_key) + decrypted_sample = self._decrypt_sample(sample, info, self.current_key) decrypted_samples.extend(decrypted_sample) return MP4Atom(b"mdat", len(decrypted_samples) + 8, decrypted_samples) + def _decrypt_mdat_multi_track(self, mdat: MP4Atom) -> MP4Atom: + """ + Decrypts the 'mdat' atom with support for multiple tracks. + Each track's samples are located at their respective data_offset positions. + + The data_offset in trun is the byte offset from the start of the moof box + to the first byte of sample data. Since mdat immediately follows moof, + we can calculate the position within mdat as: + position_in_mdat = data_offset - moof_size + + But we don't have moof_size here directly. However, we know that the first + track's data_offset minus 8 (mdat header) gives us the moof size. + + For simplicity, we sort tracks by data_offset and process them in order, + using the data_offset difference to determine where each track's samples start. + + Args: + mdat (MP4Atom): The 'mdat' atom to decrypt. + + Returns: + MP4Atom: Decrypted 'mdat' atom with decrypted media data from all tracks. + """ + mdat_data = mdat.data + + if not self.track_infos: + return mdat + + # Sort tracks by data_offset to process in order + sorted_tracks = sorted(self.track_infos, key=lambda x: x["data_offset"]) + + # The first track's data_offset tells us where mdat data starts relative to moof + # data_offset = moof_size + 8 (mdat header) for the first sample + # So mdat_data_start_in_file = moof_start + first_data_offset + # And position_in_mdat = data_offset - first_data_offset + first_data_offset = sorted_tracks[0]["data_offset"] + + # Pre-allocate output buffer with original data (in case some parts aren't encrypted) + decrypted_data = bytearray(mdat_data) + + # Process each track's samples at their respective offsets + for track_info in sorted_tracks: + data_offset = track_info["data_offset"] + sample_sizes = track_info["sample_sizes"] + sample_info = track_info["sample_info"] + key = track_info["key"] + default_sample_size = track_info["default_sample_size"] + # Get per-track encryption settings + track_crypt = track_info.get("crypt_byte_block", self.crypt_byte_block) + track_skip = track_info.get("skip_byte_block", self.skip_byte_block) + track_constant_iv = track_info.get("constant_iv", self.constant_iv) + + if not key or not sample_info: + continue + + # Calculate start position in mdat + # position = data_offset - first_data_offset (relative to first track's start) + mdat_position = data_offset - first_data_offset + + for i, info in enumerate(sample_info): + sample_size = 0 + if i < len(sample_sizes): + sample_size = sample_sizes[i] + + if sample_size == 0: + sample_size = default_sample_size if default_sample_size > 0 else 0 + + if sample_size == 0: + continue + + if mdat_position + sample_size > len(mdat_data): + break + + sample = mdat_data[mdat_position : mdat_position + sample_size] + decrypted_sample = self._decrypt_sample_with_track_settings( + sample, info, key, track_crypt, track_skip, track_constant_iv + ) + + # Write decrypted sample to output at the same position + decrypted_data[mdat_position : mdat_position + len(decrypted_sample)] = decrypted_sample + mdat_position += sample_size + + return MP4Atom(b"mdat", len(decrypted_data) + 8, bytes(decrypted_data)) + def _parse_senc(self, senc: MP4Atom, sample_count: int) -> list[CENCSampleAuxiliaryDataFormat]: """ Parses the 'senc' (Sample Encryption) atom, which contains encryption information for samples. This includes initialization vectors (IVs) and sub-sample encryption data. + For CBCS with constant IV (default_iv_size == 0 in tenc), the senc box only contains + subsample info, not per-sample IVs. The constant IV from tenc is used instead. + Args: senc (MP4Atom): The 'senc' atom to parse. sample_count (int): The number of samples. @@ -383,13 +610,23 @@ class MP4Decrypter: sample_count = struct.unpack_from(">I", data, position)[0] position += 4 + # Use the IV size from tenc box (8 or 16 bytes, or 0 for constant IV) + iv_size = self.default_iv_size + + # For CBCS with constant IV, use the IV from tenc instead of per-sample IVs + use_constant_iv = self.encryption_scheme == b"cbcs" and self.constant_iv is not None + sample_info = [] for _ in range(sample_count): - if position + 8 > len(data): - break - - iv = data[position : position + 8].tobytes() - position += 8 + if use_constant_iv: + # Use constant IV from tenc box + iv = self.constant_iv + else: + # Read per-sample IV from senc + if position + iv_size > len(data): + break + iv = data[position : position + iv_size].tobytes() + position += iv_size sub_samples = [] if flags & 0x000002 and position + 2 <= len(data): # Check if subsample information is present @@ -411,6 +648,8 @@ class MP4Decrypter: def _get_key_for_track(self, track_id: int) -> bytes: """ Retrieves the decryption key for a given track ID from the key map. + Uses the KID extracted from the tenc box if available, otherwise falls back to + using the first key if only one key is provided. Args: track_id (int): The track ID. @@ -418,9 +657,32 @@ class MP4Decrypter: Returns: bytes: The decryption key for the specified track ID. """ + # If we have an extracted KID for this track, use it to look up the key + if track_id in self.extracted_kids: + extracted_kid = self.extracted_kids[track_id] + # If KID is all zeros, it's a placeholder - use the provided key_id directly + # Check if all bytes are zero + is_all_zeros = all(b == 0 for b in extracted_kid) and len(extracted_kid) == 16 + if is_all_zeros: + # All zeros KID means use the provided key_id (first key in map) + if len(self.key_map) == 1: + return next(iter(self.key_map.values())) + else: + # Use the extracted KID to look up the key + key = self.key_map.get(extracted_kid) + if key: + return key + # If KID doesn't match, try fallback + # Note: This is expected when KID in file doesn't match provided key_id + # The provided key_id should still work if it's the correct decryption key + + # Fallback: if only one key provided, use it (backward compatibility) if len(self.key_map) == 1: return next(iter(self.key_map.values())) - key = self.key_map.get(track_id.pack(4, "big")) + + # Try using track_id as KID (for multi-key scenarios) + track_id_bytes = track_id.to_bytes(4, "big") + key = self.key_map.get(track_id_bytes) if not key: raise ValueError(f"No key found for track ID {track_id}") return key @@ -466,7 +728,399 @@ class MP4Decrypter: return result - def _process_trun(self, trun: MP4Atom) -> int: + def _process_sample_cbcs( + self, sample: memoryview, sample_info: CENCSampleAuxiliaryDataFormat, key: bytes + ) -> Union[memoryview, bytearray, bytes]: + """ + Processes and decrypts a sample using CBCS (AES-CBC with pattern encryption). + + CBCS uses AES-CBC mode with a constant IV (no counter increment between blocks). + Pattern encryption encrypts 'crypt_byte_block' 16-byte blocks, then leaves + 'skip_byte_block' 16-byte blocks in the clear, repeating this pattern. + + Args: + sample (memoryview): The sample data. + sample_info (CENCSampleAuxiliaryDataFormat): The sample auxiliary data format with encryption information. + key (bytes): The decryption key. + + Returns: + Union[memoryview, bytearray, bytes]: The decrypted sample. + """ + if not sample_info.is_encrypted: + return sample + + # CBCS uses constant IV - pad to 16 bytes + iv = sample_info.iv + b"\x00" * (16 - len(sample_info.iv)) + + if not sample_info.sub_samples: + # Full sample encryption with pattern + return self._decrypt_cbcs_pattern(bytes(sample), key, iv) + + # Subsample encryption + result = bytearray() + offset = 0 + for clear_bytes, encrypted_bytes in sample_info.sub_samples: + # Copy clear bytes as-is + result.extend(sample[offset : offset + clear_bytes]) + offset += clear_bytes + + # Decrypt encrypted portion using pattern encryption + encrypted_part = bytes(sample[offset : offset + encrypted_bytes]) + decrypted = self._decrypt_cbcs_pattern(encrypted_part, key, iv) + result.extend(decrypted) + offset += encrypted_bytes + + # If there's any remaining data after subsamples, copy as-is (shouldn't happen) + if offset < len(sample): + result.extend(sample[offset:]) + + return result + + def _decrypt_cbcs_pattern(self, data: bytes, key: bytes, iv: bytes) -> bytes: + """ + Decrypts data using CBCS pattern encryption (AES-CBC with crypt/skip pattern). + + Pattern encryption decrypts 'crypt_byte_block' 16-byte blocks, then skips + 'skip_byte_block' 16-byte blocks (leaving them in clear), repeating. + + Important: In CBCS, the CBC cipher state (previous ciphertext) carries over + between encrypted blocks, even though clear blocks are skipped. This means + we need to collect all encrypted blocks, decrypt them as a continuous CBC + stream, then interleave the results with the clear blocks. + + Args: + data (bytes): The encrypted data. + key (bytes): The decryption key. + iv (bytes): The initialization vector. + + Returns: + bytes: The decrypted data. + """ + if not data: + return data + + block_size = 16 + crypt_blocks = self.crypt_byte_block + skip_blocks = self.skip_byte_block + + # If no pattern (crypt=0), no encryption + if crypt_blocks == 0: + return data + + # If skip=0, it's full encryption (all blocks encrypted) + if skip_blocks == 0: + # Decrypt complete blocks only + complete_blocks_size = (len(data) // block_size) * block_size + if complete_blocks_size > 0: + cipher = AES.new(key, AES.MODE_CBC, iv) + decrypted = cipher.decrypt(data[:complete_blocks_size]) + if complete_blocks_size < len(data): + return decrypted + data[complete_blocks_size:] + return decrypted + return data + + crypt_bytes = crypt_blocks * block_size + skip_bytes = skip_blocks * block_size + + # Step 1: Collect all encrypted blocks + encrypted_blocks = bytearray() + block_positions = [] # Track where each encrypted block came from + pos = 0 + + while pos < len(data): + # Encrypted portion + if pos + crypt_bytes <= len(data): + encrypted_blocks.extend(data[pos : pos + crypt_bytes]) + block_positions.append((pos, crypt_bytes)) + pos += crypt_bytes + else: + # Remaining data - encrypt complete blocks only + remaining = len(data) - pos + complete = (remaining // block_size) * block_size + if complete > 0: + encrypted_blocks.extend(data[pos : pos + complete]) + block_positions.append((pos, complete)) + pos += complete + break + + # Skip clear portion + if pos + skip_bytes <= len(data): + pos += skip_bytes + else: + break + + # Step 2: Decrypt all encrypted blocks as a continuous CBC stream + if encrypted_blocks: + cipher = AES.new(key, AES.MODE_CBC, iv) + decrypted_blocks = cipher.decrypt(bytes(encrypted_blocks)) + else: + decrypted_blocks = b"" + + # Step 3: Reconstruct the output with decrypted blocks and clear blocks + result = bytearray(data) # Start with original data + decrypted_pos = 0 + + for orig_pos, length in block_positions: + result[orig_pos : orig_pos + length] = decrypted_blocks[decrypted_pos : decrypted_pos + length] + decrypted_pos += length + + return bytes(result) + + def _process_sample_cbc1( + self, sample: memoryview, sample_info: CENCSampleAuxiliaryDataFormat, key: bytes + ) -> Union[memoryview, bytearray, bytes]: + """ + Processes and decrypts a sample using CBC1 (full sample AES-CBC encryption). + + Unlike CBCS, CBC1 encrypts the entire sample without pattern encryption. + + Args: + sample (memoryview): The sample data. + sample_info (CENCSampleAuxiliaryDataFormat): The sample auxiliary data format with encryption information. + key (bytes): The decryption key. + + Returns: + Union[memoryview, bytearray, bytes]: The decrypted sample. + """ + if not sample_info.is_encrypted: + return sample + + # Pad IV to 16 bytes + iv = sample_info.iv + b"\x00" * (16 - len(sample_info.iv)) + cipher = AES.new(key, AES.MODE_CBC, iv) + + if not sample_info.sub_samples: + # Full sample encryption - decrypt complete blocks only + block_size = 16 + complete_blocks_size = (len(sample) // block_size) * block_size + if complete_blocks_size > 0: + decrypted = cipher.decrypt(bytes(sample[:complete_blocks_size])) + if complete_blocks_size < len(sample): + # Append remaining partial block as-is + return decrypted + bytes(sample[complete_blocks_size:]) + return decrypted + return sample + + # Subsample encryption + result = bytearray() + offset = 0 + for clear_bytes, encrypted_bytes in sample_info.sub_samples: + result.extend(sample[offset : offset + clear_bytes]) + offset += clear_bytes + + encrypted_part = bytes(sample[offset : offset + encrypted_bytes]) + # Only decrypt complete blocks + block_size = 16 + complete_blocks_size = (len(encrypted_part) // block_size) * block_size + if complete_blocks_size > 0: + decrypted = cipher.decrypt(encrypted_part[:complete_blocks_size]) + result.extend(decrypted) + if complete_blocks_size < len(encrypted_part): + result.extend(encrypted_part[complete_blocks_size:]) + else: + result.extend(encrypted_part) + offset += encrypted_bytes + + if offset < len(sample): + result.extend(sample[offset:]) + + return result + + def _decrypt_sample( + self, sample: memoryview, sample_info: CENCSampleAuxiliaryDataFormat, key: bytes + ) -> Union[memoryview, bytearray, bytes]: + """ + Decrypts a sample using the appropriate scheme based on encryption_scheme attribute. + + Args: + sample (memoryview): The sample data. + sample_info (CENCSampleAuxiliaryDataFormat): The sample auxiliary data format. + key (bytes): The decryption key. + + Returns: + Union[memoryview, bytearray, bytes]: The decrypted sample. + """ + if self.encryption_scheme == b"cbcs": + return self._process_sample_cbcs(sample, sample_info, key) + elif self.encryption_scheme == b"cbc1": + return self._process_sample_cbc1(sample, sample_info, key) + else: + # cenc and cens use AES-CTR + return self._process_sample(sample, sample_info, key) + + def _decrypt_sample_with_track_settings( + self, + sample: memoryview, + sample_info: CENCSampleAuxiliaryDataFormat, + key: bytes, + crypt_byte_block: int, + skip_byte_block: int, + constant_iv: Optional[bytes], + ) -> Union[memoryview, bytearray, bytes]: + """ + Decrypts a sample using per-track encryption settings. + + Args: + sample (memoryview): The sample data. + sample_info (CENCSampleAuxiliaryDataFormat): The sample auxiliary data format. + key (bytes): The decryption key. + crypt_byte_block (int): Number of encrypted blocks in pattern. + skip_byte_block (int): Number of clear blocks in pattern. + constant_iv (Optional[bytes]): Constant IV for CBCS, or None. + + Returns: + Union[memoryview, bytearray, bytes]: The decrypted sample. + """ + if self.encryption_scheme == b"cbcs": + return self._process_sample_cbcs_with_settings( + sample, sample_info, key, crypt_byte_block, skip_byte_block, constant_iv + ) + elif self.encryption_scheme == b"cbc1": + return self._process_sample_cbc1(sample, sample_info, key) + else: + # cenc and cens use AES-CTR + return self._process_sample(sample, sample_info, key) + + def _process_sample_cbcs_with_settings( + self, + sample: memoryview, + sample_info: CENCSampleAuxiliaryDataFormat, + key: bytes, + crypt_byte_block: int, + skip_byte_block: int, + constant_iv: Optional[bytes], + ) -> Union[memoryview, bytearray, bytes]: + """ + Processes and decrypts a sample using CBCS with per-track settings. + + Args: + sample (memoryview): The sample data. + sample_info (CENCSampleAuxiliaryDataFormat): The sample auxiliary data format. + key (bytes): The decryption key. + crypt_byte_block (int): Number of encrypted blocks in pattern. + skip_byte_block (int): Number of clear blocks in pattern. + constant_iv (Optional[bytes]): Constant IV for CBCS, or None. + + Returns: + Union[memoryview, bytearray, bytes]: The decrypted sample. + """ + if not sample_info.is_encrypted: + return sample + + # Use constant IV if provided, otherwise use the IV from sample_info + if constant_iv: + iv = constant_iv + b"\x00" * (16 - len(constant_iv)) + else: + iv = sample_info.iv + b"\x00" * (16 - len(sample_info.iv)) + + if not sample_info.sub_samples: + # Full sample encryption with pattern + return self._decrypt_cbcs_pattern_with_settings(bytes(sample), key, iv, crypt_byte_block, skip_byte_block) + + # Subsample encryption + result = bytearray() + offset = 0 + for clear_bytes, encrypted_bytes in sample_info.sub_samples: + # Copy clear bytes as-is + result.extend(sample[offset : offset + clear_bytes]) + offset += clear_bytes + + # Decrypt encrypted portion using pattern encryption + encrypted_part = bytes(sample[offset : offset + encrypted_bytes]) + decrypted = self._decrypt_cbcs_pattern_with_settings( + encrypted_part, key, iv, crypt_byte_block, skip_byte_block + ) + result.extend(decrypted) + offset += encrypted_bytes + + # If there's any remaining data after subsamples, copy as-is + if offset < len(sample): + result.extend(sample[offset:]) + + return result + + def _decrypt_cbcs_pattern_with_settings( + self, data: bytes, key: bytes, iv: bytes, crypt_blocks: int, skip_blocks: int + ) -> bytes: + """ + Decrypts data using CBCS pattern encryption with explicit pattern settings. + + Args: + data (bytes): The encrypted data. + key (bytes): The decryption key. + iv (bytes): The initialization vector. + crypt_blocks (int): Number of encrypted blocks in pattern. + skip_blocks (int): Number of clear blocks in pattern. + + Returns: + bytes: The decrypted data. + """ + if not data: + return data + + block_size = 16 + + # If both crypt=0 and skip=0, it means full sample CBC encryption (no pattern) + # This is common for audio tracks in CBCS + if crypt_blocks == 0 and skip_blocks == 0: + # Decrypt complete blocks only + complete_blocks_size = (len(data) // block_size) * block_size + if complete_blocks_size > 0: + cipher = AES.new(key, AES.MODE_CBC, iv) + decrypted = cipher.decrypt(data[:complete_blocks_size]) + if complete_blocks_size < len(data): + return decrypted + data[complete_blocks_size:] + return decrypted + return data + + crypt_bytes = crypt_blocks * block_size + skip_bytes = skip_blocks * block_size + + # Step 1: Collect all encrypted blocks + encrypted_blocks = bytearray() + block_positions = [] # Track where each encrypted block came from + pos = 0 + + while pos < len(data): + # Encrypted portion + if pos + crypt_bytes <= len(data): + encrypted_blocks.extend(data[pos : pos + crypt_bytes]) + block_positions.append((pos, crypt_bytes)) + pos += crypt_bytes + else: + # Remaining data - encrypt complete blocks only + remaining = len(data) - pos + complete = (remaining // block_size) * block_size + if complete > 0: + encrypted_blocks.extend(data[pos : pos + complete]) + block_positions.append((pos, complete)) + pos += complete + break + + # Skip clear portion + if pos + skip_bytes <= len(data): + pos += skip_bytes + else: + break + + # Step 2: Decrypt all encrypted blocks as a continuous CBC stream + if encrypted_blocks: + cipher = AES.new(key, AES.MODE_CBC, iv) + decrypted_blocks = cipher.decrypt(bytes(encrypted_blocks)) + else: + decrypted_blocks = b"" + + # Step 3: Reconstruct the output with decrypted blocks and clear blocks + result = bytearray(data) # Start with original data + decrypted_pos = 0 + + for orig_pos, length in block_positions: + result[orig_pos : orig_pos + length] = decrypted_blocks[decrypted_pos : decrypted_pos + length] + decrypted_pos += length + + return bytes(result) + + def _process_trun(self, trun: MP4Atom) -> tuple[int, int]: """ Processes the 'trun' (Track Fragment Run) atom, which contains information about the samples in a track fragment. This includes sample sizes, durations, flags, and composition time offsets. @@ -475,38 +1129,43 @@ class MP4Decrypter: trun (MP4Atom): The 'trun' atom to process. Returns: - int: The number of samples in the 'trun' atom. + tuple[int, int]: (sample_count, data_offset_value) where data_offset_value is the offset + into mdat where this track's samples start (0 if not present in trun). """ trun_flags, sample_count = struct.unpack_from(">II", trun.data, 0) - data_offset = 8 + parse_offset = 8 + # Extract data_offset if present (flag 0x000001) + trun_data_offset = 0 if trun_flags & 0x000001: - data_offset += 4 - if trun_flags & 0x000004: - data_offset += 4 + trun_data_offset = struct.unpack_from(">i", trun.data, parse_offset)[0] # signed int + parse_offset += 4 + if trun_flags & 0x000004: # first-sample-flags-present + parse_offset += 4 self.trun_sample_sizes = array.array("I") for _ in range(sample_count): if trun_flags & 0x000100: # sample-duration-present flag - data_offset += 4 + parse_offset += 4 if trun_flags & 0x000200: # sample-size-present flag - sample_size = struct.unpack_from(">I", trun.data, data_offset)[0] + sample_size = struct.unpack_from(">I", trun.data, parse_offset)[0] self.trun_sample_sizes.append(sample_size) - data_offset += 4 + parse_offset += 4 else: self.trun_sample_sizes.append(0) # Using 0 instead of None for uniformity in the array if trun_flags & 0x000400: # sample-flags-present flag - data_offset += 4 + parse_offset += 4 if trun_flags & 0x000800: # sample-composition-time-offsets-present flag - data_offset += 4 + parse_offset += 4 - return sample_count + return sample_count, trun_data_offset def _modify_trun(self, trun: MP4Atom) -> MP4Atom: """ Modifies the 'trun' (Track Fragment Run) atom to update the data offset. - This is necessary to account for the encryption overhead. + This is necessary to account for the total encryption overhead from all trafs, + since mdat comes after all trafs in the moof. Args: trun (MP4Atom): The 'trun' atom to modify. @@ -518,9 +1177,10 @@ class MP4Decrypter: current_flags = struct.unpack_from(">I", trun_data, 0)[0] & 0xFFFFFF # If the data-offset-present flag is set, update the data offset to account for encryption overhead + # All trun data_offsets need to be reduced by the total encryption overhead from all trafs if current_flags & 0x000001: current_data_offset = struct.unpack_from(">i", trun_data, 8)[0] - struct.pack_into(">i", trun_data, 8, current_data_offset - self.encryption_overhead) + struct.pack_into(">i", trun_data, 8, current_data_offset - self.total_encryption_overhead) return MP4Atom(b"trun", len(trun_data) + 8, trun_data) @@ -541,8 +1201,8 @@ class MP4Decrypter: reference_type = current_size >> 31 current_referenced_size = current_size & 0x7FFFFFFF - # Remove encryption overhead from referenced size - new_referenced_size = current_referenced_size - self.encryption_overhead + # Remove total encryption overhead from referenced size + new_referenced_size = current_referenced_size - self.total_encryption_overhead new_size = (reference_type << 31) | new_referenced_size struct.pack_into(">I", sidx_data, 32, new_size) @@ -562,6 +1222,19 @@ class MP4Decrypter: parser = MP4Parser(trak.data) new_trak_data = bytearray() + # First pass: find track ID from tkhd + for atom in parser.list_atoms(): + if atom.atom_type == b"tkhd": + # tkhd: version(1) + flags(3) + ... + track_id at offset 12 (v0) or 20 (v1) + version = atom.data[0] + if version == 0: + self.current_track_id = struct.unpack_from(">I", atom.data, 12)[0] + else: + self.current_track_id = struct.unpack_from(">I", atom.data, 20)[0] + break + + # Second pass: process atoms + parser.position = 0 for atom in iter(parser.read_atom, None): if atom.atom_type == b"mdia": new_mdia = self._process_mdia(atom) @@ -705,6 +1378,7 @@ class MP4Decrypter: """ Extracts the codec format from the 'sinf' (Protection Scheme Information) atom. This includes information about the original format of the protected content. + Also extracts IV size from the 'tenc' box and encryption scheme from 'schm' box. Args: sinf (MP4Atom): The 'sinf' atom to extract from. @@ -713,28 +1387,177 @@ class MP4Decrypter: Optional[bytes]: The codec format or None if not found. """ parser = MP4Parser(sinf.data) + codec_format = None for atom in iter(parser.read_atom, None): if atom.atom_type == b"frma": - return atom.data - return None + codec_format = atom.data + elif atom.atom_type == b"schm": + self._parse_schm(atom) + elif atom.atom_type == b"schi": + # Parse schi to find tenc + schi_parser = MP4Parser(atom.data) + for schi_atom in iter(schi_parser.read_atom, None): + if schi_atom.atom_type == b"tenc": + self._parse_tenc(schi_atom) + return codec_format + + def _parse_schm(self, schm: MP4Atom) -> None: + """ + Parses the 'schm' (Scheme Type) atom to detect the encryption scheme. + + Args: + schm (MP4Atom): The 'schm' atom to parse. + """ + # schm structure: + # - version (1 byte) + flags (3 bytes) = 4 bytes + # - scheme_type (4 bytes): "cenc", "cens", "cbc1", or "cbcs" + # - scheme_version (4 bytes) + data = schm.data + if len(data) >= 8: + scheme_type = bytes(data[4:8]) + if scheme_type in (b"cenc", b"cens", b"cbc1", b"cbcs"): + self.encryption_scheme = scheme_type + + def _parse_tenc(self, tenc: MP4Atom) -> None: + """ + Parses the 'tenc' (Track Encryption) atom to extract encryption parameters. + Stores per-track encryption settings for multi-track support. + + Args: + tenc (MP4Atom): The 'tenc' atom to parse. + """ + # tenc structure: + # - version (1 byte) + flags (3 bytes) = 4 bytes + # - reserved (1 byte) + reserved (1 byte) if version == 0, or reserved (1 byte) + default_crypt_byte_block (4 bits) + default_skip_byte_block (4 bits) if version > 0 + # - default_isProtected (1 byte) + # - default_Per_Sample_IV_Size (1 byte) + # - default_KID (16 bytes) + # For version 1 with IV size 0: + # - default_constant_IV_size (1 byte) + # - default_constant_IV (default_constant_IV_size bytes) + data = tenc.data + if len(data) >= 8: + version = data[0] + + # Initialize per-track settings + track_settings = { + "crypt_byte_block": 1, # Default + "skip_byte_block": 9, # Default + "constant_iv": None, + "iv_size": 8, + "kid": None, # KID from tenc box + } + + # Extract pattern encryption parameters for version > 0 (used in cbcs) + if version > 0 and len(data) >= 6: + # Byte 5 contains crypt_byte_block (upper 4 bits) and skip_byte_block (lower 4 bits) + pattern_byte = data[5] + track_settings["crypt_byte_block"] = (pattern_byte >> 4) & 0x0F + track_settings["skip_byte_block"] = pattern_byte & 0x0F + # Also update global defaults (for backward compatibility) + self.crypt_byte_block = track_settings["crypt_byte_block"] + self.skip_byte_block = track_settings["skip_byte_block"] + + # Extract KID (default_KID is at offset 8, 16 bytes) + kid_offset = 8 + if len(data) >= kid_offset + 16: + kid = bytes(data[kid_offset : kid_offset + 16]) + track_settings["kid"] = kid + # Also store globally for backward compatibility + if not hasattr(self, "extracted_kids"): + self.extracted_kids = {} + if self.current_track_id > 0: + self.extracted_kids[self.current_track_id] = kid + + # IV size is at offset 7 for both versions + iv_size_offset = 7 + if len(data) > iv_size_offset: + iv_size = data[iv_size_offset] + if iv_size in (0, 8, 16): + # IV size of 0 means constant IV (used in cbcs) + track_settings["iv_size"] = iv_size if iv_size > 0 else 16 + self.default_iv_size = track_settings["iv_size"] + + # If IV size is 0, extract constant IV from tenc (for CBCS) + if iv_size == 0: + # After KID (16 bytes at offset 8), there's constant_IV_size (1 byte) and constant_IV + constant_iv_size_offset = 8 + 16 # offset 24 + if len(data) > constant_iv_size_offset: + constant_iv_size = data[constant_iv_size_offset] + constant_iv_offset = constant_iv_size_offset + 1 + if constant_iv_size > 0 and len(data) >= constant_iv_offset + constant_iv_size: + track_settings["constant_iv"] = bytes( + data[constant_iv_offset : constant_iv_offset + constant_iv_size] + ) + self.constant_iv = track_settings["constant_iv"] + + # Store per-track settings + if self.current_track_id > 0: + self.track_encryption_settings[self.current_track_id] = track_settings -def decrypt_segment(init_segment: bytes, segment_content: bytes, key_id: str, key: str) -> bytes: +def _build_key_map(key_id: str, key: str) -> dict: + """ + Build a key_map dict from (possibly comma-separated) key_id and key strings. + + Both arguments may be comma-separated lists of equal length to support + multi-key DRM streams where different tracks use different keys. + + Args: + key_id: Hex key ID(s), comma-separated for multi-key. + key: Hex key(s), comma-separated for multi-key. + + Returns: + dict mapping key-ID bytes to key bytes. + """ + key_ids = [k.strip() for k in key_id.split(",") if k.strip()] + keys = [k.strip() for k in key.split(",") if k.strip()] + return {bytes.fromhex(kid): bytes.fromhex(k) for kid, k in zip(key_ids, keys)} + + +def decrypt_segment( + init_segment: bytes, segment_content: bytes, key_id: str, key: str, include_init: bool = True +) -> bytes: """ Decrypts a CENC encrypted MP4 segment. Args: init_segment (bytes): Initialization segment data. segment_content (bytes): Encrypted segment content. - key_id (str): Key ID in hexadecimal format. - key (str): Key in hexadecimal format. + key_id (str): Key ID(s) in hexadecimal format, comma-separated for multi-key DRM. + key (str): Key(s) in hexadecimal format, comma-separated for multi-key DRM. + include_init (bool): If True, include processed init segment in output. + If False, only return decrypted media segment (for use with EXT-X-MAP). + + Returns: + bytes: Decrypted segment with processed init (moov/ftyp) + decrypted media (moof/mdat), + or just decrypted media if include_init is False. """ - key_map = {bytes.fromhex(key_id): bytes.fromhex(key)} + key_map = _build_key_map(key_id, key) decrypter = MP4Decrypter(key_map) - decrypted_content = decrypter.decrypt_segment(init_segment + segment_content) + decrypted_content = decrypter.decrypt_segment(init_segment + segment_content, include_init=include_init) return decrypted_content +def process_drm_init_segment(init_segment: bytes, key_id: str, key: str) -> bytes: + """ + Processes a DRM-protected init segment for use with EXT-X-MAP. + Removes encryption-related boxes but keeps the moov structure. + + Args: + init_segment (bytes): Initialization segment data. + key_id (str): Key ID(s) in hexadecimal format, comma-separated for multi-key DRM. + key (str): Key(s) in hexadecimal format, comma-separated for multi-key DRM. + + Returns: + bytes: Processed init segment with encryption boxes removed. + """ + key_map = _build_key_map(key_id, key) + decrypter = MP4Decrypter(key_map) + processed_init = decrypter.process_init_only(init_segment) + return processed_init + + def cli(): """ Command line interface for decrypting a CENC encrypted MP4 segment. diff --git a/mediaflow_proxy/extractors/F16Px.py b/mediaflow_proxy/extractors/F16Px.py index fbb7708..ea244eb 100644 --- a/mediaflow_proxy/extractors/F16Px.py +++ b/mediaflow_proxy/extractors/F16Px.py @@ -65,9 +65,9 @@ class F16PxExtractor(BaseExtractor): raise ExtractorError("F16PX: No playback data") try: - iv = self._b64url_decode(pb["iv"]) # nonce - key = self._join_key_parts(pb["key_parts"]) # AES key - payload = self._b64url_decode(pb["payload"]) # ciphertext + tag + iv = self._b64url_decode(pb["iv"]) # nonce + key = self._join_key_parts(pb["key_parts"]) # AES key + payload = self._b64url_decode(pb["payload"]) # ciphertext + tag cipher = python_aesgcm.new(key) decrypted = cipher.open(iv, payload) # AAD = '' like ResolveURL @@ -95,7 +95,7 @@ class F16PxExtractor(BaseExtractor): self.base_headers["origin"] = origin self.base_headers["Accept-Language"] = "en-US,en;q=0.5" self.base_headers["Accept"] = "*/*" - self.base_headers['user-agent'] = 'Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0' + self.base_headers["user-agent"] = "Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0" return { "destination_url": best, diff --git a/mediaflow_proxy/extractors/__pycache__/F16Px.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/F16Px.cpython-313.pyc new file mode 100644 index 0000000..77bccc4 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/F16Px.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/__init__.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..065ca49 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/__init__.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/base.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/base.cpython-313.pyc new file mode 100644 index 0000000..5f22939 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/base.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/dlhd.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/dlhd.cpython-313.pyc new file mode 100644 index 0000000..ec4dd71 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/dlhd.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/doodstream.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/doodstream.cpython-313.pyc new file mode 100644 index 0000000..81a2fae Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/doodstream.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/factory.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/factory.cpython-313.pyc new file mode 100644 index 0000000..6d7ef51 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/factory.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/fastream.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/fastream.cpython-313.pyc new file mode 100644 index 0000000..22dd5fb Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/fastream.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/filelions.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/filelions.cpython-313.pyc new file mode 100644 index 0000000..f8f0cd9 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/filelions.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/filemoon.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/filemoon.cpython-313.pyc new file mode 100644 index 0000000..682877e Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/filemoon.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/gupload.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/gupload.cpython-313.pyc new file mode 100644 index 0000000..afdbc12 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/gupload.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/livetv.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/livetv.cpython-313.pyc new file mode 100644 index 0000000..511dfca Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/livetv.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/lulustream.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/lulustream.cpython-313.pyc new file mode 100644 index 0000000..15f04bf Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/lulustream.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/maxstream.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/maxstream.cpython-313.pyc new file mode 100644 index 0000000..39fd206 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/maxstream.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/mixdrop.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/mixdrop.cpython-313.pyc new file mode 100644 index 0000000..3d78afb Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/mixdrop.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/okru.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/okru.cpython-313.pyc new file mode 100644 index 0000000..2f8fd0e Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/okru.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/sportsonline.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/sportsonline.cpython-313.pyc new file mode 100644 index 0000000..c9051e7 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/sportsonline.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/streamtape.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/streamtape.cpython-313.pyc new file mode 100644 index 0000000..9889241 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/streamtape.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/streamwish.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/streamwish.cpython-313.pyc new file mode 100644 index 0000000..3e4e085 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/streamwish.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/supervideo.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/supervideo.cpython-313.pyc new file mode 100644 index 0000000..0882358 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/supervideo.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/turbovidplay.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/turbovidplay.cpython-313.pyc new file mode 100644 index 0000000..0055a02 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/turbovidplay.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/uqload.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/uqload.cpython-313.pyc new file mode 100644 index 0000000..4d802f6 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/uqload.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/vavoo.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/vavoo.cpython-313.pyc new file mode 100644 index 0000000..7777896 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/vavoo.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/vidmoly.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/vidmoly.cpython-313.pyc new file mode 100644 index 0000000..adafcde Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/vidmoly.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/vidoza.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/vidoza.cpython-313.pyc new file mode 100644 index 0000000..273f693 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/vidoza.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/vixcloud.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/vixcloud.cpython-313.pyc new file mode 100644 index 0000000..99c4449 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/vixcloud.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/__pycache__/voe.cpython-313.pyc b/mediaflow_proxy/extractors/__pycache__/voe.cpython-313.pyc new file mode 100644 index 0000000..ceaeb54 Binary files /dev/null and b/mediaflow_proxy/extractors/__pycache__/voe.cpython-313.pyc differ diff --git a/mediaflow_proxy/extractors/base.py b/mediaflow_proxy/extractors/base.py index 0e271a7..bd0b76e 100644 --- a/mediaflow_proxy/extractors/base.py +++ b/mediaflow_proxy/extractors/base.py @@ -1,21 +1,53 @@ from abc import ABC, abstractmethod +from dataclasses import dataclass from typing import Dict, Optional, Any +from urllib.parse import urlparse import asyncio -import httpx +import aiohttp +import json import logging from mediaflow_proxy.configs import settings -from mediaflow_proxy.utils.http_utils import create_httpx_client, DownloadError +from mediaflow_proxy.utils.http_client import create_aiohttp_session +from mediaflow_proxy.utils.http_utils import DownloadError logger = logging.getLogger(__name__) class ExtractorError(Exception): """Base exception for all extractors.""" + pass +@dataclass +class HttpResponse: + """ + Simple response container for extractor HTTP requests. + + Uses aiohttp-style naming conventions: + - status (not status_code) + - text (pre-loaded content as string) + - content (pre-loaded content as bytes) + """ + + status: int + headers: Dict[str, str] + text: str + content: bytes + url: str + + def json(self) -> Any: + """Parse response content as JSON.""" + return json.loads(self.text) + + def get_origin(self) -> str: + """Get the origin (scheme + host) from the response URL.""" + parsed = urlparse(self.url) + return f"{parsed.scheme}://{parsed.netloc}" + + class BaseExtractor(ABC): """Base class for all URL extractors. @@ -43,74 +75,99 @@ class BaseExtractor(ABC): backoff_factor: float = 0.5, raise_on_status: bool = True, **kwargs, - ) -> httpx.Response: + ) -> HttpResponse: """ - Make HTTP request with retry and timeout support. + Make HTTP request with retry and timeout support using aiohttp. Parameters ---------- + url : str + The URL to request. + method : str + HTTP method (GET, POST, etc.). Defaults to GET. + headers : dict | None + Additional headers to merge with base headers. timeout : float | None - Seconds to wait for the request (applied to httpx.Timeout). Defaults to 15s. + Seconds to wait for the request. Defaults to 15s. retries : int Number of attempts for transient errors. backoff_factor : float Base for exponential backoff between retries. raise_on_status : bool - If True, HTTP non-2xx raises DownloadError (preserves status code). + If True, HTTP non-2xx raises DownloadError. + **kwargs + Additional arguments passed to aiohttp request (e.g., data, json). + + Returns + ------- + HttpResponse + Response object with pre-loaded content. """ attempt = 0 last_exc = None - # build request headers merging base and per-request + # Build request headers merging base and per-request request_headers = self.base_headers.copy() if headers: request_headers.update(headers) - timeout_cfg = httpx.Timeout(timeout or 15.0) + timeout_val = timeout or 15.0 while attempt < retries: try: - async with create_httpx_client(timeout=timeout_cfg) as client: - response = await client.request( + async with create_aiohttp_session(url, timeout=timeout_val) as (session, proxy_url): + async with session.request( method, url, headers=request_headers, + proxy=proxy_url, **kwargs, - ) + ) as response: + # Read content while session is still open + content = await response.read() + text = content.decode("utf-8", errors="replace") + final_url = str(response.url) + status = response.status + resp_headers = dict(response.headers) - if raise_on_status: - try: - response.raise_for_status() - except httpx.HTTPStatusError as e: - # Provide a short body preview for debugging - body_preview = "" - try: - body_preview = e.response.text[:500] - except Exception: - body_preview = "" + if raise_on_status and status >= 400: + body_preview = text[:500] logger.debug( - "HTTPStatusError for %s (status=%s) -- body preview: %s", + "HTTP error for %s (status=%s) -- body preview: %s", url, - e.response.status_code, + status, body_preview, ) - raise DownloadError(e.response.status_code, f"HTTP error {e.response.status_code} while requesting {url}") - return response + raise DownloadError(status, f"HTTP error {status} while requesting {url}") + + return HttpResponse( + status=status, + headers=resp_headers, + text=text, + content=content, + url=final_url, + ) except DownloadError: # Do not retry on explicit HTTP status errors (they are intentional) raise - except (httpx.ReadTimeout, httpx.ConnectTimeout, httpx.NetworkError, httpx.TransportError) as e: - # Transient network error — retry with backoff + except (asyncio.TimeoutError, aiohttp.ClientError) as e: + # Transient network error - retry with backoff last_exc = e attempt += 1 sleep_for = backoff_factor * (2 ** (attempt - 1)) - logger.warning("Transient network error (attempt %s/%s) for %s: %s — retrying in %.1fs", - attempt, retries, url, e, sleep_for) + logger.warning( + "Transient network error (attempt %s/%s) for %s: %s — retrying in %.1fs", + attempt, + retries, + url, + e, + sleep_for, + ) await asyncio.sleep(sleep_for) continue except Exception as e: - # Unexpected exception — wrap as ExtractorError to keep interface consistent + # Unexpected exception - wrap as ExtractorError to keep interface consistent logger.exception("Unhandled exception while requesting %s: %s", url, e) raise ExtractorError(f"Request failed for URL {url}: {str(e)}") diff --git a/mediaflow_proxy/extractors/dlhd.py b/mediaflow_proxy/extractors/dlhd.py index 940e696..07a1d87 100644 --- a/mediaflow_proxy/extractors/dlhd.py +++ b/mediaflow_proxy/extractors/dlhd.py @@ -1,133 +1,345 @@ +import hashlib +import hmac import re -import base64 +import time import logging -from typing import Any, Dict, Optional, List -from urllib.parse import urlparse, quote_plus, urljoin +from typing import Any, Dict, Optional +from urllib.parse import urlparse +import aiohttp -import httpx - - -from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError +from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError, HttpResponse +from mediaflow_proxy.utils.http_client import create_aiohttp_session +from mediaflow_proxy.configs import settings logger = logging.getLogger(__name__) # Silenzia l'errore ConnectionResetError su Windows -logging.getLogger('asyncio').setLevel(logging.CRITICAL) +logging.getLogger("asyncio").setLevel(logging.CRITICAL) + +# Default fingerprint parameters +DEFAULT_DLHD_USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0" +DEFAULT_DLHD_SCREEN_RESOLUTION = "1920x1080" +DEFAULT_DLHD_TIMEZONE = "UTC" +DEFAULT_DLHD_LANGUAGE = "en" + + +def compute_fingerprint( + user_agent: str = DEFAULT_DLHD_USER_AGENT, + screen_resolution: str = DEFAULT_DLHD_SCREEN_RESOLUTION, + timezone: str = DEFAULT_DLHD_TIMEZONE, + language: str = DEFAULT_DLHD_LANGUAGE, +) -> str: + """ + Compute the X-Fingerprint header value. + + Algorithm: + fingerprint = SHA256(useragent + screen_resolution + timezone + language).hex()[:16] + + Args: + user_agent: The user agent string + screen_resolution: The screen resolution (e.g., "1920x1080") + timezone: The timezone (e.g., "UTC") + language: The language code (e.g., "en") + + Returns: + The 16-character fingerprint + """ + combined = f"{user_agent}{screen_resolution}{timezone}{language}" + return hashlib.sha256(combined.encode("utf-8")).hexdigest()[:16] + + +def compute_key_path(resource: str, number: str, timestamp: int, fingerprint: str, secret_key: str) -> str: + """ + Compute the X-Key-Path header value. + + Algorithm: + key_path = HMAC-SHA256("resource|number|timestamp|fingerprint", secret_key).hex()[:16] + + Args: + resource: The resource from the key URL + number: The number from the key URL + timestamp: The Unix timestamp + fingerprint: The fingerprint value + secret_key: The HMAC secret key (channel_salt) + + Returns: + The 16-character key path + """ + combined = f"{resource}|{number}|{timestamp}|{fingerprint}" + hmac_hash = hmac.new(secret_key.encode("utf-8"), combined.encode("utf-8"), hashlib.sha256).hexdigest() + return hmac_hash[:16] + + +def compute_key_headers(key_url: str, secret_key: str) -> tuple[int, int, str, str] | None: + """ + Compute X-Key-Timestamp, X-Key-Nonce, X-Key-Path, and X-Fingerprint for a /key/ URL. + + Algorithm: + 1. Extract resource and number from URL pattern /key/{resource}/{number} + 2. ts = Unix timestamp in seconds + 3. hmac_hash = HMAC-SHA256(resource, secret_key).hex() + 4. nonce = proof-of-work: find i where MD5(hmac+resource+number+ts+i)[:4] < 0x1000 + 5. fingerprint = compute_fingerprint() + 6. key_path = HMAC-SHA256("resource|number|ts|fingerprint", secret_key).hex()[:16] + + Args: + key_url: The key URL containing /key/{resource}/{number} + secret_key: The HMAC secret key (channel_salt) + + Returns: + Tuple of (timestamp, nonce, key_path, fingerprint) or None if URL doesn't match pattern + """ + # Extract resource and number from URL + pattern = r"/key/([^/]+)/(\d+)" + match = re.search(pattern, key_url) + + if not match: + return None + + resource = match.group(1) + number = match.group(2) + + ts = int(time.time()) + + # Compute HMAC-SHA256 + hmac_hash = hmac.new(secret_key.encode("utf-8"), resource.encode("utf-8"), hashlib.sha256).hexdigest() + + # Proof-of-work loop + nonce = 0 + for i in range(100000): + combined = f"{hmac_hash}{resource}{number}{ts}{i}" + md5_hash = hashlib.md5(combined.encode("utf-8")).hexdigest() + prefix_value = int(md5_hash[:4], 16) + + if prefix_value < 0x1000: # < 4096 + nonce = i + break + + fingerprint = compute_fingerprint() + key_path = compute_key_path(resource, number, ts, fingerprint, secret_key) + + return ts, nonce, key_path, fingerprint class DLHDExtractor(BaseExtractor): """DLHD (DaddyLive) URL extractor for M3U8 streams. - - Notes: - - Multi-domain support for daddylive.sx / dlhd.dad - - Robust extraction of auth parameters and server lookup - - Uses retries/timeouts via BaseExtractor where possible - - Multi-iframe fallback for resilience + Supports the new authentication flow with: + - EPlayerAuth extraction (auth_token, channel_key, channel_salt) + - Server lookup for dynamic server selection + - Dynamic key header computation for AES-128 encrypted streams """ - def __init__(self, request_headers: dict): super().__init__(request_headers) - self.mediaflow_endpoint = "hls_manifest_proxy" + self.mediaflow_endpoint = "hls_key_proxy" self._iframe_context: Optional[str] = None + self._flaresolverr_cookies: Optional[str] = None + self._flaresolverr_user_agent: Optional[str] = None + async def _fetch_via_flaresolverr(self, url: str) -> HttpResponse: + """Fetch a URL using FlareSolverr to bypass Cloudflare protection.""" + if not settings.flaresolverr_url: + raise ExtractorError("FlareSolverr URL not configured. Set FLARESOLVERR_URL in environment.") + flaresolverr_endpoint = f"{settings.flaresolverr_url.rstrip('/')}/v1" + payload = { + "cmd": "request.get", + "url": url, + "maxTimeout": settings.flaresolverr_timeout * 1000, + } - async def _make_request(self, url: str, method: str = "GET", headers: Optional[Dict] = None, **kwargs) -> Any: - """Override to disable SSL verification for this extractor and use fetch_with_retry if available.""" - from mediaflow_proxy.utils.http_utils import create_httpx_client, fetch_with_retry + logger.info(f"Using FlareSolverr to fetch: {url}") + async with aiohttp.ClientSession() as session: + async with session.post( + flaresolverr_endpoint, + json=payload, + timeout=aiohttp.ClientTimeout(total=settings.flaresolverr_timeout + 10), + ) as response: + if response.status != 200: + raise ExtractorError(f"FlareSolverr returned status {response.status}") + + data = await response.json() + + if data.get("status") != "ok": + raise ExtractorError(f"FlareSolverr failed: {data.get('message', 'Unknown error')}") + + solution = data.get("solution", {}) + html_content = solution.get("response", "") + final_url = solution.get("url", url) + status = solution.get("status", 200) + + # Store cookies and user-agent for subsequent requests + cookies = solution.get("cookies", []) + if cookies: + cookie_str = "; ".join([f"{c['name']}={c['value']}" for c in cookies]) + self._flaresolverr_cookies = cookie_str + logger.info(f"FlareSolverr provided {len(cookies)} cookies") + + user_agent = solution.get("userAgent") + if user_agent: + self._flaresolverr_user_agent = user_agent + logger.info(f"FlareSolverr user-agent: {user_agent}") + + logger.info(f"FlareSolverr successfully bypassed Cloudflare for: {url}") + + return HttpResponse( + status=status, + headers={}, + text=html_content, + content=html_content.encode("utf-8", errors="replace"), + url=final_url, + ) + + async def _make_request( + self, url: str, method: str = "GET", headers: Optional[Dict] = None, use_flaresolverr: bool = False, **kwargs + ) -> HttpResponse: + """Override to disable SSL verification and optionally use FlareSolverr.""" + # Use FlareSolverr for Cloudflare-protected pages + if use_flaresolverr and settings.flaresolverr_url: + return await self._fetch_via_flaresolverr(url) timeout = kwargs.pop("timeout", 15) - retries = kwargs.pop("retries", 3) - backoff_factor = kwargs.pop("backoff_factor", 0.5) + kwargs.pop("retries", 3) # consumed but not used directly + kwargs.pop("backoff_factor", 0.5) # consumed but not used directly + # Merge headers + request_headers = self.base_headers.copy() + if headers: + request_headers.update(headers) - async with create_httpx_client(verify=False, timeout=httpx.Timeout(timeout)) as client: - try: - return await fetch_with_retry(client, method, url, headers or {}, timeout=timeout) - except Exception: - logger.debug("fetch_with_retry failed or unavailable; falling back to direct request for %s", url) - response = await client.request(method, url, headers=headers or {}, timeout=timeout) - response.raise_for_status() - return response + # Add FlareSolverr cookies if available + if self._flaresolverr_cookies: + existing_cookies = request_headers.get("Cookie", "") + if existing_cookies: + request_headers["Cookie"] = f"{existing_cookies}; {self._flaresolverr_cookies}" + else: + request_headers["Cookie"] = self._flaresolverr_cookies + # Use FlareSolverr user-agent if available + if self._flaresolverr_user_agent: + request_headers["User-Agent"] = self._flaresolverr_user_agent - async def _extract_lovecdn_stream(self, iframe_url: str, iframe_content: str, headers: dict) -> Dict[str, Any]: + # Use create_aiohttp_session with verify=False for SSL bypass + async with create_aiohttp_session(url, timeout=timeout, verify=False) as (session, proxy_url): + async with session.request(method, url, headers=request_headers, proxy=proxy_url, **kwargs) as response: + content = await response.read() + final_url = str(response.url) + status = response.status + resp_headers = dict(response.headers) + + if status >= 400: + raise ExtractorError(f"HTTP error {status} while requesting {url}") + + return HttpResponse( + status=status, + headers=resp_headers, + text=content.decode("utf-8", errors="replace"), + content=content, + url=final_url, + ) + + async def _extract_session_data(self, iframe_url: str, main_url: str) -> dict | None: """ - Estrattore alternativo per iframe lovecdn.ru che usa un formato diverso. + Fetch the iframe URL and extract auth_token, channel_key, and channel_salt. + + Args: + iframe_url: The iframe URL to fetch + main_url: The main site domain for Referer header + + Returns: + Dict with auth_token, channel_key, channel_salt, or None if not found """ + headers = { + "User-Agent": self._flaresolverr_user_agent or DEFAULT_DLHD_USER_AGENT, + "Referer": f"https://{main_url}/", + } + try: - # Cerca pattern di stream URL diretto - m3u8_patterns = [ - r'["\']([^"\']*\.m3u8[^"\']*)["\']', - r'source[:\s]+["\']([^"\']+)["\']', - r'file[:\s]+["\']([^"\']+\.m3u8[^"\']*)["\']', - r'hlsManifestUrl[:\s]*["\']([^"\']+)["\']', - ] - - stream_url = None - for pattern in m3u8_patterns: - matches = re.findall(pattern, iframe_content) - for match in matches: - if '.m3u8' in match and match.startswith('http'): - stream_url = match - logger.info(f"Found direct m3u8 URL: {stream_url}") - break - if stream_url: - break - - # Pattern 2: Cerca costruzione dinamica URL - if not stream_url: - channel_match = re.search(r'(?:stream|channel)["\s:=]+["\']([^"\']+)["\']', iframe_content) - server_match = re.search(r'(?:server|domain|host)["\s:=]+["\']([^"\']+)["\']', iframe_content) - - if channel_match: - channel_name = channel_match.group(1) - server = server_match.group(1) if server_match else 'newkso.ru' - stream_url = f"https://{server}/{channel_name}/mono.m3u8" - logger.info(f"Constructed stream URL: {stream_url}") - - if not stream_url: - # Fallback: cerca qualsiasi URL che sembri uno stream - url_pattern = r'https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*' - matches = re.findall(url_pattern, iframe_content) - if matches: - stream_url = matches[0] - logger.info(f"Found fallback stream URL: {stream_url}") - - if not stream_url: - raise ExtractorError(f"Could not find stream URL in lovecdn.ru iframe") - - # Usa iframe URL come referer - iframe_origin = f"https://{urlparse(iframe_url).netloc}" - stream_headers = { - 'User-Agent': headers['User-Agent'], - 'Referer': iframe_url, - 'Origin': iframe_origin - } - - # Determina endpoint in base al dominio dello stream - endpoint = "hls_key_proxy" - - logger.info(f"Using lovecdn.ru stream with endpoint: {endpoint}") - - return { - "destination_url": stream_url, - "request_headers": stream_headers, - "mediaflow_endpoint": endpoint, - } - + resp = await self._make_request(iframe_url, headers=headers, timeout=12) + html = resp.text except Exception as e: - raise ExtractorError(f"Failed to extract lovecdn.ru stream: {e}") + logger.warning(f"Error fetching iframe URL: {e}") + return None + + # Pattern to extract EPlayerAuth.init block with authToken, channelKey, channelSalt + # Matches: EPlayerAuth.init({ authToken: '...', channelKey: '...', ..., channelSalt: '...' }); + auth_pattern = r"EPlayerAuth\.init\s*\(\s*\{\s*authToken:\s*'([^']+)'" + channel_key_pattern = r"channelKey:\s*'([^']+)'" + channel_salt_pattern = r"channelSalt:\s*'([^']+)'" + + # Pattern to extract server lookup base URL from fetchWithRetry call + lookup_pattern = r"fetchWithRetry\s*\(\s*'([^']+server_lookup\?channel_id=)" + + auth_match = re.search(auth_pattern, html) + channel_key_match = re.search(channel_key_pattern, html) + channel_salt_match = re.search(channel_salt_pattern, html) + lookup_match = re.search(lookup_pattern, html) + + if auth_match and channel_key_match and channel_salt_match: + result = { + "auth_token": auth_match.group(1), + "channel_key": channel_key_match.group(1), + "channel_salt": channel_salt_match.group(1), + } + if lookup_match: + result["server_lookup_url"] = lookup_match.group(1) + result["channel_key"] + + return result + + return None + + async def _get_server_key(self, server_lookup_url: str, iframe_url: str) -> str | None: + """ + Fetch the server lookup URL and extract the server_key. + + Args: + server_lookup_url: The server lookup URL + iframe_url: The iframe URL for extracting the host for headers + + Returns: + The server_key or None if not found + """ + parsed = urlparse(iframe_url) + iframe_host = parsed.netloc + + headers = { + "User-Agent": self._flaresolverr_user_agent or DEFAULT_DLHD_USER_AGENT, + "Referer": f"https://{iframe_host}/", + "Origin": f"https://{iframe_host}", + } + + try: + resp = await self._make_request(server_lookup_url, headers=headers, timeout=10) + data = resp.json() + return data.get("server_key") + except Exception as e: + logger.warning(f"Error fetching server lookup: {e}") + return None + + def _build_m3u8_url(self, server_key: str, channel_key: str) -> str: + """ + Build the m3u8 URL based on the server_key. + + Args: + server_key: The server key from server lookup + channel_key: The channel key + + Returns: + The m3u8 URL (with .css extension as per the original implementation) + """ + if server_key == "top1/cdn": + return f"https://top1.dvalna.ru/top1/cdn/{channel_key}/mono.css" + else: + return f"https://{server_key}new.dvalna.ru/{server_key}/{channel_key}/mono.css" async def _extract_new_auth_flow(self, iframe_url: str, iframe_content: str, headers: dict) -> Dict[str, Any]: """Handles the new authentication flow found in recent updates.""" - + def _extract_params(js: str) -> Dict[str, Optional[str]]: params = {} patterns = { @@ -143,82 +355,93 @@ class DLHDExtractor(BaseExtractor): return params params = _extract_params(iframe_content) - + missing_params = [k for k, v in params.items() if not v] if missing_params: # This is not an error, just means it's not the new flow raise ExtractorError(f"Not the new auth flow: missing params {missing_params}") logger.info("New auth flow detected. Proceeding with POST auth.") - + # 1. Initial Auth POST - auth_url = 'https://security.newkso.ru/auth2.php' - # Use files parameter to force multipart/form-data which is required by the server - # (None, value) tells httpx to send it as a form field, not a file upload - multipart_data = { - 'channelKey': (None, params["channel_key"]), - 'country': (None, params["auth_country"]), - 'timestamp': (None, params["auth_ts"]), - 'expiry': (None, params["auth_expiry"]), - 'token': (None, params["auth_token"]), - } + auth_url = "https://security.newkso.ru/auth2.php" iframe_origin = f"https://{urlparse(iframe_url).netloc}" auth_headers = headers.copy() - auth_headers.update({ - 'Accept': '*/*', - 'Accept-Language': 'en-US,en;q=0.9', - 'Origin': iframe_origin, - 'Referer': iframe_url, - 'Sec-Fetch-Dest': 'empty', - 'Sec-Fetch-Mode': 'cors', - 'Sec-Fetch-Site': 'cross-site', - 'Priority': 'u=1, i', - }) - - from mediaflow_proxy.utils.http_utils import create_httpx_client + auth_headers.update( + { + "Accept": "*/*", + "Accept-Language": "en-US,en;q=0.9", + "Origin": iframe_origin, + "Referer": iframe_url, + "Sec-Fetch-Dest": "empty", + "Sec-Fetch-Mode": "cors", + "Sec-Fetch-Site": "cross-site", + "Priority": "u=1, i", + } + ) + + # Build form data for multipart/form-data + form_data = aiohttp.FormData() + form_data.add_field("channelKey", params["channel_key"]) + form_data.add_field("country", params["auth_country"]) + form_data.add_field("timestamp", params["auth_ts"]) + form_data.add_field("expiry", params["auth_expiry"]) + form_data.add_field("token", params["auth_token"]) + try: - async with create_httpx_client(verify=False) as client: - # Note: using 'files' instead of 'data' to ensure multipart/form-data Content-Type - auth_resp = await client.post(auth_url, files=multipart_data, headers=auth_headers, timeout=12) - auth_resp.raise_for_status() - auth_data = auth_resp.json() - if not (auth_data.get("valid") or auth_data.get("success")): - raise ExtractorError(f"Initial auth failed with response: {auth_data}") + async with create_aiohttp_session(auth_url, timeout=12, verify=False) as (session, proxy_url): + async with session.post( + auth_url, + headers=auth_headers, + data=form_data, + proxy=proxy_url, + ) as response: + content = await response.read() + response.raise_for_status() + import json + + auth_data = json.loads(content.decode("utf-8")) + if not (auth_data.get("valid") or auth_data.get("success")): + raise ExtractorError(f"Initial auth failed with response: {auth_data}") logger.info("New auth flow: Initial auth successful.") + except ExtractorError: + raise except Exception as e: raise ExtractorError(f"New auth flow failed during initial auth POST: {e}") # 2. Server Lookup server_lookup_url = f"https://{urlparse(iframe_url).netloc}/server_lookup.js?channel_id={params['channel_key']}" try: - # Use _make_request as it handles retries and expects JSON + # Use _make_request as it handles retries lookup_resp = await self._make_request(server_lookup_url, headers=headers, timeout=10) server_data = lookup_resp.json() - server_key = server_data.get('server_key') + server_key = server_data.get("server_key") if not server_key: raise ExtractorError(f"No server_key in lookup response: {server_data}") logger.info(f"New auth flow: Server lookup successful - Server key: {server_key}") + except ExtractorError: + raise except Exception as e: raise ExtractorError(f"New auth flow failed during server lookup: {e}") # 3. Build final stream URL - channel_key = params['channel_key'] - auth_token = params['auth_token'] + channel_key = params["channel_key"] + auth_token = params["auth_token"] # The JS logic uses .css, not .m3u8 - if server_key == 'top1/cdn': - stream_url = f'https://top1.newkso.ru/top1/cdn/{channel_key}/mono.css' + if server_key == "top1/cdn": + stream_url = f"https://top1.newkso.ru/top1/cdn/{channel_key}/mono.css" else: - stream_url = f'https://{server_key}new.newkso.ru/{server_key}/{channel_key}/mono.css' - - logger.info(f'New auth flow: Constructed stream URL: {stream_url}') + stream_url = f"https://{server_key}new.newkso.ru/{server_key}/{channel_key}/mono.css" + + logger.info(f"New auth flow: Constructed stream URL: {stream_url}") stream_headers = { - 'User-Agent': headers['User-Agent'], - 'Referer': iframe_url, - 'Origin': iframe_origin, - 'Authorization': f'Bearer {auth_token}', - 'X-Channel-Key': channel_key + "User-Agent": headers["User-Agent"], + "Referer": iframe_url, + "Origin": iframe_origin, + "Authorization": f"Bearer {auth_token}", + "X-Channel-Key": channel_key, } return { @@ -227,106 +450,255 @@ class DLHDExtractor(BaseExtractor): "mediaflow_endpoint": "hls_manifest_proxy", } - async def extract(self, url: str, **kwargs) -> Dict[str, Any]: - """Main extraction flow: resolve base, fetch players, extract iframe, auth and final m3u8.""" - baseurl = "https://dlhd.dad/" + async def _extract_lovecdn_stream(self, iframe_url: str, iframe_content: str, headers: dict) -> Dict[str, Any]: + """ + Alternative extractor for lovecdn.ru iframe that uses a different format. + """ + try: + # Look for direct stream URL patterns + m3u8_patterns = [ + r'["\']([^"\']*\.m3u8[^"\']*)["\']', + r'source[:\s]+["\']([^"\']+)["\']', + r'file[:\s]+["\']([^"\']+\.m3u8[^"\']*)["\']', + r'hlsManifestUrl[:\s]*["\']([^"\']+)["\']', + ] - def extract_channel_id(u: str) -> Optional[str]: - match_watch_id = re.search(r'watch\.php\?id=(\d+)', u) - if match_watch_id: - return match_watch_id.group(1) - return None + stream_url = None + for pattern in m3u8_patterns: + matches = re.findall(pattern, iframe_content) + for match in matches: + if ".m3u8" in match and match.startswith("http"): + stream_url = match + logger.info(f"Found direct m3u8 URL: {stream_url}") + break + if stream_url: + break + # Pattern 2: Look for dynamic URL construction + if not stream_url: + channel_match = re.search(r'(?:stream|channel)["\s:=]+["\']([^"\']+)["\']', iframe_content) + server_match = re.search(r'(?:server|domain|host)["\s:=]+["\']([^"\']+)["\']', iframe_content) - async def get_stream_data(initial_url: str): - daddy_origin = urlparse(baseurl).scheme + "://" + urlparse(baseurl).netloc - daddylive_headers = { - 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36', - 'Referer': baseurl, - 'Origin': daddy_origin + if channel_match: + channel_name = channel_match.group(1) + server = server_match.group(1) if server_match else "newkso.ru" + stream_url = f"https://{server}/{channel_name}/mono.m3u8" + logger.info(f"Constructed stream URL: {stream_url}") + + if not stream_url: + # Fallback: look for any URL that looks like a stream + url_pattern = r'https?://[^\s"\'<>]+\.m3u8[^\s"\'<>]*' + matches = re.findall(url_pattern, iframe_content) + if matches: + stream_url = matches[0] + logger.info(f"Found fallback stream URL: {stream_url}") + + if not stream_url: + raise ExtractorError("Could not find stream URL in lovecdn.ru iframe") + + # Use iframe URL as referer + iframe_origin = f"https://{urlparse(iframe_url).netloc}" + stream_headers = {"User-Agent": headers["User-Agent"], "Referer": iframe_url, "Origin": iframe_origin} + + # Determine endpoint based on the stream domain + endpoint = "hls_key_proxy" + + logger.info(f"Using lovecdn.ru stream with endpoint: {endpoint}") + + return { + "destination_url": stream_url, + "request_headers": stream_headers, + "mediaflow_endpoint": endpoint, } + except Exception as e: + raise ExtractorError(f"Failed to extract lovecdn.ru stream: {e}") - # 1. Request initial page - resp1 = await self._make_request(initial_url, headers=daddylive_headers, timeout=15) - player_links = re.findall(r']*data-url="([^"]+)"[^>]*>Player\s*\d+', resp1.text) - if not player_links: - raise ExtractorError("No player links found on the page.") + async def _extract_direct_stream(self, channel_id: str) -> Dict[str, Any]: + """ + Direct stream extraction using server lookup API with the new auth flow. + This extracts auth_token, channel_key, channel_salt and computes key headers. + """ + # Common iframe domains for DLHD + iframe_domains = ["lefttoplay.xyz"] + for iframe_domain in iframe_domains: + try: + iframe_url = f"https://{iframe_domain}/premiumtv/daddyhd.php?id={channel_id}" + logger.info(f"Attempting extraction via {iframe_domain}") - # Prova tutti i player e raccogli tutti gli iframe validi - last_player_error = None - iframe_candidates = [] + session_data = await self._extract_session_data(iframe_url, "dlhd.link") - for player_url in player_links: - try: - if not player_url.startswith('http'): - player_url = baseurl + player_url.lstrip('/') - - - daddylive_headers['Referer'] = player_url - daddylive_headers['Origin'] = player_url - resp2 = await self._make_request(player_url, headers=daddylive_headers, timeout=12) - iframes2 = re.findall(r' Dict[str, Any]: + """Main extraction flow - uses direct server lookup with new auth flow.""" + + def extract_channel_id(u: str) -> Optional[str]: + match_watch_id = re.search(r"watch\.php\?id=(\d+)", u) + if match_watch_id: + return match_watch_id.group(1) + # Also try stream-XXX pattern + match_stream = re.search(r"stream-(\d+)", u) + if match_stream: + return match_stream.group(1) + return None try: channel_id = extract_channel_id(url) if not channel_id: raise ExtractorError(f"Unable to extract channel ID from {url}") - logger.info(f"Using base domain: {baseurl}") - return await get_stream_data(url) + logger.info(f"Extracting DLHD stream for channel ID: {channel_id}") + # Try direct stream extraction with new auth flow + try: + return await self._extract_direct_stream(channel_id) + except ExtractorError as e: + logger.warning(f"Direct stream extraction failed: {e}") + + # Fallback to legacy iframe-based extraction if direct fails + logger.info("Falling back to iframe-based extraction...") + return await self._extract_via_iframe(url, channel_id) except Exception as e: raise ExtractorError(f"Extraction failed: {str(e)}") + + async def _extract_via_iframe(self, url: str, channel_id: str) -> Dict[str, Any]: + """Legacy iframe-based extraction flow - used as fallback.""" + baseurl = "https://dlhd.dad/" + + daddy_origin = urlparse(baseurl).scheme + "://" + urlparse(baseurl).netloc + daddylive_headers = { + "User-Agent": self._flaresolverr_user_agent + or "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36", + "Referer": baseurl, + "Origin": daddy_origin, + } + + # 1. Request initial page - use FlareSolverr if available to bypass Cloudflare + use_flaresolverr = settings.flaresolverr_url is not None + resp1 = await self._make_request(url, headers=daddylive_headers, timeout=15, use_flaresolverr=use_flaresolverr) + resp1_text = resp1.text + + # Update headers with FlareSolverr user-agent after initial request + if self._flaresolverr_user_agent: + daddylive_headers["User-Agent"] = self._flaresolverr_user_agent + + player_links = re.findall(r']*data-url="([^"]+)"[^>]*>Player\s*\d+', resp1_text) + if not player_links: + raise ExtractorError("No player links found on the page.") + + # Try all players and collect all valid iframes + last_player_error = None + iframe_candidates = [] + + for player_url in player_links: + try: + if not player_url.startswith("http"): + player_url = baseurl + player_url.lstrip("/") + + daddylive_headers["Referer"] = player_url + daddylive_headers["Origin"] = player_url + resp2 = await self._make_request(player_url, headers=daddylive_headers, timeout=12) + resp2_text = resp2.text + iframes2 = re.findall(r' Dict[str, str]: - """Extract DoodStream URL.""" - response = await self._make_request(url) + async def extract(self, url: str, **kwargs): + parsed = urlparse(url) + video_id = parsed.path.rstrip("/").split("/")[-1] + if not video_id: + raise ExtractorError("Invalid Dood URL") - # Extract URL pattern - pattern = r"(\/pass_md5\/.*?)'.*(\?token=.*?expiry=)" - match = re.search(pattern, response.text, re.DOTALL) + headers = { + "User-Agent": self.base_headers.get("User-Agent") or "Mozilla/5.0", + "Referer": f"{self.base_url}/", + } + + embed_url = f"{self.base_url}/e/{video_id}" + html = (await self._make_request(embed_url, headers=headers)).text + + match = re.search(r"(\/pass_md5\/[^']+)", html) if not match: - raise ExtractorError("Failed to extract URL pattern") + raise ExtractorError("Dood: pass_md5 not found") - # Build final URL - pass_url = f"{self.base_url}{match[1]}" - referer = f"{self.base_url}/" - headers = {"range": "bytes=0-", "referer": referer} + pass_url = urljoin(self.base_url, match.group(1)) - response = await self._make_request(pass_url, headers=headers) - timestamp = str(int(time.time())) - final_url = f"{response.text}123456789{match[2]}{timestamp}" + base_stream = (await self._make_request(pass_url, headers=headers)).text.strip() + + token_match = re.search(r"token=([^&]+)", html) + if not token_match: + raise ExtractorError("Dood: token missing") + + token = token_match.group(1) + + final_url = f"{base_stream}123456789?token={token}&expiry={int(time.time())}" - self.base_headers["referer"] = referer return { "destination_url": final_url, - "request_headers": self.base_headers, - "mediaflow_endpoint": self.mediaflow_endpoint, + "request_headers": headers, + "mediaflow_endpoint": "proxy_stream_endpoint", } diff --git a/mediaflow_proxy/extractors/factory.py b/mediaflow_proxy/extractors/factory.py index 042f3ac..e3158bc 100644 --- a/mediaflow_proxy/extractors/factory.py +++ b/mediaflow_proxy/extractors/factory.py @@ -7,6 +7,7 @@ from mediaflow_proxy.extractors.sportsonline import SportsonlineExtractor from mediaflow_proxy.extractors.filelions import FileLionsExtractor from mediaflow_proxy.extractors.filemoon import FileMoonExtractor from mediaflow_proxy.extractors.F16Px import F16PxExtractor +from mediaflow_proxy.extractors.gupload import GuploadExtractor from mediaflow_proxy.extractors.livetv import LiveTVExtractor from mediaflow_proxy.extractors.lulustream import LuluStreamExtractor from mediaflow_proxy.extractors.maxstream import MaxstreamExtractor @@ -33,6 +34,7 @@ class ExtractorFactory: "FileLions": FileLionsExtractor, "FileMoon": FileMoonExtractor, "F16Px": F16PxExtractor, + "Gupload": GuploadExtractor, "Uqload": UqloadExtractor, "Mixdrop": MixdropExtractor, "Streamtape": StreamtapeExtractor, diff --git a/mediaflow_proxy/extractors/fastream.py b/mediaflow_proxy/extractors/fastream.py index dcfe8c4..c656faf 100644 --- a/mediaflow_proxy/extractors/fastream.py +++ b/mediaflow_proxy/extractors/fastream.py @@ -4,25 +4,29 @@ from mediaflow_proxy.extractors.base import BaseExtractor from mediaflow_proxy.utils.packed import eval_solver - - class FastreamExtractor(BaseExtractor): """Fastream URL extractor.""" + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.mediaflow_endpoint = "hls_manifest_proxy" async def extract(self, url: str, **kwargs) -> Dict[str, Any]: - headers = {'Accept': '*/*', 'Connection': 'keep-alive','Accept-Language': 'en-US,en;q=0.5','Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0'} + headers = { + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Connection": "keep-alive", + "Accept-Language": "en-US,en;q=0.5", + "user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0", + } patterns = [r'file:"(.*?)"'] final_url = await eval_solver(self, url, headers, patterns) - self.base_headers["referer"] = f'https://{url.replace("https://","").split("/")[0]}/' - self.base_headers["origin"] = f'https://{url.replace("https://","").split("/")[0]}' - self.base_headers['Accept-Language'] = 'en-US,en;q=0.5' - self.base_headers['Accept'] = '*/*' - self.base_headers['user-agent'] = 'Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0' + self.base_headers["referer"] = f"https://{url.replace('https://', '').split('/')[0]}/" + self.base_headers["origin"] = f"https://{url.replace('https://', '').split('/')[0]}" + self.base_headers["Accept-Language"] = "en-US,en;q=0.5" + self.base_headers["Accept"] = "*/*" + self.base_headers["user-agent"] = "Mozilla/5.0 (X11; Linux x86_64; rv:138.0) Gecko/20100101 Firefox/138.0" return { "destination_url": final_url, diff --git a/mediaflow_proxy/extractors/filelions.py b/mediaflow_proxy/extractors/filelions.py index 25271ed..1eb0dc9 100644 --- a/mediaflow_proxy/extractors/filelions.py +++ b/mediaflow_proxy/extractors/filelions.py @@ -3,17 +3,18 @@ from typing import Dict, Any from mediaflow_proxy.extractors.base import BaseExtractor from mediaflow_proxy.utils.packed import eval_solver + class FileLionsExtractor(BaseExtractor): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.mediaflow_endpoint = "hls_manifest_proxy" async def extract(self, url: str, **kwargs) -> Dict[str, Any]: - headers = {} - patterns = [ # See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/filelions.py - r'''sources:\s*\[{file:\s*["'](?P[^"']+)''', - r'''["']hls4["']:\s*["'](?P[^"']+)''', - r'''["']hls2["']:\s*["'](?P[^"']+)''' + headers = {} + patterns = [ # See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/filelions.py + r"""sources:\s*\[{file:\s*["'](?P[^"']+)""", + r"""["']hls4["']:\s*["'](?P[^"']+)""", + r"""["']hls2["']:\s*["'](?P[^"']+)""", ] final_url = await eval_solver(self, url, headers, patterns) @@ -23,4 +24,5 @@ class FileLionsExtractor(BaseExtractor): "destination_url": final_url, "request_headers": self.base_headers, "mediaflow_endpoint": self.mediaflow_endpoint, + "stream_transformer": "ts_stream", } diff --git a/mediaflow_proxy/extractors/filemoon.py b/mediaflow_proxy/extractors/filemoon.py index 808042a..e67d3f1 100644 --- a/mediaflow_proxy/extractors/filemoon.py +++ b/mediaflow_proxy/extractors/filemoon.py @@ -40,7 +40,7 @@ class FileMoonExtractor(BaseExtractor): ) test_resp = await self._make_request(final_url, headers=headers) - if test_resp.status_code == 404: + if test_resp.status == 404: raise ExtractorError("Stream not found (404)") self.base_headers["referer"] = url diff --git a/mediaflow_proxy/extractors/gupload.py b/mediaflow_proxy/extractors/gupload.py new file mode 100644 index 0000000..f509877 --- /dev/null +++ b/mediaflow_proxy/extractors/gupload.py @@ -0,0 +1,65 @@ +import re +import base64 +import json +from typing import Dict, Any +from urllib.parse import urlparse + +from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError + + +class GuploadExtractor(BaseExtractor): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.mediaflow_endpoint = "hls_manifest_proxy" + + async def extract(self, url: str) -> Dict[str, Any]: + parsed = urlparse(url) + if not parsed.hostname or "gupload.xyz" not in parsed.hostname: + raise ExtractorError("GUPLOAD: Invalid domain") + + headers = { + "User-Agent": ( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/144 Safari/537.36" + ), + "Referer": "https://gupload.xyz/", + "Origin": "https://gupload.xyz", + } + + # --- Fetch embed page --- + response = await self._make_request(url, headers=headers) + html = response.text + + # --- Extract base64 payload --- + match = re.search(r"decodePayload\('([^']+)'\)", html) + if not match: + raise ExtractorError("GUPLOAD: Payload not found") + + encoded = match.group(1).strip() + + # --- Decode payload --- + try: + decoded = base64.b64decode(encoded).decode("utf-8", "ignore") + # payload format: |{json} + json_part = decoded.split("|", 1)[1] + payload = json.loads(json_part) + except Exception: + raise ExtractorError("GUPLOAD: Payload decode failed") + + # --- Extract HLS URL --- + hls_url = payload.get("videoUrl") + if not hls_url: + raise ExtractorError("GUPLOAD: videoUrl missing") + + # --- Validate stream (prevents client timeout) --- + test = await self._make_request(hls_url, headers=headers, raise_on_status=False) + if test.status >= 400: + raise ExtractorError(f"GUPLOAD: Stream unavailable ({test.status})") + + # Return MASTER playlist + return { + "destination_url": hls_url, + "request_headers": headers, + "mediaflow_endpoint": self.mediaflow_endpoint, + } diff --git a/mediaflow_proxy/extractors/livetv.py b/mediaflow_proxy/extractors/livetv.py index fbe0c93..c1ab473 100644 --- a/mediaflow_proxy/extractors/livetv.py +++ b/mediaflow_proxy/extractors/livetv.py @@ -2,9 +2,9 @@ import re from typing import Dict, Tuple, Optional from urllib.parse import urljoin, urlparse, unquote -from httpx import Response +import aiohttp -from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError +from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError, HttpResponse class LiveTVExtractor(BaseExtractor): @@ -33,20 +33,21 @@ class LiveTVExtractor(BaseExtractor): stream_title: Optional stream title to filter specific stream Returns: - Tuple[str, Dict[str, str]]: Stream URL and required headers + Dict containing destination_url, request_headers, and mediaflow_endpoint """ try: # Get the channel page response = await self._make_request(url) + response_text = response.text self.base_headers["referer"] = urljoin(url, "/") # Extract player API details - player_api_base, method = await self._extract_player_api_base(response.text) + player_api_base, method = await self._extract_player_api_base(response_text) if not player_api_base: raise ExtractorError("Failed to extract player API URL") # Get player options - options_data = await self._get_player_options(response.text) + options_data = await self._get_player_options(response_text) if not options_data: raise ExtractorError("No player options found") @@ -66,7 +67,7 @@ class LiveTVExtractor(BaseExtractor): if not stream_url: continue - response = { + result = { "destination_url": stream_url, "request_headers": self.base_headers, "mediaflow_endpoint": self.mediaflow_endpoint, @@ -75,7 +76,7 @@ class LiveTVExtractor(BaseExtractor): # Set endpoint based on stream type if stream_data.get("type") == "mpd": if stream_data.get("drm_key_id") and stream_data.get("drm_key"): - response.update( + result.update( { "query_params": { "key_id": stream_data["drm_key_id"], @@ -85,7 +86,7 @@ class LiveTVExtractor(BaseExtractor): } ) - return response + return result raise ExtractorError("No valid stream found") @@ -120,7 +121,12 @@ class LiveTVExtractor(BaseExtractor): api_url = f"{api_base}{post}/{type_}/{nume}" response = await self._make_request(api_url) else: - form_data = {"action": "doo_player_ajax", "post": post, "nume": nume, "type": type_} + # Use aiohttp FormData for POST requests + form_data = aiohttp.FormData() + form_data.add_field("action", "doo_player_ajax") + form_data.add_field("post", post) + form_data.add_field("nume", nume) + form_data.add_field("type", type_) response = await self._make_request(api_base, method="POST", data=form_data) # Get iframe URL from API response @@ -136,7 +142,7 @@ class LiveTVExtractor(BaseExtractor): except Exception as e: raise ExtractorError(f"Failed to process player option: {str(e)}") - async def _extract_stream_url(self, iframe_response: Response, iframe_url: str) -> Dict: + async def _extract_stream_url(self, iframe_response: HttpResponse, iframe_url: str) -> Dict: """ Extract final stream URL from iframe content. """ @@ -147,8 +153,9 @@ class LiveTVExtractor(BaseExtractor): # Check if content is already a direct M3U8 stream content_types = ["application/x-mpegurl", "application/vnd.apple.mpegurl"] + content_type = iframe_response.headers.get("content-type", "") - if any(ext in iframe_response.headers["content-type"] for ext in content_types): + if any(ext in content_type for ext in content_types): return {"url": iframe_url, "type": "m3u8"} stream_data = {} diff --git a/mediaflow_proxy/extractors/lulustream.py b/mediaflow_proxy/extractors/lulustream.py index 4c1d4c9..63aaf7d 100644 --- a/mediaflow_proxy/extractors/lulustream.py +++ b/mediaflow_proxy/extractors/lulustream.py @@ -13,7 +13,7 @@ class LuluStreamExtractor(BaseExtractor): response = await self._make_request(url) # See https://github.com/Gujal00/ResolveURL/blob/master/script.module.resolveurl/lib/resolveurl/plugins/lulustream.py - pattern = r'''sources:\s*\[{file:\s*["'](?P[^"']+)''' + pattern = r"""sources:\s*\[{file:\s*["'](?P[^"']+)""" match = re.search(pattern, response.text, re.DOTALL) if not match: raise ExtractorError("Failed to extract source URL") diff --git a/mediaflow_proxy/extractors/mixdrop.py b/mediaflow_proxy/extractors/mixdrop.py index bd77f34..dff9870 100644 --- a/mediaflow_proxy/extractors/mixdrop.py +++ b/mediaflow_proxy/extractors/mixdrop.py @@ -1,6 +1,6 @@ from typing import Dict, Any -from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError +from mediaflow_proxy.extractors.base import BaseExtractor from mediaflow_proxy.utils.packed import eval_solver diff --git a/mediaflow_proxy/extractors/okru.py b/mediaflow_proxy/extractors/okru.py index bf6d307..6aa6ec8 100644 --- a/mediaflow_proxy/extractors/okru.py +++ b/mediaflow_proxy/extractors/okru.py @@ -22,7 +22,9 @@ class OkruExtractor(BaseExtractor): data_options = div.get("data-options") data = json.loads(data_options) metadata = json.loads(data["flashvars"]["metadata"]) - final_url = metadata.get("hlsMasterPlaylistUrl") or metadata.get("hlsManifestUrl") + final_url = ( + metadata.get("hlsMasterPlaylistUrl") or metadata.get("hlsManifestUrl") or metadata.get("ondemandHls") + ) self.base_headers["referer"] = url return { "destination_url": final_url, diff --git a/mediaflow_proxy/extractors/sportsonline.py b/mediaflow_proxy/extractors/sportsonline.py index f1c3b90..a72a62b 100644 --- a/mediaflow_proxy/extractors/sportsonline.py +++ b/mediaflow_proxy/extractors/sportsonline.py @@ -1,10 +1,10 @@ import re import logging -from typing import Any, Dict, Optional +from typing import Any, Dict from urllib.parse import urlparse from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError -from mediaflow_proxy.utils.packed import detect, unpack +from mediaflow_proxy.utils.packed import unpack logger = logging.getLogger(__name__) @@ -32,18 +32,17 @@ class SportsonlineExtractor(BaseExtractor): def _detect_packed_blocks(self, html: str) -> list[str]: """ Detect and extract packed eval blocks from HTML. - Replicates the TypeScript logic: /eval\(function(.+?.+)/g """ # Find all eval(function...) blocks - more greedy to capture full packed code pattern = re.compile(r"eval\(function\(p,a,c,k,e,.*?\)\)(?:\s*;|\s*<)", re.DOTALL) raw_matches = pattern.findall(html) - + # If no matches with the strict pattern, try a more relaxed one if not raw_matches: # Try to find eval(function and capture until we find the closing )) pattern = re.compile(r"eval\(function\(p,a,c,k,e,[dr]\).*?\}\(.*?\)\)", re.DOTALL) raw_matches = pattern.findall(html) - + return raw_matches async def extract(self, url: str, **kwargs) -> Dict[str, Any]: @@ -60,25 +59,25 @@ class SportsonlineExtractor(BaseExtractor): raise ExtractorError("No iframe found on the page") iframe_url = iframe_match.group(1) - + # Normalize iframe URL - if iframe_url.startswith('//'): - iframe_url = 'https:' + iframe_url - elif iframe_url.startswith('/'): + if iframe_url.startswith("//"): + iframe_url = "https:" + iframe_url + elif iframe_url.startswith("/"): parsed_main = urlparse(url) iframe_url = f"{parsed_main.scheme}://{parsed_main.netloc}{iframe_url}" - + logger.info(f"Found iframe URL: {iframe_url}") # Step 2: Fetch iframe with Referer iframe_headers = { - 'Referer': 'https://sportzonline.st/', - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36', - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', - 'Accept-Language': 'en-US,en;q=0.9,it;q=0.8', - 'Cache-Control': 'no-cache' + "Referer": "https://sportzonline.st/", + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept-Language": "en-US,en;q=0.9,it;q=0.8", + "Cache-Control": "no-cache", } - + iframe_response = await self._make_request(iframe_url, headers=iframe_headers, timeout=15) iframe_html = iframe_response.text @@ -86,9 +85,9 @@ class SportsonlineExtractor(BaseExtractor): # Step 3: Detect packed blocks packed_blocks = self._detect_packed_blocks(iframe_html) - + logger.info(f"Found {len(packed_blocks)} packed blocks") - + if not packed_blocks: logger.warning("No packed blocks found, trying direct m3u8 search") # Fallback: try direct m3u8 search @@ -96,13 +95,10 @@ class SportsonlineExtractor(BaseExtractor): if direct_match: m3u8_url = direct_match.group(1) logger.info(f"Found direct m3u8 URL: {m3u8_url}") - + return { "destination_url": m3u8_url, - "request_headers": { - 'Referer': iframe_url, - 'User-Agent': iframe_headers['User-Agent'] - }, + "request_headers": {"Referer": iframe_url, "User-Agent": iframe_headers["User-Agent"]}, "mediaflow_endpoint": self.mediaflow_endpoint, } else: @@ -134,13 +130,13 @@ class SportsonlineExtractor(BaseExtractor): r'file\s*:\s*["\']([^"\']+\.m3u8[^"\']*)["\']', # file: "...m3u8" r'["\']([^"\']*https?://[^"\']+\.m3u8[^"\']*)["\']', # any m3u8 URL ] - + for pattern in patterns: src_match = re.search(pattern, unpacked_code) if src_match: m3u8_url = src_match.group(1) # Verify it looks like a valid m3u8 URL - if '.m3u8' in m3u8_url or 'http' in m3u8_url: + if ".m3u8" in m3u8_url or "http" in m3u8_url: break m3u8_url = None @@ -162,11 +158,11 @@ class SportsonlineExtractor(BaseExtractor): src_match = re.search(pattern, unpacked_code) if src_match: test_url = src_match.group(1) - if '.m3u8' in test_url or 'http' in test_url: + if ".m3u8" in test_url or "http" in test_url: m3u8_url = test_url logger.info(f"Found m3u8 in block {i}") break - + if m3u8_url: break except Exception as e: @@ -181,10 +177,7 @@ class SportsonlineExtractor(BaseExtractor): # Return stream configuration return { "destination_url": m3u8_url, - "request_headers": { - 'Referer': iframe_url, - 'User-Agent': iframe_headers['User-Agent'] - }, + "request_headers": {"Referer": iframe_url, "User-Agent": iframe_headers["User-Agent"]}, "mediaflow_endpoint": self.mediaflow_endpoint, } diff --git a/mediaflow_proxy/extractors/streamtape.py b/mediaflow_proxy/extractors/streamtape.py index c180b7c..a5a6966 100644 --- a/mediaflow_proxy/extractors/streamtape.py +++ b/mediaflow_proxy/extractors/streamtape.py @@ -15,8 +15,8 @@ class StreamtapeExtractor(BaseExtractor): if not matches: raise ExtractorError("Failed to extract URL components") i = 0 - for i in range(len(matches)): - if matches[i-1] == matches[i] and "ip=" in matches[i]: + for i in range(len(matches)): + if matches[i - 1] == matches[i] and "ip=" in matches[i]: final_url = f"https://streamtape.com/get_video?{matches[i]}" self.base_headers["referer"] = url diff --git a/mediaflow_proxy/extractors/streamwish.py b/mediaflow_proxy/extractors/streamwish.py index 51c665a..d09fd94 100644 --- a/mediaflow_proxy/extractors/streamwish.py +++ b/mediaflow_proxy/extractors/streamwish.py @@ -19,18 +19,11 @@ class StreamWishExtractor(BaseExtractor): headers = {"Referer": referer} response = await self._make_request(url, headers=headers) - - iframe_match = re.search( - r']+src=["\']([^"\']+)["\']', - response.text, - re.DOTALL - ) + + iframe_match = re.search(r']+src=["\']([^"\']+)["\']', response.text, re.DOTALL) iframe_url = urljoin(url, iframe_match.group(1)) if iframe_match else url - iframe_response = await self._make_request( - iframe_url, - headers=headers - ) + iframe_response = await self._make_request(iframe_url, headers=headers) html = iframe_response.text final_url = self._extract_m3u8(html) @@ -58,15 +51,18 @@ class StreamWishExtractor(BaseExtractor): final_url = urljoin(iframe_url, final_url) origin = f"{urlparse(referer).scheme}://{urlparse(referer).netloc}" - self.base_headers.update({ - "Referer": referer, - "Origin": origin, - }) + self.base_headers.update( + { + "Referer": referer, + "Origin": origin, + } + ) return { "destination_url": final_url, "request_headers": self.base_headers, "mediaflow_endpoint": self.mediaflow_endpoint, + "stream_transformer": "ts_stream", } @staticmethod @@ -74,8 +70,5 @@ class StreamWishExtractor(BaseExtractor): """ Extract first absolute m3u8 URL from text """ - match = re.search( - r'https?://[^"\']+\.m3u8[^"\']*', - text - ) + match = re.search(r'https?://[^"\']+\.m3u8[^"\']*', text) return match.group(0) if match else None diff --git a/mediaflow_proxy/extractors/supervideo.py b/mediaflow_proxy/extractors/supervideo.py index f2be69a..d62ab3f 100644 --- a/mediaflow_proxy/extractors/supervideo.py +++ b/mediaflow_proxy/extractors/supervideo.py @@ -1,27 +1,64 @@ import re from typing import Dict, Any +from urllib.parse import urljoin, urlparse -from mediaflow_proxy.extractors.base import BaseExtractor -from mediaflow_proxy.utils.packed import eval_solver - +from bs4 import BeautifulSoup, SoupStrainer +from curl_cffi.requests import AsyncSession +from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError +from mediaflow_proxy.utils.packed import unpack, detect, UnpackingError class SupervideoExtractor(BaseExtractor): - """Supervideo URL extractor.""" + """Supervideo URL extractor. + + Uses curl_cffi to bypass Cloudflare protection. + """ + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.mediaflow_endpoint = "hls_manifest_proxy" - + async def extract(self, url: str, **kwargs) -> Dict[str, Any]: - headers = {'Accept': '*/*', 'Connection': 'keep-alive', 'User-Agent': 'Mozilla/5.0 (Linux; Android 12) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.71 Mobile Safari/537.36', 'user-agent': 'Mozilla/5.0 (Linux; Android 12) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.71 Mobile Safari/537.36'} + """Extract video URL from Supervideo. + + Uses curl_cffi with Chrome impersonation to bypass Cloudflare. + """ + patterns = [r'file:"(.*?)"'] - final_url = await eval_solver(self, url, headers, patterns) + try: + async with AsyncSession() as session: + response = await session.get(url, impersonate="chrome") - self.base_headers["referer"] = url - return { - "destination_url": final_url, - "request_headers": self.base_headers, - "mediaflow_endpoint": self.mediaflow_endpoint, - } + if response.status_code != 200: + raise ExtractorError(f"HTTP {response.status_code} while fetching {url}") + + soup = BeautifulSoup(response.text, "lxml", parse_only=SoupStrainer("script")) + script_all = soup.find_all("script") + + for script in script_all: + if script.text and detect(script.text): + unpacked_code = unpack(script.text) + for pattern in patterns: + match = re.search(pattern, unpacked_code) + if match: + extracted_url = match.group(1) + if not urlparse(extracted_url).scheme: + extracted_url = urljoin(url, extracted_url) + + self.base_headers["referer"] = url + return { + "destination_url": extracted_url, + "request_headers": self.base_headers, + "mediaflow_endpoint": self.mediaflow_endpoint, + } + + raise ExtractorError("No packed JS found or no file URL pattern matched") + + except UnpackingError as e: + raise ExtractorError(f"Failed to unpack Supervideo JS: {e}") + except Exception as e: + if isinstance(e, ExtractorError): + raise + raise ExtractorError(f"Supervideo extraction failed: {e}") diff --git a/mediaflow_proxy/extractors/turbovidplay.py b/mediaflow_proxy/extractors/turbovidplay.py index d0b7a47..611bf92 100644 --- a/mediaflow_proxy/extractors/turbovidplay.py +++ b/mediaflow_proxy/extractors/turbovidplay.py @@ -1,5 +1,4 @@ import re -from typing import Dict, Any from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError @@ -36,7 +35,7 @@ class TurboVidPlayExtractor(BaseExtractor): if media_url.startswith("//"): media_url = "https:" + media_url elif media_url.startswith("/"): - media_url = response.url.origin + media_url + media_url = response.get_origin() + media_url # # 3. Fetch the intermediate playlist @@ -53,16 +52,11 @@ class TurboVidPlayExtractor(BaseExtractor): real_m3u8 = m2.group(0) - # - # 5. Final headers - # - self.base_headers["referer"] = url - - # - # 6. Always return master proxy (your MediaFlow only supports this) - # return { "destination_url": real_m3u8, - "request_headers": self.base_headers, + "request_headers": {"origin": response.get_origin()}, + "propagate_response_headers": {"content-type": "video/mp2t"}, + "remove_response_headers": ["content-length", "content-range"], "mediaflow_endpoint": "hls_manifest_proxy", + "stream_transformer": "ts_stream", # Use TS transformer for PNG/padding stripping } diff --git a/mediaflow_proxy/extractors/vavoo.py b/mediaflow_proxy/extractors/vavoo.py index 34c31eb..b096512 100644 --- a/mediaflow_proxy/extractors/vavoo.py +++ b/mediaflow_proxy/extractors/vavoo.py @@ -1,5 +1,6 @@ import logging from typing import Any, Dict, Optional + from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError logger = logging.getLogger(__name__) @@ -8,6 +9,11 @@ logger = logging.getLogger(__name__) class VavooExtractor(BaseExtractor): """Vavoo URL extractor for resolving vavoo.to links. + Supports two URL formats: + 1. Web-VOD API links: https://vavoo.to/web-vod/api/get?link=... + These redirect (302) to external video hosts (Doodstream, etc.) + 2. Legacy mediahubmx format (currently broken on Vavoo's end) + Features: - Uses BaseExtractor's retry/timeouts - Improved headers to mimic Android okhttp client @@ -18,6 +24,40 @@ class VavooExtractor(BaseExtractor): super().__init__(request_headers) self.mediaflow_endpoint = "proxy_stream_endpoint" + async def _resolve_web_vod_link(self, url: str) -> str: + """Resolve a web-vod API link by getting the redirect Location header.""" + import aiohttp + + try: + # Use aiohttp directly with allow_redirects=False to get the Location header + timeout = aiohttp.ClientTimeout(total=10) + async with aiohttp.ClientSession(timeout=timeout) as session: + async with session.get( + url, + headers={"Accept": "application/json"}, + allow_redirects=False, + ) as resp: + # Check for redirect + if resp.status in (301, 302, 303, 307, 308): + location = resp.headers.get("Location") or resp.headers.get("location") + if location: + logger.info(f"Vavoo web-vod redirected to: {location}") + return location + + # If we got a 200, the response might contain the URL + if resp.status == 200: + text = await resp.text() + if text and text.startswith("http"): + logger.info(f"Vavoo web-vod resolved to: {text.strip()}") + return text.strip() + + raise ExtractorError(f"Vavoo web-vod API returned unexpected status {resp.status}") + + except ExtractorError: + raise + except Exception as e: + raise ExtractorError(f"Failed to resolve Vavoo web-vod link: {e}") + async def get_auth_signature(self) -> Optional[str]: """Get authentication signature for Vavoo API (async).""" headers = { @@ -27,10 +67,11 @@ class VavooExtractor(BaseExtractor): "accept-encoding": "gzip", } import time + current_time = int(time.time() * 1000) data = { - "token": "tosFwQCJMS8qrW_AjLoHPQ41646J5dRNha6ZWHnijoYQQQoADQoXYSo7ki7O5-CsgN4CH0uRk6EEoJ0728ar9scCRQW3ZkbfrPfeCXW2VgopSW2FWDqPOoVYIuVPAOnXCZ5g", + "token": "", "reason": "app-blur", "locale": "de", "theme": "dark", @@ -40,21 +81,11 @@ class VavooExtractor(BaseExtractor): "brand": "google", "model": "Pixel", "name": "sdk_gphone64_arm64", - "uniqueId": "d10e5d99ab665233" - }, - "os": { - "name": "android", - "version": "13" - }, - "app": { - "platform": "android", - "version": "3.1.21" - }, - "version": { - "package": "tv.vavoo.app", - "binary": "3.1.21", - "js": "3.1.21" + "uniqueId": "d10e5d99ab665233", }, + "os": {"name": "android", "version": "13"}, + "app": {"platform": "android", "version": "3.1.21"}, + "version": {"package": "tv.vavoo.app", "binary": "3.1.21", "js": "3.1.21"}, }, "appFocusTime": 0, "playerActive": False, @@ -75,11 +106,9 @@ class VavooExtractor(BaseExtractor): "ssVersion": 1, "enabled": True, "autoServer": True, - "id": "de-fra" + "id": "de-fra", }, - "iap": { - "supported": False - } + "iap": {"supported": False}, } try: @@ -94,7 +123,7 @@ class VavooExtractor(BaseExtractor): try: result = resp.json() except Exception: - logger.warning("Vavoo ping returned non-json response (status=%s).", resp.status_code) + logger.warning("Vavoo ping returned non-json response (status=%s).", resp.status) return None addon_sig = result.get("addonSig") if isinstance(result, dict) else None @@ -109,10 +138,48 @@ class VavooExtractor(BaseExtractor): return None async def extract(self, url: str, **kwargs) -> Dict[str, Any]: - """Extract Vavoo stream URL (async).""" + """Extract Vavoo stream URL (async). + + Supports: + - Direct play URLs: https://vavoo.to/play/{id}/index.m3u8 (Live TV) + - Web-VOD API links: https://vavoo.to/web-vod/api/get?link=... + - Legacy mediahubmx links (may not work due to Vavoo API changes) + """ if "vavoo.to" not in url: raise ExtractorError("Not a valid Vavoo URL") + # Check if this is a direct play URL (Live TV) + # These URLs are already m3u8 streams but need auth signature + if "/play/" in url and url.endswith(".m3u8"): + signature = await self.get_auth_signature() + if not signature: + raise ExtractorError("Failed to get Vavoo authentication signature for Live TV") + + stream_headers = { + "user-agent": "okhttp/4.11.0", + "referer": "https://vavoo.to/", + "mediahubmx-signature": signature, + } + return { + "destination_url": url, + "request_headers": stream_headers, + "mediaflow_endpoint": "hls_manifest_proxy", + } + + # Check if this is a web-vod API link (new format) + if "/web-vod/api/get" in url: + resolved_url = await self._resolve_web_vod_link(url) + stream_headers = { + "user-agent": self.base_headers.get("user-agent", "Mozilla/5.0"), + "referer": "https://vavoo.to/", + } + return { + "destination_url": resolved_url, + "request_headers": stream_headers, + "mediaflow_endpoint": self.mediaflow_endpoint, + } + + # Legacy mediahubmx flow signature = await self.get_auth_signature() if not signature: raise ExtractorError("Failed to get Vavoo authentication signature") @@ -139,14 +206,9 @@ class VavooExtractor(BaseExtractor): "accept": "application/json", "content-type": "application/json; charset=utf-8", "accept-encoding": "gzip", - "mediahubmx-signature": signature - } - data = { - "language": "de", - "region": "AT", - "url": link, - "clientVersion": "3.1.21" + "mediahubmx-signature": signature, } + data = {"language": "de", "region": "AT", "url": link, "clientVersion": "3.1.21"} try: logger.info(f"Attempting to resolve Vavoo URL: {link}") resp = await self._make_request( @@ -161,7 +223,11 @@ class VavooExtractor(BaseExtractor): try: result = resp.json() except Exception: - logger.warning("Vavoo resolve returned non-json response (status=%s). Body preview: %s", resp.status_code, getattr(resp, "text", "")[:500]) + logger.warning( + "Vavoo resolve returned non-json response (status=%s). Body preview: %s", + resp.status, + getattr(resp, "text", "")[:500], + ) return None logger.debug("Vavoo API response: %s", result) diff --git a/mediaflow_proxy/extractors/vidmoly.py b/mediaflow_proxy/extractors/vidmoly.py index c46d189..161f813 100644 --- a/mediaflow_proxy/extractors/vidmoly.py +++ b/mediaflow_proxy/extractors/vidmoly.py @@ -16,10 +16,9 @@ class VidmolyExtractor(BaseExtractor): raise ExtractorError("VIDMOLY: Invalid domain") headers = { - "User-Agent": - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " - "AppleWebKit/537.36 (KHTML, like Gecko) " - "Chrome/120 Safari/537.36", + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/120 Safari/537.36", "Referer": url, "Sec-Fetch-Dest": "iframe", } @@ -29,10 +28,7 @@ class VidmolyExtractor(BaseExtractor): html = response.text # --- Extract master m3u8 --- - match = re.search( - r'sources:\s*\[\{file:"([^"]+)', - html - ) + match = re.search(r'sources\s*:\s*\[\s*\{\s*file\s*:\s*[\'"]([^\'"]+)', html) if not match: raise ExtractorError("VIDMOLY: Stream URL not found") @@ -49,10 +45,8 @@ class VidmolyExtractor(BaseExtractor): raise ExtractorError("VIDMOLY: Request timed out") raise - if test.status_code >= 400: - raise ExtractorError( - f"VIDMOLY: Stream unavailable ({test.status_code})" - ) + if test.status >= 400: + raise ExtractorError(f"VIDMOLY: Stream unavailable ({test.status})") # Return MASTER playlist, not variant # Let MediaFlow Proxy handle variants diff --git a/mediaflow_proxy/extractors/vidoza.py b/mediaflow_proxy/extractors/vidoza.py index ddfffa2..6ad0fcc 100644 --- a/mediaflow_proxy/extractors/vidoza.py +++ b/mediaflow_proxy/extractors/vidoza.py @@ -8,23 +8,23 @@ from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError class VidozaExtractor(BaseExtractor): def __init__(self, request_headers: dict): super().__init__(request_headers) - # if your base doesn’t set this, keep it; otherwise you can remove: self.mediaflow_endpoint = "proxy_stream_endpoint" async def extract(self, url: str, **kwargs) -> Dict[str, Any]: parsed = urlparse(url) - # Accept vidoza + videzz if not parsed.hostname or not ( - parsed.hostname.endswith("vidoza.net") - or parsed.hostname.endswith("videzz.net") + parsed.hostname.endswith("vidoza.net") or parsed.hostname.endswith("videzz.net") ): raise ExtractorError("VIDOZA: Invalid domain") + # Use the correct referer for clones + referer = f"https://{parsed.hostname}/" + headers = self.base_headers.copy() headers.update( { - "referer": "https://vidoza.net/", + "referer": referer, "user-agent": ( "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " "AppleWebKit/537.36 (KHTML, like Gecko) " @@ -35,16 +35,14 @@ class VidozaExtractor(BaseExtractor): } ) - # 1) Fetch the embed page (or whatever URL you pass in) + # 1) Fetch embed page response = await self._make_request(url, headers=headers) html = response.text or "" if not html: - raise ExtractorError("VIDOZA: Empty HTML from Vidoza") + raise ExtractorError("VIDOZA: Empty HTML") - cookies = response.cookies or {} - - # 2) Extract final link with REGEX + # 2) Extract video URL pattern = re.compile( r"""["']?\s*(?:file|src)\s*["']?\s*[:=,]?\s*["'](?P[^"']+)""" r"""(?:[^}>\]]+)["']?\s*res\s*["']?\s*[:=]\s*["']?(?P