update

2026-04-11 11:50:51 +00:00 · 2026-02-19 20:15:03 +01:00
parent 7785e8c604
commit cfc6bbabc9
181 changed files with 32141 additions and 4629 deletions
--- a/mediaflow_proxy/utils/stream_transformers.py
+++ b/mediaflow_proxy/utils/stream_transformers.py
@@ -0,0 +1,241 @@
+"""
+Stream transformers for host-specific content manipulation.
+
+This module provides transformer classes that can modify streaming content
+on-the-fly. Each transformer handles specific content manipulation needs
+for different streaming hosts (e.g., PNG wrapper stripping, TS detection).
+"""
+
+import logging
+import typing
+
+# Module-level logger; records are emitted under this module's dotted name.
+logger = logging.getLogger(__name__)
+
+
+class StreamTransformer:
+    """
+    Identity transformer that host-specific transformers derive from.
+
+    On its own it forwards the upstream stream untouched; subclasses
+    override ``transform`` to rewrite the bytes as they pass through.
+    """
+
+    async def transform(self, chunk_iterator: typing.AsyncIterator[bytes]) -> typing.AsyncGenerator[bytes, None]:
+        """
+        Relay upstream chunks without modification.
+
+        Args:
+            chunk_iterator: Async iterator of raw bytes from upstream.
+
+        Yields:
+            Every chunk exactly as received, in the original order.
+        """
+        async for piece in chunk_iterator:
+            yield piece
+
+
+class TSStreamTransformer(StreamTransformer):
+    """
+    Transformer for MPEG-TS streams with obfuscation.
+
+    Handles streams from hosts like TurboVidPlay, StreamWish, and FileMoon
+    that may have:
+    - Fake PNG wrapper prepended to video data
+    - 0xFF padding bytes before actual content
+    - Need for TS sync byte detection
+
+    Incoming chunks are accumulated in ``self.buffer`` until the start of the
+    real payload is located (or ``_MAX_PREFETCH`` is exceeded); from then on
+    chunks are relayed untouched. Detection state lives on the instance and is
+    never reset, so use a fresh instance per stream.
+    """
+
+    # PNG signature and IEND marker for fake PNG header detection
+    _PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n"
+    # ASCII "IEND" followed by the four CRC bytes that close a PNG file
+    _PNG_IEND_MARKER = b"\x49\x45\x4e\x44\xae\x42\x60\x82"
+
+    # TS packet constants
+    _TS_SYNC = 0x47
+    _TS_PACKET_SIZE = 188
+
+    # Maximum bytes to buffer before forcing passthrough
+    _MAX_PREFETCH = 512 * 1024  # 512 KB
+
+    # Prefixes that identify an HLS playlist instead of TS data
+    _PLAYLIST_PREFIXES = (b"#EXTM3U", b"#EXT-X-")
+
+    def __init__(self):
+        # Bytes received so far while the payload start is still unknown
+        self.buffer = bytearray()
+        # True once detection is finished and chunks are relayed untouched
+        self.ts_started = False
+        # Size of the last PNG wrapper (plus trailing padding) that was removed
+        self.bytes_stripped = 0
+
+    @staticmethod
+    def _find_ts_start(buffer: typing.Union[bytes, bytearray]) -> typing.Optional[int]:
+        """
+        Find MPEG-TS sync byte (0x47) aligned on 188 bytes.
+
+        A position qualifies when it holds a sync byte and the byte exactly
+        one packet later is a sync byte as well.
+
+        Args:
+            buffer: Bytes to search for TS sync pattern.
+
+        Returns:
+            Offset where TS starts, or None if not found.
+        """
+        sync = TSStreamTransformer._TS_SYNC
+        packet = TSStreamTransformer._TS_PACKET_SIZE
+
+        # Candidates must leave room for the confirming byte one packet later.
+        limit = len(buffer) - packet
+        if limit <= 0:
+            return None
+
+        # Let find() jump between sync bytes instead of testing every offset.
+        needle = bytes((sync,))
+        pos = buffer.find(needle, 0, limit)
+        while pos != -1:
+            if buffer[pos + packet] == sync:
+                return pos
+            pos = buffer.find(needle, pos + 1, limit)
+        return None
+
+    def _strip_fake_png_wrapper(self, chunk: bytes) -> bytes:
+        """
+        Strip fake PNG wrapper from chunk data.
+
+        Some streaming services prepend a fake PNG image to video data
+        to evade detection. This method detects and removes it, together
+        with any 0x00/0xFF padding that directly follows the image, and
+        records the number of removed bytes in ``self.bytes_stripped``.
+
+        Args:
+            chunk: The raw chunk data that may contain a fake PNG header.
+
+        Returns:
+            The chunk with fake PNG wrapper removed, or original chunk if not present.
+        """
+        if not chunk.startswith(self._PNG_SIGNATURE):
+            return chunk
+
+        # Find the IEND marker that signals end of PNG data
+        iend_pos = chunk.find(self._PNG_IEND_MARKER)
+        if iend_pos == -1:
+            # IEND not found in this chunk - return as-is to avoid data corruption
+            logger.debug("PNG signature detected but IEND marker not found in chunk")
+            return chunk
+
+        # Calculate position after IEND marker
+        content_start = iend_pos + len(self._PNG_IEND_MARKER)
+
+        # Skip any padding bytes (null or 0xFF) between PNG and actual content
+        while content_start < len(chunk) and chunk[content_start] in (0x00, 0xFF):
+            content_start += 1
+
+        self.bytes_stripped = content_start
+        logger.debug("Stripped %d bytes of fake PNG wrapper from stream", content_start)
+
+        return chunk[content_start:]
+
+    def _buffer_is_playlist(self) -> bool:
+        """Return True when the buffer starts with an m3u8 playlist tag."""
+        return bytes(self.buffer[:7]) in self._PLAYLIST_PREFIXES
+
+    def _release_buffer(self) -> bytes:
+        """Hand out the buffered bytes and switch to pass-through mode."""
+        out = bytes(self.buffer)
+        self.buffer.clear()
+        self.ts_started = True
+        return out
+
+    def _consume_prebuffer(self) -> typing.Optional[bytes]:
+        """
+        Inspect the prebuffer and decide whether streaming can start.
+
+        Returns:
+            The bytes to emit once the payload start is known (or the
+            prefetch limit forces passthrough), or None to keep buffering.
+        """
+        # Fast-path: if it's an m3u8 playlist, don't do TS detection
+        if self._buffer_is_playlist():
+            return self._release_buffer()
+
+        # Strip fake PNG wrapper if present
+        if self.buffer.startswith(self._PNG_SIGNATURE):
+            if self._PNG_IEND_MARKER not in self.buffer:
+                # The wrapper is still incomplete. Scanning it for TS sync
+                # bytes could match by chance inside the image data, so wait
+                # for more input unless the prefetch limit has been hit.
+                if len(self.buffer) > self._MAX_PREFETCH:
+                    logger.warning("PNG wrapper end not found after large prebuffer, forcing passthrough")
+                    return self._release_buffer()
+                return None
+            self.buffer = bytearray(self._strip_fake_png_wrapper(bytes(self.buffer)))
+
+        # Skip pure 0xFF padding bytes (TurboVid style) in a single pass
+        self.buffer = self.buffer.lstrip(b"\xff")
+
+        # Re-check for m3u8 playlist after stripping PNG wrapper and padding
+        # This handles cases where m3u8 content is wrapped in PNG
+        if self._buffer_is_playlist():
+            logger.debug("Found m3u8 content after stripping wrapper - passing through")
+            return self._release_buffer()
+
+        ts_offset = self._find_ts_start(self.buffer)
+        if ts_offset is None:
+            # Keep buffering until we find TS or hit limit
+            if len(self.buffer) > self._MAX_PREFETCH:
+                logger.warning("TS sync not found after large prebuffer, forcing passthrough")
+                return self._release_buffer()
+            return None
+
+        # TS found: drop everything before it and switch to pass-through
+        del self.buffer[:ts_offset]
+        return self._release_buffer()
+
+    async def transform(self, chunk_iterator: typing.AsyncIterator[bytes]) -> typing.AsyncGenerator[bytes, None]:
+        """
+        Transform TS stream by stripping PNG wrapper and finding TS start.
+
+        If the upstream ends before the payload start was identified, the
+        bytes still held in the prebuffer are emitted rather than dropped.
+
+        Args:
+            chunk_iterator: Async iterator of raw bytes from upstream.
+
+        Yields:
+            Cleaned TS stream bytes.
+        """
+        async for chunk in chunk_iterator:
+            if self.ts_started:
+                # Normal streaming once TS has started
+                yield chunk
+                continue
+
+            # Prebuffer phase (until we find TS or pass through)
+            self.buffer += chunk
+            out = self._consume_prebuffer()
+            if out:
+                yield out
+
+        # Upstream finished while still prebuffering (short or non-TS payload):
+        # flush what is left so no data is silently lost.
+        if not self.ts_started and self.buffer:
+            logger.debug("Stream ended during prebuffer, flushing %d buffered bytes", len(self.buffer))
+            tail = bytes(self.buffer)
+            self.buffer.clear()
+            yield tail
+
+
+# Registry of available transformers.
+# Maps the transformer ID string accepted by get_transformer() to the class
+# that gets instantiated for it; "ts_stream" is the only entry defined here.
+TRANSFORMER_REGISTRY: dict[str, type[StreamTransformer]] = {
+    "ts_stream": TSStreamTransformer,
+}
+
+
+def get_transformer(transformer_id: typing.Optional[str]) -> typing.Optional[StreamTransformer]:
+    """
+    Build the transformer registered under the given ID.
+
+    Args:
+        transformer_id: Key into TRANSFORMER_REGISTRY (e.g., "ts_stream"),
+            or None when no transformation is requested.
+
+    Returns:
+        A freshly constructed transformer, or None when transformer_id is
+        None or has no registry entry (the latter is logged as a warning).
+    """
+    if transformer_id is None:
+        return None
+
+    if (factory := TRANSFORMER_REGISTRY.get(transformer_id)) is not None:
+        return factory()
+
+    logger.warning(f"Unknown transformer ID: {transformer_id}")
+    return None
+
+
+async def apply_transformer_to_bytes(
+    data: bytes,
+    transformer_id: typing.Optional[str],
+) -> bytes:
+    """
+    Run an in-memory payload through a transformer.
+
+    Intended for data that is already fully downloaded (such as cached
+    segments): the payload is fed to the transformer as a one-chunk stream
+    and everything the transformer yields is concatenated.
+
+    Args:
+        data: The raw bytes data to transform.
+        transformer_id: The transformer identifier (e.g., "ts_stream").
+
+    Returns:
+        The concatenated transformer output, or ``data`` itself when no
+        transformer ID is given or the ID cannot be resolved.
+    """
+    # An empty/None ID short-circuits before the registry lookup.
+    transformer = get_transformer(transformer_id) if transformer_id else None
+    if not transformer:
+        return data
+
+    async def _one_shot():
+        # Present the whole payload as a stream consisting of one chunk.
+        yield data
+
+    pieces = [piece async for piece in transformer.transform(_one_shot())]
+    return b"".join(pieces)