This commit is contained in:
UrloMythus
2026-02-19 20:15:03 +01:00
parent 7785e8c604
commit cfc6bbabc9
181 changed files with 32141 additions and 4629 deletions

View File

@@ -0,0 +1,241 @@
"""
Stream transformers for host-specific content manipulation.
This module provides transformer classes that can modify streaming content
on-the-fly. Each transformer handles specific content manipulation needs
for different streaming hosts (e.g., PNG wrapper stripping, TS detection).
"""
import logging
import typing
logger = logging.getLogger(__name__)
class StreamTransformer:
    """
    Base class for stream content transformers.

    The base implementation is an identity transform: every chunk read from
    the upstream iterator is yielded unchanged. Subclasses override
    ``transform`` to implement host-specific content manipulation.
    """

    async def transform(
        self, chunk_iterator: typing.AsyncIterator[bytes]
    ) -> typing.AsyncGenerator[bytes, None]:
        """
        Transform stream chunks.

        Args:
            chunk_iterator: Async iterator of raw bytes from upstream.

        Yields:
            Transformed bytes chunks (unchanged in the base class).
        """
        async for chunk in chunk_iterator:
            yield chunk


class TSStreamTransformer(StreamTransformer):
    """
    Transformer for MPEG-TS streams with obfuscation.

    Handles streams from hosts like TurboVidPlay, StreamWish, and FileMoon
    that may have:
    - Fake PNG wrapper prepended to video data
    - 0xFF padding bytes before actual content
    - Need for TS sync byte detection

    The transformer buffers the beginning of the stream until it can decide
    what the content is, emits the cleaned data, and from then on forwards
    every chunk untouched.
    """

    # PNG signature and IEND marker for fake PNG header detection
    _PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n"
    _PNG_IEND_MARKER = b"\x49\x45\x4e\x44\xae\x42\x60\x82"

    # TS packet constants
    _TS_SYNC = 0x47
    _TS_PACKET_SIZE = 188

    # Maximum bytes to buffer before forcing passthrough
    _MAX_PREFETCH = 512 * 1024  # 512 KB

    # Leading bytes that identify playlist text rather than TS data
    _M3U8_PREFIXES = (b"#EXTM3U", b"#EXT-X-")

    def __init__(self):
        # Bytes collected during the prebuffer phase
        self.buffer = bytearray()
        # True once the prebuffer phase is over and chunks are forwarded as-is
        self.ts_started = False
        # Number of leading bytes removed by the last PNG wrapper strip
        self.bytes_stripped = 0

    @staticmethod
    def _find_ts_start(buffer: typing.Union[bytes, bytearray]) -> typing.Optional[int]:
        """
        Find MPEG-TS sync byte (0x47) aligned on 188 bytes.

        A position qualifies when it holds a sync byte and the byte exactly
        one packet length later is a sync byte as well.

        Args:
            buffer: Bytes to search for TS sync pattern.

        Returns:
            Offset where TS starts, or None if not found.
        """
        sync = TSStreamTransformer._TS_SYNC
        packet = TSStreamTransformer._TS_PACKET_SIZE
        sync_byte = bytes((sync,))

        # A candidate needs a second sync byte one packet later inside the
        # buffer, so candidates live in [0, limit). Clamp at zero because a
        # negative end index would be interpreted relative to the buffer end.
        limit = max(0, len(buffer) - packet)

        # Jump between sync-byte candidates with find() instead of testing
        # every offset in a Python-level loop.
        pos = buffer.find(sync_byte, 0, limit)
        while pos != -1:
            if buffer[pos + packet] == sync:
                return pos
            pos = buffer.find(sync_byte, pos + 1, limit)
        return None

    def _strip_fake_png_wrapper(self, chunk: bytes) -> bytes:
        """
        Strip fake PNG wrapper from chunk data.

        Some streaming services prepend a fake PNG image to video data
        to evade detection. This method detects and removes it, together
        with any 0x00 / 0xFF padding that directly follows the PNG data.
        On success ``self.bytes_stripped`` is set to the number of bytes removed.

        Args:
            chunk: The raw chunk data that may contain a fake PNG header.

        Returns:
            The chunk with fake PNG wrapper removed, or original chunk if not present.
        """
        if not chunk.startswith(self._PNG_SIGNATURE):
            return chunk

        # Find the IEND marker that signals end of PNG data
        iend_pos = chunk.find(self._PNG_IEND_MARKER)
        if iend_pos == -1:
            # IEND not found in this chunk - return as-is to avoid data corruption
            logger.debug("PNG signature detected but IEND marker not found in chunk")
            return chunk

        # Drop everything up to and including IEND, then any padding bytes
        # (null or 0xFF) between the PNG and the actual content.
        after_png = chunk[iend_pos + len(self._PNG_IEND_MARKER):]
        payload = after_png.lstrip(b"\x00\xff")

        self.bytes_stripped = len(chunk) - len(payload)
        logger.debug("Stripped %d bytes of fake PNG wrapper from stream", self.bytes_stripped)
        return payload

    def _buffer_is_m3u8(self) -> bool:
        """Return True when the buffered data starts with an m3u8 playlist tag."""
        return self.buffer.startswith(self._M3U8_PREFIXES)

    def _take_buffer(self) -> bytes:
        """Return the buffered bytes, empty the buffer and switch to pass-through."""
        pending = bytes(self.buffer)
        self.buffer.clear()
        self.ts_started = True
        return pending

    async def transform(
        self, chunk_iterator: typing.AsyncIterator[bytes]
    ) -> typing.AsyncGenerator[bytes, None]:
        """
        Transform TS stream by stripping PNG wrapper and finding TS start.

        While prebuffering, data is held back until one of these happens:
        the buffer starts with an m3u8 tag (emitted as-is), a TS sync pattern
        is found (emitted from that offset), the buffer exceeds
        ``_MAX_PREFETCH`` (emitted as-is), or the upstream iterator ends
        (whatever is left is emitted so no data is lost).

        Args:
            chunk_iterator: Async iterator of raw bytes from upstream.

        Yields:
            Cleaned TS stream bytes.
        """
        async for chunk in chunk_iterator:
            if self.ts_started:
                # Normal streaming once TS has started
                yield chunk
                continue

            # Prebuffer phase (until we find TS or pass through)
            self.buffer += chunk

            # Fast-path: if it's an m3u8 playlist, don't do TS detection
            if self._buffer_is_m3u8():
                yield self._take_buffer()
                continue

            # Strip fake PNG wrapper if present; wait for IEND before stripping
            if self.buffer.startswith(self._PNG_SIGNATURE) and self._PNG_IEND_MARKER in self.buffer:
                self.buffer = bytearray(self._strip_fake_png_wrapper(bytes(self.buffer)))

            # Skip pure 0xFF padding bytes (TurboVid style) in one pass rather
            # than popping from the front byte by byte.
            if self.buffer and self.buffer[0] == 0xFF:
                self.buffer = self.buffer.lstrip(b"\xff")

            # Re-check for m3u8 playlist after stripping PNG wrapper and padding
            # This handles cases where m3u8 content is wrapped in PNG
            if self._buffer_is_m3u8():
                logger.debug("Found m3u8 content after stripping wrapper - passing through")
                yield self._take_buffer()
                continue

            ts_offset = self._find_ts_start(self.buffer)
            if ts_offset is None:
                # Keep buffering until we find TS or hit limit
                if len(self.buffer) > self._MAX_PREFETCH:
                    logger.warning("TS sync not found after large prebuffer, forcing passthrough")
                    yield self._take_buffer()
                continue

            # TS found: emit from ts_offset and switch to pass-through
            cleaned = bytes(self.buffer[ts_offset:])
            self.buffer.clear()
            self.ts_started = True
            if cleaned:
                yield cleaned

        # Upstream ended while still prebuffering (short or non-TS payload):
        # emit what is left instead of silently discarding it.
        if not self.ts_started and self.buffer:
            yield self._take_buffer()
# Lookup table from transformer ID to the transformer class registered under it
TRANSFORMER_REGISTRY: dict[str, type[StreamTransformer]] = dict(
    ts_stream=TSStreamTransformer,
)
def get_transformer(transformer_id: typing.Optional[str]) -> typing.Optional[StreamTransformer]:
    """
    Build a fresh transformer for the given identifier.

    Args:
        transformer_id: Key into TRANSFORMER_REGISTRY (e.g., "ts_stream"),
            or None when no transformation is requested.

    Returns:
        A newly constructed transformer, or None when transformer_id is None
        or is not registered (the latter is logged as a warning).
    """
    if transformer_id is not None:
        factory = TRANSFORMER_REGISTRY.get(transformer_id)
        if factory is not None:
            # Each call gets its own instance
            return factory()
        logger.warning(f"Unknown transformer ID: {transformer_id}")
    return None
async def apply_transformer_to_bytes(
    data: bytes,
    transformer_id: typing.Optional[str],
) -> bytes:
    """
    Run a transformer over a complete, in-memory payload.

    The payload is fed to the transformer as a one-chunk async stream and
    everything the transformer yields is concatenated into a single result.

    Args:
        data: The raw bytes data to transform.
        transformer_id: The transformer identifier (e.g., "ts_stream").

    Returns:
        The concatenated transformer output, or ``data`` unchanged when
        transformer_id is empty/None or does not resolve to a transformer.
    """
    # An empty or missing ID never reaches the registry lookup
    transformer = get_transformer(transformer_id) if transformer_id else None
    if not transformer:
        return data

    async def _one_shot():
        # Present the whole payload as a stream consisting of one chunk
        yield data

    pieces = [piece async for piece in transformer.transform(_one_shot())]
    return b"".join(pieces)