Files
UnHided/mediaflow_proxy/remuxer/pyav_demuxer.py
UrloMythus cfc6bbabc9 update
2026-02-19 20:15:03 +01:00

609 lines
26 KiB
Python

"""
Universal PyAV-based streaming demuxer.
Bridges async byte streams to PyAV's synchronous I/O using an OS pipe,
allowing on-the-fly demuxing of any container format (MKV, MP4, TS,
FLV, WebM, etc.) from an async source.
Architecture:
    AsyncIterator[bytes] --> async feeder task --> queue.Queue --> writer thread (pipe)
                                                                            |
                                                                 OS pipe (kernel buffer)
                                                                            |
                                                        demux thread: av.open + discover + demux
                                                                            |
                                                        queue.Queue --> run_in_executor consumer
Performance: Uses plain threading.Queue on both sides (writer input and
packet output) to avoid per-item ``run_coroutine_threadsafe`` overhead.
The async/thread bridge is done via ``run_in_executor`` on the consumer
side and a dedicated asyncio task on the producer side.
For MP4 inputs, the caller (transcode_handler) prepends the moov atom
to the stream so PyAV receives a "faststart"-style MP4 through the pipe.
This allows true on-the-fly demuxing for all container formats.
"""
import asyncio
import logging
import os
import queue
import threading
from collections.abc import AsyncIterator
from dataclasses import dataclass
import av
logger = logging.getLogger(__name__)
# Sentinel object to signal end-of-stream in queues
_SENTINEL = object()
@dataclass(slots=True)
class DemuxedStream:
"""Metadata about a demuxed stream."""
index: int
codec_name: str
codec_type: str # "video" or "audio"
# Video-specific
width: int = 0
height: int = 0
fps: float = 0.0
pixel_format: str = ""
# Audio-specific
sample_rate: int = 0
channels: int = 0
# Timing
time_base_num: int = 1
time_base_den: int = 1000
duration_seconds: float = 0.0
# Raw codec extradata (e.g. SPS/PPS for H.264, AudioSpecificConfig for AAC)
extradata: bytes = b""
@dataclass(slots=True)
class DemuxedPacket:
"""A demuxed packet with timing info."""
stream_index: int
codec_type: str # "video" or "audio"
data: bytes
pts: int # Presentation timestamp in stream time_base units
dts: int # Decode timestamp in stream time_base units
duration: int # Duration in stream time_base units
is_keyframe: bool
time_base_num: int
time_base_den: int
# Optional decoded frame when decode_video/decode_audio is True
# av.VideoFrame for video, av.AudioFrame for audio
decoded_frame: object = None
@property
def pts_seconds(self) -> float:
if self.time_base_den == 0:
return 0.0
return self.pts * self.time_base_num / self.time_base_den
@property
def dts_seconds(self) -> float:
if self.time_base_den == 0:
return 0.0
return self.dts * self.time_base_num / self.time_base_den
@property
def duration_seconds(self) -> float:
if self.time_base_den == 0:
return 0.0
return self.duration * self.time_base_num / self.time_base_den
class PyAVDemuxer:
"""
Streaming demuxer using PyAV with pipe-based I/O.
All container I/O happens in background threads. The writer thread
feeds source bytes into a pipe; a single demux thread opens the
container, discovers streams, and demuxes packets -- all on the
same file object, ensuring the pipe's read cursor is never lost.
Performance optimisation: both the writer-input side and the
packet-output side use plain ``queue.Queue`` (no event-loop
involvement per item). The async/thread bridge is done via
``run_in_executor`` on the consumer and an asyncio task on the
producer, eliminating ~1700 ``run_coroutine_threadsafe`` round-trips
per 30 s of 4K content.
Usage:
    demuxer = PyAVDemuxer()
    await demuxer.start(source_async_iter)
    # demuxer.video_stream / audio_stream are now available
    async for packet in demuxer.iter_packets():
        if packet.codec_type == "video":
            ...
"""
def __init__(self, decode_video: bool = False, decode_audio: bool = False) -> None:
"""
Args:
decode_video: If True, the demux thread will decode video packets
using the container's codec context and attach decoded frames
to DemuxedPacket.decoded_frame. This avoids format conversion
issues with standalone decoders (HVCC vs Annex B).
decode_audio: If True, the demux thread will decode audio packets
using the container's codec context and attach decoded frames
to DemuxedPacket.decoded_frame. This is needed for codecs like
Vorbis/Opus where the standalone decoder requires codec headers
that are only available in the container context. Can also be
set after start() returns (before packets are consumed) via
the ``enable_audio_decode()`` method.
"""
self._decode_video = decode_video
self._decode_audio = decode_audio
self._video_decode_decided = threading.Event()
self._audio_decode_decided = threading.Event()
# If decode flags were set at construction time, mark decided immediately
if decode_video:
self._video_decode_decided.set()
if decode_audio:
self._audio_decode_decided.set()
self._container: av.InputContainer | None = None
self._video_stream: DemuxedStream | None = None
self._audio_stream: DemuxedStream | None = None
# Thread-safe queues (no event-loop involvement per put/get)
self._packet_queue: queue.Queue | None = None
self._source_queue: queue.Queue | None = None
self._demux_thread: threading.Thread | None = None
self._writer_thread: threading.Thread | None = None
self._feeder_task: asyncio.Task | None = None
self._write_fd: int | None = None
self._read_fd: int | None = None
@property
def video_stream(self) -> DemuxedStream | None:
return self._video_stream
@property
def audio_stream(self) -> DemuxedStream | None:
return self._audio_stream
def enable_video_decode(self, enable: bool = True) -> None:
    """
    Choose whether the demux thread decodes video packets itself.

    Intended to be called once ``start()`` has returned (stream metadata
    is available) and before packets are pulled from ``iter_packets()``.
    The demux thread waits for this decision before it begins
    processing packets.

    Args:
        enable: True to decode in the demux thread, False to pass raw
            packets through.
    """
    # Store the choice first, then publish it: the demux thread reads
    # the flag only after the event has been set.
    self._decode_video = enable
    self._video_decode_decided.set()
def enable_audio_decode(self, enable: bool = True) -> None:
    """
    Choose whether the demux thread decodes audio packets itself.

    Intended to be called once ``start()`` has returned (stream metadata
    is available) and before packets are pulled from ``iter_packets()``.
    The demux thread waits for this decision before it begins
    processing packets.

    Args:
        enable: True to decode in the demux thread, False to pass raw
            packets through.
    """
    # Store the choice first, then publish it: the demux thread reads
    # the flag only after the event has been set.
    self._decode_audio = enable
    self._audio_decode_decided.set()
# ── Writer side ──────────────────────────────────────────────────
async def _async_feeder(self, source: AsyncIterator[bytes]) -> None:
    """
    Async task: pull chunks from the async source and push them
    into a plain ``queue.Queue`` for the writer thread.

    This replaces the old per-chunk ``run_coroutine_threadsafe``
    pattern, batching the async-to-sync bridge into one task.
    ``queue.Queue.put()`` is a blocking call, so it is run through
    ``run_in_executor`` to avoid blocking the event loop when the
    queue is full.

    The task never raises: cancellation ends it quietly, a failing
    source is logged, and in every case an end-of-stream sentinel is
    queued for the writer thread without ever blocking the event loop.

    Args:
        source: Async iterator yielding the raw container bytes.
    """
    loop = asyncio.get_running_loop()
    sq = self._source_queue
    try:
        async for chunk in source:
            await loop.run_in_executor(None, sq.put, chunk)
    except (asyncio.CancelledError, GeneratorExit):
        # Regular shutdown path (the cleanup code cancels this task).
        pass
    except Exception as exc:
        # Best effort: a broken source simply ends the stream early,
        # but leave a trace instead of hiding the failure completely.
        logger.warning("[pyav_demuxer] Source stream failed: %s", exc)
    finally:
        # This block runs on the event-loop thread, so a blocking put()
        # here would freeze the whole loop whenever the queue is full.
        try:
            sq.put_nowait(_SENTINEL)
        except queue.Full:

            def _put_sentinel_blocking() -> None:
                """Queue the sentinel from a worker thread, giving up after 30 s."""
                try:
                    # Same patience as the writer thread's own get() timeout.
                    sq.put(_SENTINEL, timeout=30.0)
                except queue.Full:
                    logger.debug("[pyav_demuxer] Source queue stayed full; sentinel dropped")

            try:
                loop.run_in_executor(None, _put_sentinel_blocking)
            except RuntimeError:
                # Loop or executor already shut down: nothing left to notify.
                logger.debug("[pyav_demuxer] Could not schedule sentinel delivery")
def _write_chunks_sync(self) -> None:
    """
    Writer thread: pull pre-buffered chunks from ``_source_queue``
    and write them to the OS pipe.  No event-loop interaction.

    The loop ends when the sentinel arrives, when no chunk shows up
    for 30 seconds, or when writing to the pipe fails.  In every case
    the write end of the pipe is closed afterwards so the reader sees
    EOF, and ``_write_fd`` is reset to ``None``.
    """
    write_fd = self._write_fd
    sq = self._source_queue
    try:
        while True:
            chunk = sq.get(timeout=30.0)
            if chunk is _SENTINEL:
                break
            # os.write() reports how many bytes it actually accepted;
            # keep writing until the whole chunk is in the pipe so a
            # short write can never silently drop data.
            remaining = memoryview(chunk)
            while remaining:
                written = os.write(write_fd, remaining)
                remaining = remaining[written:]
    except queue.Empty:
        logger.debug("[pyav_demuxer] Writer: no source data for 30s, closing pipe")
    except OSError as exc:
        # Typically a broken pipe because the read end is already gone.
        logger.debug("[pyav_demuxer] Writer: pipe write failed: %s", exc)
    except Exception:
        # Stay best-effort (this is a background thread), but leave a trace.
        logger.debug("[pyav_demuxer] Writer: unexpected error", exc_info=True)
    finally:
        try:
            os.close(write_fd)
        except OSError:
            pass
        self._write_fd = None
# ── Demux side ───────────────────────────────────────────────────
async def start(self, source: AsyncIterator[bytes]) -> None:
    """
    Start pipe-based streaming and wait until stream discovery is over.

    Creates the OS pipe and both hand-off queues, schedules the async
    feeder task, and starts the writer thread plus a single demux
    thread that opens the container, discovers streams, and begins
    enqueuing packets for ``iter_packets()``.

    After this returns, ``video_stream`` and ``audio_stream`` hold
    whatever was discovered.  Failures inside the demux thread are only
    logged, never raised here, so both may still be ``None``.

    Args:
        source: Async iterator yielding the raw container bytes.
    """
    loop = asyncio.get_running_loop()

    # Create OS pipe
    self._read_fd, self._write_fd = os.pipe()

    # Source buffer queue (async feeder task -> writer thread)
    self._source_queue = queue.Queue(maxsize=256)

    # Kick off the async feeder task
    self._feeder_task = asyncio.create_task(self._async_feeder(source))

    # Start writer thread (drains source_queue into the pipe)
    self._writer_thread = threading.Thread(
        target=self._write_chunks_sync,
        daemon=True,
        name="pyav-writer",
    )
    self._writer_thread.start()

    # Packet queue for demux-thread -> async consumer bridge
    self._packet_queue = queue.Queue(maxsize=128)

    streams_ready = threading.Event()

    def _open_and_demux():
        """
        Single background thread: open container, discover streams,
        demux all packets.

        Critical: av.open(), _discover_streams(), and container.demux()
        all happen on the same file object in the same thread.  This
        ensures the pipe read cursor is never lost between open and demux.
        """
        pkt_count = 0
        pq = self._packet_queue

        def _frame_packet(frame, stream_index, codec_type, duration, tb_num, tb_den):
            """Wrap a decoded frame; the frame's PTS is used for both pts and dts."""
            ts = int(frame.pts) if frame.pts is not None else 0
            return DemuxedPacket(
                stream_index=stream_index,
                codec_type=codec_type,
                data=b"",
                pts=ts,
                dts=ts,
                duration=duration,
                # Audio frames are always reported as non-key frames.
                is_keyframe=frame.key_frame if codec_type == "video" else False,
                time_base_num=tb_num,
                time_base_den=tb_den,
                decoded_frame=frame,
            )

        try:
            # Open container from read end of pipe
            read_file = os.fdopen(self._read_fd, "rb")
            self._read_fd = None  # ownership transferred
            self._container = av.open(
                read_file,
                mode="r",
                options={
                    # Tolerate mid-stream joins / broken data in live TS
                    "err_detect": "ignore_err",
                    "fflags": "+discardcorrupt+genpts",
                },
            )
            self._discover_streams()

            # Signal stream metadata is available
            streams_ready.set()

            if self._video_stream is None and self._audio_stream is None:
                logger.warning("[pyav_demuxer] No video or audio streams found")
                return

            # Select streams to demux
            streams_to_demux = []
            if self._video_stream is not None:
                streams_to_demux.append(self._container.streams[self._video_stream.index])
            if self._audio_stream is not None:
                streams_to_demux.append(self._container.streams[self._audio_stream.index])

            # Wait for the caller to decide on video/audio decoding
            # (if not already decided at construction time).
            if not self._video_decode_decided.is_set():
                self._video_decode_decided.wait(timeout=10.0)
            if not self._audio_decode_decided.is_set():
                self._audio_decode_decided.wait(timeout=10.0)

            # Cache stream objects and time_base for the hot loop
            video_stream_obj = (
                self._container.streams[self._video_stream.index] if self._video_stream is not None else None
            )
            audio_stream_obj = (
                self._container.streams[self._audio_stream.index] if self._audio_stream is not None else None
            )
            video_tb_num = video_stream_obj.time_base.numerator if video_stream_obj else 1
            video_tb_den = video_stream_obj.time_base.denominator if video_stream_obj else 1
            audio_tb_num = audio_stream_obj.time_base.numerator if audio_stream_obj else 1
            audio_tb_den = audio_stream_obj.time_base.denominator if audio_stream_obj else 1
            decode_video = self._decode_video
            decode_audio = self._decode_audio

            # Demux and enqueue packets -- plain queue.put(), no event loop
            for packet in self._container.demux(*streams_to_demux):
                if packet.size == 0:
                    continue

                # Deliberately looked up through self._container on every
                # packet: once cleanup clears that attribute the lookup
                # fails and this thread stops touching the container.
                stream = self._container.streams[packet.stream_index]
                is_video = stream.type == "video"
                is_audio = stream.type == "audio"

                # Pick the stream whose decoder should handle this packet, if any
                if decode_video and is_video and video_stream_obj is not None:
                    decode_stream, kind = video_stream_obj, "video"
                    tb_num, tb_den = video_tb_num, video_tb_den
                elif decode_audio and is_audio and audio_stream_obj is not None:
                    decode_stream, kind = audio_stream_obj, "audio"
                    tb_num, tb_den = audio_tb_num, audio_tb_den
                else:
                    decode_stream = None

                if decode_stream is None:
                    # Pass the compressed packet through untouched
                    tb_num = video_tb_num if is_video else audio_tb_num
                    tb_den = video_tb_den if is_video else audio_tb_den
                    pq.put(
                        DemuxedPacket(
                            stream_index=packet.stream_index,
                            codec_type=stream.type,
                            data=bytes(packet),
                            pts=int(packet.pts) if packet.pts is not None else 0,
                            dts=int(packet.dts) if packet.dts is not None else 0,
                            duration=int(packet.duration) if packet.duration is not None else 0,
                            is_keyframe=packet.is_keyframe,
                            time_base_num=tb_num,
                            time_base_den=tb_den,
                        )
                    )
                    pkt_count += 1
                    continue

                # Decode in-thread; a packet that fails to decode is skipped
                try:
                    frames = decode_stream.codec_context.decode(packet)
                except Exception:
                    frames = []
                duration = int(packet.duration) if packet.duration is not None else 0
                for frame in frames:
                    pq.put(_frame_packet(frame, packet.stream_index, kind, duration, tb_num, tb_den))
                    pkt_count += 1

            # Flush the decoders we were using (video first, then audio)
            for wanted, stream_obj, kind, tb_num, tb_den in (
                (decode_video, video_stream_obj, "video", video_tb_num, video_tb_den),
                (decode_audio, audio_stream_obj, "audio", audio_tb_num, audio_tb_den),
            ):
                if not wanted or stream_obj is None:
                    continue
                try:
                    for frame in stream_obj.codec_context.decode(None):
                        pq.put(_frame_packet(frame, stream_obj.index, kind, 0, tb_num, tb_den))
                        pkt_count += 1
                except Exception:
                    # A failed flush only loses trailing frames; keep going.
                    logger.debug("[pyav_demuxer] %s decoder flush failed", kind, exc_info=True)

            logger.info("[pyav_demuxer] Demux complete: %d packets", pkt_count)
        except Exception as e:
            if "Invalid data" not in str(e):
                logger.debug("[pyav_demuxer] Demux thread error: %s", e)
        finally:
            # Always release start(), whatever happened above, then tell
            # the consumer that no more packets will arrive.
            streams_ready.set()
            pq.put(_SENTINEL)

    self._demux_thread = threading.Thread(target=_open_and_demux, daemon=True, name="pyav-demux")
    self._demux_thread.start()

    # Wait for stream discovery before returning.
    # Use run_in_executor to avoid blocking the event loop.
    await loop.run_in_executor(None, streams_ready.wait)
async def iter_packets(self) -> AsyncIterator[DemuxedPacket]:
    """
    Yield demuxed packets from the background thread.

    Uses ``run_in_executor`` for the blocking ``queue.get()`` call,
    avoiding per-packet ``run_coroutine_threadsafe`` overhead.
    ``start()`` must be called first.

    Resources are released through ``_cleanup()`` however iteration
    ends: normal end of stream, the generator being closed early, or
    cancellation of the consuming task.

    Yields:
        DemuxedPacket objects in the order the demux thread queued them.

    Raises:
        RuntimeError: If ``start()`` has not been called.
        asyncio.CancelledError: Re-raised after cleanup when the
            consuming task is cancelled.
    """
    if self._packet_queue is None:
        raise RuntimeError("Call start() before iter_packets()")

    loop = asyncio.get_running_loop()
    pq = self._packet_queue
    try:
        while True:
            packet = await loop.run_in_executor(None, pq.get)
            if packet is _SENTINEL:
                break
            yield packet
        # The sentinel is the last thing the demux thread queues, so
        # this join is expected to return almost immediately.
        if self._demux_thread is not None:
            self._demux_thread.join(timeout=5.0)
    except GeneratorExit:
        logger.debug("[pyav_demuxer] Generator closed")
    except asyncio.CancelledError:
        logger.debug("[pyav_demuxer] Cancelled")
        # Swallowing the cancellation would make the consumer's
        # ``async for`` finish as if the stream had ended and keep a
        # cancelled task running; propagate it instead.
        raise
    finally:
        self._cleanup()
def _discover_streams(self) -> None:
    """
    Record metadata for the first video and the first audio stream.

    Walks the streams of the opened container and fills
    ``_video_stream`` / ``_audio_stream`` with a ``DemuxedStream`` each.
    Later streams of a type that is already recorded are ignored, as
    are all other stream types.  Does nothing when no container is open.
    """
    container = self._container
    if container is None:
        return

    for stream in container.streams:
        kind = stream.type
        take_video = kind == "video" and self._video_stream is None
        take_audio = kind == "audio" and self._audio_stream is None
        if not (take_video or take_audio):
            continue

        ctx = stream.codec_context
        # Fields that are filled the same way for both stream types
        shared = {
            "index": stream.index,
            "codec_name": ctx.name if ctx else stream.codec.name,
            "time_base_num": stream.time_base.numerator,
            "time_base_den": stream.time_base.denominator,
            "duration_seconds": float(stream.duration * stream.time_base) if stream.duration else 0.0,
            "extradata": bytes(ctx.extradata) if ctx and ctx.extradata else b"",
        }

        if take_video:
            info = DemuxedStream(
                codec_type="video",
                width=ctx.width if ctx else 0,
                height=ctx.height if ctx else 0,
                # Fallbacks when the container does not report these values
                fps=float(stream.average_rate) if stream.average_rate else 24.0,
                pixel_format=str(ctx.pix_fmt) if ctx and ctx.pix_fmt else "yuv420p",
                **shared,
            )
            self._video_stream = info
            logger.info(
                "[pyav_demuxer] Video: %s %dx%d @%.1ffps",
                info.codec_name,
                info.width,
                info.height,
                info.fps,
            )
        else:
            info = DemuxedStream(
                codec_type="audio",
                sample_rate=ctx.sample_rate if ctx else 0,
                channels=ctx.channels if ctx else 0,
                **shared,
            )
            self._audio_stream = info
            logger.info(
                "[pyav_demuxer] Audio: %s %dHz %dch",
                info.codec_name,
                info.sample_rate,
                info.channels,
            )
def _cleanup(self) -> None:
    """Stop threads and release all resources safely.

    Anything still queued is discarded: by the time this runs the
    stream has either finished or is being abandoned.

    The order is critical to avoid SIGSEGV from closing the container
    while the demux thread is still calling container.demux():

      1. Cancel the feeder task (stops new bytes being queued).
      2. Empty the source queue and put a sentinel into it so the
         writer thread unblocks and exits.  The writer's ``finally``
         closes the pipe write-end, which causes the demux thread to
         see EOF.
      3. Join the writer thread, emptying the packet queue meanwhile:
         a full packet queue stalls the demux thread, which stops
         reading the pipe, which in turn stalls the writer.
      4. Join the demux thread the same way.
      5. Close the container ONLY if the demux thread really exited.
      6. Close leftover pipe FDs, skipping any whose owning thread is
         still alive (that thread closes its FD itself).
      7. Leave a sentinel in the packet queue so that a ``get()`` still
         pending in the executor can return.

    The joins block the calling thread for up to roughly 5 s each.
    """

    def _drain(q) -> None:
        """Discard everything currently queued so a blocked producer can proceed."""
        if q is None:
            return
        try:
            while True:
                q.get_nowait()
        except queue.Empty:
            pass

    def _join_while_draining(thread) -> bool:
        """Join *thread* (100 x 0.05 s) while emptying the packet queue; True if it exited."""
        for _ in range(100):
            _drain(self._packet_queue)
            thread.join(timeout=0.05)
            if not thread.is_alive():
                return True
        return False

    # 1. Cancel feeder task
    if self._feeder_task is not None:
        self._feeder_task.cancel()
        self._feeder_task = None

    # 2. Unblock writer thread so it exits and closes the pipe.  The
    #    queue is emptied first so the sentinel cannot be rejected by a
    #    full queue.
    if self._source_queue is not None:
        _drain(self._source_queue)
        try:
            self._source_queue.put_nowait(_SENTINEL)
        except queue.Full:
            logger.debug("[pyav_demuxer] Source queue refilled during cleanup; sentinel not queued")

    # 3. Join writer thread (it closes _write_fd in its finally block)
    writer_done = True
    if self._writer_thread is not None:
        writer_done = _join_while_draining(self._writer_thread)
        self._writer_thread = None

    # 4. Join demux thread -- must finish before we close the container
    demux_done = True
    if self._demux_thread is not None:
        demux_done = _join_while_draining(self._demux_thread)
        self._demux_thread = None

    # 5. Close the container only when no thread can still be using it
    if self._container is not None:
        if demux_done:
            try:
                self._container.close()
            except Exception:
                logger.debug("[pyav_demuxer] Container close failed", exc_info=True)
        else:
            logger.warning("[pyav_demuxer] Demux thread still running; container left open")
        self._container = None

    # 6. Close any remaining pipe FDs that no live thread still owns
    for fd_name, owner_done in (("_read_fd", demux_done), ("_write_fd", writer_done)):
        fd = getattr(self, fd_name, None)
        if fd is None:
            continue
        if not owner_done:
            # The owning thread closes this descriptor itself; closing it
            # here as well could hit an unrelated, reused FD number later.
            logger.warning("[pyav_demuxer] %s still in use by a running thread; not closing", fd_name)
            continue
        try:
            os.close(fd)
        except OSError:
            pass
        setattr(self, fd_name, None)

    # 7. Release a consumer get() that may still be pending in the executor
    if self._packet_queue is not None:
        try:
            self._packet_queue.put_nowait(_SENTINEL)
        except queue.Full:
            pass