mirror of
https://github.com/UrloMythus/UnHided.git
synced 2026-04-11 11:50:51 +00:00
352 lines
11 KiB
Python
352 lines
11 KiB
Python
"""
|
|
PyAV-based audio transcoder for frame-level codec conversion.
|
|
|
|
Transcodes audio frames between codecs using PyAV's CodecContext API
|
|
(Python bindings for FFmpeg's libavcodec). This provides in-process
|
|
audio transcoding without subprocess management or pipe overhead.
|
|
|
|
Supported input codecs: EAC3, AC3, AAC, Opus, Vorbis, FLAC, MP3
|
|
Output codec: AAC-LC (stereo, configurable bitrate)
|
|
|
|
Architecture:
|
|
raw_frame_bytes -> parse() -> decode() -> resample() -> encode() -> raw_aac_bytes
|
|
|
|
Usage:
|
|
transcoder = AudioTranscoder("eac3", input_sample_rate=48000, input_channels=6)
|
|
for raw_eac3_frame in frames:
|
|
aac_frames = transcoder.transcode(raw_eac3_frame)
|
|
for aac_data in aac_frames:
|
|
write(aac_data)
|
|
# Flush remaining frames
|
|
for aac_data in transcoder.flush():
|
|
write(aac_data)
|
|
"""
|
|
|
|
import logging
|
|
|
|
import av
|
|
from av.audio.resampler import AudioResampler
|
|
|
|
from mediaflow_proxy.remuxer.ebml_parser import (
|
|
CODEC_ID_AAC,
|
|
CODEC_ID_AC3,
|
|
CODEC_ID_EAC3,
|
|
CODEC_ID_FLAC,
|
|
CODEC_ID_OPUS,
|
|
CODEC_ID_VORBIS,
|
|
)
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _generate_silence_aac_frame() -> bytes | None:
    """Pre-encode a single AAC frame intended as silence (48 kHz stereo).

    PyAV's AAC encoder has an intermittent ``avcodec_send_frame`` bug when
    rapidly creating/destroying codec contexts, so the encode is retried up
    to ten times. Every failed attempt is logged at debug level and a single
    warning is emitted if no attempt succeeds, so a missing silence frame can
    be diagnosed instead of failing silently.

    This function is called once at module load; the result is cached in
    ``_SILENCE_AAC_FRAME``.

    Returns:
        The bytes of the first packet produced by the encoder, or ``None``
        when every attempt raised or produced no packet.
    """
    max_attempts = 10
    for attempt in range(1, max_attempts + 1):
        try:
            enc = av.CodecContext.create("aac", "w")
            enc.sample_rate = 48000
            enc.layout = "stereo"
            enc.format = av.AudioFormat("fltp")
            enc.bit_rate = 192000
            enc.open()

            # NOTE(review): the frame's sample buffers are never explicitly
            # zeroed here -- confirm PyAV zero-initialises new AudioFrame
            # planes, otherwise this frame may not be true silence.
            frame = av.AudioFrame(
                format=enc.format.name,
                layout=enc.layout.name,
                samples=enc.frame_size or 1024,
            )
            frame.sample_rate = enc.sample_rate
            frame.pts = 0

            # AAC priming delay: the first encode() may only buffer the
            # frame, in which case flushing with None retrieves the packet.
            for payload in (frame, None):
                for pkt in enc.encode(payload):
                    return bytes(pkt)
        except Exception as exc:
            # Best-effort by design: keep retrying, but leave a trace.
            logger.debug(
                "[audio_transcoder] Silence frame attempt %d/%d failed: %s",
                attempt,
                max_attempts,
                exc,
            )
            continue

    logger.warning(
        "[audio_transcoder] Could not pre-encode a silent AAC frame after %d attempts",
        max_attempts,
    )
    return None
|
|
|
|
|
|
# Silent AAC frame shared by every transcoder instance. It is encoded once,
# when this module is imported, and is None if encoding did not succeed.
_SILENCE_AAC_FRAME: bytes | None = _generate_silence_aac_frame()

# Translation table: Matroska CodecID -> PyAV/FFmpeg codec name.
_MKV_TO_FFMPEG_CODEC = {
    # Codec IDs exported by the EBML parser
    CODEC_ID_EAC3: "eac3",
    CODEC_ID_AC3: "ac3",
    CODEC_ID_AAC: "aac",
    CODEC_ID_OPUS: "opus",
    CODEC_ID_VORBIS: "vorbis",
    CODEC_ID_FLAC: "flac",
    # Codec IDs spelled out literally
    "A_DTS": "dts",
    "A_MP3": "mp3",
    "A_MPEG/L3": "mp3",
}

# Matroska CodecIDs whose audio has to be re-encoded to AAC before a browser
# can play it.
NEEDS_TRANSCODE = frozenset(
    (
        CODEC_ID_EAC3,
        CODEC_ID_AC3,
        CODEC_ID_OPUS,
        CODEC_ID_VORBIS,
        CODEC_ID_FLAC,
        "A_DTS",
        "A_MP3",
        "A_MPEG/L3",
    )
)
|
|
|
|
# Output AAC settings
|
|
_OUTPUT_CODEC = "aac"
|
|
_OUTPUT_SAMPLE_FORMAT = "fltp" # AAC requires float planar
|
|
_OUTPUT_LAYOUT = "stereo"
|
|
|
|
# Map channel count -> FFmpeg layout name
|
|
_CHANNEL_LAYOUT_MAP = {
|
|
1: "mono",
|
|
2: "stereo",
|
|
3: "2.1",
|
|
4: "quad",
|
|
6: "5.1",
|
|
8: "7.1",
|
|
}
|
|
|
|
|
|
def needs_transcode(codec_id: str) -> bool:
    """Return True when *codec_id* is listed in ``NEEDS_TRANSCODE``.

    Args:
        codec_id: Matroska CodecID string of an audio track.

    Returns:
        True if the codec must be transcoded for browser playback, False
        otherwise (including for unknown codec IDs).
    """
    return codec_id in NEEDS_TRANSCODE
|
|
|
|
|
|
def get_ffmpeg_codec_name(mkv_codec_id: str) -> str | None:
    """Look up the FFmpeg codec name for a Matroska CodecID.

    Args:
        mkv_codec_id: Matroska CodecID string.

    Returns:
        The matching FFmpeg codec name, or ``None`` when the CodecID has no
        entry in ``_MKV_TO_FFMPEG_CODEC``.
    """
    ffmpeg_name = _MKV_TO_FFMPEG_CODEC.get(mkv_codec_id)
    return ffmpeg_name
|
|
|
|
|
|
class AudioTranscoder:
|
|
"""
|
|
In-process audio transcoder using PyAV's CodecContext API.
|
|
|
|
Decodes raw audio frames from one codec and encodes them to AAC-LC
|
|
stereo, suitable for MP4 container and browser playback. No container
|
|
I/O or subprocess involved -- operates directly on raw frame bytes.
|
|
|
|
The transcoder handles sample format conversion and resampling
|
|
automatically via AudioResampler.
|
|
"""
|
|
|
|
def __init__(
|
|
self,
|
|
input_codec: str,
|
|
input_sample_rate: int = 48000,
|
|
input_channels: int = 6,
|
|
output_sample_rate: int = 48000,
|
|
output_channels: int = 2,
|
|
output_bitrate: int = 192000,
|
|
) -> None:
|
|
"""
|
|
Initialize the transcoder.
|
|
|
|
Args:
|
|
input_codec: FFmpeg codec name (e.g., "eac3", "ac3", "aac").
|
|
input_sample_rate: Input sample rate in Hz.
|
|
input_channels: Input channel count.
|
|
output_sample_rate: Output sample rate in Hz (default 48000).
|
|
output_channels: Output channel count (default 2 = stereo).
|
|
output_bitrate: Output bitrate in bits/s (default 192000).
|
|
"""
|
|
# Set up decoder -- use layout to configure channel count
|
|
# (PyAV's channels property is read-only; layout drives it)
|
|
self._decoder = av.CodecContext.create(input_codec, "r")
|
|
self._decoder.sample_rate = input_sample_rate
|
|
input_layout = _CHANNEL_LAYOUT_MAP.get(input_channels, "stereo")
|
|
self._decoder.layout = input_layout
|
|
|
|
# Set up encoder
|
|
self._encoder = av.CodecContext.create(_OUTPUT_CODEC, "w")
|
|
self._encoder.sample_rate = output_sample_rate
|
|
self._encoder.layout = _OUTPUT_LAYOUT
|
|
self._encoder.format = av.AudioFormat(_OUTPUT_SAMPLE_FORMAT)
|
|
self._encoder.bit_rate = output_bitrate
|
|
self._encoder.open()
|
|
|
|
# Set up resampler for format/rate/channel conversion
|
|
self._resampler = AudioResampler(
|
|
format=_OUTPUT_SAMPLE_FORMAT,
|
|
layout=_OUTPUT_LAYOUT,
|
|
rate=output_sample_rate,
|
|
)
|
|
|
|
self._input_codec = input_codec
|
|
self._frames_decoded = 0
|
|
self._frames_encoded = 0
|
|
self._audio_specific_config: bytes | None = None
|
|
|
|
logger.info(
|
|
"[audio_transcoder] Initialized: %s %dHz %dch -> aac %dHz %dch @%dk",
|
|
input_codec,
|
|
input_sample_rate,
|
|
input_channels,
|
|
output_sample_rate,
|
|
output_channels,
|
|
output_bitrate // 1000,
|
|
)
|
|
|
|
@property
|
|
def audio_specific_config(self) -> bytes | None:
|
|
"""
|
|
AAC AudioSpecificConfig from the encoder (available after first encode).
|
|
|
|
This is needed for the MP4 esds box.
|
|
"""
|
|
if self._audio_specific_config is not None:
|
|
return self._audio_specific_config
|
|
|
|
# PyAV exposes extradata after the encoder is opened
|
|
if self._encoder.extradata:
|
|
self._audio_specific_config = bytes(self._encoder.extradata)
|
|
return self._audio_specific_config
|
|
return None
|
|
|
|
@property
|
|
def output_sample_rate(self) -> int:
|
|
return self._encoder.sample_rate
|
|
|
|
@property
|
|
def output_channels(self) -> int:
|
|
return self._encoder.channels
|
|
|
|
@property
|
|
def frame_size(self) -> int:
|
|
"""AAC frame size (samples per frame), typically 1024."""
|
|
return self._encoder.frame_size or 1024
|
|
|
|
def transcode(self, raw_frame_data: bytes) -> list[bytes]:
|
|
"""
|
|
Transcode a raw audio frame from the input codec to AAC.
|
|
|
|
Args:
|
|
raw_frame_data: Raw audio frame bytes (one codec frame, e.g.,
|
|
one EAC3 sync frame).
|
|
|
|
Returns:
|
|
List of raw AAC frame bytes. May return 0, 1, or more frames
|
|
depending on codec frame sizes and buffering.
|
|
"""
|
|
output = []
|
|
|
|
# Parse raw bytes into packets
|
|
packets = self._decoder.parse(raw_frame_data)
|
|
|
|
for packet in packets:
|
|
# Decode to PCM frames
|
|
try:
|
|
decoded_frames = self._decoder.decode(packet)
|
|
except av.error.InvalidDataError as e:
|
|
logger.debug("[audio_transcoder] Decode error (skipping frame): %s", e)
|
|
continue
|
|
|
|
for frame in decoded_frames:
|
|
self._frames_decoded += 1
|
|
|
|
# Resample to match encoder format
|
|
resampled = self._resampler.resample(frame)
|
|
if resampled is None:
|
|
continue
|
|
|
|
# resampled can be a single frame or list of frames
|
|
if not isinstance(resampled, list):
|
|
resampled = [resampled]
|
|
|
|
for rs_frame in resampled:
|
|
# Encode to AAC
|
|
try:
|
|
encoded_packets = self._encoder.encode(rs_frame)
|
|
except av.error.InvalidDataError as e:
|
|
logger.debug("[audio_transcoder] Encode error: %s", e)
|
|
continue
|
|
|
|
for enc_packet in encoded_packets:
|
|
self._frames_encoded += 1
|
|
output.append(bytes(enc_packet))
|
|
|
|
return output
|
|
|
|
def flush(self) -> list[bytes]:
|
|
"""
|
|
Flush the decoder and encoder buffers.
|
|
|
|
Call this when the input stream ends to get remaining frames.
|
|
|
|
Returns:
|
|
List of remaining raw AAC frame bytes.
|
|
"""
|
|
output = []
|
|
|
|
# Flush decoder
|
|
try:
|
|
for frame in self._decoder.decode(None):
|
|
self._frames_decoded += 1
|
|
resampled = self._resampler.resample(frame)
|
|
if resampled is None:
|
|
continue
|
|
if not isinstance(resampled, list):
|
|
resampled = [resampled]
|
|
for rs_frame in resampled:
|
|
for enc_packet in self._encoder.encode(rs_frame):
|
|
self._frames_encoded += 1
|
|
output.append(bytes(enc_packet))
|
|
except Exception as e:
|
|
logger.debug("[audio_transcoder] Decoder flush error: %s", e)
|
|
|
|
# Flush resampler
|
|
try:
|
|
resampled = self._resampler.resample(None)
|
|
if resampled is not None:
|
|
if not isinstance(resampled, list):
|
|
resampled = [resampled]
|
|
for rs_frame in resampled:
|
|
for enc_packet in self._encoder.encode(rs_frame):
|
|
self._frames_encoded += 1
|
|
output.append(bytes(enc_packet))
|
|
except Exception as e:
|
|
logger.debug("[audio_transcoder] Resampler flush error: %s", e)
|
|
|
|
# Flush encoder
|
|
try:
|
|
for enc_packet in self._encoder.encode(None):
|
|
self._frames_encoded += 1
|
|
output.append(bytes(enc_packet))
|
|
except Exception as e:
|
|
logger.debug("[audio_transcoder] Encoder flush error: %s", e)
|
|
|
|
logger.info(
|
|
"[audio_transcoder] Flushed: %d decoded, %d encoded total",
|
|
self._frames_decoded,
|
|
self._frames_encoded,
|
|
)
|
|
return output
|
|
|
|
def generate_silence_frame(self) -> bytes | None:
|
|
"""Return a pre-encoded silent AAC frame (module-level singleton)."""
|
|
return _SILENCE_AAC_FRAME
|
|
|
|
def close(self) -> None:
|
|
"""Release codec contexts (best-effort; PyAV AudioCodecContext may not have close())."""
|
|
for ctx in (self._decoder, self._encoder):
|
|
try:
|
|
if hasattr(ctx, "close"):
|
|
ctx.close()
|
|
except Exception:
|
|
pass
|
|
|
|
def __del__(self) -> None:
|
|
self.close()
|