mirror of
https://github.com/UrloMythus/UnHided.git
synced 2026-04-11 03:40:54 +00:00
updated to latest version
123
mediaflow_proxy/utils/base64_utils.py
Normal file
@@ -0,0 +1,123 @@
import base64
import binascii
import logging
from typing import Optional
from urllib.parse import urlparse

logger = logging.getLogger(__name__)


def is_base64_url(url: str) -> bool:
    """
    Check if a URL appears to be base64 encoded.

    Args:
        url (str): The URL to check.

    Returns:
        bool: True if the URL appears to be base64 encoded, False otherwise.
    """
    # Anything that starts with a URL scheme is a regular URL, not base64
    if url.startswith(('http://', 'https://', 'ftp://', 'ftps://')):
        return False

    # Base64 strings contain only alphanumerics, +, / and = (or - and _
    # in the URL-safe variant) and never contain URL markers such as ://
    base64_chars = set('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=-_')
    url_chars = set(url)

    # If the string contains characters outside the base64 alphabet, it's not base64
    if not url_chars.issubset(base64_chars):
        return False

    # Additional heuristic: too short to be a meaningful base64 encoded URL
    if len(url) < 10:
        return False

    return True


def decode_base64_url(encoded_url: str) -> Optional[str]:
    """
    Decode a base64 encoded URL.

    Args:
        encoded_url (str): The base64 encoded URL string.

    Returns:
        Optional[str]: The decoded URL if successful, None if decoding fails.
    """
    try:
        # Handle URL-safe base64 encoding (replace - with + and _ with /)
        url_safe_encoded = encoded_url.replace('-', '+').replace('_', '/')

        # Add padding if necessary
        missing_padding = len(url_safe_encoded) % 4
        if missing_padding:
            url_safe_encoded += '=' * (4 - missing_padding)

        # Decode the base64 string
        decoded_bytes = base64.b64decode(url_safe_encoded)
        decoded_url = decoded_bytes.decode('utf-8')

        # Validate that the decoded string is a valid URL
        parsed = urlparse(decoded_url)
        if parsed.scheme and parsed.netloc:
            logger.info(f"Successfully decoded base64 URL: {encoded_url[:50]}... -> {decoded_url}")
            return decoded_url
        else:
            logger.warning(f"Decoded string is not a valid URL: {decoded_url}")
            return None

    except (binascii.Error, UnicodeDecodeError, ValueError) as e:
        logger.debug(f"Failed to decode base64 URL '{encoded_url[:50]}...': {e}")
        return None


def encode_url_to_base64(url: str, url_safe: bool = True) -> str:
    """
    Encode a URL to base64.

    Args:
        url (str): The URL to encode.
        url_safe (bool): Whether to use URL-safe base64 encoding (default: True).

    Returns:
        str: The base64 encoded URL.
    """
    try:
        url_bytes = url.encode('utf-8')
        if url_safe:
            # Use URL-safe base64 encoding (replace + with - and / with _)
            encoded = base64.urlsafe_b64encode(url_bytes).decode('utf-8')
            # Remove padding for cleaner URLs
            encoded = encoded.rstrip('=')
        else:
            encoded = base64.b64encode(url_bytes).decode('utf-8')

        logger.debug(f"Encoded URL to base64: {url} -> {encoded}")
        return encoded

    except Exception as e:
        logger.error(f"Failed to encode URL to base64: {e}")
        raise


def process_potential_base64_url(url: str) -> str:
    """
    Process a URL that might be base64 encoded. If it is base64 encoded, decode it.
    Otherwise, return the original URL.

    Args:
        url (str): The URL to process.

    Returns:
        str: The processed URL (decoded if it was base64, original otherwise).
    """
    if is_base64_url(url):
        decoded_url = decode_base64_url(url)
        if decoded_url:
            return decoded_url
        else:
            logger.warning(f"URL appears to be base64 but failed to decode: {url[:50]}...")

    return url
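For reference, the module is meant to round-trip like this (a minimal usage sketch; the URLs are placeholders):

from mediaflow_proxy.utils.base64_utils import encode_url_to_base64, process_potential_base64_url

# encode_url_to_base64() strips the trailing '=' padding; decode_base64_url()
# re-derives it from len(token) % 4 before calling base64.b64decode()
token = encode_url_to_base64("https://example.com/stream.m3u8")

# The token contains only base64 characters, so it is detected and decoded
assert process_potential_base64_url(token) == "https://example.com/stream.m3u8"

# A plain URL starts with a scheme and passes through unchanged
assert process_potential_base64_url("https://example.com/video.ts") == "https://example.com/video.ts"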
373
mediaflow_proxy/utils/dash_prebuffer.py
Normal file
@@ -0,0 +1,373 @@
import logging
import psutil
from typing import Dict, Optional, List
from urllib.parse import urljoin
import xmltodict
from mediaflow_proxy.utils.http_utils import create_httpx_client
from mediaflow_proxy.configs import settings

logger = logging.getLogger(__name__)


class DASHPreBuffer:
    """
    Pre-buffer system for DASH streams to reduce latency and improve streaming performance.
    """

    def __init__(self, max_cache_size: Optional[int] = None, prebuffer_segments: Optional[int] = None):
        """
        Initialize the DASH pre-buffer system.

        Args:
            max_cache_size (int): Maximum number of segments to cache (uses config if None)
            prebuffer_segments (int): Number of segments to pre-buffer ahead (uses config if None)
        """
        self.max_cache_size = max_cache_size or settings.dash_prebuffer_cache_size
        self.prebuffer_segments = prebuffer_segments or settings.dash_prebuffer_segments
        self.max_memory_percent = settings.dash_prebuffer_max_memory_percent
        self.emergency_threshold = settings.dash_prebuffer_emergency_threshold

        # Caches for different types of DASH content
        self.segment_cache: Dict[str, bytes] = {}
        self.init_segment_cache: Dict[str, bytes] = {}
        self.manifest_cache: Dict[str, dict] = {}

        # Track segment URLs for each adaptation set
        self.adaptation_segments: Dict[str, List[str]] = {}
        self.client = create_httpx_client()

    def _get_memory_usage_percent(self) -> float:
        """
        Get current memory usage percentage.

        Returns:
            float: Memory usage percentage
        """
        try:
            memory = psutil.virtual_memory()
            return memory.percent
        except Exception as e:
            logger.warning(f"Failed to get memory usage: {e}")
            return 0.0

    def _check_memory_threshold(self) -> bool:
        """
        Check if memory usage exceeds the emergency threshold.

        Returns:
            bool: True if emergency cleanup is needed
        """
        memory_percent = self._get_memory_usage_percent()
        return memory_percent > self.emergency_threshold

    def _emergency_cache_cleanup(self) -> None:
        """
        Perform emergency cache cleanup when memory usage is high.
        """
        if self._check_memory_threshold():
            logger.warning("Emergency DASH cache cleanup triggered due to high memory usage")

            # Clear 50% of segment cache
            segment_cache_size = len(self.segment_cache)
            segment_keys_to_remove = list(self.segment_cache.keys())[:segment_cache_size // 2]
            for key in segment_keys_to_remove:
                del self.segment_cache[key]

            # Clear 50% of init segment cache
            init_cache_size = len(self.init_segment_cache)
            init_keys_to_remove = list(self.init_segment_cache.keys())[:init_cache_size // 2]
            for key in init_keys_to_remove:
                del self.init_segment_cache[key]

            logger.info(f"Emergency cleanup removed {len(segment_keys_to_remove)} segments and {len(init_keys_to_remove)} init segments from cache")

    async def prebuffer_dash_manifest(self, mpd_url: str, headers: Dict[str, str]) -> None:
        """
        Pre-buffer segments from a DASH manifest.

        Args:
            mpd_url (str): URL of the DASH manifest
            headers (Dict[str, str]): Headers to use for requests
        """
        try:
            # Download and parse MPD manifest
            response = await self.client.get(mpd_url, headers=headers)
            response.raise_for_status()
            mpd_content = response.text

            # Parse MPD XML
            mpd_dict = xmltodict.parse(mpd_content)

            # Store manifest in cache
            self.manifest_cache[mpd_url] = mpd_dict

            # Extract initialization segments and first few segments
            await self._extract_and_prebuffer_segments(mpd_dict, mpd_url, headers)

            logger.info(f"Pre-buffered DASH manifest: {mpd_url}")

        except Exception as e:
            logger.warning(f"Failed to pre-buffer DASH manifest {mpd_url}: {e}")

    async def _extract_and_prebuffer_segments(self, mpd_dict: dict, base_url: str, headers: Dict[str, str]) -> None:
        """
        Extract and pre-buffer segments from MPD manifest.

        Args:
            mpd_dict (dict): Parsed MPD manifest
            base_url (str): Base URL for resolving relative URLs
            headers (Dict[str, str]): Headers to use for requests
        """
        try:
            # Extract Period and AdaptationSet information
            mpd = mpd_dict.get('MPD', {})
            periods = mpd.get('Period', [])
            if not isinstance(periods, list):
                periods = [periods]

            for period in periods:
                adaptation_sets = period.get('AdaptationSet', [])
                if not isinstance(adaptation_sets, list):
                    adaptation_sets = [adaptation_sets]

                for adaptation_set in adaptation_sets:
                    # Extract initialization segment
                    init_segment = adaptation_set.get('SegmentTemplate', {}).get('@initialization')
                    if init_segment:
                        init_url = urljoin(base_url, init_segment)
                        await self._download_init_segment(init_url, headers)

                    # Extract segment template
                    segment_template = adaptation_set.get('SegmentTemplate', {})
                    if segment_template:
                        await self._prebuffer_template_segments(segment_template, base_url, headers)

                    # Extract segment list
                    segment_list = adaptation_set.get('SegmentList', {})
                    if segment_list:
                        await self._prebuffer_list_segments(segment_list, base_url, headers)

        except Exception as e:
            logger.warning(f"Failed to extract segments from MPD: {e}")

    async def _download_init_segment(self, init_url: str, headers: Dict[str, str]) -> None:
        """
        Download and cache initialization segment.

        Args:
            init_url (str): URL of the initialization segment
            headers (Dict[str, str]): Headers to use for request
        """
        try:
            # Check memory usage before downloading
            memory_percent = self._get_memory_usage_percent()
            if memory_percent > self.max_memory_percent:
                logger.warning(f"Memory usage {memory_percent}% exceeds limit {self.max_memory_percent}%, skipping init segment download")
                return

            response = await self.client.get(init_url, headers=headers)
            response.raise_for_status()

            # Cache the init segment
            self.init_segment_cache[init_url] = response.content

            # Check for emergency cleanup
            if self._check_memory_threshold():
                self._emergency_cache_cleanup()

            logger.debug(f"Cached init segment: {init_url}")

        except Exception as e:
            logger.warning(f"Failed to download init segment {init_url}: {e}")

    async def _prebuffer_template_segments(self, segment_template: dict, base_url: str, headers: Dict[str, str]) -> None:
        """
        Pre-buffer segments using segment template.

        Args:
            segment_template (dict): Segment template from MPD
            base_url (str): Base URL for resolving relative URLs
            headers (Dict[str, str]): Headers to use for requests
        """
        try:
            media_template = segment_template.get('@media')
            if not media_template:
                return

            # Extract template parameters
            start_number = int(segment_template.get('@startNumber', 1))
            duration = float(segment_template.get('@duration', 0))
            timescale = float(segment_template.get('@timescale', 1))

            # Pre-buffer first few segments
            for i in range(self.prebuffer_segments):
                segment_number = start_number + i
                segment_url = media_template.replace('$Number$', str(segment_number))
                full_url = urljoin(base_url, segment_url)

                await self._download_segment(full_url, headers)

        except Exception as e:
            logger.warning(f"Failed to pre-buffer template segments: {e}")

    async def _prebuffer_list_segments(self, segment_list: dict, base_url: str, headers: Dict[str, str]) -> None:
        """
        Pre-buffer segments from segment list.

        Args:
            segment_list (dict): Segment list from MPD
            base_url (str): Base URL for resolving relative URLs
            headers (Dict[str, str]): Headers to use for requests
        """
        try:
            segments = segment_list.get('SegmentURL', [])
            if not isinstance(segments, list):
                segments = [segments]

            # Pre-buffer first few segments
            for segment in segments[:self.prebuffer_segments]:
                segment_url = segment.get('@src')
                if segment_url:
                    full_url = urljoin(base_url, segment_url)
                    await self._download_segment(full_url, headers)

        except Exception as e:
            logger.warning(f"Failed to pre-buffer list segments: {e}")

    async def _download_segment(self, segment_url: str, headers: Dict[str, str]) -> None:
        """
        Download a single segment and cache it.

        Args:
            segment_url (str): URL of the segment to download
            headers (Dict[str, str]): Headers to use for request
        """
        try:
            # Check memory usage before downloading
            memory_percent = self._get_memory_usage_percent()
            if memory_percent > self.max_memory_percent:
                logger.warning(f"Memory usage {memory_percent}% exceeds limit {self.max_memory_percent}%, skipping segment download")
                return

            response = await self.client.get(segment_url, headers=headers)
            response.raise_for_status()

            # Cache the segment
            self.segment_cache[segment_url] = response.content

            # Check for emergency cleanup
            if self._check_memory_threshold():
                self._emergency_cache_cleanup()
            # Maintain cache size
            elif len(self.segment_cache) > self.max_cache_size:
                # Remove oldest entries (simple FIFO)
                oldest_key = next(iter(self.segment_cache))
                del self.segment_cache[oldest_key]

            logger.debug(f"Cached DASH segment: {segment_url}")

        except Exception as e:
            logger.warning(f"Failed to download DASH segment {segment_url}: {e}")

    async def get_segment(self, segment_url: str, headers: Dict[str, str]) -> Optional[bytes]:
        """
        Get a segment from cache or download it.

        Args:
            segment_url (str): URL of the segment
            headers (Dict[str, str]): Headers to use for request

        Returns:
            Optional[bytes]: Cached segment data or None if not available
        """
        # Check segment cache first
        if segment_url in self.segment_cache:
            logger.debug(f"DASH cache hit for segment: {segment_url}")
            return self.segment_cache[segment_url]

        # Check init segment cache
        if segment_url in self.init_segment_cache:
            logger.debug(f"DASH cache hit for init segment: {segment_url}")
            return self.init_segment_cache[segment_url]

        # Check memory usage before downloading
        memory_percent = self._get_memory_usage_percent()
        if memory_percent > self.max_memory_percent:
            logger.warning(f"Memory usage {memory_percent}% exceeds limit {self.max_memory_percent}%, skipping download")
            return None

        # Download if not in cache
        try:
            response = await self.client.get(segment_url, headers=headers)
            response.raise_for_status()
            segment_data = response.content

            # Determine if it's an init segment or regular segment
            if 'init' in segment_url.lower() or segment_url.endswith('.mp4'):
                self.init_segment_cache[segment_url] = segment_data
            else:
                self.segment_cache[segment_url] = segment_data

            # Check for emergency cleanup
            if self._check_memory_threshold():
                self._emergency_cache_cleanup()
            # Maintain cache size
            elif len(self.segment_cache) > self.max_cache_size:
                oldest_key = next(iter(self.segment_cache))
                del self.segment_cache[oldest_key]

            logger.debug(f"Downloaded and cached DASH segment: {segment_url}")
            return segment_data

        except Exception as e:
            logger.warning(f"Failed to get DASH segment {segment_url}: {e}")
            return None

    async def get_manifest(self, mpd_url: str, headers: Dict[str, str]) -> Optional[dict]:
        """
        Get MPD manifest from cache or download it.

        Args:
            mpd_url (str): URL of the MPD manifest
            headers (Dict[str, str]): Headers to use for request

        Returns:
            Optional[dict]: Cached manifest data or None if not available
        """
        # Check cache first
        if mpd_url in self.manifest_cache:
            logger.debug(f"DASH cache hit for manifest: {mpd_url}")
            return self.manifest_cache[mpd_url]

        # Download if not in cache
        try:
            response = await self.client.get(mpd_url, headers=headers)
            response.raise_for_status()
            mpd_content = response.text
            mpd_dict = xmltodict.parse(mpd_content)

            # Cache the manifest
            self.manifest_cache[mpd_url] = mpd_dict

            logger.debug(f"Downloaded and cached DASH manifest: {mpd_url}")
            return mpd_dict

        except Exception as e:
            logger.warning(f"Failed to get DASH manifest {mpd_url}: {e}")
            return None

    def clear_cache(self) -> None:
        """Clear the DASH cache."""
        self.segment_cache.clear()
        self.init_segment_cache.clear()
        self.manifest_cache.clear()
        self.adaptation_segments.clear()
        logger.info("DASH pre-buffer cache cleared")

    async def close(self) -> None:
        """Close the pre-buffer system."""
        await self.client.aclose()


# Global DASH pre-buffer instance
dash_prebuffer = DASHPreBuffer()
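Typical usage of the shared instance might look like the sketch below (illustrative only: the manifest URL, segment URL, and headers are placeholders, and the `dash_prebuffer_*` settings are assumed to be configured):

import asyncio

from mediaflow_proxy.utils.dash_prebuffer import dash_prebuffer


async def warm_up() -> None:
    headers = {"User-Agent": "Mozilla/5.0"}  # placeholder headers
    mpd_url = "https://example.com/stream.mpd"  # placeholder manifest URL

    # Parse and cache the MPD, then pre-download init segments and the
    # first `prebuffer_segments` media segments of each adaptation set
    await dash_prebuffer.prebuffer_dash_manifest(mpd_url, headers)

    # Later lookups hit the in-memory caches before falling back to HTTP
    manifest = await dash_prebuffer.get_manifest(mpd_url, headers)
    segment = await dash_prebuffer.get_segment("https://example.com/seg-1.m4s", headers)


asyncio.run(warm_up())

The FIFO eviction works because `next(iter(self.segment_cache))` returns the first-inserted key: Python dicts preserve insertion order, so no extra bookkeeping structure is needed.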
321
mediaflow_proxy/utils/hls_prebuffer.py
Normal file
@@ -0,0 +1,321 @@
import asyncio
import logging
import psutil
from typing import Dict, Optional, List
from urllib.parse import urlparse, urljoin
from mediaflow_proxy.utils.http_utils import create_httpx_client
from mediaflow_proxy.configs import settings

logger = logging.getLogger(__name__)


class HLSPreBuffer:
    """
    Pre-buffer system for HLS streams to reduce latency and improve streaming performance.
    """

    def __init__(self, max_cache_size: Optional[int] = None, prebuffer_segments: Optional[int] = None):
        """
        Initialize the HLS pre-buffer system.

        Args:
            max_cache_size (int): Maximum number of segments to cache (uses config if None)
            prebuffer_segments (int): Number of segments to pre-buffer ahead (uses config if None)
        """
        self.max_cache_size = max_cache_size or settings.hls_prebuffer_cache_size
        self.prebuffer_segments = prebuffer_segments or settings.hls_prebuffer_segments
        self.max_memory_percent = settings.hls_prebuffer_max_memory_percent
        self.emergency_threshold = settings.hls_prebuffer_emergency_threshold
        self.segment_cache: Dict[str, bytes] = {}
        self.segment_urls: Dict[str, List[str]] = {}
        self.client = create_httpx_client()

    async def prebuffer_playlist(self, playlist_url: str, headers: Dict[str, str]) -> None:
        """
        Pre-buffer segments from an HLS playlist.

        Args:
            playlist_url (str): URL of the HLS playlist
            headers (Dict[str, str]): Headers to use for requests
        """
        try:
            logger.debug(f"Starting pre-buffer for playlist: {playlist_url}")

            # Download and parse playlist
            response = await self.client.get(playlist_url, headers=headers)
            response.raise_for_status()
            playlist_content = response.text

            # Check if this is a master playlist (contains variants)
            if "#EXT-X-STREAM-INF" in playlist_content:
                logger.debug("Master playlist detected, finding first variant")
                # Extract variant URLs
                variant_urls = self._extract_variant_urls(playlist_content, playlist_url)
                if variant_urls:
                    # Pre-buffer the first variant
                    first_variant_url = variant_urls[0]
                    logger.debug(f"Pre-buffering first variant: {first_variant_url}")
                    await self.prebuffer_playlist(first_variant_url, headers)
                else:
                    logger.warning("No variants found in master playlist")
                return

            # Extract segment URLs
            segment_urls = self._extract_segment_urls(playlist_content, playlist_url)

            # Store segment URLs for this playlist
            self.segment_urls[playlist_url] = segment_urls

            # Pre-buffer first few segments
            await self._prebuffer_segments(segment_urls[:self.prebuffer_segments], headers)

            logger.info(f"Pre-buffered {min(self.prebuffer_segments, len(segment_urls))} segments for {playlist_url}")

        except Exception as e:
            logger.warning(f"Failed to pre-buffer playlist {playlist_url}: {e}")

    def _extract_segment_urls(self, playlist_content: str, base_url: str) -> List[str]:
        """
        Extract segment URLs from HLS playlist content.

        Args:
            playlist_content (str): Content of the HLS playlist
            base_url (str): Base URL for resolving relative URLs

        Returns:
            List[str]: List of segment URLs
        """
        segment_urls = []
        lines = playlist_content.split('\n')

        logger.debug(f"Analyzing playlist with {len(lines)} lines")

        for line in lines:
            line = line.strip()
            if line and not line.startswith('#'):
                # Check if the line is an absolute URL or a relative path
                if 'http://' in line or 'https://' in line:
                    segment_urls.append(line)
                    logger.debug(f"Found absolute URL: {line}")
                else:
                    # This is a relative path to a segment
                    parsed_base = urlparse(base_url)
                    # Ensure proper path joining
                    if line.startswith('/'):
                        segment_url = f"{parsed_base.scheme}://{parsed_base.netloc}{line}"
                    else:
                        # Get the directory path from base_url
                        base_path = parsed_base.path.rsplit('/', 1)[0] if '/' in parsed_base.path else ''
                        segment_url = f"{parsed_base.scheme}://{parsed_base.netloc}{base_path}/{line}"
                    segment_urls.append(segment_url)
                    logger.debug(f"Found relative path: {line} -> {segment_url}")

        logger.debug(f"Extracted {len(segment_urls)} segment URLs from playlist")
        if segment_urls:
            logger.debug(f"First segment URL: {segment_urls[0]}")
        else:
            logger.debug("No segment URLs found in playlist")
            # Log first few lines for debugging
            for i, line in enumerate(lines[:10]):
                logger.debug(f"Line {i}: {line}")

        return segment_urls

    def _extract_variant_urls(self, playlist_content: str, base_url: str) -> List[str]:
        """
        Extract variant URLs from master playlist content.

        Args:
            playlist_content (str): Content of the master playlist
            base_url (str): Base URL for resolving relative URLs

        Returns:
            List[str]: List of variant URLs
        """
        variant_urls = []
        lines = playlist_content.split('\n')

        for line in lines:
            line = line.strip()
            if line and not line.startswith('#'):
                if line.startswith(('http://', 'https://')):
                    variant_urls.append(line)
                else:
                    # Resolve relative variant paths against the playlist URL
                    variant_urls.append(urljoin(base_url, line))

        logger.debug(f"Extracted {len(variant_urls)} variant URLs from master playlist")
        if variant_urls:
            logger.debug(f"First variant URL: {variant_urls[0]}")

        return variant_urls

    async def _prebuffer_segments(self, segment_urls: List[str], headers: Dict[str, str]) -> None:
        """
        Pre-buffer specific segments.

        Args:
            segment_urls (List[str]): List of segment URLs to pre-buffer
            headers (Dict[str, str]): Headers to use for requests
        """
        tasks = []
        for url in segment_urls:
            if url not in self.segment_cache:
                tasks.append(self._download_segment(url, headers))

        if tasks:
            await asyncio.gather(*tasks, return_exceptions=True)

    def _get_memory_usage_percent(self) -> float:
        """
        Get current memory usage percentage.

        Returns:
            float: Memory usage percentage
        """
        try:
            memory = psutil.virtual_memory()
            return memory.percent
        except Exception as e:
            logger.warning(f"Failed to get memory usage: {e}")
            return 0.0

    def _check_memory_threshold(self) -> bool:
        """
        Check if memory usage exceeds the emergency threshold.

        Returns:
            bool: True if emergency cleanup is needed
        """
        memory_percent = self._get_memory_usage_percent()
        return memory_percent > self.emergency_threshold

    def _emergency_cache_cleanup(self) -> None:
        """
        Perform emergency cache cleanup when memory usage is high.
        """
        if self._check_memory_threshold():
            logger.warning("Emergency cache cleanup triggered due to high memory usage")
            # Clear 50% of cache
            cache_size = len(self.segment_cache)
            keys_to_remove = list(self.segment_cache.keys())[:cache_size // 2]
            for key in keys_to_remove:
                del self.segment_cache[key]
            logger.info(f"Emergency cleanup removed {len(keys_to_remove)} segments from cache")

    async def _download_segment(self, segment_url: str, headers: Dict[str, str]) -> None:
        """
        Download a single segment and cache it.

        Args:
            segment_url (str): URL of the segment to download
            headers (Dict[str, str]): Headers to use for request
        """
        try:
            # Check memory usage before downloading
            memory_percent = self._get_memory_usage_percent()
            if memory_percent > self.max_memory_percent:
                logger.warning(f"Memory usage {memory_percent}% exceeds limit {self.max_memory_percent}%, skipping download")
                return

            response = await self.client.get(segment_url, headers=headers)
            response.raise_for_status()

            # Cache the segment
            self.segment_cache[segment_url] = response.content

            # Check for emergency cleanup
            if self._check_memory_threshold():
                self._emergency_cache_cleanup()
            # Maintain cache size
            elif len(self.segment_cache) > self.max_cache_size:
                # Remove oldest entries (simple FIFO)
                oldest_key = next(iter(self.segment_cache))
                del self.segment_cache[oldest_key]

            logger.debug(f"Cached segment: {segment_url}")

        except Exception as e:
            logger.warning(f"Failed to download segment {segment_url}: {e}")

    async def get_segment(self, segment_url: str, headers: Dict[str, str]) -> Optional[bytes]:
        """
        Get a segment from cache or download it.

        Args:
            segment_url (str): URL of the segment
            headers (Dict[str, str]): Headers to use for request

        Returns:
            Optional[bytes]: Cached segment data or None if not available
        """
        # Check cache first
        if segment_url in self.segment_cache:
            logger.debug(f"Cache hit for segment: {segment_url}")
            return self.segment_cache[segment_url]

        # Check memory usage before downloading
        memory_percent = self._get_memory_usage_percent()
        if memory_percent > self.max_memory_percent:
            logger.warning(f"Memory usage {memory_percent}% exceeds limit {self.max_memory_percent}%, skipping download")
            return None

        # Download if not in cache
        try:
            response = await self.client.get(segment_url, headers=headers)
            response.raise_for_status()
            segment_data = response.content

            # Cache the segment
            self.segment_cache[segment_url] = segment_data

            # Check for emergency cleanup
            if self._check_memory_threshold():
                self._emergency_cache_cleanup()
            # Maintain cache size
            elif len(self.segment_cache) > self.max_cache_size:
                oldest_key = next(iter(self.segment_cache))
                del self.segment_cache[oldest_key]

            logger.debug(f"Downloaded and cached segment: {segment_url}")
            return segment_data

        except Exception as e:
            logger.warning(f"Failed to get segment {segment_url}: {e}")
            return None

    async def prebuffer_next_segments(self, playlist_url: str, current_segment_index: int, headers: Dict[str, str]) -> None:
        """
        Pre-buffer next segments based on current playback position.

        Args:
            playlist_url (str): URL of the playlist
            current_segment_index (int): Index of current segment
            headers (Dict[str, str]): Headers to use for requests
        """
        if playlist_url not in self.segment_urls:
            return

        segment_urls = self.segment_urls[playlist_url]
        next_segments = segment_urls[current_segment_index + 1:current_segment_index + 1 + self.prebuffer_segments]

        if next_segments:
            await self._prebuffer_segments(next_segments, headers)

    def clear_cache(self) -> None:
        """Clear the segment cache."""
        self.segment_cache.clear()
        self.segment_urls.clear()
        logger.info("HLS pre-buffer cache cleared")

    async def close(self) -> None:
        """Close the pre-buffer system."""
        await self.client.aclose()


# Global pre-buffer instance
hls_prebuffer = HLSPreBuffer()
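Usage mirrors the DASH pre-buffer; a minimal sketch with placeholder URL and headers:

import asyncio

from mediaflow_proxy.utils.hls_prebuffer import hls_prebuffer


async def warm_up() -> None:
    headers = {"Referer": "https://example.com/"}  # placeholder headers
    playlist_url = "https://example.com/live/index.m3u8"  # placeholder media playlist

    # Master playlists recurse into their first variant; media playlists
    # download the first `prebuffer_segments` segments concurrently
    await hls_prebuffer.prebuffer_playlist(playlist_url, headers)

    # As playback advances, slide the pre-buffer window forward
    # (segment lists are tracked per media playlist URL)
    await hls_prebuffer.prebuffer_next_segments(playlist_url, 0, headers)


asyncio.run(warm_up())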
@@ -1,3 +1,4 @@
import asyncio
import codecs
import re
from typing import AsyncGenerator
@@ -6,6 +7,7 @@ from urllib import parse
from mediaflow_proxy.configs import settings
from mediaflow_proxy.utils.crypto_utils import encryption_handler
from mediaflow_proxy.utils.http_utils import encode_mediaflow_proxy_url, encode_stremio_proxy_url, get_original_scheme
from mediaflow_proxy.utils.hls_prebuffer import hls_prebuffer


class M3U8Processor:
@@ -24,6 +26,7 @@ class M3U8Processor:
        self.mediaflow_proxy_url = str(
            request.url_for("hls_manifest_proxy").replace(scheme=get_original_scheme(request))
        )
        self.playlist_url = None  # Will be set when processing starts

    async def process_m3u8(self, content: str, base_url: str) -> str:
        """
@@ -36,6 +39,9 @@ class M3U8Processor:
        Returns:
            str: The processed m3u8 content.
        """
        # Store the playlist URL for prebuffering
        self.playlist_url = base_url

        lines = content.splitlines()
        processed_lines = []
        for line in lines:
@@ -45,6 +51,23 @@ class M3U8Processor:
                processed_lines.append(await self.proxy_content_url(line, base_url))
            else:
                processed_lines.append(line)

        # Pre-buffer segments if enabled and this is a playlist
        if (settings.enable_hls_prebuffer and
                "#EXTM3U" in content and
                self.playlist_url):

            # Extract headers from request for pre-buffering
            headers = {}
            for key, value in self.request.query_params.items():
                if key.startswith("h_"):
                    headers[key[2:]] = value

            # Start pre-buffering in background using the actual playlist URL
            asyncio.create_task(
                hls_prebuffer.prebuffer_playlist(self.playlist_url, headers)
            )

        return "\n".join(processed_lines)

    async def process_m3u8_streaming(
@@ -52,6 +75,7 @@ class M3U8Processor:
    ) -> AsyncGenerator[str, None]:
        """
        Processes the m3u8 content on-the-fly, yielding processed lines as they are read.
        Optimized to avoid accumulating the entire playlist content in memory.

        Args:
            content_iterator: An async iterator that yields chunks of the m3u8 content.
@@ -60,8 +84,13 @@ class M3U8Processor:
        Yields:
            str: Processed lines of the m3u8 content.
        """
        # Store the playlist URL for prebuffering
        self.playlist_url = base_url

        buffer = ""  # String buffer for decoded content
        decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
        is_playlist_detected = False
        is_prebuffer_started = False

        # Process the content chunk by chunk
        async for chunk in content_iterator:
@@ -72,6 +101,10 @@ class M3U8Processor:
            decoded_chunk = decoder.decode(chunk)
            buffer += decoded_chunk

            # Check for playlist marker early to avoid accumulating content
            if not is_playlist_detected and "#EXTM3U" in buffer:
                is_playlist_detected = True

            # Process complete lines
            lines = buffer.split("\n")
            if len(lines) > 1:
@@ -84,6 +117,25 @@ class M3U8Processor:
                # Keep the last line in the buffer (it might be incomplete)
                buffer = lines[-1]

            # Start pre-buffering early once we detect this is a playlist
            # This avoids waiting until the entire playlist is processed
            if (settings.enable_hls_prebuffer and
                    is_playlist_detected and
                    not is_prebuffer_started and
                    self.playlist_url):

                # Extract headers from request for pre-buffering
                headers = {}
                for key, value in self.request.query_params.items():
                    if key.startswith("h_"):
                        headers[key[2:]] = value

                # Start pre-buffering in background using the actual playlist URL
                asyncio.create_task(
                    hls_prebuffer.prebuffer_playlist(self.playlist_url, headers)
                )
                is_prebuffer_started = True

        # Process any remaining data in the buffer plus final bytes
        final_chunk = decoder.decode(b"", final=True)
        if final_chunk:
@@ -209,4 +261,4 @@ class M3U8Processor:
                full_url,
                query_params=query_params,
                encryption_handler=encryption_handler if has_encrypted else None,
            )
        )
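The prebuffer hook reuses the proxy's existing convention of passing upstream headers as `h_`-prefixed query parameters. A hypothetical client request (the `d` destination parameter name and the host are assumptions for illustration):

from urllib.parse import urlencode

# Every `h_<name>` query parameter becomes an upstream request header
# when the pre-buffer downloads the playlist and its segments
params = {
    "d": "https://example.com/live/index.m3u8",  # upstream playlist (assumed parameter name)
    "h_referer": "https://example.com/",
    "h_user-agent": "Mozilla/5.0",
}
proxy_url = f"https://mediaflow.example/proxy/hls/manifest.m3u8?{urlencode(params)}"

One design note: `asyncio.create_task()` makes pre-buffering fire-and-forget, so the manifest response is never delayed; since the event loop keeps only a weak reference to running tasks, retaining the returned task object would be the more defensive pattern.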