# Mirror of https://github.com/UrloMythus/UnHided.git (synced 2026-04-11 11:50:51 +00:00)
"""
|
|
Base prebuffer class with shared functionality for HLS and DASH prebuffering.
|
|
|
|
This module provides cross-process download coordination using Redis-based locking
|
|
to prevent duplicate downloads across multiple uvicorn workers. Both player requests
|
|
and background prebuffer tasks use the same coordination mechanism.
|
|
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
import time
|
|
import psutil
|
|
from abc import ABC
|
|
from dataclasses import dataclass, field
|
|
from typing import Dict, Optional
|
|
|
|
from mediaflow_proxy.utils.cache_utils import (
|
|
get_cached_segment,
|
|
set_cached_segment,
|
|
)
|
|
from mediaflow_proxy.utils.http_utils import download_file_with_retry
|
|
from mediaflow_proxy.utils import redis_utils
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
class PrebufferStats:
    """Statistics for prebuffer performance tracking."""

    cache_hits: int = 0
    cache_misses: int = 0
    segments_prebuffered: int = 0
    bytes_prebuffered: int = 0
    prefetch_triggered: int = 0
    downloads_coordinated: int = 0  # Times we waited for an existing download
    last_reset: float = field(default_factory=time.time)

    # Counter fields cleared by reset(); last_reset is handled separately.
    _COUNTERS = (
        "cache_hits",
        "cache_misses",
        "segments_prebuffered",
        "bytes_prebuffered",
        "prefetch_triggered",
        "downloads_coordinated",
    )

    @property
    def hit_rate(self) -> float:
        """Cache hit rate as a percentage (0.0 when no lookups recorded)."""
        attempts = self.cache_hits + self.cache_misses
        if not attempts:
            return 0.0
        return self.cache_hits / attempts * 100

    def reset(self) -> None:
        """Zero out every counter and restart the uptime clock."""
        for name in self._COUNTERS:
            setattr(self, name, 0)
        self.last_reset = time.time()

    def to_dict(self) -> dict:
        """Render the stats as a plain dict suitable for structured logging."""
        megabytes = self.bytes_prebuffered / 1024 / 1024
        return {
            "cache_hits": self.cache_hits,
            "cache_misses": self.cache_misses,
            "hit_rate": f"{self.hit_rate:.1f}%",
            "segments_prebuffered": self.segments_prebuffered,
            "bytes_prebuffered_mb": f"{megabytes:.2f}",
            "prefetch_triggered": self.prefetch_triggered,
            "downloads_coordinated": self.downloads_coordinated,
            "uptime_seconds": int(time.time() - self.last_reset),
        }
|
|
|
|
|
|
class BasePrebuffer(ABC):
    """
    Base class for prebuffer systems with cross-process download coordination.

    This class provides:
    - Cross-process coordination using Redis locks to prevent duplicate downloads
    - Memory usage monitoring
    - Cache statistics tracking
    - Shared download and caching logic

    The Redis-based locking ensures that even with multiple uvicorn workers,
    only one worker downloads any given segment at a time.

    Subclasses should implement protocol-specific logic (HLS playlist parsing,
    DASH MPD handling, etc.) while inheriting the core download coordination.
    """

    def __init__(
        self,
        max_cache_size: int,
        prebuffer_segments: int,
        max_memory_percent: float,
        emergency_threshold: float,
        segment_ttl: int = 60,
    ):
        """
        Initialize the base prebuffer.

        Args:
            max_cache_size: Maximum number of segments to track
            prebuffer_segments: Number of segments to pre-buffer ahead
            max_memory_percent: Maximum memory usage percentage before skipping prebuffer
            emergency_threshold: Memory threshold for emergency cleanup
            segment_ttl: TTL for cached segments in seconds
        """
        self.max_cache_size = max_cache_size
        self.prebuffer_segment_count = prebuffer_segments
        self.max_memory_percent = max_memory_percent
        self.emergency_threshold = emergency_threshold
        self.segment_ttl = segment_ttl

        # Statistics (per-worker, not shared - but that's fine for monitoring)
        self.stats = PrebufferStats()

        # Periodic stats-logging task; started lazily on first activity so
        # idle workers never spawn it.
        self._stats_task: Optional[asyncio.Task] = None
        self._stats_interval = 60  # Log stats every 60 seconds

    def _get_memory_usage_percent(self) -> float:
        """Return current system memory usage percentage (0.0 on failure)."""
        try:
            return psutil.virtual_memory().percent
        except Exception as e:
            logger.warning("Failed to get memory usage: %s", e)
            return 0.0

    def _check_memory_threshold(self) -> bool:
        """Check if memory usage exceeds the emergency threshold."""
        return self._get_memory_usage_percent() > self.emergency_threshold

    def _should_skip_for_memory(self) -> bool:
        """Check if we should skip prebuffering due to high memory usage."""
        return self._get_memory_usage_percent() > self.max_memory_percent

    def record_cache_hit(self) -> None:
        """Record a cache hit for statistics."""
        self.stats.cache_hits += 1
        self._ensure_stats_logging()

    def record_cache_miss(self) -> None:
        """Record a cache miss for statistics."""
        self.stats.cache_misses += 1
        self._ensure_stats_logging()

    def _ensure_stats_logging(self) -> None:
        """Ensure the stats logging task is running.

        Safe to call from synchronous code: asyncio.create_task() raises
        RuntimeError when no event loop is running, in which case we simply
        skip starting the task. Stats still accumulate, and the task will be
        started by the next call made from an async context.
        """
        if self._stats_task is None or self._stats_task.done():
            try:
                self._stats_task = asyncio.create_task(self._periodic_stats_logging())
            except RuntimeError:
                # No running event loop (caller is outside async context).
                pass

    async def _periodic_stats_logging(self) -> None:
        """Periodically log prebuffer statistics until cancelled."""
        while True:
            try:
                await asyncio.sleep(self._stats_interval)

                # Only log if there's been activity
                if self.stats.cache_hits > 0 or self.stats.cache_misses > 0:
                    self.log_stats()
            except asyncio.CancelledError:
                return
            except Exception as e:
                logger.warning("Error in stats logging: %s", e)

    async def get_or_download(
        self,
        url: str,
        headers: Dict[str, str],
        timeout: float = 10.0,
    ) -> Optional[bytes]:
        """
        Get a segment from cache or download it, with cross-process coordination.

        This is the primary method for getting segments. It:
        1. Checks cache first (immediate return if hit)
        2. Acquires Redis lock to prevent duplicate downloads across workers
        3. Double-checks cache after acquiring lock
        4. Downloads and caches if needed

        The Redis-based locking ensures that even with multiple uvicorn workers,
        only one worker downloads any given segment at a time.

        Args:
            url: URL of the segment to get
            headers: Headers to use for the request
            timeout: Maximum time to wait for lock acquisition (seconds).
                Keep this short (10s) for player requests - if lock is held
                too long, fall back to direct streaming.

        Returns:
            Segment data if successful, None if failed or timed out
        """
        self._ensure_stats_logging()

        # Check cache first (Redis cache is shared across workers)
        cached = await get_cached_segment(url)
        if cached:
            self.record_cache_hit()
            logger.info("[get_or_download] CACHE HIT (%s bytes): %s", len(cached), url)
            return cached

        # Cache miss - need to coordinate download across workers
        logger.info("[get_or_download] CACHE MISS: %s", url)

        lock_key = f"segment_download:{url}"
        lock_acquired = False

        try:
            # Acquire Redis lock - only one worker downloads at a time
            lock_acquired = await redis_utils.acquire_lock(lock_key, ttl=30, timeout=timeout)

            if not lock_acquired:
                logger.warning(
                    "[get_or_download] Lock TIMEOUT (%ss), falling back to streaming: %s", timeout, url
                )
                return None

            # Double-check cache after acquiring lock
            # Another worker may have completed the download while we waited
            cached = await get_cached_segment(url)
            if cached:
                # Count this as a cache hit since we didn't download
                self.record_cache_hit()
                self.stats.downloads_coordinated += 1
                logger.info("[get_or_download] Found in cache after lock (coordinated): %s", url)
                return cached

            # We're the one who needs to download - count as miss now
            self.record_cache_miss()

            # We're the first - download and cache
            logger.info("[get_or_download] Downloading: %s", url)
            return await self._download_and_cache(url, headers)

        except Exception as e:
            logger.warning("[get_or_download] Error during download coordination: %s", e)
            return None
        finally:
            # Always release the lock we acquired, even on error/early return.
            if lock_acquired:
                await redis_utils.release_lock(lock_key)

    async def _download_and_cache(
        self,
        url: str,
        headers: Dict[str, str],
    ) -> Optional[bytes]:
        """
        Download a segment and cache it.

        This method should only be called while holding the Redis lock.

        Args:
            url: URL to download
            headers: Headers for the request

        Returns:
            Downloaded content if successful, None otherwise
        """
        try:
            content = await download_file_with_retry(url, headers)
            if content:
                logger.info("[_download_and_cache] Downloaded %s bytes, caching: %s", len(content), url)
                await set_cached_segment(url, content, ttl=self.segment_ttl)
                self.stats.segments_prebuffered += 1
                self.stats.bytes_prebuffered += len(content)
                return content
            else:
                logger.warning("[_download_and_cache] Download returned empty: %s", url)
                return None
        except Exception as e:
            logger.warning("[_download_and_cache] Failed to download: %s - %s", url, e)
            return None

    async def try_get_cached(self, url: str) -> Optional[bytes]:
        """
        Check cache only, don't download.

        Use this for background prebuffer tasks that shouldn't block
        if segment isn't available yet.

        Args:
            url: URL to check in cache

        Returns:
            Cached data if available, None otherwise
        """
        return await get_cached_segment(url)

    async def prebuffer_segment(self, url: str, headers: Dict[str, str]) -> None:
        """
        Prebuffer a single segment in the background.

        This method uses Redis locking to prevent duplicate downloads
        across multiple workers.

        Args:
            url: URL of segment to prebuffer
            headers: Headers for the request
        """
        if self._should_skip_for_memory():
            logger.debug("Skipping prebuffer due to high memory usage")
            return

        # Check if already cached
        cached = await get_cached_segment(url)
        if cached:
            logger.debug("[prebuffer_segment] Already cached, skipping: %s", url)
            return

        lock_key = f"segment_download:{url}"
        lock_acquired = False

        try:
            # Try to acquire lock with short timeout for prebuffering.
            # If lock is held by another process, skip this segment rather
            # than queue behind it - it's a best-effort background task.
            lock_acquired = await redis_utils.acquire_lock(lock_key, ttl=30, timeout=1.0)

            if not lock_acquired:
                # Another process is downloading, skip this segment
                logger.debug("[prebuffer_segment] Lock busy, skipping: %s", url)
                return

            # Double-check cache after acquiring lock
            cached = await get_cached_segment(url)
            if cached:
                logger.debug("[prebuffer_segment] Found in cache after lock: %s", url)
                return

            # Download and cache
            logger.info("[prebuffer_segment] Downloading: %s", url)
            await self._download_and_cache(url, headers)

        except Exception as e:
            logger.warning("[prebuffer_segment] Error: %s", e)
        finally:
            if lock_acquired:
                await redis_utils.release_lock(lock_key)

    async def prebuffer_segments_batch(
        self,
        urls: list,
        headers: Dict[str, str],
        max_concurrent: int = 2,
    ) -> None:
        """
        Prebuffer multiple segments with concurrency control.

        Args:
            urls: List of segment URLs to prebuffer
            headers: Headers for requests
            max_concurrent: Maximum concurrent downloads (default 2 to avoid
                lock contention with player requests)
        """
        if self._should_skip_for_memory():
            logger.warning("Skipping prebuffer due to high memory usage")
            return

        semaphore = asyncio.Semaphore(max_concurrent)

        async def limited_prebuffer(url: str):
            async with semaphore:
                await self.prebuffer_segment(url, headers)

        # Run all prebuffer tasks; return_exceptions keeps one failure from
        # cancelling the rest of the batch.
        tasks = [limited_prebuffer(url) for url in urls]
        await asyncio.gather(*tasks, return_exceptions=True)

    def log_stats(self) -> None:
        """Log current prebuffer statistics."""
        logger.info(f"Prebuffer Stats: {self.stats.to_dict()}")
|