update

2026-04-11 11:50:51 +00:00 · 2026-02-19 20:15:03 +01:00
parent 7785e8c604
commit cfc6bbabc9
181 changed files with 32141 additions and 4629 deletions
--- a/mediaflow_proxy/utils/base_prebuffer.py
+++ b/mediaflow_proxy/utils/base_prebuffer.py
@@ -0,0 +1,367 @@
+"""
+Base prebuffer class with shared functionality for HLS and DASH prebuffering.
+
+This module provides cross-process download coordination using Redis-based locking
+to prevent duplicate downloads across multiple uvicorn workers. Both player requests
+and background prebuffer tasks use the same coordination mechanism.
+"""
+
+import asyncio
+import logging
+import time
+import psutil
+from abc import ABC
+from dataclasses import dataclass, field
+from typing import Dict, Optional
+
+from mediaflow_proxy.utils.cache_utils import (
+    get_cached_segment,
+    set_cached_segment,
+)
+from mediaflow_proxy.utils.http_utils import download_file_with_retry
+from mediaflow_proxy.utils import redis_utils
+
+# Module-level logger, named after this module via __name__.
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class PrebufferStats:
+    """Counters describing how well the prebuffer is performing."""
+
+    cache_hits: int = 0
+    cache_misses: int = 0
+    segments_prebuffered: int = 0
+    bytes_prebuffered: int = 0
+    prefetch_triggered: int = 0
+    downloads_coordinated: int = 0  # Times we waited for existing download
+    last_reset: float = field(default_factory=time.time)
+
+    @property
+    def hit_rate(self) -> float:
+        """Return the cache hit rate as a percentage (0.0 when nothing was looked up)."""
+        lookups = self.cache_hits + self.cache_misses
+        if lookups > 0:
+            return self.cache_hits / lookups * 100
+        return 0.0
+
+    def reset(self) -> None:
+        """Zero every counter and restart the uptime clock."""
+        for counter_name in (
+            "cache_hits",
+            "cache_misses",
+            "segments_prebuffered",
+            "bytes_prebuffered",
+            "prefetch_triggered",
+            "downloads_coordinated",
+        ):
+            setattr(self, counter_name, 0)
+        self.last_reset = time.time()
+
+    def to_dict(self) -> dict:
+        """Render the counters as a dictionary suitable for logging."""
+        hit_rate_text = f"{self.hit_rate:.1f}%"
+        megabytes_text = f"{self.bytes_prebuffered / 1024 / 1024:.2f}"
+        seconds_since_reset = int(time.time() - self.last_reset)
+
+        snapshot = {}
+        snapshot["cache_hits"] = self.cache_hits
+        snapshot["cache_misses"] = self.cache_misses
+        snapshot["hit_rate"] = hit_rate_text
+        snapshot["segments_prebuffered"] = self.segments_prebuffered
+        snapshot["bytes_prebuffered_mb"] = megabytes_text
+        snapshot["prefetch_triggered"] = self.prefetch_triggered
+        snapshot["downloads_coordinated"] = self.downloads_coordinated
+        snapshot["uptime_seconds"] = seconds_since_reset
+        return snapshot
+
+
+class BasePrebuffer(ABC):
+    """
+    Base class for prebuffer systems with cross-process download coordination.
+
+    This class provides:
+    - Download coordination through ``redis_utils`` locks keyed by segment URL,
+      so concurrent callers do not download the same segment twice
+    - Memory usage monitoring (via ``psutil``)
+    - Cache statistics tracking (kept per instance in ``self.stats``)
+    - Shared download and caching logic
+
+    Subclasses should implement protocol-specific logic (HLS playlist parsing,
+    DASH MPD handling, etc.) while inheriting the core download coordination.
+
+    NOTE(review): locks are requested with ``ttl=LOCK_TTL_SECONDS``; presumably
+    a download that outlives the TTL loses exclusivity - confirm against
+    ``redis_utils``.
+    """
+
+    # TTL (seconds) passed to redis_utils.acquire_lock for segment download locks.
+    LOCK_TTL_SECONDS = 30
+    # How long (seconds) a background prebuffer waits for a lock before skipping.
+    PREBUFFER_LOCK_TIMEOUT = 1.0
+
+    def __init__(
+        self,
+        max_cache_size: int,
+        prebuffer_segments: int,
+        max_memory_percent: float,
+        emergency_threshold: float,
+        segment_ttl: int = 60,
+    ):
+        """
+        Initialize the base prebuffer.
+
+        Args:
+            max_cache_size: Maximum number of segments to track
+            prebuffer_segments: Number of segments to pre-buffer ahead
+            max_memory_percent: Maximum memory usage percentage before skipping prebuffer
+            emergency_threshold: Memory threshold for emergency cleanup
+            segment_ttl: TTL for cached segments in seconds
+        """
+        self.max_cache_size = max_cache_size
+        self.prebuffer_segment_count = prebuffer_segments
+        self.max_memory_percent = max_memory_percent
+        self.emergency_threshold = emergency_threshold
+        self.segment_ttl = segment_ttl
+
+        # Statistics (per-worker, not shared - but that's fine for monitoring)
+        self.stats = PrebufferStats()
+
+        # Stats logging task
+        self._stats_task: Optional[asyncio.Task] = None
+        self._stats_interval = 60  # Log stats every 60 seconds
+
+    @staticmethod
+    def _segment_lock_key(url: str) -> str:
+        """Build the lock key shared by every code path that downloads ``url``."""
+        return f"segment_download:{url}"
+
+    async def _release_lock_quietly(self, lock_key: str) -> None:
+        """
+        Release a download lock, logging instead of raising on failure.
+
+        A failed release must not discard a segment that was already
+        downloaded, nor escape from methods that promise to return None on
+        errors.
+        """
+        try:
+            await redis_utils.release_lock(lock_key)
+        except Exception as e:
+            logger.warning(f"Failed to release lock {lock_key}: {e}")
+
+    def _get_memory_usage_percent(self) -> float:
+        """Get current memory usage percentage (0.0 if it cannot be read)."""
+        try:
+            memory = psutil.virtual_memory()
+            return memory.percent
+        except Exception as e:
+            logger.warning(f"Failed to get memory usage: {e}")
+            return 0.0
+
+    def _check_memory_threshold(self) -> bool:
+        """Check if memory usage exceeds the emergency threshold."""
+        return self._get_memory_usage_percent() > self.emergency_threshold
+
+    def _should_skip_for_memory(self) -> bool:
+        """Check if we should skip prebuffering due to high memory usage."""
+        return self._get_memory_usage_percent() > self.max_memory_percent
+
+    def record_cache_hit(self) -> None:
+        """Record a cache hit for statistics."""
+        self.stats.cache_hits += 1
+        self._ensure_stats_logging()
+
+    def record_cache_miss(self) -> None:
+        """Record a cache miss for statistics."""
+        self.stats.cache_misses += 1
+        self._ensure_stats_logging()
+
+    def _ensure_stats_logging(self) -> None:
+        """
+        Ensure the stats logging task is running.
+
+        Does nothing when there is no running event loop, so the synchronous
+        ``record_cache_hit`` / ``record_cache_miss`` methods stay usable from
+        non-async code; the task is started on the next call made inside a loop.
+        """
+        if self._stats_task is not None and not self._stats_task.done():
+            return
+
+        try:
+            loop = asyncio.get_running_loop()
+        except RuntimeError:
+            # No running loop: counters are still updated, only periodic
+            # logging is deferred. Checking before creating the coroutine also
+            # avoids a "coroutine was never awaited" warning.
+            return
+
+        self._stats_task = loop.create_task(self._periodic_stats_logging())
+
+    async def _periodic_stats_logging(self) -> None:
+        """Log prebuffer statistics every ``_stats_interval`` seconds until cancelled."""
+        while True:
+            try:
+                await asyncio.sleep(self._stats_interval)
+
+                # Only log if there's been activity
+                if self.stats.cache_hits > 0 or self.stats.cache_misses > 0:
+                    self.log_stats()
+            except asyncio.CancelledError:
+                return
+            except Exception as e:
+                logger.warning(f"Error in stats logging: {e}")
+
+    async def get_or_download(
+        self,
+        url: str,
+        headers: Dict[str, str],
+        timeout: float = 10.0,
+    ) -> Optional[bytes]:
+        """
+        Get a segment from cache or download it, with cross-process coordination.
+
+        This is the primary method for getting segments. It:
+        1. Checks cache first (immediate return if hit)
+        2. Acquires the per-URL lock to prevent duplicate downloads
+        3. Double-checks cache after acquiring lock
+        4. Downloads and caches if needed
+
+        Args:
+            url: URL of the segment to get
+            headers: Headers to use for the request
+            timeout: Maximum time to wait for lock acquisition (seconds).
+                     Keep this short (10s) for player requests - if lock is held
+                     too long, fall back to direct streaming.
+
+        Returns:
+            Segment data if successful, None if failed or timed out
+        """
+        self._ensure_stats_logging()
+
+        # Check cache first, before taking any lock
+        cached = await get_cached_segment(url)
+        if cached:
+            self.record_cache_hit()
+            logger.info(f"[get_or_download] CACHE HIT ({len(cached)} bytes): {url}")
+            return cached
+
+        # Cache miss - need to coordinate download across workers
+        logger.info(f"[get_or_download] CACHE MISS: {url}")
+
+        lock_key = self._segment_lock_key(url)
+        lock_acquired = False
+
+        try:
+            # Acquire the per-URL lock - only the holder downloads
+            lock_acquired = await redis_utils.acquire_lock(
+                lock_key, ttl=self.LOCK_TTL_SECONDS, timeout=timeout
+            )
+
+            if not lock_acquired:
+                logger.warning(f"[get_or_download] Lock TIMEOUT ({timeout}s), falling back to streaming: {url}")
+                return None
+
+            # Double-check cache after acquiring lock
+            # Another worker may have completed the download while we waited
+            cached = await get_cached_segment(url)
+            if cached:
+                # Count this as a cache hit since we didn't download
+                self.record_cache_hit()
+                self.stats.downloads_coordinated += 1
+                logger.info(f"[get_or_download] Found in cache after lock (coordinated): {url}")
+                return cached
+
+            # We're the one who needs to download - count as miss now
+            self.record_cache_miss()
+
+            # We're the first - download and cache
+            logger.info(f"[get_or_download] Downloading: {url}")
+            return await self._download_and_cache(url, headers)
+
+        except Exception as e:
+            logger.warning(f"[get_or_download] Error during download coordination: {e}")
+            return None
+        finally:
+            if lock_acquired:
+                await self._release_lock_quietly(lock_key)
+
+    async def _download_and_cache(
+        self,
+        url: str,
+        headers: Dict[str, str],
+    ) -> Optional[bytes]:
+        """
+        Download a segment and cache it.
+
+        This method should only be called while holding the download lock.
+
+        Args:
+            url: URL to download
+            headers: Headers for the request
+
+        Returns:
+            Downloaded content if successful, None otherwise
+        """
+        try:
+            content = await download_file_with_retry(url, headers)
+            if content:
+                logger.info(f"[_download_and_cache] Downloaded {len(content)} bytes, caching: {url}")
+                await set_cached_segment(url, content, ttl=self.segment_ttl)
+                self.stats.segments_prebuffered += 1
+                self.stats.bytes_prebuffered += len(content)
+                return content
+            else:
+                logger.warning(f"[_download_and_cache] Download returned empty: {url}")
+                return None
+        except Exception as e:
+            logger.warning(f"[_download_and_cache] Failed to download: {url} - {e}")
+            return None
+
+    async def try_get_cached(self, url: str) -> Optional[bytes]:
+        """
+        Check cache only, don't download.
+
+        Use this for background prebuffer tasks that shouldn't block
+        if segment isn't available yet.
+
+        Args:
+            url: URL to check in cache
+
+        Returns:
+            Cached data if available, None otherwise
+        """
+        return await get_cached_segment(url)
+
+    async def prebuffer_segment(self, url: str, headers: Dict[str, str]) -> None:
+        """
+        Prebuffer a single segment in the background.
+
+        Uses the same per-URL lock as ``get_or_download`` but waits only
+        ``PREBUFFER_LOCK_TIMEOUT`` seconds for it; if the lock is busy the
+        segment is skipped rather than waited for.
+
+        Args:
+            url: URL of segment to prebuffer
+            headers: Headers for the request
+        """
+        if self._should_skip_for_memory():
+            logger.debug("Skipping prebuffer due to high memory usage")
+            return
+
+        # Check if already cached
+        cached = await get_cached_segment(url)
+        if cached:
+            logger.debug(f"[prebuffer_segment] Already cached, skipping: {url}")
+            return
+
+        lock_key = self._segment_lock_key(url)
+        lock_acquired = False
+
+        try:
+            # Try to acquire lock with short timeout for prebuffering
+            # If lock is held by another process, skip this segment
+            lock_acquired = await redis_utils.acquire_lock(
+                lock_key, ttl=self.LOCK_TTL_SECONDS, timeout=self.PREBUFFER_LOCK_TIMEOUT
+            )
+
+            if not lock_acquired:
+                # Another process is downloading, skip this segment
+                logger.debug(f"[prebuffer_segment] Lock busy, skipping: {url}")
+                return
+
+            # Double-check cache after acquiring lock
+            cached = await get_cached_segment(url)
+            if cached:
+                logger.debug(f"[prebuffer_segment] Found in cache after lock: {url}")
+                return
+
+            # Download and cache
+            logger.info(f"[prebuffer_segment] Downloading: {url}")
+            await self._download_and_cache(url, headers)
+
+        except Exception as e:
+            logger.warning(f"[prebuffer_segment] Error: {e}")
+        finally:
+            if lock_acquired:
+                await self._release_lock_quietly(lock_key)
+
+    async def prebuffer_segments_batch(
+        self,
+        urls: list,
+        headers: Dict[str, str],
+        max_concurrent: int = 2,
+    ) -> None:
+        """
+        Prebuffer multiple segments with concurrency control.
+
+        Args:
+            urls: List of segment URLs to prebuffer
+            headers: Headers for requests
+            max_concurrent: Maximum concurrent downloads (default 2 to avoid
+                           lock contention with player requests). Values below
+                           1 are treated as 1.
+        """
+        if self._should_skip_for_memory():
+            logger.warning("Skipping prebuffer due to high memory usage")
+            return
+
+        # A semaphore of 0 could never be acquired (the batch would hang) and a
+        # negative value raises ValueError, so always allow at least one slot.
+        semaphore = asyncio.Semaphore(max(1, max_concurrent))
+
+        async def limited_prebuffer(url: str):
+            async with semaphore:
+                await self.prebuffer_segment(url, headers)
+
+        # Start all prebuffer tasks; individual failures are collected, not raised
+        tasks = [limited_prebuffer(url) for url in urls]
+        await asyncio.gather(*tasks, return_exceptions=True)
+
+    def log_stats(self) -> None:
+        """Log current prebuffer statistics."""
+        logger.info(f"Prebuffer Stats: {self.stats.to_dict()}")