mirror of https://github.com/UrloMythus/UnHided.git (synced 2026-04-11 11:50:51 +00:00)
update
mediaflow_proxy/utils/base_prebuffer.py (new file, 367 lines)
@@ -0,0 +1,367 @@
"""
Base prebuffer class with shared functionality for HLS and DASH prebuffering.

This module provides cross-process download coordination using Redis-based locking
to prevent duplicate downloads across multiple uvicorn workers. Both player requests
and background prebuffer tasks use the same coordination mechanism.
"""

import asyncio
import logging
import time
from abc import ABC
from dataclasses import dataclass, field
from typing import Dict, Optional

import psutil

from mediaflow_proxy.utils.cache_utils import (
    get_cached_segment,
    set_cached_segment,
)
from mediaflow_proxy.utils.http_utils import download_file_with_retry
from mediaflow_proxy.utils import redis_utils

logger = logging.getLogger(__name__)
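
# Note: redis_utils is used here as an async lock API of roughly this shape
# (inferred from the call sites below, not from redis_utils itself):
#
#     acquire_lock(key: str, ttl: int, timeout: float) -> bool
#         Try to take a lock named `key` that auto-expires after `ttl` seconds,
#         waiting up to `timeout` seconds; returns True if acquired.
#     release_lock(key: str) -> None
#         Release the lock so other workers can proceed.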


@dataclass
class PrebufferStats:
    """Statistics for prebuffer performance tracking."""

    cache_hits: int = 0
    cache_misses: int = 0
    segments_prebuffered: int = 0
    bytes_prebuffered: int = 0
    prefetch_triggered: int = 0
    downloads_coordinated: int = 0  # Times we waited for an existing download
    last_reset: float = field(default_factory=time.time)

    @property
    def hit_rate(self) -> float:
        """Calculate cache hit rate percentage."""
        total = self.cache_hits + self.cache_misses
        return (self.cache_hits / total * 100) if total > 0 else 0.0

    def reset(self) -> None:
        """Reset statistics."""
        self.cache_hits = 0
        self.cache_misses = 0
        self.segments_prebuffered = 0
        self.bytes_prebuffered = 0
        self.prefetch_triggered = 0
        self.downloads_coordinated = 0
        self.last_reset = time.time()

    def to_dict(self) -> dict:
        """Convert stats to dictionary for logging."""
        return {
            "cache_hits": self.cache_hits,
            "cache_misses": self.cache_misses,
            "hit_rate": f"{self.hit_rate:.1f}%",
            "segments_prebuffered": self.segments_prebuffered,
            "bytes_prebuffered_mb": f"{self.bytes_prebuffered / 1024 / 1024:.2f}",
            "prefetch_triggered": self.prefetch_triggered,
            "downloads_coordinated": self.downloads_coordinated,
            "uptime_seconds": int(time.time() - self.last_reset),
        }
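
# For reference, to_dict() yields a flat dict suited to one-line logging, e.g.
# {"cache_hits": 42, "cache_misses": 8, "hit_rate": "84.0%", ...}
# (values illustrative, not from a real run).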


class BasePrebuffer(ABC):
    """
    Base class for prebuffer systems with cross-process download coordination.

    This class provides:
    - Cross-process coordination using Redis locks to prevent duplicate downloads
    - Memory usage monitoring
    - Cache statistics tracking
    - Shared download and caching logic

    The Redis-based locking ensures that even with multiple uvicorn workers,
    only one worker downloads any given segment at a time.

    Subclasses should implement protocol-specific logic (HLS playlist parsing,
    DASH MPD handling, etc.) while inheriting the core download coordination.
    """

    def __init__(
        self,
        max_cache_size: int,
        prebuffer_segments: int,
        max_memory_percent: float,
        emergency_threshold: float,
        segment_ttl: int = 60,
    ):
        """
        Initialize the base prebuffer.

        Args:
            max_cache_size: Maximum number of segments to track
            prebuffer_segments: Number of segments to pre-buffer ahead
            max_memory_percent: Maximum memory usage percentage before skipping prebuffer
            emergency_threshold: Memory threshold for emergency cleanup
            segment_ttl: TTL for cached segments in seconds
        """
        self.max_cache_size = max_cache_size
        self.prebuffer_segment_count = prebuffer_segments
        self.max_memory_percent = max_memory_percent
        self.emergency_threshold = emergency_threshold
        self.segment_ttl = segment_ttl

        # Statistics (per-worker, not shared - but that's fine for monitoring)
        self.stats = PrebufferStats()

        # Stats logging task
        self._stats_task: Optional[asyncio.Task] = None
        self._stats_interval = 60  # Log stats every 60 seconds

    def _get_memory_usage_percent(self) -> float:
        """Get current memory usage percentage."""
        try:
            memory = psutil.virtual_memory()
            return memory.percent
        except Exception as e:
            logger.warning(f"Failed to get memory usage: {e}")
            return 0.0

    def _check_memory_threshold(self) -> bool:
        """Check if memory usage exceeds the emergency threshold."""
        return self._get_memory_usage_percent() > self.emergency_threshold

    def _should_skip_for_memory(self) -> bool:
        """Check if we should skip prebuffering due to high memory usage."""
        return self._get_memory_usage_percent() > self.max_memory_percent

    def record_cache_hit(self) -> None:
        """Record a cache hit for statistics."""
        self.stats.cache_hits += 1
        self._ensure_stats_logging()

    def record_cache_miss(self) -> None:
        """Record a cache miss for statistics."""
        self.stats.cache_misses += 1
        self._ensure_stats_logging()

    def _ensure_stats_logging(self) -> None:
        """Ensure the stats logging task is running."""
        if self._stats_task is None or self._stats_task.done():
            self._stats_task = asyncio.create_task(self._periodic_stats_logging())

    async def _periodic_stats_logging(self) -> None:
        """Periodically log prebuffer statistics."""
        while True:
            try:
                await asyncio.sleep(self._stats_interval)

                # Only log if there's been activity
                if self.stats.cache_hits > 0 or self.stats.cache_misses > 0:
                    self.log_stats()
            except asyncio.CancelledError:
                return
            except Exception as e:
                logger.warning(f"Error in stats logging: {e}")

    async def get_or_download(
        self,
        url: str,
        headers: Dict[str, str],
        timeout: float = 10.0,
    ) -> Optional[bytes]:
        """
        Get a segment from cache or download it, with cross-process coordination.

        This is the primary method for getting segments. It:
        1. Checks cache first (immediate return if hit)
        2. Acquires Redis lock to prevent duplicate downloads across workers
        3. Double-checks cache after acquiring lock
        4. Downloads and caches if needed

        The Redis-based locking ensures that even with multiple uvicorn workers,
        only one worker downloads any given segment at a time.

        Args:
            url: URL of the segment to get
            headers: Headers to use for the request
            timeout: Maximum time to wait for lock acquisition (seconds).
                Keep this short (10s) for player requests - if lock is held
                too long, fall back to direct streaming.

        Returns:
            Segment data if successful, None if failed or timed out
        """
        self._ensure_stats_logging()

        # Check cache first (Redis cache is shared across workers)
        cached = await get_cached_segment(url)
        if cached:
            self.record_cache_hit()
            logger.info(f"[get_or_download] CACHE HIT ({len(cached)} bytes): {url}")
            return cached

        # Cache miss - need to coordinate download across workers
        logger.info(f"[get_or_download] CACHE MISS: {url}")

        lock_key = f"segment_download:{url}"
        lock_acquired = False

        try:
            # Acquire Redis lock - only one worker downloads at a time
            lock_acquired = await redis_utils.acquire_lock(lock_key, ttl=30, timeout=timeout)

            if not lock_acquired:
                logger.warning(f"[get_or_download] Lock TIMEOUT ({timeout}s), falling back to streaming: {url}")
                return None

            # Double-check cache after acquiring lock
            # Another worker may have completed the download while we waited
            cached = await get_cached_segment(url)
            if cached:
                # Count this as a cache hit since we didn't download
                self.record_cache_hit()
                self.stats.downloads_coordinated += 1
                logger.info(f"[get_or_download] Found in cache after lock (coordinated): {url}")
                return cached

            # We're the one who needs to download - count as miss now
            self.record_cache_miss()

            # We're the first - download and cache
            logger.info(f"[get_or_download] Downloading: {url}")
            content = await self._download_and_cache(url, headers)
            return content

        except Exception as e:
            logger.warning(f"[get_or_download] Error during download coordination: {e}")
            return None
        finally:
            if lock_acquired:
                await redis_utils.release_lock(lock_key)
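
    # Usage sketch (hypothetical caller, not part of this file): a segment
    # endpoint would try the coordinated path first and fall back to plain
    # proxy streaming when it returns None, e.g.
    #
    #     data = await prebuffer.get_or_download(segment_url, headers)
    #     if data is None:
    #         ...  # stream the segment directly from upstream instead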

    async def _download_and_cache(
        self,
        url: str,
        headers: Dict[str, str],
    ) -> Optional[bytes]:
        """
        Download a segment and cache it.

        This method should only be called while holding the Redis lock.

        Args:
            url: URL to download
            headers: Headers for the request

        Returns:
            Downloaded content if successful, None otherwise
        """
        try:
            content = await download_file_with_retry(url, headers)
            if content:
                logger.info(f"[_download_and_cache] Downloaded {len(content)} bytes, caching: {url}")
                await set_cached_segment(url, content, ttl=self.segment_ttl)
                self.stats.segments_prebuffered += 1
                self.stats.bytes_prebuffered += len(content)
                return content
            else:
                logger.warning(f"[_download_and_cache] Download returned empty: {url}")
                return None
        except Exception as e:
            logger.warning(f"[_download_and_cache] Failed to download: {url} - {e}")
            return None

    async def try_get_cached(self, url: str) -> Optional[bytes]:
        """
        Check cache only, don't download.

        Use this for background prebuffer tasks that shouldn't block
        if the segment isn't available yet.

        Args:
            url: URL to check in cache

        Returns:
            Cached data if available, None otherwise
        """
        return await get_cached_segment(url)

    async def prebuffer_segment(self, url: str, headers: Dict[str, str]) -> None:
        """
        Prebuffer a single segment in the background.

        This method uses Redis locking to prevent duplicate downloads
        across multiple workers.

        Args:
            url: URL of segment to prebuffer
            headers: Headers for the request
        """
        if self._should_skip_for_memory():
            logger.debug("Skipping prebuffer due to high memory usage")
            return

        # Check if already cached
        cached = await get_cached_segment(url)
        if cached:
            logger.debug(f"[prebuffer_segment] Already cached, skipping: {url}")
            return

        lock_key = f"segment_download:{url}"
        lock_acquired = False

        try:
            # Try to acquire lock with short timeout for prebuffering
            # If lock is held by another process, skip this segment
            lock_acquired = await redis_utils.acquire_lock(lock_key, ttl=30, timeout=1.0)

            if not lock_acquired:
                # Another process is downloading, skip this segment
                logger.debug(f"[prebuffer_segment] Lock busy, skipping: {url}")
                return

            # Double-check cache after acquiring lock
            cached = await get_cached_segment(url)
            if cached:
                logger.debug(f"[prebuffer_segment] Found in cache after lock: {url}")
                return

            # Download and cache
            logger.info(f"[prebuffer_segment] Downloading: {url}")
            await self._download_and_cache(url, headers)

        except Exception as e:
            logger.warning(f"[prebuffer_segment] Error: {e}")
        finally:
            if lock_acquired:
                await redis_utils.release_lock(lock_key)

    async def prebuffer_segments_batch(
        self,
        urls: list,
        headers: Dict[str, str],
        max_concurrent: int = 2,
    ) -> None:
        """
        Prebuffer multiple segments with concurrency control.

        Args:
            urls: List of segment URLs to prebuffer
            headers: Headers for requests
            max_concurrent: Maximum concurrent downloads (default 2 to avoid
                lock contention with player requests)
        """
        if self._should_skip_for_memory():
            logger.warning("Skipping prebuffer due to high memory usage")
            return

        semaphore = asyncio.Semaphore(max_concurrent)

        async def limited_prebuffer(url: str):
            async with semaphore:
                await self.prebuffer_segment(url, headers)

        # Start all prebuffer tasks
        tasks = [limited_prebuffer(url) for url in urls]
        await asyncio.gather(*tasks, return_exceptions=True)
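
    # A playlist/manifest handler would typically schedule the batch without
    # awaiting it, e.g. asyncio.create_task(self.prebuffer_segments_batch(
    # upcoming_urls, headers)), so prebuffering never delays the response to
    # the player. (upcoming_urls is an assumed variable, not defined here.)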

    def log_stats(self) -> None:
        """Log current prebuffer statistics."""
        logger.info(f"Prebuffer Stats: {self.stats.to_dict()}")
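
To show how a protocol-specific prebuffer would plug into this base class, here is a minimal sketch of a subclass. HLSPrebuffer, prebuffer_from_playlist, and the constructor values are illustrative assumptions, not code from this commit; only BasePrebuffer and its methods come from the file above.

# Hypothetical subclass (illustrative only; not part of this commit):
from typing import Dict, List

from mediaflow_proxy.utils.base_prebuffer import BasePrebuffer


class HLSPrebuffer(BasePrebuffer):
    """HLS-flavoured prebuffer: queues the next few segments after the current one."""

    async def prebuffer_from_playlist(
        self, segment_urls: List[str], current_url: str, headers: Dict[str, str]
    ) -> None:
        # Find the segment being played and prebuffer the ones right after it.
        try:
            idx = segment_urls.index(current_url)
        except ValueError:
            return  # current segment not in this playlist
        upcoming = segment_urls[idx + 1 : idx + 1 + self.prebuffer_segment_count]
        await self.prebuffer_segments_batch(upcoming, headers, max_concurrent=2)


# Example construction; the numbers are placeholders, not project defaults.
hls_prebuffer = HLSPrebuffer(
    max_cache_size=100,
    prebuffer_segments=3,
    max_memory_percent=80.0,
    emergency_threshold=90.0,
    segment_ttl=60,
)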