mirror of
https://github.com/UrloMythus/UnHided.git
synced 2026-04-11 11:50:51 +00:00
update
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
import copy
|
||||
import logging
|
||||
from typing import Annotated
|
||||
|
||||
@@ -7,7 +8,10 @@ from fastapi.responses import RedirectResponse
|
||||
from mediaflow_proxy.extractors.base import ExtractorError
|
||||
from mediaflow_proxy.extractors.factory import ExtractorFactory
|
||||
from mediaflow_proxy.schemas import ExtractorURLParams
|
||||
from mediaflow_proxy.utils.cache_utils import get_cached_extractor_result, set_cache_extractor_result
|
||||
from mediaflow_proxy.utils.cache_utils import (
|
||||
get_cached_extractor_result,
|
||||
set_cache_extractor_result,
|
||||
)
|
||||
from mediaflow_proxy.utils.http_utils import (
|
||||
DownloadError,
|
||||
encode_mediaflow_proxy_url,
|
||||
@@ -16,11 +20,28 @@ from mediaflow_proxy.utils.http_utils import (
|
||||
get_proxy_headers,
|
||||
)
|
||||
from mediaflow_proxy.utils.base64_utils import process_potential_base64_url
|
||||
from mediaflow_proxy.utils import redis_utils
|
||||
|
||||
extractor_router = APIRouter()
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
async def refresh_extractor_cache(cache_key: str, extractor_params: ExtractorURLParams, proxy_headers: ProxyRequestHeaders):
|
||||
# Cooldown duration for background refresh (2 minutes)
|
||||
_REFRESH_COOLDOWN = 120
|
||||
|
||||
# Hosts where background refresh should be DISABLED
|
||||
# These hosts generate unique CDN URLs per extraction - refreshing invalidates existing streams!
|
||||
# When a new URL is extracted, the old URL becomes invalid and causes 509 errors.
|
||||
_NO_BACKGROUND_REFRESH_HOSTS = frozenset(
|
||||
{
|
||||
"Vidoza",
|
||||
# Add other hosts here that generate unique per-extraction URLs
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
async def refresh_extractor_cache(
|
||||
cache_key: str, extractor_params: ExtractorURLParams, proxy_headers: ProxyRequestHeaders
|
||||
):
|
||||
"""Asynchronously refreshes the extractor cache in the background."""
|
||||
try:
|
||||
logger.info(f"Background cache refresh started for key: {cache_key}")
|
||||
@@ -32,32 +53,114 @@ async def refresh_extractor_cache(cache_key: str, extractor_params: ExtractorURL
|
||||
logger.error(f"Background cache refresh failed for key {cache_key}: {e}")
|
||||
|
||||
|
||||
@extractor_router.head("/video")
|
||||
@extractor_router.get("/video")
|
||||
async def extract_url(
|
||||
extractor_params: Annotated[ExtractorURLParams, Query()],
|
||||
# File extension -> MIME content type, used for player compatibility.
# A player that requests e.g. /extractor/video.m3u8 can tell from the URL
# alone that the stream is HLS. The keys double as the whitelist of
# extensions accepted by the "/video.{ext}" route.
EXTRACTOR_EXT_CONTENT_TYPES = {
    # HLS playlists share a single content type.
    **dict.fromkeys(("m3u8", "m3u"), "application/vnd.apple.mpegurl"),
    # Container / transport formats.
    "mp4": "video/mp4",
    "mkv": "video/x-matroska",
    "ts": "video/mp2t",
    "avi": "video/x-msvideo",
    "webm": "video/webm",
}
|
||||
|
||||
|
||||
async def _extract_url_impl(
|
||||
extractor_params: ExtractorURLParams,
|
||||
request: Request,
|
||||
background_tasks: BackgroundTasks,
|
||||
proxy_headers: Annotated[ProxyRequestHeaders, Depends(get_proxy_headers)],
|
||||
proxy_headers: ProxyRequestHeaders,
|
||||
ext: str | None = None,
|
||||
):
|
||||
"""Extract clean links from various video hosting services."""
|
||||
"""
|
||||
Core extraction logic shared by all extractor endpoints.
|
||||
|
||||
Args:
|
||||
extractor_params: Extraction parameters from query string
|
||||
request: FastAPI request object
|
||||
background_tasks: Background task manager
|
||||
proxy_headers: Proxy headers from request
|
||||
ext: Optional file extension hint for player compatibility (e.g., "m3u8", "mp4")
|
||||
"""
|
||||
try:
|
||||
# Process potential base64 encoded destination URL
|
||||
processed_destination = process_potential_base64_url(extractor_params.destination)
|
||||
extractor_params.destination = processed_destination
|
||||
|
||||
|
||||
cache_key = f"{extractor_params.host}_{extractor_params.model_dump_json()}"
|
||||
response = await get_cached_extractor_result(cache_key)
|
||||
|
||||
|
||||
# Extractor results are resolved via the pod's outgoing IP and may not
|
||||
# be valid when served from a different pod. Namespace the cache and
|
||||
# all associated coordination keys so each pod operates on its own
|
||||
# partition of the shared Redis. On single-instance deployments (no
|
||||
# CACHE_NAMESPACE env var) make_instance_key() is a no-op.
|
||||
instance_cache_key = redis_utils.make_instance_key(cache_key)
|
||||
|
||||
response = await get_cached_extractor_result(instance_cache_key)
|
||||
|
||||
if response:
|
||||
logger.info(f"Serving from cache for key: {cache_key}")
|
||||
# Schedule a background task to refresh the cache without blocking the user
|
||||
background_tasks.add_task(refresh_extractor_cache, cache_key, extractor_params, proxy_headers)
|
||||
logger.info(f"Serving from cache for key: {instance_cache_key}")
|
||||
# Schedule a background refresh, but only if:
|
||||
# 1. The host is NOT in the no-refresh list (hosts with unique per-extraction URLs)
|
||||
# 2. The cooldown has elapsed (prevents flooding upstream)
|
||||
#
|
||||
# WARNING: For hosts like Vidoza, background refresh is DANGEROUS!
|
||||
# Each extraction generates a unique CDN URL. Refreshing invalidates the
|
||||
# old URL, causing 509 errors for clients still using it.
|
||||
if extractor_params.host not in _NO_BACKGROUND_REFRESH_HOSTS:
|
||||
cooldown_key = f"extractor_refresh:{instance_cache_key}"
|
||||
if await redis_utils.check_and_set_cooldown(cooldown_key, _REFRESH_COOLDOWN):
|
||||
background_tasks.add_task(
|
||||
refresh_extractor_cache, instance_cache_key, extractor_params, proxy_headers
|
||||
)
|
||||
else:
|
||||
logger.debug(f"Skipping background refresh for {extractor_params.host} (unique CDN URLs)")
|
||||
else:
|
||||
logger.info(f"Cache miss for key: {cache_key}. Fetching fresh data.")
|
||||
extractor = ExtractorFactory.get_extractor(extractor_params.host, proxy_headers.request)
|
||||
response = await extractor.extract(extractor_params.destination, **extractor_params.extra_params)
|
||||
await set_cache_extractor_result(cache_key, response)
|
||||
# Use Redis-based in-flight tracking for cross-worker deduplication.
|
||||
# If another worker is already extracting, wait for them to finish.
|
||||
inflight_key = f"extractor:{instance_cache_key}"
|
||||
|
||||
if not await redis_utils.mark_inflight(inflight_key, ttl=60):
|
||||
# Another worker is extracting - wait for them to finish and check cache
|
||||
logger.info(f"Waiting for in-flight extraction (cross-worker) for key: {instance_cache_key}")
|
||||
if await redis_utils.wait_for_completion(inflight_key, timeout=30.0):
|
||||
# Extraction completed, check cache
|
||||
response = await get_cached_extractor_result(instance_cache_key)
|
||||
if response:
|
||||
logger.info(f"Serving from cache (after wait) for key: {instance_cache_key}")
|
||||
|
||||
if response is None:
|
||||
# We either marked it as in-flight (first) or waited and still no cache hit.
|
||||
# Use Redis lock to ensure only one worker extracts at a time.
|
||||
if await redis_utils.acquire_lock(f"extractor_lock:{instance_cache_key}", ttl=30, timeout=30.0):
|
||||
try:
|
||||
# Re-check cache after acquiring lock - another worker may have populated it
|
||||
response = await get_cached_extractor_result(instance_cache_key)
|
||||
if response:
|
||||
logger.info(f"Serving from cache (after lock) for key: {instance_cache_key}")
|
||||
else:
|
||||
logger.info(f"Cache miss for key: {instance_cache_key}. Fetching fresh data.")
|
||||
try:
|
||||
extractor = ExtractorFactory.get_extractor(extractor_params.host, proxy_headers.request)
|
||||
response = await extractor.extract(
|
||||
extractor_params.destination, **extractor_params.extra_params
|
||||
)
|
||||
await set_cache_extractor_result(instance_cache_key, response)
|
||||
except Exception:
|
||||
raise
|
||||
finally:
|
||||
await redis_utils.release_lock(f"extractor_lock:{instance_cache_key}")
|
||||
await redis_utils.clear_inflight(inflight_key)
|
||||
else:
|
||||
# Lock timeout - try to serve from cache anyway
|
||||
response = await get_cached_extractor_result(instance_cache_key)
|
||||
if not response:
|
||||
raise HTTPException(status_code=503, detail="Extraction in progress, please retry")
|
||||
|
||||
# Deep copy so each concurrent request gets its own dict to mutate
|
||||
# (pop mediaflow_endpoint, update request_headers, etc.)
|
||||
response = copy.deepcopy(response)
|
||||
|
||||
# Ensure the latest request headers are used, even with cached data
|
||||
if "request_headers" not in response:
|
||||
@@ -94,3 +197,62 @@ async def extract_url(
|
||||
except Exception as e:
|
||||
logger.exception(f"Extraction failed: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"Extraction failed: {str(e)}")
|
||||
|
||||
|
||||
@extractor_router.head("/video")
@extractor_router.get("/video")
async def extract_url(
    extractor_params: Annotated[ExtractorURLParams, Query()],
    request: Request,
    background_tasks: BackgroundTasks,
    proxy_headers: Annotated[ProxyRequestHeaders, Depends(get_proxy_headers)],
):
    """
    Extract clean links from various video hosting services.

    This is the base endpoint without extension. For better player compatibility
    (especially ExoPlayer), use the extension variants:
    - /extractor/video.m3u8 for HLS streams
    - /extractor/video.mp4 for MP4 streams
    """
    # Thin route wrapper: all work happens in the shared implementation.
    # No extension is part of this path, so `ext` is left at its default.
    return await _extract_url_impl(
        extractor_params=extractor_params,
        request=request,
        background_tasks=background_tasks,
        proxy_headers=proxy_headers,
    )
|
||||
|
||||
|
||||
@extractor_router.head("/video.{ext}")
@extractor_router.get("/video.{ext}")
async def extract_url_with_extension(
    ext: str,
    extractor_params: Annotated[ExtractorURLParams, Query()],
    request: Request,
    background_tasks: BackgroundTasks,
    proxy_headers: Annotated[ProxyRequestHeaders, Depends(get_proxy_headers)],
):
    """
    Extract clean links with file extension hint for player compatibility.

    The extension in the URL helps players like ExoPlayer detect the content type
    without needing to follow redirects or inspect headers. This is especially
    important for HLS streams where ExoPlayer needs .m3u8 in the URL to use
    HlsMediaSource instead of ProgressiveMediaSource.

    Supported extensions:
    - .m3u8, .m3u - HLS playlists (application/vnd.apple.mpegurl)
    - .mp4 - MP4 video (video/mp4)
    - .mkv - Matroska video (video/x-matroska)
    - .ts - MPEG-TS (video/mp2t)
    - .avi - AVI video (video/x-msvideo)
    - .webm - WebM video (video/webm)

    Example:
        /extractor/video.m3u8?host=TurboVidPlay&d=...&redirect_stream=true

    This URL clearly indicates HLS content, making ExoPlayer use the correct source.
    """
    # Extensions are matched case-insensitively; the lowered form is what the
    # shared implementation receives.
    normalized_ext = ext.lower()

    if normalized_ext in EXTRACTOR_EXT_CONTENT_TYPES:
        return await _extract_url_impl(
            extractor_params, request, background_tasks, proxy_headers, ext=normalized_ext
        )

    # Unknown extension: reject with 400 and list what is accepted. The
    # message echoes the extension exactly as the client sent it.
    supported = ", ".join("." + known for known in EXTRACTOR_EXT_CONTENT_TYPES)
    raise HTTPException(
        status_code=400,
        detail=f"Unsupported extension: .{ext}. Supported: {supported}",
    )
|
||||
|
||||
Reference in New Issue
Block a user