mirror of
https://github.com/UrloMythus/UnHided.git
synced 2026-04-11 11:50:51 +00:00
update
This commit is contained in:
@@ -1,21 +1,53 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, Optional, Any
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import asyncio
|
||||
import httpx
|
||||
import aiohttp
|
||||
import json
|
||||
import logging
|
||||
|
||||
from mediaflow_proxy.configs import settings
|
||||
from mediaflow_proxy.utils.http_utils import create_httpx_client, DownloadError
|
||||
from mediaflow_proxy.utils.http_client import create_aiohttp_session
|
||||
from mediaflow_proxy.utils.http_utils import DownloadError
|
||||
|
||||
# Module-level logger, namespaced to this module's dotted import path.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ExtractorError(Exception):
    """Root exception type raised by all URL extractor implementations."""
|
||||
|
||||
|
||||
@dataclass
class HttpResponse:
    """
    Lightweight container holding a fully-buffered HTTP response for extractors.

    Field naming follows aiohttp conventions rather than httpx:
    - status (not status_code)
    - text (body already decoded to a string)
    - content (body already read as raw bytes)
    """

    status: int
    headers: Dict[str, str]
    text: str
    content: bytes
    url: str

    def json(self) -> Any:
        """Decode the buffered body text as JSON and return the result."""
        return json.loads(self.text)

    def get_origin(self) -> str:
        """Return the origin (scheme + host[:port]) of the response URL."""
        components = urlparse(self.url)
        return "{}://{}".format(components.scheme, components.netloc)
|
||||
|
||||
|
||||
class BaseExtractor(ABC):
|
||||
"""Base class for all URL extractors.
|
||||
|
||||
@@ -43,74 +75,99 @@ class BaseExtractor(ABC):
|
||||
backoff_factor: float = 0.5,
|
||||
raise_on_status: bool = True,
|
||||
**kwargs,
|
||||
) -> httpx.Response:
|
||||
) -> HttpResponse:
|
||||
"""
|
||||
Make HTTP request with retry and timeout support.
|
||||
Make HTTP request with retry and timeout support using aiohttp.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
url : str
|
||||
The URL to request.
|
||||
method : str
|
||||
HTTP method (GET, POST, etc.). Defaults to GET.
|
||||
headers : dict | None
|
||||
Additional headers to merge with base headers.
|
||||
timeout : float | None
|
||||
Seconds to wait for the request (applied to httpx.Timeout). Defaults to 15s.
|
||||
Seconds to wait for the request. Defaults to 15s.
|
||||
retries : int
|
||||
Number of attempts for transient errors.
|
||||
backoff_factor : float
|
||||
Base for exponential backoff between retries.
|
||||
raise_on_status : bool
|
||||
If True, HTTP non-2xx raises DownloadError (preserves status code).
|
||||
If True, HTTP non-2xx raises DownloadError.
|
||||
**kwargs
|
||||
Additional arguments passed to aiohttp request (e.g., data, json).
|
||||
|
||||
Returns
|
||||
-------
|
||||
HttpResponse
|
||||
Response object with pre-loaded content.
|
||||
"""
|
||||
attempt = 0
|
||||
last_exc = None
|
||||
|
||||
# build request headers merging base and per-request
|
||||
# Build request headers merging base and per-request
|
||||
request_headers = self.base_headers.copy()
|
||||
if headers:
|
||||
request_headers.update(headers)
|
||||
|
||||
timeout_cfg = httpx.Timeout(timeout or 15.0)
|
||||
timeout_val = timeout or 15.0
|
||||
|
||||
while attempt < retries:
|
||||
try:
|
||||
async with create_httpx_client(timeout=timeout_cfg) as client:
|
||||
response = await client.request(
|
||||
async with create_aiohttp_session(url, timeout=timeout_val) as (session, proxy_url):
|
||||
async with session.request(
|
||||
method,
|
||||
url,
|
||||
headers=request_headers,
|
||||
proxy=proxy_url,
|
||||
**kwargs,
|
||||
)
|
||||
) as response:
|
||||
# Read content while session is still open
|
||||
content = await response.read()
|
||||
text = content.decode("utf-8", errors="replace")
|
||||
final_url = str(response.url)
|
||||
status = response.status
|
||||
resp_headers = dict(response.headers)
|
||||
|
||||
if raise_on_status:
|
||||
try:
|
||||
response.raise_for_status()
|
||||
except httpx.HTTPStatusError as e:
|
||||
# Provide a short body preview for debugging
|
||||
body_preview = ""
|
||||
try:
|
||||
body_preview = e.response.text[:500]
|
||||
except Exception:
|
||||
body_preview = "<unreadable body>"
|
||||
if raise_on_status and status >= 400:
|
||||
body_preview = text[:500]
|
||||
logger.debug(
|
||||
"HTTPStatusError for %s (status=%s) -- body preview: %s",
|
||||
"HTTP error for %s (status=%s) -- body preview: %s",
|
||||
url,
|
||||
e.response.status_code,
|
||||
status,
|
||||
body_preview,
|
||||
)
|
||||
raise DownloadError(e.response.status_code, f"HTTP error {e.response.status_code} while requesting {url}")
|
||||
return response
|
||||
raise DownloadError(status, f"HTTP error {status} while requesting {url}")
|
||||
|
||||
return HttpResponse(
|
||||
status=status,
|
||||
headers=resp_headers,
|
||||
text=text,
|
||||
content=content,
|
||||
url=final_url,
|
||||
)
|
||||
|
||||
except DownloadError:
|
||||
# Do not retry on explicit HTTP status errors (they are intentional)
|
||||
raise
|
||||
except (httpx.ReadTimeout, httpx.ConnectTimeout, httpx.NetworkError, httpx.TransportError) as e:
|
||||
# Transient network error — retry with backoff
|
||||
except (asyncio.TimeoutError, aiohttp.ClientError) as e:
|
||||
# Transient network error - retry with backoff
|
||||
last_exc = e
|
||||
attempt += 1
|
||||
sleep_for = backoff_factor * (2 ** (attempt - 1))
|
||||
logger.warning("Transient network error (attempt %s/%s) for %s: %s — retrying in %.1fs",
|
||||
attempt, retries, url, e, sleep_for)
|
||||
logger.warning(
|
||||
"Transient network error (attempt %s/%s) for %s: %s — retrying in %.1fs",
|
||||
attempt,
|
||||
retries,
|
||||
url,
|
||||
e,
|
||||
sleep_for,
|
||||
)
|
||||
await asyncio.sleep(sleep_for)
|
||||
continue
|
||||
except Exception as e:
|
||||
# Unexpected exception — wrap as ExtractorError to keep interface consistent
|
||||
# Unexpected exception - wrap as ExtractorError to keep interface consistent
|
||||
logger.exception("Unhandled exception while requesting %s: %s", url, e)
|
||||
raise ExtractorError(f"Request failed for URL {url}: {str(e)}")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user