Add files via upload

Urlo30
2024-12-29 23:18:53 +01:00
committed by GitHub
parent 76987906b5
commit fb60e99822
40 changed files with 5170 additions and 0 deletions

View File

@@ -0,0 +1,50 @@
from abc import ABC, abstractmethod
from typing import Dict, Optional, Any

import httpx

from mediaflow_proxy.configs import settings
from mediaflow_proxy.utils.http_utils import create_httpx_client


class ExtractorError(Exception):
    """Base exception for all extractors."""

    pass


class BaseExtractor(ABC):
    """Base class for all URL extractors."""

    def __init__(self, request_headers: dict):
        self.base_headers = {
            "user-agent": settings.user_agent,
        }
        self.mediaflow_endpoint = "proxy_stream_endpoint"
        self.base_headers.update(request_headers)

    async def _make_request(
        self, url: str, method: str = "GET", headers: Optional[Dict] = None, **kwargs
    ) -> httpx.Response:
        """Make an HTTP request with error handling."""
        try:
            async with create_httpx_client() as client:
                # Copy the base headers so per-request headers do not leak into later requests.
                request_headers = self.base_headers.copy()
                request_headers.update(headers or {})
                response = await client.request(
                    method,
                    url,
                    headers=request_headers,
                    **kwargs,
                )
                response.raise_for_status()
                return response
        except httpx.HTTPError as e:
            raise ExtractorError(f"HTTP request failed: {str(e)}")
        except Exception as e:
            raise ExtractorError(f"Request failed: {str(e)}")

    @abstractmethod
    async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
        """Extract final URL and required headers."""
        pass
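
Every concrete extractor in this commit subclasses BaseExtractor and returns the same three-key dict. A minimal sketch of that contract follows; the class name, regex, and URL pattern here are hypothetical placeholders, not part of this commit.

import re
from typing import Any, Dict

from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError


class ExampleExtractor(BaseExtractor):
    """Hypothetical extractor illustrating the BaseExtractor contract."""

    async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
        # Fetch the embed page through the shared, error-handled HTTP helper.
        response = await self._make_request(url)

        # Placeholder pattern: pull the first source URL out of the player config.
        match = re.search(r'file:\s*"([^"]+)"', response.text)
        if not match:
            raise ExtractorError("Failed to extract video URL")

        self.base_headers["referer"] = url
        return {
            "destination_url": match.group(1),
            "request_headers": self.base_headers,
            "mediaflow_endpoint": self.mediaflow_endpoint,
        }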

View File

@@ -0,0 +1,39 @@
import re
import time
from typing import Dict

from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError


class DoodStreamExtractor(BaseExtractor):
    """DoodStream URL extractor."""

    def __init__(self, request_headers: dict):
        super().__init__(request_headers)
        self.base_url = "https://d000d.com"

    async def extract(self, url: str, **kwargs) -> Dict[str, str]:
        """Extract DoodStream URL."""
        response = await self._make_request(url)

        # Extract URL pattern
        pattern = r"(\/pass_md5\/.*?)'.*(\?token=.*?expiry=)"
        match = re.search(pattern, response.text, re.DOTALL)
        if not match:
            raise ExtractorError("Failed to extract URL pattern")

        # Build final URL
        pass_url = f"{self.base_url}{match[1]}"
        referer = f"{self.base_url}/"
        headers = {"range": "bytes=0-", "referer": referer}

        response = await self._make_request(pass_url, headers=headers)
        timestamp = str(int(time.time()))
        final_url = f"{response.text}123456789{match[2]}{timestamp}"

        self.base_headers["referer"] = referer
        return {
            "destination_url": final_url,
            "request_headers": self.base_headers,
            "mediaflow_endpoint": self.mediaflow_endpoint,
        }

View File

@@ -0,0 +1,32 @@
from typing import Dict, Type

from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
from mediaflow_proxy.extractors.doodstream import DoodStreamExtractor
from mediaflow_proxy.extractors.livetv import LiveTVExtractor
from mediaflow_proxy.extractors.mixdrop import MixdropExtractor
from mediaflow_proxy.extractors.uqload import UqloadExtractor
from mediaflow_proxy.extractors.streamtape import StreamtapeExtractor
from mediaflow_proxy.extractors.supervideo import SupervideoExtractor


class ExtractorFactory:
    """Factory for creating URL extractors."""

    _extractors: Dict[str, Type[BaseExtractor]] = {
        "Doodstream": DoodStreamExtractor,
        "Uqload": UqloadExtractor,
        "Mixdrop": MixdropExtractor,
        "Streamtape": StreamtapeExtractor,
        "Supervideo": SupervideoExtractor,
        "LiveTV": LiveTVExtractor,
    }

    @classmethod
    def get_extractor(cls, host: str, request_headers: dict) -> BaseExtractor:
        """Get appropriate extractor instance for the given host."""
        extractor_class = cls._extractors.get(host)
        if not extractor_class:
            raise ExtractorError(f"Unsupported host: {host}")
        return extractor_class(request_headers)
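
A minimal usage sketch of the factory from an async caller. The module path mediaflow_proxy.extractors.factory is assumed (the diff does not show file names), and the host key and URL are placeholders.

import asyncio

from mediaflow_proxy.extractors.base import ExtractorError
from mediaflow_proxy.extractors.factory import ExtractorFactory  # assumed module path


async def main():
    try:
        # The host string must match one of the keys registered in ExtractorFactory._extractors.
        extractor = ExtractorFactory.get_extractor("Doodstream", {"accept-language": "en-US"})
        result = await extractor.extract("https://d000d.com/e/some_video_id")
        print(result["destination_url"], result["mediaflow_endpoint"])
    except ExtractorError as e:
        print(f"Extraction failed: {e}")


asyncio.run(main())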

View File

@@ -0,0 +1,251 @@
import re
from typing import Dict, Tuple, Optional
from urllib.parse import urljoin, urlparse, unquote

from httpx import Response

from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError


class LiveTVExtractor(BaseExtractor):
    """LiveTV URL extractor for both M3U8 and MPD streams."""

    def __init__(self, request_headers: dict):
        super().__init__(request_headers)
        # Default to HLS proxy endpoint, will be updated based on stream type
        self.mediaflow_endpoint = "hls_manifest_proxy"
        # Patterns for stream URL extraction
        self.fallback_pattern = re.compile(
            r"source: [\'\"](.*?)[\'\"]\s*,\s*[\s\S]*?mimeType: [\'\"](application/x-mpegURL|application/vnd\.apple\.mpegURL|application/dash\+xml)[\'\"]",
            re.IGNORECASE,
        )
        self.any_m3u8_pattern = re.compile(
            r'["\']?(https?://.*?\.m3u8(?:\?[^"\']*)?)["\']?',
            re.IGNORECASE,
        )

    async def extract(self, url: str, stream_title: Optional[str] = None, **kwargs) -> Dict[str, str]:
        """Extract LiveTV URL and required headers.

        Args:
            url: The channel page URL
            stream_title: Optional stream title to filter a specific stream

        Returns:
            Dict[str, str]: Stream URL, required headers and target proxy endpoint
        """
        try:
            # Get the channel page
            response = await self._make_request(url)
            self.base_headers["referer"] = urljoin(url, "/")

            # Extract player API details
            player_api_base, method = await self._extract_player_api_base(response.text)
            if not player_api_base:
                raise ExtractorError("Failed to extract player API URL")

            # Get player options
            options_data = await self._get_player_options(response.text)
            if not options_data:
                raise ExtractorError("No player options found")

            # Process player options to find matching stream
            for option in options_data:
                current_title = option.get("title")
                if stream_title and current_title != stream_title:
                    continue

                # Get stream URL based on player option
                stream_data = await self._process_player_option(
                    player_api_base, method, option.get("post"), option.get("nume"), option.get("type")
                )
                if stream_data:
                    stream_url = stream_data.get("url")
                    if not stream_url:
                        continue

                    response = {
                        "destination_url": stream_url,
                        "request_headers": self.base_headers,
                        "mediaflow_endpoint": self.mediaflow_endpoint,
                    }

                    # Set endpoint based on stream type
                    if stream_data.get("type") == "mpd":
                        if stream_data.get("drm_key_id") and stream_data.get("drm_key"):
                            response.update(
                                {
                                    "query_params": {
                                        "key_id": stream_data["drm_key_id"],
                                        "key": stream_data["drm_key"],
                                    },
                                    "mediaflow_endpoint": "mpd_manifest_proxy",
                                }
                            )
                    return response

            raise ExtractorError("No valid stream found")

        except Exception as e:
            raise ExtractorError(f"Extraction failed: {str(e)}")

    async def _extract_player_api_base(self, html_content: str) -> Tuple[Optional[str], Optional[str]]:
        """Extract player API base URL and method."""
        admin_ajax_pattern = r'"player_api"\s*:\s*"([^"]+)".*?"play_method"\s*:\s*"([^"]+)"'
        match = re.search(admin_ajax_pattern, html_content)
        if not match:
            return None, None

        url = match.group(1).replace("\\/", "/")
        method = match.group(2)
        if method == "wp_json":
            return url, method
        url = urljoin(url, "/wp-admin/admin-ajax.php")
        return url, method

    async def _get_player_options(self, html_content: str) -> list:
        """Extract player options from HTML content."""
        pattern = r'<li[^>]*class=["\']dooplay_player_option["\'][^>]*data-type=["\']([^"\']*)["\'][^>]*data-post=["\']([^"\']*)["\'][^>]*data-nume=["\']([^"\']*)["\'][^>]*>.*?<span class=["\']title["\']>([^<]*)</span>'
        matches = re.finditer(pattern, html_content, re.DOTALL)
        return [
            {"type": match.group(1), "post": match.group(2), "nume": match.group(3), "title": match.group(4).strip()}
            for match in matches
        ]

    async def _process_player_option(self, api_base: str, method: str, post: str, nume: str, type_: str) -> Dict:
        """Process player option to get stream URL."""
        if method == "wp_json":
            api_url = f"{api_base}{post}/{type_}/{nume}"
            response = await self._make_request(api_url)
        else:
            form_data = {"action": "doo_player_ajax", "post": post, "nume": nume, "type": type_}
            response = await self._make_request(api_base, method="POST", data=form_data)

        # Get iframe URL from API response
        try:
            data = response.json()
            iframe_url = urljoin(api_base, data.get("embed_url", "").replace("\\/", "/"))

            # Get stream URL from iframe
            iframe_response = await self._make_request(iframe_url)
            stream_data = await self._extract_stream_url(iframe_response, iframe_url)
            return stream_data
        except Exception as e:
            raise ExtractorError(f"Failed to process player option: {str(e)}")

    async def _extract_stream_url(self, iframe_response: Response, iframe_url: str) -> Dict:
        """Extract final stream URL from iframe content."""
        try:
            # Parse URL components
            parsed_url = urlparse(iframe_url)
            query_params = dict(param.split("=") for param in parsed_url.query.split("&") if "=" in param)

            # Check if content is already a direct M3U8 stream
            content_types = ["application/x-mpegurl", "application/vnd.apple.mpegurl"]
            if any(ext in iframe_response.headers["content-type"] for ext in content_types):
                return {"url": iframe_url, "type": "m3u8"}

            stream_data = {}

            # Check for source parameter in URL
            if "source" in query_params:
                stream_data = {
                    "url": urljoin(iframe_url, unquote(query_params["source"])),
                    "type": "m3u8",
                }
            # Check for MPD stream with DRM
            elif "zy" in query_params and ".mpd``" in query_params["zy"]:
                data = query_params["zy"].split("``")
                url = data[0]
                key_id, key = data[1].split(":")
                stream_data = {"url": url, "type": "mpd", "drm_key_id": key_id, "drm_key": key}
            # Check for tamilultra specific format
            elif "tamilultra" in iframe_url:
                stream_data = {"url": urljoin(iframe_url, parsed_url.query), "type": "m3u8"}
            # Try pattern matching for stream URLs
            else:
                channel_id = query_params.get("id", "")
                stream_url = None
                html_content = iframe_response.text

                if channel_id:
                    # Try channel ID specific pattern
                    pattern = rf'{re.escape(channel_id)}["\']:\s*{{\s*["\']?url["\']?\s*:\s*["\']([^"\']+)["\']'
                    match = re.search(pattern, html_content)
                    if match:
                        stream_url = match.group(1)

                # Try fallback patterns if channel ID pattern fails
                if not stream_url:
                    for pattern in [self.fallback_pattern, self.any_m3u8_pattern]:
                        match = pattern.search(html_content)
                        if match:
                            stream_url = match.group(1)
                            break

                if stream_url:
                    stream_data = {"url": stream_url, "type": "m3u8"}  # Default to m3u8, will be updated

                    # Check for MPD stream and extract DRM keys
                    if stream_url.endswith(".mpd"):
                        stream_data["type"] = "mpd"
                        drm_data = await self._extract_drm_keys(html_content, channel_id)
                        if drm_data:
                            stream_data.update(drm_data)

            # If no stream data found, raise error
            if not stream_data:
                raise ExtractorError("No valid stream URL found")

            # Update stream type based on URL if not already set
            if stream_data.get("type") == "m3u8":
                if stream_data["url"].endswith(".mpd"):
                    stream_data["type"] = "mpd"
                elif not any(ext in stream_data["url"] for ext in [".m3u8", ".m3u"]):
                    stream_data["type"] = "m3u8"  # Default to m3u8 if no extension found

            return stream_data

        except Exception as e:
            raise ExtractorError(f"Failed to extract stream URL: {str(e)}")

    async def _extract_drm_keys(self, html_content: str, channel_id: str) -> Dict:
        """Extract DRM keys for MPD streams."""
        try:
            # Pattern for channel entry
            channel_pattern = rf'"{re.escape(channel_id)}":\s*{{[^}}]+}}'
            channel_match = re.search(channel_pattern, html_content)
            if channel_match:
                channel_data = channel_match.group(0)

                # Try clearkeys pattern first
                clearkey_pattern = r'["\']?clearkeys["\']?\s*:\s*{\s*["\'](.+?)["\']:\s*["\'](.+?)["\']'
                clearkey_match = re.search(clearkey_pattern, channel_data)

                # Try k1/k2 pattern if clearkeys not found
                if not clearkey_match:
                    k1k2_pattern = r'["\']?k1["\']?\s*:\s*["\'](.+?)["\'],\s*["\']?k2["\']?\s*:\s*["\'](.+?)["\']'
                    k1k2_match = re.search(k1k2_pattern, channel_data)
                    if k1k2_match:
                        return {"drm_key_id": k1k2_match.group(1), "drm_key": k1k2_match.group(2)}
                else:
                    return {"drm_key_id": clearkey_match.group(1), "drm_key": clearkey_match.group(2)}

            return {}
        except Exception:
            return {}
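
For reference, a sketch of the two result shapes extract() can produce based on the branches above; all URLs and key values are placeholders.

# Plain HLS stream: routed through the default HLS manifest proxy.
hls_result = {
    "destination_url": "https://cdn.example/live/channel.m3u8",
    "request_headers": {"user-agent": "...", "referer": "https://channel-page.example/"},
    "mediaflow_endpoint": "hls_manifest_proxy",
}

# DRM-protected DASH stream: ClearKey material travels as query params
# and the request is switched to the MPD manifest proxy endpoint.
mpd_result = {
    "destination_url": "https://cdn.example/live/channel.mpd",
    "request_headers": {"user-agent": "...", "referer": "https://channel-page.example/"},
    "query_params": {"key_id": "<drm_key_id>", "key": "<drm_key>"},
    "mediaflow_endpoint": "mpd_manifest_proxy",
}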

View File

@@ -0,0 +1,36 @@
import re
import string
from typing import Dict, Any

from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError


class MixdropExtractor(BaseExtractor):
    """Mixdrop URL extractor."""

    async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
        """Extract Mixdrop URL."""
        response = await self._make_request(url, headers={"accept-language": "en-US,en;q=0.5"})

        # Extract and decode URL
        match = re.search(r"}\('(.+)',.+,'(.+)'\.split", response.text)
        if not match:
            raise ExtractorError("Failed to extract URL components")

        s1, s2 = match.group(1, 2)
        schema = s1.split(";")[2][5:-1]
        terms = s2.split("|")

        # Build character mapping
        charset = string.digits + string.ascii_letters
        char_map = {charset[i]: terms[i] or charset[i] for i in range(len(terms))}

        # Construct final URL
        final_url = "https:" + "".join(char_map.get(c, c) for c in schema)

        self.base_headers["referer"] = url
        return {
            "destination_url": final_url,
            "request_headers": self.base_headers,
            "mediaflow_endpoint": self.mediaflow_endpoint,
        }

View File

@@ -0,0 +1,32 @@
import re
from typing import Dict, Any

from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError


class StreamtapeExtractor(BaseExtractor):
    """Streamtape URL extractor."""

    async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
        """Extract Streamtape URL."""
        response = await self._make_request(url)

        # Extract and decode URL
        matches = re.findall(r"id=.*?(?=')", response.text)
        if not matches:
            raise ExtractorError("Failed to extract URL components")

        # The usable token is the id string that appears twice in succession.
        final_url = next(
            (
                f"https://streamtape.com/get_video?{matches[i + 1]}"
                for i in range(len(matches) - 1)
                if matches[i] == matches[i + 1]
            ),
            None,
        )
        if not final_url:
            raise ExtractorError("Failed to extract video URL")

        self.base_headers["referer"] = url
        return {
            "destination_url": final_url,
            "request_headers": self.base_headers,
            "mediaflow_endpoint": self.mediaflow_endpoint,
        }

View File

@@ -0,0 +1,27 @@
import re
from typing import Dict, Any

from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError


class SupervideoExtractor(BaseExtractor):
    """Supervideo URL extractor."""

    async def extract(self, url: str, **kwargs) -> Dict[str, Any]:
        """Extract Supervideo URL."""
        response = await self._make_request(url)

        # Extract and decode URL from the packed player script
        match = re.search(r"\}\('(.+)',.+,'(.+)'\.split", response.text)
        if not match:
            raise ExtractorError("Failed to extract URL components")

        terms = match.group(2).split("|")
        hfs = next(terms[i] for i in range(terms.index("file"), len(terms)) if "hfs" in terms[i])
        result = terms[terms.index("urlset") + 1 : terms.index("hls")]

        base_url = f"https://{hfs}.serversicuro.cc/hls/"
        final_url = base_url + ",".join(reversed(result)) + (".urlset/master.m3u8" if result else "")

        self.base_headers["referer"] = url
        return {
            "destination_url": final_url,
            "request_headers": self.base_headers,
            "mediaflow_endpoint": self.mediaflow_endpoint,
        }

View File

@@ -0,0 +1,24 @@
import re
from typing import Dict
from urllib.parse import urljoin

from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError


class UqloadExtractor(BaseExtractor):
    """Uqload URL extractor."""

    async def extract(self, url: str, **kwargs) -> Dict[str, str]:
        """Extract Uqload URL."""
        response = await self._make_request(url)

        video_url_match = re.search(r'sources: \["(.*?)"]', response.text)
        if not video_url_match:
            raise ExtractorError("Failed to extract video URL")

        self.base_headers["referer"] = urljoin(url, "/")
        return {
            "destination_url": video_url_match.group(1),
            "request_headers": self.base_headers,
            "mediaflow_endpoint": self.mediaflow_endpoint,
        }