mirror of
https://github.com/UrloMythus/UnHided.git
synced 2026-04-09 02:40:47 +00:00
252 lines
10 KiB
Python
import re
|
|
from typing import Dict, Tuple, Optional
|
|
from urllib.parse import urljoin, urlparse, unquote
|
|
|
|
from httpx import Response
|
|
|
|
from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError
|
|
|
|
|
|
class LiveTVExtractor(BaseExtractor):
    """LiveTV URL extractor for both M3U8 and MPD streams."""

    def __init__(self, request_headers: dict):
        """Initialize the extractor with the default endpoint and URL-matching regexes."""
        super().__init__(request_headers)
        # HLS proxying is the default; switched to the MPD proxy when a DASH stream is found.
        self.mediaflow_endpoint = "hls_manifest_proxy"

        # Player-config style declaration: source: '...' ... mimeType: '<hls-or-dash>'
        source_with_mime = (
            r"source: [\'\"](.*?)[\'\"]\s*,\s*[\s\S]*?mimeType: "
            r"[\'\"](application/x-mpegURL|application/vnd\.apple\.mpegURL|application/dash\+xml)[\'\"]"
        )
        self.fallback_pattern = re.compile(source_with_mime, re.IGNORECASE)

        # Last resort: any absolute URL ending in .m3u8, optionally with a query string.
        any_m3u8 = r'["\']?(https?://.*?\.m3u8(?:\?[^"\']*)?)["\']?'
        self.any_m3u8_pattern = re.compile(any_m3u8, re.IGNORECASE)
|
|
|
|
async def extract(self, url: str, stream_title: str = None, **kwargs) -> Dict[str, str]:
|
|
"""Extract LiveTV URL and required headers.
|
|
|
|
Args:
|
|
url: The channel page URL
|
|
stream_title: Optional stream title to filter specific stream
|
|
|
|
Returns:
|
|
Tuple[str, Dict[str, str]]: Stream URL and required headers
|
|
"""
|
|
try:
|
|
# Get the channel page
|
|
response = await self._make_request(url)
|
|
self.base_headers["referer"] = urljoin(url, "/")
|
|
|
|
# Extract player API details
|
|
player_api_base, method = await self._extract_player_api_base(response.text)
|
|
if not player_api_base:
|
|
raise ExtractorError("Failed to extract player API URL")
|
|
|
|
# Get player options
|
|
options_data = await self._get_player_options(response.text)
|
|
if not options_data:
|
|
raise ExtractorError("No player options found")
|
|
|
|
# Process player options to find matching stream
|
|
for option in options_data:
|
|
current_title = option.get("title")
|
|
if stream_title and current_title != stream_title:
|
|
continue
|
|
|
|
# Get stream URL based on player option
|
|
stream_data = await self._process_player_option(
|
|
player_api_base, method, option.get("post"), option.get("nume"), option.get("type")
|
|
)
|
|
|
|
if stream_data:
|
|
stream_url = stream_data.get("url")
|
|
if not stream_url:
|
|
continue
|
|
|
|
response = {
|
|
"destination_url": stream_url,
|
|
"request_headers": self.base_headers,
|
|
"mediaflow_endpoint": self.mediaflow_endpoint,
|
|
}
|
|
|
|
# Set endpoint based on stream type
|
|
if stream_data.get("type") == "mpd":
|
|
if stream_data.get("drm_key_id") and stream_data.get("drm_key"):
|
|
response.update(
|
|
{
|
|
"query_params": {
|
|
"key_id": stream_data["drm_key_id"],
|
|
"key": stream_data["drm_key"],
|
|
},
|
|
"mediaflow_endpoint": "mpd_manifest_proxy",
|
|
}
|
|
)
|
|
|
|
return response
|
|
|
|
raise ExtractorError("No valid stream found")
|
|
|
|
except Exception as e:
|
|
raise ExtractorError(f"Extraction failed: {str(e)}")
|
|
|
|
async def _extract_player_api_base(self, html_content: str) -> Tuple[Optional[str], Optional[str]]:
|
|
"""Extract player API base URL and method."""
|
|
admin_ajax_pattern = r'"player_api"\s*:\s*"([^"]+)".*?"play_method"\s*:\s*"([^"]+)"'
|
|
match = re.search(admin_ajax_pattern, html_content)
|
|
if not match:
|
|
return None, None
|
|
url = match.group(1).replace("\\/", "/")
|
|
method = match.group(2)
|
|
if method == "wp_json":
|
|
return url, method
|
|
url = urljoin(url, "/wp-admin/admin-ajax.php")
|
|
return url, method
|
|
|
|
async def _get_player_options(self, html_content: str) -> list:
|
|
"""Extract player options from HTML content."""
|
|
pattern = r'<li[^>]*class=["\']dooplay_player_option["\'][^>]*data-type=["\']([^"\']*)["\'][^>]*data-post=["\']([^"\']*)["\'][^>]*data-nume=["\']([^"\']*)["\'][^>]*>.*?<span class=["\']title["\']>([^<]*)</span>'
|
|
matches = re.finditer(pattern, html_content, re.DOTALL)
|
|
return [
|
|
{"type": match.group(1), "post": match.group(2), "nume": match.group(3), "title": match.group(4).strip()}
|
|
for match in matches
|
|
]
|
|
|
|
async def _process_player_option(self, api_base: str, method: str, post: str, nume: str, type_: str) -> Dict:
|
|
"""Process player option to get stream URL."""
|
|
if method == "wp_json":
|
|
api_url = f"{api_base}{post}/{type_}/{nume}"
|
|
response = await self._make_request(api_url)
|
|
else:
|
|
form_data = {"action": "doo_player_ajax", "post": post, "nume": nume, "type": type_}
|
|
response = await self._make_request(api_base, method="POST", data=form_data)
|
|
|
|
# Get iframe URL from API response
|
|
try:
|
|
data = response.json()
|
|
iframe_url = urljoin(api_base, data.get("embed_url", "").replace("\\/", "/"))
|
|
|
|
# Get stream URL from iframe
|
|
iframe_response = await self._make_request(iframe_url)
|
|
stream_data = await self._extract_stream_url(iframe_response, iframe_url)
|
|
return stream_data
|
|
|
|
except Exception as e:
|
|
raise ExtractorError(f"Failed to process player option: {str(e)}")
|
|
|
|
async def _extract_stream_url(self, iframe_response: Response, iframe_url: str) -> Dict:
|
|
"""
|
|
Extract final stream URL from iframe content.
|
|
"""
|
|
try:
|
|
# Parse URL components
|
|
parsed_url = urlparse(iframe_url)
|
|
query_params = dict(param.split("=") for param in parsed_url.query.split("&") if "=" in param)
|
|
|
|
# Check if content is already a direct M3U8 stream
|
|
content_types = ["application/x-mpegurl", "application/vnd.apple.mpegurl"]
|
|
|
|
if any(ext in iframe_response.headers["content-type"] for ext in content_types):
|
|
return {"url": iframe_url, "type": "m3u8"}
|
|
|
|
stream_data = {}
|
|
|
|
# Check for source parameter in URL
|
|
if "source" in query_params:
|
|
stream_data = {
|
|
"url": urljoin(iframe_url, unquote(query_params["source"])),
|
|
"type": "m3u8",
|
|
}
|
|
|
|
# Check for MPD stream with DRM
|
|
elif "zy" in query_params and ".mpd``" in query_params["zy"]:
|
|
data = query_params["zy"].split("``")
|
|
url = data[0]
|
|
key_id, key = data[1].split(":")
|
|
stream_data = {"url": url, "type": "mpd", "drm_key_id": key_id, "drm_key": key}
|
|
|
|
# Check for tamilultra specific format
|
|
elif "tamilultra" in iframe_url:
|
|
stream_data = {"url": urljoin(iframe_url, parsed_url.query), "type": "m3u8"}
|
|
|
|
# Try pattern matching for stream URLs
|
|
else:
|
|
channel_id = query_params.get("id", [""])
|
|
stream_url = None
|
|
|
|
html_content = iframe_response.text
|
|
|
|
if channel_id:
|
|
# Try channel ID specific pattern
|
|
pattern = rf'{re.escape(channel_id)}["\']:\s*{{\s*["\']?url["\']?\s*:\s*["\']([^"\']+)["\']'
|
|
match = re.search(pattern, html_content)
|
|
if match:
|
|
stream_url = match.group(1)
|
|
|
|
# Try fallback patterns if channel ID pattern fails
|
|
if not stream_url:
|
|
for pattern in [self.fallback_pattern, self.any_m3u8_pattern]:
|
|
match = pattern.search(html_content)
|
|
if match:
|
|
stream_url = match.group(1)
|
|
break
|
|
|
|
if stream_url:
|
|
stream_data = {"url": stream_url, "type": "m3u8"} # Default to m3u8, will be updated
|
|
|
|
# Check for MPD stream and extract DRM keys
|
|
if stream_url.endswith(".mpd"):
|
|
stream_data["type"] = "mpd"
|
|
drm_data = await self._extract_drm_keys(html_content, channel_id)
|
|
if drm_data:
|
|
stream_data.update(drm_data)
|
|
|
|
# If no stream data found, raise error
|
|
if not stream_data:
|
|
raise ExtractorError("No valid stream URL found")
|
|
|
|
# Update stream type based on URL if not already set
|
|
if stream_data.get("type") == "m3u8":
|
|
if stream_data["url"].endswith(".mpd"):
|
|
stream_data["type"] = "mpd"
|
|
elif not any(ext in stream_data["url"] for ext in [".m3u8", ".m3u"]):
|
|
stream_data["type"] = "m3u8" # Default to m3u8 if no extension found
|
|
|
|
return stream_data
|
|
|
|
except Exception as e:
|
|
raise ExtractorError(f"Failed to extract stream URL: {str(e)}")
|
|
|
|
async def _extract_drm_keys(self, html_content: str, channel_id: str) -> Dict:
|
|
"""
|
|
Extract DRM keys for MPD streams.
|
|
"""
|
|
try:
|
|
# Pattern for channel entry
|
|
channel_pattern = rf'"{re.escape(channel_id)}":\s*{{[^}}]+}}'
|
|
channel_match = re.search(channel_pattern, html_content)
|
|
|
|
if channel_match:
|
|
channel_data = channel_match.group(0)
|
|
|
|
# Try clearkeys pattern first
|
|
clearkey_pattern = r'["\']?clearkeys["\']?\s*:\s*{\s*["\'](.+?)["\']:\s*["\'](.+?)["\']'
|
|
clearkey_match = re.search(clearkey_pattern, channel_data)
|
|
|
|
# Try k1/k2 pattern if clearkeys not found
|
|
if not clearkey_match:
|
|
k1k2_pattern = r'["\']?k1["\']?\s*:\s*["\'](.+?)["\'],\s*["\']?k2["\']?\s*:\s*["\'](.+?)["\']'
|
|
k1k2_match = re.search(k1k2_pattern, channel_data)
|
|
|
|
if k1k2_match:
|
|
return {"drm_key_id": k1k2_match.group(1), "drm_key": k1k2_match.group(2)}
|
|
else:
|
|
return {"drm_key_id": clearkey_match.group(1), "drm_key": clearkey_match.group(2)}
|
|
|
|
return {}
|
|
|
|
except Exception:
|
|
return {}
|