import re import json import base64 from typing import Dict, Any from urllib.parse import urlparse, parse_qs from bs4 import BeautifulSoup from mediaflow_proxy.extractors.base import BaseExtractor, ExtractorError class CityExtractor(BaseExtractor): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.mediaflow_endpoint = "hls_manifest_proxy" def atob_fixed(self, data: str) -> str: try: return base64.b64decode(data).decode("utf-8", errors="ignore") except Exception: return "" def extract_json_array(self, decoded: str): start = decoded.find("file:") if start == -1: start = decoded.find("sources:") if start == -1: return None start = decoded.find("[", start) if start == -1: return None depth = 0 for i in range(start, len(decoded)): if decoded[i] == "[": depth += 1 elif decoded[i] == "]": depth -= 1 if depth == 0: return decoded[start : i + 1] return None def pick_stream(self, file_data, season: int = 1, episode: int = 1): if isinstance(file_data, str): return file_data if isinstance(file_data, list): if all(isinstance(x, dict) and "file" in x for x in file_data): idx = max(0, episode - 1) return file_data[idx]["file"] selected_season = None for s in file_data: if not isinstance(s, dict): continue folder = s.get("folder") if not folder: continue title = (s.get("title") or "").lower() if re.search(rf"(season|s)\s*0*{season}\b", title): selected_season = folder break if not selected_season: for s in file_data: folder = s.get("folder") if folder: selected_season = folder break if not selected_season: return None idx = max(0, episode - 1) return selected_season[idx].get("file") if idx < len(selected_season) else selected_season[0].get("file") return None async def extract(self, url: str, season: int = 1, episode: int = 1, **kwargs) -> Dict[str, Any]: """Main extraction entry point""" parsed = urlparse(url) query = parse_qs(parsed.query) if "s" in query: try: season = int(query["s"][0]) except Exception: pass if "e" in query: try: episode = int(query["e"][0]) except Exception: pass clean_url = f"{parsed.scheme}://{parsed.netloc}{parsed.path}" cookie_b64 = "ZGxlX3VzZXJfaWQ9MzI3Mjk7IGRsZV9wYXNzd29yZD04OTQxNzFjNmE4ZGFiMThlZTU5NGQ1YzY1MjAwOWEzNTs=" cookie = base64.b64decode(cookie_b64).decode() headers = { "User-Agent": self.base_headers.get("user-agent"), "Referer": clean_url, "Cookie": cookie, } response = await self._make_request(clean_url, headers=headers) if response.status != 200: raise ExtractorError("Failed to load City page") soup = BeautifulSoup(response.text, "lxml") file_data = None for script in soup.find_all("script"): if file_data: break script_html = script.string or script.text or "" if "atob" not in script_html: continue matches = re.finditer(r'atob\(\s*[\'"](.*?)[\'"]\s*\)', script_html) for match in matches: encoded = match.group(1) decoded = self.atob_fixed(encoded) if not decoded: continue raw_json = self.extract_json_array(decoded) if raw_json: try: raw_json = re.sub(r"\\(.)", r"\1", raw_json) file_data = json.loads(raw_json) except Exception: file_data = raw_json break file_match = re.search(r'file\s*:\s*[\'"](.*?)[\'"]', decoded, re.S) if file_match: file_data = file_match.group(1) break if not file_data: raise ExtractorError("No stream found") stream_url = self.pick_stream(file_data, season=season, episode=episode) if not stream_url: raise ExtractorError("Stream extraction failed") return { "destination_url": stream_url, "request_headers": { "Referer": clean_url, "User-Agent": self.base_headers.get("user-agent"), }, "mediaflow_endpoint": self.mediaflow_endpoint, }