mirror of
https://github.com/UrloMythus/UnHided.git
synced 2026-04-11 11:50:51 +00:00
1264 lines
45 KiB
Python
1264 lines
45 KiB
Python
"""
|
|
Telegram MTProto streaming support with parallel chunk downloads.
|
|
|
|
This module provides:
|
|
- TelegramSessionManager: Manages the Telethon client session
|
|
- TelegramMediaRef: Parsed reference to Telegram media (t.me links or file_id)
|
|
- ParallelTransferrer: FastTelethon-based parallel chunk downloader for high-speed streaming
|
|
|
|
Based on FastTelethon technique from mautrix-telegram for parallel downloads.
|
|
"""
|
|
|
|
import asyncio
|
|
import base64
|
|
import logging
|
|
import math
|
|
import re
|
|
import struct
|
|
from dataclasses import dataclass
|
|
from io import BytesIO
|
|
from typing import AsyncGenerator, Optional, Union
|
|
from urllib.parse import urlparse
|
|
|
|
from telethon import TelegramClient, utils
|
|
from telethon.crypto import AuthKey
|
|
from telethon.network import MTProtoSender
|
|
from telethon.sessions import StringSession
|
|
from telethon.tl.alltlobjects import LAYER
|
|
from telethon.tl.functions import InvokeWithLayerRequest
|
|
from telethon.tl.functions.auth import ExportAuthorizationRequest, ImportAuthorizationRequest
|
|
from telethon.tl.functions.upload import GetFileRequest
|
|
from telethon.tl.types import (
|
|
Document,
|
|
InputDocumentFileLocation,
|
|
InputFileLocation,
|
|
InputPeerPhotoFileLocation,
|
|
InputPhotoFileLocation,
|
|
Message,
|
|
MessageMediaDocument,
|
|
MessageMediaPhoto,
|
|
Photo,
|
|
)
|
|
|
|
from mediaflow_proxy.configs import settings
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Type aliases for file locations
|
|
TypeLocation = Union[
|
|
Document,
|
|
InputDocumentFileLocation,
|
|
InputPeerPhotoFileLocation,
|
|
InputFileLocation,
|
|
InputPhotoFileLocation,
|
|
]
|
|
|
|
# File type IDs for Bot API file_id.
# The identifiers are consecutive integers starting at zero, so they are
# assigned by unpacking a range; the trailing comment gives each value.
(
    FILE_TYPE_THUMBNAIL,  # 0
    FILE_TYPE_PROFILE_PHOTO,  # 1
    FILE_TYPE_PHOTO,  # 2
    FILE_TYPE_VOICE,  # 3
    FILE_TYPE_VIDEO,  # 4
    FILE_TYPE_DOCUMENT,  # 5
    FILE_TYPE_ENCRYPTED,  # 6
    FILE_TYPE_TEMP,  # 7
    FILE_TYPE_STICKER,  # 8
    FILE_TYPE_AUDIO,  # 9
    FILE_TYPE_ANIMATION,  # 10
    FILE_TYPE_ENCRYPTED_THUMBNAIL,  # 11
    FILE_TYPE_WALLPAPER,  # 12
    FILE_TYPE_VIDEO_NOTE,  # 13
    FILE_TYPE_SECURE_RAW,  # 14
    FILE_TYPE_SECURE,  # 15
    FILE_TYPE_BACKGROUND,  # 16
    FILE_TYPE_DOCUMENT_AS_FILE,  # 17
) = range(18)

# Flags in type_id (single bits above the file type value)
TYPE_ID_WEB_LOCATION_FLAG = 0x01000000  # bit 24
TYPE_ID_FILE_REFERENCE_FLAG = 0x02000000  # bit 25
|
|
|
|
|
|
@dataclass
class DecodedFileId:
    """Components extracted from a Bot API ``file_id`` string."""

    # --- always present ---
    type_id: int  # file type value
    dc_id: int  # data center id read from the file_id
    id: int  # media id
    access_hash: int  # access hash paired with the media id

    # --- optional parts, defaulting to "absent" ---
    file_reference: bytes = b""
    has_web_location: bool = False
    has_reference: bool = False
|
|
|
|
|
|
def _decode_telegram_base64(s: str) -> bytes:
|
|
"""Decode Telegram's URL-safe base64."""
|
|
s = s.replace("-", "+").replace("_", "/")
|
|
padding = 4 - len(s) % 4
|
|
if padding != 4:
|
|
s += "=" * padding
|
|
return base64.b64decode(s)
|
|
|
|
|
|
def _rle_decode(data: bytes) -> bytes:
|
|
"""RLE decode Telegram's file_id encoding."""
|
|
result = bytearray()
|
|
i = 0
|
|
while i < len(data):
|
|
if data[i] == 0 and i + 1 < len(data):
|
|
result.extend(bytes(data[i + 1]))
|
|
i += 2
|
|
else:
|
|
result.append(data[i])
|
|
i += 1
|
|
return bytes(result)
|
|
|
|
|
|
def decode_file_id(file_id: str) -> DecodedFileId:
    """
    Split a Bot API file_id into its binary components.

    The string is base64-decoded and RLE-expanded, then read as:
    type_id (int32, carries flag bits), dc_id (int32), an optional
    length-prefixed file reference, and finally id and access_hash (int64
    each). Bytes after access_hash are ignored.

    Args:
        file_id: Bot API file_id string

    Returns:
        DecodedFileId with parsed components

    Raises:
        ValueError: If file_id cannot be decoded or is too short
    """
    try:
        data = _rle_decode(_decode_telegram_base64(file_id))
    except Exception as e:
        raise ValueError(f"Failed to decode file_id base64: {e}") from e

    if len(data) < 20:
        raise ValueError(f"file_id too short: {len(data)} bytes")

    # Fixed header: two little-endian int32 values.
    type_id_raw, dc_id = struct.unpack_from("<ii", data, 0)
    header_end = 8
    cursor = header_end

    has_reference = bool(type_id_raw & TYPE_ID_FILE_REFERENCE_FLAG)
    file_reference = b""
    if has_reference:
        # Length-prefixed string: one length byte, or 254 followed by a
        # 3-byte little-endian length.
        marker = data[cursor]
        cursor += 1
        if marker == 254:
            ref_len = int.from_bytes(data[cursor : cursor + 3], "little")
            cursor += 3
        else:
            ref_len = marker

        file_reference = data[cursor : cursor + ref_len]
        cursor += ref_len

        # The string (prefix included) is padded to a multiple of 4 bytes.
        cursor += -(cursor - header_end) % 4

    # id and access_hash follow, 8 bytes each.
    tail = data[cursor:]
    if len(tail) < 16:
        raise ValueError(f"file_id remaining data too short: {len(tail)} bytes")
    id_val, access_hash = struct.unpack_from("<qq", tail, 0)

    return DecodedFileId(
        type_id=type_id_raw & 0xFFFFFF,
        dc_id=dc_id,
        id=id_val,
        access_hash=access_hash,
        file_reference=file_reference,
        has_web_location=bool(type_id_raw & TYPE_ID_WEB_LOCATION_FLAG),
        has_reference=has_reference,
    )
|
|
|
|
|
|
@dataclass
class TelegramMediaRef:
    """
    Pointer to a piece of Telegram media.

    Two addressing modes are used in this module:
    - message mode: ``chat_id`` + ``message_id`` (from t.me links such as
      https://t.me/channel/123 or https://t.me/c/123456789/456)
    - file mode: ``file_id`` only (a direct Telegram file ID)
    """

    # Channel/group/user ID (int) or username (str)
    chat_id: Optional[Union[int, str]] = None
    # Message ID, set for t.me links
    message_id: Optional[int] = None
    # Direct file reference
    file_id: Optional[str] = None
|
|
|
|
|
|
@dataclass
class MediaInfo:
    """Metadata describing one Telegram media file."""

    # Required
    file_id: str
    file_size: int
    mime_type: str

    # Optional details, None when not known
    file_name: Optional[str] = None
    duration: Optional[int] = None  # video/audio only
    width: Optional[int] = None  # video/photo only
    height: Optional[int] = None  # video/photo only
    dc_id: Optional[int] = None
|
|
|
|
|
|
def parse_telegram_url(url: str) -> TelegramMediaRef:
    """
    Parse a Telegram URL or file_id into a TelegramMediaRef.

    Supported formats:
    - https://t.me/channel/123 (public channel / user by username)
    - https://t.me/channel/<thread_id>/123 (message inside a forum topic)
    - https://t.me/c/123456789/456 (private channel)
    - https://t.me/c/123456789/<thread_id>/456 (private forum topic)
    - file_id (base64-like string longer than 20 characters)

    Args:
        url: The URL or file_id to parse

    Returns:
        TelegramMediaRef with parsed information

    Raises:
        ValueError: If the input is empty or matches none of the formats
    """
    if not url:
        raise ValueError("URL cannot be empty")

    def message_component(parts: list, first: int) -> str:
        # Forum-topic links carry "<thread_id>/<message_id>"; when a second
        # numeric component follows, that one is the message id.
        if len(parts) > first + 1 and parts[first + 1].isdecimal():
            return parts[first + 1]
        return parts[first]

    parsed = urlparse(url)
    # hostname (unlike netloc) is lower-cased and excludes port/userinfo
    host = parsed.hostname or ""
    if host in ("t.me", "telegram.me", "telegram.dog"):
        path_parts = parsed.path.strip("/").split("/")

        if len(path_parts) >= 2:
            # Format: /c/chat_id/[thread_id/]message_id (private channel)
            if path_parts[0] == "c" and len(path_parts) >= 3:
                try:
                    # Private channel IDs need -100 prefix
                    chat_id = int(f"-100{path_parts[1]}")
                    message_id = int(message_component(path_parts, 2))
                except ValueError as e:
                    raise ValueError(f"Invalid private channel URL format: {url}") from e
                return TelegramMediaRef(chat_id=chat_id, message_id=message_id)

            # Format: /username/[thread_id/]message_id (public channel or user)
            try:
                message_id = int(message_component(path_parts, 1))
            except ValueError as e:
                raise ValueError(f"Invalid public channel URL format: {url}") from e
            return TelegramMediaRef(chat_id=path_parts[0], message_id=message_id)

        raise ValueError(f"Invalid Telegram URL format: {url}")

    # Check if it looks like a file_id (base64-like string)
    if re.match(r"^[A-Za-z0-9_-]+$", url) and len(url) > 20:
        return TelegramMediaRef(file_id=url)

    raise ValueError(f"Unrecognized Telegram URL or file_id format: {url}")
|
|
|
|
|
|
@dataclass
class DownloadSender:
    """One connection's share of a download: a strided sequence of parts."""

    client: TelegramClient
    sender: MTProtoSender
    request: GetFileRequest
    remaining: int  # parts this sender still has to fetch
    stride: int  # byte distance between two consecutive parts of this sender

    async def next(self) -> Optional[bytes]:
        """Fetch this sender's next part, or return None once it has none left."""
        if self.remaining:
            response = await self.client._call(self.sender, self.request)
            self.remaining -= 1
            # Advance the shared request object to this sender's next part.
            self.request.offset += self.stride
            return response.bytes
        return None

    async def disconnect(self) -> None:
        """Close the underlying connection, ignoring any error while doing so."""
        try:
            await self.sender.disconnect()
        except Exception:
            # Best effort: the connection may already be closed
            return
|
|
|
|
|
|
class ParallelTransferrer:
    """
    Parallel chunk downloader using multiple DC connections.

    Based on FastTelethon technique from mautrix-telegram.
    Creates multiple MTProtoSender connections to the same DC
    and downloads different chunks in parallel for maximum speed.
    """

    def __init__(self, client: TelegramClient, dc_id: Optional[int] = None) -> None:
        self.client = client
        self.loop = client.loop
        self.dc_id = dc_id or client.session.dc_id
        # The session's auth key is reused only when downloading from the
        # session's own DC; otherwise it is obtained on first connect.
        self.auth_key: Optional[AuthKey] = None if dc_id and client.session.dc_id != dc_id else client.session.auth_key
        self.senders: Optional[list[DownloadSender]] = None

    async def _cleanup(self) -> None:
        """Clean up all sender connections gracefully."""
        if self.senders:
            # Use return_exceptions=True to prevent one failed disconnect from blocking others
            await asyncio.gather(*[sender.disconnect() for sender in self.senders], return_exceptions=True)
            self.senders = None

    @staticmethod
    def _get_connection_count(file_size: int, max_count: int = 20, full_size: int = 100 * 1024 * 1024) -> int:
        """
        Calculate optimal number of connections based on file size.

        Files larger than ``full_size`` get ``max_count`` connections; smaller
        files get a proportional share, but never fewer than one.
        """
        if file_size > full_size:
            return max_count
        return max(1, math.ceil((file_size / full_size) * max_count))

    async def _create_sender(self) -> MTProtoSender:
        """
        Create a new MTProtoSender connection to the DC.

        When no auth key is available yet, authorization is exported from the
        main client and imported on the new connection. If that step fails,
        the new connection is closed before the error propagates.
        """
        dc = await self.client._get_dc(self.dc_id)
        sender = MTProtoSender(self.auth_key, loggers=self.client._log)
        await sender.connect(
            self.client._connection(
                dc.ip_address,
                dc.port,
                dc.id,
                loggers=self.client._log,
                proxy=self.client._proxy,
            )
        )
        if not self.auth_key:
            try:
                logger.debug("Exporting auth to DC %s", self.dc_id)
                auth = await self.client(ExportAuthorizationRequest(self.dc_id))
                self.client._init_request.query = ImportAuthorizationRequest(id=auth.id, bytes=auth.bytes)
                req = InvokeWithLayerRequest(LAYER, self.client._init_request)
                await sender.send(req)
            except BaseException:
                # Do not leak the freshly opened connection on failure.
                try:
                    await sender.disconnect()
                except Exception:
                    pass
                raise
            self.auth_key = sender.auth_key
        return sender

    async def _create_download_sender(
        self,
        file: TypeLocation,
        index: int,
        part_size: int,
        stride: int,
        part_count: int,
        base_offset: int = 0,
    ) -> DownloadSender:
        """Create a DownloadSender whose first part starts at ``base_offset + index * part_size``."""
        return DownloadSender(
            client=self.client,
            sender=await self._create_sender(),
            request=GetFileRequest(file, offset=base_offset + index * part_size, limit=part_size),
            stride=stride,
            remaining=part_count,
        )

    async def _init_download(
        self,
        connections: int,
        file: TypeLocation,
        part_count: int,
        part_size: int,
        base_offset: int = 0,
    ) -> None:
        """
        Initialize all download senders.

        Parts are dealt round-robin: sender ``i`` fetches parts i, i + n,
        i + 2n, ... The first ``part_count % connections`` senders get one
        extra part. If any sender cannot be created, the ones already
        created are disconnected and the error is re-raised.
        """
        minimum, remainder = divmod(part_count, connections)

        def get_part_count() -> int:
            nonlocal remainder
            if remainder > 0:
                remainder -= 1
                return minimum + 1
            return minimum

        stride = connections * part_size

        # Create first sender synchronously to handle auth export
        first = await self._create_download_sender(file, 0, part_size, stride, get_part_count(), base_offset)

        results = await asyncio.gather(
            *[
                self._create_download_sender(file, i, part_size, stride, get_part_count(), base_offset)
                for i in range(1, connections)
            ],
            return_exceptions=True,
        )

        created = [first, *[r for r in results if isinstance(r, DownloadSender)]]
        failure = next((r for r in results if isinstance(r, BaseException)), None)
        if failure is not None:
            await asyncio.gather(*[sender.disconnect() for sender in created], return_exceptions=True)
            raise failure

        self.senders = created

    async def download(
        self,
        file: TypeLocation,
        file_size: int,
        offset: int = 0,
        limit: Optional[int] = None,
        part_size_kb: Optional[float] = None,
        connection_count: Optional[int] = None,
    ) -> AsyncGenerator[bytes, None]:
        """
        Download file in parallel chunks.

        Args:
            file: The file location to download
            file_size: Total file size in bytes
            offset: Byte offset to start from (for range requests)
            limit: Number of bytes to download (None for entire file)
            part_size_kb: Chunk size in KB (auto-calculated if None)
            connection_count: Number of parallel connections (auto-calculated if None)

        Yields:
            Chunks of file data, in file order. The stream ends early if the
            server returns an empty part (e.g. the file is shorter than
            ``file_size`` claims).
        """
        # Calculate actual range
        if limit is None:
            limit = file_size - offset

        # Clamp connection count to configured max
        max_connections = min(settings.telegram_max_connections, 20)
        connection_count = connection_count or self._get_connection_count(limit, max_count=max_connections)
        connection_count = min(connection_count, max_connections)

        part_size = int((part_size_kb or utils.get_appropriated_part_size(file_size)) * 1024)
        # Round offset down to part boundary
        aligned_offset = (offset // part_size) * part_size
        skip_bytes = offset - aligned_offset

        part_count = math.ceil((limit + skip_bytes) / part_size)

        # A connection with no part to fetch is pure overhead; always keep one.
        connection_count = max(1, min(connection_count, part_count))

        logger.debug(
            f"Starting parallel download: {connection_count} connections, "
            f"{part_size} bytes/part, {part_count} parts, offset={offset}, aligned_offset={aligned_offset}"
        )

        await self._init_download(connection_count, file, part_count, part_size, base_offset=aligned_offset)

        tasks: list = []
        try:
            part = 0
            bytes_yielded = 0
            exhausted = False
            while not exhausted and part < part_count and bytes_yielded < limit:
                tasks = [self.loop.create_task(sender.next()) for sender in self.senders]
                for task in tasks:
                    data = await task
                    if not data:
                        # Parts are consumed in file order, so an empty part
                        # means nothing further will arrive. Stop instead of
                        # re-polling senders that have nothing left.
                        exhausted = True
                        break

                    # Handle offset alignment - skip initial bytes if needed
                    if skip_bytes > 0:
                        if len(data) <= skip_bytes:
                            skip_bytes -= len(data)
                            part += 1
                            continue
                        data = data[skip_bytes:]
                        skip_bytes = 0

                    # Handle limit - truncate if we'd exceed
                    remaining = limit - bytes_yielded
                    if len(data) > remaining:
                        data = data[:remaining]

                    yield data
                    bytes_yielded += len(data)
                    part += 1

                    if bytes_yielded >= limit:
                        break

            logger.debug("Parallel download finished, cleaning up connections")
        finally:
            try:
                # Requests from the last round that were never consumed must
                # not outlive the connections they run on.
                for task in tasks:
                    if not task.done():
                        task.cancel()
                if tasks:
                    await asyncio.gather(*tasks, return_exceptions=True)
            finally:
                await self._cleanup()
|
|
|
|
|
|
class _SingleSenderPool:
    """
    Pool of persistent ``MTProtoSender`` connections per DC.

    Instead of creating a new connection for every HLS segment request
    (which involves handshake + auth export overhead), this pool maintains
    a list of idle senders per DC. When a caller needs a sender, it
    borrows one from the pool (or creates a new one if the pool is empty).
    After use, the sender is returned to the pool for reuse.

    Senders that have been idle longer than ``_MAX_IDLE_SECONDS`` are
    discarded on checkout.
    """

    _MAX_IDLE_SECONDS = 120.0  # discard senders idle longer than this

    def __init__(self) -> None:
        # dc_id -> list of (sender, auth_key, last_used_monotonic)
        self._pool: dict[int, list[tuple[MTProtoSender, AuthKey, float]]] = {}
        self._lock = asyncio.Lock()
        # Cached auth keys per DC -- shared across all senders.
        self._auth_keys: dict[int, AuthKey] = {}

    async def acquire(
        self,
        client: TelegramClient,
        dc_id: int,
    ) -> tuple[MTProtoSender, AuthKey]:
        """
        Borrow a connected ``MTProtoSender`` for *dc_id*.

        Returns an existing idle sender if one is available, otherwise
        creates a new one (handling auth export if needed). The pool lock is
        held for the whole call, including creation of a new sender.
        """
        import time as _time

        async with self._lock:
            bucket = self._pool.get(dc_id, [])
            now = _time.monotonic()
            # Try to find a live sender (most recently returned first)
            while bucket:
                sender, auth_key, last_used = bucket.pop()
                idle = now - last_used
                if idle > self._MAX_IDLE_SECONDS:
                    # Stale -- disconnect quietly
                    logger.debug("[sender_pool] Discarding stale sender for DC %d (idle %.0fs)", dc_id, idle)
                    await self.discard(sender)
                    continue
                # Check if still connected
                if sender.is_connected():
                    logger.debug("[sender_pool] Reusing sender for DC %d (idle %.1fs)", dc_id, idle)
                    return sender, auth_key
                logger.debug("[sender_pool] Sender for DC %d disconnected, discarding", dc_id)
                await self.discard(sender)

            # No reusable sender -- create a new one
            logger.debug("[sender_pool] Creating new sender for DC %d", dc_id)
            return await self._create_sender(client, dc_id)

    async def _create_sender(
        self,
        client: TelegramClient,
        dc_id: int,
    ) -> tuple[MTProtoSender, AuthKey]:
        """
        Create a new ``MTProtoSender`` with auth export if needed.

        The auth key comes from the per-DC cache, or from the client session
        when *dc_id* is the session's own DC. Without a key, authorization is
        exported from the main client and imported on the new connection; if
        that fails the new connection is closed before the error propagates.
        """
        auth_key = self._auth_keys.get(dc_id)
        if auth_key is None and dc_id == client.session.dc_id:
            auth_key = client.session.auth_key

        dc = await client._get_dc(dc_id)
        sender = MTProtoSender(auth_key, loggers=client._log)
        await sender.connect(
            client._connection(
                dc.ip_address,
                dc.port,
                dc.id,
                loggers=client._log,
                proxy=client._proxy,
            )
        )
        if not auth_key:
            try:
                logger.debug("[sender_pool] Exporting auth to DC %d", dc_id)
                auth = await client(ExportAuthorizationRequest(dc_id))
                client._init_request.query = ImportAuthorizationRequest(id=auth.id, bytes=auth.bytes)
                req = InvokeWithLayerRequest(LAYER, client._init_request)
                await sender.send(req)
            except BaseException:
                # Do not leak the freshly opened connection on failure.
                await self.discard(sender)
                raise
            auth_key = sender.auth_key
            self._auth_keys[dc_id] = auth_key
        return sender, auth_key

    async def release(
        self,
        dc_id: int,
        sender: MTProtoSender,
        auth_key: AuthKey,
    ) -> None:
        """
        Return a sender to the pool for reuse.

        The auth key is cached for the DC. A sender that is no longer
        connected is disconnected and dropped instead of being pooled.
        """
        import time as _time

        # Cache auth key
        if auth_key is not None:
            self._auth_keys[dc_id] = auth_key

        if not sender.is_connected():
            logger.debug("[sender_pool] Sender for DC %d disconnected, not returning to pool", dc_id)
            await self.discard(sender)
            return

        async with self._lock:
            bucket = self._pool.setdefault(dc_id, [])
            bucket.append((sender, auth_key, _time.monotonic()))
            logger.debug("[sender_pool] Returned sender to pool for DC %d (pool size=%d)", dc_id, len(bucket))

    async def discard(self, sender: MTProtoSender) -> None:
        """Disconnect and discard a sender without returning it to the pool."""
        try:
            await sender.disconnect()
        except Exception:
            # Best effort: the connection may already be gone
            pass

    async def close_all(self) -> None:
        """Disconnect all pooled senders and forget all cached auth keys."""
        async with self._lock:
            for bucket in self._pool.values():
                for sender, _, _ in bucket:
                    await self.discard(sender)
                bucket.clear()
            self._pool.clear()
            self._auth_keys.clear()
|
|
|
|
|
|
class TelegramSessionManager:
|
|
"""
|
|
Manages the Telethon client session.
|
|
|
|
Features:
|
|
- Lazy initialization on first request
|
|
- Session persistence via StringSession
|
|
- Automatic reconnection on disconnect
|
|
- Thread-safe with asyncio lock
|
|
- Persistent sender pool for HLS segment downloads
|
|
"""
|
|
|
|
# Cache TTL for get_media_info results (seconds)
|
|
_MEDIA_INFO_CACHE_TTL = 3600 # 1 hour
|
|
|
|
def __init__(self):
    # Guards client creation in get_client()
    self._lock = asyncio.Lock()
    # Created lazily by get_client()
    self._client: Optional[TelegramClient] = None
    self._initialized = False
    # Persistent sender pool for single-connection downloads (HLS).
    self._sender_pool = _SingleSenderPool()
    # In-memory cache: key -> (MediaInfo, expiry_timestamp)
    self._media_info_cache: dict[str, tuple["MediaInfo", float]] = {}
|
|
|
|
async def get_client(self) -> TelegramClient:
    """
    Get the Telethon client, initializing if needed.

    A new client is only stored on the manager after it has connected and
    the session has been confirmed as authorized. On any failure the new
    client is disconnected, so a later call never returns an unauthorized
    client.

    Returns:
        Connected TelegramClient instance

    Raises:
        ValueError: If Telegram settings are not configured
        RuntimeError: If the session is not authorized
    """
    async with self._lock:
        if self._client is not None and self._client.is_connected():
            return self._client

        # Validate settings
        if not settings.telegram_api_id or not settings.telegram_api_hash:
            raise ValueError("Telegram API credentials not configured (telegram_api_id, telegram_api_hash)")

        if not settings.telegram_session_string:
            raise ValueError(
                "Telegram session string not configured. Generate one using the web UI at /url-generator#telegram"
            )

        logger.info("Initializing Telegram client...")

        # Create client with StringSession (extract raw values from SecretStr)
        client = TelegramClient(
            StringSession(settings.telegram_session_string.get_secret_value()),
            settings.telegram_api_id,
            settings.telegram_api_hash.get_secret_value(),
            request_retries=3,
            connection_retries=3,
            retry_delay=1,
            timeout=settings.telegram_request_timeout,
        )

        try:
            await client.connect()

            if not await client.is_user_authorized():
                raise RuntimeError(
                    "Telegram session is not authorized. Please regenerate the session string with valid credentials."
                )
        except BaseException:
            # Never keep a half-initialized client: close it so the next call
            # starts from scratch instead of returning it.
            try:
                await client.disconnect()
            except Exception:
                pass
            raise

        self._client = client
        self._initialized = True
        logger.info("Telegram client initialized successfully")
        return client
|
|
|
|
async def get_message(self, ref: TelegramMediaRef) -> Message:
    """
    Fetch the message a reference points to.

    Args:
        ref: TelegramMediaRef with chat_id and message_id

    Returns:
        The Message object

    Raises:
        ValueError: If the reference lacks chat_id or message_id, or the
            lookup returns nothing
        Various Telegram errors: ChannelPrivateError, MessageIdInvalidError, etc.
    """
    incomplete = ref.chat_id is None or ref.message_id is None
    if incomplete:
        raise ValueError("chat_id and message_id are required to fetch a message")

    telegram = await self.get_client()
    found = await telegram.get_messages(ref.chat_id, ids=ref.message_id)
    if found:
        return found

    raise ValueError(f"Message {ref.message_id} not found in {ref.chat_id}")
|
|
|
|
def resolve_file_id(self, file_id: str) -> tuple[Union[Document, Photo], int]:
    """
    Turn a Bot API file_id into a Telethon Document or Photo object.

    Telethon's own resolver is tried first; when it does not produce a
    Document or Photo, the module's ``decode_file_id`` is used and a bare
    object is built from the decoded fields.

    Args:
        file_id: Bot API style file_id string

    Returns:
        Tuple of (Document or Photo object, dc_id)

    Raises:
        ValueError: If file_id is invalid or cannot be decoded
    """
    # First try Telethon's built-in resolver (works for older formats)
    resolved = utils.resolve_bot_file_id(file_id)
    if isinstance(resolved, (Document, Photo)):
        return resolved, resolved.dc_id

    # Fall back to our custom decoder for newer formats
    logger.debug("Telethon couldn't decode file_id, trying custom decoder")
    parts = decode_file_id(file_id)

    # Fields shared by both object kinds
    shared = dict(
        id=parts.id,
        access_hash=parts.access_hash,
        file_reference=parts.file_reference,
        date=None,
        dc_id=parts.dc_id,
    )

    photo_types = (FILE_TYPE_PHOTO, FILE_TYPE_PROFILE_PHOTO, FILE_TYPE_THUMBNAIL)
    if parts.type_id in photo_types:
        # sizes stays empty: the file_id carries no size information
        media = Photo(sizes=[], **shared)
    else:
        # Everything else (video, audio, document, ...) becomes a Document;
        # mime_type and size are unknown from the file_id
        media = Document(mime_type="", size=0, thumbs=None, attributes=[], **shared)

    return media, parts.dc_id
|
|
|
|
def _media_info_cache_key(self, ref: TelegramMediaRef) -> str:
|
|
"""Derive an in-memory cache key for a TelegramMediaRef."""
|
|
if ref.file_id and not ref.message_id:
|
|
return f"fid:{ref.file_id}"
|
|
if ref.chat_id is not None and ref.message_id is not None:
|
|
return f"chat:{ref.chat_id}:msg:{ref.message_id}"
|
|
return ""
|
|
|
|
async def get_media_info(self, ref: TelegramMediaRef, file_size: Optional[int] = None) -> MediaInfo:
|
|
"""
|
|
Get information about a media file.
|
|
|
|
Results are cached in-memory (with TTL) to avoid repeated Telegram API
|
|
calls for the same media -- especially important for HLS, where each
|
|
sub-request (playlist, init, segments) resolves the same source.
|
|
|
|
Args:
|
|
ref: TelegramMediaRef pointing to the media
|
|
file_size: Optional file size (required for file_id since it's not encoded in the ID)
|
|
|
|
Returns:
|
|
MediaInfo with file details
|
|
"""
|
|
# Check in-memory cache first
|
|
import time
|
|
|
|
ck = self._media_info_cache_key(ref)
|
|
if ck:
|
|
cached = self._media_info_cache.get(ck)
|
|
if cached is not None:
|
|
info, expiry = cached
|
|
if time.monotonic() < expiry:
|
|
return info
|
|
else:
|
|
del self._media_info_cache[ck]
|
|
|
|
info = await self._get_media_info_uncached(ref, file_size)
|
|
|
|
# Store in cache
|
|
if ck:
|
|
self._media_info_cache[ck] = (info, time.monotonic() + self._MEDIA_INFO_CACHE_TTL)
|
|
|
|
return info
|
|
|
|
async def _get_media_info_uncached(
    self,
    ref: TelegramMediaRef,
    file_size: Optional[int] = None,
) -> MediaInfo:
    """
    Uncached implementation of get_media_info.

    file_id references are resolved locally via ``resolve_file_id``; message
    references are fetched with ``get_message``. Photos with an empty
    ``sizes`` list (as built from a bare file_id) report size 0 and unknown
    dimensions instead of failing.

    Args:
        ref: TelegramMediaRef pointing to the media
        file_size: Size to report for file_id references; ignored for
            message references, where the document/photo supplies it

    Returns:
        MediaInfo with file details

    Raises:
        ValueError: If the message has no media or the media type is unsupported
    """

    def document_details(attributes):
        # Collect file name, duration and dimensions from document attributes;
        # a later attribute overrides an earlier one for the same field.
        found = {"file_name": None, "duration": None, "w": None, "h": None}
        for attr in attributes:
            attr_dict = attr.to_dict()
            for key in found:
                if key in attr_dict:
                    found[key] = attr_dict[key]
        return found["file_name"], found["duration"], found["w"], found["h"]

    def largest_size(sizes):
        # None when there are no sizes; the getattr defaults below then apply
        return max(sizes, key=lambda s: getattr(s, "size", 0), default=None)

    # Handle file_id reference
    if ref.file_id and not ref.message_id:
        media, dc_id = self.resolve_file_id(ref.file_id)

        if isinstance(media, Document):
            file_name, duration, width, height = document_details(media.attributes)
            mime_type = media.mime_type or "application/octet-stream"

            # No specific mime type known: infer one from the attributes
            if mime_type == "application/octet-stream":
                for attr in media.attributes:
                    if hasattr(attr, "voice") and attr.voice:
                        mime_type = "audio/ogg"
                        break
                    elif hasattr(attr, "round_message") and attr.round_message:
                        mime_type = "video/mp4"
                        break
                    elif attr.__class__.__name__ == "DocumentAttributeVideo":
                        mime_type = "video/mp4"
                        break
                    elif attr.__class__.__name__ == "DocumentAttributeAudio":
                        mime_type = "audio/mpeg"
                        break
                    elif attr.__class__.__name__ == "DocumentAttributeSticker":
                        mime_type = "image/webp"
                        break
                    elif attr.__class__.__name__ == "DocumentAttributeAnimated":
                        mime_type = "application/x-tgsticker"
                        break

            return MediaInfo(
                file_id=ref.file_id,
                file_size=file_size or media.size,  # Use provided size or the resolved one
                mime_type=mime_type,
                file_name=file_name,
                duration=duration,
                width=width,
                height=height,
                dc_id=dc_id,
            )

        elif isinstance(media, Photo):
            largest = largest_size(media.sizes)

            return MediaInfo(
                file_id=ref.file_id,
                file_size=file_size or getattr(largest, "size", 0),
                mime_type="image/jpeg",
                width=getattr(largest, "w", None),
                height=getattr(largest, "h", None),
                dc_id=dc_id,
            )

        raise ValueError(f"Unsupported media type from file_id: {type(media)}")

    # Handle message-based reference
    message = await self.get_message(ref)

    if not message.media:
        raise ValueError(f"Message {ref.message_id} does not contain media")

    if isinstance(message.media, MessageMediaDocument):
        doc = message.media.document
        if not isinstance(doc, Document):
            raise ValueError("Invalid document in message")

        file_name, duration, width, height = document_details(doc.attributes)

        return MediaInfo(
            file_id=str(doc.id),
            file_size=doc.size,
            mime_type=doc.mime_type or "application/octet-stream",
            file_name=file_name,
            duration=duration,
            width=width,
            height=height,
            dc_id=doc.dc_id,
        )

    elif isinstance(message.media, MessageMediaPhoto):
        photo = message.media.photo
        if not photo:
            raise ValueError("Invalid photo in message")

        largest = largest_size(photo.sizes)

        return MediaInfo(
            file_id=str(photo.id),
            file_size=getattr(largest, "size", 0),
            mime_type="image/jpeg",
            width=getattr(largest, "w", None),
            height=getattr(largest, "h", None),
            dc_id=photo.dc_id,
        )

    else:
        raise ValueError(f"Unsupported media type: {type(message.media)}")
|
|
|
|
async def validate_file_access(
    self,
    ref: TelegramMediaRef,
    file_size: Optional[int] = None,
) -> None:
    """
    Validate that the session can access the file before streaming.

    For file_id references this requests the first few KB of the file to
    verify the file_reference is valid and the session has access, so that
    errors surface before streaming starts. For message-based references
    only the client is obtained; no test request is made.

    Args:
        ref: TelegramMediaRef pointing to the media
        file_size: Optional file size for file_id mode

    Raises:
        FileReferenceExpiredError: If file_id belongs to different session
        Various Telegram errors: For access issues
    """
    client = await self.get_client()

    if ref.file_id and not ref.message_id:
        media, dc_id = self.resolve_file_id(ref.file_id)

        if isinstance(media, Document):
            file_location = InputDocumentFileLocation(
                id=media.id,
                access_hash=media.access_hash,
                file_reference=media.file_reference,
                thumb_size="",
            )
        elif isinstance(media, Photo):
            # sizes is empty for photos built from a bare file_id; default=None
            # lets the "x" fallback below apply instead of max() raising.
            largest = max(media.sizes, key=lambda s: getattr(s, "size", 0), default=None)
            file_location = InputPhotoFileLocation(
                id=media.id,
                access_hash=media.access_hash,
                file_reference=media.file_reference,
                thumb_size=getattr(largest, "type", "x"),
            )
        else:
            raise ValueError(f"Unsupported media type from file_id: {type(media)}")

        # Make a small test request to validate access
        # Use ParallelTransferrer which handles DC migration properly
        transferrer = ParallelTransferrer(client, dc_id)
        try:
            # Just request a tiny amount to validate - the download method handles DC connections
            download_gen = transferrer.download(file_location, file_size or 4096, offset=0, limit=4096)
            try:
                await download_gen.__anext__()  # Get first chunk to validate
            except StopAsyncIteration:
                pass  # Empty file is still valid
            finally:
                # Properly close the generator
                await download_gen.aclose()
            logger.debug("[validate_file_access] file_id access validated on DC %d", dc_id)
        except Exception as e:
            logger.warning(f"[validate_file_access] file_id validation failed: {e}")
            raise
        finally:
            # Clean up transferrer connections
            await transferrer._cleanup()
|
|
|
|
async def _resolve_file_location(
    self,
    ref: TelegramMediaRef,
    file_size: Optional[int] = None,
) -> tuple["TypeLocation", int, int]:
    """
    Resolve a ``TelegramMediaRef`` into a Telegram file location.

    Two kinds of reference are supported:

    * file_id only (``ref.file_id`` set and ``ref.message_id`` unset): the
      media object comes from ``self.resolve_file_id``. The size is
      ``file_size`` when given, otherwise whatever the resolved media
      object carries.
    * message based: the message is fetched with ``self.get_message`` and
      the location and size are read from its document or photo.
      ``file_size`` is not consulted on this path.

    For photos the largest entry of ``sizes`` is selected. Entries that
    expose a ``sizes`` list instead of a ``size`` integer (progressive
    variants) are ranked by the largest value in that list.

    Args:
        ref: TelegramMediaRef pointing to the media
        file_size: Optional size in bytes supplied by the caller
            (only used for file_id references)

    Returns:
        ``(file_location, dc_id, actual_file_size)``

    Raises:
        ValueError: If the media type is unsupported, the message has no
            media, the document/photo is not usable, or (file_id mode)
            no file size is known.
    """

    def _variant_bytes(variant) -> int:
        """Return the byte size of one photo size variant, or 0 if unknown."""
        plain = getattr(variant, "size", None)
        if plain:
            return plain
        # Progressive variants carry a ``sizes`` list rather than ``size``;
        # the largest entry is the size of the complete image.
        progressive = getattr(variant, "sizes", None)
        return max(progressive) if progressive else 0

    # Handle file_id reference (no message needed)
    if ref.file_id and not ref.message_id:
        media, dc_id = self.resolve_file_id(ref.file_id)

        if isinstance(media, Document):
            known_size = media.size
            file_location = InputDocumentFileLocation(
                id=media.id,
                access_hash=media.access_hash,
                file_reference=media.file_reference,
                thumb_size="",
            )
        elif isinstance(media, Photo):
            # ``default=None`` keeps an empty ``sizes`` list from surfacing
            # as an opaque "max() arg is an empty sequence" error; the
            # getattr fallbacks below then apply.
            largest = max(media.sizes or [], key=_variant_bytes, default=None)
            known_size = _variant_bytes(largest)
            file_location = InputPhotoFileLocation(
                id=media.id,
                access_hash=media.access_hash,
                file_reference=media.file_reference,
                thumb_size=getattr(largest, "type", "x"),
            )
        else:
            raise ValueError(f"Unsupported media type from file_id: {type(media)}")

        actual_file_size = file_size or known_size
        # ``not`` (rather than ``== 0``) also rejects a missing/None size.
        if not actual_file_size:
            raise ValueError(
                "file_size parameter is required when streaming by file_id. "
                "The file_id doesn't contain size information."
            )
        return file_location, dc_id, actual_file_size

    # Handle message-based reference (requires Telegram API call)
    message = await self.get_message(ref)

    if not message.media:
        raise ValueError(f"Message {ref.message_id} does not contain media")

    if isinstance(message.media, MessageMediaDocument):
        doc = message.media.document
        if not isinstance(doc, Document):
            raise ValueError("Invalid document")

        file_location = InputDocumentFileLocation(
            id=doc.id,
            access_hash=doc.access_hash,
            file_reference=doc.file_reference,
            thumb_size="",
        )
        return file_location, doc.dc_id, doc.size

    if isinstance(message.media, MessageMediaPhoto):
        photo = message.media.photo
        # Mirror the document branch: anything that is not a real Photo
        # with at least one size cannot be turned into a file location.
        if not isinstance(photo, Photo) or not photo.sizes:
            raise ValueError("Invalid photo")

        largest = max(photo.sizes, key=_variant_bytes)
        file_location = InputPhotoFileLocation(
            id=photo.id,
            access_hash=photo.access_hash,
            file_reference=photo.file_reference,
            thumb_size=getattr(largest, "type", ""),
        )
        return file_location, photo.dc_id, _variant_bytes(largest)

    raise ValueError(f"Unsupported media type: {type(message.media)}")
|
|
|
|
async def stream_media(
    self,
    ref: TelegramMediaRef,
    offset: int = 0,
    limit: Optional[int] = None,
    file_size: Optional[int] = None,
) -> AsyncGenerator[bytes, None]:
    """
    Stream media content with **parallel** downloads (fast Telethon).

    Resolves ``ref`` to a file location, then relays every chunk produced
    by a ``ParallelTransferrer`` created for the file's DC. Best suited
    for large/full-file downloads (e.g. the non-transcode
    ``/proxy/telegram/stream`` endpoint).

    For small byte-range fetches (HLS segments) use
    ``stream_media_single`` instead.

    Args:
        ref: TelegramMediaRef pointing to the media
        offset: Byte offset to start from
        limit: Number of bytes to download (None for entire file)
        file_size: Optional file size (required for file_id streaming)

    Yields:
        Chunks of media data
    """
    client = await self.get_client()
    file_location, dc_id, actual_file_size = await self._resolve_file_location(ref, file_size)

    transferrer = ParallelTransferrer(client, dc_id)
    try:
        download_gen = transferrer.download(
            file_location,
            actual_file_size,
            offset=offset,
            limit=limit,
        )
        try:
            async for chunk in download_gen:
                yield chunk
        finally:
            # If the consumer stops early (or is cancelled) the inner
            # generator is still suspended; close it explicitly so its own
            # cleanup runs before the transferrer is torn down. This is a
            # no-op when the generator has already finished.
            await download_gen.aclose()
    finally:
        # Clean up transferrer connections
        await transferrer._cleanup()
|
|
|
|
async def stream_media_single(
    self,
    ref: TelegramMediaRef,
    offset: int = 0,
    limit: Optional[int] = None,
    file_size: Optional[int] = None,
) -> AsyncGenerator[bytes, None]:
    """
    Stream media content over a **pooled** single MTProto connection.

    Borrows a sender from ``self._sender_pool`` for the target DC and
    issues sequential ``GetFileRequest`` calls on it. When the download
    ends the sender is released back to the pool, unless a request on it
    failed, in which case it is discarded.

    This is intended for small byte-range fetches (HLS segments, probe
    headers) where spinning up connections per request is wasteful.

    Requests are aligned down to a multiple of the part size; the leading
    bytes before ``offset`` are dropped and the output is trimmed so that
    at most ``limit`` bytes are yielded.

    Args:
        ref: TelegramMediaRef pointing to the media
        offset: Byte offset to start from
        limit: Number of bytes to download (None for entire file)
        file_size: Optional file size (required for file_id streaming)

    Yields:
        Chunks of media data
    """
    client = await self.get_client()
    file_location, dc_id, actual_file_size = await self._resolve_file_location(ref, file_size)

    if offset >= actual_file_size:
        return

    if limit is None:
        limit = actual_file_size - offset

    if limit <= 0:
        # Nothing to deliver: do not borrow a pooled sender for no work.
        return

    part_size = int(utils.get_appropriated_part_size(actual_file_size) * 1024)
    # Requests start on a part boundary; ``skip_bytes`` is the surplus at
    # the front of the first part that the caller did not ask for.
    aligned_offset = (offset // part_size) * part_size
    skip_bytes = offset - aligned_offset
    part_count = math.ceil((limit + skip_bytes) / part_size)

    logger.debug(
        "[single] DC %d: offset=%d, limit=%d, parts=%d, part_size=%d",
        dc_id,
        offset,
        limit,
        part_count,
        part_size,
    )

    sender, auth_key = await self._sender_pool.acquire(client, dc_id)
    sender_ok = True  # track whether to return to pool or discard

    try:
        request = GetFileRequest(file_location, offset=aligned_offset, limit=part_size)
        bytes_yielded = 0

        for _ in range(part_count):
            if bytes_yielded >= limit:
                break
            try:
                result = await client._call(sender, request)
            except Exception:
                # A failed request leaves the sender in an unknown state;
                # make sure it is not handed to the next caller.
                sender_ok = False
                raise
            data = result.bytes
            if not data:
                break
            request.offset += part_size

            # A part shorter than requested is the last one in the file.
            # Remember that so no further request is sent past EOF when
            # ``limit`` asked for more bytes than remain.
            reached_eof = len(data) < part_size

            # Handle offset alignment
            if skip_bytes > 0:
                if len(data) <= skip_bytes:
                    skip_bytes -= len(data)
                    if reached_eof:
                        break
                    continue
                data = data[skip_bytes:]
                skip_bytes = 0

            # Trim to limit
            remaining = limit - bytes_yielded
            if len(data) > remaining:
                data = data[:remaining]

            bytes_yielded += len(data)
            yield data

            if reached_eof:
                break
    finally:
        if sender_ok:
            await self._sender_pool.release(dc_id, sender, auth_key)
        else:
            await self._sender_pool.discard(sender)
|
|
|
|
async def close(self) -> None:
    """
    Close the Telegram client connection and pooled senders.

    Pooled senders are closed first. The client is then disconnected under
    ``self._lock`` and the manager state is reset. The disconnect step runs
    even when closing the pool raises, so a pool failure cannot leave the
    client connected; the pool error still propagates to the caller.
    """
    try:
        await self._sender_pool.close_all()
    finally:
        async with self._lock:
            if self._client is not None:
                await self._client.disconnect()
                self._client = None
                self._initialized = False
                logger.info("Telegram client disconnected")
|
|
|
|
@property
def is_initialized(self) -> bool:
    """Report whether the manager is initialized and its client is connected."""
    ready = self._initialized
    if not ready:
        # Not initialized: hand back the flag itself, as the chained
        # ``and`` expression would.
        return ready

    client = self._client
    if client is None:
        return False

    # Initialized with a client object: the answer is its connection state.
    return client.is_connected()
|
|
|
|
|
|
# Global session manager instance: a single TelegramSessionManager created
# here and bound to the name ``telegram_manager``.
|
|
telegram_manager = TelegramSessionManager()
|