"""
Telegram MTProto streaming support with parallel chunk downloads.
This module provides:
- TelegramSessionManager: Manages the Telethon client session
- TelegramMediaRef: Parsed reference to Telegram media (t.me links or file_id)
- ParallelTransferrer: FastTelethon-based parallel chunk downloader for high-speed streaming
Based on FastTelethon technique from mautrix-telegram for parallel downloads.
"""
import asyncio
import base64
import logging
import math
import re
import struct
import time
from dataclasses import dataclass
from io import BytesIO
from typing import AsyncGenerator, Optional, Union
from urllib.parse import urlparse
from telethon import TelegramClient, utils
from telethon.crypto import AuthKey
from telethon.network import MTProtoSender
from telethon.sessions import StringSession
from telethon.tl.alltlobjects import LAYER
from telethon.tl.functions import InvokeWithLayerRequest
from telethon.tl.functions.auth import ExportAuthorizationRequest, ImportAuthorizationRequest
from telethon.tl.functions.upload import GetFileRequest
from telethon.tl.types import (
    Document,
    InputDocumentFileLocation,
    InputFileLocation,
    InputPeerPhotoFileLocation,
    InputPhotoFileLocation,
    Message,
    MessageMediaDocument,
    MessageMediaPhoto,
    Photo,
)
from mediaflow_proxy.configs import settings

logger = logging.getLogger(__name__)

# Type aliases for file locations
TypeLocation = Union[
    Document,
    InputDocumentFileLocation,
    InputPeerPhotoFileLocation,
    InputFileLocation,
    InputPhotoFileLocation,
]

# File type IDs for Bot API file_id
FILE_TYPE_THUMBNAIL = 0
FILE_TYPE_PROFILE_PHOTO = 1
FILE_TYPE_PHOTO = 2
FILE_TYPE_VOICE = 3
FILE_TYPE_VIDEO = 4
FILE_TYPE_DOCUMENT = 5
FILE_TYPE_ENCRYPTED = 6
FILE_TYPE_TEMP = 7
FILE_TYPE_STICKER = 8
FILE_TYPE_AUDIO = 9
FILE_TYPE_ANIMATION = 10
FILE_TYPE_ENCRYPTED_THUMBNAIL = 11
FILE_TYPE_WALLPAPER = 12
FILE_TYPE_VIDEO_NOTE = 13
FILE_TYPE_SECURE_RAW = 14
FILE_TYPE_SECURE = 15
FILE_TYPE_BACKGROUND = 16
FILE_TYPE_DOCUMENT_AS_FILE = 17
# Flags in type_id
TYPE_ID_WEB_LOCATION_FLAG = 1 << 24
TYPE_ID_FILE_REFERENCE_FLAG = 1 << 25


@dataclass
class DecodedFileId:
    """Decoded Bot API file_id structure."""

    type_id: int
    dc_id: int
    id: int
    access_hash: int
    file_reference: bytes = b""
    has_web_location: bool = False
    has_reference: bool = False


def _decode_telegram_base64(s: str) -> bytes:
    """Decode Telegram's URL-safe base64."""
    s = s.replace("-", "+").replace("_", "/")
    padding = 4 - len(s) % 4
    if padding != 4:
        s += "=" * padding
    return base64.b64decode(s)
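
# Round-trip sanity check (illustration only): "AQID" is plain base64 for
# b"\x01\x02\x03" and needs no padding or URL-safe substitutions:
#
#     _decode_telegram_base64("AQID") == b"\x01\x02\x03"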


def _rle_decode(data: bytes) -> bytes:
    """RLE-decode Telegram's file_id payload: a zero byte followed by a count
    expands to that many zero bytes; every other byte passes through verbatim."""
    result = bytearray()
    i = 0
    while i < len(data):
        if data[i] == 0 and i + 1 < len(data):
            # bytes(n) yields n zero bytes, i.e. the expanded run
            result.extend(bytes(data[i + 1]))
            i += 2
        else:
            result.append(data[i])
            i += 1
    return bytes(result)
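
# Worked example: the pair b"\x00\x03" expands to three zero bytes, so
#
#     _rle_decode(b"\x01\x00\x03\x02") == b"\x01\x00\x00\x00\x02"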


def decode_file_id(file_id: str) -> DecodedFileId:
    """
    Decode a Bot API file_id into its components.

    Supports both old and new file_id formats (including version 4 with high
    sub_versions).

    Args:
        file_id: Bot API file_id string

    Returns:
        DecodedFileId with parsed components

    Raises:
        ValueError: If the file_id cannot be decoded
    """
    try:
        decoded = _decode_telegram_base64(file_id)
        data = _rle_decode(decoded)
    except Exception as e:
        raise ValueError(f"Failed to decode file_id base64: {e}") from e
    if len(data) < 20:
        raise ValueError(f"file_id too short: {len(data)} bytes")
    buf = BytesIO(data)
    # Read type_id (4 bytes, little-endian)
    type_id_raw = struct.unpack("<i", buf.read(4))[0]
    # Extract flags and the actual type
    has_web_location = bool(type_id_raw & TYPE_ID_WEB_LOCATION_FLAG)
    has_reference = bool(type_id_raw & TYPE_ID_FILE_REFERENCE_FLAG)
    type_id = type_id_raw & 0xFFFFFF
    # Read dc_id (4 bytes)
    dc_id = struct.unpack("<i", buf.read(4))[0]
    file_reference = b""
    if has_reference:
        # Read TL string (length-prefixed)
        ref_len_byte = buf.read(1)[0]
        if ref_len_byte == 254:
            # Long string: the next 3 bytes are the length
            ref_len = struct.unpack("<I", buf.read(3) + b"\x00")[0]
        else:
            ref_len = ref_len_byte
        file_reference = buf.read(ref_len)
        # Skip padding to 4-byte alignment
        total_len = 1 + (3 if ref_len_byte == 254 else 0) + ref_len
        padding = total_len % 4
        if padding:
            buf.read(4 - padding)
    # Read id and access_hash (8 bytes each); any trailing bytes
    # (e.g. format version markers) are ignored.
    remaining = buf.read()
    if len(remaining) < 16:
        raise ValueError(f"file_id remaining data too short: {len(remaining)} bytes")
    id_val = struct.unpack("<q", remaining[0:8])[0]
    access_hash = struct.unpack("<q", remaining[8:16])[0]
    return DecodedFileId(
        type_id=type_id,
        dc_id=dc_id,
        id=id_val,
        access_hash=access_hash,
        file_reference=file_reference,
        has_web_location=has_web_location,
        has_reference=has_reference,
    )
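
# A minimal usage sketch (the file_id below is a placeholder, not a real value):
#
#     info = decode_file_id("<bot-api-file-id>")
#     info.dc_id, info.id, info.access_hash, info.has_reference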


@dataclass
class TelegramMediaRef:
    """
    Parsed reference to Telegram media.

    Can be constructed from:
    - t.me links: https://t.me/channel/123, https://t.me/c/123456789/456
    - file_id: Direct Telegram file IDs
    """

    chat_id: Optional[Union[int, str]] = None  # Channel/group/user ID or username
    message_id: Optional[int] = None  # Message ID for t.me links
    file_id: Optional[str] = None  # Direct file reference


@dataclass
class MediaInfo:
    """Information about a Telegram media file."""

    file_id: str
    file_size: int
    mime_type: str
    file_name: Optional[str] = None
    duration: Optional[int] = None  # For video/audio
    width: Optional[int] = None  # For video/photo
    height: Optional[int] = None  # For video/photo
    dc_id: Optional[int] = None


def parse_telegram_url(url: str) -> TelegramMediaRef:
    """
    Parse a Telegram URL or file_id into a TelegramMediaRef.

    Supported formats:
    - https://t.me/channel/123 (public channel)
    - https://t.me/c/123456789/456 (private channel)
    - https://t.me/username/123 (user/channel by username)
    - file_id (base64-encoded)

    Args:
        url: The URL or file_id to parse

    Returns:
        TelegramMediaRef with parsed information
    """
    if not url:
        raise ValueError("URL cannot be empty")
    # Check if it's a t.me link
    parsed = urlparse(url)
    if parsed.netloc in ("t.me", "telegram.me", "telegram.dog"):
        path_parts = parsed.path.strip("/").split("/")
        if len(path_parts) >= 2:
            # Format: /c/chat_id/message_id (private channel)
            if path_parts[0] == "c" and len(path_parts) >= 3:
                try:
                    # Private channel IDs need the -100 prefix
                    chat_id = int(f"-100{path_parts[1]}")
                    message_id = int(path_parts[2])
                    return TelegramMediaRef(chat_id=chat_id, message_id=message_id)
                except ValueError as e:
                    raise ValueError(f"Invalid private channel URL format: {url}") from e
            # Format: /username/message_id (public channel or user)
            else:
                try:
                    username = path_parts[0]
                    message_id = int(path_parts[1])
                    return TelegramMediaRef(chat_id=username, message_id=message_id)
                except ValueError as e:
                    raise ValueError(f"Invalid public channel URL format: {url}") from e
        raise ValueError(f"Invalid Telegram URL format: {url}")
    # Check if it looks like a file_id (base64-like string)
    if re.match(r"^[A-Za-z0-9_-]+$", url) and len(url) > 20:
        return TelegramMediaRef(file_id=url)
    raise ValueError(f"Unrecognized Telegram URL or file_id format: {url}")


@dataclass
class DownloadSender:
    """Handles downloading chunks from a single connection."""

    client: TelegramClient
    sender: MTProtoSender
    request: GetFileRequest
    remaining: int
    stride: int

    async def next(self) -> Optional[bytes]:
        """Download the next chunk."""
        if not self.remaining:
            return None
        result = await self.client._call(self.sender, self.request)
        self.remaining -= 1
        self.request.offset += self.stride
        return result.bytes

    async def disconnect(self) -> None:
        """Disconnect this sender gracefully."""
        try:
            await self.sender.disconnect()
        except Exception:
            # Ignore errors during disconnect - the connection may already be closed
            pass


class ParallelTransferrer:
    """
    Parallel chunk downloader using multiple DC connections.

    Based on the FastTelethon technique from mautrix-telegram. Creates
    multiple MTProtoSender connections to the same DC and downloads
    different chunks in parallel for maximum speed.
    """

    def __init__(self, client: TelegramClient, dc_id: Optional[int] = None) -> None:
        self.client = client
        self.loop = client.loop
        self.dc_id = dc_id or client.session.dc_id
        # Reuse the session's auth key only when downloading from the home DC;
        # for another DC an authorization must be exported first.
        self.auth_key: Optional[AuthKey] = (
            None if dc_id and client.session.dc_id != dc_id else client.session.auth_key
        )
        self.senders: Optional[list[DownloadSender]] = None

    async def _cleanup(self) -> None:
        """Clean up all sender connections gracefully."""
        if self.senders:
            # return_exceptions=True prevents one failed disconnect from blocking the others
            await asyncio.gather(*[sender.disconnect() for sender in self.senders], return_exceptions=True)
        self.senders = None

    @staticmethod
    def _get_connection_count(file_size: int, max_count: int = 20, full_size: int = 100 * 1024 * 1024) -> int:
        """
        Calculate the optimal number of connections based on file size.

        Small files use fewer connections, large files use more.
        """
        if file_size > full_size:
            return max_count
        return max(1, math.ceil((file_size / full_size) * max_count))
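
    # Worked example with the defaults above: a 50 MiB file yields
    # ceil(50 MiB / 100 MiB * 20) = 10 connections, while anything over
    # 100 MiB saturates at the 20-connection cap.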

    async def _create_sender(self) -> MTProtoSender:
        """Create a new MTProtoSender connection to the DC."""
        dc = await self.client._get_dc(self.dc_id)
        sender = MTProtoSender(self.auth_key, loggers=self.client._log)
        await sender.connect(
            self.client._connection(
                dc.ip_address,
                dc.port,
                dc.id,
                loggers=self.client._log,
                proxy=self.client._proxy,
            )
        )
        if not self.auth_key:
            logger.debug(f"Exporting auth to DC {self.dc_id}")
            auth = await self.client(ExportAuthorizationRequest(self.dc_id))
            self.client._init_request.query = ImportAuthorizationRequest(id=auth.id, bytes=auth.bytes)
            req = InvokeWithLayerRequest(LAYER, self.client._init_request)
            await sender.send(req)
            self.auth_key = sender.auth_key
        return sender

    async def _create_download_sender(
        self,
        file: TypeLocation,
        index: int,
        part_size: int,
        stride: int,
        part_count: int,
        base_offset: int = 0,
    ) -> DownloadSender:
        """Create a DownloadSender for a specific chunk offset."""
        return DownloadSender(
            client=self.client,
            sender=await self._create_sender(),
            request=GetFileRequest(file, offset=base_offset + index * part_size, limit=part_size),
            stride=stride,
            remaining=part_count,
        )
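
    # Chunk layout: with N connections and part size P, sender i starts at
    # base_offset + i * P and advances by the stride N * P after each request,
    # so it fetches parts i, i + N, i + 2N, ... of the requested range.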

    async def _init_download(
        self,
        connections: int,
        file: TypeLocation,
        part_count: int,
        part_size: int,
        base_offset: int = 0,
    ) -> None:
        """Initialize all download senders."""
        minimum, remainder = divmod(part_count, connections)

        def get_part_count() -> int:
            nonlocal remainder
            if remainder > 0:
                remainder -= 1
                return minimum + 1
            return minimum

        # Create the first sender synchronously to handle the auth export
        self.senders = [
            await self._create_download_sender(
                file, 0, part_size, connections * part_size, get_part_count(), base_offset
            ),
            *await asyncio.gather(
                *[
                    self._create_download_sender(
                        file, i, part_size, connections * part_size, get_part_count(), base_offset
                    )
                    for i in range(1, connections)
                ]
            ),
        ]

    async def download(
        self,
        file: TypeLocation,
        file_size: int,
        offset: int = 0,
        limit: Optional[int] = None,
        part_size_kb: Optional[float] = None,
        connection_count: Optional[int] = None,
    ) -> AsyncGenerator[bytes, None]:
        """
        Download a file in parallel chunks.

        Args:
            file: The file location to download
            file_size: Total file size in bytes
            offset: Byte offset to start from (for range requests)
            limit: Number of bytes to download (None for the entire file)
            part_size_kb: Chunk size in KB (auto-calculated if None)
            connection_count: Number of parallel connections (auto-calculated if None)

        Yields:
            Chunks of file data
        """
        # Calculate the actual range
        if limit is None:
            limit = file_size - offset
        # Clamp the connection count to the configured maximum
        max_connections = min(settings.telegram_max_connections, 20)
        connection_count = connection_count or self._get_connection_count(limit, max_count=max_connections)
        connection_count = min(connection_count, max_connections)
        part_size = int((part_size_kb or utils.get_appropriated_part_size(file_size)) * 1024)
        # Round the offset down to a part boundary
        aligned_offset = (offset // part_size) * part_size
        skip_bytes = offset - aligned_offset
        part_count = math.ceil((limit + skip_bytes) / part_size)
        logger.debug(
            f"Starting parallel download: {connection_count} connections, "
            f"{part_size} bytes/part, {part_count} parts, offset={offset}, aligned_offset={aligned_offset}"
        )
        await self._init_download(connection_count, file, part_count, part_size, base_offset=aligned_offset)
        try:
            part = 0
            bytes_yielded = 0
            done = False
            while not done and part < part_count and bytes_yielded < limit:
                tasks = [self.loop.create_task(sender.next()) for sender in self.senders]
                for task in tasks:
                    data = await task
                    if not data:
                        # A sender ran out of parts (or the server returned an
                        # empty chunk); stop the outer loop too so we don't
                        # spin forever re-polling exhausted senders.
                        done = True
                        break
                    # Handle offset alignment - skip initial bytes if needed
                    if skip_bytes > 0:
                        if len(data) <= skip_bytes:
                            skip_bytes -= len(data)
                            part += 1
                            continue
                        data = data[skip_bytes:]
                        skip_bytes = 0
                    # Handle the limit - truncate if we'd exceed it
                    remaining = limit - bytes_yielded
                    if len(data) > remaining:
                        data = data[:remaining]
                    yield data
                    bytes_yielded += len(data)
                    part += 1
                    if bytes_yielded >= limit:
                        break
            logger.debug("Parallel download finished, cleaning up connections")
        finally:
            await self._cleanup()
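
    # A minimal usage sketch (illustrative names; `client`, `location` and
    # `file_size` come from the caller):
    #
    #     transferrer = ParallelTransferrer(client, dc_id)
    #     try:
    #         async for chunk in transferrer.download(location, file_size):
    #             buffer.write(chunk)
    #     finally:
    #         await transferrer._cleanup()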


class _SingleSenderPool:
    """
    Pool of persistent ``MTProtoSender`` connections per DC.

    Instead of creating a new connection for every HLS segment request
    (which involves handshake + auth export overhead), this pool maintains
    a queue of idle senders per DC. When a caller needs a sender, it
    borrows one from the pool (or creates a new one if the pool is empty).
    After use, the sender is returned to the pool for reuse.

    Senders that have been idle longer than ``_MAX_IDLE_SECONDS`` are
    discarded on checkout.
    """

    _MAX_IDLE_SECONDS = 120.0  # discard senders idle longer than this

    def __init__(self) -> None:
        # dc_id -> list of (sender, auth_key, last_used_monotonic)
        self._pool: dict[int, list[tuple[MTProtoSender, AuthKey, float]]] = {}
        self._lock = asyncio.Lock()
        # Cached auth keys per DC -- shared across all senders.
        self._auth_keys: dict[int, AuthKey] = {}

    async def acquire(
        self,
        client: TelegramClient,
        dc_id: int,
    ) -> tuple[MTProtoSender, AuthKey]:
        """
        Borrow a connected ``MTProtoSender`` for *dc_id*.

        Returns an existing idle sender if one is available, otherwise
        creates a new one (handling auth export if needed).
        """
        async with self._lock:
            bucket = self._pool.get(dc_id, [])
            now = time.monotonic()
            # Try to find a live sender
            while bucket:
                sender, auth_key, last_used = bucket.pop()
                idle = now - last_used
                if idle > self._MAX_IDLE_SECONDS:
                    # Stale -- disconnect quietly
                    logger.debug("[sender_pool] Discarding stale sender for DC %d (idle %.0fs)", dc_id, idle)
                    try:
                        await sender.disconnect()
                    except Exception:
                        pass
                    continue
                # Check whether it is still connected
                if sender.is_connected():
                    logger.debug("[sender_pool] Reusing sender for DC %d (idle %.1fs)", dc_id, idle)
                    return sender, auth_key
                logger.debug("[sender_pool] Sender for DC %d disconnected, discarding", dc_id)
                try:
                    await sender.disconnect()
                except Exception:
                    pass
            # No reusable sender -- create a new one
            logger.debug("[sender_pool] Creating new sender for DC %d", dc_id)
            return await self._create_sender(client, dc_id)

    async def _create_sender(
        self,
        client: TelegramClient,
        dc_id: int,
    ) -> tuple[MTProtoSender, AuthKey]:
        """Create a new ``MTProtoSender`` with auth export if needed."""
        auth_key = self._auth_keys.get(dc_id)
        if auth_key is None and dc_id == client.session.dc_id:
            auth_key = client.session.auth_key
        dc = await client._get_dc(dc_id)
        sender = MTProtoSender(auth_key, loggers=client._log)
        await sender.connect(
            client._connection(
                dc.ip_address,
                dc.port,
                dc.id,
                loggers=client._log,
                proxy=client._proxy,
            )
        )
        if not auth_key:
            logger.debug("[sender_pool] Exporting auth to DC %d", dc_id)
            auth = await client(ExportAuthorizationRequest(dc_id))
            client._init_request.query = ImportAuthorizationRequest(id=auth.id, bytes=auth.bytes)
            req = InvokeWithLayerRequest(LAYER, client._init_request)
            await sender.send(req)
            auth_key = sender.auth_key
        self._auth_keys[dc_id] = auth_key
        return sender, auth_key

    async def release(
        self,
        dc_id: int,
        sender: MTProtoSender,
        auth_key: AuthKey,
    ) -> None:
        """Return a sender to the pool for reuse."""
        # Cache the auth key
        if auth_key is not None:
            self._auth_keys[dc_id] = auth_key
        if not sender.is_connected():
            logger.debug("[sender_pool] Sender for DC %d disconnected, not returning to pool", dc_id)
            try:
                await sender.disconnect()
            except Exception:
                pass
            return
        async with self._lock:
            bucket = self._pool.setdefault(dc_id, [])
            bucket.append((sender, auth_key, time.monotonic()))
            logger.debug("[sender_pool] Returned sender to pool for DC %d (pool size=%d)", dc_id, len(bucket))

    async def discard(self, sender: MTProtoSender) -> None:
        """Disconnect and discard a sender without returning it to the pool."""
        try:
            await sender.disconnect()
        except Exception:
            pass

    async def close_all(self) -> None:
        """Disconnect all pooled senders."""
        async with self._lock:
            for bucket in self._pool.values():
                for sender, _, _ in bucket:
                    try:
                        await sender.disconnect()
                    except Exception:
                        pass
                bucket.clear()
            self._pool.clear()
            self._auth_keys.clear()
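
    # Typical borrow/return cycle (illustrative, inside an async context):
    #
    #     sender, auth_key = await pool.acquire(client, dc_id)
    #     try:
    #         ...  # issue GetFileRequest calls over `sender`
    #     finally:
    #         await pool.release(dc_id, sender, auth_key)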


class TelegramSessionManager:
    """
    Manages the Telethon client session.

    Features:
    - Lazy initialization on first request
    - Session persistence via StringSession
    - Automatic reconnection on disconnect
    - Thread-safe with asyncio lock
    - Persistent sender pool for HLS segment downloads
    """

    # Cache TTL for get_media_info results (seconds)
    _MEDIA_INFO_CACHE_TTL = 3600  # 1 hour

    def __init__(self):
        self._client: Optional[TelegramClient] = None
        self._lock = asyncio.Lock()
        self._initialized = False
        # In-memory cache: key → (MediaInfo, expiry_timestamp)
        self._media_info_cache: dict[str, tuple["MediaInfo", float]] = {}
        # Persistent sender pool for single-connection downloads (HLS).
        self._sender_pool = _SingleSenderPool()

    async def get_client(self) -> TelegramClient:
        """
        Get the Telethon client, initializing it if needed.

        Returns:
            Connected TelegramClient instance

        Raises:
            ValueError: If Telegram settings are not configured
            RuntimeError: If connection fails
        """
        async with self._lock:
            if self._client is not None and self._client.is_connected():
                return self._client
            # Validate settings
            if not settings.telegram_api_id or not settings.telegram_api_hash:
                raise ValueError("Telegram API credentials not configured (telegram_api_id, telegram_api_hash)")
            if not settings.telegram_session_string:
                raise ValueError(
                    "Telegram session string not configured. Generate one using the web UI at /url-generator#telegram"
                )
            logger.info("Initializing Telegram client...")
            # Create the client with StringSession (extract raw values from SecretStr)
            self._client = TelegramClient(
                StringSession(settings.telegram_session_string.get_secret_value()),
                settings.telegram_api_id,
                settings.telegram_api_hash.get_secret_value(),
                request_retries=3,
                connection_retries=3,
                retry_delay=1,
                timeout=settings.telegram_request_timeout,
            )
            await self._client.connect()
            if not await self._client.is_user_authorized():
                raise RuntimeError(
                    "Telegram session is not authorized. Please regenerate the session string with valid credentials."
                )
            self._initialized = True
            logger.info("Telegram client initialized successfully")
            return self._client

    async def get_message(self, ref: TelegramMediaRef) -> Message:
        """
        Get a message by its reference.

        Args:
            ref: TelegramMediaRef with chat_id and message_id

        Returns:
            The Message object

        Raises:
            ValueError: If the reference is incomplete
            Various Telegram errors: ChannelPrivateError, MessageIdInvalidError, etc.
        """
        if ref.chat_id is None or ref.message_id is None:
            raise ValueError("chat_id and message_id are required to fetch a message")
        client = await self.get_client()
        # get_messages() with a single `ids` value returns one Message (or None)
        message = await client.get_messages(ref.chat_id, ids=ref.message_id)
        if not message:
            raise ValueError(f"Message {ref.message_id} not found in {ref.chat_id}")
        return message

    def resolve_file_id(self, file_id: str) -> tuple[Union[Document, Photo], int]:
        """
        Resolve a Bot API file_id to a Telethon Document or Photo object.

        Supports both old and new file_id formats by using a custom decoder
        that handles all version/sub_version combinations.

        Args:
            file_id: Bot API style file_id string

        Returns:
            Tuple of (Document or Photo object, dc_id)

        Raises:
            ValueError: If the file_id is invalid or cannot be decoded
        """
        # First try Telethon's built-in resolver (works for older formats)
        media = utils.resolve_bot_file_id(file_id)
        if isinstance(media, (Document, Photo)):
            return media, media.dc_id
        # Fall back to our custom decoder for newer formats
        logger.debug("Telethon couldn't decode file_id, trying custom decoder")
        decoded = decode_file_id(file_id)
        # Determine whether it's a photo or a document based on type_id
        if decoded.type_id in (FILE_TYPE_PHOTO, FILE_TYPE_PROFILE_PHOTO, FILE_TYPE_THUMBNAIL):
            # Create a Photo object
            return Photo(
                id=decoded.id,
                access_hash=decoded.access_hash,
                file_reference=decoded.file_reference,
                date=None,
                sizes=[],  # Empty; we don't have size info from the file_id
                dc_id=decoded.dc_id,
            ), decoded.dc_id
        # Create a Document object (video, audio, document, etc.)
        return Document(
            id=decoded.id,
            access_hash=decoded.access_hash,
            file_reference=decoded.file_reference,
            date=None,
            mime_type="",  # Unknown from the file_id
            size=0,  # Unknown from the file_id
            thumbs=None,
            dc_id=decoded.dc_id,
            attributes=[],
        ), decoded.dc_id

    def _media_info_cache_key(self, ref: TelegramMediaRef) -> str:
        """Derive an in-memory cache key for a TelegramMediaRef."""
        if ref.file_id and not ref.message_id:
            return f"fid:{ref.file_id}"
        if ref.chat_id is not None and ref.message_id is not None:
            return f"chat:{ref.chat_id}:msg:{ref.message_id}"
        return ""

    async def get_media_info(self, ref: TelegramMediaRef, file_size: Optional[int] = None) -> MediaInfo:
        """
        Get information about a media file.

        Results are cached in memory (with a TTL) to avoid repeated Telegram
        API calls for the same media -- especially important for HLS, where
        each sub-request (playlist, init, segments) resolves the same source.

        Args:
            ref: TelegramMediaRef pointing to the media
            file_size: Optional file size (required for file_id since it's not encoded in the ID)

        Returns:
            MediaInfo with file details
        """
        # Check the in-memory cache first
        ck = self._media_info_cache_key(ref)
        if ck:
            cached = self._media_info_cache.get(ck)
            if cached is not None:
                info, expiry = cached
                if time.monotonic() < expiry:
                    return info
                del self._media_info_cache[ck]
        info = await self._get_media_info_uncached(ref, file_size)
        # Store in the cache
        if ck:
            self._media_info_cache[ck] = (info, time.monotonic() + self._MEDIA_INFO_CACHE_TTL)
        return info

    async def _get_media_info_uncached(
        self,
        ref: TelegramMediaRef,
        file_size: Optional[int] = None,
    ) -> MediaInfo:
        """Uncached implementation of get_media_info."""
        # Handle a file_id reference
        if ref.file_id and not ref.message_id:
            media, dc_id = self.resolve_file_id(ref.file_id)
            if isinstance(media, Document):
                # Extract attributes
                file_name = None
                duration = None
                width = None
                height = None
                mime_type = media.mime_type or "application/octet-stream"
                for attr in media.attributes:
                    attr_dict = attr.to_dict()
                    if "file_name" in attr_dict:
                        file_name = attr_dict["file_name"]
                    if "duration" in attr_dict:
                        duration = attr_dict["duration"]
                    if "w" in attr_dict:
                        width = attr_dict["w"]
                    if "h" in attr_dict:
                        height = attr_dict["h"]
                # Infer the mime_type from the document attributes if it is missing/generic
                if not mime_type or mime_type == "application/octet-stream":
                    for attr in media.attributes:
                        if getattr(attr, "voice", False):
                            mime_type = "audio/ogg"
                            break
                        elif getattr(attr, "round_message", False):
                            mime_type = "video/mp4"
                            break
                        elif attr.__class__.__name__ == "DocumentAttributeVideo":
                            mime_type = "video/mp4"
                            break
                        elif attr.__class__.__name__ == "DocumentAttributeAudio":
                            mime_type = "audio/mpeg"
                            break
                        elif attr.__class__.__name__ == "DocumentAttributeSticker":
                            mime_type = "image/webp"
                            break
                        elif attr.__class__.__name__ == "DocumentAttributeAnimated":
                            mime_type = "application/x-tgsticker"
                            break
                return MediaInfo(
                    file_id=ref.file_id,
                    file_size=file_size or media.size,  # resolved size may be 0 for a bare file_id
                    mime_type=mime_type,
                    file_name=file_name,
                    duration=duration,
                    width=width,
                    height=height,
                    dc_id=dc_id,
                )
            elif isinstance(media, Photo):
                # Get the largest photo size (sizes may be empty for a resolved file_id)
                largest = max(media.sizes, key=lambda s: getattr(s, "size", 0)) if media.sizes else None
                return MediaInfo(
                    file_id=ref.file_id,
                    file_size=file_size or getattr(largest, "size", 0),
                    mime_type="image/jpeg",
                    width=getattr(largest, "w", None),
                    height=getattr(largest, "h", None),
                    dc_id=dc_id,
                )
            raise ValueError(f"Unsupported media type from file_id: {type(media)}")
        # Handle a message-based reference
        message = await self.get_message(ref)
        if not message.media:
            raise ValueError(f"Message {ref.message_id} does not contain media")
        if isinstance(message.media, MessageMediaDocument):
            doc = message.media.document
            if not isinstance(doc, Document):
                raise ValueError("Invalid document in message")
            # Extract attributes
            file_name = None
            duration = None
            width = None
            height = None
            for attr in doc.attributes:
                attr_dict = attr.to_dict()
                if "file_name" in attr_dict:
                    file_name = attr_dict["file_name"]
                if "duration" in attr_dict:
                    duration = attr_dict["duration"]
                if "w" in attr_dict:
                    width = attr_dict["w"]
                if "h" in attr_dict:
                    height = attr_dict["h"]
            return MediaInfo(
                file_id=str(doc.id),
                file_size=doc.size,
                mime_type=doc.mime_type or "application/octet-stream",
                file_name=file_name,
                duration=duration,
                width=width,
                height=height,
                dc_id=doc.dc_id,
            )
        elif isinstance(message.media, MessageMediaPhoto):
            photo = message.media.photo
            if not photo:
                raise ValueError("Invalid photo in message")
            # Get the largest photo size
            largest = max(photo.sizes, key=lambda s: getattr(s, "size", 0)) if photo.sizes else None
            return MediaInfo(
                file_id=str(photo.id),
                file_size=getattr(largest, "size", 0),
                mime_type="image/jpeg",
                width=getattr(largest, "w", None),
                height=getattr(largest, "h", None),
                dc_id=photo.dc_id,
            )
        else:
            raise ValueError(f"Unsupported media type: {type(message.media)}")

    async def validate_file_access(
        self,
        ref: TelegramMediaRef,
        file_size: Optional[int] = None,
    ) -> None:
        """
        Validate that the session can access the file before streaming.

        This makes a small test request to verify that the file_reference is
        valid and the session has access. Call it before streaming to avoid
        mid-stream errors.

        Args:
            ref: TelegramMediaRef pointing to the media
            file_size: Optional file size for file_id mode

        Raises:
            FileReferenceExpiredError: If the file_id belongs to a different session
            Various Telegram errors: For access issues
        """
        client = await self.get_client()
        if ref.file_id and not ref.message_id:
            media, dc_id = self.resolve_file_id(ref.file_id)
            if isinstance(media, Document):
                file_location = InputDocumentFileLocation(
                    id=media.id,
                    access_hash=media.access_hash,
                    file_reference=media.file_reference,
                    thumb_size="",
                )
            elif isinstance(media, Photo):
                largest = max(media.sizes, key=lambda s: getattr(s, "size", 0)) if media.sizes else None
                file_location = InputPhotoFileLocation(
                    id=media.id,
                    access_hash=media.access_hash,
                    file_reference=media.file_reference,
                    thumb_size=getattr(largest, "type", "x"),
                )
            else:
                raise ValueError(f"Unsupported media type from file_id: {type(media)}")
            # Make a small test request to validate access.
            # Use ParallelTransferrer, which handles DC migration properly.
            transferrer = ParallelTransferrer(client, dc_id)
            try:
                # Request a tiny range to validate - download() handles the DC connections
                download_gen = transferrer.download(file_location, file_size or 4096, offset=0, limit=4096)
                try:
                    await download_gen.__anext__()  # Fetch the first chunk to validate
                except StopAsyncIteration:
                    pass  # An empty file is still valid
                finally:
                    # Properly close the generator
                    await download_gen.aclose()
                logger.debug("[validate_file_access] file_id access validated on DC %d", dc_id)
            except Exception as e:
                logger.warning(f"[validate_file_access] file_id validation failed: {e}")
                raise
            finally:
                # Clean up transferrer connections
                await transferrer._cleanup()
        # Message-based references skip pre-validation here; access errors
        # surface when the message is fetched at stream time.

    async def _resolve_file_location(
        self,
        ref: TelegramMediaRef,
        file_size: Optional[int] = None,
    ) -> tuple["TypeLocation", int, int]:
        """
        Resolve a ``TelegramMediaRef`` into a Telegram file location.

        Returns:
            ``(file_location, dc_id, actual_file_size)``
        """
        # Handle a file_id reference (no message needed, fast local parse)
        if ref.file_id and not ref.message_id:
            media, dc_id = self.resolve_file_id(ref.file_id)
            if isinstance(media, Document):
                actual_file_size = file_size or media.size
                if actual_file_size == 0:
                    raise ValueError(
                        "file_size parameter is required when streaming by file_id. "
                        "The file_id doesn't contain size information."
                    )
                file_location = InputDocumentFileLocation(
                    id=media.id,
                    access_hash=media.access_hash,
                    file_reference=media.file_reference,
                    thumb_size="",
                )
                return file_location, dc_id, actual_file_size
            elif isinstance(media, Photo):
                largest = max(media.sizes, key=lambda s: getattr(s, "size", 0)) if media.sizes else None
                actual_file_size = file_size or getattr(largest, "size", 0)
                if actual_file_size == 0:
                    raise ValueError(
                        "file_size parameter is required when streaming by file_id. "
                        "The file_id doesn't contain size information."
                    )
                file_location = InputPhotoFileLocation(
                    id=media.id,
                    access_hash=media.access_hash,
                    file_reference=media.file_reference,
                    thumb_size=getattr(largest, "type", "x"),
                )
                return file_location, dc_id, actual_file_size
            else:
                raise ValueError(f"Unsupported media type from file_id: {type(media)}")
        # Handle a message-based reference (requires a Telegram API call)
        message = await self.get_message(ref)
        if not message.media:
            raise ValueError(f"Message {ref.message_id} does not contain media")
        if isinstance(message.media, MessageMediaDocument):
            doc = message.media.document
            if not isinstance(doc, Document):
                raise ValueError("Invalid document")
            file_location = InputDocumentFileLocation(
                id=doc.id,
                access_hash=doc.access_hash,
                file_reference=doc.file_reference,
                thumb_size="",
            )
            return file_location, doc.dc_id, doc.size
        elif isinstance(message.media, MessageMediaPhoto):
            photo = message.media.photo
            if not photo:
                raise ValueError("Invalid photo")
            largest = max(photo.sizes, key=lambda s: getattr(s, "size", 0)) if photo.sizes else None
            file_location = InputPhotoFileLocation(
                id=photo.id,
                access_hash=photo.access_hash,
                file_reference=photo.file_reference,
                thumb_size=getattr(largest, "type", ""),
            )
            return file_location, photo.dc_id, getattr(largest, "size", 0)
        else:
            raise ValueError(f"Unsupported media type: {type(message.media)}")

    async def stream_media(
        self,
        ref: TelegramMediaRef,
        offset: int = 0,
        limit: Optional[int] = None,
        file_size: Optional[int] = None,
    ) -> AsyncGenerator[bytes, None]:
        """
        Stream media content with **parallel** downloads (FastTelethon).

        Creates multiple MTProtoSender connections to the file's DC for
        maximum throughput. Best suited for large/full-file downloads
        (e.g. the non-transcode ``/proxy/telegram/stream`` endpoint).

        For small byte-range fetches (HLS segments) use
        ``stream_media_single`` instead.

        Args:
            ref: TelegramMediaRef pointing to the media
            offset: Byte offset to start from
            limit: Number of bytes to download (None for the entire file)
            file_size: Optional file size (required for file_id streaming)

        Yields:
            Chunks of media data
        """
        client = await self.get_client()
        file_location, dc_id, actual_file_size = await self._resolve_file_location(ref, file_size)
        transferrer = ParallelTransferrer(client, dc_id)
        try:
            async for chunk in transferrer.download(
                file_location,
                actual_file_size,
                offset=offset,
                limit=limit,
            ):
                yield chunk
        finally:
            await transferrer._cleanup()

    async def stream_media_single(
        self,
        ref: TelegramMediaRef,
        offset: int = 0,
        limit: Optional[int] = None,
        file_size: Optional[int] = None,
    ) -> AsyncGenerator[bytes, None]:
        """
        Stream media content over a **pooled** single MTProto connection.

        Borrows a persistent ``MTProtoSender`` from ``_SingleSenderPool``
        for the target DC. The sender is returned to the pool after the
        download completes so the next request reuses the same TCP
        connection (no handshake, no ``ExportAuthorizationRequest``).

        This is ideal for small byte-range fetches (HLS segments, probe
        headers) where spinning up connections per request is wasteful.

        Args:
            ref: TelegramMediaRef pointing to the media
            offset: Byte offset to start from
            limit: Number of bytes to download (None for the entire file)
            file_size: Optional file size (required for file_id streaming)

        Yields:
            Chunks of media data
        """
        client = await self.get_client()
        file_location, dc_id, actual_file_size = await self._resolve_file_location(ref, file_size)
        if offset >= actual_file_size:
            return
        if limit is None:
            limit = actual_file_size - offset
        part_size = int(utils.get_appropriated_part_size(actual_file_size) * 1024)
        aligned_offset = (offset // part_size) * part_size
        skip_bytes = offset - aligned_offset
        part_count = math.ceil((limit + skip_bytes) / part_size)
        logger.debug(
            "[single] DC %d: offset=%d, limit=%d, parts=%d, part_size=%d",
            dc_id,
            offset,
            limit,
            part_count,
            part_size,
        )
        sender, auth_key = await self._sender_pool.acquire(client, dc_id)
        sender_ok = True  # track whether to return the sender to the pool or discard it
        try:
            request = GetFileRequest(file_location, offset=aligned_offset, limit=part_size)
            bytes_yielded = 0
            for _ in range(part_count):
                if bytes_yielded >= limit:
                    break
                try:
                    result = await client._call(sender, request)
                except Exception:
                    sender_ok = False
                    raise
                data = result.bytes
                if not data:
                    break
                request.offset += part_size
                # Handle offset alignment
                if skip_bytes > 0:
                    if len(data) <= skip_bytes:
                        skip_bytes -= len(data)
                        continue
                    data = data[skip_bytes:]
                    skip_bytes = 0
                # Trim to the limit
                remaining = limit - bytes_yielded
                if len(data) > remaining:
                    data = data[:remaining]
                bytes_yielded += len(data)
                yield data
        finally:
            if sender_ok:
                await self._sender_pool.release(dc_id, sender, auth_key)
            else:
                await self._sender_pool.discard(sender)
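
    # Example: fetching a single HLS segment as a byte range (illustrative
    # values; `manager`, `ref` and `info` come from the caller):
    #
    #     async for chunk in manager.stream_media_single(
    #         ref, offset=1_048_576, limit=2_097_152, file_size=info.file_size
    #     ):
    #         await response.write(chunk)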

    async def close(self) -> None:
        """Close the Telegram client connection and pooled senders."""
        await self._sender_pool.close_all()
        async with self._lock:
            if self._client is not None:
                await self._client.disconnect()
                self._client = None
                self._initialized = False
                logger.info("Telegram client disconnected")

    @property
    def is_initialized(self) -> bool:
        """Check if the client is initialized and connected."""
        return self._initialized and self._client is not None and self._client.is_connected()


# Global session manager instance
telegram_manager = TelegramSessionManager()
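
# End-to-end usage sketch (illustrative; assumes a configured session string
# and an async context -- `response` is a placeholder for your HTTP layer):
#
#     ref = parse_telegram_url("https://t.me/some_channel/123")
#     info = await telegram_manager.get_media_info(ref)
#     async for chunk in telegram_manager.stream_media(ref, file_size=info.file_size):
#         await response.write(chunk)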