2025-12-11 01:07:04 +03:00
|
|
|
|
"""
|
|
|
|
|
|
YouTube Video Downloader Service
|
|
|
|
|
|
Отдельный микросервис для скачивания видео с YouTube
|
2026-06-08 16:17:02 +00:00
|
|
|
|
|
|
|
|
|
|
Версия 2: subprocess-based yt-dlp CLI (обход SSL бага в gunicorn pre-fork)
|
2025-12-11 01:07:04 +03:00
|
|
|
|
"""
|
|
|
|
|
|
import os
|
2026-04-30 17:21:10 +03:00
|
|
|
|
import time
|
2025-12-11 01:07:04 +03:00
|
|
|
|
import logging
|
2026-01-10 21:40:07 +00:00
|
|
|
|
import traceback
|
2026-06-08 16:17:02 +00:00
|
|
|
|
import subprocess
|
|
|
|
|
|
import json as json_lib
|
2025-12-11 01:07:04 +03:00
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
from flask import Flask, request, jsonify
|
|
|
|
|
|
from flask_cors import CORS
|
|
|
|
|
|
import uuid
|
|
|
|
|
|
import re
|
|
|
|
|
|
|
|
|
|
|
|
# Logging configuration: timestamped records at INFO level and above.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO
)
logger = logging.getLogger(__name__)

app = Flask(__name__)
# CORS is enabled with no options, so the flask-cors defaults apply to every route.
CORS(app)

# Directory for temporary download files. The path is relative, so it is
# resolved against the current working directory of the process.
DOWNLOADS_DIR = Path('downloads')
DOWNLOADS_DIR.mkdir(exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _safe_filename(title: str) -> str:
    """Build a unique yt-dlp output template inside DOWNLOADS_DIR from a title.

    Characters that are illegal in file names (``<>:"/\\|?*``) and ASCII
    control characters are removed, and the title is cut to 100 characters.
    A random UUID prefix keeps the name unique.

    Args:
        title: Human-readable video title.

    Returns:
        A path string ending in the literal ``.%(ext)s`` placeholder, which is
        left for the consumer of the template to fill in.
    """
    safe_title = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '', title)[:100]
    # The result is a %-style template (it ends with "%(ext)s"), so a literal
    # percent sign in the title must be doubled or it would be parsed as the
    # start of a template field.
    safe_title = safe_title.replace('%', '%%')
    return str(DOWNLOADS_DIR / f'{uuid.uuid4()}_{safe_title}.%(ext)s')
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-04-30 17:21:10 +03:00
|
|
|
|
def _cleanup_downloads():
    """Delete every file in the downloads directory (best effort).

    Sub-directories are skipped. A file that cannot be removed is logged and
    left in place, so cleanup never aborts the caller.
    """
    for entry in DOWNLOADS_DIR.glob('*'):
        # unlink() cannot remove real directories; symlinks are still removed.
        if entry.is_dir() and not entry.is_symlink():
            continue
        try:
            # missing_ok: the file may already have been removed by someone else.
            entry.unlink(missing_ok=True)
        except OSError as exc:
            logger.warning("[CLEANUP] Could not remove %s: %s", entry.name, exc)
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-06-08 16:17:02 +00:00
|
|
|
|
def _find_latest_downloaded() -> Path | None:
    """Return the most recently modified entry in the downloads directory.

    Entries with a ``.part`` or ``.ytdl`` suffix are ignored. Returns ``None``
    when nothing else is present.
    """
    ignored_suffixes = ('.part', '.ytdl')
    candidates = [
        entry
        for entry in DOWNLOADS_DIR.glob('*')
        if entry.suffix not in ignored_suffixes
    ]
    if candidates:
        # max() keeps the first entry among equal mtimes, exactly like taking
        # element 0 of a stable descending sort.
        return max(candidates, key=lambda entry: entry.stat().st_mtime)
    return None
|
2025-12-17 17:53:34 +03:00
|
|
|
|
|
|
|
|
|
|
|
2026-06-08 16:17:02 +00:00
|
|
|
|
def _file_has_video_stream(filepath: Path) -> bool:
    """Check with ffprobe whether the file contains a video stream.

    Returns ``True`` when ffprobe reports ``video`` for stream ``v:0``.
    If the probe itself fails (ffprobe missing, timeout, ...), a warning is
    logged and ``True`` is returned, i.e. the check fails open.
    """
    try:
        probe_cmd = [
            'ffprobe', '-v', 'error',
            '-select_streams', 'v:0',
            '-show_entries', 'stream=codec_type',
            '-of', 'csv=p=0',
            str(filepath),
        ]
        probe = subprocess.run(probe_cmd, capture_output=True, text=True, timeout=15)
        reported_type = probe.stdout.strip()
        return reported_type == 'video'
    except Exception as e:
        logger.warning(f"[VALIDATE] Не удалось проверить видео-поток в {filepath.name}: {e}")
        return True
|
2026-04-30 17:21:10 +03:00
|
|
|
|
|
|
|
|
|
|
|
2026-06-08 16:17:02 +00:00
|
|
|
|
def _file_has_audio_stream(filepath: Path) -> bool:
|
|
|
|
|
|
"""Проверяет через ffprobe, содержит ли файл аудио-поток."""
|
|
|
|
|
|
try:
|
|
|
|
|
|
result = subprocess.run(
|
|
|
|
|
|
['ffprobe', '-v', 'error', '-select_streams', 'a:0',
|
|
|
|
|
|
'-show_entries', 'stream=codec_type', '-of', 'csv=p=0',
|
|
|
|
|
|
str(filepath)],
|
|
|
|
|
|
capture_output=True, text=True, timeout=15
|
|
|
|
|
|
)
|
|
|
|
|
|
return result.stdout.strip() == 'audio'
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
return False
|
2026-04-30 17:21:10 +03:00
|
|
|
|
|
|
|
|
|
|
|
2026-05-03 17:43:24 +03:00
|
|
|
|
def _find_video_file() -> Path | None:
    """Locate the downloaded video, merging separate video/audio files if needed.

    Entries of the downloads directory (except ``.part``/``.ytdl``) are
    inspected from newest to oldest:

    * the first file that has both a video and an audio stream is returned
      immediately;
    * otherwise the newest video-only file and the newest audio-only file are
      remembered and muxed with ``ffmpeg -c copy`` into ``<stem>_merged<suffix>``.

    Returns:
        The combined file, the merged file, the video-only file (when there is
        no audio file or the merge fails), or ``None`` when no file with a
        video stream exists.
    """
    files = [f for f in DOWNLOADS_DIR.glob('*') if f.suffix not in ('.part', '.ytdl')]
    if not files:
        return None
    files.sort(key=lambda x: x.stat().st_mtime, reverse=True)

    video_file = None
    audio_file = None

    for f in files:
        if _file_has_video_stream(f):
            if _file_has_audio_stream(f):
                return f  # combined stream
            if video_file is None:
                video_file = f
        elif not audio_file and _file_has_audio_stream(f):
            audio_file = f

    if video_file is None:
        return None
    if not audio_file:
        return video_file

    merged = DOWNLOADS_DIR / f"{video_file.stem}_merged{video_file.suffix}"
    logger.info(f"[MERGE] Мержим {video_file.name} + {audio_file.name} -> {merged.name}")
    try:
        result = subprocess.run(
            ['ffmpeg', '-y', '-i', str(video_file), '-i', str(audio_file),
             '-c', 'copy', '-map', '0:v:0', '-map', '1:a:0', str(merged)],
            capture_output=True, text=True, timeout=120
        )
    except (subprocess.TimeoutExpired, OSError) as exc:
        # ffmpeg missing or hung: fall back to the video-only file instead of
        # letting the exception escape, and drop any partial output.
        logger.error(f"[MERGE] Ошибка ffmpeg: {exc}")
        merged.unlink(missing_ok=True)
        return video_file

    if result.returncode == 0:
        # The sources are no longer needed once the merged file exists.
        video_file.unlink(missing_ok=True)
        audio_file.unlink(missing_ok=True)
        return merged

    logger.error(f"[MERGE] Ошибка ffmpeg: {result.stderr[-300:]}")
    # A failed run can leave a truncated output file; do not keep it around.
    merged.unlink(missing_ok=True)
    return video_file
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-06-08 16:17:02 +00:00
|
|
|
|
# ═══════════════════════════════════════════════════════════════
|
|
|
|
|
|
# CORE: subprocess-based yt-dlp
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════
|
2026-05-03 17:43:24 +03:00
|
|
|
|
|
2026-06-08 16:17:02 +00:00
|
|
|
|
# Executable name of the yt-dlp CLI (looked up on PATH).
YTDLP_CMD = 'yt-dlp'
# Subprocess timeouts, in seconds.
DOWNLOAD_TIMEOUT = 300
INFO_TIMEOUT = 60

# Comma-separated YouTube player clients passed to the extractor.
PLAYER_CLIENTS = 'web,android'
# Value for --extractor-args. Format: "<extractor>:<arg>=<v1>,<v2>;<arg>=<v>".
# Individual arguments are separated by ';'. With ':' the whole tail would be
# parsed as additional values of player_client and "skip" would never apply.
EXTRACTOR_ARGS = f'youtube:player_client={PLAYER_CLIENTS};skip=translated_subs,hls'
|
2026-05-03 01:36:04 +03:00
|
|
|
|
|
|
|
|
|
|
|
2026-06-08 16:17:02 +00:00
|
|
|
|
def _build_ytdlp_base_cmd() -> list:
    """Return the yt-dlp argument list shared by every invocation.

    The list starts with the executable name and carries the socket timeout,
    extractor arguments, JS runtime / remote component settings and
    ``--no-playlist``. ``--cookies <absolute path>`` is appended when the file
    named by ``YOUTUBE_COOKIES_FILE`` (default ``/app/youtube_cookies.txt``)
    exists and is not empty.
    """
    cookies_path = Path(os.getenv('YOUTUBE_COOKIES_FILE', '/app/youtube_cookies.txt'))

    base_args = [YTDLP_CMD]
    base_args += ['--socket-timeout', '15']
    base_args += ['--extractor-args', EXTRACTOR_ARGS]
    base_args += ['--js-runtimes', 'node']
    base_args += ['--remote-components', 'ejs:github']
    base_args.append('--no-playlist')

    # An empty cookies file is treated the same as a missing one.
    cookies_usable = cookies_path.exists() and cookies_path.stat().st_size > 0
    if cookies_usable:
        base_args.extend(['--cookies', str(cookies_path.absolute())])
    return base_args
|
2026-05-03 01:36:04 +03:00
|
|
|
|
|
|
|
|
|
|
|
2026-06-08 16:17:02 +00:00
|
|
|
|
def _run_ytdlp(args: list, timeout: int = DOWNLOAD_TIMEOUT) -> subprocess.CompletedProcess:
    """Run the given yt-dlp command line as a subprocess and return the result.

    The command is logged first. stdout and stderr are captured as text.
    ``subprocess.TimeoutExpired`` is raised if the process runs longer than
    ``timeout`` seconds; the exit code is not checked here.
    """
    printable_command = ' '.join(args)
    logger.info(f"[YTDLP] {printable_command}")
    completed = subprocess.run(
        args,
        capture_output=True,
        text=True,
        timeout=timeout,
    )
    return completed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════
|
|
|
|
|
|
# YouTube formatter parser (shared with old codebase)
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════
|
|
|
|
|
|
|
|
|
|
|
|
def _parse_height(format_dict: dict) -> int:
|
|
|
|
|
|
"""Извлекает реальную высоту из формата."""
|
|
|
|
|
|
h = format_dict.get('height')
|
|
|
|
|
|
w = format_dict.get('width')
|
|
|
|
|
|
if h and w and isinstance(h, (int, float)) and isinstance(w, (int, float)):
|
|
|
|
|
|
return min(int(h), int(w))
|
|
|
|
|
|
if h and isinstance(h, (int, float)) and h > 0:
|
|
|
|
|
|
return int(h)
|
|
|
|
|
|
if w and isinstance(w, (int, float)) and w > 0:
|
|
|
|
|
|
return int(w)
|
|
|
|
|
|
note = str(format_dict.get('format_note', '') or '')
|
|
|
|
|
|
match = re.search(r'(\d+)\s*p', note)
|
|
|
|
|
|
if match:
|
|
|
|
|
|
return int(match.group(1))
|
|
|
|
|
|
match = re.search(r'(\d+)\s*x\s*(\d+)', note, re.IGNORECASE)
|
|
|
|
|
|
if match:
|
|
|
|
|
|
return min(int(match.group(1)), int(match.group(2)))
|
|
|
|
|
|
res = str(format_dict.get('resolution', '') or '')
|
|
|
|
|
|
match = re.search(r'(\d+)\s*x\s*(\d+)', res, re.IGNORECASE)
|
|
|
|
|
|
if match:
|
|
|
|
|
|
return min(int(match.group(1)), int(match.group(2)))
|
|
|
|
|
|
return 0
|
|
|
|
|
|
|
2025-12-11 01:07:04 +03:00
|
|
|
|
|
2026-06-08 16:17:02 +00:00
|
|
|
|
# ═══════════════════════════════════════════════════════════════
|
|
|
|
|
|
# Форматы (--dump-json)
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════
|
2025-12-11 01:07:04 +03:00
|
|
|
|
|
2026-04-30 01:36:43 +03:00
|
|
|
|
def get_youtube_formats(url: str) -> list[dict]:
    """Return the downloadable quality options for a YouTube video.

    Runs ``yt-dlp --dump-json`` in a subprocess and reduces the reported
    formats to at most one entry per quality tier (4K ... 144p) plus one
    audio-only entry. When no entry could be built but the duration is known,
    size estimates based on typical bitrates are returned instead.

    Args:
        url: Video URL handed to yt-dlp.

    Returns:
        A list of dicts with the keys ``format_id`` (a yt-dlp format selector),
        ``label``, ``quality``, ``ext`` and ``filesize_mb`` (``None`` when the
        size is unknown). The list may be empty.

    Raises:
        Exception: if yt-dlp cannot be run, exits with a non-zero code, or
            prints something that is not a JSON object.
    """
    logger.info(f"[FORMATS] Получение списка форматов для: {url}")

    # '--' ends option parsing, so a URL starting with '-' is never read as a
    # yt-dlp option.
    cmd = _build_ytdlp_base_cmd() + ['--dump-json', '--quiet', '--no-warnings', '--', url]
    try:
        proc = _run_ytdlp(cmd, timeout=INFO_TIMEOUT)
    except Exception as e:
        logger.error(f"[FORMATS] Ошибка subprocess: {e}")
        raise Exception(f"Не удалось получить информацию о видео: {e}") from e

    if proc.returncode != 0:
        err = proc.stderr.strip()[-500:]
        logger.error(f"[FORMATS] yt-dlp failed: {err}")
        raise Exception(f"yt-dlp error: {err}")

    try:
        info = json_lib.loads(proc.stdout)
    except json_lib.JSONDecodeError as e:
        raise Exception(f"Failed to parse --dump-json: {e}") from e
    if not isinstance(info, dict):
        raise Exception("Failed to parse --dump-json: top-level value is not an object")

    # "formats" may be absent or null; treat both as "no formats".
    formats = info.get('formats') or []
    duration = info.get('duration')
    logger.info(f"[FORMATS] Всего форматов: {len(formats)}, длительность: {duration}с")

    def _get_filesize(f: dict) -> int:
        """Return the reported size in bytes, or an estimate from bitrate * duration."""
        size = f.get('filesize') or f.get('filesize_approx') or 0
        if size:
            return size
        if duration:
            tbr = f.get('tbr') or 0
            if tbr:
                return int(tbr * 1024 / 8 * duration)
            vbr = f.get('vbr') or 0
            abr = f.get('abr') or 0
            if vbr or abr:
                return int((vbr + abr) * 1024 / 8 * duration)
        return 0

    quality_tiers = [
        (2160, '4K'), (1440, '1440p'), (1080, '1080p'), (720, '720p'),
        (480, '480p'), (360, '360p'), (240, '240p'), (144, '144p'),
    ]

    available_heights = set()
    best_audio_info = {'size': 0, 'ext': 'm4a', 'format_id': None}

    for f in formats:
        vcodec = f.get('vcodec', 'none')
        acodec = f.get('acodec', 'none')
        height = _parse_height(f)
        if vcodec != 'none' and height > 0:
            available_heights.add(height)
        # NOTE(review): this keeps the FIRST audio-only format in the list, not
        # necessarily the highest-quality one - confirm that this is intended.
        if vcodec == 'none' and acodec != 'none' and best_audio_info['format_id'] is None:
            best_audio_info = {'size': _get_filesize(f), 'ext': f.get('ext', 'm4a'),
                               'format_id': f.get('format_id', '')}

    # With no video heights at all every tier stays eligible (used by the fallback).
    max_actual_height = max(available_heights) if available_heights else 2160
    result = []
    used_heights = set()

    for max_height, _tier_label in quality_tiers:
        if max_height > max_actual_height:
            continue

        # Pick the tallest video format not exceeding this tier; at equal
        # height a video-only (DASH) format replaces a muxed one.
        best_video = None
        best_video_height = 0
        is_best_dash = False

        for f in formats:
            vcodec = f.get('vcodec', 'none')
            height = _parse_height(f)
            if vcodec == 'none' or height <= 0 or height > max_height:
                continue
            is_dash = (f.get('acodec', 'none') == 'none')
            pick = False
            if height > best_video_height:
                pick = True
            elif height == best_video_height and is_dash and not is_best_dash:
                pick = True
            if pick:
                best_video = f
                best_video_height = height
                is_best_dash = is_dash

        # Several tiers can resolve to the same real height; list it only once.
        if not best_video or best_video_height in used_heights:
            continue
        used_heights.add(best_video_height)

        video_size = _get_filesize(best_video)
        has_audio = best_video.get('acodec', 'none') != 'none'
        total_size = video_size + (best_audio_info['size'] if not has_audio else 0)
        video_ext = best_video.get('ext', 'mp4')
        video_format_id = best_video.get('format_id', '')

        format_note = best_video.get('format_note', '') or ''
        if format_note and str(best_video_height) in format_note:
            display_label = format_note
        else:
            display_label = f"{best_video_height}p"

        # Each selector lists progressively looser alternatives after '/'.
        if has_audio:
            format_selector = f"{video_format_id}/best[height<={best_video_height}]/best"
        elif best_audio_info['format_id']:
            format_selector = (
                f"{video_format_id}+{best_audio_info['format_id']}/"
                f"bestvideo[height<={best_video_height}]+bestaudio/"
                f"best[height<={best_video_height}]"
            )
        else:
            format_selector = f"{video_format_id}+bestaudio/best[height<={best_video_height}]/best"

        result.append({
            'format_id': format_selector,
            'label': f"{display_label} ({video_ext})",
            'quality': display_label,
            'ext': video_ext,
            'filesize_mb': round(total_size / 1024 / 1024, 1) if total_size else None,
        })

    if best_audio_info['size']:
        result.append({
            'format_id': 'bestaudio/best',
            'label': f"Audio only ({best_audio_info['ext']})",
            'quality': 'audio',
            'ext': best_audio_info['ext'],
            'filesize_mb': round(best_audio_info['size'] / 1024 / 1024, 1),
        })

    # Fallback: no real formats were found, so return estimated ones.
    if not result:
        logger.info("[FORMATS] Реальных форматов не найдено, генерируем оценочные")
        max_possible_height = max_actual_height
        if duration:
            # Assumed video bitrates in kbit/s per tier; 128 kbit/s is added for audio.
            typical_bitrates = {2160: 40000, 1440: 20000, 1080: 10000, 720: 5000,
                                480: 2500, 360: 1200, 240: 600, 144: 300}
            for max_height, label in quality_tiers:
                if max_height > max_possible_height:
                    continue
                video_kbps = typical_bitrates.get(max_height, 1000)
                total_kbps = video_kbps + 128
                bytes_est = total_kbps * 1000 / 8 * duration
                result.append({
                    'format_id': f"bestvideo[height<={max_height}]+bestaudio/best[height<={max_height}]",
                    'label': f"{label} (mp4)",
                    'quality': label,
                    'ext': 'mp4',
                    'filesize_mb': round(bytes_est / 1024 / 1024, 1),
                })
            audio_bytes = 128 * 1000 / 8 * duration
            result.append({
                'format_id': 'bestaudio/best',
                'label': 'Audio only (m4a)',
                'quality': 'audio',
                'ext': 'm4a',
                'filesize_mb': round(audio_bytes / 1024 / 1024, 1),
            })

    logger.info(f"[FORMATS] Возвращаем {len(result)} форматов")
    return result
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-06-08 16:17:02 +00:00
|
|
|
|
# ═══════════════════════════════════════════════════════════════
|
|
|
|
|
|
# Скачивание (subprocess yt-dlp CLI)
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════
|
|
|
|
|
|
|
|
|
|
|
|
def _drop_cookies_option(cmd: list) -> list:
    """Return a copy of ``cmd`` without ``--cookies`` and the value that follows it."""
    stripped = []
    skip_value = False
    for arg in cmd:
        if skip_value:
            skip_value = False
            continue
        if arg == '--cookies':
            skip_value = True
            continue
        stripped.append(arg)
    return stripped


def download_youtube_video(url: str, max_retries: int = 3, format_id: str | None = None) -> tuple[Path, str]:
    """Download a video with the yt-dlp CLI (aria2c as external downloader).

    Each attempt first empties the downloads directory, then runs yt-dlp. If an
    attempt fails with a message that mentions cookies / bot detection /
    sign-in and cookies were in use, the same command is retried once without
    ``--cookies`` before moving on to the next attempt. Attempts are separated
    by a growing pause (2 s, 4 s, ...).

    NOTE(review): the directory is wiped and the newest file is picked without
    any per-request isolation, so concurrent downloads in one process can
    interfere with each other - confirm the service runs one download at a time.

    Args:
        url: Video URL handed to yt-dlp.
        max_retries: Maximum number of attempts.
        format_id: yt-dlp format selector; a best-mp4 fallback chain is used
            when empty.

    Returns:
        ``(path_to_file, how)`` where ``how`` is ``'cli'`` or
        ``'cli-no-cookies'``.

    Raises:
        Exception: on timeout of the last attempt, when yt-dlp succeeds but no
            file is found, or when every attempt fails. The final message
            carries the tail of yt-dlp's stderr.
    """
    logger.info(f"[DOWNLOAD] Начало скачивания: {url} (format={format_id})")

    if not format_id:
        # Fallback chain expressed as a yt-dlp format selector.
        format_id = 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best'

    safe_tmpl = str(DOWNLOADS_DIR / '%(title)s_%(id)s.%(ext)s')
    last_error = ''

    for attempt in range(max_retries):
        _cleanup_downloads()

        cmd = _build_ytdlp_base_cmd() + [
            '--downloader', 'aria2c',
            '--downloader-args',
            'aria2c:--connect-timeout=15 --timeout=120 --max-tries=1',
            '-f', format_id,
            '-o', safe_tmpl,
            # '--' ends option parsing, so a URL starting with '-' is never
            # read as a yt-dlp option.
            '--', url,
        ]

        try:
            result = _run_ytdlp(cmd, timeout=DOWNLOAD_TIMEOUT)
        except subprocess.TimeoutExpired:
            logger.error(f"[DOWNLOAD] yt-dlp timeout ({DOWNLOAD_TIMEOUT}s)")
            if attempt < max_retries - 1:
                time.sleep((attempt + 1) * 2)
                continue
            raise Exception(f"Превышен таймаут скачивания ({DOWNLOAD_TIMEOUT}с)")

        if result.returncode == 0:
            for line in result.stdout.split('\n'):
                if 'Destination:' in line:
                    logger.info(f"[DOWNLOAD] {line.strip()}")

            file = _find_latest_downloaded()
            if file:
                logger.info(f"[DOWNLOAD] Скачан файл: {file.name} ({file.stat().st_size} bytes)")
                return file, 'cli'

            logger.error("[DOWNLOAD] Файл не найден после успешного yt-dlp")
            raise Exception("Файл не найден после скачивания")

        # Error handling
        stderr = result.stderr.strip()[-800:]
        logger.error(f"[DOWNLOAD] Попытка {attempt + 1}: yt-dlp failed: {stderr[:300]}")
        last_error = stderr[-300:]

        # Retry once without cookies when the failure looks cookie/auth related.
        lowered = stderr.lower()
        looks_auth_related = any(marker in lowered for marker in ('cookies', 'bot', 'sign in'))
        if looks_auth_related and '--cookies' in cmd:
            logger.warning("[DOWNLOAD] Пробуем без cookies")
            cmd_no_cookies = _drop_cookies_option(cmd)
            try:
                result2 = _run_ytdlp(cmd_no_cookies, timeout=DOWNLOAD_TIMEOUT)
                if result2.returncode == 0:
                    file = _find_latest_downloaded()
                    if file:
                        return file, 'cli-no-cookies'
                else:
                    logger.warning(
                        f"[DOWNLOAD] Retry without cookies failed: {result2.stderr.strip()[-300:]}")
            except Exception as exc:
                # Best effort: a failed retry must not abort the remaining attempts.
                logger.warning(f"[DOWNLOAD] Retry without cookies raised: {exc}")

        if attempt < max_retries - 1:
            time.sleep((attempt + 1) * 2)

    message = f"Не удалось скачать видео после {max_retries} попыток"
    if last_error:
        # Keep yt-dlp's own message so callers can recognise cookie/auth failures.
        message = f"{message}: {last_error}"
    raise Exception(message)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════
|
|
|
|
|
|
# Кэш форматов
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════
|
|
|
|
|
|
|
2026-04-30 17:21:10 +03:00
|
|
|
|
# In-memory cache of format lists: cache key -> (time stored, format list).
_formats_cache: dict[str, tuple[float, list[dict]]] = {}
_FORMATS_CACHE_TTL = 30 * 60  # 30 minutes, in seconds
|
2026-04-30 17:21:10 +03:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _normalize_youtube_url(url: str) -> str:
|
|
|
|
|
|
m = re.search(r'(youtu\.be/|youtube\.com/watch\?v=)([a-zA-Z0-9_-]{11})', url)
|
|
|
|
|
|
if m:
|
|
|
|
|
|
prefix, video_id = m.group(1), m.group(2)
|
|
|
|
|
|
return f"https://www.youtube.com/watch?v={video_id}"
|
2026-06-08 16:17:02 +00:00
|
|
|
|
return url
|
|
|
|
|
|
|
2026-04-30 17:21:10 +03:00
|
|
|
|
|
2026-06-08 16:17:02 +00:00
|
|
|
|
# ═══════════════════════════════════════════════════════════════
|
|
|
|
|
|
# Flask endpoints
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════
|
2026-04-30 17:21:10 +03:00
|
|
|
|
|
2025-12-11 01:07:04 +03:00
|
|
|
|
@app.route('/health', methods=['GET'])
def health():
    """Health-check endpoint: always answers 200 with a static JSON status."""
    payload = {'status': 'ok', 'service': 'youtube-downloader'}
    return jsonify(payload), 200
|
2025-12-11 01:07:04 +03:00
|
|
|
|
|
|
|
|
|
|
|
2026-04-30 01:36:43 +03:00
|
|
|
|
@app.route('/formats', methods=['POST'])
def formats():
    """POST /formats - list the available download formats for a YouTube URL.

    Expects a JSON object ``{"url": "<youtube url>"}``. Results are cached in
    memory per normalized URL for ``_FORMATS_CACHE_TTL`` seconds.

    Responses:
        200: ``{"formats": [...]}``
        400: ``{"error": ...}`` for a missing/invalid body or a non-YouTube URL
        500: ``{"error": ...}`` when the formats could not be retrieved
    """
    from urllib.parse import urlsplit

    request_id = str(uuid.uuid4())[:8]
    logger.info(f"[FORMATS {request_id}] ========== ЗАПРОС ФОРМАТОВ ==========")

    try:
        # silent=True: a malformed or non-JSON body yields None and is answered
        # with 400 below instead of raising into the generic 500 handler.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'url' not in data:
            return jsonify({'error': 'URL is required'}), 400

        url = data['url']
        if not isinstance(url, str) or not url.strip():
            return jsonify({'error': 'URL is required'}), 400
        url = url.strip()

        # Check the actual host instead of searching for a substring, so that
        # e.g. "http://other-host/?youtube.com" is rejected.
        try:
            host = (urlsplit(url if '://' in url else f'https://{url}').hostname or '').lower()
        except ValueError:
            host = ''
        if not (host in ('youtube.com', 'youtu.be') or host.endswith('.youtube.com')):
            return jsonify({'error': 'Only YouTube URLs are supported'}), 400

        cache_key = _normalize_youtube_url(url)
        now = time.time()

        cached = _formats_cache.get(cache_key)
        if cached is not None:
            cached_time, cached_formats = cached
            if now - cached_time < _FORMATS_CACHE_TTL:
                logger.info(f"[FORMATS {request_id}] Кэш: {len(cached_formats)} форматов ({now - cached_time:.0f}с)")
                return jsonify({'formats': cached_formats}), 200

        # Drop every expired entry (including the one just inspected) so the
        # cache does not grow without bound. Iterate over a snapshot because
        # the dict is modified inside the loop.
        for key, (stored_at, _) in list(_formats_cache.items()):
            if now - stored_at >= _FORMATS_CACHE_TTL:
                _formats_cache.pop(key, None)

        format_list = get_youtube_formats(url)
        _formats_cache[cache_key] = (time.time(), format_list)
        return jsonify({'formats': format_list}), 200

    except Exception as e:
        # logger.exception records the message together with the traceback.
        logger.exception(f"[FORMATS {request_id}] Ошибка: {e}")
        return jsonify({'error': str(e)}), 500
|
|
|
|
|
|
|
|
|
|
|
|
|
2025-12-11 01:07:04 +03:00
|
|
|
|
@app.route('/download/stream', methods=['POST'])
def download_stream():
    """POST /download/stream - download a YouTube video and return its bytes.

    Expects a JSON object ``{"url": "<youtube url>", "format_id": "<selector>"}``
    (``format_id`` is optional). The downloaded file is read into memory,
    removed from disk and sent back as an attachment.

    NOTE(review): the whole file is held in memory for the response; consider
    streaming from disk if large downloads are expected.

    Responses:
        200: file bytes with ``Content-Type`` and ``Content-Disposition`` set
        400: ``{"error": ...}`` for a missing/invalid body or a non-YouTube URL
        500: ``{"error": ...}`` when the download fails; a cookies hint is
            appended when the error text points at cookie/auth problems
    """
    from urllib.parse import urlsplit

    request_id = str(uuid.uuid4())[:8]
    logger.info(f"[REQUEST {request_id}] ========== НОВЫЙ ЗАПРОС ==========")

    try:
        # silent=True: a malformed or non-JSON body yields None and is answered
        # with 400 below instead of raising into the generic 500 handler.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or 'url' not in data:
            return jsonify({'error': 'URL is required'}), 400

        url = data['url']
        if not isinstance(url, str) or not url.strip():
            return jsonify({'error': 'URL is required'}), 400
        url = url.strip()

        format_id = data.get('format_id')
        if format_id is not None and not isinstance(format_id, str):
            # The value ends up on the yt-dlp command line and must be text.
            return jsonify({'error': 'format_id must be a string'}), 400
        logger.info(f"[REQUEST {request_id}] Скачивание: {url}, format_id={format_id}")

        # Check the actual host instead of searching for a substring, so that
        # e.g. "http://other-host/?youtube.com" is rejected.
        try:
            host = (urlsplit(url if '://' in url else f'https://{url}').hostname or '').lower()
        except ValueError:
            host = ''
        if not (host in ('youtube.com', 'youtu.be') or host.endswith('.youtube.com')):
            return jsonify({'error': 'Only YouTube URLs are supported'}), 400

        video_path, used_downloader = download_youtube_video(url, format_id=format_id)
        logger.info(f"[REQUEST {request_id}] Видео скачано: {video_path} ({used_downloader})")

        try:
            video_data = video_path.read_bytes()
        finally:
            # Remove the temporary file even when reading it fails.
            video_path.unlink(missing_ok=True)

        # HTTP header values must be plain ASCII without quotes or line breaks.
        safe_filename = video_path.name.encode('ascii', 'ignore').decode('ascii')
        safe_filename = re.sub(r'["\\\r\n]', '', safe_filename).strip()
        if not safe_filename.endswith(('.mp4', '.webm', '.mkv', '.m4a', '.mp3')):
            safe_filename = 'youtube_video.mp4'

        ext = video_path.suffix.lower()
        content_type_map = {
            '.webm': 'video/webm', '.mkv': 'video/x-matroska',
            '.mp4': 'video/mp4', '.m4a': 'audio/mp4', '.mp3': 'audio/mpeg',
        }
        content_type = content_type_map.get(ext, 'video/mp4')

        logger.info(f"[REQUEST {request_id}] ========== ЗАПРОС УСПЕШНО ЗАВЕРШЕН ==========")

        return video_data, 200, {
            'Content-Type': content_type,
            'Content-Disposition': f'attachment; filename="{safe_filename}"'
        }

    except Exception as e:
        error_str = str(e)
        logger.error(f"[REQUEST {request_id}] ========== ОШИБКА ==========")
        logger.error(f"[REQUEST {request_id}] {error_str}")
        logger.error(traceback.format_exc())

        if any(kw in error_str.lower() for kw in ('cookies', 'bot', 'sign in', 'authentication')):
            error_msg = (
                f"{error_str}\n\n"
                "💡 Совет: Cookies устарели или недействительны. "
                "Обновите cookies через скрипт:\n"
                "   ./youtube-downloader/get_youtube_cookies.sh\n"
                "Затем перезапустите сервис."
            )
        else:
            error_msg = error_str

        return jsonify({'error': error_msg}), 500
|
2025-12-11 01:07:04 +03:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: bind address and port come from the environment
    # (HOST / PORT), defaulting to 0.0.0.0:5000.
    host = os.environ.get('HOST', '0.0.0.0')
    port = int(os.environ.get('PORT', 5000))
    logger.info(f"Запуск YouTube Downloader сервиса на {host}:{port}")
    app.run(host=host, port=port, debug=False)
|