fix: correct quality selection -- specific format_id first, exclude av01, validate video stream

This commit is contained in:
vrubelroman 2026-05-03 01:36:04 +03:00
parent 326eabaa99
commit 053f6c8afc
3 changed files with 139 additions and 33 deletions

View file

@ -86,7 +86,7 @@ def _is_valid_cookies_file(cookies_path: Path) -> bool:
def _parse_height(format_dict: dict) -> int:
"""Извлекает реальную высоту из формата: height/width/format_note"""
"""Извлекает реальную высоту из формата: height/width/format_note/resolution"""
h = format_dict.get('height')
w = format_dict.get('width')
# Для вертикальных видео (Shorts) height и width могут быть перепутаны —
@ -97,11 +97,21 @@ def _parse_height(format_dict: dict) -> int:
return real_h
if h and isinstance(h, (int, float)) and h > 0:
return int(h)
# Если вообще нет размеров — парсим format_note (например "360p")
note = format_dict.get('format_note', '') or ''
match = re.search(r'(\d+)p', str(note))
if w and isinstance(w, (int, float)) and w > 0:
return int(w)
# Если вообще нет размеров — парсим format_note (например "360p" или "1080x1920")
note = str(format_dict.get('format_note', '') or '')
match = re.search(r'(\d+)\s*p', note)
if match:
return int(match.group(1))
match = re.search(r'(\d+)\s*x\s*(\d+)', note, re.IGNORECASE)
if match:
return min(int(match.group(1)), int(match.group(2)))
# Парсим поле resolution (например "1920x1080")
res = str(format_dict.get('resolution', '') or '')
match = re.search(r'(\d+)\s*x\s*(\d+)', res, re.IGNORECASE)
if match:
return min(int(match.group(1)), int(match.group(2)))
return 0
@ -163,6 +173,31 @@ def _make_base_ydl_opts(user_agent: str, cookies_file_path: Path | None = None)
return opts
def _find_latest_downloaded() -> Path | None:
    """Return the most recently modified regular file in the downloads folder.

    Only the top level of ``DOWNLOADS_DIR`` is scanned (``glob('*')`` is not
    recursive). Sub-directories are ignored so that a directory can never be
    reported as the latest download, and entries that cannot be inspected
    (for example because they were removed between listing and ``stat``)
    are skipped instead of aborting the whole lookup.

    Returns:
        The path of the file with the greatest modification time, or
        ``None`` when the folder contains no regular files.
    """
    newest: Path | None = None
    newest_mtime = float('-inf')
    for entry in DOWNLOADS_DIR.glob('*'):
        try:
            if not entry.is_file():
                continue
            mtime = entry.stat().st_mtime
        except OSError:
            # The entry vanished or is unreadable; it cannot be the result.
            continue
        # Strict comparison keeps the first entry on ties, matching the
        # stable descending sort this scan replaces.
        if mtime > newest_mtime:
            newest, newest_mtime = entry, mtime
    return newest
def _file_has_video_stream(filepath: Path) -> bool:
    """Check with ffprobe whether the file contains a video stream.

    Runs ``ffprobe`` restricted to the first video stream (``v:0``) and asks
    it to print only that stream's ``codec_type`` in header-less CSV form.

    Args:
        filepath: Media file to inspect.

    Returns:
        ``True`` when ffprobe prints ``video`` for the selected stream,
        ``False`` when it prints nothing of the kind. ``True`` is also
        returned when the check itself could not be carried out (ffprobe
        could not be started, timed out, and so on): a file that cannot be
        verified is assumed to contain video.
    """
    import subprocess

    try:
        result = subprocess.run(
            ['ffprobe', '-v', 'error', '-select_streams', 'v:0',
             '-show_entries', 'stream=codec_type', '-of', 'csv=p=0',
             str(filepath)],
            capture_output=True, text=True, timeout=15,
        )
    except Exception as e:
        # Deliberate best-effort boundary: any failure to run the probe is
        # logged and treated as "video present".
        logger.warning(f"[VALIDATE] Не удалось проверить видео-поток в {filepath.name}: {e}")
        return True

    # Accept any output line equal to "video" instead of requiring the whole
    # output to be exactly that single word, so repeated or additional lines
    # do not turn into a false "no video" verdict.
    reported = {line.strip() for line in result.stdout.splitlines()}
    return 'video' in reported
def download_youtube_video(url: str, max_retries: int = 3, format_id: str | None = None) -> Path:
"""Скачивает видео с YouTube - используем cookies для обхода блокировок"""
logger.info(f"[DOWNLOAD] Начало скачивания: {url}")
@ -265,18 +300,20 @@ def download_youtube_video(url: str, max_retries: int = 3, format_id: str | None
is_specific_code = not ('[' in format_id or ']' in format_id)
requested_height = _extract_height_from_format_id(format_id)
format_options = [format_id]
if requested_height is not None:
if is_specific_code:
logger.info(f"[DOWNLOAD] Конкретный format code: {format_id}")
else:
logger.info(f"[DOWNLOAD] Format selector: {format_id}")
logger.info(f"[DOWNLOAD] Добавляем качество-сохраняющий fallback для height<={requested_height}")
format_options.append(f"bestvideo[height<={requested_height}]+bestaudio/best[height<={requested_height}]")
# Конкретный format_id (из /formats) ставим ПЕРВЫМ —
# он точно указывает выбранные пользователем format codes.
# Height-ограниченный селектор идёт как fallback
# (c исключением av01, чтобы yt-dlp не выбрал unplayable формат 400).
format_options = [
format_id,
f"bestvideo[height<={requested_height}][vcodec!=av01]+bestaudio/best[height<={requested_height}]",
]
logger.info(f"[DOWNLOAD] Размерное ограничение: {requested_height}p, format_id: {format_id}")
format_options.extend(default_format_options)
format_options.extend(combined_fallback)
else:
format_options = [format_id]
format_options.extend(default_format_options)
logger.info(f"[DOWNLOAD] Итоговый список format_options ({len(format_options)} шт.): {format_options}")
@ -316,6 +353,19 @@ def download_youtube_video(url: str, max_retries: int = 3, format_id: str | None
logger.info(f"[DOWNLOAD] Попытка {attempt + 1}: реально скачан формат: id={actual_format_id}, height={actual_height}, ext={actual_ext}, size={actual_filesize}")
logger.info(f"[DOWNLOAD] Попытка {attempt + 1}: успешно скачано с форматом {format_option}")
# Проверяем, что файл содержит видео-поток, а не только аудио
# (yt-dlp c allow_unplayable_formats может скачать av01 формат
# и отказаться от мержа, вернув только аудио)
downloaded = _find_latest_downloaded()
if downloaded and not _file_has_video_stream(downloaded):
logger.warning(f"[DOWNLOAD] Файл {downloaded.name} не содержит видео-потока (только аудио). Удаляем и пробуем следующий формат...")
try:
downloaded.unlink()
except Exception:
pass
continue
download_success = True
break
except Exception as download_error:
@ -341,6 +391,16 @@ def download_youtube_video(url: str, max_retries: int = 3, format_id: str | None
with yt_dlp.YoutubeDL(ydl_opts_download_no_cookies) as ydl:
ydl.download([url])
logger.info(f"[DOWNLOAD] Попытка {attempt + 1}: успешно скачано без cookies")
downloaded = _find_latest_downloaded()
if downloaded and not _file_has_video_stream(downloaded):
logger.warning(f"[DOWNLOAD] Файл {downloaded.name} без видео-потока. Удаляем и пробуем следующий формат...")
try:
downloaded.unlink()
except Exception:
pass
continue
download_success = True
cookies_valid = False # Отключаем cookies для следующих попыток
break
@ -624,9 +684,26 @@ def get_youtube_formats(url: str) -> list[dict]:
# ---------------------------------------------------------------
if len(result) == 0:
logger.info(f"[FORMATS] Реальных форматов не найдено, генерируем оценочные")
# Пытаемся определить реальную максимальную высоту из всех полей
max_possible_height = 0
for f in formats:
height = _parse_height(f)
if height > max_possible_height:
max_possible_height = height
if max_possible_height == 0:
# Если ничего не смогли определить — используем format_note напрямую
for f in formats:
note = str(f.get('format_note', '') or '')
numbers = re.findall(r'(\d+)', note)
for num in numbers:
n = int(num)
if 100 < n < 10000 and n > max_possible_height:
max_possible_height = n
if max_possible_height == 0:
max_possible_height = 2160
max_available_height = max(available_heights) if available_heights else 2160
available_tiers = [(h, l) for h, l in quality_tiers if h <= max_available_height]
available_tiers = [(h, l) for h, l in quality_tiers if h <= max_possible_height]
TYPICAL_VIDEO_BITRATES: dict[int, int] = {
2160: 40000, 1440: 20000, 1080: 10000, 720: 5000,

View file

@ -12,31 +12,31 @@
.youtube.com TRUE / FALSE 1771971074000 ST-1dsf764 session_logininfo=AFmmF2swRQIgZPfEOdmfC8u5sHvE1aOagKEvp5rRUe5hRUeLiYmxLDwCIQDqFIR59yZ_aBb5BLYSpK7LGdJ6YZqnh32USuOyMZTC5g%3AQUQ3MjNmd0ZzX01fTjViQ2kzMDJEWG5Ed09zMGF1TlhJcm81YWt3WWdKS2RCZkY3Z2NmMVhudUF4MFVZdFlHd0YtaEU0R3VHNHQ3VmFSZHdfR1RIcnBJNUtXeWhKWVVScE1ZcXNJdzRfdkFGVi1lZzY2dWxCcVVGZ0FPSjNzVmFjTVg1YTBYS0xBajEzU1REM3dnbUc5U3E3NHVtLVRLLXRn
.youtube.com TRUE / FALSE 1772057340000 ST-hcbf8d session_logininfo=AFmmF2swRQIgZPfEOdmfC8u5sHvE1aOagKEvp5rRUe5hRUeLiYmxLDwCIQDqFIR59yZ_aBb5BLYSpK7LGdJ6YZqnh32USuOyMZTC5g%3AQUQ3MjNmd0ZzX01fTjViQ2kzMDJEWG5Ed09zMGF1TlhJcm81YWt3WWdKS2RCZkY3Z2NmMVhudUF4MFVZdFlHd0YtaEU0R3VHNHQ3VmFSZHdfR1RIcnBJNUtXeWhKWVVScE1ZcXNJdzRfdkFGVi1lZzY2dWxCcVVGZ0FPSjNzVmFjTVg1YTBYS0xBajEzU1REM3dnbUc5U3E3NHVtLVRLLXRn
.youtube.com TRUE / FALSE 1772093274000 ST-1b disableCache=true&session_logininfo=AFmmF2swRQIgZPfEOdmfC8u5sHvE1aOagKEvp5rRUe5hRUeLiYmxLDwCIQDqFIR59yZ_aBb5BLYSpK7LGdJ6YZqnh32USuOyMZTC5g%3AQUQ3MjNmd0ZzX01fTjViQ2kzMDJEWG5Ed09zMGF1TlhJcm81YWt3WWdKS2RCZkY3Z2NmMVhudUF4MFVZdFlHd0YtaEU0R3VHNHQ3VmFSZHdfR1RIcnBJNUtXeWhKWVVScE1ZcXNJdzRfdkFGVi1lZzY2dWxCcVVGZ0FPSjNzVmFjTVg1YTBYS0xBajEzU1REM3dnbUc5U3E3NHVtLVRLLXRn&endpoint=%7B%22browseEndpoint%22%3A%7B%22browseId%22%3A%22FEwhat_to_watch%22%7D%2C%22commandMetadata%22%3A%7B%22webCommandMetadata%22%3A%7B%22url%22%3A%22%2F%22%2C%22rootVe%22%3A3854%2C%22webPageType%22%3A%22WEB_PAGE_TYPE_BROWSE%22%7D%7D%7D
.youtube.com TRUE / FALSE 1840620810 HSID AyQ5v_SYe7XVSwk4B
.youtube.com TRUE / TRUE 1840620810 SSID A6URSCEMDAehLdZmX
.youtube.com TRUE / FALSE 1840620810 APISID 8dbTFmLBSXBgxwR5/Aqxn9OCBXLwhMCr-P
.youtube.com TRUE / TRUE 1840620810 SAPISID T-VywQwW6YYwPZ05/AVcBJlwHBEyhqZuI6
.youtube.com TRUE / TRUE 1840620810 __Secure-1PAPISID T-VywQwW6YYwPZ05/AVcBJlwHBEyhqZuI6
.youtube.com TRUE / TRUE 1840620810 __Secure-3PAPISID T-VywQwW6YYwPZ05/AVcBJlwHBEyhqZuI6
.youtube.com TRUE / FALSE 1840620810 SID g.a0009QhwIHIkg4t_oMWmKHHoJyYI4VKCwXZzgObCsdqkdliI_o1E-iILBJwqGZqsMai74aasEQACgYKAUMSARISFQHGX2Mi8v7RPSIIRSpCy6cyYpF6OBoVAUF8yKpoHCEtgIdkpfFVsSxnHcMb0076
.youtube.com TRUE / TRUE 1840620810 __Secure-1PSID g.a0009QhwIHIkg4t_oMWmKHHoJyYI4VKCwXZzgObCsdqkdliI_o1EPUsVlK9u7TV_DmO1S_HjsAACgYKAcUSARISFQHGX2MieEAf3c2YDM23ElqwWQedxxoVAUF8yKr3lCeiD3YRIEUhEGm-xT2E0076
.youtube.com TRUE / TRUE 1840620810 __Secure-3PSID g.a0009QhwIHIkg4t_oMWmKHHoJyYI4VKCwXZzgObCsdqkdliI_o1E5UX-Uejh1R-eG6Kej6fVxQACgYKARcSARISFQHGX2MiKiX8dJY-eTnU5GRrj77ExRoVAUF8yKpJxnl1sW6_ZLm33PtAOTLY0076
.youtube.com TRUE / FALSE 1840832909 HSID AyQ5v_SYe7XVSwk4B
.youtube.com TRUE / TRUE 1840832909 SSID A6URSCEMDAehLdZmX
.youtube.com TRUE / FALSE 1840832909 APISID 8dbTFmLBSXBgxwR5/Aqxn9OCBXLwhMCr-P
.youtube.com TRUE / TRUE 1840832909 SAPISID T-VywQwW6YYwPZ05/AVcBJlwHBEyhqZuI6
.youtube.com TRUE / TRUE 1840832909 __Secure-1PAPISID T-VywQwW6YYwPZ05/AVcBJlwHBEyhqZuI6
.youtube.com TRUE / TRUE 1840832909 __Secure-3PAPISID T-VywQwW6YYwPZ05/AVcBJlwHBEyhqZuI6
.youtube.com TRUE / FALSE 1840832909 SID g.a0009ghwIHcIZqqcY1WV989v420rAlDDepZEj46RPYROUv0etocZfpJzYL10nsGcwt3tO1SfpQACgYKAcoSARISFQHGX2Mi8GOe9epkX1gj-mPmGiEkqhoVAUF8yKqrUaL07JB8aPrxKDzH094X0076
.youtube.com TRUE / TRUE 1840832909 __Secure-1PSID g.a0009ghwIHcIZqqcY1WV989v420rAlDDepZEj46RPYROUv0etocZUxys_3LZCIbQ68z8wQ2c_wACgYKAa0SARISFQHGX2MijinAP4ZtZI0DQThrXUqIpBoVAUF8yKpJ3DA2uat4crjoKJ6Yo6zB0076
.youtube.com TRUE / TRUE 1840832909 __Secure-3PSID g.a0009ghwIHcIZqqcY1WV989v420rAlDDepZEj46RPYROUv0etocZGrDGBah2HczPmxnQjYdK1gACgYKAQQSARISFQHGX2MiACXkSAqve--bS36VrXAmBhoVAUF8yKrUoj5-724_fZGNhKeh8uds0076
.youtube.com TRUE / TRUE 1791136352432 __Secure-BUCKET CMoC
.youtube.com TRUE / TRUE 1793100810 __Secure-YNID 18.YT=xHpVaJh_WysK3C312T879F3rw8GoX1zMaW4NSER6VNGzGdn03Fq3GgfAwDfRLFmTUzZL1KuA4i3Xzl6xseIv84nIRZN7eKqGx_uV903913AsRH6iS6KXklN0GQBJBGys48rdXAaC1mu_gYxqBx4x-2yrBuQWNzhK_rHjgqWH5tND2NT5vPk1o0TDNAdfnrc-kNGeINuM8L37VfNPOgRLKmxi5ifGNhMIIaZe9hUuAzcmD8zhf5bWpjOfTSwinw3aMR7Vv7bpE_qTUv3schu56JjgC5wc4SnBWZFbp3E_9NZKw0UGbYAy-nin_OmO9Ep-o69rFo_Ph2SA3zM07viMdw
.youtube.com TRUE / TRUE 1793100810 __Secure-ROLLOUT_TOKEN CKPS2eDK6Lu50QEQwdv1spXZkQMYz_CUq72VlAM%3D
.youtube.com TRUE / TRUE 1793312896 __Secure-YNID 18.YT=kZLd9ecftHf18l5BUM_PFt6TXFNyMV5Nrka7bly97iA8f-psa9ha5DWmRmQSUzQRIfbiEoZ-RXeTw9IwgSWh5KLafRfZ9G8RgoXgoCnFvqeZrYclpbVjM3oc-97z9Q8Dt9n1BBJ37qbHuUjRqHafR2RXkJDlPa3EKqkmA7DIUSLwwu6XhIuJZ8aSIo36ci-KXL2w6XO1qaDVQG6FK7k5J3714s86d45YL0h_vwtlVVayUEtgLX4UL-kAqBaOkUt-ZuhhzfayX2z-JUVu_U2fl8lhNUfbKDbVgeFT1eLGQIrGprZAeQP9QK-y383E2ZAW3tSVB-OT2-WST2I8tN557w
.youtube.com TRUE / TRUE 1793312896 __Secure-ROLLOUT_TOKEN CKPS2eDK6Lu50QEQwdv1spXZkQMY4vDstdOblAM%3D
.youtube.com TRUE / FALSE 1776287761000 ST-yve142 session_logininfo=AFmmF2swRQIgZPfEOdmfC8u5sHvE1aOagKEvp5rRUe5hRUeLiYmxLDwCIQDqFIR59yZ_aBb5BLYSpK7LGdJ6YZqnh32USuOyMZTC5g%3AQUQ3MjNmd0ZzX01fTjViQ2kzMDJEWG5Ed09zMGF1TlhJcm81YWt3WWdKS2RCZkY3Z2NmMVhudUF4MFVZdFlHd0YtaEU0R3VHNHQ3VmFSZHdfR1RIcnBJNUtXeWhKWVVScE1ZcXNJdzRfdkFGVi1lZzY2dWxCcVVGZ0FPSjNzVmFjTVg1YTBYS0xBajEzU1REM3dnbUc5U3E3NHVtLVRLLXRn
.youtube.com TRUE / TRUE 1807824503515 __Secure-1PSIDTS sidts-CjUBWhotCSAL5EMsgNfc0JD8UVvU5vyCYbx9ZFc0Nnry9Qc7YHRzl6a7o8Zm6bPYHoFyKALKlBAA
.youtube.com TRUE / TRUE 1807824503517 __Secure-3PSIDTS sidts-CjUBWhotCSAL5EMsgNfc0JD8UVvU5vyCYbx9ZFc0Nnry9Qc7YHRzl6a7o8Zm6bPYHoFyKALKlBAA
.youtube.com TRUE / TRUE 1793109764 VISITOR_INFO1_LIVE vFr43YvHJaE
.youtube.com TRUE / TRUE 1793109764 VISITOR_PRIVACY_METADATA CgJSVRIEGgAgMg%3D%3D
.youtube.com TRUE / TRUE 1793313256 VISITOR_INFO1_LIVE vFr43YvHJaE
.youtube.com TRUE / TRUE 1793313256 VISITOR_PRIVACY_METADATA CgJSVRIEGgAgMg%3D%3D
.youtube.com TRUE / FALSE 1776288519000 ST-tladcw session_logininfo=AFmmF2swRQIgZPfEOdmfC8u5sHvE1aOagKEvp5rRUe5hRUeLiYmxLDwCIQDqFIR59yZ_aBb5BLYSpK7LGdJ6YZqnh32USuOyMZTC5g%3AQUQ3MjNmd0ZzX01fTjViQ2kzMDJEWG5Ed09zMGF1TlhJcm81YWt3WWdKS2RCZkY3Z2NmMVhudUF4MFVZdFlHd0YtaEU0R3VHNHQ3VmFSZHdfR1RIcnBJNUtXeWhKWVVScE1ZcXNJdzRfdkFGVi1lZzY2dWxCcVVGZ0FPSjNzVmFjTVg1YTBYS0xBajEzU1REM3dnbUc5U3E3NHVtLVRLLXRn
.youtube.com TRUE / FALSE 0 PREF tz=UTC&f7=100&f6=40000000&hl=en
.youtube.com TRUE / FALSE 1776288527000 ST-xuwub9 session_logininfo=AFmmF2swRQIgZPfEOdmfC8u5sHvE1aOagKEvp5rRUe5hRUeLiYmxLDwCIQDqFIR59yZ_aBb5BLYSpK7LGdJ6YZqnh32USuOyMZTC5g%3AQUQ3MjNmd0ZzX01fTjViQ2kzMDJEWG5Ed09zMGF1TlhJcm81YWt3WWdKS2RCZkY3Z2NmMVhudUF4MFVZdFlHd0YtaEU0R3VHNHQ3VmFSZHdfR1RIcnBJNUtXeWhKWVVScE1ZcXNJdzRfdkFGVi1lZzY2dWxCcVVGZ0FPSjNzVmFjTVg1YTBYS0xBajEzU1REM3dnbUc5U3E3NHVtLVRLLXRn
.youtube.com TRUE / FALSE 1809093764 SIDCC AKEyXzVuQ1CJQldvhHlCwcxliD-5HoCB4nz-1_bWYcCGuntiTxyQ3zu2dKQ4e3cRVw0qxx_E4oVX
.youtube.com TRUE / TRUE 1809093764 __Secure-1PSIDCC AKEyXzW5L7zbNTopC4iWw_0rNkDB3asr9lB-mwtstrlNvT9qZ0YJq7QKK7wBm33Bi-e6H-vQiJOr
.youtube.com TRUE / TRUE 1809093764 __Secure-3PSIDCC AKEyXzXALMvWYXgR6z2KCVBMX-wN_wvpUOdOQ9GZ_J3fhKVMgy8QhkwKXCr4zzQKqo9vgZAxEDuq
.youtube.com TRUE / FALSE 1809297256 SIDCC AKEyXzVsS5YLUQD8z9C1v-mL2JIyS_lqX6qpnZKQ_AFrB5WfKI8t61IDvWwihKLswvR3ya_Y2JOP
.youtube.com TRUE / TRUE 1809297256 __Secure-1PSIDCC AKEyXzXDaKBNexbPjEPwCB8IDGZRrPTCTOVNDWgBRtsKv5XcaCCg5JxpeRXlk2gX4lidlrONyC52
.youtube.com TRUE / TRUE 1809297256 __Secure-3PSIDCC AKEyXzUF0D1vjEN7XqU2ReXIRFuti0YZjmliSwyRcCUSq5rUlGmYoYmzjSngu8HtDmEiioigkUI9
.youtube.com TRUE / FALSE 1776288585000 ST-3opvp5 session_logininfo=AFmmF2swRQIgZPfEOdmfC8u5sHvE1aOagKEvp5rRUe5hRUeLiYmxLDwCIQDqFIR59yZ_aBb5BLYSpK7LGdJ6YZqnh32USuOyMZTC5g%3AQUQ3MjNmd0ZzX01fTjViQ2kzMDJEWG5Ed09zMGF1TlhJcm81YWt3WWdKS2RCZkY3Z2NmMVhudUF4MFVZdFlHd0YtaEU0R3VHNHQ3VmFSZHdfR1RIcnBJNUtXeWhKWVVScE1ZcXNJdzRfdkFGVi1lZzY2dWxCcVVGZ0FPSjNzVmFjTVg1YTBYS0xBajEzU1REM3dnbUc5U3E3NHVtLVRLLXRn
.youtube.com TRUE / TRUE 0 YSC IewJyGJN7Aw
.youtube.com TRUE / TRUE 0 YSC KTvrS45hA30
.instagram.com TRUE / TRUE 1801240452128 datr hGdNaS-QqakSYV8X2eqVTIyA
.instagram.com TRUE / TRUE 1798216452128 ig_did 2C886E85-30B9-4495-B882-D9F545DF28E4
.instagram.com TRUE / TRUE 1801240453000 mid aU1nhAAEAAGuKRzTGE9SdmhLzZ5Z