From 762c47dc2729007c27966218db768a6b846cac6f Mon Sep 17 00:00:00 2001 From: Phil Brk8 Date: Mon, 26 Jan 2026 00:35:29 +0100 Subject: [PATCH 1/5] feat: add browser cookie extraction modules Implement cookie extraction from browser databases, similar to yt-dlp's approach. Supports Chrome, Firefox, Edge, Brave, Chromium, Opera, and Vivaldi across Linux, macOS, and Windows. - _cookies.py: Browser detection and cookie extraction logic - _decryptors.py: Platform-specific cookie decryption (AES-CBC/GCM, DPAPI) --- youtube_transcript_api/_cookies.py | 315 +++++++++++++++++++ youtube_transcript_api/_decryptors.py | 433 ++++++++++++++++++++++++++ 2 files changed, 748 insertions(+) create mode 100644 youtube_transcript_api/_cookies.py create mode 100644 youtube_transcript_api/_decryptors.py diff --git a/youtube_transcript_api/_cookies.py b/youtube_transcript_api/_cookies.py new file mode 100644 index 0000000..7ae1adf --- /dev/null +++ b/youtube_transcript_api/_cookies.py @@ -0,0 +1,315 @@ +"""Browser cookie extraction module. + +This module provides functionality to extract cookies directly from browsers +to enable authentication for age-restricted YouTube videos. + +Based on yt-dlp's cookie extraction approach. 
+""" + +from __future__ import annotations + +import glob +import os +import shutil +import sqlite3 +import sys +import tempfile +from pathlib import Path +from typing import Optional + +from ._errors import CookiePathInvalid, CookieError + + +# Browser cookie database paths +BROWSER_PATHS = { + "chrome": { + "Linux": Path.home() / ".config/google-chrome", + "Darwin": Path.home() / "Library/Application Support/Google/Chrome", + "Windows": Path(os.environ.get("LOCALAPPDATA", "")) + / "Google/Chrome/User Data", + }, + "chromium": { + "Linux": Path.home() / ".config/chromium", + "Darwin": Path.home() / "Library/Application Support/Chromium", + "Windows": Path(os.environ.get("LOCALAPPDATA", "")) + / "Chromium/User Data", + }, + "edge": { + "Linux": Path.home() / ".config/microsoft-edge", + "Darwin": Path.home() / "Library/Application Support/Microsoft Edge", + "Windows": Path(os.environ.get("LOCALAPPDATA", "")) + / "Microsoft/Edge/User Data", + }, + "brave": { + "Linux": Path.home() / ".config/BraveSoftware/Brave-Browser", + "Darwin": Path.home() + / "Library/Application Support/BraveSoftware/Brave-Browser", + "Windows": Path(os.environ.get("LOCALAPPDATA", "")) + / "BraveSoftware/Brave-Browser/User Data", + }, + "opera": { + "Linux": Path.home() / ".config/opera", + "Darwin": Path.home() + / "Library/Application Support/com.operasoftware.Opera", + "Windows": Path(os.environ.get("APPDATA", "")) + / "Opera Software/Opera Stable", + }, + "vivaldi": { + "Linux": Path.home() / ".config/vivaldi", + "Darwin": Path.home() / "Library/Application Support/Vivaldi", + "Windows": Path(os.environ.get("LOCALAPPDATA", "")) + / "Vivaldi/User Data", + }, + "firefox": { + "Linux": Path.home() / ".mozilla/firefox", + "Darwin": Path.home() / "Library/Application Support/Firefox/Profiles", + "Windows": Path(os.environ.get("APPDATA", "")) + / "Mozilla/Firefox/Profiles", + }, +} + +# Chromium-based browsers +CHROMIUM_BROWSERS = ["chrome", "chromium", "edge", "brave", "opera", "vivaldi"] + + 
+def _get_platform() -> str:
+    """Map ``sys.platform`` to the key used in ``BROWSER_PATHS``.
+
+    Returns:
+        'Darwin' (macOS), 'Windows' (win32 and cygwin) or 'Linux' (any other
+        platform value).
+    """
+    if sys.platform == "darwin":
+        return "Darwin"
+    if sys.platform in ("win32", "cygwin"):
+        return "Windows"
+    return "Linux"
+
+
+def _find_chrome_cookie_db(
+    browser: str, profile: Optional[str] = None
+) -> Path:
+    """Find the cookie database of a Chromium-based browser.
+
+    Args:
+        browser: Browser name ('chrome', 'edge', 'brave', etc.); must be a
+            key of ``BROWSER_PATHS``
+        profile: Profile name/path (defaults to 'Default')
+
+    Returns:
+        Path to cookie database
+
+    Raises:
+        CookiePathInvalid: If the browser directory or the cookie database
+            is not found
+    """
+    platform = _get_platform()
+    base_path = BROWSER_PATHS[browser].get(platform)
+
+    if not base_path or not base_path.exists():
+        raise CookiePathInvalid(
+            f"{browser} browser directory not found at {base_path}"
+        )
+
+    if profile is None:
+        profile = "Default"
+
+    profile_dir = base_path / profile
+    network_db = profile_dir / "Network" / "Cookies"
+    profile_db = profile_dir / "Cookies"
+
+    # The platform's usual location is tried first, the other layout is a
+    # fallback (e.g. Windows installs that still keep 'Cookies' directly in
+    # the profile directory).
+    if platform == "Windows":
+        candidates = (network_db, profile_db)
+    else:
+        candidates = (profile_db, network_db)
+
+    for candidate in candidates:
+        if candidate.exists():
+            return candidate
+
+    raise CookiePathInvalid(f"Cookie database not found at {candidates[0]}")
+
+
+def _find_firefox_cookie_db(profile: Optional[str] = None) -> Path:
+    """Find the Firefox cookie database.
+
+    Args:
+        profile: Substring of the profile directory name (optional). Without
+            it, directories matching ``*.default*`` are considered.
+
+    Returns:
+        Path to cookie database
+
+    Raises:
+        CookiePathInvalid: If no profile or no cookie database is found
+    """
+    platform = _get_platform()
+    base_path = BROWSER_PATHS["firefox"].get(platform)
+
+    if not base_path or not base_path.exists():
+        raise CookiePathInvalid(
+            f"Firefox profile directory not found at {base_path}"
+        )
+
+    pattern = f"*{profile}*" if profile else "*.default*"
+    # glob order is arbitrary; sorting makes the selected profile stable.
+    profile_dirs = sorted(glob.glob(str(base_path / pattern)))
+
+    if not profile_dirs:
+        raise CookiePathInvalid(f"No Firefox profile found in {base_path}")
+
+    # Several directories can match (e.g. '<id>.default' next to
+    # '<id>.default-release'); use the first one that really holds cookies
+    # instead of failing on an empty match.
+    cookie_dbs = [Path(directory) / "cookies.sqlite" for directory in profile_dirs]
+    for cookie_db in cookie_dbs:
+        if cookie_db.exists():
+            return cookie_db
+
+    raise CookiePathInvalid(f"Cookie database not found at {cookie_dbs[0]}")
+
+
+def _read_chrome_meta_version(cursor: sqlite3.Cursor) -> int:
+    """Return the schema version stored in the ``meta`` table, or 0."""
+    try:
+        row = cursor.execute(
+            "SELECT value FROM meta WHERE key = 'version'"
+        ).fetchone()
+        return int(row[0]) if row else 0
+    except (sqlite3.Error, TypeError, ValueError):
+        # Missing table or unparsable value: treat as "unknown/old" schema.
+        return 0
+
+
+def _find_browser_root(cookie_db: Path) -> Path:
+    """Return the directory that holds 'Local State' for *cookie_db*.
+
+    The database lives either at ``<root>/<profile>/Cookies`` or at
+    ``<root>/<profile>/Network/Cookies``, so the root is two or three levels
+    up. Falls back to ``cookie_db.parent.parent`` when no 'Local State' file
+    is found.
+    """
+    for ancestor in list(cookie_db.parents)[:3]:
+        if (ancestor / "Local State").exists():
+            return ancestor
+    return cookie_db.parent.parent
+
+
+def _extract_chrome_cookies(
+    cookie_db: Path, domain_filter: str = ".youtube.com"
+) -> dict[str, str]:
+    """Extract cookies from a Chrome/Chromium cookie database.
+
+    Args:
+        cookie_db: Path to cookie database
+        domain_filter: Host suffix to filter cookies (e.g., '.youtube.com')
+
+    Returns:
+        Dict of cookie name -> value. Cookies that cannot be decrypted are
+        skipped.
+
+    Raises:
+        CookieError: If the decryptor module cannot be imported, the database
+            cannot be copied or read, or the decryptor cannot be initialized
+    """
+    # Imported lazily so that Firefox extraction keeps working when the
+    # decryptor module cannot be imported.
+    try:
+        from ._decryptors import get_chrome_decryptor
+    except ImportError as e:
+        raise CookieError(
+            "Cookie decryption requires the 'cryptography' package. "
+            "Install it with: pip install 'youtube-transcript-api[cookies]'"
+        ) from e
+
+    # Work on a copy of the database (the browser may hold a lock on it).
+    with tempfile.TemporaryDirectory() as tmpdir:
+        db_copy = Path(tmpdir) / "cookies.db"
+        try:
+            shutil.copy2(cookie_db, db_copy)
+        except OSError as e:
+            raise CookieError(f"Failed to copy cookie database: {e}") from e
+
+        conn = None
+        try:
+            # as_uri() percent-encodes the path, so characters such as '?',
+            # '#' or backslashes cannot corrupt the SQLite URI.
+            conn = sqlite3.connect(f"{db_copy.as_uri()}?mode=ro", uri=True)
+            cursor = conn.cursor()
+            meta_version = _read_chrome_meta_version(cursor)
+            cursor.execute(
+                "SELECT name, encrypted_value, value, host_key FROM cookies WHERE host_key LIKE ?",
+                (f"%{domain_filter}",),
+            )
+            rows = cursor.fetchall()
+        except sqlite3.Error as e:
+            raise CookieError(f"Failed to read cookie database: {e}") from e
+        finally:
+            # Close before the temporary directory is removed; an open handle
+            # would make the cleanup fail on Windows.
+            if conn is not None:
+                conn.close()
+
+    cookies = {}
+    decryptor = None
+
+    for name, encrypted_value, plain_value, _host_key in rows:
+        if encrypted_value:
+            if decryptor is None:
+                # Created on first use so plain-text-only databases do not
+                # need keyring/DPAPI access.
+                decryptor = get_chrome_decryptor(
+                    _find_browser_root(cookie_db), meta_version=meta_version
+                )
+            try:
+                value = decryptor.decrypt(encrypted_value)
+            except Exception:
+                # Best effort by design: one undecryptable cookie must not
+                # abort the whole extraction.
+                continue
+            if value:
+                cookies[name] = value
+        elif plain_value:
+            cookies[name] = plain_value
+
+    return cookies
+
+
+def _extract_firefox_cookies(
+    cookie_db: Path, domain_filter: str = ".youtube.com"
+) -> dict[str, str]:
+    """Extract cookies from a Firefox cookie database.
+
+    Args:
+        cookie_db: Path to cookie database
+        domain_filter: Host suffix to filter cookies
+
+    Returns:
+        Dict of cookie name -> value (empty values are skipped)
+
+    Raises:
+        CookieError: If the database cannot be copied or read
+    """
+    # Work on a copy of the database (the browser may hold a lock on it).
+    with tempfile.TemporaryDirectory() as tmpdir:
+        db_copy = Path(tmpdir) / "cookies.db"
+        try:
+            shutil.copy2(cookie_db, db_copy)
+        except OSError as e:
+            raise CookieError(f"Failed to copy cookie database: {e}") from e
+
+        conn = None
+        try:
+            conn = sqlite3.connect(f"{db_copy.as_uri()}?mode=ro", uri=True)
+            cursor = conn.cursor()
+            cursor.execute(
+                "SELECT name, value FROM moz_cookies WHERE host LIKE ?",
+                (f"%{domain_filter}",),
+            )
+            rows = cursor.fetchall()
+        except sqlite3.Error as e:
+            raise CookieError(
+                f"Failed to read Firefox cookie database: {e}"
+            ) from e
+        finally:
+            if conn is not None:
+                conn.close()
+
+    return {name: value for name, value in rows if value}
+
+
+def extract_cookies_from_browser(
+    browser: str,
+    profile: Optional[str] = None,
+    domain_filter: str = ".youtube.com",
+) -> dict[str, str]:
+    """Extract cookies from a browser.
+
+    Args:
+        browser: Browser name ('chrome', 'firefox', 'edge', 'brave', etc.),
+            case-insensitive
+        profile: Profile name/path (optional, defaults to 'Default' for Chrome-based)
+        domain_filter: Domain to filter cookies (default: '.youtube.com')
+
+    Returns:
+        Dict of cookie name -> value for the specified domain
+
+    Raises:
+        CookiePathInvalid: If browser or cookie database not found
+        CookieError: If the browser is unsupported or cookie extraction fails
+
+    Example:
+        >>> cookies = extract_cookies_from_browser('chrome')
+        >>> print(cookies.get('CONSENT'))
+    """
+    browser = browser.lower()
+
+    if browser not in BROWSER_PATHS:
+        raise CookieError(
+            f"Unsupported browser: {browser}. "
+            f"Supported browsers: {', '.join(BROWSER_PATHS.keys())}"
+        )
+
+    if browser == "firefox":
+        cookie_db = _find_firefox_cookie_db(profile)
+        return _extract_firefox_cookies(cookie_db, domain_filter)
+    if browser in CHROMIUM_BROWSERS:
+        cookie_db = _find_chrome_cookie_db(browser, profile)
+        return _extract_chrome_cookies(cookie_db, domain_filter)
+
+    raise CookieError(f"Cookie extraction not implemented for {browser}")
diff --git a/youtube_transcript_api/_decryptors.py b/youtube_transcript_api/_decryptors.py
new file mode 100644
index 0000000..23bde70
--- /dev/null
+++ b/youtube_transcript_api/_decryptors.py
@@ -0,0 +1,433 @@
+"""Platform-specific cookie decryption for Chrome/Chromium browsers.
+
+Handles different encryption methods used by Chrome on various platforms.
+"""
+
+from __future__ import annotations
+
+import base64
+import json
+import subprocess
+import sys
+from pathlib import Path
+from typing import Optional
+
+from ._errors import CookieError
+
+
+try:
+    from cryptography.hazmat.primitives.ciphers.aead import AESGCM
+    from cryptography.hazmat.primitives import hashes
+    from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
+    from cryptography.hazmat.backends import default_backend
+    from cryptography.hazmat.primitives.ciphers import (
+        Cipher,
+        algorithms,
+        modes,
+    )
+
+    HAS_CRYPTOGRAPHY = True
+except ImportError:
+    HAS_CRYPTOGRAPHY = False
+
+
+_MISSING_CRYPTOGRAPHY_MESSAGE = (
+    "Cookie decryption requires 'cryptography' package. "
+    "Install with: pip install 'youtube-transcript-api[cookies]'"
+)
+
+# From cookie DB schema version 24 on, Chromium stores SHA-256(host_key)
+# (32 bytes) in front of the cookie value inside the encrypted blob.
+_HASH_PREFIX_META_VERSION = 24
+_HASH_PREFIX_LENGTH = 32
+
+_AES_BLOCK_SIZE = 16
+
+# Name used for the "<name> Safe Storage" keyring/keychain entry, keyed by the
+# lower-cased browser root directory name from BROWSER_PATHS.
+# NOTE(review): values follow yt-dlp's browser_keyring_name table - confirm
+# for browsers you rely on. Unknown directories fall back to "Chrome", which
+# is what this module used unconditionally before.
+_MAC_KEYRING_NAMES = {
+    "chrome": "Chrome",
+    "chromium": "Chromium",
+    "microsoft edge": "Microsoft Edge",
+    "brave-browser": "Brave",
+    "com.operasoftware.opera": "Opera",
+    "vivaldi": "Vivaldi",
+}
+_LINUX_KEYRING_NAMES = {
+    "google-chrome": "Chrome",
+    "chromium": "Chromium",
+    "microsoft-edge": "Chromium",
+    "brave-browser": "Brave",
+    "opera": "Chromium",
+    "vivaldi": "Chrome",
+}
+
+
+def _keyring_name(browser_root: Path, names: dict) -> str:
+    """Look up the keyring name for *browser_root*, defaulting to 'Chrome'."""
+    return names.get(browser_root.name.lower(), "Chrome")
+
+
+def _pbkdf2_key(password: bytes, iterations: int) -> bytes:
+    """Derive Chrome's 16-byte AES key (PBKDF2-HMAC-SHA1, salt 'saltysalt')."""
+    kdf = PBKDF2HMAC(
+        algorithm=hashes.SHA1(),
+        length=16,
+        salt=b"saltysalt",
+        iterations=iterations,
+        backend=default_backend(),
+    )
+    return kdf.derive(password)
+
+
+def _decrypt_aes_cbc(key: bytes, ciphertext: bytes) -> Optional[bytes]:
+    """Decrypt AES-CBC data (IV of 16 spaces) and strip PKCS7 padding.
+
+    Returns:
+        The plaintext bytes, or None if the input is not block-aligned, the
+        cipher rejects it, or the padding is invalid (typically a wrong key).
+    """
+    if not ciphertext or len(ciphertext) % _AES_BLOCK_SIZE:
+        return None
+
+    try:
+        cipher = Cipher(
+            algorithms.AES(key),
+            modes.CBC(b" " * 16),  # Chrome uses spaces as IV
+            backend=default_backend(),
+        )
+        decryptor = cipher.decryptor()
+        padded = decryptor.update(ciphertext) + decryptor.finalize()
+    except ValueError:
+        return None
+
+    padding_length = padded[-1]
+    if not 1 <= padding_length <= _AES_BLOCK_SIZE:
+        return None
+    if padded[-padding_length:] != bytes([padding_length]) * padding_length:
+        return None
+
+    return padded[:-padding_length]
+
+
+class ChromeCookieDecryptor:
+    """Base class for Chrome cookie decryption."""
+
+    # Cookie DB schema version; set per instance by the subclasses.
+    _meta_version = 0
+
+    def decrypt(self, encrypted_value: bytes) -> Optional[str]:
+        """Decrypt an encrypted cookie value.
+
+        Args:
+            encrypted_value: The encrypted cookie bytes
+
+        Returns:
+            Decrypted cookie value as string, or None if decryption fails
+        """
+        raise NotImplementedError
+
+    def _to_text(
+        self, plaintext: Optional[bytes], strip_hash: bool = True
+    ) -> Optional[str]:
+        """Turn decrypted bytes into the cookie value.
+
+        Args:
+            plaintext: Decrypted bytes, or None if decryption failed
+            strip_hash: Whether the host-hash prefix of newer cookie
+                databases applies to this value
+
+        Returns:
+            The UTF-8 decoded value, or None if there is nothing to decode
+            or the bytes are not valid UTF-8
+        """
+        if plaintext is None:
+            return None
+        if strip_hash and self._meta_version >= _HASH_PREFIX_META_VERSION:
+            plaintext = plaintext[_HASH_PREFIX_LENGTH:]
+        try:
+            return plaintext.decode("utf-8")
+        except UnicodeDecodeError:
+            return None
+
+
+class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
+    """Chrome cookie decryptor for Linux."""
+
+    def __init__(self, browser_root: Path, meta_version: int = 0):
+        """Initialize Linux decryptor.
+
+        Args:
+            browser_root: Path to browser root directory
+            meta_version: Schema version of the cookie database
+
+        Raises:
+            CookieError: If the 'cryptography' package is not installed
+        """
+        if not HAS_CRYPTOGRAPHY:
+            raise CookieError(_MISSING_CRYPTOGRAPHY_MESSAGE)
+
+        self._meta_version = meta_version
+        self._keyring_label = (
+            f"{_keyring_name(browser_root, _LINUX_KEYRING_NAMES)} Safe Storage"
+        )
+
+        # v10 uses hardcoded password
+        self.v10_key = self._derive_key(b"peanuts")
+
+        # v11 uses keyring password (try to get it, fall back to v10)
+        self.v11_key = self._get_v11_key() or self.v10_key
+
+    def _derive_key(self, password: bytes, iterations: int = 1) -> bytes:
+        """Derive encryption key using PBKDF2.
+
+        Args:
+            password: Password bytes
+            iterations: PBKDF2 iterations
+
+        Returns:
+            Derived key bytes
+        """
+        return _pbkdf2_key(password, iterations)
+
+    def _get_v11_key(self) -> Optional[bytes]:
+        """Try to get v11 key from keyring.
+
+        Returns:
+            Derived key or None if keyring access fails or no matching
+            entry exists
+        """
+        # Optional: 'secretstorage' may be missing or the keyring locked or
+        # unavailable; every such failure means "no v11 key".
+        try:
+            import secretstorage
+
+            bus = secretstorage.dbus_init()
+            collection = secretstorage.get_default_collection(bus)
+            for item in collection.get_all_items():
+                if item.get_label() == self._keyring_label:
+                    return self._derive_key(item.get_secret())
+        except Exception:
+            pass
+
+        return None
+
+    def decrypt(self, encrypted_value: bytes) -> Optional[str]:
+        """Decrypt Chrome cookie on Linux.
+
+        Args:
+            encrypted_value: Encrypted cookie bytes
+
+        Returns:
+            Decrypted value or None
+        """
+        if not encrypted_value:
+            return None
+
+        version = encrypted_value[:3]
+
+        if version == b"v10":
+            key, ciphertext, versioned = self.v10_key, encrypted_value[3:], True
+        elif version == b"v11":
+            key, ciphertext, versioned = self.v11_key, encrypted_value[3:], True
+        else:
+            # Unknown prefix: try the whole value as v10 data anyway
+            key, ciphertext, versioned = self.v10_key, encrypted_value, False
+
+        return self._to_text(
+            _decrypt_aes_cbc(key, ciphertext), strip_hash=versioned
+        )
+
+
+class MacChromeCookieDecryptor(ChromeCookieDecryptor):
+    """Chrome cookie decryptor for macOS."""
+
+    def __init__(self, browser_root: Path, meta_version: int = 0):
+        """Initialize macOS decryptor.
+
+        Args:
+            browser_root: Path to browser root directory
+            meta_version: Schema version of the cookie database
+
+        Raises:
+            CookieError: If the 'cryptography' package is not installed or
+                the Keychain password cannot be retrieved
+        """
+        if not HAS_CRYPTOGRAPHY:
+            raise CookieError(_MISSING_CRYPTOGRAPHY_MESSAGE)
+
+        self._meta_version = meta_version
+        self._keyring_name = _keyring_name(browser_root, _MAC_KEYRING_NAMES)
+
+        # Get password from macOS Keychain
+        password = self._get_keychain_password()
+        if not password:
+            raise CookieError(
+                "Could not retrieve Chrome password from macOS Keychain"
+            )
+        self.key = self._derive_key(password)
+
+    def _get_keychain_password(self) -> Optional[bytes]:
+        """Get the browser's 'Safe Storage' password from macOS Keychain.
+
+        Returns:
+            Password bytes or None
+        """
+        name = getattr(self, "_keyring_name", "Chrome")
+        try:
+            result = subprocess.run(
+                [
+                    "security",
+                    "find-generic-password",
+                    "-w",
+                    "-a",
+                    name,
+                    "-s",
+                    f"{name} Safe Storage",
+                ],
+                capture_output=True,
+                text=False,
+                timeout=5,
+            )
+        except (OSError, subprocess.SubprocessError):
+            # 'security' missing, not executable, or timed out
+            return None
+
+        if result.returncode != 0:
+            return None
+        return result.stdout.rstrip(b"\n")
+
+    def _derive_key(self, password: bytes) -> bytes:
+        """Derive encryption key using PBKDF2.
+
+        Args:
+            password: Password bytes
+
+        Returns:
+            Derived key bytes
+        """
+        # macOS uses 1003 iterations
+        return _pbkdf2_key(password, 1003)
+
+    def decrypt(self, encrypted_value: bytes) -> Optional[str]:
+        """Decrypt Chrome cookie on macOS.
+
+        Args:
+            encrypted_value: Encrypted cookie bytes
+
+        Returns:
+            Decrypted value or None
+        """
+        if not encrypted_value:
+            return None
+
+        # Remove version prefix if present
+        versioned = encrypted_value[:3] == b"v10"
+        if versioned:
+            encrypted_value = encrypted_value[3:]
+
+        return self._to_text(
+            _decrypt_aes_cbc(self.key, encrypted_value), strip_hash=versioned
+        )
+
+
+class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
+    """Chrome cookie decryptor for Windows."""
+
+    def __init__(self, browser_root: Path, meta_version: int = 0):
+        """Initialize Windows decryptor.
+
+        Args:
+            browser_root: Path to browser root directory (the directory that
+                contains 'Local State')
+            meta_version: Schema version of the cookie database
+
+        Raises:
+            CookieError: If the 'cryptography' package is not installed or
+                the encryption key cannot be read from 'Local State'
+        """
+        if not HAS_CRYPTOGRAPHY:
+            raise CookieError(_MISSING_CRYPTOGRAPHY_MESSAGE)
+
+        self._meta_version = meta_version
+
+        # Get encryption key from Local State file
+        self.key = self._get_encryption_key(browser_root)
+        if not self.key:
+            raise CookieError(
+                "Could not retrieve Chrome encryption key from Local State"
+            )
+
+    def _get_encryption_key(self, browser_root: Path) -> Optional[bytes]:
+        """Get encryption key from Local State file.
+
+        Args:
+            browser_root: Path to browser root directory
+
+        Returns:
+            Decrypted key bytes or None
+        """
+        local_state_path = browser_root / "Local State"
+        if not local_state_path.exists():
+            return None
+
+        try:
+            with open(local_state_path, "r", encoding="utf-8") as f:
+                local_state = json.load(f)
+
+            encrypted_key = base64.b64decode(
+                local_state["os_crypt"]["encrypted_key"]
+            )
+        except (OSError, ValueError, KeyError, TypeError):
+            # Unreadable file, invalid JSON/base64 or unexpected structure
+            return None
+
+        # Remove DPAPI prefix
+        if encrypted_key[:5] == b"DPAPI":
+            encrypted_key = encrypted_key[5:]
+
+        # Decrypt using Windows DPAPI
+        return self._decrypt_with_dpapi(encrypted_key)
+
+    def _decrypt_with_dpapi(self, encrypted_data: bytes) -> Optional[bytes]:
+        """Decrypt data using Windows DPAPI.
+
+        Args:
+            encrypted_data: Encrypted data bytes
+
+        Returns:
+            Decrypted data or None (also when DPAPI is unavailable)
+        """
+        try:
+            import ctypes
+            import ctypes.wintypes
+
+            # Define DPAPI structures
+            class DATA_BLOB(ctypes.Structure):
+                _fields_ = [
+                    ("cbData", ctypes.wintypes.DWORD),
+                    ("pbData", ctypes.POINTER(ctypes.c_char)),
+                ]
+
+            # Load CryptUnprotectData
+            crypt_unprotect_data = ctypes.windll.crypt32.CryptUnprotectData
+            crypt_unprotect_data.argtypes = [
+                ctypes.POINTER(DATA_BLOB),
+                ctypes.POINTER(ctypes.wintypes.LPWSTR),
+                ctypes.POINTER(DATA_BLOB),
+                ctypes.c_void_p,
+                ctypes.c_void_p,
+                ctypes.wintypes.DWORD,
+                ctypes.POINTER(DATA_BLOB),
+            ]
+            crypt_unprotect_data.restype = ctypes.wintypes.BOOL
+
+            # Prepare input; the explicit buffer keeps the data alive for
+            # the duration of the call.
+            buffer = ctypes.create_string_buffer(
+                encrypted_data, len(encrypted_data)
+            )
+            blob_in = DATA_BLOB(
+                len(encrypted_data),
+                ctypes.cast(buffer, ctypes.POINTER(ctypes.c_char)),
+            )
+            blob_out = DATA_BLOB()
+
+            # Decrypt
+            if not crypt_unprotect_data(
+                ctypes.byref(blob_in),
+                None,
+                None,
+                None,
+                None,
+                0,
+                ctypes.byref(blob_out),
+            ):
+                return None
+
+            try:
+                return ctypes.string_at(blob_out.pbData, blob_out.cbData)
+            finally:
+                # The output buffer is allocated by DPAPI; always free it
+                ctypes.windll.kernel32.LocalFree(blob_out.pbData)
+        except Exception:
+            # Not on Windows, or the DPAPI call itself failed
+            return None
+
+    def decrypt(self, encrypted_value: bytes) -> Optional[str]:
+        """Decrypt Chrome cookie on Windows.
+
+        Args:
+            encrypted_value: Encrypted cookie bytes
+
+        Returns:
+            Decrypted value or None
+        """
+        if not encrypted_value:
+            return None
+
+        # Check version
+        if encrypted_value[:3] != b"v10":
+            # Try DPAPI decryption for older versions
+            decrypted = self._decrypt_with_dpapi(encrypted_value)
+            if not decrypted:
+                return None
+            return self._to_text(decrypted, strip_hash=False)
+
+        # v10 uses AES-GCM: 12-byte nonce, then ciphertext
+        payload = encrypted_value[3:]
+        nonce = payload[:12]
+        ciphertext = payload[12:]
+
+        try:
+            decrypted = AESGCM(self.key).decrypt(nonce, ciphertext, None)
+        except Exception:
+            # Authentication failure or malformed input
+            return None
+
+        return self._to_text(decrypted)
+
+
+def get_chrome_decryptor(
+    browser_root: Path, meta_version: int = 0
+) -> ChromeCookieDecryptor:
+    """Get appropriate Chrome cookie decryptor for current platform.
+
+    Args:
+        browser_root: Path to browser root directory
+        meta_version: Schema version of the cookie database (0 if unknown)
+
+    Returns:
+        Platform-specific decryptor instance
+
+    Raises:
+        CookieError: If decryptor initialization fails
+    """
+    if sys.platform == "darwin":
+        return MacChromeCookieDecryptor(browser_root, meta_version)
+    if sys.platform in ("win32", "cygwin"):
+        return WindowsChromeCookieDecryptor(browser_root, meta_version)
+    # Linux
+    return LinuxChromeCookieDecryptor(browser_root, meta_version)
From 04d0b7e74813f7b00a8f70f729cac574e8e177eb Mon Sep 17 00:00:00 2001
From: Phil Brk8
Date: Mon, 26 Jan 2026 00:35:37 +0100
Subject: [PATCH 2/5] feat: integrate cookie auth into API and CLI

- Add cookies_from_browser parameter to YouTubeTranscriptApi
- Add --cookies-from-browser CLI flag
- Update AgeRestricted error with auth instructions
- Add CookieError, CookiePathInvalid, CookieInvalid exceptions
---
 youtube_transcript_api/_api.py    | 25 +++++++++++++++++++++----
 youtube_transcript_api/_cli.py    | 29 ++++++++++++++++++++++-------
 youtube_transcript_api/_errors.py | 22 ++++++++++------------
 3 files changed, 53 insertions(+), 23 deletions(-)

diff --git
a/youtube_transcript_api/_api.py b/youtube_transcript_api/_api.py index b294493..66a0990 100644 --- a/youtube_transcript_api/_api.py +++ b/youtube_transcript_api/_api.py @@ -7,6 +7,8 @@ from .proxies import ProxyConfig from ._transcripts import TranscriptListFetcher, FetchedTranscript, TranscriptList +from ._cookies import extract_cookies_from_browser +from ._errors import CookieError class YouTubeTranscriptApi: @@ -14,6 +16,7 @@ def __init__( self, proxy_config: Optional[ProxyConfig] = None, http_client: Optional[Session] = None, + cookies_from_browser: Optional[str] = None, ): """ Note on thread-safety: As this class will initialize a `requests.Session` @@ -28,13 +31,27 @@ def __init__( :param http_client: You can optionally pass in a requests.Session object, if you manually want to share cookies between different instances of `YouTubeTranscriptApi`, overwrite defaults, specify SSL certificates, etc. + :param cookies_from_browser: Extract cookies from a browser to enable + authentication for age-restricted videos. Supported browsers: 'chrome', + 'firefox', 'edge', 'brave', 'chromium', 'opera', 'vivaldi'. + Note: Requires the 'cryptography' package for Chrome-based browsers. + Install with: pip install 'youtube-transcript-api[cookies]' """ http_client = Session() if http_client is None else http_client http_client.headers.update({"Accept-Language": "en-US"}) - # Cookie auth has been temporarily disabled, as it is not working properly with - # YouTube's most recent changes. 
- # if cookie_path is not None: - # http_client.cookies = _load_cookie_jar(cookie_path) + + # Extract cookies from browser if specified + if cookies_from_browser is not None: + try: + cookies = extract_cookies_from_browser(cookies_from_browser) + for name, value in cookies.items(): + http_client.cookies.set(name, value, domain=".youtube.com") + except CookieError as e: + # Re-raise cookie errors with context + raise CookieError( + f"Failed to extract cookies from {cookies_from_browser}: {e}" + ) + if proxy_config is not None: http_client.proxies = proxy_config.to_requests_dict() if proxy_config.prevent_keeping_connections_alive: diff --git a/youtube_transcript_api/_cli.py b/youtube_transcript_api/_cli.py index d99b562..126e32d 100644 --- a/youtube_transcript_api/_cli.py +++ b/youtube_transcript_api/_cli.py @@ -39,6 +39,7 @@ def run(self) -> str: ytt_api = YouTubeTranscriptApi( proxy_config=proxy_config, + cookies_from_browser=parsed_args.cookies_from_browser, ) for video_id in parsed_args.video_ids: @@ -188,13 +189,27 @@ def _parse_args(self): metavar="URL", help="Use the specified HTTPS proxy.", ) - # Cookie auth has been temporarily disabled, as it is not working properly with - # YouTube's most recent changes. - # parser.add_argument( - # "--cookies", - # default=None, - # help="The cookie file that will be used for authorization with youtube.", - # ) + parser.add_argument( + "--cookies-from-browser", + dest="cookies_from_browser", + default=None, + choices=[ + "chrome", + "firefox", + "edge", + "brave", + "chromium", + "opera", + "vivaldi", + ], + help=( + "Extract cookies from the specified browser for authentication. " + "This enables access to age-restricted videos. " + "Supported browsers: chrome, firefox, edge, brave, chromium, opera, vivaldi. " + "Note: Chrome-based browsers require the 'cryptography' package. 
" + "Install with: pip install 'youtube-transcript-api[cookies]'" + ), + ) return self._sanitize_video_ids(parser.parse_args(self._args)) diff --git a/youtube_transcript_api/_errors.py b/youtube_transcript_api/_errors.py index 835df4b..d65bb6c 100644 --- a/youtube_transcript_api/_errors.py +++ b/youtube_transcript_api/_errors.py @@ -213,19 +213,17 @@ class TranscriptsDisabled(CouldNotRetrieveTranscript): class AgeRestricted(CouldNotRetrieveTranscript): - # CAUSE_MESSAGE = ( - # "This video is age-restricted. Therefore, you will have to authenticate to be " - # "able to retrieve transcripts for it. You will have to provide a cookie to " - # 'authenticate yourself, as explained in the "Cookie Authentication" section of ' - # "the README (https://github.com/jdepoix/youtube-transcript-api" - # "?tab=readme-ov-file#cookie-authentication)" - # ) CAUSE_MESSAGE = ( - "This video is age-restricted. Therefore, you are unable to retrieve " - "transcripts for it without authenticating yourself.\n\n" - "Unfortunately, Cookie Authentication is temporarily unsupported in " - "youtube-transcript-api, as recent changes in YouTube's API broke the previous " - "implementation. I will do my best to re-implement it as soon as possible." + "This video is age-restricted. Therefore, you need to authenticate to retrieve " + "transcripts for it.\n\n" + "You can authenticate by extracting cookies from your browser. " + "Use the 'cookies_from_browser' parameter when initializing YouTubeTranscriptApi:\n\n" + " ytt_api = YouTubeTranscriptApi(cookies_from_browser='chrome')\n\n" + "Or use the --cookies-from-browser CLI flag:\n\n" + " youtube_transcript_api VIDEO_ID --cookies-from-browser chrome\n\n" + "Supported browsers: chrome, firefox, edge, brave, chromium, opera, vivaldi.\n" + "Note: Chrome-based browsers require the 'cryptography' package. 
" + "Install with: pip install 'youtube-transcript-api[cookies]'" ) From 4d53a7a650a4f0583739d1938bafb31fd37c8e9b Mon Sep 17 00:00:00 2001 From: Phil Brk8 Date: Mon, 26 Jan 2026 00:35:49 +0100 Subject: [PATCH 3/5] build: add optional cookie dependencies (cryptography, secretstorage) --- pyproject.toml | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5dfb26c..eaaf5b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,9 +8,7 @@ version = "1.2.3" description = "This is a python API which allows you to get the transcripts/subtitles for a given YouTube video. It also works for automatically generated subtitles, supports translating subtitles and it does not require a headless browser, like other selenium based solutions do!" readme = "README.md" license = "MIT" -authors = [ - "Jonas Depoix ", -] +authors = ["Jonas Depoix "] homepage = "https://github.com/jdepoix/youtube-transcript-api" repository = "https://github.com/jdepoix/youtube-transcript-api" keywords = [ @@ -52,6 +50,11 @@ precommit.shell = "poe format && poe lint && poe coverage" python = ">=3.8,<3.15" requests = "*" defusedxml = "^0.7.1" +cryptography = { version = "*", optional = true } +secretstorage = { version = "*", optional = true, markers = "sys_platform == 'linux'" } + +[tool.poetry.extras] +cookies = ["cryptography", "secretstorage"] [tool.poetry.group.test] optional = true @@ -91,6 +94,6 @@ exclude_lines = [ # Don't complain about empty stubs of abstract methods "@abstractmethod", "@abstractclassmethod", - "@abstractstaticmethod" + "@abstractstaticmethod", ] -show_missing = true \ No newline at end of file +show_missing = true From 4e19c755c30ef7e19ea98226631d22be27739286 Mon Sep 17 00:00:00 2001 From: Phil Brk8 Date: Mon, 26 Jan 2026 00:35:56 +0100 Subject: [PATCH 4/5] test: add cookie extraction tests --- youtube_transcript_api/test/test_cookies.py | 347 ++++++++++++++++++++ youtube_transcript_api/test/test_import.py | 31 ++ 2 
files changed, 378 insertions(+) create mode 100644 youtube_transcript_api/test/test_cookies.py create mode 100644 youtube_transcript_api/test/test_import.py diff --git a/youtube_transcript_api/test/test_cookies.py b/youtube_transcript_api/test/test_cookies.py new file mode 100644 index 0000000..30149a3 --- /dev/null +++ b/youtube_transcript_api/test/test_cookies.py @@ -0,0 +1,347 @@ +"""Tests for browser cookie extraction functionality.""" + +import sqlite3 +from importlib.util import find_spec +from pathlib import Path +from unittest.mock import Mock, patch + +import pytest + +from youtube_transcript_api._cookies import ( + extract_cookies_from_browser, + _find_chrome_cookie_db, + _find_firefox_cookie_db, + _extract_chrome_cookies, + _extract_firefox_cookies, + _get_platform, +) +from youtube_transcript_api._errors import CookiePathInvalid, CookieError + +HAS_CRYPTOGRAPHY = find_spec("cryptography") is not None + + +class TestGetPlatform: + """Tests for platform detection.""" + + def test_get_platform_darwin(self): + """Test macOS platform detection.""" + with patch("youtube_transcript_api._cookies.sys.platform", "darwin"): + assert _get_platform() == "Darwin" + + def test_get_platform_windows(self): + """Test Windows platform detection.""" + with patch("youtube_transcript_api._cookies.sys.platform", "win32"): + assert _get_platform() == "Windows" + + def test_get_platform_linux(self): + """Test Linux platform detection.""" + with patch("youtube_transcript_api._cookies.sys.platform", "linux"): + assert _get_platform() == "Linux" + + +class TestFindChromeCookieDB: + """Tests for Chrome cookie database location.""" + + def test_find_chrome_cookie_db_not_found(self): + """Test error when Chrome directory doesn't exist.""" + with patch( + "youtube_transcript_api._cookies._get_platform", + return_value="Linux", + ): + with patch.object(Path, "exists", return_value=False): + with pytest.raises(CookiePathInvalid): + _find_chrome_cookie_db("chrome") + + def 
test_find_chrome_cookie_db_no_cookies(self, tmp_path): + """Test error when cookie database doesn't exist.""" + # Create a mock browser directory + browser_dir = tmp_path / "chrome" + browser_dir.mkdir() + + with patch( + "youtube_transcript_api._cookies._get_platform", + return_value="Linux", + ): + with patch.dict( + "youtube_transcript_api._cookies.BROWSER_PATHS", + {"chrome": {"Linux": browser_dir}}, + ): + with pytest.raises(CookiePathInvalid): + _find_chrome_cookie_db("chrome") + + def test_find_chrome_cookie_db_success(self, tmp_path): + """Test successful cookie database location.""" + # Create a mock browser structure + browser_dir = tmp_path / "chrome" + profile_dir = browser_dir / "Default" + profile_dir.mkdir(parents=True) + cookie_db = profile_dir / "Cookies" + cookie_db.touch() + + with patch( + "youtube_transcript_api._cookies._get_platform", + return_value="Linux", + ): + with patch.dict( + "youtube_transcript_api._cookies.BROWSER_PATHS", + {"chrome": {"Linux": browser_dir}}, + ): + result = _find_chrome_cookie_db("chrome") + assert result == cookie_db + + +class TestFindFirefoxCookieDB: + """Tests for Firefox cookie database location.""" + + def test_find_firefox_cookie_db_not_found(self): + """Test error when Firefox directory doesn't exist.""" + with patch( + "youtube_transcript_api._cookies._get_platform", + return_value="Linux", + ): + with patch.object(Path, "exists", return_value=False): + with pytest.raises(CookiePathInvalid): + _find_firefox_cookie_db() + + def test_find_firefox_cookie_db_no_profile(self, tmp_path): + """Test error when no Firefox profile found.""" + firefox_dir = tmp_path / "firefox" + firefox_dir.mkdir() + + with patch( + "youtube_transcript_api._cookies._get_platform", + return_value="Linux", + ): + with patch.dict( + "youtube_transcript_api._cookies.BROWSER_PATHS", + {"firefox": {"Linux": firefox_dir}}, + ): + with pytest.raises(CookiePathInvalid): + _find_firefox_cookie_db() + + def 
test_find_firefox_cookie_db_success(self, tmp_path): + """Test successful Firefox cookie database location.""" + firefox_dir = tmp_path / "firefox" + profile_dir = firefox_dir / "abc123.default" + profile_dir.mkdir(parents=True) + cookie_db = profile_dir / "cookies.sqlite" + cookie_db.touch() + + with patch( + "youtube_transcript_api._cookies._get_platform", + return_value="Linux", + ): + with patch.dict( + "youtube_transcript_api._cookies.BROWSER_PATHS", + {"firefox": {"Linux": firefox_dir}}, + ): + result = _find_firefox_cookie_db() + assert result == cookie_db + + +class TestExtractFirefoxCookies: + """Tests for Firefox cookie extraction.""" + + def test_extract_firefox_cookies_success(self, tmp_path): + """Test successful cookie extraction from Firefox.""" + # Create a temporary SQLite database + cookie_db = tmp_path / "cookies.sqlite" + + conn = sqlite3.connect(cookie_db) + cursor = conn.cursor() + cursor.execute(""" + CREATE TABLE moz_cookies ( + name TEXT, + value TEXT, + host TEXT + ) + """) + cursor.execute( + "INSERT INTO moz_cookies (name, value, host) VALUES (?, ?, ?)", + ("CONSENT", "YES+1", ".youtube.com"), + ) + cursor.execute( + "INSERT INTO moz_cookies (name, value, host) VALUES (?, ?, ?)", + ("SESSION_TOKEN", "abc123", ".youtube.com"), + ) + conn.commit() + conn.close() + + cookies = _extract_firefox_cookies(cookie_db) + + assert "CONSENT" in cookies + assert cookies["CONSENT"] == "YES+1" + assert "SESSION_TOKEN" in cookies + assert cookies["SESSION_TOKEN"] == "abc123" + + def test_extract_firefox_cookies_empty_db(self, tmp_path): + """Test extraction from empty database.""" + cookie_db = tmp_path / "cookies.sqlite" + + conn = sqlite3.connect(cookie_db) + cursor = conn.cursor() + cursor.execute(""" + CREATE TABLE moz_cookies ( + name TEXT, + value TEXT, + host TEXT + ) + """) + conn.commit() + conn.close() + + cookies = _extract_firefox_cookies(cookie_db) + assert cookies == {} + + +class TestExtractChromeCookies: + """Tests for Chrome cookie 
extraction.""" + + def test_extract_chrome_cookies_no_cryptography(self, tmp_path): + """Test error when cryptography package not installed.""" + cookie_db = tmp_path / "Cookies" + cookie_db.touch() + + # Mock the import to raise ImportError + with patch.dict( + "sys.modules", {"youtube_transcript_api._decryptors": None} + ): + # Force reimport to trigger ImportError + + with pytest.raises(CookieError, match="cryptography"): + _extract_chrome_cookies(cookie_db) + + @pytest.mark.skipif( + not HAS_CRYPTOGRAPHY, + reason="cryptography not installed", + ) + def test_extract_chrome_cookies_success(self, tmp_path): + """Test successful cookie extraction from Chrome.""" + # Create a temporary SQLite database + cookie_db = tmp_path / "Cookies" + + conn = sqlite3.connect(cookie_db) + cursor = conn.cursor() + cursor.execute(""" + CREATE TABLE cookies ( + name TEXT, + encrypted_value BLOB, + value TEXT, + host_key TEXT + ) + """) + # Insert plain text cookie (no encryption) + cursor.execute( + "INSERT INTO cookies (name, encrypted_value, value, host_key) VALUES (?, ?, ?, ?)", + ("CONSENT", b"", "YES+1", ".youtube.com"), + ) + conn.commit() + conn.close() + + # Mock the decryptor + mock_decryptor = Mock() + mock_decryptor.decrypt = Mock(return_value="decrypted_value") + + with patch( + "youtube_transcript_api._decryptors.get_chrome_decryptor", + return_value=mock_decryptor, + ): + cookies = _extract_chrome_cookies(cookie_db) + + assert "CONSENT" in cookies + assert cookies["CONSENT"] == "YES+1" + + +class TestExtractCookiesFromBrowser: + """Integration tests for extract_cookies_from_browser.""" + + def test_unsupported_browser(self): + """Test error for unsupported browser.""" + with pytest.raises(CookieError, match="Unsupported browser"): + extract_cookies_from_browser("safari") + + def test_firefox_integration(self, tmp_path): + """Test Firefox cookie extraction integration.""" + # Create Firefox structure + firefox_dir = tmp_path / "firefox" + profile_dir = firefox_dir / 
"test.default" + profile_dir.mkdir(parents=True) + cookie_db = profile_dir / "cookies.sqlite" + + # Create database + conn = sqlite3.connect(cookie_db) + cursor = conn.cursor() + cursor.execute(""" + CREATE TABLE moz_cookies ( + name TEXT, + value TEXT, + host TEXT + ) + """) + cursor.execute( + "INSERT INTO moz_cookies (name, value, host) VALUES (?, ?, ?)", + ("TEST_COOKIE", "test_value", ".youtube.com"), + ) + conn.commit() + conn.close() + + with patch( + "youtube_transcript_api._cookies._get_platform", + return_value="Linux", + ): + with patch.dict( + "youtube_transcript_api._cookies.BROWSER_PATHS", + {"firefox": {"Linux": firefox_dir}}, + ): + cookies = extract_cookies_from_browser("firefox") + + assert "TEST_COOKIE" in cookies + assert cookies["TEST_COOKIE"] == "test_value" + + def test_chrome_integration(self, tmp_path): + """Test Chrome cookie extraction integration.""" + # Create Chrome structure + chrome_dir = tmp_path / "chrome" + profile_dir = chrome_dir / "Default" + profile_dir.mkdir(parents=True) + cookie_db = profile_dir / "Cookies" + + # Create database + conn = sqlite3.connect(cookie_db) + cursor = conn.cursor() + cursor.execute(""" + CREATE TABLE cookies ( + name TEXT, + encrypted_value BLOB, + value TEXT, + host_key TEXT + ) + """) + cursor.execute( + "INSERT INTO cookies (name, encrypted_value, value, host_key) VALUES (?, ?, ?, ?)", + ("TEST_COOKIE", b"", "test_value", ".youtube.com"), + ) + conn.commit() + conn.close() + + # Mock decryptor + mock_decryptor = Mock() + mock_decryptor.decrypt = Mock(return_value=None) + + with patch( + "youtube_transcript_api._cookies._get_platform", + return_value="Linux", + ): + with patch.dict( + "youtube_transcript_api._cookies.BROWSER_PATHS", + {"chrome": {"Linux": chrome_dir}}, + ): + with patch( + "youtube_transcript_api._decryptors.get_chrome_decryptor", + return_value=mock_decryptor, + ): + cookies = extract_cookies_from_browser("chrome") + + # Plain value should be extracted + assert "TEST_COOKIE" in 
cookies + assert cookies["TEST_COOKIE"] == "test_value" diff --git a/youtube_transcript_api/test/test_import.py b/youtube_transcript_api/test/test_import.py new file mode 100644 index 0000000..2d61b11 --- /dev/null +++ b/youtube_transcript_api/test/test_import.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 +"""Simple test to verify imports work correctly.""" + +try: + from youtube_transcript_api import YouTubeTranscriptApi + from youtube_transcript_api._cookies import extract_cookies_from_browser + from youtube_transcript_api._errors import CookieError + + print("✓ All imports successful!") + + # Test that the API accepts the new parameter + try: + api = YouTubeTranscriptApi(cookies_from_browser=None) + print("✓ YouTubeTranscriptApi accepts cookies_from_browser parameter") + except Exception as e: + print(f"✗ Error initializing API: {e}") + + # Test that unsupported browser raises error + try: + extract_cookies_from_browser("safari") + print("✗ Should have raised error for unsupported browser") + except CookieError as e: + print(f"✓ Correctly raises CookieError for unsupported browser: {e}") + + print("\n✓ All basic tests passed!") + +except ImportError as e: + print(f"✗ Import error: {e}") + import traceback + + traceback.print_exc() From 4d3ff2989278f19cb44457c06e296a5790563712 Mon Sep 17 00:00:00 2001 From: Phil Brk8 Date: Mon, 26 Jan 2026 00:36:02 +0100 Subject: [PATCH 5/5] docs: update README with cookie authentication instructions --- README.md | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 79 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 7ab3e29..446d1e5 100644 --- a/README.md +++ b/README.md @@ -404,9 +404,61 @@ ytt_api_2.fetch(video_id) ## Cookie Authentication -Some videos are age restricted, so this module won't be able to access those videos without some sort of -authentication. 
Unfortunately, some recent changes to the YouTube API have broken the current implementation of cookie -based authentication, so this feature is currently not available. +Some videos are age-restricted, so this module won't be able to access those videos without authentication. You can authenticate by extracting cookies from your browser, which allows access to age-restricted content. + +### Automatic Browser Cookie Extraction + +The easiest way to authenticate is by extracting cookies directly from your browser: + +```python +from youtube_transcript_api import YouTubeTranscriptApi + +# Extract cookies from Chrome +ytt_api = YouTubeTranscriptApi(cookies_from_browser='chrome') +transcript = ytt_api.fetch(video_id) +``` + +**Supported browsers:** +- `chrome` - Google Chrome +- `firefox` - Mozilla Firefox +- `edge` - Microsoft Edge +- `brave` - Brave Browser +- `chromium` - Chromium +- `opera` - Opera +- `vivaldi` - Vivaldi + +**Installation:** + +For Chromium-based browsers (Chrome, Edge, Brave, etc.), you need the `cryptography` package: + +```bash +pip install 'youtube-transcript-api[cookies]' +``` + +For Firefox, no additional dependencies are required (cookies are stored unencrypted). + +**How it works:** + +1. The library reads your browser's cookie database (SQLite file) +2. For Chromium-based browsers, cookies are decrypted using platform-specific methods: + - **Linux**: AES-CBC with a PBKDF2-derived key (hardcoded password or one from GNOME Keyring/KWallet) + - **macOS**: AES-CBC with a PBKDF2-derived key (password from the macOS Keychain) + - **Windows**: DPAPI (Data Protection API) + AES-GCM +3. 
YouTube cookies are extracted and used for authentication + +**Important notes:** +- Make sure you're logged into YouTube in the specified browser +- The browser can be open or closed (the library copies the database to avoid lock issues) +- Your cookies are read locally and are only sent to YouTube to authenticate your requests +- Firefox support works without additional dependencies as cookies aren't encrypted + +**Profile selection:** + +```python +# Cookies are read from Chrome's 'Default' profile +ytt_api = YouTubeTranscriptApi(cookies_from_browser='chrome') +# Note: Custom profile selection is not yet implemented +``` ## Using Formatters Formatters are meant to be an additional layer of processing of the transcript you pass it. The goal is to convert a @@ -555,10 +607,32 @@ youtube_transcript_api --http-proxy http://us ### Cookie Authentication using the CLI -To authenticate using cookies through the CLI as explained in [Cookie Authentication](#cookie-authentication) run: +To authenticate using browser cookies through the CLI as explained in [Cookie Authentication](#cookie-authentication), run: + +```bash +youtube_transcript_api VIDEO_ID --cookies-from-browser chrome +``` + +This works with any supported browser: + +```bash +# Chrome +youtube_transcript_api VIDEO_ID --cookies-from-browser chrome + +# Firefox +youtube_transcript_api VIDEO_ID --cookies-from-browser firefox +# Edge +youtube_transcript_api VIDEO_ID --cookies-from-browser edge + +# Brave +youtube_transcript_api VIDEO_ID --cookies-from-browser brave ``` -youtube_transcript_api --cookies /path/to/your/cookies.txt + +Remember to install the optional dependencies for Chromium-based browsers: + +```bash +pip install 'youtube-transcript-api[cookies]' ``` ## Warning