diff --git a/youtube_transcript_api/_cli.py b/youtube_transcript_api/_cli.py
index d99b562..bd04e96 100644
--- a/youtube_transcript_api/_cli.py
+++ b/youtube_transcript_api/_cli.py
@@ -1,4 +1,5 @@
 import argparse
+import re
 from importlib.metadata import PackageNotFoundError, version
 from typing import List
 
@@ -199,5 +200,35 @@ def _parse_args(self):
         return self._sanitize_video_ids(parser.parse_args(self._args))
 
     def _sanitize_video_ids(self, args):
-        args.video_ids = [video_id.replace("\\", "") for video_id in args.video_ids]
+        """Replace URL arguments with video IDs and strip backslashes.
+
+        Backslashes (presumably left over from shell escaping) are removed
+        from every positional argument. An argument that is a YouTube URL
+        (``watch?v=``, ``youtu.be/``, ``embed/``, ``v/``, ``shorts/`` or
+        ``live/``) is then replaced by the 11-character video ID it points
+        to; anything else is passed through unchanged.
+
+        :param args: parsed argument namespace whose ``video_ids`` list is
+            replaced with the sanitized values.
+        :return: the same ``args`` object.
+        """
+        # Built once per call instead of once per argument. The ID class is
+        # spelled out because ``\w`` also matches non-ASCII letters, and the
+        # trailing lookahead keeps an over-long token from being silently
+        # truncated to its first 11 characters.
+        url_pattern = re.compile(
+            r"(?:https?://)?"
+            r"(?:(?:www\.|m\.|music\.)?youtube\.com/"
+            r"(?:watch\?(?:.*&)?v=|(?:embed|v|shorts|live)/)"
+            r"|youtu\.be/)"
+            r"([A-Za-z0-9_-]{11})(?![A-Za-z0-9_-])"
+        )
+        sanitized = []
+        for video_id in args.video_ids:
+            video_id = video_id.replace("\\", "")
+            # match() anchors at the start of the argument, so look-alike
+            # hosts such as "notyoutube.com" are not treated as YouTube.
+            match = url_pattern.match(video_id)
+            sanitized.append(match.group(1) if match else video_id)
+        args.video_ids = sanitized
         return args
diff --git a/youtube_transcript_api/test/test_cli.py b/youtube_transcript_api/test/test_cli.py
index 78c23c3..cd83060 100644
--- a/youtube_transcript_api/test/test_cli.py
+++ b/youtube_transcript_api/test/test_cli.py
@@ -139,6 +139,63 @@ def test_argument_parsing__video_ids_starting_with_dash(self):
         self.assertEqual(parsed_args.format, "pretty")
         self.assertEqual(parsed_args.languages, ["en"])
 
+    def test_argument_parsing__youtube_watch_url(self):
+        parsed_args = YouTubeTranscriptCli(
+            ["https://www.youtube.com/watch?v=dQw4w9WgXcQ"]
+        )._parse_args()
+        self.assertEqual(parsed_args.video_ids, ["dQw4w9WgXcQ"])
+
+    def test_argument_parsing__youtu_be_url(self):
+        parsed_args = YouTubeTranscriptCli(
+            ["https://youtu.be/dQw4w9WgXcQ"]
+        )._parse_args()
+        self.assertEqual(parsed_args.video_ids, ["dQw4w9WgXcQ"])
+
+    def test_argument_parsing__youtube_embed_url(self):
+        parsed_args = YouTubeTranscriptCli(
+            ["https://www.youtube.com/embed/dQw4w9WgXcQ"]
+        )._parse_args()
+        self.assertEqual(parsed_args.video_ids, ["dQw4w9WgXcQ"])
+
+    def test_argument_parsing__youtube_v_url(self):
+        parsed_args = YouTubeTranscriptCli(
+            ["https://www.youtube.com/v/dQw4w9WgXcQ"]
+        )._parse_args()
+        self.assertEqual(parsed_args.video_ids, ["dQw4w9WgXcQ"])
+
+    def test_argument_parsing__youtube_url_with_extra_params(self):
+        parsed_args = YouTubeTranscriptCli(
+            ["https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=30s&list=PLtest"]
+        )._parse_args()
+        self.assertEqual(parsed_args.video_ids, ["dQw4w9WgXcQ"])
+
+    def test_argument_parsing__mix_of_ids_and_urls(self):
+        parsed_args = YouTubeTranscriptCli(
+            [
+                "dQw4w9WgXcQ",
+                "https://www.youtube.com/watch?v=82IOSYpY6Qo",
+                "https://youtu.be/abcdefghijk",
+            ]
+        )._parse_args()
+        self.assertEqual(
+            parsed_args.video_ids,
+            ["dQw4w9WgXcQ", "82IOSYpY6Qo", "abcdefghijk"],
+        )
+
+    def test_argument_parsing__youtube_shorts_url(self):
+        parsed_args = YouTubeTranscriptCli(
+            ["https://www.youtube.com/shorts/dQw4w9WgXcQ"]
+        )._parse_args()
+        self.assertEqual(parsed_args.video_ids, ["dQw4w9WgXcQ"])
+
+    def test_argument_parsing__url_with_overlong_id_is_not_truncated(self):
+        parsed_args = YouTubeTranscriptCli(
+            ["https://youtu.be/dQw4w9WgXcQEXTRA"]
+        )._parse_args()
+        self.assertEqual(
+            parsed_args.video_ids, ["https://youtu.be/dQw4w9WgXcQEXTRA"]
+        )
+
     def test_argument_parsing__fail_without_video_ids(self):
         with self.assertRaises(SystemExit):
             YouTubeTranscriptCli("--format json".split())._parse_args()