Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion youtube_transcript_api/_cli.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import argparse
import re
from importlib.metadata import PackageNotFoundError, version
from typing import List

Expand Down Expand Up @@ -199,5 +200,22 @@ def _parse_args(self):
return self._sanitize_video_ids(parser.parse_args(self._args))

def _sanitize_video_ids(self, args):
    """Normalize ``args.video_ids`` so every entry is a bare video ID.

    Each entry first has all backslashes removed (presumably so that
    shell-escaped values, e.g. IDs starting with a dash or URLs with an
    escaped ``?``/``&``, can be passed on the command line -- confirm
    against the CLI help text). If the result contains a recognised
    YouTube URL, it is replaced by the 11-character video ID found in
    that URL; anything else is kept as-is.

    Recognised URL shapes (scheme and sub-domain are irrelevant)::

        youtube.com/watch?v=<id>      youtube.com/embed/<id>
        youtube.com/v/<id>            youtube.com/shorts/<id>
        youtube.com/live/<id>         youtu.be/<id>
        youtube-nocookie.com/embed/<id> (and the other path forms)

    Args:
        args: Parsed-arguments object exposing a ``video_ids`` list of
            strings. It is mutated in place.

    Returns:
        The same ``args`` object, with ``video_ids`` replaced by the
        sanitized list.
    """
    # Built once per call instead of once per argument.
    #  * ``(?<![\w-])`` keeps look-alike hosts such as ``notyoutube.com``
    #    from matching, while still allowing ``www.``/``m.``/``music.``.
    #  * ``(?![\w-])`` rejects tokens longer than 11 characters instead of
    #    silently truncating them to a different (wrong) video ID.
    video_url = re.compile(
        r"(?<![\w-])"
        r"(?:"
        r"youtube(?:-nocookie)?\.com/"
        r"(?:watch\?(?:.*&)?v=|(?:embed|v|shorts|live)/)"
        r"|youtu\.be/"
        r")"
        r"([\w-]{11})(?![\w-])"
    )

    sanitized = []
    for raw_id in args.video_ids:
        candidate = raw_id.replace("\\", "")
        match = video_url.search(candidate)
        # Fall back to the (unescaped) input so plain IDs pass through.
        sanitized.append(match.group(1) if match else candidate)
    args.video_ids = sanitized
    return args
43 changes: 43 additions & 0 deletions youtube_transcript_api/test/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,49 @@ def test_argument_parsing__video_ids_starting_with_dash(self):
self.assertEqual(parsed_args.format, "pretty")
self.assertEqual(parsed_args.languages, ["en"])

def test_argument_parsing__youtube_watch_url(self):
    """A full ``watch?v=`` URL is parsed into its bare video ID."""
    url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
    video_ids = YouTubeTranscriptCli([url])._parse_args().video_ids
    self.assertEqual(video_ids, ["dQw4w9WgXcQ"])

def test_argument_parsing__youtu_be_url(self):
    """A ``youtu.be`` short link is parsed into its bare video ID."""
    url = "https://youtu.be/dQw4w9WgXcQ"
    video_ids = YouTubeTranscriptCli([url])._parse_args().video_ids
    self.assertEqual(video_ids, ["dQw4w9WgXcQ"])

def test_argument_parsing__youtube_embed_url(self):
    """An ``/embed/`` URL is parsed into its bare video ID."""
    url = "https://www.youtube.com/embed/dQw4w9WgXcQ"
    video_ids = YouTubeTranscriptCli([url])._parse_args().video_ids
    self.assertEqual(video_ids, ["dQw4w9WgXcQ"])

def test_argument_parsing__youtube_v_url(self):
    """A ``/v/`` URL is parsed into its bare video ID."""
    url = "https://www.youtube.com/v/dQw4w9WgXcQ"
    video_ids = YouTubeTranscriptCli([url])._parse_args().video_ids
    self.assertEqual(video_ids, ["dQw4w9WgXcQ"])

def test_argument_parsing__youtube_url_with_extra_params(self):
    """Extra query parameters after ``v=`` do not leak into the video ID."""
    url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=30s&list=PLtest"
    video_ids = YouTubeTranscriptCli([url])._parse_args().video_ids
    self.assertEqual(video_ids, ["dQw4w9WgXcQ"])

def test_argument_parsing__mix_of_ids_and_urls(self):
    """Bare IDs and URLs can be mixed; the argument order is preserved."""
    cli_args = [
        "dQw4w9WgXcQ",
        "https://www.youtube.com/watch?v=82IOSYpY6Qo",
        "https://youtu.be/abcdefghijk",
    ]
    expected_ids = ["dQw4w9WgXcQ", "82IOSYpY6Qo", "abcdefghijk"]
    video_ids = YouTubeTranscriptCli(cli_args)._parse_args().video_ids
    self.assertEqual(video_ids, expected_ids)

def test_argument_parsing__fail_without_video_ids(self):
with self.assertRaises(SystemExit):
YouTubeTranscriptCli("--format json".split())._parse_args()
Expand Down