66import base64
77import os
88import html
9+ from bs4 import BeautifulSoup
910
1011load_dotenv ()
1112YOUTUBE_API_KEY = os .getenv ("YOUTUBE_API_KEY" )
@@ -25,6 +26,54 @@ def fetch_videos():
2526 process_video_item (item )
2627
2728
def get_video_duration(video_id):
    """
    Gets video duration using YouTube API

    Args:
        video_id: ID of the YouTube video to look up.

    Returns:
        The duration as formatted by convert_iso_duration (e.g. "2:05"),
        or None when the API returns no items or the lookup fails.
    """
    try:
        # Pass the query through `params` so every value is URL-encoded,
        # and set a timeout so a stalled connection cannot block forever.
        response = requests.get(
            "https://www.googleapis.com/youtube/v3/videos",
            params={
                "key": YOUTUBE_API_KEY,
                "id": video_id,
                "part": "contentDetails",
            },
            timeout=10,
        )
        response.raise_for_status()
        data = response.json()

        if data.get("items"):
            duration_iso = data["items"][0]["contentDetails"]["duration"]
            return convert_iso_duration(duration_iso)
        return None
    except Exception as e:
        # Best-effort lookup: a failure here must not abort processing of
        # the video, so report it and fall back to "no duration".
        message = str(e)
        if YOUTUBE_API_KEY:
            # HTTP error messages embed the request URL, which carries the
            # API key as a query parameter; keep it out of the output.
            message = message.replace(YOUTUBE_API_KEY, "***")
        print(f"Error getting video duration: {message}")
        return None
46+
47+
def convert_iso_duration(iso_duration):
    """
    Converts ISO 8601 duration (PT2M5S) to readable format (2:05)

    Day and week components are folded into the hour count, so durations
    of 24 hours or more keep their full length.

    Examples:
    - PT2M5S -> 2:05
    - PT1H23M45S -> 1:23:45
    - PT30S -> 0:30
    - P1DT2H3M4S -> 26:03:04

    Args:
        iso_duration: Duration string such as "PT2M5S" or "P1DT2H".

    Returns:
        "M:SS" when the duration is under one hour, otherwise "H:MM:SS".
        Components that are absent count as zero.
    """
    import re

    # Split at the "T" designator: days/weeks live before it, hours/minutes/
    # seconds after it. Searching each side separately keeps a date-side
    # "M" (months) from being mistaken for minutes.
    date_part, designator, time_part = iso_duration.partition('T')
    if not designator and not iso_duration.startswith('P'):
        # Lenient fallback: a bare "2M5S" without any ISO prefix is treated
        # as a time-only value.
        date_part, time_part = '', iso_duration

    def component(unit, text):
        # Integer value preceding the unit letter, or 0 when absent.
        found = re.search(rf'(\d+){unit}', text)
        return int(found.group(1)) if found else 0

    weeks = component('W', date_part)
    days = component('D', date_part)
    h = component('H', time_part) + 24 * (days + 7 * weeks)
    m = component('M', time_part)
    s = component('S', time_part)

    # Format as M:SS or H:MM:SS
    if h > 0:
        return f"{h}:{m:02d}:{s:02d}"
    return f"{m}:{s:02d}"
75+
76+
2877def process_video_item (item ):
2978 """
3079 Extracts the required data from a video item and
@@ -55,14 +104,17 @@ def process_video_item(item):
55104 published_at = snippet .get ("publishedAt" )
56105 video_url = f"https://www.youtube.com/watch?v={ video_id } "
57106
107+ duration = get_video_duration (video_id )
108+
58109 video_data = {
59- "id" : video_id , # use video id for easy retrieval
110+ "id" : video_id ,
60111 "title" : title ,
61112 "description" : description ,
62113 "thumbnail" : thumbnail ,
63114 "b64_thumbnail" : encoded_thumbnail ,
64115 "url" : video_url ,
65116 "published_at" : published_at ,
117+ "duration" : duration ,
66118 }
67119 process_video_data (video_data )
68120
0 commit comments