Skip to content

Commit d63d932

Browse files
authored
Merge pull request #44 from cuappdev/claire/youtubeVideoDuration
Add duration field to YoutubeVideo model and related mutations
2 parents f89c543 + 5348a28 commit d63d932

5 files changed

Lines changed: 64 additions & 3 deletions

File tree

src/models/youtube_video.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,11 @@ class YoutubeVideo:
1212
- `thumbnail` The thumbnail of the video, as a URL string pointing to a `.jpg` file.
1313
- `url` The URL of the video.
1414
- `published_at` The date and time the video was published.
15+
- `duration` The duration of the video.
1516
"""
1617

1718
def __init__(
18-
self, title, description, thumbnail, b64_thumbnail, url, published_at, id=None
19+
self, title, description, thumbnail, b64_thumbnail, url, published_at, duration=None, id=None
1920
):
2021
self.id = id if id else str(ObjectId())
2122
self.title = title
@@ -24,6 +25,7 @@ def __init__(
2425
self.b64_thumbnail = b64_thumbnail
2526
self.url = url
2627
self.published_at = published_at
28+
self.duration = duration
2729

2830
def to_dict(self):
2931
"""
@@ -37,6 +39,7 @@ def to_dict(self):
3739
"b64_thumbnail": self.b64_thumbnail,
3840
"url": self.url,
3941
"published_at": self.published_at,
42+
"duration": self.duration,
4043
}
4144

4245
@staticmethod
@@ -52,4 +55,5 @@ def from_dict(data):
5255
b64_thumbnail=data.get("b64_thumbnail"),
5356
url=data.get("url"),
5457
published_at=data.get("published_at"),
58+
duration=data.get("duration"),
5559
)

src/mutations/create_youtube_video.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,11 @@ class Arguments:
1111
b64_thumbnail = String(required=True)
1212
url = String(required=True)
1313
published_at = String(required=True)
14+
duration = String(required=True)
1415

1516
youtube_video = Field(lambda: YoutubeVideoType)
1617

17-
def mutate(self, info, id, title, description, thumbnail, url, published_at):
18+
def mutate(self, info, id, title, description, thumbnail, b64_thumbnail, url, published_at, duration):
1819
video_data = {
1920
"id": id,
2021
"title": title,
@@ -23,6 +24,7 @@ def mutate(self, info, id, title, description, thumbnail, url, published_at):
2324
"b64_thumbnail": b64_thumbnail,
2425
"url": url,
2526
"published_at": published_at,
27+
"duration": duration,
2628
}
2729
new_video = YoutubeVideoService.create_video(video_data)
2830
return CreateYoutubeVideo(youtube_video=new_video)

src/scrapers/youtube_stats.py

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import base64
77
import os
88
import html
9+
from bs4 import BeautifulSoup
910

1011
load_dotenv()
1112
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
@@ -25,6 +26,54 @@ def fetch_videos():
2526
process_video_item(item)
2627

2728

29+
def get_video_duration(video_id):
    """
    Gets video duration using YouTube API

    Requests the `contentDetails` part of the video identified by
    `video_id` and converts the ISO 8601 `duration` it contains with
    `convert_iso_duration`.

    Returns the formatted duration string, or None when the response has
    no items or when any error occurs (the error is printed, not raised).
    """
    try:
        # Pass the query values through `params` so they are URL-encoded,
        # and set a timeout so a stalled connection cannot block the
        # scraper indefinitely.
        response = requests.get(
            "https://www.googleapis.com/youtube/v3/videos",
            params={
                "key": YOUTUBE_API_KEY,
                "id": video_id,
                "part": "contentDetails",
            },
            timeout=10,
        )
        response.raise_for_status()
        data = response.json()

        if data.get("items"):
            duration_iso = data["items"][0]["contentDetails"]["duration"]
            return convert_iso_duration(duration_iso)
        return None
    except Exception as e:
        # Deliberately best-effort: a failed lookup yields None instead of
        # propagating the error to the caller.
        print(f"Error getting video duration: {e}")
        return None
46+
47+
48+
def convert_iso_duration(iso_duration):
    """
    Converts ISO 8601 duration (PT2M5S) to readable format (2:05)
    Examples:
    - PT2M5S -> 2:05
    - PT1H23M45S -> 1:23:45
    - PT30S -> 0:30
    - P1DT2H3M4S -> 26:03:04 (days are folded into the hour count)

    Components that are absent from the input count as zero, so a string
    with no recognisable component yields "0:00".
    """
    import re

    def component(designator):
        # Value of the number directly preceding `designator`, or 0.
        found = re.search(rf"(\d+){designator}", iso_duration)
        return int(found.group(1)) if found else 0

    # Durations of 24 hours or more carry a day component (P#DT#H#M#S);
    # fold it into hours so it is not silently dropped.
    h = component("D") * 24 + component("H")
    m = component("M")
    s = component("S")

    # Format as M:SS or H:MM:SS
    if h > 0:
        return f"{h}:{m:02d}:{s:02d}"
    return f"{m}:{s:02d}"
75+
76+
2877
def process_video_item(item):
2978
"""
3079
Extracts the required data from a video item and
@@ -55,14 +104,17 @@ def process_video_item(item):
55104
published_at = snippet.get("publishedAt")
56105
video_url = f"https://www.youtube.com/watch?v={video_id}"
57106

107+
duration = get_video_duration(video_id)
108+
58109
video_data = {
59-
"id": video_id, # use video id for easy retrieval
110+
"id": video_id,
60111
"title": title,
61112
"description": description,
62113
"thumbnail": thumbnail,
63114
"b64_thumbnail": encoded_thumbnail,
64115
"url": video_url,
65116
"published_at": published_at,
117+
"duration": duration,
66118
}
67119
process_video_data(video_data)
68120

src/services/youtube_video_service.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ def create_video(data):
3030
b64_thumbnail=data.get("b64_thumbnail"),
3131
url=data.get("url"),
3232
published_at=data.get("published_at"),
33+
duration=data.get("duration"),
3334
)
3435
YoutubeVideoRepository.insert(video)
3536
return video

src/types.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ class YoutubeVideoType(ObjectType):
155155
- thumbnail: The URL of the video's thumbnail.
156156
- url: The URL to the video.
157157
- published_at: The date and time the video was published.
158+
- duration: The duration of the video (optional).
158159
"""
159160
id = String(required=False)
160161
title = String(required=True)
@@ -163,6 +164,7 @@ class YoutubeVideoType(ObjectType):
163164
b64_thumbnail = String(required=True)
164165
url = String(required=True)
165166
published_at = String(required=True)
167+
duration = String(required=False)
166168

167169
def __init__(self, **kwargs):
168170
for key, value in kwargs.items():

0 commit comments

Comments
 (0)