Skip to content

Commit b0cf9d7

Browse files
authored
Merge pull request #53 from cuappdev/master
Prod merge Jan 30
2 parents bd14659 + 4b66034 commit b0cf9d7

31 files changed

Lines changed: 986 additions & 47 deletions

.env_template

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@ YOUTUBE_API_KEY=
22
MONGO_URI=
33
MONGO_DB=
44
STAGE=
5+
DAILY_SUN_URL=

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,6 @@ To start the project, run the following command in the terminal
2222

2323
## Setting up the database
2424

25-
Add /graphql to the url to access the interactive GraphQL platform
25+
Create a Mongo database named `score_db` and another named `daily_sun_db`. A partnership with the Daily Sun gives us access to their articles, which we copy and serve to the frontend as paginated results.
26+
27+
Add /graphql to the url to access the interactive GraphQL platform

app.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@
55
from flask_graphql import GraphQLView
66
from graphene import Schema
77
from src.schema import Query, Mutation
8+
from src.scrapers.games_scraper import fetch_game_schedule
9+
from src.scrapers.youtube_stats import fetch_videos
10+
from src.scrapers.daily_sun_scrape import fetch_news
11+
from src.services.article_service import ArticleService
812
from src.utils.team_loader import TeamLoader
913
import signal
1014
import sys
@@ -83,6 +87,30 @@ def create_context():
8387
),
8488
)
8589

90+
# Setup command line arguments
91+
def parse_args(argv=None):
    """
    Parse the command-line flags that control the background scraping tasks.

    Args:
    - argv: Optional list of argument strings to parse. When None (the
      default) argparse reads sys.argv[1:], so existing callers behave
      exactly as before.

    Returns:
    An argparse.Namespace with two boolean attributes:
    - no_scrape: True when --no-scrape was passed.
    - no_daily_sun: True when --no-daily-sun was passed.
    """
    parser = argparse.ArgumentParser(description="Skip scraping tasks, for dev purposes.")
    parser.add_argument(
        "--no-scrape",
        action="store_true",
        help="Skips scraping tasks if set, useful for frontend development.",
    )
    parser.add_argument(
        "--no-daily-sun",
        action="store_true",
        help="Skips using the Daily Sun page for alerts",
    )
    # Passing argv=None keeps argparse's default of reading sys.argv[1:].
    return parser.parse_args(argv)
104+
105+
# Command-line flags are only read when this file is the entry point. When the
# module is imported (e.g. by gunicorn) fall back to an object with every flag off.
if __name__ != "__main__":
    class DefaultArgs:
        # Mirrors the attributes produced by parse_args(), all disabled.
        no_scrape = False
        no_daily_sun = False

    args = DefaultArgs()
else:
    args = parse_args()
86114

87115
def signal_handler(sig, frame):
    """
    Terminate the process with exit status 0 when a handled signal arrives.

    Args:
    - sig: The signal number (unused).
    - frame: The interrupted stack frame (unused).
    """
    raise SystemExit(0)
@@ -91,5 +119,50 @@ def signal_handler(sig, frame):
91119
# Route Ctrl-C (SIGINT) and termination requests (SIGTERM) to signal_handler
signal.signal(signal.SIGINT, signal_handler)
92120
signal.signal(signal.SIGTERM, signal_handler)
93121

122+
# `args` is resolved once, earlier in this module (parsed from the command line
# when run directly, defaults when imported by gunicorn); it is deliberately
# not recomputed here.

# Only run scraping tasks if not disabled
if not args.no_scrape:
    # Imported here so the scheduler package is only loaded when scraping is enabled.
    from flask_apscheduler import APScheduler

    scheduler = APScheduler()
    scheduler.init_app(app)
    scheduler.start()

    @scheduler.task("interval", id="scrape_schedules", seconds=43200)  # 12 hours
    def scrape_schedules():
        # Scheduled job: run the game schedule scraper.
        logging.info("Scraping game schedules...")
        fetch_game_schedule()

    @scheduler.task("interval", id="scrape_videos", seconds=43200)  # 12 hours
    def scrape_videos():
        # Scheduled job: run the YouTube video scraper.
        logging.info("Scraping YouTube videos...")
        fetch_videos()

    # Run both jobs once at startup instead of waiting for the first interval.
    scrape_schedules()
    scrape_videos()

# The Daily Sun jobs are registered on `scheduler`, which only exists when
# scraping is enabled, hence the second half of the condition.
if not args.no_daily_sun and not args.no_scrape:
    @scheduler.task("interval", id="scrape_daily_sun", seconds=3600)  # 1 hour
    def scrape_daily_sun():
        # Scheduled job: fetch the Daily Sun sports news.
        logging.info("Getting Daily Sun Sports News...")
        fetch_news()

    @scheduler.task("interval", id="cleanse_daily_sun_db", seconds=604800)  # 1 week
    def cleanse_daily_sun_db():
        # Scheduled job: ask ArticleService to remove old articles.
        logging.info("Cleaning the Daily Sun database from old articles...")
        ArticleService.cleanse_old_articles()

    # Run both jobs once at startup as well.
    scrape_daily_sun()
    cleanse_daily_sun_db()
165+
166+
94167
# Start the app's built-in server only when this file is executed directly.
# NOTE(review): debug=True combined with host="0.0.0.0" makes the interactive
# debugger reachable from other machines — confirm this path is dev-only.
if __name__ == "__main__":
95168
app.run(debug=True, host="0.0.0.0", port=8000)

src/database.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def keep_connection_alive():
4848

4949
# Access the databases: the main one (name from MONGO_DB, default "score_db")
# and the Daily Sun one (name from DAILY_SUN_DB, default "daily_sun_db")
5050
db = client[os.getenv("MONGO_DB", "score_db")]
51+
daily_sun_db = client[os.getenv("DAILY_SUN_DB", "daily_sun_db")]
5152

5253

5354
def setup_database_indexes():
@@ -65,6 +66,31 @@ def setup_database_indexes():
6566

6667
# Index for sorting operations
6768
game_collection.create_index([("date", -1)], background=True)
69+
70+
# Index to have unique games so we won't add duplicates
71+
game_collection.create_index(
72+
[
73+
("sport", 1),
74+
("gender", 1),
75+
("date", 1),
76+
("opponent_id", 1),
77+
("state", 1),
78+
],
79+
unique=True,
80+
background=True
81+
)
82+
83+
# Additional index for tournament games (without opponent_id)
84+
game_collection.create_index(
85+
[
86+
("sport", 1),
87+
("gender", 1),
88+
("date", 1),
89+
("city", 1),
90+
("state", 1),
91+
],
92+
background=True
93+
)
6894

6995
print("✅ MongoDB indexes created successfully")
7096
except Exception as e:

src/models/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
from .game import Game
22
from .team import Team
3-
from .youtube_video import YoutubeVideo
3+
from .youtube_video import YoutubeVideo
4+
from .article import Article

src/models/article.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
from bson.objectid import ObjectId
2+
from datetime import datetime
3+
4+
class Article:
    """
    A news article together with its MongoDB document conversions.

    Attributes:
    - id: Identifier stored as `_id`; generated when none is supplied
    - title: The title of the article
    - image: The filename of the article's main image
    - sports_type: The specific sport category
    - published_at: The publication date
    - url: The URL to the full article
    - slug: Unique identifier from the source
    - created_at: When the article was added to our DB
    """

    # Attributes that are stored under a document key of the same name.
    _DOCUMENT_FIELDS = (
        "title",
        "image",
        "sports_type",
        "published_at",
        "url",
        "slug",
        "created_at",
    )

    def __init__(self, title, sports_type, published_at, url, slug, image=None, id=None, created_at=None):
        # A missing (or otherwise falsy) id / created_at is replaced with a
        # fresh ObjectId string / the current local time.
        self.id = id or str(ObjectId())
        self.created_at = created_at or datetime.now()
        self.title = title
        self.sports_type = sports_type
        self.published_at = published_at
        self.url = url
        self.slug = slug
        self.image = image

    def to_dict(self):
        """
        Returns the article as a dictionary ready for MongoDB storage.
        """
        document = {"_id": self.id}
        for field_name in self._DOCUMENT_FIELDS:
            document[field_name] = getattr(self, field_name)
        return document

    @staticmethod
    def from_dict(data):
        """
        Builds an Article from a MongoDB document.

        Keys missing from the document are passed as None, so `id` and
        `created_at` fall back to their generated defaults.
        """
        field_values = {
            field_name: data.get(field_name)
            for field_name in Article._DOCUMENT_FIELDS
        }
        return Article(id=data.get("_id"), **field_values)

src/models/game.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ class Game:
1717
- `time` The time of the game. (optional)
1818
- `box_score` The scoring summary of the game (optional)
1919
- `score_breakdown` The scoring breakdown of the game (optional)
20+
- `ticket_link` The ticket link for the game (optional)
2021
"""
2122

2223
def __init__(
@@ -35,6 +36,7 @@ def __init__(
3536
score_breakdown=None,
3637
team=None,
3738
utc_date=None,
39+
ticket_link=None,
3840
):
3941
self.id = id if id else str(ObjectId())
4042
self.city = city
@@ -50,6 +52,7 @@ def __init__(
5052
self.score_breakdown = score_breakdown
5153
self.team = team
5254
self.utc_date = utc_date
55+
self.ticket_link = ticket_link
5356

5457
def to_dict(self):
5558
"""
@@ -70,6 +73,7 @@ def to_dict(self):
7073
"score_breakdown": self.score_breakdown,
7174
"team": self.team,
7275
"utc_date": self.utc_date,
76+
"ticket_link": self.ticket_link,
7377
}
7478

7579
@staticmethod
@@ -92,4 +96,5 @@ def from_dict(data) -> None:
9296
score_breakdown=data.get("score_breakdown"),
9397
team=data.get("team"),
9498
utc_date=data.get("utc_date"),
99+
ticket_link=data.get("ticket_link"),
95100
)

src/models/youtube_video.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,11 @@ class YoutubeVideo:
1212
- `thumbnail` The thumbnail of the video, as a URL string pointing to a `.jpg` file.
1313
- `url` The URL of the video.
1414
- `published_at` The date and time the video was published.
15+
- `duration` The duration of the video.
1516
"""
1617

1718
def __init__(
18-
self, title, description, thumbnail, b64_thumbnail, url, published_at, id=None
19+
self, title, description, thumbnail, b64_thumbnail, url, published_at, duration=None, id=None
1920
):
2021
self.id = id if id else str(ObjectId())
2122
self.title = title
@@ -24,6 +25,7 @@ def __init__(
2425
self.b64_thumbnail = b64_thumbnail
2526
self.url = url
2627
self.published_at = published_at
28+
self.duration = duration
2729

2830
def to_dict(self):
2931
"""
@@ -37,6 +39,7 @@ def to_dict(self):
3739
"b64_thumbnail": self.b64_thumbnail,
3840
"url": self.url,
3941
"published_at": self.published_at,
42+
"duration": self.duration,
4043
}
4144

4245
@staticmethod
@@ -52,4 +55,5 @@ def from_dict(data):
5255
b64_thumbnail=data.get("b64_thumbnail"),
5356
url=data.get("url"),
5457
published_at=data.get("published_at"),
58+
duration=data.get("duration"),
5559
)

src/mutations/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
from .create_game import CreateGame
22
from .create_team import CreateTeam
3-
from .create_youtube_video import CreateYoutubeVideo
3+
from .create_youtube_video import CreateYoutubeVideo
4+
from .create_article import CreateArticle

src/mutations/create_article.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
from graphene import Mutation, String, Field
2+
from src.types import ArticleType
3+
from src.services.article_service import ArticleService
4+
5+
# GraphQL mutation that hands a new article's fields to ArticleService and
# returns whatever the service produced in the `article` field.
class CreateArticle(Mutation):
    class Arguments:
        # Every field except the image is mandatory.
        title = String(required=True)
        sports_type = String(required=True)
        published_at = String(required=True)
        url = String(required=True)
        slug = String(required=True)
        image = String(required=False)

    # Resolved lazily through the lambda.
    article = Field(lambda: ArticleType)

    def mutate(self, info, title, sports_type, published_at, url, slug, image=None):
        """
        Creates the article through ArticleService and wraps it in the payload.
        """
        payload = dict(
            title=title,
            sports_type=sports_type,
            published_at=published_at,  # Already in ISO 8601 format
            url=url,
            slug=slug,
            image=image,
        )
        created = ArticleService.create_article(payload)
        return CreateArticle(article=created)

0 commit comments

Comments
 (0)