Skip to content

Commit 74e0070

Browse files
authored
Merge pull request #19 from cuappdev/maw346/add-daily-sun
implement articles from daily sun
2 parents a64620b + 1bcf70a commit 74e0070

17 files changed

Lines changed: 405 additions & 9 deletions

.env_template

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@ YOUTUBE_API_KEY=
22
MONGO_URI=
33
MONGO_DB=
44
STAGE=
5+
DAILY_SUN_URL=

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,6 @@ To start the project, run the following command in the terminal
2222

2323
## Setting up the database
2424

25-
Add /graphql to the url to access the interactive GraphQL platform
25+
Create a Mongo database named `score_db` and another named `daily_sun_db`. Through a partnership with the Daily Sun, we have access to their articles, which we copy and serve to the frontend as paginated results.
26+
27+
Add /graphql to the url to access the interactive GraphQL platform

app.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@
55
from flask_graphql import GraphQLView
66
from graphene import Schema
77
from src.schema import Query, Mutation
8+
from src.scrapers.games_scraper import fetch_game_schedule
9+
from src.scrapers.youtube_stats import fetch_videos
10+
from src.scrapers.daily_sun_scrape import fetch_news
11+
from src.services.article_service import ArticleService
812
from src.utils.team_loader import TeamLoader
913
import signal
1014
import sys
@@ -83,6 +87,22 @@ def create_context():
8387
),
8488
)
8589

90+
# Setup command line arguments
91+
def parse_args(argv=None):
    """
    Parse the command line flags that control the background scraping jobs.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default), argparse reads the process arguments (sys.argv[1:]).

    Returns:
        An argparse.Namespace with two boolean attributes, `no_scrape` and
        `no_daily_sun`, both False unless the matching flag was passed.
    """
    parser = argparse.ArgumentParser(description="Skip scraping tasks, for dev purposes.")
    parser.add_argument(
        "--no-scrape",
        action="store_true",
        help="Skips scraping tasks if set, useful for frontend development.",
    )
    parser.add_argument(
        "--no-daily-sun",
        action="store_true",
        help="Skips using the Daily Sun page for alerts",
    )
    # This function is called at module import time, so the process arguments
    # may belong to whatever launched the app (a WSGI server, a test runner).
    # parse_known_args() ignores flags it does not recognise instead of
    # terminating the process the way parse_args() would.
    known_args, _unrecognised = parser.parse_known_args(argv)
    return known_args
104+
105+
args = parse_args()
86106

87107
def signal_handler(sig, frame):
88108
sys.exit(0)
@@ -91,5 +111,40 @@ def signal_handler(sig, frame):
91111
signal.signal(signal.SIGINT, signal_handler)
92112
signal.signal(signal.SIGTERM, signal_handler)
93113

114+
# Only run scraping tasks if not disabled
115+
def _run_startup_job(job):
    """Run a scrape job once right away, logging (not raising) any failure."""
    try:
        job()
    except Exception:
        # A failed first scrape should not prevent the web server from
        # starting; the interval trigger will run the job again later.
        logging.exception("Initial run of %s failed", job.__name__)


if not args.no_scrape:
    # Imported lazily so the scheduler dependency is only loaded when
    # scraping is enabled.
    from flask_apscheduler import APScheduler

    scheduler = APScheduler()
    scheduler.init_app(app)
    scheduler.start()

    @scheduler.task("interval", id="scrape_schedules", seconds=43200)  # 12 hours
    def scrape_schedules():
        logging.info("Scraping game schedules...")
        fetch_game_schedule()

    @scheduler.task("interval", id="scrape_videos", seconds=43200)  # 12 hours
    def scrape_videos():
        logging.info("Scraping YouTube videos...")
        fetch_videos()

    # Run each job once at startup instead of waiting for the first interval.
    _run_startup_job(scrape_schedules)
    _run_startup_job(scrape_videos)

    # Daily Sun jobs need the scheduler, so they only exist when scraping is
    # enabled and --no-daily-sun was not passed.
    if not args.no_daily_sun:

        @scheduler.task("interval", id="scrape_daily_sun", seconds=3600)  # 1 hour
        def scrape_daily_sun():
            logging.info("Getting Daily Sun Sports News...")
            fetch_news()

        @scheduler.task("interval", id="cleanse_daily_sun_db", seconds=604800)  # 1 week
        def cleanse_daily_sun_db():
            logging.info("Cleaning the Daily Sun database from old articles...")
            ArticleService.cleanse_old_articles()

        _run_startup_job(scrape_daily_sun)
        _run_startup_job(cleanse_daily_sun_db)
147+
148+
94149
if __name__ == "__main__":
95150
app.run(debug=True, host="0.0.0.0", port=8000)

src/database.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def keep_connection_alive():
4848

4949
# Access the database
5050
db = client[os.getenv("MONGO_DB", "score_db")]
51+
daily_sun_db = client[os.getenv("DAILY_SUN_DB", "daily_sun_db")]
5152

5253

5354
def setup_database_indexes():

src/models/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
from .game import Game
22
from .team import Team
3-
from .youtube_video import YoutubeVideo
3+
from .youtube_video import YoutubeVideo
4+
from .article import Article

src/models/article.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
from bson.objectid import ObjectId
2+
from datetime import datetime
3+
4+
class Article:
    """
    A model representing a news article.

    Attributes:
        - title: The title of the article
        - image: The filename of the article's main image
        - sports_type: The specific sport category
        - published_at: The publication date
        - url: The URL to the full article
        - slug: Unique identifier from the source
        - created_at: When the article was added to our DB
    """

    def __init__(self, title, sports_type, published_at, url, slug, image=None, id=None, created_at=None):
        # A missing (or otherwise falsy) id is replaced by a freshly
        # generated ObjectId, stored as its string form.
        self.id = id if id else str(ObjectId())
        self.title = title
        self.image = image
        self.sports_type = sports_type
        self.published_at = published_at
        self.url = url
        self.slug = slug
        # NOTE(review): datetime.now() is a naive local timestamp; confirm
        # this matches how the stored timestamps are interpreted elsewhere.
        self.created_at = created_at if created_at else datetime.now()

    def __repr__(self):
        return (
            f"{type(self).__name__}(id={self.id!r}, slug={self.slug!r}, "
            f"title={self.title!r})"
        )

    def to_dict(self):
        """
        Converts the Article object to a dictionary format for MongoDB storage.

        The object's `id` is written under the `_id` key; every other
        attribute keeps its own name.
        """
        return {
            "_id": self.id,
            "title": self.title,
            "image": self.image,
            "sports_type": self.sports_type,
            "published_at": self.published_at,
            "url": self.url,
            "slug": self.slug,
            "created_at": self.created_at,
        }

    @classmethod
    def from_dict(cls, data):
        """
        Converts a MongoDB document to an Article object.

        Keys missing from `data` are read as None. Implemented as a
        classmethod so that subclasses get instances of their own type.
        """
        return cls(
            id=data.get("_id"),
            title=data.get("title"),
            image=data.get("image"),
            sports_type=data.get("sports_type"),
            published_at=data.get("published_at"),
            url=data.get("url"),
            slug=data.get("slug"),
            created_at=data.get("created_at"),
        )

src/mutations/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
from .create_game import CreateGame
22
from .create_team import CreateTeam
3-
from .create_youtube_video import CreateYoutubeVideo
3+
from .create_youtube_video import CreateYoutubeVideo
4+
from .create_article import CreateArticle

src/mutations/create_article.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from graphene import Mutation, String, Field
2+
from src.types import ArticleType
3+
from src.services.article_service import ArticleService
4+
5+
class CreateArticle(Mutation):
    """
    GraphQL mutation that creates an article through ArticleService.

    `published_at` is supplied as an ISO 8601 string and converted to a
    datetime before the article data is handed to the service.
    """

    class Arguments:
        title = String(required=True)
        sports_type = String(required=True)
        published_at = String(required=True)
        url = String(required=True)
        slug = String(required=True)
        image = String(required=False)

    article = Field(lambda: ArticleType)

    def mutate(self, info, title, sports_type, published_at, url, slug, image=None):
        """
        Build the article payload and pass it to ArticleService.create_article.

        Raises:
            ValueError: if `published_at` is not a valid ISO 8601 timestamp.
        """
        from datetime import datetime

        # datetime.fromisoformat() only accepts a trailing "Z" (UTC) from
        # Python 3.11 on; rewriting it as an explicit offset gives the same
        # result on every supported version.
        iso_text = published_at
        if iso_text.endswith("Z"):
            iso_text = iso_text[:-1] + "+00:00"

        article_data = {
            "title": title,
            "sports_type": sports_type,
            "published_at": datetime.fromisoformat(iso_text),
            "url": url,
            "slug": slug,
            "image": image,
        }
        new_article = ArticleService.create_article(article_data)
        return CreateArticle(article=new_article)

src/queries/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
from .game_query import GameQuery
22
from .team_query import TeamQuery
33
from .youtube_video_query import YoutubeVideoQuery
4+
from .article_query import ArticleQuery

src/queries/article_query.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
from graphene import ObjectType, List, String
2+
from src.services.article_service import ArticleService
3+
from src.types import ArticleType
4+
5+
class ArticleQuery(ObjectType):
    """GraphQL query fields for news articles."""

    articles = List(
        ArticleType,
        sports_type=String(),
    )

    def resolve_articles(self, info, sports_type=None):
        """
        Return the articles provided by ArticleService.get_articles.

        `sports_type` is forwarded unchanged; it is None when the query
        does not supply the argument.
        """
        matching_articles = ArticleService.get_articles(sports_type)
        return matching_articles

0 commit comments

Comments
 (0)