Skip to content

Commit 8573e2e

Browse files
committed
Resolving conflicts
2 parents 1309c3f + 5925182 commit 8573e2e

7 files changed

Lines changed: 255 additions & 11 deletions

File tree

app.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,16 @@ def signal_handler(sig, frame):
119119
signal.signal(signal.SIGINT, signal_handler)
120120
signal.signal(signal.SIGTERM, signal_handler)
121121

122+
# Command-line flags are only parsed when this module is executed as a script.
# When it is imported instead (e.g. by gunicorn), fall back to an object whose
# "disable" flags are all switched off.
if __name__ != "__main__":

    class DefaultArgs:
        no_scrape = False
        no_daily_sun = False

    args = DefaultArgs()
else:
    args = parse_args()
131+
122132
# Only run scraping tasks if not disabled
123133
if not args.no_scrape:
124134
from flask_apscheduler import APScheduler

src/database.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,31 @@ def setup_database_indexes():
6666

6767
# Index for sorting operations
6868
game_collection.create_index([("date", -1)], background=True)
69+
70+
# Index to have unique games so we won't add duplicates
71+
game_collection.create_index(
72+
[
73+
("sport", 1),
74+
("gender", 1),
75+
("date", 1),
76+
("opponent_id", 1),
77+
("state", 1),
78+
],
79+
unique=True,
80+
background=True
81+
)
82+
83+
# Additional index for tournament games (without opponent_id)
84+
game_collection.create_index(
85+
[
86+
("sport", 1),
87+
("gender", 1),
88+
("date", 1),
89+
("city", 1),
90+
("state", 1),
91+
],
92+
background=True
93+
)
6994

7095
print("✅ MongoDB indexes created successfully")
7196
except Exception as e:

src/repositories/game_repository.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,56 @@ def find_by_key_fields(city, date, gender, location, opponent_id, sport, state):
130130

131131
return [Game.from_dict(game) for game in games]
132132

133+
@staticmethod
def find_by_tournament_key_fields(city, date, gender, location, sport, state):
    """
    Look up games by date, gender, sport and venue, ignoring the opponent.

    Intended for tournament fixtures whose opponent may still be a
    placeholder team. A falsy ``city``, ``state`` or ``location`` (for
    example ``None`` or ``""``) is queried as ``None``; a truthy value is
    matched exactly.

    Returns:
        ``None`` when nothing matches, a single ``Game`` when exactly one
        document matches, otherwise a list of ``Game`` objects.
    """
    criteria = {"date": date, "gender": gender, "sport": sport}

    # Each venue field matches exactly one value: the supplied one, or None
    # when the caller passed a falsy value.
    venue_fields = (("city", city), ("state", state), ("location", location))
    for field_name, value in venue_fields:
        criteria[field_name] = {"$in": [value if value else None]}

    matches = [Game.from_dict(doc) for doc in db["game"].find(criteria)]

    if not matches:
        return None
    return matches[0] if len(matches) == 1 else matches
182+
133183
@staticmethod
134184
def find_by_sport(sport):
135185
"""
@@ -156,3 +206,31 @@ def find_by_sport_gender(sport, gender):
156206
game_collection = db["game"]
157207
games = game_collection.find({"sport": sport, "gender": gender})
158208
return [Game.from_dict(game) for game in games]
209+
210+
@staticmethod
def find_games_by_sport_gender_after_date(sport, gender, after_date=None):
    """
    Return the games for a sport and gender as ``Game`` objects.

    When ``after_date`` is truthy, only games whose ``utc_date`` is strictly
    greater than it are returned; otherwise no date filter is applied.
    No team lookup is performed here.
    """
    criteria = {"sport": sport, "gender": gender}
    if after_date:
        criteria["utc_date"] = {"$gt": after_date}

    return [Game.from_dict(doc) for doc in db["game"].find(criteria)]
228+
229+
@staticmethod
def delete_games_by_ids(game_ids):
    """
    Remove every game whose ``_id`` appears in ``game_ids``.

    Returns:
        The ``deleted_count`` reported by the delete operation.
    """
    id_filter = {"_id": {"$in": game_ids}}
    return db["game"].delete_many(id_filter).deleted_count

src/scrapers/games_scraper.py

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@
44
from src.utils.convert_to_utc import convert_to_utc
55
from src.utils.constants import *
66
from src.scrapers.game_details_scrape import scrape_game
7-
from src.utils.helpers import get_dominant_color
7+
from src.utils.helpers import get_dominant_color, normalize_game_data, is_tournament_placeholder_team, is_cornell_loss
88
import base64
99
import re
10-
import html
10+
from src.database import db
1111
import threading
1212

1313

@@ -171,6 +171,8 @@ def process_game_data(game_data):
171171
Args:
172172
game_data (dict): A dictionary containing the game data.
173173
"""
174+
175+
game_data = normalize_game_data(game_data)
174176
location_data = game_data["location"].split("\n")
175177
geo_location = location_data[0]
176178
if (",") not in geo_location:
@@ -239,16 +241,28 @@ def process_game_data(game_data):
239241
if str(final_box_cor_score) != str(cor_final) or str(final_box_opp_score) != str(opp_final):
240242
game_data["score_breakdown"] = game_data["score_breakdown"][::-1]
241243

242-
# finds any existing game with the same key fields regardless of time
243-
curr_game = GameService.get_game_by_key_fields(
244+
# Try to find by tournament key fields to handle placeholder teams
245+
curr_game = GameService.get_game_by_tournament_key_fields(
244246
city,
245247
game_data["date"],
246248
game_data["gender"],
247249
location,
248-
team.id,
249250
game_data["sport"],
250251
state
251252
)
253+
254+
# If no tournament game found, try the regular lookup with opponent_id
255+
if not curr_game:
256+
curr_game = GameService.get_game_by_key_fields(
257+
city,
258+
game_data["date"],
259+
game_data["gender"],
260+
location,
261+
team.id,
262+
game_data["sport"],
263+
state
264+
)
265+
252266
if isinstance(curr_game, list):
253267
if curr_game:
254268
curr_game = curr_game[0]
@@ -266,6 +280,14 @@ def process_game_data(game_data):
266280
"state": state,
267281
"ticket_link": game_data["ticket_link"]
268282
}
283+
284+
current_team = TeamService.get_team_by_id(curr_game.opponent_id)
285+
if current_team and is_tournament_placeholder_team(current_team.name):
286+
updates["opponent_id"] = team.id
287+
288+
if is_cornell_loss(game_data["result"]) and game_data["utc_date"]:
289+
GameService.handle_tournament_loss(game_data["sport"], game_data["gender"], game_data["utc_date"])
290+
269291
GameService.update_game(curr_game.id, updates)
270292
return
271293

@@ -284,5 +306,5 @@ def process_game_data(game_data):
284306
"utc_date": utc_date_str,
285307
"ticket_link": game_data["ticket_link"]
286308
}
287-
309+
288310
GameService.create_game(game_data)

src/services/game_service.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from src.repositories.game_repository import GameRepository
22
from src.models.game import Game
33
from src.services.team_service import TeamService
4+
from src.utils.helpers import is_tournament_placeholder_team
45

56

67
class GameService:
@@ -33,6 +34,7 @@ def create_game(data):
3334
opponent_id = data.get("opponent_id")
3435
if not TeamService.get_team_by_id(opponent_id):
3536
raise ValueError(f"Opponent team with id {opponent_id} does not exist.")
37+
3638
game = Game(**data)
3739
GameRepository.insert(game)
3840
return game
@@ -69,6 +71,16 @@ def get_game_by_key_fields(city, date, gender, location, opponent_id, sport, sta
6971
city, date, gender, location, opponent_id, sport, state
7072
)
7173

74+
@staticmethod
def get_game_by_tournament_key_fields(city, date, gender, location, sport, state):
    """
    Look up a tournament game by venue and date, ignoring the opponent.

    Useful when the stored game may still reference a placeholder team.
    Delegates to ``GameRepository.find_by_tournament_key_fields`` and
    returns its result unchanged.
    """
    return GameRepository.find_by_tournament_key_fields(
        city=city,
        date=date,
        gender=gender,
        location=location,
        sport=sport,
        state=state,
    )
83+
7284
@staticmethod
7385
def get_games_by_sport(sport):
7486
"""
@@ -89,3 +101,50 @@ def get_games_by_sport_gender(sport, gender):
89101
Retrieves all game by its sport and gender.
90102
"""
91103
return GameRepository.find_by_sport_gender(sport, gender)
104+
105+
@staticmethod
def get_tournament_games_by_sport_gender(sport, gender, after_date=None):
    """
    Return the games for a sport and gender whose opponent is a placeholder.

    A game qualifies when its opponent team exists and that team's name is
    recognised by ``is_tournament_placeholder_team``. ``after_date`` is
    forwarded to the repository as an optional date filter.
    """

    def has_placeholder_opponent(game):
        # One team lookup per game; games whose opponent is missing are skipped.
        opponent = TeamService.get_team_by_id(game.opponent_id)
        return opponent and is_tournament_placeholder_team(opponent.name)

    candidates = GameRepository.find_games_by_sport_gender_after_date(
        sport, gender, after_date
    )
    return [game for game in candidates if has_placeholder_opponent(game)]
120+
121+
@staticmethod
def delete_tournament_games_by_sport_gender(sport, gender, after_date=None):
    """
    Delete the placeholder-opponent games for a sport and gender.

    The games are selected with ``get_tournament_games_by_sport_gender``
    (same sport/gender/after_date filter) and removed by id.

    Returns:
        The number of games deleted, or ``0`` when none qualified.
    """
    placeholder_games = GameService.get_tournament_games_by_sport_gender(
        sport, gender, after_date
    )
    doomed_ids = [game.id for game in placeholder_games]

    if not doomed_ids:
        return 0
    return GameRepository.delete_games_by_ids(doomed_ids)
138+
139+
@staticmethod
def handle_tournament_loss(sport, gender, loss_date):
    """
    React to a Cornell tournament loss by deleting later placeholder games.

    Args:
        sport (str): The sport of the team that lost.
        gender (str): The gender of the team that lost.
        loss_date: Cut-off forwarded as the ``after_date`` filter.
            NOTE(review): originally documented as a datetime; confirm it
            has the same type as the stored ``utc_date`` values.

    Returns:
        The number of games deleted.
    """
    return GameService.delete_tournament_games_by_sport_gender(
        sport, gender, after_date=loss_date
    )

src/services/team_service.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from src.repositories import TeamRepository
22
from src.models.team import Team
33

4-
54
class TeamService:
65
@staticmethod
76
def get_all_teams():
@@ -13,14 +12,25 @@ def get_all_teams():
1312
@staticmethod
def create_team(team_data):
    """
    Create a new team, or update it if it already exists.

    Args:
        team_data (dict): The data for the team; must contain a non-empty
            "name".

    Returns:
        Team: The newly inserted team, or, when a team with that name was
        already stored, the team object found by the lookup (after
        ``update_team`` has been called with ``team_data``).

    Raises:
        ValueError: If ``team_data`` has no usable name.
    """
    team_name = team_data.get("name")
    if not team_name:
        raise ValueError("Team name is required to create a team.")

    match = TeamService.get_team_by_name(team_name)
    if not match:
        new_team = Team(**team_data)
        TeamRepository.insert(new_team)
        return new_team

    # The lookup may hand back a list; use its first entry in that case.
    current = match[0] if isinstance(match, list) else match
    TeamService.update_team(current.id, team_data)
    return current

src/utils/helpers.py

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,4 +54,44 @@ def get_dominant_color(image_url, white_threshold=200, black_threshold=50):
5454
return hex_color
5555
except Exception as e:
5656
logging.error(f"Error in get_dominant_color for {image_url}: {e}")
57-
return default_color
57+
return default_color
58+
59+
def normalize_game_data(data: dict):
    """
    Replace TBA/TBD placeholder values with ``None``.

    Only the "time", "city" and "state" fields are inspected. Matching
    ignores letter case and surrounding whitespace, so "TBA", "Tbd" and
    " tba " are all treated as placeholders. Non-string values and missing
    keys are left untouched.

    Args:
        data: Game data to normalize; it is modified in place.

    Returns:
        The same ``data`` dict, for convenient chaining.
    """
    placeholders = {"tba", "tbd"}

    for field in ("time", "city", "state"):
        value = data.get(field)
        # Guard on str so unhashable or non-text values cannot raise here.
        if isinstance(value, str) and value.strip().casefold() in placeholders:
            data[field] = None

    return data
70+
71+
# Case-folded names that stand in for a not-yet-known tournament opponent.
_PLACEHOLDER_TEAM_NAMES = frozenset(
    name.casefold()
    for name in (
        "First Round",
        "Second Round",
        "Third Round",
        "Quarterfinals",
        "College Cup Semifinals",
        "College Cup Championship Game",
        "ECAC Hockey First Round",
        "ECAC Hockey Quarterfinals",
        "ECAC Hockey Semifinals",
        "ECAC Hockey Championship Game",
        "Regional Semifinals",
        "Regional Championship",
        "National Semifinals",
        "TBD",
        "National Championship",
        "NCAA Wrestling Championships",
        "NCAA Northeast Regional Championships",
        "NCAA Cross Country Championships",
    )
)


def is_tournament_placeholder_team(team_name: str):
    """
    Check if a team name is a tournament placeholder.

    The comparison ignores letter case and surrounding whitespace, so a
    differently capitalised spelling of a known placeholder still matches.

    Args:
        team_name: The team name to test.

    Returns:
        ``True`` when the name is a known placeholder, ``False`` otherwise
        (including for ``None`` or any non-string value).
    """
    if not isinstance(team_name, str):
        return False
    return team_name.strip().casefold() in _PLACEHOLDER_TEAM_NAMES
86+
87+
def is_cornell_loss(result: str):
    """
    Check if the result indicates a Cornell loss.

    A result counts as a loss when it contains a standalone capital "L"
    (not part of a longer word, e.g. "L, 2-5" or "L2-5") or the whole word
    "loss" / "defeated" in any letter case. Letters inside other words,
    such as the "L" in an opponent's name, do not count.

    Args:
        result: The result text for a game; may be empty or ``None``.

    Returns:
        ``True`` when a loss indicator is found, ``False`` otherwise.
    """
    import re

    if not result or not isinstance(result, str):
        return False

    # A bare substring test would also match names like "Lehigh", so require
    # that the indicator is not adjacent to other letters.
    if re.search(r"(?<![A-Za-z])L(?![A-Za-z])", result):
        return True
    word_match = re.search(
        r"(?<![A-Za-z])(?:loss|defeated)(?![A-Za-z])", result, re.IGNORECASE
    )
    return word_match is not None

0 commit comments

Comments
 (0)