Skip to content

Commit e848140

Browse files
committed
Fix #49 for basketball score breakdowns
1 parent 5a71190 commit e848140

1 file changed

Lines changed: 45 additions & 6 deletions

File tree

src/scrapers/game_details_scrape.py

Lines changed: 45 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,16 +31,24 @@ def extract_teams_and_scores(box_score_section, sport):
3131
period_scores = []
3232

3333
for row in score_table.find(TAG_TBODY).find_all(TAG_TR):
34-
team_name_cell = row.find(TAG_TH) if sport == 'ice hockey' else row.find(TAG_TD)
34+
# Check if team name is in <th> (some sports) or first <td> (other sports)
35+
team_name_cell = row.find(TAG_TH)
3536
if team_name_cell:
37+
# Team name is in <th>, all <td> elements are period scores
3638
team_name = team_name_cell.text.strip().replace("Winner", "").strip()
37-
team_name = ' '.join(team_name.split())
39+
scores = [td.text.strip() for td in row.find_all(TAG_TD)]
3840
else:
39-
team_name = "Unknown"
41+
# Team name is in first <td>, remaining <td> elements are period scores
42+
team_name_cell = row.find(TAG_TD)
43+
team_name = team_name_cell.text.strip().replace("Winner", "").strip() if team_name_cell else "Unknown"
44+
scores = [td.text.strip() for td in row.find_all(TAG_TD)[1:]]
4045

46+
# Basketball box score includes a "Records" column at the end - exclude it
47+
if sport == 'basketball' and scores:
48+
scores = scores[:-1]
49+
50+
team_name = ' '.join(team_name.split())
4151
team_names.append(team_name)
42-
scores = [td.text.strip() for td in row.find_all(TAG_TD)[1:]]
43-
scores = scores[:-1] if sport == 'basketball' else scores
4452
period_scores.append(scores)
4553

4654
return team_names, period_scores
@@ -59,7 +67,7 @@ def soccer_summary(box_score_section):
5967
event = row.find_all(TAG_TD)[2]
6068
desc = event.find_all(TAG_SPAN)[-1].text.strip()
6169

62-
if team == "COR" or team == "CU":
70+
if team == "COR" or team == "CU" or team == "CRNL":
6371
cornell_score += 1
6472
else:
6573
opp_score += 1
@@ -220,6 +228,36 @@ def baseball_summary(box_score_section):
220228
summary = [{"message": "No scoring events in this game."}]
221229
return summary
222230

231+
# def basketball_summary(box_score_section):
232+
# summary = []
233+
# scoring_section = box_score_section.find(TAG_SECTION, {ATTR_ARIA_LABEL: LABEL_SCORING_SUMMARY})
234+
# if scoring_section:
235+
# scoring_rows = scoring_section.find(TAG_TBODY)
236+
# if scoring_rows:
237+
# cornell_score = 0
238+
# opp_score = 0
239+
# for row in scoring_rows.find_all(TAG_TR):
240+
# time = row.find_all(TAG_TD)[0].text.strip()
241+
# team = row.find_all(TAG_TD)[1].find(TAG_IMG)[ATTR_ALT]
242+
# event = row.find_all(TAG_TD)[2]
243+
# desc = event.find_all(TAG_SPAN)[-1].text.strip()
244+
245+
# if team == "COR" or team == "CU" or team == "CRNL":
246+
# cornell_score += 1
247+
# else:
248+
# opp_score += 1
249+
250+
# summary.append({
251+
# 'time': time,
252+
# 'team': team,
253+
# 'description': desc,
254+
# 'cor_score': cornell_score,
255+
# 'opp_score': opp_score
256+
# })
257+
# if not summary:
258+
# summary = [{"message": "No scoring events in this game."}]
259+
# return summary
260+
223261
def scrape_game(url, sport):
224262
soup = fetch_page(url)
225263
box_score_section = soup.find(class_=CLASS_BOX_SCORE) if sport in ['baseball', 'softball'] else soup.find(id=ID_BOX_SCORE)
@@ -233,6 +271,7 @@ def scrape_game(url, sport):
233271
'field hockey': (lambda: extract_teams_and_scores(box_score_section, 'field hockey'), field_hockey_summary),
234272
'lacrosse': (lambda: extract_teams_and_scores(box_score_section, 'lacrosse'), lacrosse_summary),
235273
'baseball': (lambda: extract_teams_and_scores(box_score_section, 'baseball'), baseball_summary),
274+
'basketball': (lambda: extract_teams_and_scores(box_score_section, 'basketball'), lambda _: []),
236275
}
237276

238277
extract_teams_func, summary_func = sport_parsers.get(sport, (None, None))

0 commit comments

Comments
 (0)