@@ -31,16 +31,24 @@ def extract_teams_and_scores(box_score_section, sport):
3131 period_scores = []
3232
3333 for row in score_table .find (TAG_TBODY ).find_all (TAG_TR ):
34- team_name_cell = row .find (TAG_TH ) if sport == 'ice hockey' else row .find (TAG_TD )
34+ # Check if team name is in <th> (some sports) or first <td> (other sports)
35+ team_name_cell = row .find (TAG_TH )
3536 if team_name_cell :
37+ # Team name is in <th>, all <td> elements are period scores
3638 team_name = team_name_cell .text .strip ().replace ("Winner" , "" ).strip ()
37- team_name = ' ' . join ( team_name . split ())
39+ scores = [ td . text . strip () for td in row . find_all ( TAG_TD )]
3840 else :
39- team_name = "Unknown"
41+ # Team name is in first <td>, remaining <td> elements are period scores
42+ team_name_cell = row .find (TAG_TD )
43+ team_name = team_name_cell .text .strip ().replace ("Winner" , "" ).strip () if team_name_cell else "Unknown"
44+ scores = [td .text .strip () for td in row .find_all (TAG_TD )[1 :]]
4045
46+ # Basketball box score includes a "Records" column at the end - exclude it
47+ if sport == 'basketball' and scores :
48+ scores = scores [:- 1 ]
49+
50+ team_name = ' ' .join (team_name .split ())
4151 team_names .append (team_name )
42- scores = [td .text .strip () for td in row .find_all (TAG_TD )[1 :]]
43- scores = scores [:- 1 ] if sport == 'basketball' else scores
4452 period_scores .append (scores )
4553
4654 return team_names , period_scores
@@ -59,7 +67,7 @@ def soccer_summary(box_score_section):
5967 event = row .find_all (TAG_TD )[2 ]
6068 desc = event .find_all (TAG_SPAN )[- 1 ].text .strip ()
6169
62- if team == "COR" or team == "CU" :
70+ if team == "COR" or team == "CU" or team == "CRNL" :
6371 cornell_score += 1
6472 else :
6573 opp_score += 1
@@ -220,6 +228,36 @@ def baseball_summary(box_score_section):
220228 summary = [{"message" : "No scoring events in this game." }]
221229 return summary
222230
231+ # def basketball_summary(box_score_section):
232+ # summary = []
233+ # scoring_section = box_score_section.find(TAG_SECTION, {ATTR_ARIA_LABEL: LABEL_SCORING_SUMMARY})
234+ # if scoring_section:
235+ # scoring_rows = scoring_section.find(TAG_TBODY)
236+ # if scoring_rows:
237+ # cornell_score = 0
238+ # opp_score = 0
239+ # for row in scoring_rows.find_all(TAG_TR):
240+ # time = row.find_all(TAG_TD)[0].text.strip()
241+ # team = row.find_all(TAG_TD)[1].find(TAG_IMG)[ATTR_ALT]
242+ # event = row.find_all(TAG_TD)[2]
243+ # desc = event.find_all(TAG_SPAN)[-1].text.strip()
244+
245+ # if team == "COR" or team == "CU" or team == "CRNL":
246+ # cornell_score += 1
247+ # else:
248+ # opp_score += 1
249+
250+ # summary.append({
251+ # 'time': time,
252+ # 'team': team,
253+ # 'description': desc,
254+ # 'cor_score': cornell_score,
255+ # 'opp_score': opp_score
256+ # })
257+ # if not summary:
258+ # summary = [{"message": "No scoring events in this game."}]
259+ # return summary
260+
223261def scrape_game (url , sport ):
224262 soup = fetch_page (url )
225263 box_score_section = soup .find (class_ = CLASS_BOX_SCORE ) if sport in ['baseball' , 'softball' ] else soup .find (id = ID_BOX_SCORE )
@@ -233,6 +271,7 @@ def scrape_game(url, sport):
233271 'field hockey' : (lambda : extract_teams_and_scores (box_score_section , 'field hockey' ), field_hockey_summary ),
234272 'lacrosse' : (lambda : extract_teams_and_scores (box_score_section , 'lacrosse' ), lacrosse_summary ),
235273 'baseball' : (lambda : extract_teams_and_scores (box_score_section , 'baseball' ), baseball_summary ),
274+ 'basketball' : (lambda : extract_teams_and_scores (box_score_section , 'basketball' ), lambda _ : []),
236275 }
237276
238277 extract_teams_func , summary_func = sport_parsers .get (sport , (None , None ))
0 commit comments