-
Notifications
You must be signed in to change notification settings - Fork 0
Created a softball scraper, edited unique sport constraint #69
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,4 +4,5 @@ __pycache__/ | |
| .env | ||
| .envrc | ||
| .DS_Store | ||
| ca-certificate.crt | ||
| ca-certificate.crt | ||
| firebase-service-account-key.json | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,6 +23,27 @@ | |
| from src.utils.team_loader import TeamLoader | ||
| from src.database import db | ||
|
|
||
| import os | ||
| import firebase_admin | ||
| from firebase_admin import credentials, auth | ||
|
|
||
| SERVICE_ACCOUNT_PATH = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS") | ||
|
|
||
|
|
||
def initialize_firebase():
    """Return the default Firebase app, creating it on first use.

    If the default app already exists it is returned as-is. Otherwise it is
    initialized from the service-account key file whose path is stored in
    SERVICE_ACCOUNT_PATH (read from the GOOGLE_APPLICATION_CREDENTIALS
    environment variable).

    Returns:
        The default ``firebase_admin.App`` instance.

    Raises:
        ValueError: If no default app exists yet and
            GOOGLE_APPLICATION_CREDENTIALS is not set.
    """
    try:
        # Use the public lookup rather than the private ``firebase_admin._apps``
        # registry: ``_apps`` is also non-empty when only a *named* app exists,
        # in which case the default app would never have been created.
        return firebase_admin.get_app()
    except ValueError:
        # No default app yet; fall through and create it.
        pass

    if not SERVICE_ACCOUNT_PATH:
        raise ValueError(
            "GOOGLE_APPLICATION_CREDENTIALS is not set. Set it to your firebase-service-account-key.json path."
        )
    cred = credentials.Certificate(SERVICE_ACCOUNT_PATH)
    app = firebase_admin.initialize_app(cred)
    logging.info("Firebase app initialized.")
    return app
|
|
||
|
|
||
| initialize_firebase() | ||
|
Comment on lines
+26
to
+45
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
#!/bin/bash
# Check if .env_template includes the Firebase credential variable
echo "=== Checking .env_template ==="
cat .env_template 2>/dev/null || echo ".env_template not found"
echo ""
echo "=== Checking docker-compose.yml for volume mounts ==="
cat docker-compose.yml 2>/dev/null | grep -A5 "volumes:" || echo "No volumes section found"
Repository: cuappdev/score-backend Length of output: 498
🏁 Script executed:
# Check file size and read app.py to verify all claims
wc -l app.py
echo "=== Full app.py contents ==="
cat -n app.py
Repository: cuappdev/score-backend Length of output: 8592
🏁 Script executed:
# Check if Dockerfile exists and examine it
if [ -f Dockerfile ]; then
echo "=== Dockerfile found ==="
cat -n Dockerfile
else
echo "Dockerfile not found"
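fi
Repository: cuappdev/score-backend Length of output: 391
Deployment configuration incomplete for Firebase. The Firebase initialization requires the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to be set to the path of the service-account key file; `initialize_firebase()` raises `ValueError` when it is unset, and it is called at module import time in `app.py`.
Unless that variable is provided in the deployment environment, this will cause startup failures in containerized deployments. 🤖 Prompt for AI Agents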
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Are these set on both the dev and prod servers, @claiireyu?
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
|
|
||
| app = Flask(__name__) | ||
|
|
||
| # CORS: allow frontend (different origin) to call this API | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,3 +11,4 @@ Flask-APScheduler | |
| python-dotenv | ||
| pytz | ||
| gunicorn | ||
| firebase-admin | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -53,6 +53,34 @@ def extract_teams_and_scores(box_score_section, sport): | |
|
|
||
| return team_names, period_scores | ||
|
|
||
| def softball_summary(box_score_section): | ||
| summary = [] | ||
| scoring_section = box_score_section.find(TAG_SECTION, {ATTR_ARIA_LABEL: LABEL_SCORING_SUMMARY}) | ||
| if scoring_section: | ||
| scoring_rows = scoring_section.find(TAG_TBODY) | ||
| if scoring_rows: | ||
| for row in scoring_rows.find_all(TAG_TR): | ||
| team = row.find_all(TAG_TD)[0].find(TAG_IMG)[ATTR_ALT] | ||
| inning = row.find_all(TAG_TD)[3].text.strip() | ||
| desc_cell = row.find_all(TAG_TD)[4] | ||
| span = desc_cell.find(TAG_SPAN) | ||
| if span: | ||
| span.extract() | ||
| desc = desc_cell.get_text(strip=True) | ||
| cornell_score = int(row.find_all(TAG_TD)[5].get_text(strip=True) or 0) | ||
| opp_score = int(row.find_all(TAG_TD)[6].get_text(strip=True) or 0) | ||
|
Comment on lines
+63
to
+71
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

Harden softball row parsing to avoid scraper crashes on markup variance. Lines 63–71 assume every row has all expected cells/image and strictly numeric score cells. A single malformed row can raise and terminate parsing for this game.

🛠️ Proposed defensive fix

```diff
 def softball_summary(box_score_section):
     summary = []
     scoring_section = box_score_section.find(TAG_SECTION, {ATTR_ARIA_LABEL: LABEL_SCORING_SUMMARY})
     if scoring_section:
         scoring_rows = scoring_section.find(TAG_TBODY)
         if scoring_rows:
             for row in scoring_rows.find_all(TAG_TR):
-                team = row.find_all(TAG_TD)[0].find(TAG_IMG)[ATTR_ALT]
-                inning = row.find_all(TAG_TD)[3].text.strip()
-                desc_cell = row.find_all(TAG_TD)[4]
+                cells = row.find_all(TAG_TD)
+                if len(cells) < 7:
+                    continue
+
+                team_img = cells[0].find(TAG_IMG)
+                team = team_img.get(ATTR_ALT, "").strip() if team_img else cells[0].get_text(strip=True)
+                inning = cells[3].get_text(strip=True)
+                desc_cell = cells[4]
                 span = desc_cell.find(TAG_SPAN)
                 if span:
                     span.extract()
                 desc = desc_cell.get_text(strip=True)
-                cornell_score = int(row.find_all(TAG_TD)[5].get_text(strip=True) or 0)
-                opp_score = int(row.find_all(TAG_TD)[6].get_text(strip=True) or 0)
+
+                cor_raw = cells[5].get_text(strip=True)
+                opp_raw = cells[6].get_text(strip=True)
+                cornell_score = int(cor_raw) if cor_raw.isdigit() else 0
+                opp_score = int(opp_raw) if opp_raw.isdigit() else 0
                 summary.append({
                     'team': team,
                     'period': inning,
                     'inning': inning,
                     'description': desc,
                     'cor_score': cornell_score,
                     'opp_score': opp_score
                 })
```

🤖 Prompt for AI Agents |
||
| summary.append({ | ||
| 'team': team, | ||
| 'inning': inning, | ||
| 'description': desc, | ||
| 'cor_score': cornell_score, | ||
| 'opp_score': opp_score | ||
| }) | ||
| if not summary: | ||
| summary = [{"message": "No scoring events in this game."}] | ||
| return summary | ||
|
|
||
|
|
||
| def soccer_summary(box_score_section): | ||
| summary = [] | ||
| scoring_section = box_score_section.find(TAG_SECTION, {ATTR_ARIA_LABEL: LABEL_SCORING_SUMMARY}) | ||
|
|
@@ -124,14 +152,13 @@ def hockey_summary(box_score_section): | |
| scorer = row.find_all(TAG_TD)[4].text.strip() | ||
| assist = row.find_all(TAG_TD)[5].text.strip() | ||
|
|
||
| if team == "COR" or team == "CU" or team == "Cornell": | ||
| if team == "COR" or team == "CU" or team == "Cornell" or team == "CORNELL": | ||
| cornell_score += 1 | ||
| else: | ||
| opp_score += 1 | ||
|
|
||
| summary.append({ | ||
| 'team': team, | ||
| 'period': period, | ||
| 'time': time, | ||
| 'scorer': scorer, | ||
| 'assist': assist, | ||
|
|
@@ -272,6 +299,7 @@ def scrape_game(url, sport): | |
| 'field hockey': (lambda: extract_teams_and_scores(box_score_section, 'field hockey'), field_hockey_summary), | ||
| 'lacrosse': (lambda: extract_teams_and_scores(box_score_section, 'lacrosse'), lacrosse_summary), | ||
| 'baseball': (lambda: extract_teams_and_scores(box_score_section, 'baseball'), baseball_summary), | ||
| 'softball': (lambda: extract_teams_and_scores(box_score_section, 'softball'), softball_summary), | ||
| 'basketball': (lambda: extract_teams_and_scores(box_score_section, 'basketball'), lambda _: []), | ||
| } | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.