|
3 | 3 | from PIL import Image |
4 | 4 | from io import BytesIO |
5 | 5 | from collections import Counter |
| 6 | +import re |
6 | 7 |
|
7 | 8 |
|
8 | 9 | def get_dominant_color(image_url, white_threshold=200, black_threshold=50): |
@@ -96,6 +97,53 @@ def is_cornell_loss(result: str): |
96 | 97 | loss_indicators = ["L", "Loss", "loss", "Defeated", "defeated"] |
97 | 98 | return any(indicator in result for indicator in loss_indicators) |
98 | 99 |
|
def extract_sport_from_title(title):
    """
    Extracts the sport type from a YouTube video title.

    Matching is case-insensitive. Patterns are tried in order and the first
    match wins, so more specific sports are listed before the generic ones
    they contain (e.g. "sprint football" before "football").

    Args:
        title (str): The title of the YouTube video

    Returns:
        str: The sport type if found, None otherwise
    """
    if not title:
        return None

    title_lower = title.lower()

    # Ordered (pattern, sport) pairs: first match wins, so specific entries
    # must precede the generic pattern that would otherwise shadow them.
    sport_patterns = (
        # Covers "men's ice hockey" / "women's ice hockey" as well.
        (r"ice\s+hockey", "Ice Hockey"),
        (r"field\s+hockey", "Field Hockey"),
        # A bare "hockey" is treated as ice hockey.
        (r"\bhockey\b", "Ice Hockey"),
        (r"basketball", "Basketball"),
        # Must come before plain "football", which also matches these titles.
        (r"sprint\s+football", "Sprint Football"),
        (r"\bfootball\b", "Football"),
        (r"\bsoccer\b", "Soccer"),
        (r"volleyball", "Volleyball"),
        (r"wrestling", "Wrestling"),
    )

    for pattern, sport_name in sport_patterns:
        if re.search(pattern, title_lower):
            return sport_name

    # Fallback heuristic: "ice" (or the fused "icehockey") as a whole word
    # next to "hockey"/"cornell". A word-boundary match is used instead of a
    # substring test so words such as "police", "office" or "practice" do
    # not count as a mention of ice.
    mentions_ice = re.search(r"\bice(?:hockey)?\b", title_lower) is not None
    if mentions_ice and ("hockey" in title_lower or "cornell" in title_lower):
        return "Ice Hockey"

    return None
| 146 | + |
99 | 147 | def extract_sport_type_from_title(title: str): |
100 | 148 | """ |
101 | 149 | Extract the sport type from an article title by matching against known sports. |
|
0 commit comments