|
3 | 3 | from datetime import datetime, timedelta |
4 | 4 | from dotenv import load_dotenv |
5 | 5 | from ..services import ArticleService |
| 6 | +from ..utils.constants import ARTICLE_IMG_TAG |
6 | 7 | import logging |
| 8 | +from bs4 import BeautifulSoup |
| 9 | +import base64 |
7 | 10 |
|
8 | 11 | load_dotenv() |
9 | 12 |
|
@@ -36,16 +39,32 @@ def fetch_news(): |
36 | 39 | ) |
37 | 40 | article_url = f"https://cornellsun.com/article/{article['slug']}" |
38 | 41 |
|
| 42 | + article_image = None |
| 43 | + try: |
| 44 | + response = requests.get( |
| 45 | + article_url, |
| 46 | + headers={ |
| 47 | + "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36" |
| 48 | + } |
| 49 | + ) |
| 50 | + response.raise_for_status() |
| 51 | + soup = BeautifulSoup(response.content, 'html.parser') |
| 52 | + img_tag = soup.select_one(ARTICLE_IMG_TAG) |
| 53 | + if img_tag and img_tag.get('src'): |
| 54 | + article_image=img_tag.get('src') |
| 55 | + except Exception as e: |
| 56 | + logging.error(f"Error fetching news: {str(e)}") |
39 | 57 | article_doc = { |
40 | 58 | "title": article["headline"], |
41 | | - "image": article["dominantMedia"]["title"] if article["dominantMedia"] else None, |
| 59 | + "image": article_image, |
42 | 60 | "sports_type": sports_type, |
43 | 61 | "published_at": published_at, |
44 | 62 | "url": article_url, |
45 | 63 | "slug": article["slug"], |
46 | 64 | "created_at": datetime.now() |
47 | 65 | } |
48 | 66 | articles_to_store.append(article_doc) |
| 67 | + |
49 | 68 |
|
50 | 69 | if articles_to_store: |
51 | 70 | ArticleService.create_articles_bulk(articles_to_store) |
|
0 commit comments