Skip to content

Commit bf52319

Browse files
committed
fix: mypy checks
1 parent 027586e commit bf52319

5 files changed

Lines changed: 42 additions & 21 deletions

File tree

module/scraping/notice.py

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -53,25 +53,25 @@ def from_url(cls, label: str, url: str) -> "Notice | None":
5353
table_content += "\t".join(cols_text) + "\n"
5454
table.decompose() # remove table from content
5555

56-
title = cls.__get_title(cls, soup)
56+
title = cls.__get_title(soup)
5757
content = soup.find("div", attrs={"class": "field-item even"})
5858

59-
prof = cls.__get_prof(cls, soup)
59+
prof = cls.__get_prof(soup)
6060

6161
if title is not None and content is not None:
62-
title = title.get_text()
63-
content = content.get_text()
62+
title_text = title.get_text()
63+
content_text = content.get_text()
6464

65-
content = f"{content.strip()}\n{table_content}"
65+
content = f"{content_text.strip()}\n{table_content}"
6666
if prof is not None:
67-
title = f"[{prof}]\n{title}"
67+
title_text = f"[{prof}]\n{title_text}"
6868

6969
else:
7070
return None
7171

72-
title = f"\n{title}"
72+
title_text = f"\n{title_text}"
7373

74-
return cls(label, title, content, url)
74+
return cls(label, title_text, content, url)
7575
except (
7676
requests.Timeout,
7777
requests.ConnectionError,
@@ -83,7 +83,8 @@ def from_url(cls, label: str, url: str) -> "Notice | None":
8383

8484
return None
8585

86-
def __get_prof(self, soup: bs4.BeautifulSoup) -> str | None:
86+
@staticmethod
87+
def __get_prof(soup: bs4.BeautifulSoup) -> str | None:
8788
"""Returns the prof of the notice
8889
Args:
8990
soup: BeautifulSoup object of the page
@@ -92,17 +93,23 @@ def __get_prof(self, soup: bs4.BeautifulSoup) -> str | None:
9293
"""
9394
goto_prof_text = "Vai alla scheda del prof. "
9495
prof = soup.find("a", text=lambda text: text and goto_prof_text in text)
95-
return prof and prof.get_text().replace(goto_prof_text, "")
96+
if isinstance(prof, bs4.Tag):
97+
return prof.get_text().replace(goto_prof_text, "")
98+
return None
9699

97-
def __get_title(self, soup: bs4.BeautifulSoup) -> bs4.BeautifulSoup | None:
100+
@staticmethod
101+
def __get_title(soup: bs4.BeautifulSoup) -> bs4.Tag | None:
98102
"""Returns the title of the notice
99103
Args:
100104
soup: BeautifulSoup object of the page
101105
Returns:
102106
the soup of the title
103107
"""
104108
title = soup.find("h1", attrs={"class": "page-title"})
105-
return title if title else soup.select_one("section#content h1")
109+
if isinstance(title, bs4.Tag):
110+
return title
111+
result = soup.select_one("section#content h1")
112+
return result if isinstance(result, bs4.Tag) else None
106113

107114
@property
108115
def formatted_url(self) -> str:

module/scraping/scraper_group.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
11
"""Scrape groups"""
2+
23
import logging
34
import os
45
import shutil
6+
57
import yaml
68
from telegram.ext import CallbackContext
7-
from module.data import NoticeData, GroupConfig, DEFAULT_NOTICES_DATA, config_map
9+
10+
from module.data import DEFAULT_NOTICES_DATA, GroupConfig, NoticeData, config_map
11+
812
from .notice import Notice
913
from .scraper_links import get_links
1014

@@ -24,7 +28,9 @@ def scrape_group(context: CallbackContext, group_key: str, group: GroupConfig) -
2428
logging.info("-- Page '%s'", page_key)
2529

2630
# Generate page folder's path and subpaths
27-
base_page_path = f"data/avvisi/{group_key.replace(' ', '_')}/{page_key.replace(' ', '_')}"
31+
base_page_path = (
32+
f"data/avvisi/{group_key.replace(' ', '_')}/{page_key.replace(' ', '_')}"
33+
)
2834
data_file_path = f"{base_page_path}/notices_data.yaml"
2935

3036
# Initialize folder and data file (if it doesn't exist)
@@ -52,21 +58,28 @@ def scrape_group(context: CallbackContext, group_key: str, group: GroupConfig) -
5258

5359
# If link has already been scraped
5460
# (implying that's invalid page or already posted notice), skip it
55-
if link in notices_data["scraped_links"]:
61+
if not isinstance(link, str) or link in notices_data["scraped_links"]:
5662
logging.info("Link is already present in the list")
5763
continue
5864

65+
# Type guard for mypy to ensure link is str
66+
assert isinstance(link, str)
67+
5968
notice = Notice.from_url(page["label"], group["base_url"] + link)
6069

6170
# If the notice is valid,
6271
# enqueue it to be sent in the channel or in an approval group
6372
if notice is not None:
64-
logging.info("Link is valid and seems to contain a notice, spamming")
73+
logging.info(
74+
"Link is valid and seems to contain a notice, spamming"
75+
)
6576
notice.send(context, page["channels"])
6677
else:
6778
logging.info("Link doesn't contain a valid notice")
68-
context.bot.sendMessage(chat_id=config_map["log_group_chatid"],
69-
text=f"Link doesn't contain a valid notice: {link}")
79+
context.bot.sendMessage(
80+
chat_id=config_map["log_group_chatid"],
81+
text=f"Link doesn't contain a valid notice: {link}",
82+
)
7083

7184
# Appends current link to scraped ones
7285
notices_data["scraped_links"].append(link)

module/scraping/scraper_links.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from module.data import config_map
1111

1212

13-
def get_links(url: str) -> "list[str] | None":
13+
def get_links(url: str) -> list[str | list[str] | None] | None:
1414
"""Generates a list of links to the notices scraped from the page indicated by the url.
1515
1616
Args:

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ chardet==4.0.0
88
click==8.1.3
99
cryptography==3.4.7
1010
decorator==5.0.5
11-
dill==0.3.6
11+
dill
1212
idna==2.10
1313
lazy-object-proxy==1.8.0
1414
mccabe==0.7.0

requirements_dev.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,5 @@ mypy-extensions==0.4.3
44
pylint==4.0.4
55
pytest
66
flake8
7-
mypy
7+
mypy
8+
typed-ast

0 commit comments

Comments
 (0)