File tree Expand file tree Collapse file tree 1 file changed +28
-1
lines changed
Expand file tree Collapse file tree 1 file changed +28
-1
lines changed Original file line number Diff line number Diff line change 1+ from urllib.parse import urlparse
2+
13import bs4
24import requests
35
6+ ALLOWED_HOSTS = {"www.imdb.com"}
7+
8+
9+ def _validate_url(url: str) -> str:
10+ """Validate a URL against an allowlist of trusted hosts to prevent SSRF attacks.
11+
12+ Args:
13+ url: The URL to validate.
14+
15+ Returns:
16+ The original URL if valid.
17+
18+ Raises:
19+ ValueError: If the URL scheme is not HTTPS or the host is not in the allowlist.
20+ """
21+ parsed = urlparse(url)
22+ if parsed.scheme != "https" or parsed.hostname not in ALLOWED_HOSTS:
23+ raise ValueError(
24+ f"URL '{url}' is not allowed. Only HTTPS requests to "
25+ f"{ALLOWED_HOSTS} are permitted."
26+ )
27+ return url
28+
429
530def get_movie_data_from_soup(soup: bs4.element.ResultSet) -> dict[str, str]:
631 return {
@@ -35,7 +60,9 @@ def get_imdb_top_movies(num_movies: int = 5) -> tuple:
3560 "https://www.imdb.com/search/title?title_type="
3661 f"feature&sort=num_votes,desc&count={num_movies}"
3762 )
38- source = bs4.BeautifulSoup(requests.get(base_url).content, "html.parser")
63+ source = bs4.BeautifulSoup(
64+ requests.get(_validate_url(base_url)).content, "html.parser"
65+ )
3966 return tuple(
4067 get_movie_data_from_soup(movie)
4168 for movie in source.find_all("div", class_="lister-item mode-advanced")
You can’t perform that action at this time.
0 commit comments