Skip to content

Commit 60d54c4

Browse files
committed
feat: use repo as source of truth
1 parent 29ee325 commit 60d54c4

6 files changed

Lines changed: 107 additions & 129 deletions

File tree

src/twyn/trusted_packages/references/base.py

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from twyn.trusted_packages.cache_handler import CacheEntry, CacheHandler
99
from twyn.trusted_packages.exceptions import (
10+
EmptyPackagesListError,
1011
InvalidJSONError,
1112
)
1213

@@ -28,26 +29,19 @@ def __init__(self, source: Optional[str] = None, cache_handler: Union[CacheHandl
2829
self.source = source or self.DEFAULT_SOURCE
2930
self.cache_handler = cache_handler
3031

31-
@staticmethod
32-
@abstractmethod
33-
def _parse(packages_json: dict[str, Any]) -> set[str]:
34-
"""Parse and retrieve the packages within the given json structure."""
35-
3632
@staticmethod
3733
@abstractmethod
3834
def normalize_packages(packages: set[str]) -> set[str]:
3935
"""Normalize package names to make sure they're valid within the package manager context."""
4036

4137
def _download(self) -> dict[str, Any]:
42-
packages = requests.get(self.source)
43-
packages.raise_for_status()
38+
response = requests.get(self.source)
39+
response.raise_for_status()
40+
4441
try:
45-
packages_json: dict[str, Any] = packages.json()
42+
return response.json()
4643
except requests.exceptions.JSONDecodeError as err:
4744
raise InvalidJSONError from err
48-
else:
49-
logger.debug("Successfully downloaded trusted packages list from %s", self.source)
50-
return packages_json
5145

5246
def _save_trusted_packages_to_cache_if_enabled(self, packages: set[str]) -> None:
5347
"""Save trusted packages using CacheHandler."""
@@ -69,18 +63,24 @@ def _get_packages_from_cache_if_enabled(self) -> set[str]:
6963
return cache_entry.packages
7064

7165
def get_packages(self) -> set[str]:
72-
"""Download and parse online source of top Python Package Index packages."""
73-
packages_to_use = set()
74-
packages_to_use = self._get_packages_from_cache_if_enabled()
66+
"""Download and parse online source of top packages from the package ecosystem."""
67+
packages = self._get_packages_from_cache_if_enabled()
7568
# we don't save the cache here, we keep it as it is so the date remains the original one.
76-
77-
if not packages_to_use:
69+
if not packages:
7870
# no cache usage, no cache hit (non-existent or outdated) or cache was empty.
7971
logger.info("Fetching trusted packages from trusted packages reference...")
80-
packages_to_use = self._parse(self._download())
72+
data = self._download()
73+
try:
74+
packages = set(data["packages"])
75+
except KeyError as err:
76+
raise InvalidJSONError("`packages` key not in JSON.") from err
77+
78+
logger.debug("Successfully downloaded trusted packages list from %s", self.source)
79+
if not packages:
80+
raise EmptyPackagesListError
8181

8282
# New packages were downloaded, we create a new entry updating all values.
83-
self._save_trusted_packages_to_cache_if_enabled(packages_to_use)
83+
self._save_trusted_packages_to_cache_if_enabled(packages)
8484

85-
normalized_packages = self.normalize_packages(packages_to_use)
85+
normalized_packages = self.normalize_packages(packages)
8686
return normalized_packages

src/twyn/trusted_packages/references/top_npm_reference.py

Lines changed: 3 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,9 @@
11
import logging
22
import re
3-
from typing import Any
43

54
from typing_extensions import override
65

76
from twyn.trusted_packages.exceptions import (
8-
EmptyPackagesListError,
9-
InvalidReferenceFormatError,
107
PackageNormalizingError,
118
)
129
from twyn.trusted_packages.references.base import AbstractPackageReference
@@ -17,22 +14,9 @@
1714
class TopNpmReference(AbstractPackageReference):
1815
"""Top npm packages retrieved from an online source."""
1916

20-
DEFAULT_SOURCE: str = "https://www.npmleaderboard.org/api/packages"
21-
22-
@override
23-
@staticmethod
24-
def _parse(packages_info: dict[str, Any]) -> set[str]:
25-
try:
26-
names = {pkg["name"] for pkg in packages_info["packages"]}
27-
28-
except KeyError as err:
29-
raise InvalidReferenceFormatError from err
30-
31-
if not names:
32-
raise EmptyPackagesListError
33-
34-
logger.debug("Successfully parsed trusted packages list")
35-
return names
17+
DEFAULT_SOURCE: str = (
18+
"https://raw.githubusercontent.com/elementsinteractive/twyn/refs/heads/main/dependencies/npm.json"
19+
)
3620

3721
@override
3822
@staticmethod

src/twyn/trusted_packages/references/top_pypi_reference.py

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,9 @@
11
import logging
22
import re
3-
from typing import Any
43

54
from typing_extensions import override
65

76
from twyn.trusted_packages.exceptions import (
8-
EmptyPackagesListError,
9-
InvalidReferenceFormatError,
107
PackageNormalizingError,
118
)
129
from twyn.trusted_packages.references.base import AbstractPackageReference
@@ -17,21 +14,9 @@
1714
class TopPyPiReference(AbstractPackageReference):
1815
"""Top PyPi packages retrieved from an online source."""
1916

20-
DEFAULT_SOURCE: str = "https://hugovk.github.io/top-pypi-packages/top-pypi-packages.min.json"
21-
22-
@override
23-
@staticmethod
24-
def _parse(packages_info: dict[str, Any]) -> set[str]:
25-
try:
26-
names = {row["project"] for row in packages_info["rows"]}
27-
except KeyError as err:
28-
raise InvalidReferenceFormatError from err
29-
30-
if not names:
31-
raise EmptyPackagesListError
32-
33-
logger.debug("Successfully parsed trusted packages list")
34-
return names
17+
DEFAULT_SOURCE: str = (
18+
"https://raw.githubusercontent.com/elementsinteractive/twyn/refs/heads/main/dependencies/pypi.json"
19+
)
3520

3621
@override
3722
@staticmethod

tests/conftest.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
from collections.abc import Iterable, Iterator
1+
import datetime
2+
from collections.abc import Iterator
23
from contextlib import contextmanager
34
from pathlib import Path
45
from unittest import mock
@@ -14,12 +15,12 @@ def create_tmp_file(path: Path, data: str) -> Iterator[Path]:
1415

1516

1617
@contextmanager
17-
def patch_pypi_packages_download(packages: Iterable[str]) -> Iterator[mock.Mock]:
18+
def patch_pypi_packages_download(packages: list[str]) -> Iterator[mock.Mock]:
1819
"""Patcher of `requests.get` for Top PyPi list.
1920
2021
Replaces call with the output you would get from downloading the top PyPi packages list.
2122
"""
22-
json_response = {"rows": [{"project": name} for name in packages]}
23+
json_response = {"packages": packages, "date": datetime.datetime.now().isoformat()}
2324

2425
with mock.patch("twyn.trusted_packages.TopPyPiReference._download") as mock_download:
2526
mock_download.return_value = json_response
@@ -28,12 +29,12 @@ def patch_pypi_packages_download(packages: Iterable[str]) -> Iterator[mock.Mock]
2829

2930

3031
@contextmanager
31-
def patch_npm_packages_download(packages: Iterable[str]) -> Iterator[mock.Mock]:
32+
def patch_npm_packages_download(packages: list[str]) -> Iterator[mock.Mock]:
3233
"""Patcher of `requests.get` for Top Npm list.
3334
3435
Replaces call with the output you would get from downloading the top Npm packages list.
3536
"""
36-
json_response = {"packages": [{"name": name} for name in packages]}
37+
json_response = {"packages": packages, "date": datetime.datetime.now().isoformat()}
3738

3839
with mock.patch("twyn.trusted_packages.TopNpmReference._download") as mock_download:
3940
mock_download.return_value = json_response

tests/main/test_main.py

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -286,32 +286,6 @@ def test_check_dependencies_ignores_package_in_allowlist(
286286

287287
assert error == TyposquatCheckResultList(errors=[])
288288

289-
@pytest.mark.parametrize(
290-
"package_name",
291-
[
292-
"my.package",
293-
"my-package",
294-
"my_package",
295-
"My.Package",
296-
],
297-
)
298-
@patch("twyn.trusted_packages.TopPyPiReference._get_packages_from_cache_if_enabled")
299-
def test_normalize_package(self, mock_get_packages_from_cache: Mock, package_name: Mock) -> None:
300-
mock_get_packages_from_cache.return_value = {"requests", "mypackage"}
301-
error = check_dependencies(
302-
config_file=None,
303-
dependency_file=None,
304-
dependencies={package_name},
305-
selector_method="first-letter",
306-
package_ecosystem="pypi",
307-
)
308-
309-
assert error == TyposquatCheckResultList(
310-
errors=[
311-
TyposquatCheckResult(dependency="my-package", similars=["mypackage"]),
312-
]
313-
)
314-
315289
@patch("twyn.trusted_packages.TopPyPiReference.get_packages")
316290
def test_check_dependencies_does_not_error_on_same_package(
317291
self, mock_get_packages: Mock, uv_lock_file_with_typo: Path

tests/trusted_packages/test_references.py

Lines changed: 76 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from twyn.trusted_packages.exceptions import (
1212
EmptyPackagesListError,
1313
InvalidJSONError,
14-
InvalidReferenceFormatError,
14+
PackageNormalizingError,
1515
)
1616
from twyn.trusted_packages.references.base import AbstractPackageReference
1717

@@ -34,7 +34,7 @@ def test_get_packages(self) -> None:
3434
@freeze_time("2025-8-19")
3535
def test_get_trusted_packages_uses_valid_cache(self, tmp_path: Path) -> None:
3636
"""Test that valid cached data is loaded and used without fetching from PyPI."""
37-
packages = {"requests", "flask", "django", "fastapi"}
37+
packages = ["requests", "flask", "django", "fastapi"]
3838

3939
cache_handler = CacheHandler(str(tmp_path / "cache"))
4040
cache_entry = CacheEntry(saved_date="2025-08-18", packages=packages)
@@ -44,7 +44,7 @@ def test_get_trusted_packages_uses_valid_cache(self, tmp_path: Path) -> None:
4444
retrieved_cache_entry = cache_handler.get_cache_entry("pypi")
4545
assert retrieved_cache_entry is not None
4646
assert retrieved_cache_entry.saved_date == "2025-08-18"
47-
assert retrieved_cache_entry.packages == packages
47+
assert retrieved_cache_entry.packages == set(packages)
4848

4949
with patch_pypi_packages_download(packages) as m_pypi:
5050
result = TopPyPiReference("pypi", cache_handler=cache_handler).get_packages()
@@ -110,7 +110,7 @@ def test_get_packages_downloads_when_cache_has_invalid_package_names(self, tmp_p
110110
@freeze_time("2025-8-21", tz_offset=0)
111111
def test_cache_is_saved_when_not_existing(self, tmp_path: Path) -> None:
112112
"""Test that cache starts empty and gets filled after downloading packages."""
113-
cached_packages = {"numpy", "requests", "django"}
113+
cached_packages = ["numpy", "requests", "django"]
114114
cache_handler = CacheHandler(str(tmp_path / "cache"))
115115
with patch_pypi_packages_download(cached_packages) as m_pypi:
116116
pypi_ref = TopPyPiReference(source="pypi", cache_handler=cache_handler)
@@ -119,12 +119,12 @@ def test_cache_is_saved_when_not_existing(self, tmp_path: Path) -> None:
119119

120120
# The packages were downloaded and match the expected result
121121
assert m_pypi.call_count == 1
122-
assert retrieved_packages == cached_packages
122+
assert retrieved_packages == set(cached_packages)
123123

124124
# The packages were saved to the cache file, with its associated metadata
125125
cache_content = cache_handler.get_cache_entry("pypi")
126126

127-
assert set(cache_content.packages) == cached_packages
127+
assert set(cache_content.packages) == set(cached_packages)
128128
assert cache_content.saved_date == "2025-08-21"
129129

130130
@patch("requests.get")
@@ -140,6 +140,24 @@ def test__download_json_exception(self, mock_get: Mock) -> None:
140140
):
141141
top_pypi._download()
142142

143+
def test_get_packages_no_packages_key(self) -> None:
144+
top_pypi = TopPyPiReference(source="foo", cache_handler=CacheHandler())
145+
146+
with patch("twyn.trusted_packages.TopPyPiReference._download") as mock_download:
147+
mock_download.return_value = {}
148+
with pytest.raises(InvalidJSONError, match="`packages` key not in JSON."):
149+
top_pypi.get_packages()
150+
151+
def test_empty_packages_list_exception(self) -> None:
152+
with (
153+
pytest.raises(
154+
EmptyPackagesListError,
155+
match="Downloaded packages list is empty",
156+
),
157+
patch_pypi_packages_download([]),
158+
):
159+
TopPyPiReference().get_packages()
160+
143161

144162
class TestTopPyPiReference:
145163
def test_get_trusted_packages(self, tmp_path: Path) -> None:
@@ -152,29 +170,60 @@ def test_get_trusted_packages(self, tmp_path: Path) -> None:
152170
assert packages == {"foo", "bar", "django", "requests", "sqlalchemy"}
153171
assert m_pypi.call_count == 1
154172

155-
def test__parse_no_rows(self) -> None:
156-
data = {"bananas": 5}
157-
top_pypi = TopPyPiReference(source="foo", cache_handler=CacheHandler())
158-
159-
with pytest.raises(InvalidReferenceFormatError, match="Invalid JSON format."):
160-
top_pypi._parse(data)
173+
@pytest.mark.parametrize(
174+
"package_name",
175+
[
176+
"my.package",
177+
"my-package",
178+
"my_package",
179+
"My.Package",
180+
],
181+
)
182+
@patch("twyn.trusted_packages.TopPyPiReference._get_packages_from_cache_if_enabled")
183+
def test_normalize_package_when_loaded_from_cache(
184+
self, mock_get_packages_from_cache: Mock, package_name: Mock, tmp_path: Path
185+
) -> None:
186+
mock_get_packages_from_cache.return_value = {package_name}
187+
188+
with patch_pypi_packages_download([]) as m_pypi:
189+
ref = TopPyPiReference(cache_handler=CacheHandler(str(tmp_path / "cache")))
190+
packages = ref.get_packages()
161191

162-
def test_empty_packages_list_exception(self) -> None:
163-
with pytest.raises(
164-
EmptyPackagesListError,
165-
match="Downloaded packages list is empty",
166-
):
167-
TopPyPiReference._parse({"rows": []})
192+
assert packages == {"my-package"}
193+
assert m_pypi.call_count == 0
194+
assert mock_get_packages_from_cache.call_count == 1
195+
196+
@pytest.mark.parametrize(
197+
"package_name",
198+
[
199+
"my.package",
200+
"my-package",
201+
"my_package",
202+
"My.Package",
203+
],
204+
)
205+
@patch("twyn.trusted_packages.TopPyPiReference._get_packages_from_cache_if_enabled")
206+
def test_normalize_package_when_downloaded(
207+
self, mock_get_packages_from_cache: Mock, package_name: Mock, tmp_path: Path
208+
) -> None:
209+
mock_get_packages_from_cache.return_value = {}
210+
211+
with patch_pypi_packages_download([package_name]) as m_pypi:
212+
ref = TopPyPiReference()
213+
packages = ref.get_packages()
168214

169-
def test__parse_retrieves_package_names(self) -> None:
170-
data = {"rows": [{"project": "boto3"}, {"project": "requests"}]}
171-
top_pypi = TopPyPiReference(source="foo", cache_handler=CacheHandler())
215+
assert packages == {"my-package"}
216+
assert m_pypi.call_count == 1
217+
assert mock_get_packages_from_cache.call_count == 1
172218

173-
assert top_pypi._parse(data) == {"boto3", "requests"}
219+
def test_normalize_package_invalid_name_raises(self):
220+
ref = TopPyPiReference()
221+
with pytest.raises(PackageNormalizingError):
222+
ref.normalize_packages({"INVALID PACKAGE NAME!"})
174223

175224

176225
class TestTopNpmReference:
177-
def test_get_trusted_packages_v2(self, tmp_path: Path) -> None:
226+
def test_get_trusted_packages(self, tmp_path: Path) -> None:
178227
test_packages = ["foo", "bar", "react", "express", "lodash"]
179228

180229
with patch_npm_packages_download(test_packages) as m_npm:
@@ -184,22 +233,7 @@ def test_get_trusted_packages_v2(self, tmp_path: Path) -> None:
184233
assert packages == {"foo", "bar", "react", "express", "lodash"}
185234
assert m_npm.call_count == 1
186235

187-
def test__parse_no_rows(self) -> None:
188-
data = {"bananas": 5}
189-
npm_ref = TopNpmReference(source="foo", cache_handler=CacheHandler())
190-
191-
with pytest.raises(InvalidReferenceFormatError, match="Invalid JSON format."):
192-
npm_ref._parse(data)
193-
194-
def test_empty_packages_list_exception(self) -> None:
195-
with pytest.raises(
196-
EmptyPackagesListError,
197-
match="Downloaded packages list is empty",
198-
):
199-
TopNpmReference._parse({"packages": []})
200-
201-
def test__parse_retrieves_package_names(self) -> None:
202-
data = {"packages": [{"name": "react"}, {"name": "express"}]}
203-
npm_ref = TopNpmReference(source="foo", cache_handler=CacheHandler())
204-
205-
assert npm_ref._parse(data) == {"react", "express"}
236+
def test_normalize_package_invalid_name_raises(self):
237+
ref = TopNpmReference()
238+
with pytest.raises(PackageNormalizingError):
239+
ref.normalize_packages({"INVALID PACKAGE NAME!"})

0 commit comments

Comments
 (0)