Skip to content

Commit 5f4e1dc

Browse files
committed
feat: support multiple sources and create CacheHandler
1 parent bdf963d commit 5f4e1dc

13 files changed

Lines changed: 613 additions & 173 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ dependencies = [
1919
"pyparsing<4.0.0,>=3.2.3",
2020
"tomlkit<0.14.0,>=0.11.6",
2121
"tomli<3.0.0,>=2.2.1; python_version < \"3.13\"",
22+
"pydantic>=2.11.7,<3.0.0",
2223
]
2324
name = "twyn"
2425
description = "Security tool against dependency typosquatting attacks"

src/twyn/base/exceptions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,7 @@ def __init__(self, message: str = "") -> None:
1515
def show(self, file: Optional[IO[Any]] = None) -> None:
1616
logger.debug(self.format_message(), exc_info=True)
1717
logger.error(self.format_message(), exc_info=False)
18+
19+
20+
class PackageNormalizingError(TwynError):
21+
"""Exception for when it is not possible to normalize a package name."""

src/twyn/base/utils.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,15 @@
11
import re
22

3+
from twyn.base.exceptions import PackageNormalizingError
34

4-
def _normalize_packages(packages: set[str]) -> set[str]:
5+
6+
def normalize_packages(packages: set[str]) -> set[str]:
    """Normalize dependency names following the PyPI name-normalization rules.

    See https://packaging.python.org/en/latest/specifications/name-normalization/.

    Every run of ``-``, ``_`` and ``.`` is collapsed into a single ``-`` and the
    result is lower-cased.

    Args:
        packages: Raw package names.

    Returns:
        The set of normalized names. Inputs that normalize to the same name collapse
        into a single element.

    Raises:
        PackageNormalizingError: If a normalized name does not match the allowed
            pattern (lowercase ASCII letters, digits, ``.``, ``_`` and ``-``,
            starting and ending with a letter or digit). The empty string is rejected.
    """
    renamed_packages = {re.sub(r"[-_.]+", "-", name).lower() for name in packages}

    pattern = re.compile(r"^([a-z0-9]|[a-z0-9][a-z0-9._-]*[a-z0-9])\Z")
    # Sort the offenders so the package named in the error does not depend on set iteration order.
    invalid_packages = sorted(name for name in renamed_packages if not pattern.match(name))
    if invalid_packages:
        raise PackageNormalizingError(f"Package name '{invalid_packages[0]}' does not match required pattern")

    return renamed_packages

src/twyn/cli.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from twyn.config.config_handler import ConfigHandler
1616
from twyn.file_handler.file_handler import FileHandler
1717
from twyn.main import check_dependencies
18+
from twyn.trusted_packages.cache_handler import CacheHandler
1819
from twyn.trusted_packages.constants import TRUSTED_PACKAGES_FILE_PATH
1920

2021
logging.basicConfig(
@@ -154,13 +155,7 @@ def cache() -> None:
154155

155156
@cache.command()
156157
def clear() -> None:
157-
fp = FileHandler(TRUSTED_PACKAGES_FILE_PATH).file_path
158-
if fp.exists():
159-
fp.unlink()
160-
fp.parent.rmdir()
161-
logger.warning("Cache has been cleared")
162-
else:
163-
logger.warning("Could not clear cache. Cache file not found.")
158+
CacheHandler(FileHandler(TRUSTED_PACKAGES_FILE_PATH)).clear()
164159

165160

166161
if __name__ == "__main__":

src/twyn/file_handler/file_handler.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
from pathlib import Path
44
from typing import Protocol
55

6-
from twyn.base.exceptions import TwynError
76
from twyn.file_handler.exceptions import PathIsNotFileError, PathNotFoundError
87

98
logger = logging.getLogger("twyn")
@@ -13,6 +12,7 @@ class BaseFileHandler(Protocol):
1312
def read(self) -> str: ...
1413
def exists(self) -> bool: ...
1514
def write(self, data: str) -> None: ...
15+
def delete(self, delete_parent_dir: bool = False) -> None:
    """Delete the handled file; when ``delete_parent_dir`` is true, also try to remove its parent directory."""
    ...
1616

1717

1818
class FileHandler(BaseFileHandler):
@@ -36,7 +36,7 @@ def read(self) -> str:
3636
def exists(self) -> bool:
3737
try:
3838
self._raise_for_file_exists()
39-
except TwynError:
39+
except (PathNotFoundError, PathIsNotFileError):
4040
return False
4141
return True
4242

@@ -49,3 +49,20 @@ def _raise_for_file_exists(self) -> None:
4949

5050
def write(self, data: str) -> None:
5151
self.file_path.write_text(data)
52+
53+
def delete(self, delete_parent_dir: bool = False) -> None:
    """Remove the handled file and, on request, its parent directory.

    When ``exists()`` reports no file, only an informational message is logged.
    A failure to remove the parent directory is logged, not raised.

    Args:
        delete_parent_dir: When true, also try to remove the file's parent
            directory after the file itself has been deleted.
    """
    if self.exists():
        target = self.file_path
        target.unlink()
        logger.info("Deleted file: %s", target)

        if not delete_parent_dir:
            return

        # rmdir() only succeeds on an empty directory; any OSError is logged and swallowed.
        try:
            target.parent.rmdir()
            logger.info("Removed empty directory: %s", target.parent)
        except OSError:
            logger.exception(
                "Directory not empty or not enough permissions. Cannot be removed: %s", target.parent
            )
    else:
        logger.info("File does not exist, nothing to delete")

src/twyn/main.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,14 @@
99
AvailableLoggingLevels,
1010
SelectorMethod,
1111
)
12-
from twyn.base.utils import _normalize_packages
12+
from twyn.base.utils import normalize_packages
1313
from twyn.config.config_handler import ConfigHandler
1414
from twyn.dependency_parser.dependency_selector import DependencySelector
1515
from twyn.file_handler.file_handler import FileHandler
1616
from twyn.similarity.algorithm import EditDistance, SimilarityThreshold
1717
from twyn.trusted_packages import TopPyPiReference
18+
from twyn.trusted_packages.cache_handler import CacheHandler
19+
from twyn.trusted_packages.constants import TRUSTED_PACKAGES_FILE_PATH
1820
from twyn.trusted_packages.selectors import AbstractSelector
1921
from twyn.trusted_packages.trusted_packages import (
2022
TrustedPackages,
@@ -39,15 +41,16 @@ def check_dependencies(
3941
)
4042
_set_logging_level(config.logging_level)
4143

44+
cache_handler = CacheHandler(FileHandler(TRUSTED_PACKAGES_FILE_PATH))
4245
trusted_packages = TrustedPackages(
43-
names=TopPyPiReference(source=config.pypi_reference).get_packages(use_cache),
46+
names=TopPyPiReference(source=config.pypi_reference, cache_handler=cache_handler).get_packages(use_cache),
4447
algorithm=EditDistance(),
4548
selector=get_candidate_selector(config.selector_method),
4649
threshold_class=SimilarityThreshold,
4750
)
48-
normalized_allowlist_packages = _normalize_packages(config.allowlist)
51+
normalized_allowlist_packages = normalize_packages(config.allowlist)
4952
dependencies = dependencies if dependencies else get_parsed_dependencies_from_file(config.dependency_file)
50-
normalized_dependencies = _normalize_packages(dependencies)
53+
normalized_dependencies = normalize_packages(dependencies)
5154

5255
errors: list[TyposquatCheckResult] = []
5356
for dependency in track(normalized_dependencies, description="Processing..."):
src/twyn/trusted_packages/cache_handler.py

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
import json
2+
import logging
3+
from datetime import datetime
4+
from functools import cached_property
5+
from typing import Any, Optional
6+
7+
from pydantic import BaseModel, ValidationError, field_validator
8+
9+
from twyn.base.exceptions import PackageNormalizingError
10+
from twyn.base.utils import normalize_packages
11+
from twyn.file_handler.exceptions import PathIsNotFileError, PathNotFoundError
12+
from twyn.file_handler.file_handler import FileHandler
13+
from twyn.trusted_packages.constants import TRUSTED_PACKAGES_MAX_RETENTION_DAYS
14+
15+
logger = logging.getLogger("twyn")
16+
17+
18+
class CacheEntry(BaseModel):
    """A cached set of package names together with the date it was saved."""

    saved_date: str  # must be parseable by datetime.fromisoformat
    packages: set[str]  # replaced by its normalized form during validation

    @field_validator("saved_date")
    @classmethod
    def validate_saved_date(cls, v: str) -> str:
        """Return ``v`` unchanged when it parses as an ISO-format date, otherwise raise ``ValueError``."""
        try:
            datetime.fromisoformat(v)
        except (ValueError, TypeError) as exc:
            raise ValueError(f"Invalid saved_date format: {exc}") from exc
        return v

    @field_validator("packages")
    @classmethod
    def validate_packages(cls, v: set[str]) -> set[str]:
        """Return the normalized package names, re-raising normalization failures as ``ValueError``."""
        try:
            normalized = normalize_packages(v)
        except PackageNormalizingError as exc:
            raise ValueError(f"Failed to normalize packages: {exc}") from exc
        return normalized
39+
40+
41+
class CacheContent(BaseModel):
    """The whole cache document: one ``CacheEntry`` per source."""

    entries: dict[str, CacheEntry]  # keyed by source

    def get_entry(self, source: str) -> Optional[CacheEntry]:
        """Return the entry stored for ``source``, or ``None`` when there is none."""
        return self.entries.get(source)

    def add_or_modify_entry(self, source: str, data: CacheEntry) -> None:
        """Store ``data`` under ``source``, replacing any entry already there."""
        self.entries.update({source: data})
49+
50+
51+
class CacheHandler:
    """Read, write and clear the cache file, validating its content when it is loaded.

    The cache is a JSON document that is parsed into a ``CacheContent`` model.
    A missing, malformed or invalid cache is treated as empty instead of raising.
    """

    def __init__(self, file_handler: FileHandler) -> None:
        self._file_handler = file_handler

    @cached_property
    def content(self) -> CacheContent:
        """Return the validated cache content, loading it from disk on first access.

        Falls back to an empty ``CacheContent`` when the file cannot be read as a
        JSON object or when the data fails model validation.
        """
        raw_content = self._read()
        try:
            if raw_content:
                return CacheContent(**raw_content)
        except ValidationError:
            logger.exception("Could not read cache. Cache is corrupt.")

        return CacheContent(entries={})

    def exists(self) -> bool:
        """Check if cache file exists."""
        return self._file_handler.exists()

    def is_entry_outdated(self, entry: CacheEntry) -> bool:
        """Return True when ``entry`` is older than the retention period or its date cannot be parsed."""
        try:
            saved_date = datetime.fromisoformat(entry.saved_date).date()
            days_diff = (datetime.today().date() - saved_date).days
        except (ValueError, AttributeError):
            logger.warning("Invalid date format in cache entry")
            return True
        else:
            return days_diff > TRUSTED_PACKAGES_MAX_RETENTION_DAYS

    def write_entry(self, source: str, data: CacheEntry) -> None:
        """Store ``data`` under ``source`` and persist the whole cache content to the cache file."""
        self.content.add_or_modify_entry(source, data)
        # mode="json" yields JSON-compatible builtins (e.g. the packages set becomes a list).
        self._write(self.content.model_dump(mode="json"))

    def get_cache_entry(self, source: str) -> Optional[CacheEntry]:
        """Return the cached entry for ``source``, or ``None`` when it is missing or outdated."""
        entry = self.content.get_entry(source)
        if entry and not self.is_entry_outdated(entry):
            return entry
        return None

    def clear(self) -> None:
        """Delete the cache file and ask the file handler to remove its parent directory as well."""
        self._file_handler.delete(delete_parent_dir=True)

    def _read(self) -> dict[str, Any]:
        """Read the cache file and parse it as a JSON object.

        Returns an empty dict when the file cannot be found, is not valid JSON,
        or its top-level JSON value is not an object.
        """
        try:
            content = self._file_handler.read()
        except (PathNotFoundError, PathIsNotFileError):
            logger.info("Cache file not found")
            return {}

        try:
            parsed = json.loads(content)
        except json.JSONDecodeError as e:
            logger.warning("Failed to decode JSON from cache file: %s", e)
            return {}

        # Valid JSON may still be a list, string or number; `content` unpacks the
        # result as keyword arguments, which would raise TypeError for those.
        if not isinstance(parsed, dict):
            logger.warning("Cache file does not contain a JSON object")
            return {}

        return parsed

    def _write(self, data: dict[str, Any]) -> None:
        """Serialize ``data`` to JSON and write it to the cache file, creating parent directories if needed.

        A serialization failure is logged and nothing is written.
        """
        try:
            json_content = json.dumps(data)
        except (TypeError, ValueError):
            logger.exception("Failed to serialize data to JSON")
            return None

        # Ensure parent directory exists
        self._file_handler.file_path.parent.mkdir(parents=True, exist_ok=True)
        self._file_handler.write(json_content)
        logger.debug("Successfully wrote cache data to %s", self._file_handler.file_path)

0 commit comments

Comments
 (0)