Skip to content

Commit e997e3f

Browse files
committed
Cleanup implementation
1 parent 978b347 commit e997e3f

11 files changed

Lines changed: 135 additions & 155 deletions

File tree

dfetch/commands/report.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@
1414
from dfetch.manifest.project import ProjectEntry
1515
from dfetch.project import create_super_project
1616
from dfetch.project.metadata import Metadata
17-
from dfetch.project.subproject import SubProject
1817
from dfetch.reporting import REPORTERS, ReportTypes
1918
from dfetch.util.license import License, guess_license_in_file
19+
from dfetch.util.util import is_license_file
2020

2121
logger = get_logger(__name__)
2222

@@ -89,7 +89,7 @@ def _determine_licenses(project: ProjectEntry) -> list[License]:
8989

9090
license_files = []
9191
with dfetch.util.util.in_directory(project.destination):
92-
for license_file in filter(SubProject.is_license_file, glob.glob("*")):
92+
for license_file in filter(is_license_file, glob.glob("*")):
9393
logger.debug(f"Found license file {license_file} for {project.name}")
9494
guessed_license = guess_license_in_file(license_file)
9595

dfetch/manifest/project.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -336,8 +336,8 @@ class Integrity:
336336
Holds the ``hash:`` sub-field today and is designed to accommodate
337337
future signature-verification fields:
338338
339-
* ``sig`` URL of a detached signature file (``.sig`` / ``.asc``).
340-
* ``sig_key`` URL or fingerprint of the signing key (``.p7s`` / ``.gpg``).
339+
* ``sig`` - URL of a detached signature file (``.sig`` / ``.asc``).
340+
* ``sig_key`` - URL or fingerprint of the signing key (``.p7s`` / ``.gpg``).
341341
"""
342342

343343
hash: str = field(default="")

dfetch/project/archivesubproject.py

Lines changed: 40 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,15 @@
22
33
Archives are a third VCS type alongside ``git`` and ``svn``. They represent
44
versioned dependencies that are distributed as ``.tar.gz``, ``.tgz``,
5-
``.tar.bz2``, ``.tar.xz`` or ``.zip`` files reachable via any URL that Python's
6-
:mod:`urllib.request` understands (``http://``, ``https://``, ``file://``, …).
5+
``.tar.bz2``, ``.tar.xz`` or ``.zip`` files reachable via ``http://``,
6+
``https://``, or ``file://`` URLs.
77
88
Unlike git and SVN, archives have no inherent "branching" or "tagging"
99
concept. Version identity is expressed through:
1010
11-
* **No hash** the URL itself acts as the identity. The archive is
11+
* **No hash** - the URL itself acts as the identity. The archive is
1212
considered up-to-date as long as the same URL is still reachable.
13-
* **``integrity.hash: <algorithm>:<hex>``** the cryptographic hash of the
13+
* **``integrity.hash: <algorithm>:<hex>``** - the cryptographic hash of the
1414
archive file acts as the version identifier. The fetch step verifies the
1515
downloaded archive against this hash and raises an error on mismatch.
1616
@@ -41,22 +41,18 @@
4141

4242
from __future__ import annotations
4343

44+
import hmac
45+
import http.client
4446
import os
4547
import pathlib
4648
import tempfile
47-
import urllib.request as _ur
4849

4950
from dfetch.log import get_logger
5051
from dfetch.manifest.project import ProjectEntry
5152
from dfetch.manifest.version import Version
5253
from dfetch.project.subproject import SubProject
5354
from dfetch.vcs.archive import (
54-
_safe_compare_hex, # private helper, intentionally imported for internal use
55-
)
56-
from dfetch.vcs.archive import (
57-
_suffix_for_url, # private helper, intentionally imported for internal use
58-
)
59-
from dfetch.vcs.archive import (
55+
ARCHIVE_EXTENSIONS,
6056
SUPPORTED_HASH_ALGORITHMS,
6157
ArchiveLocalRepo,
6258
ArchiveRemote,
@@ -67,6 +63,24 @@
6763
logger = get_logger(__name__)
6864

6965

66+
def _safe_compare_hex(actual: str, expected: str) -> bool:
67+
"""Constant-time comparison of two hex digest strings.
68+
69+
Uses :func:`hmac.compare_digest` to avoid leaking timing information about
70+
the expected hash value.
71+
"""
72+
return hmac.compare_digest(actual.lower(), expected.lower())
73+
74+
75+
def _suffix_for_url(url: str) -> str:
    """Return the archive file suffix for *url* (e.g. ``'.tar.gz'``, ``'.zip'``).

    The lower-cased URL is matched against ``ARCHIVE_EXTENSIONS``, longest
    extension first.  When the raw URL does not end in a known extension, the
    URL's path component is tried as well, so that links carrying a query
    string or fragment (``.../pkg.tar.gz?token=abc``) are still recognised.

    Args:
        url: The archive URL.

    Returns:
        The matching extension, or ``".archive"`` when none matches.
    """
    from urllib.parse import urlsplit  # local import keeps the block self-contained

    lowered = url.lower()
    candidates = [lowered]
    try:
        path_only = urlsplit(lowered).path
    except ValueError:
        # Malformed URL (e.g. broken IPv6 literal): only the raw URL is checked.
        path_only = ""
    if path_only and path_only != lowered:
        candidates.append(path_only)

    # Longest first, so the most specific extension wins when one extension
    # is a suffix of another.
    extensions = sorted(ARCHIVE_EXTENSIONS, key=len, reverse=True)
    for candidate in candidates:
        for ext in extensions:
            if candidate.endswith(ext):
                return ext
    return ".archive"
82+
83+
7084
class ArchiveSubProject(SubProject):
7185
"""A project fetched from a tar/zip archive URL.
7286
@@ -83,10 +97,6 @@ def __init__(self, project: ProjectEntry) -> None:
8397
self._project_entry = project
8498
self._remote_repo = ArchiveRemote(project.remote_url)
8599

86-
# ------------------------------------------------------------------
87-
# SubProject abstract interface
88-
# ------------------------------------------------------------------
89-
90100
def check(self) -> bool:
91101
"""Return *True* when the project URL looks like an archive."""
92102
return is_archive_url(self.remote)
@@ -98,16 +108,16 @@ def revision_is_enough() -> bool:
98108

99109
@staticmethod
100110
def list_tool_info() -> None:
101-
"""Log information about the archive fetching tool (Python's urllib)."""
102-
SubProject._log_tool("urllib", _ur.__doc__ or "built-in")
111+
"""Log information about the archive fetching tool (Python's http.client)."""
112+
SubProject._log_tool("http.client", http.client.__doc__ or "built-in")
103113

104114
def get_default_branch(self) -> str:
105115
"""Archives have no branches; return an empty string."""
106116
return ""
107117

108118
def _latest_revision_on_branch(self, branch: str) -> str: # noqa: ARG002
109119
"""For archives the 'latest revision' is always the URL (or hash)."""
110-
return self._project_entry.remote_url
120+
return self.remote
111121

112122
def _download_and_compute_hash(self, algorithm: str = "sha256") -> str:
113123
"""Download the archive to a temporary file and return its hash.
@@ -117,20 +127,16 @@ def _download_and_compute_hash(self, algorithm: str = "sha256") -> str:
117127
Raises:
118128
RuntimeError: On download failure or unsupported algorithm.
119129
"""
120-
tmp_path: str | None = None
130+
fd, tmp_path = tempfile.mkstemp(suffix=_suffix_for_url(self.remote))
131+
os.close(fd)
121132
try:
122-
with tempfile.NamedTemporaryFile(
123-
suffix=_suffix_for_url(self._project_entry.remote_url), delete=False
124-
) as tmp:
125-
tmp_path = tmp.name
126133
self._remote_repo.download(tmp_path)
127134
return compute_hash(tmp_path, algorithm)
128135
finally:
129-
if tmp_path:
130-
try:
131-
os.remove(tmp_path)
132-
except OSError:
133-
pass
136+
try:
137+
os.remove(tmp_path)
138+
except OSError:
139+
pass
134140

135141
def _does_revision_exist(self, revision: str) -> bool:
136142
"""Check whether *revision* (a hash or URL string) is still valid.
@@ -151,17 +157,13 @@ def _does_revision_exist(self, revision: str) -> bool:
151157
except RuntimeError:
152158
return False
153159

154-
# revision is the URL just check accessibility
160+
# revision is the URL - just check accessibility
155161
return self._remote_repo.is_accessible()
156162

157163
def _list_of_tags(self) -> list[str]:
158164
"""Archives have no tags; returns an empty list."""
159165
return []
160166

161-
# ------------------------------------------------------------------
162-
# Version overrides
163-
# ------------------------------------------------------------------
164-
165167
@property
166168
def wanted_version(self) -> Version:
167169
"""Version derived from the ``integrity.hash`` field or the archive URL.
@@ -174,11 +176,7 @@ def wanted_version(self) -> Version:
174176
"""
175177
if self._project_entry.hash:
176178
return Version(revision=self._project_entry.hash)
177-
return Version(revision=self._project_entry.remote_url)
178-
179-
# ------------------------------------------------------------------
180-
# Fetch
181-
# ------------------------------------------------------------------
179+
return Version(revision=self.remote)
182180

183181
def _fetch_impl(self, version: Version) -> Version:
184182
"""Download and extract the archive to the local destination.
@@ -193,15 +191,12 @@ def _fetch_impl(self, version: Version) -> Version:
193191
Returns:
194192
The version that was actually fetched (hash string or URL).
195193
"""
196-
url = self._project_entry.remote_url
197194
expected_hash = self._project_entry.hash
198195

199196
pathlib.Path(self.local_path).mkdir(parents=True, exist_ok=True)
200197

201-
suffix = _suffix_for_url(url)
202-
with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
203-
tmp_path = tmp.name
204-
198+
fd, tmp_path = tempfile.mkstemp(suffix=_suffix_for_url(self.remote))
199+
os.close(fd)
205200
try:
206201
self._remote_repo.download(tmp_path)
207202

@@ -231,13 +226,7 @@ def _fetch_impl(self, version: Version) -> Version:
231226
except OSError:
232227
pass
233228

234-
if expected_hash:
235-
return Version(revision=expected_hash)
236-
return Version(revision=url)
237-
238-
# ------------------------------------------------------------------
239-
# Freeze support
240-
# ------------------------------------------------------------------
229+
return Version(revision=expected_hash if expected_hash else self.remote)
241230

242231
def freeze_project(self, project: ProjectEntry) -> str | None:
243232
"""Pin *project* to a cryptographic hash of the archive.
@@ -264,7 +253,7 @@ def freeze_project(self, project: ProjectEntry) -> str | None:
264253

265254
revision = on_disk.revision
266255

267-
# Already hash-pinned revision is "sha256:<hex>"
256+
# Already hash-pinned - revision is "sha256:<hex>"
268257
if revision.startswith(tuple(f"{a}:" for a in SUPPORTED_HASH_ALGORITHMS)):
269258
if project.hash == revision:
270259
return None

dfetch/project/gitsubproject.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from dfetch.manifest.project import ProjectEntry
99
from dfetch.manifest.version import Version
1010
from dfetch.project.subproject import SubProject
11-
from dfetch.util.util import safe_rmtree
11+
from dfetch.util.util import LICENSE_GLOBS, safe_rmtree
1212
from dfetch.vcs.git import GitLocalRepo, GitRemote, get_git_version
1313

1414
logger = get_logger(__name__)
@@ -64,8 +64,8 @@ def _fetch_impl(self, version: Version) -> Version:
6464
# When exporting a file, the destination directory must already exist
6565
pathlib.Path(self.local_path).mkdir(parents=True, exist_ok=True)
6666

67-
license_globs = [f"/{name.lower()}" for name in self.LICENSE_GLOBS] + [
68-
f"/{name.upper()}" for name in self.LICENSE_GLOBS
67+
license_globs = [f"/{name.lower()}" for name in LICENSE_GLOBS] + [
68+
f"/{name.upper()}" for name in LICENSE_GLOBS
6969
]
7070

7171
local_repo = GitLocalRepo(self.local_path)

dfetch/project/subproject.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
"""SubProject."""
22

3-
import fnmatch
43
import os
54
import pathlib
65
from abc import ABC, abstractmethod
@@ -26,7 +25,6 @@ class SubProject(ABC):
2625
"""
2726

2827
NAME = ""
29-
LICENSE_GLOBS = ["licen[cs]e*", "copying*", "copyright*"]
3028

3129
def __init__(self, project: ProjectEntry) -> None:
3230
"""Create the subproject."""
@@ -413,11 +411,3 @@ def freeze_project(self, project: ProjectEntry) -> str | None:
413411
on_disk_version.revision or on_disk_version.tag or str(on_disk_version)
414412
)
415413
return None
416-
417-
@staticmethod
418-
def is_license_file(filename: str) -> bool:
419-
"""Check if the given filename is a license file."""
420-
return any(
421-
fnmatch.fnmatch(filename.lower(), pattern)
422-
for pattern in SubProject.LICENSE_GLOBS
423-
)

dfetch/project/svnsubproject.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from dfetch.util.util import (
1212
find_matching_files,
1313
find_non_matching_files,
14+
is_license_file,
1415
safe_rm,
1516
)
1617
from dfetch.vcs.svn import SvnRemote, SvnRepo, get_svn_version
@@ -103,7 +104,7 @@ def _determine_what_to_fetch(self, version: Version) -> tuple[str, str, str]:
103104
def _remove_ignored_files(self) -> None:
104105
"""Remove any ignored files, whilst keeping license files."""
105106
for file_or_dir in find_matching_files(self.local_path, self.ignore):
106-
if not (file_or_dir.is_file() and self.is_license_file(file_or_dir.name)):
107+
if not (file_or_dir.is_file() and is_license_file(file_or_dir.name)):
107108
safe_rm(file_or_dir)
108109

109110
def _fetch_impl(self, version: Version) -> Version:
@@ -168,9 +169,7 @@ def _get_info(self, branch: str) -> dict[str, str]:
168169
def _license_files(url_path: str) -> list[str]:
169170
return [
170171
str(license)
171-
for license in filter(
172-
SvnSubProject.is_license_file, SvnRepo.files_in_path(url_path)
173-
)
172+
for license in filter(is_license_file, SvnRepo.files_in_path(url_path))
174173
]
175174

176175
def _get_revision(self, branch: str) -> str:

dfetch/util/util.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,72 @@
1212

1313
from _hashlib import HASH
1414

15+
#: Glob patterns used to identify license files by filename.
16+
LICENSE_GLOBS = ["licen[cs]e*", "copying*", "copyright*"]
17+
18+
19+
def is_license_file(filename: str) -> bool:
    """Tell whether *filename* looks like a license file.

    The name is lower-cased and tested against every glob in
    ``LICENSE_GLOBS``; the first hit is enough.
    """
    lowered = filename.lower()
    for pattern in LICENSE_GLOBS:
        if fnmatch.fnmatch(lowered, pattern):
            return True
    return False
22+
23+
24+
def _copy_entry(src_entry: str, dest_entry: str) -> None:
25+
"""Copy a single file or directory *src_entry* to *dest_entry*."""
26+
if os.path.isdir(src_entry):
27+
shutil.copytree(src_entry, dest_entry)
28+
else:
29+
shutil.copy2(src_entry, dest_entry)
30+
31+
32+
def copy_directory_contents(src_dir: str, dest_dir: str) -> None:
    """Replicate each direct child of *src_dir* inside *dest_dir*.

    Every child keeps its own name; the actual copying of a child (file or
    directory) is delegated to ``_copy_entry``.
    """
    for child in os.listdir(src_dir):
        source = os.path.join(src_dir, child)
        target = os.path.join(dest_dir, child)
        _copy_entry(source, target)
42+
43+
44+
def copy_src_subset(
    src_root: str, dest_dir: str, src: str, keep_licenses: bool
) -> None:
    """Copy a *src* sub-path from *src_root* into *dest_dir*.

    When *src* is a directory, its contents are copied flat into *dest_dir*.
    When *src* is a single file, that file is copied into *dest_dir*.
    If *keep_licenses* is ``True``, any license files found directly in
    *src_root* are also copied regardless of the *src* filter.

    A *src* that points outside *src_root* (an absolute path, or one that
    climbs out via ``..``) is rejected in the same way as a missing path, so
    nothing from outside the extracted tree is ever copied.

    Args:
        src_root: Directory the sub-path is resolved against.
        dest_dir: Directory that receives the copies.
        src: Path, relative to *src_root*, of the file or directory to copy.
        keep_licenses: Also copy license files located directly in *src_root*.

    Raises:
        RuntimeError: When *src* does not exist inside *src_root*.
    """
    src_path = os.path.join(src_root, src)

    # Lexical containment check; symlinks are deliberately not resolved so
    # in-tree paths keep working exactly as before.
    root_abs = os.path.normcase(os.path.abspath(src_root))
    path_abs = os.path.normcase(os.path.abspath(src_path))
    try:
        inside_root = os.path.commonpath([root_abs, path_abs]) == root_abs
    except ValueError:
        # e.g. paths on different drives on Windows
        inside_root = False

    if inside_root and os.path.isdir(src_path):
        copy_directory_contents(src_path, dest_dir)
    elif inside_root and os.path.isfile(src_path):
        shutil.copy2(src_path, os.path.join(dest_dir, os.path.basename(src_path)))
    else:
        raise RuntimeError(f"src {src!r} was not found in the extracted archive")

    if keep_licenses:
        for entry_name in os.listdir(src_root):
            full_path = os.path.join(src_root, entry_name)
            if os.path.isfile(full_path) and is_license_file(entry_name):
                shutil.copy2(full_path, os.path.join(dest_dir, entry_name))
70+
71+
72+
def prune_files_by_pattern(directory: str, patterns: Sequence[str]) -> None:
    """Delete every match of *patterns* that is found for *directory*.

    Matches come from ``find_matching_files``.  A match that is a regular
    file and whose name is recognised by ``is_license_file`` is kept; every
    other match is removed with ``safe_rm``.
    """
    for match in find_matching_files(directory, patterns):
        is_protected_license = match.is_file() and is_license_file(match.name)
        if is_protected_license:
            continue
        safe_rm(match)
80+
1581

1682
def _remove_readonly(func: Any, path: str, _: Any) -> None:
1783
if not os.access(path, os.W_OK):

0 commit comments

Comments
 (0)