Skip to content

Commit 8110b37

Browse files
committed
Fix edge case when creating metadata file
fixes #1101
1 parent beb85e7 commit 8110b37

7 files changed

Lines changed: 64 additions & 16 deletions

File tree

CHANGES/1101.bugfix

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Fixed edge case where metadata file did not match wheel metadata.

pulp_python/app/models.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -216,8 +216,10 @@ def init_from_artifact_and_relative_path(artifact, relative_path):
216216
"""Used when downloading package from pull-through cache."""
217217
path = PurePath(relative_path)
218218
data = artifact_to_python_content_data(path.name, artifact, domain=get_domain())
219+
name = data["name"]
220+
version = data["version"]
219221
artifacts = {path.name: artifact}
220-
if metadata_artifact := artifact_to_metadata_artifact(path.name, artifact):
222+
if metadata_artifact := artifact_to_metadata_artifact(path.name, artifact, name, version):
221223
artifacts[f"{path.name}.metadata"] = metadata_artifact
222224
return PythonPackageContent(**data), artifacts
223225

pulp_python/app/serializers.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -410,7 +410,11 @@ def deferred_validate(self, data):
410410

411411
# Create metadata artifact for wheel files
412412
if filename.endswith(".whl"):
413-
if metadata_artifact := artifact_to_metadata_artifact(filename, artifact):
413+
name = data["name"]
414+
version = data["version"]
415+
if metadata_artifact := artifact_to_metadata_artifact(
416+
filename, artifact, name, version
417+
):
414418
data["metadata_artifact"] = metadata_artifact
415419
data["metadata_sha256"] = metadata_artifact.sha256
416420

@@ -552,9 +556,11 @@ def validate(self, data):
552556
)
553557
# Create metadata artifact for wheel files
554558
if filename.endswith(".whl"):
559+
name = data["name"]
560+
version = data["version"]
555561
with tempfile.TemporaryDirectory(dir=settings.WORKING_DIRECTORY) as temp_dir:
556562
if metadata_artifact := artifact_to_metadata_artifact(
557-
filename, artifact, tmp_dir=temp_dir
563+
filename, artifact, name, version, tmp_dir=temp_dir
558564
):
559565
data["metadata_artifact"] = metadata_artifact
560566
data["metadata_sha256"] = metadata_artifact.sha256

pulp_python/app/tasks/upload.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,9 @@ def create():
9898
content = PythonPackageContent.objects.create(**data)
9999
ContentArtifact.objects.create(artifact=artifact, content=content, relative_path=filename)
100100

101-
if metadata_artifact := artifact_to_metadata_artifact(filename, artifact):
101+
name = data["name"]
102+
version = data["version"]
103+
if metadata_artifact := artifact_to_metadata_artifact(filename, artifact, name, version):
102104
ContentArtifact.objects.create(
103105
artifact=metadata_artifact, content=content, relative_path=f"{filename}.metadata"
104106
)

pulp_python/app/utils.py

Lines changed: 33 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -197,7 +197,9 @@ def get_project_metadata_from_file(filename):
197197
packagetype = DIST_EXTENSIONS[extensions[pkg_type_index]]
198198

199199
metadata = DIST_TYPES[packagetype](filename)
200-
metadata.metadata_sha256 = compute_metadata_sha256(filename)
200+
name = metadata.name
201+
version = metadata.version
202+
metadata.metadata_sha256 = compute_metadata_sha256(filename, name, version)
201203
metadata.packagetype = packagetype
202204
if packagetype == "sdist":
203205
metadata.python_version = "source"
@@ -210,31 +212,53 @@ def get_project_metadata_from_file(filename):
210212
return metadata
211213

212214

213-
def extract_wheel_metadata(filename: str) -> bytes | None:
215+
def extract_non_normalized_pkg_name_with_version(
216+
filename: str, name: str, version: str
217+
) -> str | None:
218+
"""
219+
Search `filename` for a package name that normalizes to `name` and is followed by `version`.
220+
221+
Returns the original (non-normalized) name with version if found, otherwise None.
222+
"""
223+
# Ensure the package name is normalized
224+
normalized = re.sub(r"[-_.]+", "-", name).lower()
225+
226+
parts = normalized.split("-")
227+
name_pattern = r"[-_.]+".join(map(re.escape, parts))
228+
pattern = rf"({name_pattern})-{re.escape(version)}"
229+
230+
match = re.search(pattern, filename, re.IGNORECASE)
231+
if match:
232+
return match.group(0)
233+
return None
234+
235+
236+
def extract_wheel_metadata(filename: str, name: str, version: str) -> bytes | None:
    """
    Extract the metadata file content from a wheel file.

    Only the ``<name>-<version>.dist-info/METADATA`` entry belonging to the package itself
    is read, where ``<name>-<version>`` is taken as it is spelled in `filename`. Other
    ``*.dist-info/METADATA`` entries that may exist inside the archive are ignored.

    Args:
        filename: Path to the wheel file; must end with ``.whl``.
        name: Package name used to locate the dist-info directory.
        version: Package version used to locate the dist-info directory.

    Returns:
        The raw metadata content as bytes or None if metadata cannot be extracted.
    """
    if not filename.endswith(".whl"):
        return None

    original_name_version = extract_non_normalized_pkg_name_with_version(filename, name, version)
    if original_name_version is None:
        # Bail out explicitly instead of looking up a literal "None.dist-info/METADATA".
        log.warning(
            f"Failed to extract metadata file from {filename}: "
            f"could not find {name}-{version} in the file name"
        )
        return None

    metadata_path = f"{original_name_version}.dist-info/METADATA"
    try:
        with zipfile.ZipFile(filename, "r") as f:
            # ZipFile.read raises KeyError when the entry does not exist.
            return f.read(metadata_path)
    except (zipfile.BadZipFile, KeyError, OSError) as e:
        log.warning(f"Failed to extract metadata file from {filename}: {e}")
    return None
229253

230254

231-
def compute_metadata_sha256(filename: str) -> str | None:
255+
def compute_metadata_sha256(filename: str, name: str, version: str) -> str | None:
    """
    Hash the metadata file of a Python package with SHA256.

    The metadata content is obtained from `extract_wheel_metadata` using the given
    `filename`, `name` and `version`.

    Returns the hex digest, or None when no metadata content could be extracted.
    """
    content = extract_wheel_metadata(filename, name, version)
    if not content:
        return None
    return hashlib.sha256(content).hexdigest()
239263

240264

@@ -260,7 +284,7 @@ def artifact_to_python_content_data(filename, artifact, domain=None):
260284

261285

262286
def artifact_to_metadata_artifact(
263-
filename: str, artifact: Artifact, tmp_dir: str = "."
287+
filename: str, artifact: Artifact, name: str, version: str, tmp_dir: str = "."
264288
) -> Artifact | None:
265289
"""
266290
Creates artifact for metadata from the provided wheel artifact.
@@ -274,7 +298,7 @@ def artifact_to_metadata_artifact(
274298
shutil.copyfileobj(artifact.file, temp_file)
275299
temp_file.flush()
276300

277-
metadata_content = extract_wheel_metadata(temp_wheel_path)
301+
metadata_content = extract_wheel_metadata(temp_wheel_path, name, version)
278302
if not metadata_content:
279303
return None
280304

pulp_python/tests/functional/api/test_upload.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
from pulp_python.tests.functional.constants import (
44
PYTHON_EGG_FILENAME,
55
PYTHON_EGG_URL,
6+
PYTHON_FIXTURES_URL,
67
PYTHON_WHEEL_FILENAME,
78
PYTHON_WHEEL_URL,
89
PYTHON_EGG_SHA256,
@@ -61,7 +62,9 @@ def test_synchronous_package_upload_with_metadata(
6162
"""
6263
Test that the synchronous upload of a Python wheel package creates a metadata artifact.
6364
"""
64-
python_file = download_python_file(PYTHON_WHEEL_FILENAME, PYTHON_WHEEL_URL)
65+
wheel_filename = "setuptools-80.9.0-py3-none-any.whl"
66+
wheel_url = urljoin(urljoin(PYTHON_FIXTURES_URL, "packages/"), wheel_filename)
67+
python_file = download_python_file(wheel_filename, wheel_url)
6568
content_body = {"file": python_file}
6669
content = python_bindings.ContentPackagesApi.upload(**content_body)
6770

@@ -70,7 +73,7 @@ def test_synchronous_package_upload_with_metadata(
7073
distro = python_distribution_factory(repository=python_repo)
7174

7275
# Test that metadata is accessible
73-
ensure_metadata(pulp_content_url, distro.base_path, PYTHON_WHEEL_FILENAME)
76+
ensure_metadata(pulp_content_url, distro.base_path, wheel_filename)
7477

7578

7679
@pytest.mark.parallel

pulp_python/tests/functional/utils.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
import re
12
import requests
23

34
from urllib.parse import urljoin
@@ -127,9 +128,18 @@ def ensure_metadata(pulp_content_url, distro_base_path, filename):
127128
"""
128129
Tests that metadata is accessible for a given wheel package filename.
129130
"""
131+
from pulp_python.app.utils import DIST_REGEXES
132+
130133
relative_path = f"{distro_base_path}/{filename}.metadata"
131134
metadata_url = urljoin(pulp_content_url, relative_path)
132135
metadata_response = requests.get(metadata_url)
133136
assert metadata_response.status_code == 200
134137
assert len(metadata_response.content) > 0
135-
assert "Name: " in metadata_response.text
138+
139+
regex = DIST_REGEXES[f".{filename.rsplit('.', 1)[1]}"]
140+
match = regex.match(filename)
141+
name = match.group(1)
142+
normalized = re.sub(r"[-_.]+", "-", name).lower()
143+
version = match.group(2)
144+
assert f"Name: {normalized}" in metadata_response.text
145+
assert f"Version: {version}" in metadata_response.text

0 commit comments

Comments
 (0)