Skip to content

Commit 2d5569c

Browse files
committed
Add new field to repository to prevent duplicate filename uploads
fixes: #1162 Generated by: Claude-Opus-4.6
1 parent 9271553 commit 2d5569c

File tree

5 files changed

+173
-26
lines changed

5 files changed

+173
-26
lines changed

CHANGES/1162.feature

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Added a new `allow_package_substitution` boolean field to PythonRepository (default: True).
2+
When set to False, any new repository version that would implicitly replace existing content
3+
with content that has the same filename but a different sha256 checksum is rejected. This
4+
applies to all repository version creation paths including uploads, modify, and sync. Content
5+
with a matching checksum is allowed through idempotently.
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from django.db import migrations, models
2+
3+
4+
class Migration(migrations.Migration):
5+
6+
dependencies = [
7+
("python", "0020_pythonpackagecontent_name_normalized"),
8+
]
9+
10+
operations = [
11+
migrations.AddField(
12+
model_name="pythonrepository",
13+
name="allow_package_substitution",
14+
field=models.BooleanField(default=True),
15+
),
16+
]

pulp_python/app/models.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,11 @@
3131
PYPI_LAST_SERIAL,
3232
PYPI_SERIAL_CONSTANT,
3333
)
34-
from pulpcore.plugin.repo_version_utils import remove_duplicates, validate_repo_version
34+
from pulpcore.plugin.repo_version_utils import (
35+
collect_duplicates,
36+
remove_duplicates,
37+
validate_repo_version,
38+
)
3539
from pulpcore.plugin.util import get_domain_pk, get_domain
3640

3741
log = getLogger(__name__)
@@ -362,6 +366,7 @@ class PythonRepository(Repository, AutoAddObjPermsMixin):
362366
PULL_THROUGH_SUPPORTED = True
363367

364368
autopublish = models.BooleanField(default=False)
369+
allow_package_substitution = models.BooleanField(default=True)
365370

366371
class Meta:
367372
default_related_name = "%(app_label)s_%(model_name)s"
@@ -390,6 +395,24 @@ def on_new_version(self, version):
390395
def finalize_new_version(self, new_version):
391396
"""
392397
Remove duplicate packages that have the same filename.
398+
399+
When allow_package_substitution is False, reject any new version that would implicitly
400+
replace existing content with different checksums (content substitution).
393401
"""
402+
if not self.allow_package_substitution:
403+
self._check_for_package_substitution(new_version)
394404
remove_duplicates(new_version)
395405
validate_repo_version(new_version)
406+
407+
def _check_for_package_substitution(self, new_version):
408+
"""
409+
Raise a ValueError if newly added packages would replace existing packages that have the
410+
same filename but a different sha256 checksum.
411+
"""
412+
duplicates = collect_duplicates(new_version.content, ("filename",))
413+
if duplicates:
414+
raise ValueError(
415+
"Found duplicate packages being added with the same filename but different checksums."
416+
"To allow this, set 'allow_package_substitution' to True on the repository."
417+
f"Duplicate packages: {duplicates}"
418+
)

pulp_python/app/serializers.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,23 @@ class PythonRepositorySerializer(core_serializers.RepositorySerializer):
5353
default=False,
5454
required=False,
5555
)
56+
allow_package_substitution = serializers.BooleanField(
57+
help_text=_(
58+
"Whether to allow package substitution (replacing existing packages with packages "
59+
"that have the same filename but a different checksum). When False, any new "
60+
"repository version that would cause such a substitution will be rejected. This "
61+
"applies to all repository version creation paths including uploads, modify, and "
62+
"sync. When True (the default), package substitution is allowed."
63+
),
64+
default=True,
65+
required=False,
66+
)
5667

5768
class Meta:
58-
fields = core_serializers.RepositorySerializer.Meta.fields + ("autopublish",)
69+
fields = core_serializers.RepositorySerializer.Meta.fields + (
70+
"autopublish",
71+
"allow_package_substitution",
72+
)
5973
model = python_models.PythonRepository
6074

6175

@@ -448,7 +462,7 @@ def create(self, validated_data):
448462
content = super().create(validated_data)
449463
if provenance:
450464
prov_sha256 = python_models.PackageProvenance.calculate_sha256(provenance)
451-
prov_model, _ = python_models.PackageProvenance.objects.get_or_create(
465+
prov_model, _created = python_models.PackageProvenance.objects.get_or_create(
452466
sha256=prov_sha256,
453467
_pulp_domain=get_domain(),
454468
defaults={"package": content, "provenance": provenance},

pulp_python/tests/functional/api/test_crud_content_unit.py

Lines changed: 112 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -144,44 +144,41 @@ def test_content_create_new_metadata(
144144
assert getattr(content, k) == v
145145

146146

147+
def get_package_url(package, filename):
148+
with PyPISimple() as client:
149+
page = client.get_project_page(package)
150+
for package in page.packages:
151+
if package.filename == filename:
152+
return package.url
153+
raise ValueError(f"Package {filename} not found")
154+
155+
147156
@pytest.mark.parallel
148157
def test_upload_metadata_23_spec(python_content_factory):
149158
"""Test that packages using metadata spec 2.3 can be uploaded to pulp."""
150159
filename = "urllib3-2.2.2-py3-none-any.whl"
151-
with PyPISimple() as client:
152-
page = client.get_project_page("urllib3")
153-
for package in page.packages:
154-
if package.filename == filename:
155-
content = python_content_factory(filename, url=package.url)
156-
assert content.metadata_version == "2.3"
157-
break
160+
url = get_package_url("urllib3", filename)
161+
content = python_content_factory(filename, url=url)
162+
assert content.metadata_version == "2.3"
158163

159164

160165
@pytest.mark.parallel
161166
def test_upload_requires_python(python_content_factory):
162167
filename = "pip-24.3.1-py3-none-any.whl"
163-
with PyPISimple() as client:
164-
page = client.get_project_page("pip")
165-
for package in page.packages:
166-
if package.filename == filename:
167-
content = python_content_factory(filename, url=package.url)
168-
assert content.requires_python == ">=3.8"
169-
break
168+
url = get_package_url("pip", filename)
169+
content = python_content_factory(filename, url=url)
170+
assert content.requires_python == ">=3.8"
170171

171172

172173
@pytest.mark.parallel
173174
def test_upload_metadata_24_spec(python_content_factory):
174175
"""Test that packages using metadata spec 2.4 can be uploaded to pulp."""
175176
filename = "setuptools-80.9.0.tar.gz"
176-
with PyPISimple() as client:
177-
page = client.get_project_page("setuptools")
178-
for package in page.packages:
179-
if package.filename == filename:
180-
content = python_content_factory(filename, url=package.url)
181-
assert content.metadata_version == "2.4"
182-
assert content.license_expression == "MIT"
183-
assert content.license_file == '["LICENSE"]'
184-
break
177+
url = get_package_url("setuptools", filename)
178+
content = python_content_factory(filename, url=url)
179+
assert content.metadata_version == "2.4"
180+
assert content.license_expression == "MIT"
181+
assert content.license_file == '["LICENSE"]'
185182

186183

187184
@pytest.mark.parallel
@@ -203,3 +200,95 @@ def test_package_creation_with_metadata(
203200
ensure_metadata(
204201
pulp_content_url, distro.base_path, PYTHON_WHEEL_FILENAME, "shelf-reader", "0.1"
205202
)
203+
204+
205+
@pytest.mark.parallel
206+
def test_disallow_package_substitution(
207+
monitor_task,
208+
python_bindings,
209+
python_repo_factory,
210+
):
211+
"""
212+
When allow_package_substitution=False, any new repository version that would substitute
213+
existing content (same filename, different sha256) is rejected. This applies to both
214+
content uploads and repository modify operations. Re-adding content with a matching
215+
sha256 succeeds idempotently.
216+
"""
217+
repo = python_repo_factory(allow_package_substitution=False)
218+
219+
# First upload succeeds
220+
content_body = {"relative_path": PYTHON_EGG_FILENAME, "file_url": PYTHON_EGG_URL}
221+
response = python_bindings.ContentPackagesApi.create(repository=repo.pulp_href, **content_body)
222+
task = monitor_task(response.task)
223+
content = python_bindings.ContentPackagesApi.read(task.created_resources[-1])
224+
assert content.filename == PYTHON_EGG_FILENAME
225+
226+
# Re-upload same artifact with same filename — should succeed (idempotent)
227+
response = python_bindings.ContentPackagesApi.create(repository=repo.pulp_href, **content_body)
228+
task = monitor_task(response.task)
229+
assert content.pulp_href in task.created_resources
230+
231+
# Upload a different artifact with the same filename — should be rejected
232+
second_filename = "pip-26.0.1.tar.gz"
233+
second_url = get_package_url("pip", second_filename)
234+
content_body2 = {"relative_path": PYTHON_EGG_FILENAME, "file_url": second_url}
235+
with pytest.raises(PulpTaskError) as exc:
236+
response = python_bindings.ContentPackagesApi.create(
237+
repository=repo.pulp_href, **content_body2
238+
)
239+
monitor_task(response.task)
240+
assert "already exists in repository" in exc.value.task.error["description"]
241+
assert "allow_package_substitution" in exc.value.task.error["description"]
242+
243+
# Also create the conflicting content without a repo, then try to add via modify
244+
response = python_bindings.ContentPackagesApi.create(**content_body2)
245+
task = monitor_task(response.task)
246+
content2 = python_bindings.ContentPackagesApi.read(task.created_resources[0])
247+
body = {"add_content_units": [content2.pulp_href]}
248+
with pytest.raises(PulpTaskError) as exc:
249+
monitor_task(python_bindings.RepositoriesPythonApi.modify(repo.pulp_href, body).task)
250+
assert "already exists in repository" in exc.value.task.error["description"]
251+
252+
# Verify the repository still has only the original content
253+
repo = python_bindings.RepositoriesPythonApi.read(repo.pulp_href)
254+
content_list = python_bindings.ContentPackagesApi.list(
255+
repository_version=repo.latest_version_href
256+
)
257+
assert content_list.count == 1
258+
assert content_list.results[0].sha256 == content.sha256
259+
260+
261+
@pytest.mark.parallel
262+
def test_package_substitution_allowed_by_default(
263+
monitor_task,
264+
python_bindings,
265+
python_repo_factory,
266+
):
267+
"""
268+
By default (allow_package_substitution=True), uploading a file with the same filename
269+
but different sha256 replaces the existing content in the repository.
270+
"""
271+
repo = python_repo_factory()
272+
assert repo.allow_package_substitution is True
273+
274+
content_body = {"relative_path": PYTHON_EGG_FILENAME, "file_url": PYTHON_EGG_URL}
275+
response = python_bindings.ContentPackagesApi.create(repository=repo.pulp_href, **content_body)
276+
task = monitor_task(response.task)
277+
content1 = python_bindings.ContentPackagesApi.read(task.created_resources[-1])
278+
279+
# Upload a different artifact with the same filename — should succeed and replace
280+
second_filename = "pip-26.0.tar.gz"
281+
second_url = get_package_url("pip", second_filename)
282+
content_body = {"relative_path": PYTHON_EGG_FILENAME, "file_url": second_url}
283+
response = python_bindings.ContentPackagesApi.create(repository=repo.pulp_href, **content_body)
284+
task = monitor_task(response.task)
285+
content2 = python_bindings.ContentPackagesApi.read(task.created_resources[-1])
286+
assert content2.pulp_href != content1.pulp_href
287+
288+
# Verify the repo has only the new content
289+
repo = python_bindings.RepositoriesPythonApi.read(repo.pulp_href)
290+
content_list = python_bindings.ContentPackagesApi.list(
291+
repository_version=repo.latest_version_href
292+
)
293+
assert content_list.count == 1
294+
assert content_list.results[0].sha256 == content2.sha256

0 commit comments

Comments
 (0)