Skip to content

Commit 4aff772

Browse files
committed
Nail down a few additional differences between pulp_rust and crates.io
Validate semver formatting and treat the comparison correctly. Warn users about keeping private registries on the same domain as public indexes. Add some additional tests. Assisted-By: claude-opus-4.6
1 parent b25c9f8 commit 4aff772

11 files changed

Lines changed: 579 additions & 39 deletions

File tree

docs/user/guides/private-registry.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,22 @@ my-internal-lib = { version = "1.0", registry = "my-crates" } # resolved from p
173173
attacks, where an attacker publishes a crate on the public registry with the same name as one
174174
of your private crates.
175175

176+
## Crate Name Handling
177+
178+
Crate names in the Cargo spec are case-insensitive, and hyphens and underscores are treated as
179+
equivalent (e.g. `my-crate` and `my_crate` refer to the same package). Pulp enforces this:
180+
publishing `my-crate` when `my_crate` already exists in the same repository is rejected as a
181+
duplicate. Yank and unyank operations use the same matching.
182+
183+
!!! warning "Domain Isolation"
184+
Pulp shares content objects globally within a
185+
[domain](https://docs.pulpproject.org/pulpcore/configuration/domains.html). A crate name
186+
collision between a private registry and a pull-through cache in the same domain could allow
187+
an upstream crate to shadow a private one (a form of
188+
[dependency confusion](https://medium.com/@alex.birsan/dependency-confusion-4a5d60fec610)).
189+
To mitigate this, consider placing private registries and public pull-through caches in
190+
**separate Pulp domains** so their content namespaces are fully isolated.
191+
176192
## Further Reading
177193

178194
- [Cargo registries configuration](https://doc.rust-lang.org/cargo/reference/registries.html) -- configuring alternate registries in Cargo

pulp_rust/app/migrations/0001_initial.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ class Migration(migrations.Migration):
1919
fields=[
2020
('content_ptr', models.OneToOneField(auto_created=True, on_delete=django.db.models.deletion.CASCADE, parent_link=True, primary_key=True, serialize=False, to='core.content')),
2121
('name', models.CharField(db_index=True, max_length=255)),
22+
('canonical_name', models.CharField(db_index=True, max_length=255)),
2223
('vers', models.CharField(db_index=True, max_length=64)),
2324
('cksum', models.CharField(db_index=True, max_length=64)),
2425
('features', models.JSONField(blank=True, default=dict)),
@@ -30,7 +31,7 @@ class Migration(migrations.Migration):
3031
],
3132
options={
3233
'default_related_name': '%(app_label)s_%(model_name)s',
33-
'unique_together': {('name', 'vers', '_pulp_domain')},
34+
'unique_together': {('name', 'vers', 'cksum', '_pulp_domain')},
3435
},
3536
bases=('core.content',),
3637
),

pulp_rust/app/models.py

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,11 @@
55
from django.db import models
66
from django_lifecycle import hook, AFTER_CREATE
77

8-
from pulp_rust.app.utils import extract_cargo_toml, extract_dependencies
8+
from pulp_rust.app.utils import (
9+
canonicalize_crate_name,
10+
extract_cargo_toml,
11+
extract_dependencies,
12+
)
913

1014
from pulpcore.plugin.models import (
1115
Content,
@@ -55,8 +59,25 @@ class RustContent(Content):
5559
Cargo registry index specification. Each instance corresponds to one line
5660
in a package's index file.
5761
62+
The `name` field preserves the original crate name as it appears in the
63+
package's `Cargo.toml` (e.g. `cfg-if`, `Serde-JSON`). This matches
64+
crates.io behavior and ensures that download paths and index entries use
65+
the author's intended name form.
66+
67+
The `canonical_name` field stores the canonical form (lowercased, hyphens
68+
replaced with underscores) for use in lookups where the Cargo spec's
69+
case-insensitive, hyphen/underscore-equivalent matching is needed - for
70+
example, duplicate detection during publish and yank operations.
71+
72+
Content uniqueness is enforced on `(name, vers, cksum, _pulp_domain)`.
73+
Including `cksum` allows different crates with the same name and version
74+
(e.g. a private crate and a public crate) to coexist as separate content
75+
objects within a domain, while `repo_key_fields` prevents both from
76+
appearing in the same repository version.
77+
5878
Fields:
59-
name: The package name (crate name)
79+
name: The package name as it appears in Cargo.toml
80+
canonical_name: Canonical form (lowercased, hyphens -> underscores)
6081
vers: The semantic version string (SemVer 2.0.0)
6182
cksum: SHA256 checksum of the .crate file (tarball)
6283
features: JSON object mapping feature names to their dependencies
@@ -69,9 +90,15 @@ class RustContent(Content):
6990
TYPE = "rust"
7091
repo_key_fields = ("name", "vers")
7192

72-
# Package name - alphanumeric characters, hyphens, and underscores allowed
93+
# Package name as it appears in the crate's Cargo.toml.
7394
name = models.CharField(max_length=255, blank=False, null=False, db_index=True)
7495

96+
# Canonical form of the name: lowercased with hyphens replaced by underscores.
97+
# Used for lookups where the Cargo spec's case-insensitive,
98+
# hyphen/underscore-equivalent matching is needed (e.g. duplicate detection,
99+
# yank operations).
100+
canonical_name = models.CharField(max_length=255, blank=False, null=False, db_index=True)
101+
75102
# Semantic version string following SemVer 2.0.0 specification
76103
vers = models.CharField(max_length=64, blank=False, null=False, db_index=True)
77104

@@ -115,6 +142,7 @@ def init_from_artifact_and_relative_path(artifact, relative_path):
115142

116143
content = RustContent(
117144
name=crate_name,
145+
canonical_name=canonicalize_crate_name(crate_name),
118146
vers=version,
119147
cksum=artifact.sha256,
120148
features=cargo_toml.get("features", {}),
@@ -136,7 +164,11 @@ def _create_dependencies_from_parsed_data(self):
136164

137165
class Meta:
138166
default_related_name = "%(app_label)s_%(model_name)s"
139-
unique_together = (("name", "vers", "_pulp_domain"),)
167+
# cksum is included so that different crates with the same canonical
168+
# name and version (e.g. a private crate vs a public crate of the
169+
# same name) can coexist within a domain. repo_key_fields still
170+
# prevents both from appearing in the same repository version.
171+
unique_together = (("name", "vers", "cksum", "_pulp_domain"),)
140172

141173

142174
class RustDependency(models.Model):
@@ -277,6 +309,7 @@ class RustPackageYank(Content):
277309

278310
name = models.CharField(max_length=255, db_index=True)
279311
vers = models.CharField(max_length=64, db_index=True)
312+
280313
_pulp_domain = models.ForeignKey("core.Domain", default=get_domain_pk, on_delete=models.PROTECT)
281314

282315
class Meta:

pulp_rust/app/serializers.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from pulpcore.plugin import serializers as core_serializers
88

99
from . import models
10+
from .utils import canonicalize_crate_name
1011

1112
log = logging.getLogger(__name__)
1213

@@ -153,6 +154,7 @@ class RustContentSerializer(core_serializers.SingleArtifactContentSerializer):
153154
def create(self, validated_data):
154155
"""Create RustContent and related dependencies."""
155156
dependencies_data = validated_data.pop("dependencies", [])
157+
validated_data["canonical_name"] = canonicalize_crate_name(validated_data["name"])
156158
content = super().create(validated_data)
157159

158160
# Create dependency records

pulp_rust/app/tasks/publishing.py

Lines changed: 50 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,18 @@
11
import hashlib
22
import struct
33

4+
from django.db import IntegrityError
5+
46
from pulpcore.plugin.models import Artifact, ContentArtifact
57
from pulpcore.plugin.tasking import aadd_and_remove
68

79
from pulp_rust.app.models import RustContent, RustDependency, RustRepository
8-
from pulp_rust.app.utils import extract_cargo_toml, extract_dependencies
10+
from pulp_rust.app.utils import (
11+
canonicalize_crate_name,
12+
extract_cargo_toml,
13+
extract_dependencies,
14+
strip_semver_build_metadata,
15+
)
916

1017

1118
def parse_cargo_publish_body(body):
@@ -55,15 +62,19 @@ async def apublish_package(repository_pk, metadata, crate_path):
5562
"""
5663
repository = await RustRepository.objects.aget(pk=repository_pk)
5764

58-
# Create the artifact from the .crate file
65+
# Create the artifact from the .crate file, or reuse an existing one
66+
# with the same checksum (Artifact has a unique constraint on digests).
5967
with open(crate_path, "rb") as f:
6068
cksum = hashlib.sha256(f.read()).hexdigest()
6169

6270
artifact = Artifact.init_and_validate(crate_path, expected_digests={"sha256": cksum})
63-
await artifact.asave()
71+
try:
72+
await artifact.asave()
73+
except IntegrityError:
74+
artifact = await Artifact.objects.aget(sha256=cksum)
6475

6576
# Extract authoritative metadata from the Cargo.toml inside the .crate tarball.
66-
# The publish JSON metadata is NOT authoritative a rogue client can send metadata
77+
# The publish JSON metadata is NOT authoritative - a rogue client can send metadata
6778
# that doesn't match the actual package. We only use the JSON name/vers to locate the
6879
# Cargo.toml within the tarball, then extract everything from the Cargo.toml itself.
6980
# See: https://github.com/rust-lang/cargo/issues/14492
@@ -72,34 +83,52 @@ async def apublish_package(repository_pk, metadata, crate_path):
7283
package = cargo_toml.get("package", {})
7384

7485
name = package["name"]
75-
vers = package["version"]
86+
canonical_name = canonicalize_crate_name(name)
87+
# Strip build metadata - SemVer 2.0.0 treats versions differing only in
88+
# build metadata as identical, and the index must not contain duplicates.
89+
vers = strip_semver_build_metadata(package["version"])
7690

7791
# Build dependency list from the Cargo.toml (authoritative source)
7892
deps = extract_dependencies(cargo_toml)
7993

80-
# Create the content record
81-
content = RustContent(
94+
# Reuse existing content if it already exists in the domain with the same
95+
# checksum (e.g. from a pull-through cache or another repository's publish).
96+
# Content in Pulp is globally shared - the same object can belong to
97+
# multiple repositories. Including cksum in the lookup allows different
98+
# crates with the same name+version (e.g. a private crate shadowing a
99+
# public one) to coexist as separate content objects within a domain.
100+
content = await RustContent.objects.filter(
82101
name=name,
83102
vers=vers,
84103
cksum=cksum,
85-
features=cargo_toml.get("features", {}),
86-
features2=None,
87-
links=package.get("links"),
88-
rust_version=package.get("rust-version"),
89104
_pulp_domain_id=repository.pulp_domain_id,
90-
)
91-
await content.asave()
92-
93-
# Create dependencies
94-
if deps:
95-
await RustDependency.objects.abulk_create(
96-
[RustDependency(content=content, **dep) for dep in deps]
105+
).afirst()
106+
107+
if content is None:
108+
content = RustContent(
109+
name=name,
110+
canonical_name=canonical_name,
111+
vers=vers,
112+
cksum=cksum,
113+
features=cargo_toml.get("features", {}),
114+
features2=None,
115+
links=package.get("links"),
116+
rust_version=package.get("rust-version"),
117+
_pulp_domain_id=repository.pulp_domain_id,
97118
)
119+
await content.asave()
120+
121+
if deps:
122+
await RustDependency.objects.abulk_create(
123+
[RustDependency(content=content, **dep) for dep in deps]
124+
)
98125

99-
# Create the content artifact (links the .crate file to the content)
126+
# Create the content artifact if it doesn't already exist
100127
relative_path = f"{name}/{name}-{vers}.crate"
101-
await ContentArtifact.objects.acreate(
102-
artifact=artifact, content=content, relative_path=relative_path
128+
await ContentArtifact.objects.aget_or_create(
129+
content=content,
130+
relative_path=relative_path,
131+
defaults={"artifact": artifact},
103132
)
104133

105134
# Add the content to a new repository version

pulp_rust/app/tasks/yanking.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from pulpcore.plugin.tasking import aadd_and_remove
22

33
from pulp_rust.app.models import RustContent, RustPackageYank, RustRepository
4+
from pulp_rust.app.utils import canonicalize_crate_name
45

56

67
async def ayank_package(repository_pk, name, vers):
@@ -9,11 +10,14 @@ async def ayank_package(repository_pk, name, vers):
910
1011
Creates a new repository version with the yank marker added.
1112
"""
13+
name = canonicalize_crate_name(name)
1214
repository = await RustRepository.objects.aget(pk=repository_pk)
1315
latest = await repository.alatest_version()
1416

1517
# Verify the package version exists in this repository
16-
exists = await RustContent.objects.filter(pk__in=latest.content, name=name, vers=vers).aexists()
18+
exists = await RustContent.objects.filter(
19+
pk__in=latest.content, canonical_name=name, vers=vers
20+
).aexists()
1721
if not exists:
1822
raise ValueError(f"Package {name}=={vers} not found in repository")
1923

@@ -25,7 +29,9 @@ async def ayank_package(repository_pk, name, vers):
2529
return # Already yanked, no-op
2630

2731
yank_marker, _ = await RustPackageYank.objects.aget_or_create(
28-
name=name, vers=vers, _pulp_domain_id=repository.pulp_domain_id
32+
name=name,
33+
vers=vers,
34+
_pulp_domain_id=repository.pulp_domain_id,
2935
)
3036

3137
await aadd_and_remove(
@@ -41,6 +47,7 @@ async def aunyank_package(repository_pk, name, vers):
4147
4248
Creates a new repository version with the yank marker removed.
4349
"""
50+
name = canonicalize_crate_name(name)
4451
repository = await RustRepository.objects.aget(pk=repository_pk)
4552
latest = await repository.alatest_version()
4653

pulp_rust/app/utils.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import re
12
import tarfile
23

34
try:
@@ -88,3 +89,65 @@ def extract_dependencies(cargo_toml):
8889
deps.append(parse_dep(name, spec, kind="build", target=target))
8990

9091
return deps
92+
93+
94+
# Maximum crate name length. This is crates.io policy, not part of the Cargo spec.
CRATE_NAME_MAX_LENGTH = 64

# Crate name: an ASCII letter followed by ASCII alphanumerics, hyphens or
# underscores. ``\Z`` (not ``$``) is used so a trailing newline is never accepted.
CRATE_NAME_RE = re.compile(r"^[a-zA-Z][a-zA-Z0-9_-]*\Z")

# One SemVer 2.0.0 pre-release identifier: either a numeric identifier without
# leading zeros, or an alphanumeric identifier containing at least one
# non-digit character.
_SEMVER_PRERELEASE_IDENT = r"(?:0|[1-9][0-9]*|[0-9]*[a-zA-Z-][0-9a-zA-Z-]*)"

# SemVer 2.0.0: MAJOR.MINOR.PATCH[-prerelease][+build].
# Explicit ``[0-9]`` classes are used instead of ``\d`` because ``\d`` also
# matches non-ASCII Unicode digits on ``str`` patterns. Capture groups are:
# 1-3 = major/minor/patch, 4 = "-prerelease", 6 = "+build".
SEMVER_RE = re.compile(
    r"^(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)"
    rf"(-{_SEMVER_PRERELEASE_IDENT}(\.{_SEMVER_PRERELEASE_IDENT})*)?"
    r"(\+[0-9a-zA-Z-]+(\.[0-9a-zA-Z-]+)*)?\Z"
)


def validate_crate_name(name):
    """Validate a crate name.

    Enforces the following rules:
    - Must start with an ASCII letter and contain only ASCII alphanumeric
      characters, hyphens, or underscores (Cargo spec, via ``cargo new``).
    - Must not exceed 64 characters (crates.io policy, not in the Cargo spec).

    Args:
        name: The candidate crate name.

    Returns:
        None if valid, or an error message string if invalid.
    """
    if not name:
        return "crate name must not be empty"
    # Report a validation error instead of raising TypeError from len() or
    # the regex when the caller hands over a non-string value.
    if not isinstance(name, str):
        return "crate name must be a string"
    if len(name) > CRATE_NAME_MAX_LENGTH:
        return f"crate name exceeds maximum length of {CRATE_NAME_MAX_LENGTH} characters"
    # fullmatch() guarantees the whole string is consumed, so values such as
    # "serde\n" cannot slip through.
    if not CRATE_NAME_RE.fullmatch(name):
        return (
            "crate name must start with an ASCII letter and contain only "
            "ASCII alphanumeric characters, hyphens, or underscores"
        )
    return None


def validate_crate_version(version):
    """Validate a crate version per SemVer 2.0.0 (required by Cargo spec).

    Args:
        version: The candidate version string.

    Returns:
        None if valid, or an error message string if invalid.
    """
    if not version:
        return "crate version must not be empty"
    # Report a validation error instead of raising TypeError from the regex
    # when the caller hands over a non-string value.
    if not isinstance(version, str):
        return "crate version must be a string"
    # fullmatch() rejects trailing newlines that ``$`` with match() would allow.
    if not SEMVER_RE.fullmatch(version):
        return (
            f"invalid semver: `{version}` "
            "(expected MAJOR.MINOR.PATCH[-prerelease][+build])"
        )
    return None


def strip_semver_build_metadata(version):
    """Strip build metadata from a SemVer version string.

    Per SemVer 2.0.0, versions that differ only in build metadata have equal
    precedence. The Cargo registry spec requires that indexes treat such
    versions as identical (e.g. ``1.0.0`` and ``1.0.0+build1`` must collide).

    Args:
        version: A version string, with or without a ``+build`` suffix.

    Returns:
        The version with everything from the first ``+`` onwards removed.
    """
    return version.split("+", 1)[0]


def canonicalize_crate_name(name):
    """Canonicalize a crate name for uniqueness comparison.

    Crate names are case-insensitive and hyphens and underscores are treated
    as equivalent (Cargo spec).

    Args:
        name: The crate name as written by its author.

    Returns:
        The name lowercased, with every hyphen replaced by an underscore.
    """
    return name.lower().replace("-", "_")

0 commit comments

Comments
 (0)