Skip to content

Commit d066c49

Browse files
committed
Fix an issue pull-through caching (wasn't saving all metadata)
Assisted-By: claude-opus-4.6
1 parent d766c88 commit d066c49

9 files changed

Lines changed: 668 additions & 24 deletions

File tree

pulp_rust/app/models.py

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
from logging import getLogger
44

55
from django.db import models
6+
from django_lifecycle import hook, AFTER_CREATE
7+
8+
from pulp_rust.app.utils import extract_cargo_toml, extract_dependencies
69

710
from pulpcore.plugin.models import (
811
Content,
@@ -106,15 +109,32 @@ def init_from_artifact_and_relative_path(artifact, relative_path):
106109
Create an unsaved RustContent from a downloaded .crate artifact.
107110
108111
Called by pulpcore's content handler during pull-through caching.
109-
Only populates name, version, and checksum -- dependency and feature
110-
metadata is served from the upstream sparse index via the proxy.
112+
Extracts full metadata (dependencies, features, etc.) from the
113+
Cargo.toml inside the .crate tarball.
111114
"""
112115
crate_name, version = _parse_crate_relative_path(relative_path)
113-
return RustContent(
116+
cargo_toml = extract_cargo_toml(artifact.file.path, crate_name, version)
117+
118+
content = RustContent(
114119
name=crate_name,
115120
vers=version,
116121
cksum=artifact.sha256,
122+
features=cargo_toml.get("features", {}),
123+
links=cargo_toml.get("package", {}).get("links"),
124+
rust_version=cargo_toml.get("package", {}).get("rust-version"),
117125
)
126+
# Store parsed dep data for the AFTER_CREATE hook to consume
127+
content._parsed_deps = extract_dependencies(cargo_toml)
128+
return content
129+
130+
@hook(AFTER_CREATE)
131+
def _create_dependencies_from_parsed_data(self):
132+
"""Create RustDependency records from data parsed during pull-through."""
133+
parsed_deps = getattr(self, "_parsed_deps", None)
134+
if parsed_deps:
135+
RustDependency.objects.bulk_create(
136+
[RustDependency(content=self, **dep) for dep in parsed_deps]
137+
)
118138

119139
class Meta:
120140
default_related_name = "%(app_label)s_%(model_name)s"

pulp_rust/app/utils.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
import tarfile
2+
3+
try:
4+
import tomllib
5+
except ModuleNotFoundError:
6+
import tomli as tomllib
7+
8+
9+
def extract_cargo_toml(crate_path, crate_name, version):
10+
"""Extract and parse Cargo.toml from a .crate tarball."""
11+
expected_path = f"{crate_name}-{version}/Cargo.toml"
12+
with tarfile.open(crate_path, "r:gz") as tar:
13+
cargo_toml_file = tar.extractfile(expected_path)
14+
if cargo_toml_file is None:
15+
raise FileNotFoundError(f"No Cargo.toml found in {crate_path} at {expected_path}")
16+
return tomllib.load(cargo_toml_file)
17+
18+
19+
def _normalize_req(version_str):
20+
"""Normalize a Cargo version requirement to its explicit form.
21+
22+
In Cargo.toml, a bare version like "1.0" is shorthand for "^1.0".
23+
The index format uses the explicit form with the comparator prefix.
24+
"""
25+
if not version_str or version_str == "*":
26+
return version_str
27+
# Already has a comparator prefix
28+
if version_str[0] in ("^", "~", "=", ">", "<"):
29+
return version_str
30+
return f"^{version_str}"
31+
32+
33+
def parse_dep(name, spec, kind="normal", target=None):
34+
"""Convert a single Cargo.toml dependency entry to index format."""
35+
if isinstance(spec, str):
36+
# Simple form: dep = "1.0"
37+
return {
38+
"name": name,
39+
"req": _normalize_req(spec),
40+
"features": [],
41+
"optional": False,
42+
"default_features": True,
43+
"target": target,
44+
"kind": kind,
45+
"registry": None,
46+
"package": None,
47+
}
48+
49+
# Table form: dep = { version = "1.0", optional = true, ... }
50+
dep = {
51+
"name": name,
52+
"req": _normalize_req(spec.get("version", "*")),
53+
"features": spec.get("features", []),
54+
"optional": spec.get("optional", False),
55+
"default_features": spec.get("default-features", True),
56+
"target": target,
57+
"kind": kind,
58+
"registry": spec.get("registry"),
59+
"package": None,
60+
}
61+
# If the dep was renamed, "name" in the index is the alias (the key),
62+
# and "package" is the real crate name
63+
if "package" in spec:
64+
dep["package"] = spec["package"]
65+
return dep
66+
67+
68+
def extract_dependencies(cargo_toml):
69+
"""Extract all dependencies from a parsed Cargo.toml into index format."""
70+
deps = []
71+
72+
for name, spec in cargo_toml.get("dependencies", {}).items():
73+
deps.append(parse_dep(name, spec, kind="normal"))
74+
75+
for name, spec in cargo_toml.get("dev-dependencies", {}).items():
76+
deps.append(parse_dep(name, spec, kind="dev"))
77+
78+
for name, spec in cargo_toml.get("build-dependencies", {}).items():
79+
deps.append(parse_dep(name, spec, kind="build"))
80+
81+
# Platform-specific dependencies: [target.'cfg(...)'.dependencies]
82+
for target, target_deps in cargo_toml.get("target", {}).items():
83+
for name, spec in target_deps.get("dependencies", {}).items():
84+
deps.append(parse_dep(name, spec, kind="normal", target=target))
85+
for name, spec in target_deps.get("dev-dependencies", {}).items():
86+
deps.append(parse_dep(name, spec, kind="dev", target=target))
87+
for name, spec in target_deps.get("build-dependencies", {}).items():
88+
deps.append(parse_dep(name, spec, kind="build", target=target))
89+
90+
return deps

pulp_rust/app/views.py

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -178,19 +178,21 @@ def _build_index_response(crate_versions):
178178
for crate_version in crate_versions:
179179
deps = []
180180
for dep in crate_version.dependencies.all():
181-
deps.append(
182-
{
183-
"name": dep.name,
184-
"req": dep.req,
185-
"features": dep.features,
186-
"optional": dep.optional,
187-
"default_features": dep.default_features,
188-
"target": dep.target,
189-
"kind": dep.kind,
190-
"registry": dep.registry,
191-
"package": dep.package,
192-
}
193-
)
181+
dep_obj = {
182+
"name": dep.name,
183+
"req": dep.req,
184+
"features": dep.features,
185+
"optional": dep.optional,
186+
"default_features": dep.default_features,
187+
"target": dep.target,
188+
"kind": dep.kind,
189+
}
190+
# crates.io omits these keys when not set
191+
if dep.registry is not None:
192+
dep_obj["registry"] = dep.registry
193+
if dep.package is not None:
194+
dep_obj["package"] = dep.package
195+
deps.append(dep_obj)
194196

195197
version_obj = {
196198
"name": crate_version.name,

pulp_rust/tests/functional/api/test_cargo_api.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,7 @@
66
import pytest
77
from aiohttp.client_exceptions import ClientResponseError
88

9-
from pulp_rust.tests.functional.utils import download_file
10-
11-
CRATES_IO_URL = "sparse+https://index.crates.io/"
9+
from pulp_rust.tests.functional.utils import CRATES_IO_URL, download_file
1210

1311

1412
def test_config_json(

pulp_rust/tests/functional/api/test_download_content.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import hashlib
88
from urllib.parse import urljoin
99

10-
from pulp_rust.tests.functional.utils import download_file
10+
from pulp_rust.tests.functional.utils import CRATES_IO_URL, download_file
1111

1212

1313
def test_download_content(
@@ -27,7 +27,7 @@ def test_download_content(
2727
3. Verify that the content was automatically added to the repository.
2828
4. Remove the remote and verify the content is still served from cache.
2929
"""
30-
remote = rust_remote_factory(url="sparse+https://index.crates.io/")
30+
remote = rust_remote_factory(url=CRATES_IO_URL)
3131
repository = rust_repo_factory(remote=remote.pulp_href)
3232
distribution = rust_distribution_factory(
3333
remote=remote.pulp_href, repository=repository.pulp_href

pulp_rust/tests/functional/api/test_pull_through_caching.py

Lines changed: 161 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
"""Tests for Cargo pull-through caching via the sparse index proxy."""
22

33
import hashlib
4+
import json
45
from urllib.parse import urljoin
56

67
import pytest
78

8-
from pulp_rust.tests.functional.utils import download_file
9-
10-
CRATES_IO_URL = "sparse+https://index.crates.io/"
9+
from pulp_rust.tests.functional.utils import (
10+
CRATES_IO_URL,
11+
assert_index_entry_matches_upstream,
12+
download_file,
13+
get_index_entry,
14+
)
1115

1216

1317
def test_pull_through_sparse_index(
@@ -241,3 +245,157 @@ def test_pull_through_multiple_crates_on_demand(
241245

242246
repository = rust_repo_api_client.read(repository.pulp_href)
243247
assert not repository.latest_version_href.endswith("/versions/0/")
248+
249+
250+
def test_pull_through_on_demand_preserves_metadata(
251+
delete_orphans_pre,
252+
rust_remote_factory,
253+
rust_repo_factory,
254+
rust_distribution_factory,
255+
rust_repo_api_client,
256+
rust_content_api_client,
257+
monitor_task,
258+
cargo_registry_url,
259+
):
260+
"""on_demand: cached content should have full metadata (deps, features).
261+
262+
Ensure that all metadata is saved appropriately when a package is created via
263+
pull-through caching.
264+
"""
265+
remote = rust_remote_factory(url=CRATES_IO_URL, policy="on_demand")
266+
repository = rust_repo_factory(remote=remote.pulp_href)
267+
distribution = rust_distribution_factory(
268+
remote=remote.pulp_href, repository=repository.pulp_href
269+
)
270+
271+
# serde has dependencies (serde_derive) and features in most versions
272+
crate_name, crate_version = "serde", "1.0.210"
273+
unit_path = f"api/v1/crates/{crate_name}/{crate_version}/download"
274+
pulp_unit_url = urljoin(cargo_registry_url(distribution.base_path), unit_path)
275+
download_file(pulp_unit_url)
276+
277+
# Verify the content record was created with full metadata
278+
content_response = rust_content_api_client.list(name=crate_name, vers=crate_version)
279+
assert content_response.count == 1
280+
content = content_response.results[0]
281+
282+
# Should have dependencies (serde has serde_derive as an optional dep)
283+
assert len(content.dependencies) > 0
284+
dep_names = [d.name for d in content.dependencies]
285+
assert "serde_derive" in dep_names
286+
287+
# Should have features
288+
assert len(content.features) > 0
289+
assert "derive" in content.features
290+
291+
# Add cached content and verify the locally-served index includes deps
292+
monitor_task(
293+
rust_repo_api_client.add_cached_content(
294+
repository.pulp_href, {"remote": remote.pulp_href}
295+
).task
296+
)
297+
298+
# Fetch the sparse index from Pulp (now served from local data)
299+
index_url = urljoin(cargo_registry_url(distribution.base_path), f"se/rd/{crate_name}")
300+
index_response = download_file(index_url)
301+
assert index_response.response_obj.status == 200
302+
303+
body = index_response.body.decode("utf-8")
304+
lines = body.strip().split("\n")
305+
# Find the line for our version
306+
version_entry = None
307+
for line in lines:
308+
entry = json.loads(line)
309+
if entry["vers"] == crate_version:
310+
version_entry = entry
311+
break
312+
313+
assert version_entry is not None, f"Version {crate_version} not found in index"
314+
assert len(version_entry["deps"]) > 0, "Index entry has no dependencies"
315+
index_dep_names = [d["name"] for d in version_entry["deps"]]
316+
assert "serde_derive" in index_dep_names
317+
assert len(version_entry["features"]) > 0, "Index entry has no features"
318+
319+
320+
# ---------------------------------------------------------------------------
321+
# Index fidelity tests: compare Pulp output against crates.io for each mode
322+
# ---------------------------------------------------------------------------
323+
324+
325+
def test_index_fidelity_streamed(
326+
rust_remote_factory,
327+
rust_repo_factory,
328+
rust_distribution_factory,
329+
cargo_registry_url,
330+
upstream_index_entry,
331+
):
332+
"""streamed: proxied sparse index entry should match crates.io exactly."""
333+
remote = rust_remote_factory(url=CRATES_IO_URL, policy="streamed")
334+
repository = rust_repo_factory(remote=remote.pulp_href)
335+
distribution = rust_distribution_factory(
336+
remote=remote.pulp_href, repository=repository.pulp_href
337+
)
338+
339+
base = cargo_registry_url(distribution.base_path)
340+
pulp_entry = get_index_entry(base, "se/rd/serde", "1.0.210")
341+
assert_index_entry_matches_upstream(pulp_entry, upstream_index_entry)
342+
343+
344+
def test_index_fidelity_on_demand_proxied(
345+
rust_remote_factory,
346+
rust_repo_factory,
347+
rust_distribution_factory,
348+
cargo_registry_url,
349+
upstream_index_entry,
350+
):
351+
"""on_demand: the first fetch (before caching) proxies upstream and should match."""
352+
remote = rust_remote_factory(url=CRATES_IO_URL, policy="on_demand")
353+
repository = rust_repo_factory(remote=remote.pulp_href)
354+
distribution = rust_distribution_factory(
355+
remote=remote.pulp_href, repository=repository.pulp_href
356+
)
357+
358+
base = cargo_registry_url(distribution.base_path)
359+
360+
# First fetch — proxied from upstream (no local content yet)
361+
pulp_entry = get_index_entry(base, "se/rd/serde", "1.0.210")
362+
assert_index_entry_matches_upstream(pulp_entry, upstream_index_entry)
363+
364+
365+
def test_index_fidelity_on_demand_cached(
366+
delete_orphans_pre,
367+
rust_remote_factory,
368+
rust_repo_factory,
369+
rust_distribution_factory,
370+
rust_repo_api_client,
371+
rust_content_api_client,
372+
monitor_task,
373+
cargo_registry_url,
374+
upstream_index_entry,
375+
):
376+
"""on_demand: after caching, the locally-served index entry should match crates.io."""
377+
remote = rust_remote_factory(url=CRATES_IO_URL, policy="on_demand")
378+
repository = rust_repo_factory(remote=remote.pulp_href)
379+
distribution = rust_distribution_factory(
380+
remote=remote.pulp_href, repository=repository.pulp_href
381+
)
382+
383+
base = cargo_registry_url(distribution.base_path)
384+
385+
# Download the .crate to trigger content creation
386+
download_file(urljoin(base, "api/v1/crates/serde/1.0.210/download"))
387+
388+
# Verify content was cached
389+
content = rust_content_api_client.list(name="serde", vers="1.0.210")
390+
assert content.count == 1, "Content was not cached after on_demand download"
391+
392+
# Promote cached content into the repository
393+
monitor_task(
394+
rust_repo_api_client.add_cached_content(
395+
repository.pulp_href, {"remote": remote.pulp_href}
396+
).task
397+
)
398+
399+
# Now the index is served from local data — compare against upstream
400+
pulp_entry = get_index_entry(base, "se/rd/serde", "1.0.210")
401+
assert_index_entry_matches_upstream(pulp_entry, upstream_index_entry)

0 commit comments

Comments
 (0)