From 59b34c2f95b1929983d27fb5c690d3c4521b69af Mon Sep 17 00:00:00 2001 From: beer4code <102912454+beer4code@users.noreply.github.com> Date: Fri, 20 Mar 2026 00:07:09 +0100 Subject: [PATCH 1/3] Extract VCS URL from package.id (pkgid spec) instead of package.source Replace the use of package.source (opaque/unstable) with package.id (stable pkgid spec) for extracting VCS URLs from git dependencies. - Add extract_git_url_from_id helper in purl.rs that strips the #name@version fragment and ?branch=/?tag=/?rev= query params from the pkgid, returning a clean git+proto://host/path URL - Use the helper in get_purl for the PURL vcs_url qualifier, replacing the previous source_to_vcs_url function - Use the helper in get_external_references as a fallback when package.repository is absent on git dependencies - Update git_package.json fixture to use the new pkgid format package.source is explicitly documented as opaque and unstable by the cargo_metadata crate. package.id (the pkgid spec) is the stable alternative. On Rust 1.77+ the pkgid format for git deps is: git+proto://host/path[?query]#name@version. The fragment contains name@version (not a commit hash like package.source does), so the output no longer includes commit hashes. This was recommended by the maintainer in PR #777. Signed-off-by: beer4code <102912454+beer4code@users.noreply.github.com> --- cargo-cyclonedx/src/generator.rs | 69 ++++++++- cargo-cyclonedx/src/purl.rs | 138 +++++++++++------- .../tests/fixtures/git_package.json | 2 +- 3 files changed, 154 insertions(+), 55 deletions(-) diff --git a/cargo-cyclonedx/src/generator.rs b/cargo-cyclonedx/src/generator.rs index 7866d42e..70f21730 100644 --- a/cargo-cyclonedx/src/generator.rs +++ b/cargo-cyclonedx/src/generator.rs @@ -23,7 +23,7 @@ use crate::config::PlatformSuffix; use crate::config::SbomConfig; use crate::config::{IncludedDependencies, ParseMode}; use crate::format::Format; -use crate::purl::get_purl; +use crate::purl::{extract_git_url_from_id, get_purl}; use cargo_metadata; use cargo_metadata::DependencyKind; @@ -408,6 +408,21 @@ impl SbomGenerator { e ), } + } else if let Some(id_vcs_url) = extract_git_url_from_id(&package.id) { + // Fall back to the package id for git sources that have no repository field. + // Per the pkgid spec (Rust 1.77+), git source ids look like: + // git+proto://host/path[?query]#name@version + match Uri::try_from(id_vcs_url) { + Ok(uri) => { + references.push(ExternalReference::new(ExternalReferenceType::Vcs, uri)) + } + Err(e) => log::warn!( + "Package {} has an invalid VCS URI (from id: {}): {} ", + package.name, + package.id, + e + ), + } } if !references.is_empty() { @@ -1115,4 +1130,56 @@ mod test { assert_eq!(actual, expected); } + + const GIT_PACKAGE_JSON: &str = include_str!("../tests/fixtures/git_package.json"); + const GIT_PACKAGE_WITH_BRANCH_JSON: &str = + include_str!("../tests/fixtures/git_package_with_branch.json"); + + fn vcs_urls(refs: &ExternalReferences) -> Vec { + refs.0 + .iter() + .filter(|r| r.external_reference_type == ExternalReferenceType::Vcs) + .map(|r| r.url.to_string()) + .collect() + } + + #[test] + fn external_refs_git_package_uses_repository_when_present() { + // When package.repository is set it must be used as the VCS entry, not the id-based URL. + let package: cargo_metadata::Package = serde_json::from_str(GIT_PACKAGE_JSON).unwrap(); + let refs = SbomGenerator::get_external_references(&package).unwrap(); + assert_eq!( + vcs_urls(&refs), + vec!["https://github.com/rust-secure-code/cargo-auditable"], + ); + } + + #[test] + fn external_refs_git_package_falls_back_to_id_when_no_repository() { + // When package.repository is absent, the VCS entry must be derived from package.id. + // This exercises the new extract_git_url_from_id fallback in get_external_references. + let mut json: serde_json::Value = serde_json::from_str(GIT_PACKAGE_JSON).unwrap(); + json["repository"] = serde_json::Value::Null; + let package: cargo_metadata::Package = serde_json::from_value(json).unwrap(); + let refs = SbomGenerator::get_external_references(&package).unwrap(); + assert_eq!( + vcs_urls(&refs), + vec!["git+https://github.com/rust-secure-code/cargo-auditable.git"], + ); + } + + #[test] + fn external_refs_git_with_branch_falls_back_to_id_strips_query_and_fragment() { + // When package.repository is absent on a branch-pinned git dep, the id-based URL + // must have its ?branch= query and #fragment stripped. + let mut json: serde_json::Value = + serde_json::from_str(GIT_PACKAGE_WITH_BRANCH_JSON).unwrap(); + json["repository"] = serde_json::Value::Null; + let package: cargo_metadata::Package = serde_json::from_value(json).unwrap(); + let refs = SbomGenerator::get_external_references(&package).unwrap(); + assert_eq!( + vcs_urls(&refs), + vec!["git+https://github.com/leo030303/rav1d.git"], + ); + } } diff --git a/cargo-cyclonedx/src/purl.rs b/cargo-cyclonedx/src/purl.rs index 3fbd02b2..1cb466be 100644 --- a/cargo-cyclonedx/src/purl.rs +++ b/cargo-cyclonedx/src/purl.rs @@ -20,7 +20,14 @@ pub fn get_purl( // qualifier names are taken from the spec, which defines these two for all PURL types: // https://github.com/package-url/purl-spec/blob/master/PURL-SPECIFICATION.rst#known-qualifiers-keyvalue-pairs Some(("git", _git_path)) => { - builder = builder.with_qualifier("vcs_url", source_to_vcs_url(source))? + if let Some(vcs_url) = extract_git_url_from_id(&package.id) { + builder = builder.with_qualifier("vcs_url", vcs_url)? + } else { + log::warn!( + "Package {} has a git source but no extractable VCS URL in its id", + package.name + ); + } } Some(("registry" | "sparse", registry_url)) => { builder = builder.with_qualifier("repository_url", registry_url)? @@ -64,52 +71,30 @@ pub fn get_purl( Ok(CdxPurl::from_str(&purl.to_string()).unwrap()) } -/// Converts the `cargo metadata`'s `source` field to a valid PURL `vcs_url`. +/// Extracts a clean git VCS URL from a package's `id` field (pkgid spec). /// -/// The `vcs_url` qualifier is specified to use the SPDX Package Download Location format: -/// `+://[/][@][#]` +/// On Rust 1.77+, git dependency package IDs use the "new" pkgid format: +/// `git+proto://host/path[?query]#name@version` /// -/// Cargo metadata uses a different format: -/// `git+[?branch=|?tag=|?rev=]#` -/// -/// This function strips the query parameters (since the commit hash already identifies the code) -/// and converts the `#commit_hash` to `@commit_hash` per the SPDX format. -/// -/// Assumes that the source kind is `git`, panics if it isn't. -fn source_to_vcs_url(source: &cargo_metadata::Source) -> String { - assert!(source.repr.starts_with("git+")); - let url = &source.repr; - // Find where query parameters start (if any) and where the commit hash fragment starts - let query_start = url.find('?'); - let fragment_start = url.find('#'); - match (query_start, fragment_start) { - // Has both query params and commit hash: strip query, keep commit as @ - (Some(q), Some(f)) => { - let base = &url[..q]; - let commit = &url[f + 1..]; - format!("{}@{}", base, commit) - } - // No query params, has commit hash: just replace # with @ - (None, Some(_)) => url.replace('#', "@"), - // Has query params but no commit hash: extract the ref value as @ - (Some(q), None) => { - let base = &url[..q]; - let query = &url[q + 1..]; - // Extract the value from branch=X, tag=X, or rev=X - let ref_value = query - .split('&') - .find_map(|param| { - param - .strip_prefix("branch=") - .or_else(|| param.strip_prefix("tag=")) - .or_else(|| param.strip_prefix("rev=")) - }) - .unwrap_or(query); - format!("{}@{}", base, ref_value) - } - // No query params, no commit hash: return as-is - (None, None) => url.to_string(), +/// This function: +/// - Returns `None` if the id does not start with `git+` +/// - Strips the fragment (`#name@version`) +/// - Strips query parameters (`?branch=`, `?tag=`, `?rev=`) +/// - Returns the clean `git+proto://host/path` URL with the `git+` prefix +pub(crate) fn extract_git_url_from_id(id: &cargo_metadata::PackageId) -> Option { + let id_str = &id.repr; + if !id_str.starts_with("git+") { + return None; } + // Strip fragment (#name@version) + let without_fragment = id_str + .split_once('#') + .map_or(id_str.as_str(), |(url, _)| url); + // Strip query parameters (?branch=, ?tag=, ?rev=) + let without_query = without_fragment + .split_once('?') + .map_or(without_fragment, |(url, _)| url); + Some(without_query.to_string()) } /// Converts a relative path to PURL subpath @@ -128,7 +113,6 @@ mod tests { use super::*; use percent_encoding::percent_decode; use purl::Purl; - use serde_json; const CRATES_IO_PACKAGE_JSON: &str = include_str!("../tests/fixtures/crates_io_package.json"); const GIT_PACKAGE_JSON: &str = include_str!("../tests/fixtures/git_package.json"); @@ -137,6 +121,12 @@ mod tests { const ROOT_PACKAGE_JSON: &str = include_str!("../tests/fixtures/root_package.json"); const WORKSPACE_PACKAGE_JSON: &str = include_str!("../tests/fixtures/workspace_package.json"); + fn pkg_id(s: &str) -> cargo_metadata::PackageId { + cargo_metadata::PackageId { + repr: s.to_string(), + } + } + #[test] fn crates_io_purl() { let crates_io_package: Package = serde_json::from_str(CRATES_IO_PACKAGE_JSON).unwrap(); @@ -167,7 +157,15 @@ mod tests { assert_eq!(parsed_purl.qualifiers().len(), 1); let (qualifier, value) = parsed_purl.qualifiers().iter().next().unwrap(); assert_eq!(qualifier.as_str(), "vcs_url"); - assert_eq!(value, "git+https://github.com/rust-secure-code/cargo-auditable.git@da85607fb1a09435d77288ccf05a92b2e8ec3f71"); + // vcs_url comes from package.id (pkgid spec): no commit hash, no query params + let decoded_value = percent_decode(value.as_bytes()) + .decode_utf8() + .unwrap() + .to_string(); + assert_eq!( + decoded_value, + "git+https://github.com/rust-secure-code/cargo-auditable.git" + ); assert!(parsed_purl.subpath().is_none()); assert!(parsed_purl.namespace().is_none()); } @@ -183,17 +181,12 @@ mod tests { assert_eq!(parsed_purl.qualifiers().len(), 1); let (qualifier, value) = parsed_purl.qualifiers().iter().next().unwrap(); assert_eq!(qualifier.as_str(), "vcs_url"); - // The ?branch= query param must be stripped; only the commit hash remains after @ + // vcs_url comes from package.id: ?branch= query param and #fragment must be stripped let decoded_value = percent_decode(value.as_bytes()) .decode_utf8() .unwrap() .to_string(); - assert_eq!( - decoded_value, - "git+https://github.com/leo030303/rav1d.git@3a50834ce3743bc580f340ba3bfbdbf6a46ab783" - ); - // Ensure ?branch= is NOT present in the vcs_url - assert!(!decoded_value.contains("?branch=")); + assert_eq!(decoded_value, "git+https://github.com/leo030303/rav1d.git"); assert!(parsed_purl.subpath().is_none()); assert!(parsed_purl.namespace().is_none()); } @@ -268,6 +261,45 @@ mod tests { assert!(parsed_purl.namespace().is_none()); } + #[test] + fn extract_git_url_strips_fragment() { + // New pkgid format: #name@version fragment must be stripped. + // Also verifies that the git+ prefix is preserved and the result is otherwise intact. + let id = pkg_id("git+https://github.com/foo/bar.git#bar@1.2.3"); + assert_eq!( + extract_git_url_from_id(&id), + Some("git+https://github.com/foo/bar.git".to_string()) + ); + } + + #[test] + fn extract_git_url_strips_query_params() { + // ?branch=, ?tag=, ?rev= must all be stripped, along with the fragment + for query in &["?branch=main", "?tag=v1.0", "?rev=abc123"] { + let id = pkg_id(&format!("git+https://github.com/foo/bar.git{query}#bar@1.0.0")); + let result = extract_git_url_from_id(&id).unwrap(); + assert_eq!(result, "git+https://github.com/foo/bar.git", "failed for {query}"); + } + } + + #[test] + fn extract_git_url_non_git_returns_none() { + // registry, sparse, and path sources must return None + for id_str in &[ + "aho-corasick 1.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "registry+https://github.com/rust-lang/crates.io-index#foo@1.0.0", + "sparse+https://my.registry.com/index#foo@1.0.0", + "foo 0.1.0 (path+file:///home/user/project)", + ] { + let id = pkg_id(id_str); + assert_eq!( + extract_git_url_from_id(&id), + None, + "expected None for id: {id_str}" + ); + } + } + #[test] fn local_package() { let root_package: Package = serde_json::from_str(ROOT_PACKAGE_JSON).unwrap(); diff --git a/cargo-cyclonedx/tests/fixtures/git_package.json b/cargo-cyclonedx/tests/fixtures/git_package.json index 8f320f92..0c22e8b7 100644 --- a/cargo-cyclonedx/tests/fixtures/git_package.json +++ b/cargo-cyclonedx/tests/fixtures/git_package.json @@ -1,7 +1,7 @@ { "name": "auditable-extract", "version": "0.3.2", - "id": "auditable-extract 0.3.2 (git+https://github.com/rust-secure-code/cargo-auditable.git#da85607fb1a09435d77288ccf05a92b2e8ec3f71)", + "id": "git+https://github.com/rust-secure-code/cargo-auditable.git#auditable-extract@0.3.2", "license": "MIT OR Apache-2.0", "license_file": null, "description": "Extract the dependency trees embedded in binaries by `cargo auditable`", From 99a33a45aaef82098f2cbc969c1bace08c76d9da Mon Sep 17 00:00:00 2001 From: beer4code <102912454+beer4code@users.noreply.github.com> Date: Tue, 24 Mar 2026 00:35:00 +0100 Subject: [PATCH 2/3] Fix rustfmt formatting Signed-off-by: beer4code <102912454+beer4code@users.noreply.github.com> --- cargo-cyclonedx/src/generator.rs | 4 +--- cargo-cyclonedx/src/purl.rs | 9 +++++++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/cargo-cyclonedx/src/generator.rs b/cargo-cyclonedx/src/generator.rs index 70f21730..60a8f060 100644 --- a/cargo-cyclonedx/src/generator.rs +++ b/cargo-cyclonedx/src/generator.rs @@ -413,9 +413,7 @@ impl SbomGenerator { // Per the pkgid spec (Rust 1.77+), git source ids look like: // git+proto://host/path[?query]#name@version match Uri::try_from(id_vcs_url) { - Ok(uri) => { - references.push(ExternalReference::new(ExternalReferenceType::Vcs, uri)) - } + Ok(uri) => references.push(ExternalReference::new(ExternalReferenceType::Vcs, uri)), Err(e) => log::warn!( "Package {} has an invalid VCS URI (from id: {}): {} ", package.name, diff --git a/cargo-cyclonedx/src/purl.rs b/cargo-cyclonedx/src/purl.rs index 1cb466be..2d87eee2 100644 --- a/cargo-cyclonedx/src/purl.rs +++ b/cargo-cyclonedx/src/purl.rs @@ -276,9 +276,14 @@ mod tests { fn extract_git_url_strips_query_params() { // ?branch=, ?tag=, ?rev= must all be stripped, along with the fragment for query in &["?branch=main", "?tag=v1.0", "?rev=abc123"] { - let id = pkg_id(&format!("git+https://github.com/foo/bar.git{query}#bar@1.0.0")); + let id = pkg_id(&format!( + "git+https://github.com/foo/bar.git{query}#bar@1.0.0" + )); let result = extract_git_url_from_id(&id).unwrap(); - assert_eq!(result, "git+https://github.com/foo/bar.git", "failed for {query}"); + assert_eq!( + result, "git+https://github.com/foo/bar.git", + "failed for {query}" + ); } } From 46945d16f4c48727493911c20c23fdabe148e47a Mon Sep 17 00:00:00 2001 From: beer4code <102912454+beer4code@users.noreply.github.com> Date: Tue, 24 Mar 2026 01:18:39 +0100 Subject: [PATCH 3/3] Add fallback to source Signed-off-by: beer4code <102912454+beer4code@users.noreply.github.com> --- cargo-cyclonedx/src/generator.rs | 58 +++++--- cargo-cyclonedx/src/purl.rs | 136 ++++++++++-------- .../tests/fixtures/git_package.json | 2 +- 3 files changed, 119 insertions(+), 77 deletions(-) diff --git a/cargo-cyclonedx/src/generator.rs b/cargo-cyclonedx/src/generator.rs index 60a8f060..2fcf8e3f 100644 --- a/cargo-cyclonedx/src/generator.rs +++ b/cargo-cyclonedx/src/generator.rs @@ -23,7 +23,7 @@ use crate::config::PlatformSuffix; use crate::config::SbomConfig; use crate::config::{IncludedDependencies, ParseMode}; use crate::format::Format; -use crate::purl::{extract_git_url_from_id, get_purl}; +use crate::purl::{get_purl, strip_git_url}; use cargo_metadata; use cargo_metadata::DependencyKind; @@ -408,18 +408,40 @@ impl SbomGenerator { e ), } - } else if let Some(id_vcs_url) = extract_git_url_from_id(&package.id) { - // Fall back to the package id for git sources that have no repository field. - // Per the pkgid spec (Rust 1.77+), git source ids look like: - // git+proto://host/path[?query]#name@version - match Uri::try_from(id_vcs_url) { - Ok(uri) => references.push(ExternalReference::new(ExternalReferenceType::Vcs, uri)), - Err(e) => log::warn!( - "Package {} has an invalid VCS URI (from id: {}): {} ", - package.name, - package.id, - e - ), + } else if package + .source + .as_ref() + .map_or(false, |s| s.repr.starts_with("git+")) + { + if let Some(id_vcs_url) = strip_git_url(&package.id.repr) { + // Use the package id (pkgid spec, Rust 1.77+): git+proto://host/path[?query]#name@version + match Uri::try_from(id_vcs_url) { + Ok(uri) => { + references.push(ExternalReference::new(ExternalReferenceType::Vcs, uri)) + } + Err(e) => log::warn!( + "Package {} has an invalid VCS URI (from id: {}): {} ", + package.name, + package.id, + e + ), + } + } else if let Some(source_vcs_url) = + package.source.as_ref().and_then(|s| strip_git_url(&s.repr)) + { + // Fall back to the source field for older cargo versions (before Rust 1.77) + // whose id field uses the old format `name version (source)`. + match Uri::try_from(source_vcs_url) { + Ok(uri) => { + references.push(ExternalReference::new(ExternalReferenceType::Vcs, uri)) + } + Err(e) => log::warn!( + "Package {} has an invalid VCS URI (from source: {:?}): {} ", + package.name, + package.source, + e + ), + } } } @@ -1153,9 +1175,9 @@ mod test { } #[test] - fn external_refs_git_package_falls_back_to_id_when_no_repository() { - // When package.repository is absent, the VCS entry must be derived from package.id. - // This exercises the new extract_git_url_from_id fallback in get_external_references. + fn external_refs_git_package_falls_back_to_source_when_no_repository_old_format() { + // Old pkgid format: id is "name version (source)", so strip_git_url(id) returns None. + // The code falls back to strip_git_url(source), which strips the #commit_hash. let mut json: serde_json::Value = serde_json::from_str(GIT_PACKAGE_JSON).unwrap(); json["repository"] = serde_json::Value::Null; let package: cargo_metadata::Package = serde_json::from_value(json).unwrap(); @@ -1168,8 +1190,8 @@ mod test { #[test] fn external_refs_git_with_branch_falls_back_to_id_strips_query_and_fragment() { - // When package.repository is absent on a branch-pinned git dep, the id-based URL - // must have its ?branch= query and #fragment stripped. + // New pkgid format (Rust 1.77+): id is "git+url?branch=...#name@version", + // so strip_git_url(id) succeeds and strips both ?branch= and #fragment. let mut json: serde_json::Value = serde_json::from_str(GIT_PACKAGE_WITH_BRANCH_JSON).unwrap(); json["repository"] = serde_json::Value::Null; diff --git a/cargo-cyclonedx/src/purl.rs b/cargo-cyclonedx/src/purl.rs index 2d87eee2..380018c1 100644 --- a/cargo-cyclonedx/src/purl.rs +++ b/cargo-cyclonedx/src/purl.rs @@ -20,14 +20,11 @@ pub fn get_purl( // qualifier names are taken from the spec, which defines these two for all PURL types: // https://github.com/package-url/purl-spec/blob/master/PURL-SPECIFICATION.rst#known-qualifiers-keyvalue-pairs Some(("git", _git_path)) => { - if let Some(vcs_url) = extract_git_url_from_id(&package.id) { - builder = builder.with_qualifier("vcs_url", vcs_url)? - } else { - log::warn!( - "Package {} has a git source but no extractable VCS URL in its id", - package.name - ); - } + // Prefer the stable pkgid spec (Rust 1.77+) id field, fall back + // to the opaque source field for older cargo versions. + let vcs_url = strip_git_url(&package.id.repr) + .unwrap_or_else(|| source_to_vcs_url(source)); + builder = builder.with_qualifier("vcs_url", vcs_url)? } Some(("registry" | "sparse", registry_url)) => { builder = builder.with_qualifier("repository_url", registry_url)? @@ -71,32 +68,71 @@ pub fn get_purl( Ok(CdxPurl::from_str(&purl.to_string()).unwrap()) } -/// Extracts a clean git VCS URL from a package's `id` field (pkgid spec). +/// Strips query parameters and the fragment from a `git+...` URL string, +/// returning the bare `git+proto://host/path` URL, or `None` if not a git URL. /// -/// On Rust 1.77+, git dependency package IDs use the "new" pkgid format: -/// `git+proto://host/path[?query]#name@version` -/// -/// This function: -/// - Returns `None` if the id does not start with `git+` -/// - Strips the fragment (`#name@version`) -/// - Strips query parameters (`?branch=`, `?tag=`, `?rev=`) -/// - Returns the clean `git+proto://host/path` URL with the `git+` prefix -pub(crate) fn extract_git_url_from_id(id: &cargo_metadata::PackageId) -> Option { - let id_str = &id.repr; - if !id_str.starts_with("git+") { +/// Works with both the package `id` field (new pkgid format, Rust 1.77+: +/// `git+proto://host/path[?query]#name@version`) and the `source` field +/// (`git+proto://host/path[?query]#commit_hash`). +pub(crate) fn strip_git_url(url: &str) -> Option { + if !url.starts_with("git+") { return None; } - // Strip fragment (#name@version) - let without_fragment = id_str - .split_once('#') - .map_or(id_str.as_str(), |(url, _)| url); - // Strip query parameters (?branch=, ?tag=, ?rev=) + let without_fragment = url.split_once('#').map_or(url, |(base, _)| base); let without_query = without_fragment .split_once('?') - .map_or(without_fragment, |(url, _)| url); + .map_or(without_fragment, |(base, _)| base); Some(without_query.to_string()) } +/// Converts the `cargo metadata`'s `source` field to a valid PURL `vcs_url`. +/// +/// The `vcs_url` qualifier is specified to use the SPDX Package Download Location format: +/// `+://[/][@][#]` +/// +/// Cargo metadata uses a different format: +/// `git+[?branch=|?tag=|?rev=]#` +/// +/// This function strips the query parameters (since the commit hash already identifies the code) +/// and converts the `#commit_hash` to `@commit_hash` per the SPDX format. +/// +/// Assumes that the source kind is `git`, panics if it isn't. +fn source_to_vcs_url(source: &cargo_metadata::Source) -> String { + assert!(source.repr.starts_with("git+")); + let url = &source.repr; + // Find where query parameters start (if any) and where the commit hash fragment starts + let query_start = url.find('?'); + let fragment_start = url.find('#'); + match (query_start, fragment_start) { + // Has both query params and commit hash: strip query, keep commit as @ + (Some(q), Some(f)) => { + let base = &url[..q]; + let commit = &url[f + 1..]; + format!("{}@{}", base, commit) + } + // No query params, has commit hash: just replace # with @ + (None, Some(_)) => url.replace('#', "@"), + // Has query params but no commit hash: extract the ref value as @ + (Some(q), None) => { + let base = &url[..q]; + let query = &url[q + 1..]; + // Extract the value from branch=X, tag=X, or rev=X + let ref_value = query + .split('&') + .find_map(|param| { + param + .strip_prefix("branch=") + .or_else(|| param.strip_prefix("tag=")) + .or_else(|| param.strip_prefix("rev=")) + }) + .unwrap_or(query); + format!("{}@{}", base, ref_value) + } + // No query params, no commit hash: return as-is + (None, None) => url.to_string(), + } +} + /// Converts a relative path to PURL subpath fn to_purl_subpath(path: &Utf8Path) -> String { assert!(path.is_relative()); @@ -121,12 +157,6 @@ mod tests { const ROOT_PACKAGE_JSON: &str = include_str!("../tests/fixtures/root_package.json"); const WORKSPACE_PACKAGE_JSON: &str = include_str!("../tests/fixtures/workspace_package.json"); - fn pkg_id(s: &str) -> cargo_metadata::PackageId { - cargo_metadata::PackageId { - repr: s.to_string(), - } - } - #[test] fn crates_io_purl() { let crates_io_package: Package = serde_json::from_str(CRATES_IO_PACKAGE_JSON).unwrap(); @@ -157,21 +187,15 @@ mod tests { assert_eq!(parsed_purl.qualifiers().len(), 1); let (qualifier, value) = parsed_purl.qualifiers().iter().next().unwrap(); assert_eq!(qualifier.as_str(), "vcs_url"); - // vcs_url comes from package.id (pkgid spec): no commit hash, no query params - let decoded_value = percent_decode(value.as_bytes()) - .decode_utf8() - .unwrap() - .to_string(); - assert_eq!( - decoded_value, - "git+https://github.com/rust-secure-code/cargo-auditable.git" - ); + assert_eq!(value, "git+https://github.com/rust-secure-code/cargo-auditable.git@da85607fb1a09435d77288ccf05a92b2e8ec3f71"); assert!(parsed_purl.subpath().is_none()); assert!(parsed_purl.namespace().is_none()); } #[test] fn git_purl_with_branch() { + // New pkgid format (Rust 1.77+): id is "git+url?branch=...#name@version", + // so strip_git_url succeeds and strips both ?branch= and #fragment. let git_package: Package = serde_json::from_str(GIT_PACKAGE_WITH_BRANCH_JSON).unwrap(); let purl = get_purl(&git_package, &git_package, Utf8Path::new("/foo/bar"), None).unwrap(); // Validate that data roundtripped correctly @@ -181,12 +205,15 @@ mod tests { assert_eq!(parsed_purl.qualifiers().len(), 1); let (qualifier, value) = parsed_purl.qualifiers().iter().next().unwrap(); assert_eq!(qualifier.as_str(), "vcs_url"); - // vcs_url comes from package.id: ?branch= query param and #fragment must be stripped + // The ?branch= query param must be stripped; the #name@version fragment in the id + // is not a commit hash and is also stripped, so vcs_url has no @revision suffix. let decoded_value = percent_decode(value.as_bytes()) .decode_utf8() .unwrap() .to_string(); assert_eq!(decoded_value, "git+https://github.com/leo030303/rav1d.git"); + // Ensure ?branch= is NOT present in the vcs_url + assert!(!decoded_value.contains("?branch=")); assert!(parsed_purl.subpath().is_none()); assert!(parsed_purl.namespace().is_none()); } @@ -262,24 +289,21 @@ mod tests { } #[test] - fn extract_git_url_strips_fragment() { + fn strip_git_url_strips_fragment() { // New pkgid format: #name@version fragment must be stripped. // Also verifies that the git+ prefix is preserved and the result is otherwise intact. - let id = pkg_id("git+https://github.com/foo/bar.git#bar@1.2.3"); assert_eq!( - extract_git_url_from_id(&id), + strip_git_url("git+https://github.com/foo/bar.git#bar@1.2.3"), Some("git+https://github.com/foo/bar.git".to_string()) ); } #[test] - fn extract_git_url_strips_query_params() { + fn strip_git_url_strips_query_params() { // ?branch=, ?tag=, ?rev= must all be stripped, along with the fragment for query in &["?branch=main", "?tag=v1.0", "?rev=abc123"] { - let id = pkg_id(&format!( - "git+https://github.com/foo/bar.git{query}#bar@1.0.0" - )); - let result = extract_git_url_from_id(&id).unwrap(); + let url = format!("git+https://github.com/foo/bar.git{query}#bar@1.0.0"); + let result = strip_git_url(&url).unwrap(); assert_eq!( result, "git+https://github.com/foo/bar.git", "failed for {query}" @@ -288,20 +312,16 @@ mod tests { } #[test] - fn extract_git_url_non_git_returns_none() { + fn strip_git_url_non_git_returns_none() { // registry, sparse, and path sources must return None - for id_str in &[ + for url in &[ "aho-corasick 1.1.2 (registry+https://github.com/rust-lang/crates.io-index)", "registry+https://github.com/rust-lang/crates.io-index#foo@1.0.0", "sparse+https://my.registry.com/index#foo@1.0.0", "foo 0.1.0 (path+file:///home/user/project)", + "path+file:///home/user/project#foo@0.1.0", ] { - let id = pkg_id(id_str); - assert_eq!( - extract_git_url_from_id(&id), - None, - "expected None for id: {id_str}" - ); + assert_eq!(strip_git_url(url), None, "expected None for: {url}"); } } diff --git a/cargo-cyclonedx/tests/fixtures/git_package.json b/cargo-cyclonedx/tests/fixtures/git_package.json index 0c22e8b7..8f320f92 100644 --- a/cargo-cyclonedx/tests/fixtures/git_package.json +++ b/cargo-cyclonedx/tests/fixtures/git_package.json @@ -1,7 +1,7 @@ { "name": "auditable-extract", "version": "0.3.2", - "id": "git+https://github.com/rust-secure-code/cargo-auditable.git#auditable-extract@0.3.2", + "id": "auditable-extract 0.3.2 (git+https://github.com/rust-secure-code/cargo-auditable.git#da85607fb1a09435d77288ccf05a92b2e8ec3f71)", "license": "MIT OR Apache-2.0", "license_file": null, "description": "Extract the dependency trees embedded in binaries by `cargo auditable`",