diff --git a/cargo-cyclonedx/src/generator.rs b/cargo-cyclonedx/src/generator.rs index 7866d42e..2fcf8e3f 100644 --- a/cargo-cyclonedx/src/generator.rs +++ b/cargo-cyclonedx/src/generator.rs @@ -23,7 +23,7 @@ use crate::config::PlatformSuffix; use crate::config::SbomConfig; use crate::config::{IncludedDependencies, ParseMode}; use crate::format::Format; -use crate::purl::get_purl; +use crate::purl::{get_purl, strip_git_url}; use cargo_metadata; use cargo_metadata::DependencyKind; @@ -408,6 +408,41 @@ impl SbomGenerator { e ), } + } else if package + .source + .as_ref() + .map_or(false, |s| s.repr.starts_with("git+")) + { + if let Some(id_vcs_url) = strip_git_url(&package.id.repr) { + // Use the package id (pkgid spec, Rust 1.77+): git+proto://host/path[?query]#name@version + match Uri::try_from(id_vcs_url) { + Ok(uri) => { + references.push(ExternalReference::new(ExternalReferenceType::Vcs, uri)) + } + Err(e) => log::warn!( + "Package {} has an invalid VCS URI (from id: {}): {} ", + package.name, + package.id, + e + ), + } + } else if let Some(source_vcs_url) = + package.source.as_ref().and_then(|s| strip_git_url(&s.repr)) + { + // Fall back to the source field for older cargo versions (before Rust 1.77) + // whose id field uses the old format `name version (source)`. + match Uri::try_from(source_vcs_url) { + Ok(uri) => { + references.push(ExternalReference::new(ExternalReferenceType::Vcs, uri)) + } + Err(e) => log::warn!( + "Package {} has an invalid VCS URI (from source: {:?}): {} ", + package.name, + package.source, + e + ), + } + } } if !references.is_empty() { @@ -1115,4 +1150,56 @@ mod test { assert_eq!(actual, expected); } + + const GIT_PACKAGE_JSON: &str = include_str!("../tests/fixtures/git_package.json"); + const GIT_PACKAGE_WITH_BRANCH_JSON: &str = + include_str!("../tests/fixtures/git_package_with_branch.json"); + + fn vcs_urls(refs: &ExternalReferences) -> Vec { + refs.0 + .iter() + .filter(|r| r.external_reference_type == ExternalReferenceType::Vcs) + .map(|r| r.url.to_string()) + .collect() + } + + #[test] + fn external_refs_git_package_uses_repository_when_present() { + // When package.repository is set it must be used as the VCS entry, not the id-based URL. + let package: cargo_metadata::Package = serde_json::from_str(GIT_PACKAGE_JSON).unwrap(); + let refs = SbomGenerator::get_external_references(&package).unwrap(); + assert_eq!( + vcs_urls(&refs), + vec!["https://github.com/rust-secure-code/cargo-auditable"], + ); + } + + #[test] + fn external_refs_git_package_falls_back_to_source_when_no_repository_old_format() { + // Old pkgid format: id is "name version (source)", so strip_git_url(id) returns None. + // The code falls back to strip_git_url(source), which strips the #commit_hash. + let mut json: serde_json::Value = serde_json::from_str(GIT_PACKAGE_JSON).unwrap(); + json["repository"] = serde_json::Value::Null; + let package: cargo_metadata::Package = serde_json::from_value(json).unwrap(); + let refs = SbomGenerator::get_external_references(&package).unwrap(); + assert_eq!( + vcs_urls(&refs), + vec!["git+https://github.com/rust-secure-code/cargo-auditable.git"], + ); + } + + #[test] + fn external_refs_git_with_branch_falls_back_to_id_strips_query_and_fragment() { + // New pkgid format (Rust 1.77+): id is "git+url?branch=...#name@version", + // so strip_git_url(id) succeeds and strips both ?branch= and #fragment. + let mut json: serde_json::Value = + serde_json::from_str(GIT_PACKAGE_WITH_BRANCH_JSON).unwrap(); + json["repository"] = serde_json::Value::Null; + let package: cargo_metadata::Package = serde_json::from_value(json).unwrap(); + let refs = SbomGenerator::get_external_references(&package).unwrap(); + assert_eq!( + vcs_urls(&refs), + vec!["git+https://github.com/leo030303/rav1d.git"], + ); + } } diff --git a/cargo-cyclonedx/src/purl.rs b/cargo-cyclonedx/src/purl.rs index 3fbd02b2..380018c1 100644 --- a/cargo-cyclonedx/src/purl.rs +++ b/cargo-cyclonedx/src/purl.rs @@ -20,7 +20,11 @@ pub fn get_purl( // qualifier names are taken from the spec, which defines these two for all PURL types: // https://github.com/package-url/purl-spec/blob/master/PURL-SPECIFICATION.rst#known-qualifiers-keyvalue-pairs Some(("git", _git_path)) => { - builder = builder.with_qualifier("vcs_url", source_to_vcs_url(source))? + // Prefer the stable pkgid spec (Rust 1.77+) id field, fall back + // to the opaque source field for older cargo versions. + let vcs_url = strip_git_url(&package.id.repr) + .unwrap_or_else(|| source_to_vcs_url(source)); + builder = builder.with_qualifier("vcs_url", vcs_url)? } Some(("registry" | "sparse", registry_url)) => { builder = builder.with_qualifier("repository_url", registry_url)? @@ -64,6 +68,23 @@ pub fn get_purl( Ok(CdxPurl::from_str(&purl.to_string()).unwrap()) } +/// Strips query parameters and the fragment from a `git+...` URL string, +/// returning the bare `git+proto://host/path` URL, or `None` if not a git URL. +/// +/// Works with both the package `id` field (new pkgid format, Rust 1.77+: +/// `git+proto://host/path[?query]#name@version`) and the `source` field +/// (`git+proto://host/path[?query]#commit_hash`). +pub(crate) fn strip_git_url(url: &str) -> Option { + if !url.starts_with("git+") { + return None; + } + let without_fragment = url.split_once('#').map_or(url, |(base, _)| base); + let without_query = without_fragment + .split_once('?') + .map_or(without_fragment, |(base, _)| base); + Some(without_query.to_string()) +} + /// Converts the `cargo metadata`'s `source` field to a valid PURL `vcs_url`. /// /// The `vcs_url` qualifier is specified to use the SPDX Package Download Location format: @@ -128,7 +149,6 @@ mod tests { use super::*; use percent_encoding::percent_decode; use purl::Purl; - use serde_json; const CRATES_IO_PACKAGE_JSON: &str = include_str!("../tests/fixtures/crates_io_package.json"); const GIT_PACKAGE_JSON: &str = include_str!("../tests/fixtures/git_package.json"); @@ -174,6 +194,8 @@ mod tests { #[test] fn git_purl_with_branch() { + // New pkgid format (Rust 1.77+): id is "git+url?branch=...#name@version", + // so strip_git_url succeeds and strips both ?branch= and #fragment. let git_package: Package = serde_json::from_str(GIT_PACKAGE_WITH_BRANCH_JSON).unwrap(); let purl = get_purl(&git_package, &git_package, Utf8Path::new("/foo/bar"), None).unwrap(); // Validate that data roundtripped correctly @@ -183,15 +205,13 @@ mod tests { assert_eq!(parsed_purl.qualifiers().len(), 1); let (qualifier, value) = parsed_purl.qualifiers().iter().next().unwrap(); assert_eq!(qualifier.as_str(), "vcs_url"); - // The ?branch= query param must be stripped; only the commit hash remains after @ + // The ?branch= query param must be stripped; the #name@version fragment in the id + // is not a commit hash and is also stripped, so vcs_url has no @revision suffix. let decoded_value = percent_decode(value.as_bytes()) .decode_utf8() .unwrap() .to_string(); - assert_eq!( - decoded_value, - "git+https://github.com/leo030303/rav1d.git@3a50834ce3743bc580f340ba3bfbdbf6a46ab783" - ); + assert_eq!(decoded_value, "git+https://github.com/leo030303/rav1d.git"); // Ensure ?branch= is NOT present in the vcs_url assert!(!decoded_value.contains("?branch=")); assert!(parsed_purl.subpath().is_none()); @@ -268,6 +288,43 @@ mod tests { assert!(parsed_purl.namespace().is_none()); } + #[test] + fn strip_git_url_strips_fragment() { + // New pkgid format: #name@version fragment must be stripped. + // Also verifies that the git+ prefix is preserved and the result is otherwise intact. + assert_eq!( + strip_git_url("git+https://github.com/foo/bar.git#bar@1.2.3"), + Some("git+https://github.com/foo/bar.git".to_string()) + ); + } + + #[test] + fn strip_git_url_strips_query_params() { + // ?branch=, ?tag=, ?rev= must all be stripped, along with the fragment + for query in &["?branch=main", "?tag=v1.0", "?rev=abc123"] { + let url = format!("git+https://github.com/foo/bar.git{query}#bar@1.0.0"); + let result = strip_git_url(&url).unwrap(); + assert_eq!( + result, "git+https://github.com/foo/bar.git", + "failed for {query}" + ); + } + } + + #[test] + fn strip_git_url_non_git_returns_none() { + // registry, sparse, and path sources must return None + for url in &[ + "aho-corasick 1.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "registry+https://github.com/rust-lang/crates.io-index#foo@1.0.0", + "sparse+https://my.registry.com/index#foo@1.0.0", + "foo 0.1.0 (path+file:///home/user/project)", + "path+file:///home/user/project#foo@0.1.0", + ] { + assert_eq!(strip_git_url(url), None, "expected None for: {url}"); + } + } + #[test] fn local_package() { let root_package: Package = serde_json::from_str(ROOT_PACKAGE_JSON).unwrap();