Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 88 additions & 1 deletion cargo-cyclonedx/src/generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use crate::config::PlatformSuffix;
use crate::config::SbomConfig;
use crate::config::{IncludedDependencies, ParseMode};
use crate::format::Format;
use crate::purl::get_purl;
use crate::purl::{get_purl, strip_git_url};

use cargo_metadata;
use cargo_metadata::DependencyKind;
Expand Down Expand Up @@ -408,6 +408,41 @@ impl SbomGenerator {
e
),
}
} else if package
.source
.as_ref()
.map_or(false, |s| s.repr.starts_with("git+"))
{
if let Some(id_vcs_url) = strip_git_url(&package.id.repr) {
// Use the package id (pkgid spec, Rust 1.77+): git+proto://host/path[?query]#name@version
match Uri::try_from(id_vcs_url) {
Ok(uri) => {
references.push(ExternalReference::new(ExternalReferenceType::Vcs, uri))
}
Err(e) => log::warn!(
"Package {} has an invalid VCS URI (from id: {}): {} ",
package.name,
package.id,
e
),
}
} else if let Some(source_vcs_url) =
package.source.as_ref().and_then(|s| strip_git_url(&s.repr))
{
// Fall back to the source field for older cargo versions (before Rust 1.77)
// whose id field uses the old format `name version (source)`.
match Uri::try_from(source_vcs_url) {
Ok(uri) => {
references.push(ExternalReference::new(ExternalReferenceType::Vcs, uri))
}
Err(e) => log::warn!(
"Package {} has an invalid VCS URI (from source: {:?}): {} ",
package.name,
package.source,
e
),
}
}
}

if !references.is_empty() {
Expand Down Expand Up @@ -1115,4 +1150,56 @@ mod test {

assert_eq!(actual, expected);
}

const GIT_PACKAGE_JSON: &str = include_str!("../tests/fixtures/git_package.json");
const GIT_PACKAGE_WITH_BRANCH_JSON: &str =
include_str!("../tests/fixtures/git_package_with_branch.json");

fn vcs_urls(refs: &ExternalReferences) -> Vec<String> {
refs.0
.iter()
.filter(|r| r.external_reference_type == ExternalReferenceType::Vcs)
.map(|r| r.url.to_string())
.collect()
}

#[test]
fn external_refs_git_package_uses_repository_when_present() {
// When package.repository is set it must be used as the VCS entry, not the id-based URL.
let package: cargo_metadata::Package = serde_json::from_str(GIT_PACKAGE_JSON).unwrap();
let refs = SbomGenerator::get_external_references(&package).unwrap();
assert_eq!(
vcs_urls(&refs),
vec!["https://github.com/rust-secure-code/cargo-auditable"],
);
}

#[test]
fn external_refs_git_package_falls_back_to_source_when_no_repository_old_format() {
// Old pkgid format: id is "name version (source)", so strip_git_url(id) returns None.
// The code falls back to strip_git_url(source), which strips the #commit_hash.
let mut json: serde_json::Value = serde_json::from_str(GIT_PACKAGE_JSON).unwrap();
json["repository"] = serde_json::Value::Null;
let package: cargo_metadata::Package = serde_json::from_value(json).unwrap();
let refs = SbomGenerator::get_external_references(&package).unwrap();
assert_eq!(
vcs_urls(&refs),
vec!["git+https://github.com/rust-secure-code/cargo-auditable.git"],
);
}

#[test]
fn external_refs_git_with_branch_falls_back_to_id_strips_query_and_fragment() {
// New pkgid format (Rust 1.77+): id is "git+url?branch=...#name@version",
// so strip_git_url(id) succeeds and strips both ?branch= and #fragment.
let mut json: serde_json::Value =
serde_json::from_str(GIT_PACKAGE_WITH_BRANCH_JSON).unwrap();
json["repository"] = serde_json::Value::Null;
let package: cargo_metadata::Package = serde_json::from_value(json).unwrap();
let refs = SbomGenerator::get_external_references(&package).unwrap();
assert_eq!(
vcs_urls(&refs),
vec!["git+https://github.com/leo030303/rav1d.git"],
);
}
}
71 changes: 64 additions & 7 deletions cargo-cyclonedx/src/purl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,11 @@ pub fn get_purl(
// qualifier names are taken from the spec, which defines these two for all PURL types:
// https://github.com/package-url/purl-spec/blob/master/PURL-SPECIFICATION.rst#known-qualifiers-keyvalue-pairs
Some(("git", _git_path)) => {
builder = builder.with_qualifier("vcs_url", source_to_vcs_url(source))?
// Prefer the stable pkgid spec (Rust 1.77+) id field, fall back
// to the opaque source field for older cargo versions.
let vcs_url = strip_git_url(&package.id.repr)
.unwrap_or_else(|| source_to_vcs_url(source));
builder = builder.with_qualifier("vcs_url", vcs_url)?
}
Some(("registry" | "sparse", registry_url)) => {
builder = builder.with_qualifier("repository_url", registry_url)?
Expand Down Expand Up @@ -64,6 +68,23 @@ pub fn get_purl(
Ok(CdxPurl::from_str(&purl.to_string()).unwrap())
}

/// Strips query parameters and the fragment from a `git+...` URL string,
/// returning the bare `git+proto://host/path` URL, or `None` if not a git URL.
///
/// Works with both the package `id` field (new pkgid format, Rust 1.77+:
/// `git+proto://host/path[?query]#name@version`) and the `source` field
/// (`git+proto://host/path[?query]#commit_hash`).
pub(crate) fn strip_git_url(url: &str) -> Option<String> {
if !url.starts_with("git+") {
return None;
}
let without_fragment = url.split_once('#').map_or(url, |(base, _)| base);
let without_query = without_fragment
.split_once('?')
.map_or(without_fragment, |(base, _)| base);
Some(without_query.to_string())
}

/// Converts the `cargo metadata`'s `source` field to a valid PURL `vcs_url`.
///
/// The `vcs_url` qualifier is specified to use the SPDX Package Download Location format:
Expand Down Expand Up @@ -128,7 +149,6 @@ mod tests {
use super::*;
use percent_encoding::percent_decode;
use purl::Purl;
use serde_json;

const CRATES_IO_PACKAGE_JSON: &str = include_str!("../tests/fixtures/crates_io_package.json");
const GIT_PACKAGE_JSON: &str = include_str!("../tests/fixtures/git_package.json");
Expand Down Expand Up @@ -174,6 +194,8 @@ mod tests {

#[test]
fn git_purl_with_branch() {
// New pkgid format (Rust 1.77+): id is "git+url?branch=...#name@version",
// so strip_git_url succeeds and strips both ?branch= and #fragment.
let git_package: Package = serde_json::from_str(GIT_PACKAGE_WITH_BRANCH_JSON).unwrap();
let purl = get_purl(&git_package, &git_package, Utf8Path::new("/foo/bar"), None).unwrap();
// Validate that data roundtripped correctly
Expand All @@ -183,15 +205,13 @@ mod tests {
assert_eq!(parsed_purl.qualifiers().len(), 1);
let (qualifier, value) = parsed_purl.qualifiers().iter().next().unwrap();
assert_eq!(qualifier.as_str(), "vcs_url");
// The ?branch= query param must be stripped; only the commit hash remains after @
// The ?branch= query param must be stripped; the #name@version fragment in the id
// is not a commit hash and is also stripped, so vcs_url has no @revision suffix.
let decoded_value = percent_decode(value.as_bytes())
.decode_utf8()
.unwrap()
.to_string();
assert_eq!(
decoded_value,
"git+https://github.com/leo030303/rav1d.git@3a50834ce3743bc580f340ba3bfbdbf6a46ab783"
);
assert_eq!(decoded_value, "git+https://github.com/leo030303/rav1d.git");
// Ensure ?branch= is NOT present in the vcs_url
assert!(!decoded_value.contains("?branch="));
assert!(parsed_purl.subpath().is_none());
Expand Down Expand Up @@ -268,6 +288,43 @@ mod tests {
assert!(parsed_purl.namespace().is_none());
}

#[test]
fn strip_git_url_strips_fragment() {
// New pkgid format: #name@version fragment must be stripped.
// Also verifies that the git+ prefix is preserved and the result is otherwise intact.
assert_eq!(
strip_git_url("git+https://github.com/foo/bar.git#bar@1.2.3"),
Some("git+https://github.com/foo/bar.git".to_string())
);
}

#[test]
fn strip_git_url_strips_query_params() {
// ?branch=, ?tag=, ?rev= must all be stripped, along with the fragment
for query in &["?branch=main", "?tag=v1.0", "?rev=abc123"] {
let url = format!("git+https://github.com/foo/bar.git{query}#bar@1.0.0");
let result = strip_git_url(&url).unwrap();
assert_eq!(
result, "git+https://github.com/foo/bar.git",
"failed for {query}"
);
}
}

#[test]
fn strip_git_url_non_git_returns_none() {
// registry, sparse, and path sources must return None
for url in &[
"aho-corasick 1.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"registry+https://github.com/rust-lang/crates.io-index#foo@1.0.0",
"sparse+https://my.registry.com/index#foo@1.0.0",
"foo 0.1.0 (path+file:///home/user/project)",
"path+file:///home/user/project#foo@0.1.0",
] {
assert_eq!(strip_git_url(url), None, "expected None for: {url}");
}
}

#[test]
fn local_package() {
let root_package: Package = serde_json::from_str(ROOT_PACKAGE_JSON).unwrap();
Expand Down