From 5477018858ddcad2d95e2c06578218a58efb66d3 Mon Sep 17 00:00:00 2001 From: Zheng Hao Tang Date: Fri, 5 Jun 2026 16:08:40 -0700 Subject: [PATCH 1/4] feat: defensive identity overrides for MavenComponent and GitComponent MavenComponent - Override GetExtendedIdProperties() to suppress DownloadUrl/SourceUrl from Id. GAV is the canonical Maven identity; the Maven Central download URL is deterministic from GAV and SourceUrl is surfaced server-side from the POM - neither should affect identity. GitComponent - Add PackageUrl override returning pkg:github/{owner}/{repo}@{commit} for repositories hosted on github.com (case-insensitive host match, .git suffix stripped, trailing slash normalised, path must resolve cleanly to owner/repo). Returns null for any other host (gitlab, bitbucket, ADO, GitHub Enterprise) and for malformed paths; consumers should fall back to RepositoryUrl in that case. - Override GetExtendedIdProperties() defensively for the same reason as MavenComponent. Tests - PurlGenerationTests: explicit Maven coverage plus 7 GitHub PURL cases (canonical, .git suffix, trailing slash, case-insensitive host, non-github hosts, malformed paths, missing commit hash). - TypedComponentSerializationTests: Id-stability tests proving both overrides exclude DownloadUrl/SourceUrl from Id even when set. No production behaviour change today (no detector currently sets DownloadUrl/SourceUrl on MavenComponent/GitComponent), but locks identity stability ahead of upcoming SBOM enrichment work that will populate DownloadUrl in the CD-Internal converter. --- .../TypedComponent/GitComponent.cs | 79 ++++++++++++++++ .../TypedComponent/MavenComponent.cs | 13 +++ .../PurlGenerationTests.cs | 93 +++++++++++++++++++ .../TypedComponentSerializationTests.cs | 32 +++++++ 4 files changed, 217 insertions(+) diff --git a/src/Microsoft.ComponentDetection.Contracts/TypedComponent/GitComponent.cs b/src/Microsoft.ComponentDetection.Contracts/TypedComponent/GitComponent.cs index 57a6deae6..7320740ef 100644 --- a/src/Microsoft.ComponentDetection.Contracts/TypedComponent/GitComponent.cs +++ b/src/Microsoft.ComponentDetection.Contracts/TypedComponent/GitComponent.cs @@ -2,10 +2,15 @@ namespace Microsoft.ComponentDetection.Contracts.TypedComponent; using System; +using System.Collections.Generic; using System.Text.Json.Serialization; +using PackageUrl; public class GitComponent : TypedComponent { + private const string GithubHost = "github.com"; + private const string DotGitSuffix = ".git"; + public GitComponent(Uri repositoryUrl, string commitHash) { this.RepositoryUrl = this.ValidateRequiredInput(repositoryUrl, nameof(this.RepositoryUrl), nameof(ComponentType.Git)); @@ -32,5 +37,79 @@ public GitComponent() [JsonIgnore] public override ComponentType Type => ComponentType.Git; + /// + /// Gets pkg:github/{owner}/{repo}@{commit} for repositories hosted on github.com whose + /// path resolves cleanly to owner/repo; null for any other host (gitlab, bitbucket, ADO, + /// GitHub Enterprise, etc.) or malformed paths. Consumers should fall back to + /// when this returns null. + /// + [JsonPropertyName("packageUrl")] + public override PackageURL PackageUrl + { + get + { + if (string.IsNullOrEmpty(this.CommitHash) + || !TryGetGithubOwnerAndRepo(this.RepositoryUrl, out var owner, out var repo)) + { + return null; + } + + return new PackageURL("github", owner, repo, this.CommitHash, null, null); + } + } + protected override string ComputeBaseId() => $"{this.RepositoryUrl} : {this.CommitHash} - {this.Type}"; + + /// + /// Suppresses the base impl so stays stable if a detector later + /// populates or . + /// RepositoryUrl and CommitHash are already in BaseId; the GitHub archive download URL is + /// deterministic and source URL would duplicate RepositoryUrl. + /// + /// An empty sequence. + protected override IEnumerable> GetExtendedIdProperties() + { + yield break; + } + + private static bool TryGetGithubOwnerAndRepo(Uri repositoryUrl, out string owner, out string repo) + { + owner = null; + repo = null; + + if (repositoryUrl == null + || !repositoryUrl.IsAbsoluteUri + || !string.Equals(repositoryUrl.Host, GithubHost, StringComparison.OrdinalIgnoreCase)) + { + return false; + } + + var trimmedPath = repositoryUrl.AbsolutePath?.Trim('/'); + if (string.IsNullOrEmpty(trimmedPath)) + { + return false; + } + + var segments = trimmedPath.Split('/'); + if (segments.Length != 2) + { + return false; + } + + var ownerSegment = segments[0]; + var repoSegment = segments[1]; + if (repoSegment.EndsWith(DotGitSuffix, StringComparison.OrdinalIgnoreCase)) + { + repoSegment = repoSegment.Substring(0, repoSegment.Length - DotGitSuffix.Length); + } + + if (string.IsNullOrEmpty(ownerSegment) || string.IsNullOrEmpty(repoSegment)) + { + return false; + } + + owner = ownerSegment; + repo = repoSegment; + return true; + } } diff --git a/src/Microsoft.ComponentDetection.Contracts/TypedComponent/MavenComponent.cs b/src/Microsoft.ComponentDetection.Contracts/TypedComponent/MavenComponent.cs index 034a156ea..8cd522922 100644 --- a/src/Microsoft.ComponentDetection.Contracts/TypedComponent/MavenComponent.cs +++ b/src/Microsoft.ComponentDetection.Contracts/TypedComponent/MavenComponent.cs @@ -1,6 +1,7 @@ #nullable disable namespace Microsoft.ComponentDetection.Contracts.TypedComponent; +using System.Collections.Generic; using System.Text.Json.Serialization; using PackageUrl; @@ -34,4 +35,16 @@ public MavenComponent() public override PackageURL PackageUrl => new PackageURL("maven", this.GroupId, this.ArtifactId, this.Version, null, null); protected override string ComputeBaseId() => $"{this.GroupId} {this.ArtifactId} {this.Version} - {this.Type}"; + + /// + /// Suppresses the base impl so stays stable if a detector later + /// populates or . + /// GroupId/ArtifactId/Version are already in BaseId; the Maven Central download URL is deterministic + /// and source/repo URLs are surfaced server-side from the POM. + /// + /// An empty sequence. + protected override IEnumerable> GetExtendedIdProperties() + { + yield break; + } } diff --git a/test/Microsoft.ComponentDetection.Contracts.Tests/PurlGenerationTests.cs b/test/Microsoft.ComponentDetection.Contracts.Tests/PurlGenerationTests.cs index 18b9f99c4..a6a779b81 100644 --- a/test/Microsoft.ComponentDetection.Contracts.Tests/PurlGenerationTests.cs +++ b/test/Microsoft.ComponentDetection.Contracts.Tests/PurlGenerationTests.cs @@ -1,6 +1,7 @@ #nullable disable namespace Microsoft.ComponentDetection.Contracts.Tests; +using System; using AwesomeAssertions; using Microsoft.ComponentDetection.Contracts.TypedComponent; using Microsoft.VisualStudio.TestTools.UnitTesting; @@ -110,4 +111,96 @@ public void CocoaPodNameShouldPurlWithCustomQualifier() packageOne.PackageUrl.ToString().Should().Be("pkg:cocoapods/AFNetworking@4.0.1?repository_url=https:%2F%2Fcustom_repo.example.com%2Fpath%2Fto%2Frepo%2Fspecs.git"); } + + [TestMethod] + public void MavenComponentShouldGenerateMavenPurl() + { + // https://github.com/package-url/purl-spec/blob/b8ddd39a6d533b8895f3b741f2e62e2695d82aa4/PURL-TYPES.rst#maven + var component = new MavenComponent("com.google.guava", "guava", "33.0-jre"); + + component.PackageUrl.Type.Should().Be("maven"); + component.PackageUrl.Namespace.Should().Be("com.google.guava"); + component.PackageUrl.Name.Should().Be("guava"); + component.PackageUrl.Version.Should().Be("33.0-jre"); + component.PackageUrl.ToString().Should().Be("pkg:maven/com.google.guava/guava@33.0-jre"); + } + + [TestMethod] + public void GitComponentGithubRepositoryShouldGenerateGithubPurl() + { + // https://github.com/package-url/purl-spec/blob/b8ddd39a6d533b8895f3b741f2e62e2695d82aa4/PURL-TYPES.rst#github + var component = new GitComponent(new Uri("https://github.com/google/guava"), "abcdef1234567890"); + + component.PackageUrl.Type.Should().Be("github"); + component.PackageUrl.Namespace.Should().Be("google"); + component.PackageUrl.Name.Should().Be("guava"); + component.PackageUrl.Version.Should().Be("abcdef1234567890"); + component.PackageUrl.ToString().Should().Be("pkg:github/google/guava@abcdef1234567890"); + } + + [TestMethod] + public void GitComponentGithubRepositoryWithDotGitSuffixShouldStripIt() + { + var component = new GitComponent(new Uri("https://github.com/google/guava.git"), "abcdef1234567890"); + + component.PackageUrl.Name.Should().Be("guava", "the .git suffix is not part of the canonical repo name"); + component.PackageUrl.ToString().Should().Be("pkg:github/google/guava@abcdef1234567890"); + } + + [TestMethod] + public void GitComponentGithubRepositoryWithTrailingSlashShouldBeNormalized() + { + var component = new GitComponent(new Uri("https://github.com/google/guava/"), "abcdef1234567890"); + + component.PackageUrl.ToString().Should().Be("pkg:github/google/guava@abcdef1234567890"); + } + + [TestMethod] + public void GitComponentGithubHostMatchIsCaseInsensitive() + { + var component = new GitComponent(new Uri("https://GitHub.com/google/guava"), "abcdef1234567890"); + + component.PackageUrl.ToString().Should().Be("pkg:github/google/guava@abcdef1234567890"); + } + + [TestMethod] + public void GitComponentNonGithubRepositoryShouldHaveNoPackageUrl() + { + // GitLab / Bitbucket / Azure DevOps / GitHub Enterprise have no canonical PURL representation today. + // Consumers should fall back to RepositoryUrl in this case. + var gitlab = new GitComponent(new Uri("https://gitlab.com/foo/bar"), "abcdef1234567890"); + var bitbucket = new GitComponent(new Uri("https://bitbucket.org/foo/bar"), "abcdef1234567890"); + var ado = new GitComponent(new Uri("https://dev.azure.com/org/proj/_git/repo"), "abcdef1234567890"); + var ghEnterprise = new GitComponent(new Uri("https://github.contoso.com/foo/bar"), "abcdef1234567890"); + + gitlab.PackageUrl.Should().BeNull(); + bitbucket.PackageUrl.Should().BeNull(); + ado.PackageUrl.Should().BeNull(); + ghEnterprise.PackageUrl.Should().BeNull(); + } + + [TestMethod] + public void GitComponentMalformedGithubUrlShouldHaveNoPackageUrl() + { + // Owner only, or paths deeper than owner/repo (e.g. browse URLs) — not canonical repository URLs. + var ownerOnly = new GitComponent(new Uri("https://github.com/google"), "abcdef1234567890"); + var tooDeep = new GitComponent(new Uri("https://github.com/google/guava/tree/main"), "abcdef1234567890"); + var rootOnly = new GitComponent(new Uri("https://github.com/"), "abcdef1234567890"); + + ownerOnly.PackageUrl.Should().BeNull(); + tooDeep.PackageUrl.Should().BeNull(); + rootOnly.PackageUrl.Should().BeNull(); + } + + [TestMethod] + public void GitComponentMissingCommitHashShouldHaveNoPackageUrl() + { + // CommitHash is required via the public ctor, but the parameterless deserialization ctor allows null. + var component = new GitComponent + { + RepositoryUrl = new Uri("https://github.com/google/guava"), + }; + + component.PackageUrl.Should().BeNull(); + } } diff --git a/test/Microsoft.ComponentDetection.Contracts.Tests/TypedComponentSerializationTests.cs b/test/Microsoft.ComponentDetection.Contracts.Tests/TypedComponentSerializationTests.cs index 57342f7e7..77b302860 100644 --- a/test/Microsoft.ComponentDetection.Contracts.Tests/TypedComponentSerializationTests.cs +++ b/test/Microsoft.ComponentDetection.Contracts.Tests/TypedComponentSerializationTests.cs @@ -504,4 +504,36 @@ public void TypedComponent_Id_IncludesBothUrls_WhenPresent() tc.BaseId.Should().Be("TestPackage 1.0.0 - NuGet"); tc.Id.Should().Be("TestPackage 1.0.0 - NuGet [DownloadUrl:https://example.com/package/1.0.0 SourceUrl:https://github.com/test-org/TestPackage]"); } + + [TestMethod] + public void MavenComponent_Id_ExcludesDownloadUrlAndSourceUrl_WhenSet() + { + // MavenComponent overrides GetExtendedIdProperties to keep Id stable across detectors that + // may or may not populate DownloadUrl / SourceUrl (e.g. CDS surfaces SourceUrl from the POM, + // and DownloadUrl is deterministic from GAV — neither should affect identity). + var tc = new MavenComponent("com.google.guava", "guava", "33.0-jre") + { + DownloadUrl = new Uri("https://repo1.maven.org/maven2/com/google/guava/guava/33.0-jre/guava-33.0-jre.jar"), + SourceUrl = new Uri("https://github.com/google/guava"), + }; + + tc.BaseId.Should().Be("com.google.guava guava 33.0-jre - Maven"); + tc.Id.Should().Be(tc.BaseId, "DownloadUrl and SourceUrl must not affect MavenComponent identity"); + } + + [TestMethod] + public void GitComponent_Id_ExcludesDownloadUrlAndSourceUrl_WhenSet() + { + // GitComponent overrides GetExtendedIdProperties for the same reason as MavenComponent: + // SourceUrl would duplicate RepositoryUrl, and DownloadUrl (the github archive URL) is deterministic. + var repo = new Uri("https://github.com/google/guava"); + var tc = new GitComponent(repo, "abcdef1234567890") + { + DownloadUrl = new Uri("https://github.com/google/guava/archive/abcdef1234567890.zip"), + SourceUrl = repo, + }; + + tc.BaseId.Should().Be("https://github.com/google/guava : abcdef1234567890 - Git"); + tc.Id.Should().Be(tc.BaseId, "DownloadUrl and SourceUrl must not affect GitComponent identity"); + } } From 9100511ce299719285725c2e96a8a2501d8653c1 Mon Sep 17 00:00:00 2001 From: Zheng Hao Tang Date: Mon, 8 Jun 2026 14:00:52 -0700 Subject: [PATCH 2/4] test: harden Maven verification fixture for PackageUrl coverage Update verification Maven resource test/Microsoft.ComponentDetection.VerificationTests/resources/maven/lib/pom.xml to make PackageUrl-producing coordinates explicit: - add module-level com.microsoft - add literal dependency org.apache.commons:commons-lang3:3.12.0 This keeps existing semantics intact while ensuring at least one stable, non-property-indirected Maven coordinate is present in the fixture for verification comparisons that key on Component.PackageUrl. --- .../resources/maven/lib/pom.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/Microsoft.ComponentDetection.VerificationTests/resources/maven/lib/pom.xml b/test/Microsoft.ComponentDetection.VerificationTests/resources/maven/lib/pom.xml index 79b9a851c..699dc6ef6 100644 --- a/test/Microsoft.ComponentDetection.VerificationTests/resources/maven/lib/pom.xml +++ b/test/Microsoft.ComponentDetection.VerificationTests/resources/maven/lib/pom.xml @@ -1,5 +1,6 @@ 4.0.0 + com.microsoft com.microsoft maven-test-parent @@ -32,6 +33,11 @@ 2.3.0 pom + + org.apache.commons + commons-lang3 + 3.12.0 + junit junit From a55692d330928b3b3bb34b71e1f32a4f8d0f41bf Mon Sep 17 00:00:00 2001 From: Zheng Hao Tang Date: Mon, 8 Jun 2026 14:28:11 -0700 Subject: [PATCH 3/4] fix: guard Git PackageUrl against whitespace commit hash --- .../TypedComponent/GitComponent.cs | 2 +- .../PurlGenerationTests.cs | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/Microsoft.ComponentDetection.Contracts/TypedComponent/GitComponent.cs b/src/Microsoft.ComponentDetection.Contracts/TypedComponent/GitComponent.cs index 7320740ef..0e05972f0 100644 --- a/src/Microsoft.ComponentDetection.Contracts/TypedComponent/GitComponent.cs +++ b/src/Microsoft.ComponentDetection.Contracts/TypedComponent/GitComponent.cs @@ -48,7 +48,7 @@ public override PackageURL PackageUrl { get { - if (string.IsNullOrEmpty(this.CommitHash) + if (string.IsNullOrWhiteSpace(this.CommitHash) || !TryGetGithubOwnerAndRepo(this.RepositoryUrl, out var owner, out var repo)) { return null; diff --git a/test/Microsoft.ComponentDetection.Contracts.Tests/PurlGenerationTests.cs b/test/Microsoft.ComponentDetection.Contracts.Tests/PurlGenerationTests.cs index a6a779b81..69b863d27 100644 --- a/test/Microsoft.ComponentDetection.Contracts.Tests/PurlGenerationTests.cs +++ b/test/Microsoft.ComponentDetection.Contracts.Tests/PurlGenerationTests.cs @@ -203,4 +203,17 @@ public void GitComponentMissingCommitHashShouldHaveNoPackageUrl() component.PackageUrl.Should().BeNull(); } + + [TestMethod] + public void GitComponentWhitespaceCommitHashShouldHaveNoPackageUrl() + { + // CommitHash is required via the public ctor, but the parameterless deserialization ctor can carry whitespace. + var component = new GitComponent + { + RepositoryUrl = new Uri("https://github.com/google/guava"), + CommitHash = " ", + }; + + component.PackageUrl.Should().BeNull(); + } } From 4bcfbf899861b3006a56072839c6610e7d9f9d2f Mon Sep 17 00:00:00 2001 From: Zheng Hao Tang Date: Mon, 8 Jun 2026 14:33:00 -0700 Subject: [PATCH 4/4] test(verification): avoid maven fixture dependency-graph drift --- .../resources/maven/lib/pom.xml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/test/Microsoft.ComponentDetection.VerificationTests/resources/maven/lib/pom.xml b/test/Microsoft.ComponentDetection.VerificationTests/resources/maven/lib/pom.xml index 699dc6ef6..e790d1fb8 100644 --- a/test/Microsoft.ComponentDetection.VerificationTests/resources/maven/lib/pom.xml +++ b/test/Microsoft.ComponentDetection.VerificationTests/resources/maven/lib/pom.xml @@ -33,11 +33,6 @@ 2.3.0 pom - - org.apache.commons - commons-lang3 - 3.12.0 - junit junit