diff --git a/src/Microsoft.ComponentDetection.Contracts/TypedComponent/GitComponent.cs b/src/Microsoft.ComponentDetection.Contracts/TypedComponent/GitComponent.cs index 57a6deae6..0e05972f0 100644 --- a/src/Microsoft.ComponentDetection.Contracts/TypedComponent/GitComponent.cs +++ b/src/Microsoft.ComponentDetection.Contracts/TypedComponent/GitComponent.cs @@ -2,10 +2,15 @@ namespace Microsoft.ComponentDetection.Contracts.TypedComponent; using System; +using System.Collections.Generic; using System.Text.Json.Serialization; +using PackageUrl; public class GitComponent : TypedComponent { + private const string GithubHost = "github.com"; + private const string DotGitSuffix = ".git"; + public GitComponent(Uri repositoryUrl, string commitHash) { this.RepositoryUrl = this.ValidateRequiredInput(repositoryUrl, nameof(this.RepositoryUrl), nameof(ComponentType.Git)); @@ -32,5 +37,79 @@ public GitComponent() [JsonIgnore] public override ComponentType Type => ComponentType.Git; + /// + /// Gets pkg:github/{owner}/{repo}@{commit} for repositories hosted on github.com whose + /// path resolves cleanly to owner/repo; null for any other host (gitlab, bitbucket, ADO, + /// GitHub Enterprise, etc.) or malformed paths. Consumers should fall back to + /// when this returns null. + /// + [JsonPropertyName("packageUrl")] + public override PackageURL PackageUrl + { + get + { + if (string.IsNullOrWhiteSpace(this.CommitHash) + || !TryGetGithubOwnerAndRepo(this.RepositoryUrl, out var owner, out var repo)) + { + return null; + } + + return new PackageURL("github", owner, repo, this.CommitHash, null, null); + } + } + protected override string ComputeBaseId() => $"{this.RepositoryUrl} : {this.CommitHash} - {this.Type}"; + + /// + /// Suppresses the base impl so stays stable if a detector later + /// populates or . + /// RepositoryUrl and CommitHash are already in BaseId; the GitHub archive download URL is + /// deterministic and source URL would duplicate RepositoryUrl. + /// + /// An empty sequence. + protected override IEnumerable> GetExtendedIdProperties() + { + yield break; + } + + private static bool TryGetGithubOwnerAndRepo(Uri repositoryUrl, out string owner, out string repo) + { + owner = null; + repo = null; + + if (repositoryUrl == null + || !repositoryUrl.IsAbsoluteUri + || !string.Equals(repositoryUrl.Host, GithubHost, StringComparison.OrdinalIgnoreCase)) + { + return false; + } + + var trimmedPath = repositoryUrl.AbsolutePath?.Trim('/'); + if (string.IsNullOrEmpty(trimmedPath)) + { + return false; + } + + var segments = trimmedPath.Split('/'); + if (segments.Length != 2) + { + return false; + } + + var ownerSegment = segments[0]; + var repoSegment = segments[1]; + if (repoSegment.EndsWith(DotGitSuffix, StringComparison.OrdinalIgnoreCase)) + { + repoSegment = repoSegment.Substring(0, repoSegment.Length - DotGitSuffix.Length); + } + + if (string.IsNullOrEmpty(ownerSegment) || string.IsNullOrEmpty(repoSegment)) + { + return false; + } + + owner = ownerSegment; + repo = repoSegment; + return true; + } } diff --git a/src/Microsoft.ComponentDetection.Contracts/TypedComponent/MavenComponent.cs b/src/Microsoft.ComponentDetection.Contracts/TypedComponent/MavenComponent.cs index 034a156ea..8cd522922 100644 --- a/src/Microsoft.ComponentDetection.Contracts/TypedComponent/MavenComponent.cs +++ b/src/Microsoft.ComponentDetection.Contracts/TypedComponent/MavenComponent.cs @@ -1,6 +1,7 @@ #nullable disable namespace Microsoft.ComponentDetection.Contracts.TypedComponent; +using System.Collections.Generic; using System.Text.Json.Serialization; using PackageUrl; @@ -34,4 +35,16 @@ public MavenComponent() public override PackageURL PackageUrl => new PackageURL("maven", this.GroupId, this.ArtifactId, this.Version, null, null); protected override string ComputeBaseId() => $"{this.GroupId} {this.ArtifactId} {this.Version} - {this.Type}"; + + /// + /// Suppresses the base impl so stays stable if a detector later + /// populates or . + /// GroupId/ArtifactId/Version are already in BaseId; the Maven Central download URL is deterministic + /// and source/repo URLs are surfaced server-side from the POM. + /// + /// An empty sequence. + protected override IEnumerable> GetExtendedIdProperties() + { + yield break; + } } diff --git a/test/Microsoft.ComponentDetection.Contracts.Tests/PurlGenerationTests.cs b/test/Microsoft.ComponentDetection.Contracts.Tests/PurlGenerationTests.cs index 18b9f99c4..69b863d27 100644 --- a/test/Microsoft.ComponentDetection.Contracts.Tests/PurlGenerationTests.cs +++ b/test/Microsoft.ComponentDetection.Contracts.Tests/PurlGenerationTests.cs @@ -1,6 +1,7 @@ #nullable disable namespace Microsoft.ComponentDetection.Contracts.Tests; +using System; using AwesomeAssertions; using Microsoft.ComponentDetection.Contracts.TypedComponent; using Microsoft.VisualStudio.TestTools.UnitTesting; @@ -110,4 +111,109 @@ public void CocoaPodNameShouldPurlWithCustomQualifier() packageOne.PackageUrl.ToString().Should().Be("pkg:cocoapods/AFNetworking@4.0.1?repository_url=https:%2F%2Fcustom_repo.example.com%2Fpath%2Fto%2Frepo%2Fspecs.git"); } + + [TestMethod] + public void MavenComponentShouldGenerateMavenPurl() + { + // https://github.com/package-url/purl-spec/blob/b8ddd39a6d533b8895f3b741f2e62e2695d82aa4/PURL-TYPES.rst#maven + var component = new MavenComponent("com.google.guava", "guava", "33.0-jre"); + + component.PackageUrl.Type.Should().Be("maven"); + component.PackageUrl.Namespace.Should().Be("com.google.guava"); + component.PackageUrl.Name.Should().Be("guava"); + component.PackageUrl.Version.Should().Be("33.0-jre"); + component.PackageUrl.ToString().Should().Be("pkg:maven/com.google.guava/guava@33.0-jre"); + } + + [TestMethod] + public void GitComponentGithubRepositoryShouldGenerateGithubPurl() + { + // https://github.com/package-url/purl-spec/blob/b8ddd39a6d533b8895f3b741f2e62e2695d82aa4/PURL-TYPES.rst#github + var component = new GitComponent(new Uri("https://github.com/google/guava"), "abcdef1234567890"); + + component.PackageUrl.Type.Should().Be("github"); + component.PackageUrl.Namespace.Should().Be("google"); + component.PackageUrl.Name.Should().Be("guava"); + component.PackageUrl.Version.Should().Be("abcdef1234567890"); + component.PackageUrl.ToString().Should().Be("pkg:github/google/guava@abcdef1234567890"); + } + + [TestMethod] + public void GitComponentGithubRepositoryWithDotGitSuffixShouldStripIt() + { + var component = new GitComponent(new Uri("https://github.com/google/guava.git"), "abcdef1234567890"); + + component.PackageUrl.Name.Should().Be("guava", "the .git suffix is not part of the canonical repo name"); + component.PackageUrl.ToString().Should().Be("pkg:github/google/guava@abcdef1234567890"); + } + + [TestMethod] + public void GitComponentGithubRepositoryWithTrailingSlashShouldBeNormalized() + { + var component = new GitComponent(new Uri("https://github.com/google/guava/"), "abcdef1234567890"); + + component.PackageUrl.ToString().Should().Be("pkg:github/google/guava@abcdef1234567890"); + } + + [TestMethod] + public void GitComponentGithubHostMatchIsCaseInsensitive() + { + var component = new GitComponent(new Uri("https://GitHub.com/google/guava"), "abcdef1234567890"); + + component.PackageUrl.ToString().Should().Be("pkg:github/google/guava@abcdef1234567890"); + } + + [TestMethod] + public void GitComponentNonGithubRepositoryShouldHaveNoPackageUrl() + { + // GitLab / Bitbucket / Azure DevOps / GitHub Enterprise have no canonical PURL representation today. + // Consumers should fall back to RepositoryUrl in this case. + var gitlab = new GitComponent(new Uri("https://gitlab.com/foo/bar"), "abcdef1234567890"); + var bitbucket = new GitComponent(new Uri("https://bitbucket.org/foo/bar"), "abcdef1234567890"); + var ado = new GitComponent(new Uri("https://dev.azure.com/org/proj/_git/repo"), "abcdef1234567890"); + var ghEnterprise = new GitComponent(new Uri("https://github.contoso.com/foo/bar"), "abcdef1234567890"); + + gitlab.PackageUrl.Should().BeNull(); + bitbucket.PackageUrl.Should().BeNull(); + ado.PackageUrl.Should().BeNull(); + ghEnterprise.PackageUrl.Should().BeNull(); + } + + [TestMethod] + public void GitComponentMalformedGithubUrlShouldHaveNoPackageUrl() + { + // Owner only, or paths deeper than owner/repo (e.g. browse URLs) — not canonical repository URLs. + var ownerOnly = new GitComponent(new Uri("https://github.com/google"), "abcdef1234567890"); + var tooDeep = new GitComponent(new Uri("https://github.com/google/guava/tree/main"), "abcdef1234567890"); + var rootOnly = new GitComponent(new Uri("https://github.com/"), "abcdef1234567890"); + + ownerOnly.PackageUrl.Should().BeNull(); + tooDeep.PackageUrl.Should().BeNull(); + rootOnly.PackageUrl.Should().BeNull(); + } + + [TestMethod] + public void GitComponentMissingCommitHashShouldHaveNoPackageUrl() + { + // CommitHash is required via the public ctor, but the parameterless deserialization ctor allows null. + var component = new GitComponent + { + RepositoryUrl = new Uri("https://github.com/google/guava"), + }; + + component.PackageUrl.Should().BeNull(); + } + + [TestMethod] + public void GitComponentWhitespaceCommitHashShouldHaveNoPackageUrl() + { + // CommitHash is required via the public ctor, but the parameterless deserialization ctor can carry whitespace. + var component = new GitComponent + { + RepositoryUrl = new Uri("https://github.com/google/guava"), + CommitHash = " ", + }; + + component.PackageUrl.Should().BeNull(); + } } diff --git a/test/Microsoft.ComponentDetection.Contracts.Tests/TypedComponentSerializationTests.cs b/test/Microsoft.ComponentDetection.Contracts.Tests/TypedComponentSerializationTests.cs index 57342f7e7..77b302860 100644 --- a/test/Microsoft.ComponentDetection.Contracts.Tests/TypedComponentSerializationTests.cs +++ b/test/Microsoft.ComponentDetection.Contracts.Tests/TypedComponentSerializationTests.cs @@ -504,4 +504,36 @@ public void TypedComponent_Id_IncludesBothUrls_WhenPresent() tc.BaseId.Should().Be("TestPackage 1.0.0 - NuGet"); tc.Id.Should().Be("TestPackage 1.0.0 - NuGet [DownloadUrl:https://example.com/package/1.0.0 SourceUrl:https://github.com/test-org/TestPackage]"); } + + [TestMethod] + public void MavenComponent_Id_ExcludesDownloadUrlAndSourceUrl_WhenSet() + { + // MavenComponent overrides GetExtendedIdProperties to keep Id stable across detectors that + // may or may not populate DownloadUrl / SourceUrl (e.g. CDS surfaces SourceUrl from the POM, + // and DownloadUrl is deterministic from GAV — neither should affect identity). + var tc = new MavenComponent("com.google.guava", "guava", "33.0-jre") + { + DownloadUrl = new Uri("https://repo1.maven.org/maven2/com/google/guava/guava/33.0-jre/guava-33.0-jre.jar"), + SourceUrl = new Uri("https://github.com/google/guava"), + }; + + tc.BaseId.Should().Be("com.google.guava guava 33.0-jre - Maven"); + tc.Id.Should().Be(tc.BaseId, "DownloadUrl and SourceUrl must not affect MavenComponent identity"); + } + + [TestMethod] + public void GitComponent_Id_ExcludesDownloadUrlAndSourceUrl_WhenSet() + { + // GitComponent overrides GetExtendedIdProperties for the same reason as MavenComponent: + // SourceUrl would duplicate RepositoryUrl, and DownloadUrl (the github archive URL) is deterministic. + var repo = new Uri("https://github.com/google/guava"); + var tc = new GitComponent(repo, "abcdef1234567890") + { + DownloadUrl = new Uri("https://github.com/google/guava/archive/abcdef1234567890.zip"), + SourceUrl = repo, + }; + + tc.BaseId.Should().Be("https://github.com/google/guava : abcdef1234567890 - Git"); + tc.Id.Should().Be(tc.BaseId, "DownloadUrl and SourceUrl must not affect GitComponent identity"); + } } diff --git a/test/Microsoft.ComponentDetection.VerificationTests/resources/maven/lib/pom.xml b/test/Microsoft.ComponentDetection.VerificationTests/resources/maven/lib/pom.xml index 79b9a851c..e790d1fb8 100644 --- a/test/Microsoft.ComponentDetection.VerificationTests/resources/maven/lib/pom.xml +++ b/test/Microsoft.ComponentDetection.VerificationTests/resources/maven/lib/pom.xml @@ -1,5 +1,6 @@ 4.0.0 + com.microsoft com.microsoft maven-test-parent