Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,15 @@
namespace Microsoft.ComponentDetection.Contracts.TypedComponent;

using System;
using System.Collections.Generic;
using System.Text.Json.Serialization;
using PackageUrl;

public class GitComponent : TypedComponent
{
private const string GithubHost = "github.com";
private const string DotGitSuffix = ".git";

public GitComponent(Uri repositoryUrl, string commitHash)
{
this.RepositoryUrl = this.ValidateRequiredInput(repositoryUrl, nameof(this.RepositoryUrl), nameof(ComponentType.Git));
Expand All @@ -32,5 +37,79 @@ public GitComponent()
[JsonIgnore]
public override ComponentType Type
{
    // Fixed discriminator for this component kind.
    get { return ComponentType.Git; }
}

/// <summary>
/// Gets the Package URL of this component, in the form <c>pkg:github/{owner}/{repo}@{commit}</c>.
/// A value is produced only when <see cref="CommitHash"/> is non-blank and
/// <see cref="RepositoryUrl"/> is a github.com URL whose path is exactly <c>owner/repo</c>.
/// Any other host (gitlab, bitbucket, ADO, GitHub Enterprise, etc.) or an unexpected path
/// yields null, in which case consumers should fall back to <see cref="RepositoryUrl"/>.
/// </summary>
[JsonPropertyName("packageUrl")]
public override PackageURL PackageUrl
{
    get
    {
        // The commit hash is checked first so the URL is never parsed when there is no version to report.
        var hasCommitHash = !string.IsNullOrWhiteSpace(this.CommitHash);
        if (hasCommitHash && TryGetGithubOwnerAndRepo(this.RepositoryUrl, out var owner, out var repo))
        {
            return new PackageURL("github", owner, repo, this.CommitHash, null, null);
        }

        return null;
    }
}

/// <summary>
/// Builds the base identifier from the repository URL, the commit hash and the component type.
/// </summary>
/// <returns>A string of the form <c>{RepositoryUrl} : {CommitHash} - {Type}</c>.</returns>
protected override string ComputeBaseId()
{
    return $"{this.RepositoryUrl} : {this.CommitHash} - {this.Type}";
}

/// <summary>
/// Replaces the base implementation with one that contributes nothing, so that
/// <see cref="TypedComponent.Id"/> does not change when a detector later fills in
/// <see cref="TypedComponent.DownloadUrl"/> or <see cref="TypedComponent.SourceUrl"/>.
/// BaseId already carries RepositoryUrl and CommitHash; the GitHub archive download URL is
/// deterministic, and a source URL would only repeat RepositoryUrl.
/// </summary>
/// <returns>An empty sequence.</returns>
protected override IEnumerable<KeyValuePair<string, string>> GetExtendedIdProperties()
{
    return Array.Empty<KeyValuePair<string, string>>();
}

/// <summary>
/// Tries to read the owner and repository name out of a github.com repository URL.
/// </summary>
/// <param name="repositoryUrl">The URL to inspect. Null and relative URIs are rejected.</param>
/// <param name="owner">On success, the first path segment; otherwise null.</param>
/// <param name="repo">
/// On success, the second path segment with a trailing <c>.git</c> (any casing) removed; otherwise null.
/// </param>
/// <returns>
/// True when the host equals <c>github.com</c> (case-insensitive) and the path, ignoring leading and
/// trailing slashes, consists of exactly two non-empty segments; false in every other case.
/// </returns>
private static bool TryGetGithubOwnerAndRepo(Uri repositoryUrl, out string owner, out string repo)
{
    owner = null;
    repo = null;

    // Host can only be read on an absolute URI, so IsAbsoluteUri has to be tested before it.
    if (repositoryUrl == null
        || !repositoryUrl.IsAbsoluteUri
        || !string.Equals(repositoryUrl.Host, GithubHost, StringComparison.OrdinalIgnoreCase))
    {
        return false;
    }

    // Trimming makes "/owner/repo" and "/owner/repo/" equivalent and turns "/" into an empty string.
    var trimmedPath = repositoryUrl.AbsolutePath.Trim('/');
    if (string.IsNullOrEmpty(trimmedPath))
    {
        return false;
    }

    // Anything other than exactly owner/repo (owner only, browse URLs such as /tree/main) is rejected.
    var segments = trimmedPath.Split('/');
    if (segments.Length != 2)
    {
        return false;
    }

    var ownerSegment = segments[0];
    var repoSegment = segments[1];
    if (repoSegment.EndsWith(DotGitSuffix, StringComparison.OrdinalIgnoreCase))
    {
        repoSegment = repoSegment.Substring(0, repoSegment.Length - DotGitSuffix.Length);
    }

    // A repo segment that was only ".git" is empty after stripping and must not produce a result.
    if (string.IsNullOrEmpty(ownerSegment) || string.IsNullOrEmpty(repoSegment))
    {
        return false;
    }

    owner = ownerSegment;
    repo = repoSegment;
    return true;
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#nullable disable
namespace Microsoft.ComponentDetection.Contracts.TypedComponent;

using System.Collections.Generic;
using System.Text.Json.Serialization;
using PackageUrl;

Expand Down Expand Up @@ -34,4 +35,16 @@ public MavenComponent()
public override PackageURL PackageUrl
{
    // GroupId, ArtifactId and Version are passed as the namespace, name and version of a "maven" purl.
    get { return new PackageURL("maven", this.GroupId, this.ArtifactId, this.Version, null, null); }
}

/// <summary>
/// Builds the base identifier from the Maven coordinates and the component type.
/// </summary>
/// <returns>A string of the form <c>{GroupId} {ArtifactId} {Version} - {Type}</c>.</returns>
protected override string ComputeBaseId()
{
    return $"{this.GroupId} {this.ArtifactId} {this.Version} - {this.Type}";
}

/// <summary>
/// Replaces the base implementation with one that contributes nothing, so that
/// <see cref="TypedComponent.Id"/> does not change when a detector later fills in
/// <see cref="TypedComponent.DownloadUrl"/> or <see cref="TypedComponent.SourceUrl"/>.
/// BaseId already carries GroupId/ArtifactId/Version; the Maven Central download URL is
/// deterministic, and source/repo URLs are surfaced server-side from the POM.
/// </summary>
/// <returns>An empty sequence.</returns>
protected override IEnumerable<KeyValuePair<string, string>> GetExtendedIdProperties()
{
    return System.Array.Empty<KeyValuePair<string, string>>();
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#nullable disable
namespace Microsoft.ComponentDetection.Contracts.Tests;

using System;
using AwesomeAssertions;
using Microsoft.ComponentDetection.Contracts.TypedComponent;
using Microsoft.VisualStudio.TestTools.UnitTesting;
Expand Down Expand Up @@ -110,4 +111,109 @@ public void CocoaPodNameShouldPurlWithCustomQualifier()

packageOne.PackageUrl.ToString().Should().Be("pkg:cocoapods/AFNetworking@4.0.1?repository_url=https:%2F%2Fcustom_repo.example.com%2Fpath%2Fto%2Frepo%2Fspecs.git");
}

[TestMethod]
public void MavenComponentShouldGenerateMavenPurl()
{
    // purl "maven" type definition:
    // https://github.com/package-url/purl-spec/blob/b8ddd39a6d533b8895f3b741f2e62e2695d82aa4/PURL-TYPES.rst#maven
    var groupId = "com.google.guava";
    var artifactId = "guava";
    var version = "33.0-jre";

    var purl = new MavenComponent(groupId, artifactId, version).PackageUrl;

    purl.Type.Should().Be("maven");
    purl.Namespace.Should().Be(groupId);
    purl.Name.Should().Be(artifactId);
    purl.Version.Should().Be(version);
    purl.ToString().Should().Be("pkg:maven/com.google.guava/guava@33.0-jre");
}

[TestMethod]
public void GitComponentGithubRepositoryShouldGenerateGithubPurl()
{
    // purl "github" type definition:
    // https://github.com/package-url/purl-spec/blob/b8ddd39a6d533b8895f3b741f2e62e2695d82aa4/PURL-TYPES.rst#github
    var repositoryUrl = new Uri("https://github.com/google/guava");
    var commitHash = "abcdef1234567890";

    var purl = new GitComponent(repositoryUrl, commitHash).PackageUrl;

    purl.Type.Should().Be("github");
    purl.Namespace.Should().Be("google");
    purl.Name.Should().Be("guava");
    purl.Version.Should().Be(commitHash);
    purl.ToString().Should().Be("pkg:github/google/guava@abcdef1234567890");
}

[TestMethod]
public void GitComponentGithubRepositoryWithDotGitSuffixShouldStripIt()
{
    // A clone-style URL ending in ".git" must map to the same purl as the plain repository URL.
    var cloneUrl = new Uri("https://github.com/google/guava.git");

    var purl = new GitComponent(cloneUrl, "abcdef1234567890").PackageUrl;

    purl.Name.Should().Be("guava", "the .git suffix is not part of the canonical repo name");
    purl.ToString().Should().Be("pkg:github/google/guava@abcdef1234567890");
}

[TestMethod]
public void GitComponentGithubRepositoryWithTrailingSlashShouldBeNormalized()
{
    // The trailing slash must not be counted as an extra path segment.
    var urlWithTrailingSlash = new Uri("https://github.com/google/guava/");

    var purlText = new GitComponent(urlWithTrailingSlash, "abcdef1234567890").PackageUrl.ToString();

    purlText.Should().Be("pkg:github/google/guava@abcdef1234567890");
}

[TestMethod]
public void GitComponentGithubHostMatchIsCaseInsensitive()
{
    // A mixed-case host must still be recognized as github.com.
    var mixedCaseHostUrl = new Uri("https://GitHub.com/google/guava");

    var purlText = new GitComponent(mixedCaseHostUrl, "abcdef1234567890").PackageUrl.ToString();

    purlText.Should().Be("pkg:github/google/guava@abcdef1234567890");
}

[TestMethod]
public void GitComponentNonGithubRepositoryShouldHaveNoPackageUrl()
{
    // GitLab / Bitbucket / Azure DevOps / GitHub Enterprise have no canonical PURL representation today.
    // Consumers should fall back to RepositoryUrl in this case.
    var nonGithubUrls = new[]
    {
        "https://gitlab.com/foo/bar",
        "https://bitbucket.org/foo/bar",
        "https://dev.azure.com/org/proj/_git/repo",
        "https://github.contoso.com/foo/bar",
    };

    foreach (var url in nonGithubUrls)
    {
        var component = new GitComponent(new Uri(url), "abcdef1234567890");

        component.PackageUrl.Should().BeNull();
    }
}

[TestMethod]
public void GitComponentMalformedGithubUrlShouldHaveNoPackageUrl()
{
    // Owner only, paths deeper than owner/repo (e.g. browse URLs), or the bare site root —
    // none of these are canonical repository URLs.
    var malformedUrls = new[]
    {
        "https://github.com/google",
        "https://github.com/google/guava/tree/main",
        "https://github.com/",
    };

    foreach (var url in malformedUrls)
    {
        var component = new GitComponent(new Uri(url), "abcdef1234567890");

        component.PackageUrl.Should().BeNull();
    }
}

[TestMethod]
public void GitComponentMissingCommitHashShouldHaveNoPackageUrl()
{
    // The public ctor requires CommitHash, but the parameterless deserialization ctor leaves it null.
    var repositoryUrl = new Uri("https://github.com/google/guava");
    var withoutCommit = new GitComponent
    {
        RepositoryUrl = repositoryUrl,
    };

    var purl = withoutCommit.PackageUrl;

    purl.Should().BeNull();
}

[TestMethod]
public void GitComponentWhitespaceCommitHashShouldHaveNoPackageUrl()
{
    // The public ctor requires CommitHash, but the parameterless deserialization ctor can carry whitespace.
    var repositoryUrl = new Uri("https://github.com/google/guava");
    var blankCommit = new GitComponent
    {
        RepositoryUrl = repositoryUrl,
        CommitHash = " ",
    };

    var purl = blankCommit.PackageUrl;

    purl.Should().BeNull();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -504,4 +504,36 @@ public void TypedComponent_Id_IncludesBothUrls_WhenPresent()
tc.BaseId.Should().Be("TestPackage 1.0.0 - NuGet");
tc.Id.Should().Be("TestPackage 1.0.0 - NuGet [DownloadUrl:https://example.com/package/1.0.0 SourceUrl:https://github.com/test-org/TestPackage]");
}

[TestMethod]
public void MavenComponent_Id_ExcludesDownloadUrlAndSourceUrl_WhenSet()
{
    // MavenComponent overrides GetExtendedIdProperties so that Id stays the same whether or not a
    // detector populates DownloadUrl / SourceUrl (e.g. CDS surfaces SourceUrl from the POM, and
    // DownloadUrl is deterministic from GAV — neither should affect identity).
    var downloadUrl = new Uri("https://repo1.maven.org/maven2/com/google/guava/guava/33.0-jre/guava-33.0-jre.jar");
    var sourceUrl = new Uri("https://github.com/google/guava");
    var component = new MavenComponent("com.google.guava", "guava", "33.0-jre")
    {
        DownloadUrl = downloadUrl,
        SourceUrl = sourceUrl,
    };

    var baseId = component.BaseId;

    baseId.Should().Be("com.google.guava guava 33.0-jre - Maven");
    component.Id.Should().Be(component.BaseId, "DownloadUrl and SourceUrl must not affect MavenComponent identity");
}

[TestMethod]
public void GitComponent_Id_ExcludesDownloadUrlAndSourceUrl_WhenSet()
{
    // GitComponent overrides GetExtendedIdProperties for the same reason as MavenComponent:
    // SourceUrl would duplicate RepositoryUrl, and DownloadUrl (the github archive URL) is deterministic.
    var repositoryUrl = new Uri("https://github.com/google/guava");
    var archiveUrl = new Uri("https://github.com/google/guava/archive/abcdef1234567890.zip");
    var component = new GitComponent(repositoryUrl, "abcdef1234567890")
    {
        DownloadUrl = archiveUrl,
        SourceUrl = repositoryUrl,
    };

    var baseId = component.BaseId;

    baseId.Should().Be("https://github.com/google/guava : abcdef1234567890 - Git");
    component.Id.Should().Be(component.BaseId, "DownloadUrl and SourceUrl must not affect GitComponent identity");
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.microsoft</groupId>
<parent>
<groupId>com.microsoft</groupId>
<artifactId>maven-test-parent</artifactId>
Expand Down
Loading