diff --git a/src/services/Elastic.Changelog/Bundling/LinkAllowlistSanitizer.cs b/src/services/Elastic.Changelog/Bundling/LinkAllowlistSanitizer.cs index 611b04c75..e93ddeb1c 100644 --- a/src/services/Elastic.Changelog/Bundling/LinkAllowlistSanitizer.cs +++ b/src/services/Elastic.Changelog/Bundling/LinkAllowlistSanitizer.cs @@ -366,6 +366,21 @@ public static void ValidateNoPrivateReferences(string serializedYaml, IReadOnlyL if (!ChangelogTextUtilities.TryGetGitHubRepo(r, defaultOwner, defaultBundleRepo ?? string.Empty, out var owner, out var repo)) { + // A bare numeric reference with no repo context (e.g. `prs: ['155500']` loaded by the + // scrubber Lambda, which doesn't know which repo a per-entry YAML belongs to) carries no + // repository identity, so it cannot leak a private reference on its own. Keep it as-is — + // downstream rendering supplies the owner/repo from runtime context — and emit a warning so + // the diagnostic is still visible without failing the whole entry. Anything that still + // encodes a repo (URL / owner/repo#N short-form) hits the explicit error branch below. + if (!string.IsNullOrWhiteSpace(r) && uint.TryParse(r.Trim(), out _)) + { + list.Add(r); + collector.EmitWarning( + string.Empty, + $"Bare {referenceKind} reference '{r}' has no embedded owner/repo and no default repo was supplied; keeping as-is for downstream rendering to resolve."); + continue; + } + collector.EmitError( string.Empty, $"Link allowlist filtering could not parse {referenceKind} reference '{r}'. " + diff --git a/src/services/Elastic.Changelog/Creation/ChangelogFileWriter.cs b/src/services/Elastic.Changelog/Creation/ChangelogFileWriter.cs index 8c40eac84..ce3ba610d 100644 --- a/src/services/Elastic.Changelog/Creation/ChangelogFileWriter.cs +++ b/src/services/Elastic.Changelog/Creation/ChangelogFileWriter.cs @@ -143,13 +143,44 @@ private static ChangelogEntry BuildChangelogData(CreateChangelogArguments input) Action = input.Action, FeatureId = input.FeatureId, Highlight = input.Highlight, - Prs = input.Prs is { Length: > 0 } ? input.Prs.ToList() : null, + Prs = NormalizeReferences(input.Prs, input.Owner, input.Repo, "pull"), Products = input.Products.Select(p => p.ToProductReference()).ToList(), Areas = input.Areas is { Length: > 0 } ? input.Areas.ToList() : null, - Issues = input.Issues is { Length: > 0 } ? input.Issues.ToList() : null + Issues = NormalizeReferences(input.Issues, input.Owner, input.Repo, "issues") }; } + /// + /// Expands bare numeric PR/issue references to full https://github.com/owner/repo/{pull|issues}/N + /// URLs when owner and repo are available, so the written YAML is self-describing — readers (including the + /// changelog scrubber Lambda, which has no per-entry repo context) can resolve the target repository without + /// extra arguments. References that are already full URLs or owner/repo#N short-forms are returned + /// unchanged. Returns null when there is nothing to write, preserving the previous omit-on-empty + /// serialization behaviour. + /// + internal static List? NormalizeReferences(string[]? refs, string? owner, string? repo, string pathSegment) + { + if (refs is not { Length: > 0 }) + return null; + + // Only expand when we have an unambiguous single-repo context. Bundle-style multi-repo strings + // (e.g. "elasticsearch+kibana") or pre-qualified "org/repo" values would expand to the wrong URL. + var hasContext = !string.IsNullOrWhiteSpace(owner) + && !string.IsNullOrWhiteSpace(repo) + && !repo.Contains('/', StringComparison.Ordinal) + && !repo.Contains('+', StringComparison.Ordinal); + + var list = new List(refs.Length); + foreach (var r in refs) + { + if (hasContext && !string.IsNullOrWhiteSpace(r) && uint.TryParse(r.Trim(), out _)) + list.Add($"https://github.com/{owner}/{repo}/{pathSegment}/{r.Trim()}"); + else + list.Add(r); + } + return list; + } + private static string GenerateYaml(ChangelogEntry data, ChangelogConfiguration config, bool titleMissing, bool typeMissing) { // Create a mutable copy for serialization adjustments diff --git a/tests/Elastic.Changelog.Tests/Changelogs/Create/PrIntegrationTests.cs b/tests/Elastic.Changelog.Tests/Changelogs/Create/PrIntegrationTests.cs index 0bc0eb04d..2c1760c00 100644 --- a/tests/Elastic.Changelog.Tests/Changelogs/Create/PrIntegrationTests.cs +++ b/tests/Elastic.Changelog.Tests/Changelogs/Create/PrIntegrationTests.cs @@ -597,4 +597,69 @@ public async Task CreateChangelog_WithMixedPrsFromFileAndCommaSeparated_Processe yamlContents.Should().Contain(c => c.Contains("prs:") && c.Contains("https://github.com/elastic/elasticsearch/pull/1111")); yamlContents.Should().Contain(c => c.Contains("prs:") && c.Contains("https://github.com/elastic/elasticsearch/pull/2222")); } + + [Fact] + public async Task CreateChangelog_WithBareNumberPrAndOwnerRepo_WritesFullUrlIntoYaml() + { + // Mirrors the upload action's fork-PR re-derivation invocation: + // docs-builder changelog add --concise --use-pr-number --owner elastic --repo cloud --prs 155500 + // The previous bare-number-in YAML was unparseable by the scrubber Lambda (which has no per-entry + // repo context), so the writer must normalize bare numbers to full URLs when owner+repo are known. + var prInfo = new GitHubPrInfo + { + Title = "Fix upload failures for extensions with >5GiB", + Labels = ["type:bug"] + }; + + A.CallTo(() => MockGitHubService.FetchPrInfoAsync( + "155500", + "elastic", + "cloud", + A._)) + .Returns(prInfo); + + // language=yaml + var configContent = + """ + pivot: + types: + feature: + bug-fix: "type:bug" + breaking-change: + lifecycles: + - preview + - beta + - ga + """; + var configPath = await CreateConfigDirectory(configContent); + + var service = CreateService(); + + var input = new CreateChangelogArguments + { + Prs = ["155500"], + Owner = "elastic", + Repo = "cloud", + Products = [new ProductArgument { Product = "elasticsearch", Target = "9.2.0", Lifecycle = "ga" }], + Config = configPath, + Output = CreateOutputDirectory(), + UsePrNumber = true, + Concise = true + }; + + var result = await service.CreateChangelog(Collector, input, TestContext.Current.CancellationToken); + + result.Should().BeTrue(); + Collector.Errors.Should().Be(0); + + var outputDir = input.Output ?? FileSystem.Directory.GetCurrentDirectory(); + var files = FileSystem.Directory.GetFiles(outputDir, "*.yaml"); + files.Should().HaveCount(1); + + var yamlContent = await FileSystem.File.ReadAllTextAsync(files[0], TestContext.Current.CancellationToken); + yamlContent.Should().Contain("prs:"); + yamlContent.Should().Contain("https://github.com/elastic/cloud/pull/155500"); + yamlContent.Should().NotMatch("*prs:*\n- '155500'*"); + yamlContent.Should().NotMatch("*prs:*\n- 155500*"); + } } diff --git a/tests/Elastic.Changelog.Tests/Changelogs/LinkAllowlistSanitizerTests.cs b/tests/Elastic.Changelog.Tests/Changelogs/LinkAllowlistSanitizerTests.cs index 851a05472..b3cb5cc2a 100644 --- a/tests/Elastic.Changelog.Tests/Changelogs/LinkAllowlistSanitizerTests.cs +++ b/tests/Elastic.Changelog.Tests/Changelogs/LinkAllowlistSanitizerTests.cs @@ -530,6 +530,76 @@ public void TryApplyChangelogEntry_NullFields_PreservesNulls() sanitized.Description.Should().BeNull(); } + [Fact] + public void TryApplyChangelogEntry_BarePrNumberWithoutDefaultRepo_KeptWithWarning() + { + // The scrubber Lambda calls TryApplyChangelogEntry with defaultRepo=null because per-entry + // YAMLs uploaded under {product}/changelogs/*.yaml carry no embedded repo context. A bare + // numeric PR ref ("155500") must be tolerated rather than failing the whole entry — the + // reference carries no repo identity so it cannot leak a private link, and downstream + // rendering supplies the owner/repo from runtime context. + var entry = new BundledEntry + { + Title = "Fork PR entry", + Prs = ["155500"], + Issues = null + }; + + var allow = new[] { "elastic/elasticsearch" }; + var ok = LinkAllowlistSanitizer.TryApplyChangelogEntry( + Collector, entry, allow, "elastic", null, + out var sanitized, out var changed); + + ok.Should().BeTrue(); + changed.Should().BeFalse(); + sanitized.Prs.Should().BeEquivalentTo(["155500"]); + Collector.Errors.Should().Be(0); + Collector.Warnings.Should().BeGreaterThan(0); + } + + [Fact] + public void TryApplyChangelogEntry_BareIssueNumberWithoutDefaultRepo_KeptWithWarning() + { + var entry = new BundledEntry + { + Title = "Entry with bare issue", + Prs = null, + Issues = ["4274"] + }; + + var allow = new[] { "elastic/elasticsearch" }; + var ok = LinkAllowlistSanitizer.TryApplyChangelogEntry( + Collector, entry, allow, "elastic", null, + out var sanitized, out var changed); + + ok.Should().BeTrue(); + changed.Should().BeFalse(); + sanitized.Issues.Should().BeEquivalentTo(["4274"]); + Collector.Errors.Should().Be(0); + Collector.Warnings.Should().BeGreaterThan(0); + } + + [Fact] + public void TryApplyChangelogEntry_UnparseableNonNumericRef_StillErrors() + { + // Genuinely unparseable references (not bare numbers and not URL/short-form) should still + // fail-closed so we surface schema regressions instead of silently dropping data. + var entry = new BundledEntry + { + Title = "Malformed entry", + Prs = ["not-a-pr-ref"], + Issues = null + }; + + var allow = new[] { "elastic/elasticsearch" }; + var ok = LinkAllowlistSanitizer.TryApplyChangelogEntry( + Collector, entry, allow, "elastic", "elasticsearch", + out _, out _); + + ok.Should().BeFalse(); + Collector.Errors.Should().BeGreaterThan(0); + } + // --- ScrubText --- [Fact] diff --git a/tests/Elastic.Changelog.Tests/Creation/NormalizeReferencesTests.cs b/tests/Elastic.Changelog.Tests/Creation/NormalizeReferencesTests.cs new file mode 100644 index 000000000..b049f79a3 --- /dev/null +++ b/tests/Elastic.Changelog.Tests/Creation/NormalizeReferencesTests.cs @@ -0,0 +1,130 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using AwesomeAssertions; +using Elastic.Changelog.Creation; + +namespace Elastic.Changelog.Tests.Creation; + +/// +/// Covers , which is the choke point for making +/// generated entry YAMLs self-describing. Bare PR/issue numbers must be expanded to full GitHub URLs +/// when owner+repo are supplied so downstream consumers without per-entry repo context — most +/// importantly the changelog-scrubber Lambda — can resolve the target repository. +/// +public class NormalizeReferencesTests +{ + [Fact] + public void Null_ReturnsNull() => + ChangelogFileWriter.NormalizeReferences(null, "elastic", "cloud", "pull") + .Should().BeNull(); + + [Fact] + public void Empty_ReturnsNull() => + ChangelogFileWriter.NormalizeReferences([], "elastic", "cloud", "pull") + .Should().BeNull(); + + [Fact] + public void BareNumber_WithOwnerAndRepo_ExpandsToFullUrl() + { + var result = ChangelogFileWriter.NormalizeReferences( + ["155500"], "elastic", "cloud", "pull"); + + result.Should().BeEquivalentTo(["https://github.com/elastic/cloud/pull/155500"]); + } + + [Fact] + public void BareIssueNumber_WithOwnerAndRepo_ExpandsToIssuesUrl() + { + var result = ChangelogFileWriter.NormalizeReferences( + ["4274"], "elastic", "cloud", "issues"); + + result.Should().BeEquivalentTo(["https://github.com/elastic/cloud/issues/4274"]); + } + + [Fact] + public void BareNumber_WithoutOwner_LeftAsIs() + { + var result = ChangelogFileWriter.NormalizeReferences( + ["155500"], null, "cloud", "pull"); + + result.Should().BeEquivalentTo(["155500"]); + } + + [Fact] + public void BareNumber_WithoutRepo_LeftAsIs() + { + var result = ChangelogFileWriter.NormalizeReferences( + ["155500"], "elastic", null, "pull"); + + result.Should().BeEquivalentTo(["155500"]); + } + + [Fact] + public void BareNumber_WithBundleStyleRepo_LeftAsIs() + { + // `elasticsearch+kibana` is a multi-repo bundle string; we can't pick which one a bare + // number targets, so we conservatively leave it alone. + var result = ChangelogFileWriter.NormalizeReferences( + ["100"], "elastic", "elasticsearch+kibana", "pull"); + + result.Should().BeEquivalentTo(["100"]); + } + + [Fact] + public void BareNumber_WithSlashInRepo_LeftAsIs() + { + // A pre-qualified `org/repo` value supplied through `--repo` shouldn't be re-combined. + var result = ChangelogFileWriter.NormalizeReferences( + ["100"], "elastic", "elastic/cloud", "pull"); + + result.Should().BeEquivalentTo(["100"]); + } + + [Fact] + public void FullUrl_LeftAsIs() + { + var result = ChangelogFileWriter.NormalizeReferences( + ["https://github.com/elastic/cloud/pull/155500"], "elastic", "cloud", "pull"); + + result.Should().BeEquivalentTo(["https://github.com/elastic/cloud/pull/155500"]); + } + + [Fact] + public void ShortFormReference_LeftAsIs() + { + var result = ChangelogFileWriter.NormalizeReferences( + ["elastic/cloud#155500"], "elastic", "cloud", "pull"); + + result.Should().BeEquivalentTo(["elastic/cloud#155500"]); + } + + [Fact] + public void MixedReferences_OnlyBareNumbersExpand() + { + var result = ChangelogFileWriter.NormalizeReferences( + [ + "155500", + "https://github.com/elastic/cloud/pull/155501", + "elastic/cloud#155502" + ], + "elastic", "cloud", "pull"); + + result.Should().BeEquivalentTo( + [ + "https://github.com/elastic/cloud/pull/155500", + "https://github.com/elastic/cloud/pull/155501", + "elastic/cloud#155502" + ]); + } + + [Fact] + public void NumberWithWhitespace_TrimmedAndExpanded() + { + var result = ChangelogFileWriter.NormalizeReferences( + [" 155500 "], "elastic", "cloud", "pull"); + + result.Should().BeEquivalentTo(["https://github.com/elastic/cloud/pull/155500"]); + } +}