|
| 1 | +// Licensed to Elasticsearch B.V under one or more agreements. |
| 2 | +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. |
| 3 | +// See the LICENSE file in the project root for more information |
| 4 | + |
| 5 | +using System.Text.Json; |
| 6 | +using Microsoft.Extensions.Logging; |
| 7 | + |
| 8 | +namespace Elastic.Documentation.Configuration.ReleaseNotes; |
| 9 | + |
/// <summary>
/// A single changelog entry as downloaded from the CDN: the file name the registry listed it under,
/// paired with the raw YAML text that was fetched for it.
/// </summary>
/// <param name="FileName">File name of the entry exactly as it appears in the registry.</param>
/// <param name="Content">Raw, unparsed YAML content downloaded for the entry.</param>
public readonly record struct CdnChangelogEntry(
    string FileName,
    string Content);
| 14 | + |
/// <summary>
/// Fetches the individual (scrubbed) changelog entries for a single product from the public CDN, for
/// the <c>changelog bundle</c> command when sourcing entries from S3 rather than a local folder. It
/// reads <c>{base}/{product}/changelog/registry.json</c> to enumerate entries and downloads each
/// <c>{base}/{product}/changelog/{file}</c> as raw YAML; the bundle command then applies its usual
/// filter (products / prs / issues) to the downloaded set.
/// </summary>
/// <remarks>
/// <para>
/// A registry that cannot be fetched or parsed is a hard error (the caller gets an empty list and an
/// emitted error). An individual entry that the registry lists but the CDN does not yet serve is
/// retried a few times with short backoff (and cache-busting, to defeat any CloudFront negative-cache)
/// to ride out the brief upload→scrub→propagate window. If it still cannot be fetched after the retry
/// budget it is escalated to an error, not skipped: the registry asserts the entry exists (uploads
/// never prune) and scrubbing is sub-second, so a persistent miss is a real pipeline problem and
/// silently shipping an incomplete release bundle is worse than failing the run.
/// </para>
/// <para>
/// Only a cancellation requested through the caller's token is propagated as
/// <see cref="OperationCanceledException"/>. An HTTP client timeout also surfaces as an
/// <see cref="OperationCanceledException"/> even though nobody cancelled; it is treated like any other
/// fetch failure (retried for entries, then reported through the error callback).
/// </para>
/// </remarks>
/// <param name="logFactory">Factory used to create this type's logger.</param>
/// <param name="handler">
/// Optional message handler for the internal <see cref="HttpClient"/>. The client is created with
/// <c>disposeHandler: false</c>, so the handler stays owned by the caller. When <c>null</c> a default
/// <see cref="HttpClient"/> is created.
/// </param>
/// <param name="maxAttempts">
/// Total GET attempts per entry (1 initial + retries). Values below 1 fall back to the default.
/// </param>
/// <param name="sleep">
/// Optional backoff wait invoked between attempts with the delay and the caller's token. Defaults to a
/// blocking wait on the token's wait handle.
/// </param>
public sealed class CdnChangelogEntryFetcher(
    ILoggerFactory logFactory,
    HttpMessageHandler? handler = null,
    int maxAttempts = CdnChangelogEntryFetcher.DefaultMaxAttempts,
    Action<TimeSpan, Cancel>? sleep = null)
{
    private const int SupportedSchemaVersion = 1;

    /// <summary>Total GET attempts per entry (1 initial + retries). ~3.5s budget at the default backoff.</summary>
    private const int DefaultMaxAttempts = 4;
    private const int BaseRetryDelayMs = 500;
    private const int MaxRetryDelayMs = 2000;

    /// <summary>
    /// Upper bound on the backoff doubling exponent. The delay is capped at <see cref="MaxRetryDelayMs"/>
    /// long before this is reached; the bound only keeps the shift and multiplication in
    /// <see cref="RetryDelay"/> from overflowing when a very large attempt count is configured.
    /// </summary>
    private const int MaxRetryDelayShift = 16;

    private readonly ILogger _logger = logFactory.CreateLogger<CdnChangelogEntryFetcher>();
    private readonly HttpClient _httpClient = handler is null ? new HttpClient() : new HttpClient(handler, disposeHandler: false);
    private readonly int _maxAttempts = maxAttempts < 1 ? DefaultMaxAttempts : maxAttempts;
    private readonly Action<TimeSpan, Cancel> _sleep = sleep ?? DefaultSleep;

    /// <summary>
    /// Downloads the changelog entries for <paramref name="product"/> from the CDN at
    /// <paramref name="baseUri"/>. Returns an empty list after emitting an error when the registry cannot
    /// be read or when a registry-listed entry cannot be fetched within the retry budget. Entries are
    /// returned in registry order; the caller owns filtering and de-duplication.
    /// </summary>
    /// <param name="baseUri">CDN base URI under which <c>{product}/changelog/</c> lives.</param>
    /// <param name="product">Product whose registry and entries are fetched; used as a URL path segment.</param>
    /// <param name="emitError">
    /// Receives the message for every hard failure: registry fetch/parse failure, unsupported schema
    /// version, or an entry that could not be fetched after all attempts.
    /// </param>
    /// <param name="emitWarning">Receives a message for each registry item with an invalid file name, which is then skipped.</param>
    /// <param name="ctx">Token used to cancel the HTTP requests and backoff waits.</param>
    /// <returns>The downloaded entries in registry order, or an empty list when an error was emitted.</returns>
    /// <exception cref="OperationCanceledException"><paramref name="ctx"/> was cancelled.</exception>
    public IReadOnlyList<CdnChangelogEntry> Fetch(
        Uri baseUri,
        string product,
        Action<string> emitError,
        Action<string> emitWarning,
        Cancel ctx)
    {
        var registryUri = Combine(baseUri, product, "changelog", "registry.json");

        ChangelogRegistry? registry;
        try
        {
            registry = FetchRegistry(registryUri, ctx);
        }
        catch (Exception ex) when (IsFetchFailure(ex, ctx))
        {
            emitError($"Could not fetch changelog entry registry for product '{product}' from {registryUri}: {ex.Message}");
            return [];
        }

        if (registry is null)
        {
            emitError($"Changelog entry registry for product '{product}' at {registryUri} was empty or unparseable.");
            return [];
        }

        // A newer schema may carry semantics this build cannot interpret; refuse rather than guess.
        if (registry.SchemaVersion > SupportedSchemaVersion)
        {
            emitError(
                $"Changelog entry registry for product '{product}' uses schema version {registry.SchemaVersion}, but this build only understands version {SupportedSchemaVersion}. Update docs-builder.");
            return [];
        }

        var entries = new List<CdnChangelogEntry>(registry.Bundles.Count);
        foreach (var entry in registry.Bundles)
        {
            ctx.ThrowIfCancellationRequested();

            var fileName = entry.File;
            if (string.IsNullOrWhiteSpace(fileName) || !IsSafeFileName(fileName))
            {
                emitWarning($"Changelog entry registry for '{product}' lists an invalid file name '{fileName}'; skipping.");
                continue;
            }

            var entryUri = Combine(baseUri, product, "changelog", fileName);
            if (TryFetchEntry(entryUri, fileName, product, ctx, out var content, out var lastError))
            {
                entries.Add(new CdnChangelogEntry(fileName, content));
                continue;
            }

            // The registry lists this entry, so it exists in the private bucket and should have been
            // scrubbed to the public one within milliseconds. Still missing after the retry budget means
            // a genuine propagation/scrub failure — fail rather than ship a bundle missing this entry.
            emitError(
                $"Changelog entry '{fileName}' for product '{product}' is listed in the registry but could not be fetched from {entryUri} after {_maxAttempts} attempt(s): {lastError}. " +
                "The scrubbed copy may not have propagated to the CDN yet; retry shortly, and if it persists check the changelog scrubber pipeline.");
            return [];
        }

        _logger.LogInformation("Fetched {Count} changelog entry(ies) for {Product} from {BaseUri}", entries.Count, product, baseUri);
        return entries;
    }

    /// <summary>
    /// Fetches a single entry, retrying transient failures (most importantly a not-yet-propagated 404)
    /// up to <see cref="_maxAttempts"/> times with exponential backoff. Retry requests are cache-busted
    /// so a CloudFront-cached 404 cannot pin the result for the whole window.
    /// </summary>
    /// <param name="uri">Absolute URI of the entry.</param>
    /// <param name="fileName">Entry file name, used for logging only.</param>
    /// <param name="product">Product name, used for logging only.</param>
    /// <param name="ctx">Token checked before every attempt and passed to the request and the backoff wait.</param>
    /// <param name="content">The downloaded text on success; empty otherwise.</param>
    /// <param name="lastError">Message of the last failure, or <c>null</c> when no attempt failed.</param>
    /// <returns><c>true</c> when the entry was downloaded; <c>false</c> when every attempt failed.</returns>
    private bool TryFetchEntry(Uri uri, string fileName, string product, Cancel ctx, out string content, out string? lastError)
    {
        content = string.Empty;
        lastError = null;

        for (var attempt = 1; attempt <= _maxAttempts; attempt++)
        {
            ctx.ThrowIfCancellationRequested();
            try
            {
                content = FetchText(uri, attempt, ctx);
                if (attempt > 1)
                    _logger.LogInformation("Fetched changelog entry '{File}' for {Product} on attempt {Attempt}/{Max}", fileName, product, attempt, _maxAttempts);
                return true;
            }
            catch (Exception ex) when (IsFetchFailure(ex, ctx))
            {
                lastError = ex.Message;
                if (attempt >= _maxAttempts)
                    break;

                var delay = RetryDelay(attempt);
                _logger.LogDebug(
                    "Changelog entry '{File}' for {Product} not yet available (attempt {Attempt}/{Max}: {Error}); retrying in {Delay}",
                    fileName, product, attempt, _maxAttempts, ex.Message, delay);
                _sleep(delay, ctx);
            }
        }

        return false;
    }

    /// <summary>
    /// Decides whether <paramref name="ex"/> is a fetch failure to handle (retry or report) rather than a
    /// caller cancellation to propagate. An <see cref="OperationCanceledException"/> raised while
    /// <paramref name="ctx"/> is not cancelled did not come from the caller (for example an HTTP client
    /// timeout), so it counts as a failure.
    /// </summary>
    private static bool IsFetchFailure(Exception ex, Cancel ctx) =>
        ex is not OperationCanceledException || !ctx.IsCancellationRequested;

    /// <summary>
    /// GETs <paramref name="registryUri"/> and deserializes the response body as a registry.
    /// Throws on a non-success status code; returns <c>null</c> when the deserializer yields no object.
    /// </summary>
    private ChangelogRegistry? FetchRegistry(Uri registryUri, Cancel ctx)
    {
        _logger.LogInformation("Fetching changelog entry registry {RegistryUri}", registryUri);
        using var request = new HttpRequestMessage(HttpMethod.Get, registryUri);
        using var response = _httpClient.Send(request, ctx);
        _ = response.EnsureSuccessStatusCode();
        using var stream = response.Content.ReadAsStream(ctx);
        return JsonSerializer.Deserialize(stream, ChangelogRegistryJsonContext.Default.ChangelogRegistry);
    }

    /// <summary>
    /// GETs <paramref name="uri"/> and returns the response body as text. Throws on a non-success
    /// status code. <paramref name="attempt"/> is 1-based and controls cache-busting.
    /// </summary>
    private string FetchText(Uri uri, int attempt, Cancel ctx)
    {
        // Only bust the cache on retries: the first hit should use the CDN cache normally (the common,
        // already-propagated case); retries explicitly want to bypass any cached 404.
        var requestUri = attempt > 1 ? WithCacheBuster(uri) : uri;
        using var request = new HttpRequestMessage(HttpMethod.Get, requestUri);
        if (attempt > 1)
            _ = request.Headers.TryAddWithoutValidation("Cache-Control", "no-cache");
        using var response = _httpClient.Send(request, ctx);
        _ = response.EnsureSuccessStatusCode();
        using var stream = response.Content.ReadAsStream(ctx);
        using var reader = new StreamReader(stream);
        return reader.ReadToEnd();
    }

    /// <summary>
    /// Backoff before the retry that follows the given 1-based <paramref name="attempt"/>:
    /// <see cref="BaseRetryDelayMs"/> doubled per attempt, capped at <see cref="MaxRetryDelayMs"/>.
    /// </summary>
    private static TimeSpan RetryDelay(int attempt)
    {
        // attempt is 1-based; first retry waits BaseRetryDelayMs, doubling up to the cap.
        // The exponent is clamped so a large configured attempt count cannot overflow the shift or the
        // multiplication and turn the delay into zero or a negative value.
        var shift = Math.Clamp(attempt - 1, 0, MaxRetryDelayShift);
        var ms = Math.Min(BaseRetryDelayMs * (1L << shift), MaxRetryDelayMs);
        return TimeSpan.FromMilliseconds(ms);
    }

    /// <summary>
    /// Default backoff wait: blocks on the token's wait handle for <paramref name="delay"/>, so it
    /// returns early when <paramref name="ctx"/> is cancelled. Non-positive delays return immediately.
    /// </summary>
    private static void DefaultSleep(TimeSpan delay, Cancel ctx)
    {
        if (delay > TimeSpan.Zero)
            _ = ctx.WaitHandle.WaitOne(delay);
    }

    /// <summary>
    /// Returns <paramref name="uri"/> with an extra <c>_</c> query parameter holding the current UTC
    /// tick count in hex, appended with <c>?</c> or <c>&amp;</c> depending on whether a query already exists.
    /// </summary>
    private static Uri WithCacheBuster(Uri uri)
    {
        var separator = string.IsNullOrEmpty(uri.Query) ? "?" : "&";
        return new Uri($"{uri.AbsoluteUri}{separator}_={DateTimeOffset.UtcNow.Ticks:x}");
    }

    /// <summary>
    /// Appends <paramref name="segments"/> to <paramref name="baseUri"/> as path segments. Trailing
    /// slashes on the base are trimmed and each segment is escaped with <see cref="Uri.EscapeDataString(string)"/>.
    /// </summary>
    private static Uri Combine(Uri baseUri, params string[] segments)
    {
        var basePath = baseUri.AbsoluteUri.TrimEnd('/');
        var suffix = string.Join('/', segments.Select(Uri.EscapeDataString));
        return new Uri($"{basePath}/{suffix}");
    }

    /// <summary>
    /// Guards against path traversal or nested keys sneaking in via the registry: an entry file name
    /// must be a single path segment (the producer always writes <c>{product}/changelog/{file}</c>).
    /// </summary>
    private static bool IsSafeFileName(string fileName) =>
        !fileName.Contains('/', StringComparison.Ordinal)
        && !fileName.Contains('\\', StringComparison.Ordinal)
        && fileName is not ("." or "..");
}
0 commit comments