Skip to content

Commit 221b86d

Browse files
Git-based link-index for codex clone" (#2739)
* Decrease LogToStandardErrorThreshold to Trace * Git-based link-index for codex clone * Fix Path.Combine warnings * Revert * Potential fix for pull request finding 'Call to 'System.IO.Path.Combine' may silently drop its earlier arguments' Co-authored-by: Copilot Autofix powered by AI <223894421+github-code-quality[bot]@users.noreply.github.com> * Revert AppDefaultsExtensions * Ensure codex environment is set --------- Co-authored-by: Copilot Autofix powered by AI <223894421+github-code-quality[bot]@users.noreply.github.com>
1 parent 59828e8 commit 221b86d

7 files changed

Lines changed: 232 additions & 25 deletions

File tree

src/Elastic.Codex/Elastic.Codex.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
<ProjectReference Include="..\Elastic.Documentation\Elastic.Documentation.csproj"/>
2424
<ProjectReference Include="..\Elastic.Documentation.Navigation\Elastic.Documentation.Navigation.csproj"/>
2525
<ProjectReference Include="..\Elastic.Documentation.Links\Elastic.Documentation.Links.csproj"/>
26+
<ProjectReference Include="..\Elastic.Documentation.LinkIndex\Elastic.Documentation.LinkIndex.csproj"/>
2627
<ProjectReference Include="..\Elastic.Documentation.ServiceDefaults\Elastic.Documentation.ServiceDefaults.csproj"/>
2728
<ProjectReference Include="..\services\Elastic.Documentation.Services\Elastic.Documentation.Services.csproj"/>
2829
<ProjectReference Include="..\services\Elastic.Documentation.Isolated\Elastic.Documentation.Isolated.csproj"/>

src/Elastic.Codex/Sourcing/CodexCloneService.cs

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
using System.IO.Abstractions;
66
using Elastic.Documentation.Configuration.Codex;
77
using Elastic.Documentation.Diagnostics;
8+
using Elastic.Documentation.LinkIndex;
9+
using Elastic.Documentation.Links;
810
using Elastic.Documentation.Services;
911
using Microsoft.Extensions.Logging;
1012

@@ -13,8 +15,9 @@ namespace Elastic.Codex.Sourcing;
1315
/// <summary>
1416
/// Service for cloning repositories defined in a codex configuration.
1517
/// </summary>
16-
public class CodexCloneService(ILoggerFactory logFactory) : IService
18+
public class CodexCloneService(ILoggerFactory logFactory, ILinkIndexReader linkIndexReader) : IService
1719
{
20+
private const string LinkRegistrySnapshotFileName = "link-index.snapshot.json";
1821
private readonly ILogger _logger = logFactory.CreateLogger<CodexCloneService>();
1922

2023
/// <summary>
@@ -32,6 +35,8 @@ public async Task<CodexCloneResult> CloneAll(
3235
if (!checkoutDir.Exists)
3336
checkoutDir.Create();
3437

38+
var linkRegistry = await linkIndexReader.GetRegistry(ctx);
39+
3540
_logger.LogInformation("Cloning {Count} documentation sets to {Directory}",
3641
context.Configuration.DocumentationSets.Count, checkoutDir.FullName);
3742

@@ -40,33 +45,65 @@ await Parallel.ForEachAsync(
4045
new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount, CancellationToken = ctx },
4146
async (docSetRef, c) =>
4247
{
43-
var checkout = await CloneRepository(context, docSetRef, fetchLatest, assumeCloned, c);
48+
var checkout = await CloneRepository(context, docSetRef, linkRegistry, fetchLatest, assumeCloned, c);
4449
if (checkout != null)
4550
{
4651
lock (checkouts)
4752
checkouts.Add(checkout);
4853
}
4954
});
5055

51-
return new CodexCloneResult(checkouts);
56+
if (Path.IsPathRooted(LinkRegistrySnapshotFileName))
57+
throw new InvalidOperationException($"Snapshot file name '{LinkRegistrySnapshotFileName}' must be a relative path.");
58+
59+
var snapshotFilePath = Path.Combine(context.CheckoutDirectory.FullName, LinkRegistrySnapshotFileName);
60+
61+
await context.WriteFileSystem.File.WriteAllTextAsync(
62+
snapshotFilePath,
63+
LinkRegistry.Serialize(linkRegistry),
64+
ctx);
65+
66+
return new CodexCloneResult(checkouts, linkRegistry);
5267
}
5368

5469
private async Task<CodexCheckout?> CloneRepository(
5570
CodexContext context,
5671
CodexDocumentationSetReference docSetRef,
72+
LinkRegistry linkRegistry,
5773
bool fetchLatest,
5874
bool assumeCloned,
5975
Cancel _)
6076
{
77+
if (Path.IsPathRooted(docSetRef.ResolvedRepoName))
78+
{
79+
context.Collector.EmitError(
80+
context.ConfigurationPath,
81+
$"Repository name '{docSetRef.ResolvedRepoName}' must be a relative path");
82+
return null;
83+
}
84+
6185
var repoDir = context.ReadFileSystem.DirectoryInfo.New(
6286
Path.Combine(context.CheckoutDirectory.FullName, docSetRef.ResolvedRepoName));
6387

6488
var gitUrl = docSetRef.GetGitUrl();
6589
var branch = docSetRef.Branch;
6690
var docsPath = docSetRef.Path;
6791

68-
_logger.LogInformation("Cloning {Name} from {Origin} branch {Branch}",
69-
docSetRef.Name, docSetRef.ResolvedOrigin, branch);
92+
if (Path.IsPathRooted(docsPath))
93+
{
94+
context.Collector.EmitError(
95+
context.ConfigurationPath,
96+
$"Documentation path '{docsPath}' for repository '{docSetRef.Name}' must be a relative path");
97+
return null;
98+
}
99+
100+
var gitRef = branch;
101+
if (!fetchLatest && linkRegistry.Repositories.TryGetValue(docSetRef.ResolvedRepoName, out var entry) &&
102+
entry.TryGetValue(branch, out var entryInfo))
103+
gitRef = entryInfo.GitReference;
104+
105+
_logger.LogInformation("Cloning {Name} from {Origin} at {GitRef}",
106+
docSetRef.Name, docSetRef.ResolvedOrigin, gitRef);
70107

71108
try
72109
{
@@ -93,7 +130,7 @@ await Parallel.ForEachAsync(
93130

94131
// Enable sparse checkout for just the docs folder
95132
git.EnableSparseCheckout([docsPath]);
96-
git.Fetch(branch);
133+
git.Fetch(gitRef);
97134
git.Checkout("FETCH_HEAD");
98135
}
99136

@@ -123,7 +160,7 @@ await Parallel.ForEachAsync(
123160
/// <summary>
124161
/// Result of cloning codex repositories.
125162
/// </summary>
126-
public record CodexCloneResult(IReadOnlyList<CodexCheckout> Checkouts);
163+
public record CodexCloneResult(IReadOnlyList<CodexCheckout> Checkouts, LinkRegistry LinkRegistrySnapshot);
127164

128165
/// <summary>
129166
/// Represents a cloned repository checkout for the codex.

src/Elastic.Documentation.LinkIndex/Elastic.Documentation.LinkIndex.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<Project Sdk="Microsoft.NET.Sdk">
1+
<Project Sdk="Microsoft.NET.Sdk">
22

33
<PropertyGroup>
44
<TargetFramework>net10.0</TargetFramework>
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
// Licensed to Elasticsearch B.V under one or more agreements.
2+
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
3+
// See the LICENSE file in the project root for more information
4+
5+
using System.Diagnostics;
6+
using System.IO.Abstractions;
7+
using Elastic.Documentation.Links;
8+
9+
namespace Elastic.Documentation.LinkIndex;
10+
11+
/// <summary>
12+
/// Reads the link index from a cloned git repository (elastic/codex-link-index).
13+
/// Uses local SSH credentials for cloning, enabling private access without S3.
14+
/// </summary>
15+
public class GitLinkIndexReader : ILinkIndexReader, IDisposable
{
    private const string LinkIndexOrigin = "elastic/codex-link-index";
    private const string RegistryFileName = "link-index.json";

    // Local working copy of the link index, kept under the user's profile directory.
    private static readonly string CloneDirectory = Path.Combine(
        Environment.GetFolderPath(Environment.SpecialFolder.UserProfile),
        ".docs-builder",
        "codex-link-index");

    private readonly string _environment;
    private readonly IFileSystem _fileSystem;

    // Serializes the one-time fetch/checkout so concurrent readers share a single git run.
    private readonly SemaphoreSlim _cloneLock = new(1, 1);
    private bool _ensuredClone;

    /// <summary>
    /// Creates a reader for one environment folder of the link index repository.
    /// </summary>
    /// <param name="environment">Name of the folder inside the link index repository that holds <c>link-index.json</c>.</param>
    /// <param name="fileSystem">File system used to read the working copy; a real <see cref="FileSystem"/> when omitted.</param>
    /// <exception cref="ArgumentException"><paramref name="environment"/> is null, empty or whitespace.</exception>
    public GitLinkIndexReader(string environment, IFileSystem? fileSystem = null)
    {
        if (string.IsNullOrWhiteSpace(environment))
            throw new ArgumentException("Environment must be specified in the codex configuration (e.g., 'engineering', 'security').", nameof(environment));

        _environment = environment;
        _fileSystem = fileSystem ?? new FileSystem();
    }

    /// <inheritdoc />
    public void Dispose()
    {
        _cloneLock.Dispose();
        GC.SuppressFinalize(this);
    }

    public string RegistryUrl => "https://github.com/elastic/codex-link-index";

    /// <inheritdoc />
    /// <exception cref="ArgumentException">The configured environment is rooted or points outside the working copy.</exception>
    /// <exception cref="FileNotFoundException">The working copy has no <c>link-index.json</c> for the environment.</exception>
    public async Task<LinkRegistry> GetRegistry(Cancel cancellationToken = default)
    {
        // Validate before touching git so a bad configuration fails without any network access.
        var registryPath = ResolveInsideClone(
            Path.Combine(_environment, RegistryFileName),
            $"Environment '{_environment}' must be a relative path segment.");

        await EnsureCloneAsync(cancellationToken);

        if (!_fileSystem.File.Exists(registryPath))
            throw new FileNotFoundException($"Link index registry not found at {registryPath}. Ensure the codex-link-index repository has {_environment}/link-index.json.");

        var json = await _fileSystem.File.ReadAllTextAsync(registryPath, cancellationToken);
        return LinkRegistry.Deserialize(json);
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentException"><paramref name="key"/> is rooted or points outside the working copy.</exception>
    /// <exception cref="FileNotFoundException">No file exists at <paramref name="key"/> inside the working copy.</exception>
    public async Task<RepositoryLinks> GetRepositoryLinks(string key, Cancel cancellationToken = default)
    {
        var linksPath = ResolveInsideClone(
            key,
            $"Repository key '{key}' must be a relative path.",
            nameof(key));

        await EnsureCloneAsync(cancellationToken);

        if (!_fileSystem.File.Exists(linksPath))
            throw new FileNotFoundException($"Repository links not found at {linksPath}.");

        var json = await _fileSystem.File.ReadAllTextAsync(linksPath, cancellationToken);
        return RepositoryLinks.Deserialize(json);
    }

    /// <summary>
    /// Combines <paramref name="relativePath"/> with the clone directory and rejects anything that
    /// would resolve outside of it (rooted paths as well as <c>..</c> traversal).
    /// </summary>
    /// <returns>The combined, non-normalized path inside the clone directory.</returns>
    private static string ResolveInsideClone(string relativePath, string errorMessage, string? paramName = null)
    {
        if (Path.IsPathRooted(relativePath))
            throw new ArgumentException(errorMessage, paramName);

        var combined = Path.Combine(CloneDirectory, relativePath);

        // Normalized copies are used for the containment check only; file access keeps the combined path.
        var root = Path.GetFullPath(CloneDirectory);
        var candidate = Path.GetFullPath(combined);
        var rootPrefix = Path.EndsInDirectorySeparator(root) ? root : root + Path.DirectorySeparatorChar;

        var isInsideClone = candidate.Equals(root, StringComparison.Ordinal)
            || candidate.StartsWith(rootPrefix, StringComparison.Ordinal);
        if (!isInsideClone)
            throw new ArgumentException(errorMessage, paramName);

        return combined;
    }

    /// <summary>
    /// Fetches the default branch of the link index repository into <see cref="CloneDirectory"/> and
    /// checks it out. Runs git at most once per reader instance; later calls return immediately.
    /// </summary>
    private async Task EnsureCloneAsync(Cancel cancellationToken)
    {
        await _cloneLock.WaitAsync(cancellationToken);
        try
        {
            if (_ensuredClone)
                return;

            var cloneDir = _fileSystem.DirectoryInfo.New(CloneDirectory);
            var gitDir = Path.Combine(CloneDirectory, ".git");
            var gitUrl = GetCodexLinkIndexGitUrl();

            if (!_fileSystem.Directory.Exists(gitDir))
            {
                if (!cloneDir.Exists)
                    cloneDir.Create();
                await RunGitAsync(CloneDirectory, cancellationToken, "init");
            }

            // Fetch straight from the URL instead of a stored 'origin' remote: the URL may embed
            // GITHUB_TOKEN, which should not be persisted in .git/config, and a remote stored by an
            // earlier run could point at a stale URL or token.
            await RunGitAsync(CloneDirectory, cancellationToken, "fetch", "--no-tags", "--depth", "1", gitUrl, "HEAD");
            await RunGitAsync(CloneDirectory, cancellationToken, "checkout", "--force", "FETCH_HEAD");

            _ensuredClone = true;
        }
        finally
        {
            _ = _cloneLock.Release();
        }
    }

    /// <summary>
    /// Picks the clone URL: HTTPS when <c>GITHUB_ACTIONS</c> is set (with <c>GITHUB_TOKEN</c>
    /// embedded when that variable is set too), SSH otherwise.
    /// </summary>
    private static string GetCodexLinkIndexGitUrl()
    {
        if (!string.IsNullOrEmpty(Environment.GetEnvironmentVariable("GITHUB_ACTIONS")))
        {
            var token = Environment.GetEnvironmentVariable("GITHUB_TOKEN");
            return !string.IsNullOrEmpty(token)
                ? $"https://oauth2:{token}@github.com/{LinkIndexOrigin}.git"
                : $"https://github.com/{LinkIndexOrigin}.git";
        }

        return $"git@github.com:{LinkIndexOrigin}.git";
    }

    /// <summary>
    /// Runs <c>git</c> with <paramref name="args"/> in <paramref name="workingDirectory"/> and waits for it to exit.
    /// </summary>
    /// <exception cref="InvalidOperationException">The process could not be started or exited with a non-zero code.</exception>
    /// <exception cref="OperationCanceledException"><paramref name="cancellationToken"/> was cancelled; the git process is killed.</exception>
    private static async Task RunGitAsync(string workingDirectory, Cancel cancellationToken, params string[] args)
    {
        var startInfo = new ProcessStartInfo
        {
            FileName = "git",
            WorkingDirectory = workingDirectory,
            UseShellExecute = false,
            RedirectStandardOutput = true,
            RedirectStandardError = true,
            CreateNoWindow = true
        };
        foreach (var arg in args)
            startInfo.ArgumentList.Add(arg);
        startInfo.Environment["GIT_EDITOR"] = "true";

        using var process = Process.Start(startInfo) ?? throw new InvalidOperationException("Failed to start git process.");

        string stderr;
        try
        {
            // Drain both pipes concurrently: reading one stream to the end before the other can
            // deadlock once the child fills the pipe that is not being read.
            var stdoutTask = process.StandardOutput.ReadToEndAsync(cancellationToken);
            var stderrTask = process.StandardError.ReadToEndAsync(cancellationToken);

            _ = await stdoutTask;
            stderr = await stderrTask;
            await process.WaitForExitAsync(cancellationToken);
        }
        catch (OperationCanceledException)
        {
            // Do not leave an orphaned git process behind when the caller cancels.
            if (!process.HasExited)
                process.Kill(entireProcessTree: true);
            throw;
        }

        if (process.ExitCode != 0)
            throw new InvalidOperationException($"Git command failed (exit {process.ExitCode}): {stderr.Trim()}");
    }
}

src/Elastic.Documentation.Links/CrossLinks/CrossLinkFetcher.cs

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ public record FetchedCrossLinks
3131
public abstract class CrossLinkFetcher(ILoggerFactory logFactory, ILinkIndexReader linkIndexProvider) : IDisposable
3232
{
3333
protected ILogger Logger { get; } = logFactory.CreateLogger(nameof(CrossLinkFetcher));
34-
private readonly HttpClient _client = new();
3534
private LinkRegistry? _linkIndex;
3635

3736
public static RepositoryLinks Deserialize(string json) =>
@@ -90,22 +89,20 @@ protected async Task<RepositoryLinks> FetchCrossLinks(string repository, string[
9089

9190
protected async Task<RepositoryLinks> FetchLinkIndexEntry(string repository, LinkRegistryEntry linkRegistryEntry, Cancel ctx)
9291
{
93-
var url = $"https://elastic-docs-link-index.s3.us-east-2.amazonaws.com/{linkRegistryEntry.Path}";
9492
var linkReference = await TryGetCachedLinkReference(repository, linkRegistryEntry);
9593
if (linkReference is not null)
9694
{
97-
Logger.LogInformation("Using locally cached links.json for '{Repository}': {Url}", repository, url);
95+
Logger.LogInformation("Using locally cached links.json for '{Repository}' from {RegistryUrl}", repository, linkIndexProvider.RegistryUrl);
9896
return linkReference;
9997
}
10098

101-
Logger.LogInformation("Fetching links.json for '{Repository}': {Url}", repository, url);
102-
var json = await _client.GetStringAsync(url, ctx);
103-
linkReference = Deserialize(json);
104-
WriteLinksJsonCachedFile(repository, linkRegistryEntry, json);
99+
Logger.LogInformation("Fetching links.json for '{Repository}' from {RegistryUrl}", repository, linkIndexProvider.RegistryUrl);
100+
linkReference = await linkIndexProvider.GetRepositoryLinks(linkRegistryEntry.Path, ctx);
101+
WriteLinksJsonCachedFile(repository, linkRegistryEntry, linkReference);
105102
return linkReference;
106103
}
107104

108-
private void WriteLinksJsonCachedFile(string repository, LinkRegistryEntry linkRegistryEntry, string json)
105+
private void WriteLinksJsonCachedFile(string repository, LinkRegistryEntry linkRegistryEntry, RepositoryLinks linkReference)
109106
{
110107
var cachedFileName = $"links-elastic-{repository}-{linkRegistryEntry.Branch}-{linkRegistryEntry.ETag}.json";
111108
var cachedPath = Path.Combine(Paths.ApplicationData.FullName, "links", cachedFileName);
@@ -114,7 +111,7 @@ private void WriteLinksJsonCachedFile(string repository, LinkRegistryEntry linkR
114111
try
115112
{
116113
_ = Directory.CreateDirectory(Path.GetDirectoryName(cachedPath)!);
117-
File.WriteAllText(cachedPath, json);
114+
File.WriteAllText(cachedPath, RepositoryLinks.Serialize(linkReference));
118115
}
119116
catch (Exception e)
120117
{
@@ -152,7 +149,6 @@ private void WriteLinksJsonCachedFile(string repository, LinkRegistryEntry linkR
152149

153150
public void Dispose()
154151
{
155-
_client.Dispose();
156152
logFactory.Dispose();
157153
GC.SuppressFinalize(this);
158154
}

src/tooling/docs-builder/Commands/Codex/CodexCommands.cs

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
using Elastic.Documentation.Configuration.Codex;
1414
using Elastic.Documentation.Diagnostics;
1515
using Elastic.Documentation.Isolated;
16+
using Elastic.Documentation.LinkIndex;
1617
using Elastic.Documentation.Services;
1718
using Microsoft.Extensions.Logging;
1819

@@ -62,10 +63,17 @@ public async Task<int> CloneAndBuild(
6263
}
6364

6465
var codexConfig = CodexConfiguration.Load(configFile);
66+
67+
if (string.IsNullOrWhiteSpace(codexConfig.Environment))
68+
{
69+
collector.EmitGlobalError("Codex configuration must specify an 'environment' (e.g., 'engineering', 'security').");
70+
return 1;
71+
}
72+
6573
var codexContext = new CodexContext(codexConfig, configFile, collector, fs, fs, null, output);
6674

67-
// Clone service
68-
var cloneService = new CodexCloneService(logFactory);
75+
using var linkIndexReader = new GitLinkIndexReader(codexConfig.Environment);
76+
var cloneService = new CodexCloneService(logFactory, linkIndexReader);
6977
CodexCloneResult? cloneResult = null;
7078

7179
serviceInvoker.AddCommand(cloneService, (codexContext, fetchLatest, assumeCloned), strict,
@@ -128,9 +136,17 @@ public async Task<int> Clone(
128136
}
129137

130138
var codexConfig = CodexConfiguration.Load(configFile);
139+
140+
if (string.IsNullOrWhiteSpace(codexConfig.Environment))
141+
{
142+
collector.EmitGlobalError("Codex configuration must specify an 'environment' (e.g., 'engineering', 'security').");
143+
return 1;
144+
}
145+
131146
var codexContext = new CodexContext(codexConfig, configFile, collector, fs, fs, null, null);
132147

133-
var cloneService = new CodexCloneService(logFactory);
148+
using var linkIndexReader = new GitLinkIndexReader(codexConfig.Environment);
149+
var cloneService = new CodexCloneService(logFactory, linkIndexReader);
134150
serviceInvoker.AddCommand(cloneService, (codexContext, fetchLatest, assumeCloned), strict,
135151
async (s, col, state, c) =>
136152
{
@@ -168,10 +184,17 @@ public async Task<int> Build(
168184
}
169185

170186
var codexConfig = CodexConfiguration.Load(configFile);
187+
188+
if (string.IsNullOrWhiteSpace(codexConfig.Environment))
189+
{
190+
collector.EmitGlobalError("Codex configuration must specify an 'environment' (e.g., 'engineering', 'security').");
191+
return 1;
192+
}
193+
171194
var codexContext = new CodexContext(codexConfig, configFile, collector, fs, fs, null, output);
172195

173-
// First, we need to load the checkouts that should already exist
174-
var cloneService = new CodexCloneService(logFactory);
196+
using var linkIndexReader = new GitLinkIndexReader(codexConfig.Environment);
197+
var cloneService = new CodexCloneService(logFactory, linkIndexReader);
175198
var cloneResult = await cloneService.CloneAll(codexContext, fetchLatest: false, assumeCloned: true, ctx);
176199

177200
if (cloneResult.Checkouts.Count == 0)

0 commit comments

Comments
 (0)