Skip to content

Commit a46dc26

Browse files
authored
feat(results): support configured storage branch (#1365)
* feat(results): support configured storage branch * fix(results): keep missing storage branch unavailable
1 parent 4d0defc commit a46dc26

13 files changed

Lines changed: 470 additions & 58 deletions

File tree

apps/cli/src/commands/results/remote.ts

Lines changed: 41 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import {
55
DEFAULT_THRESHOLD,
66
type EvaluationResult,
77
type GitListedRun,
8-
type ResultsConfig,
8+
type NormalizedResultsConfig,
99
type ResultsRepoStatus,
1010
directPushResults,
1111
directorySizeBytes,
@@ -37,32 +37,41 @@ import {
3737

3838
// ── In-memory TTL cache for listGitRuns ────────────────────────────
3939
// Avoids repeated expensive git ls-tree + git cat-file --batch operations
40-
// on every API request. Cache key is repoDir, TTL is 60 seconds.
40+
// on every API request. Cache key is repoDir + ref, TTL is 60 seconds.
4141
const gitRunsCache = new Map<string, { data: Promise<GitListedRun[]>; expiresAt: number }>();
4242
const GIT_RUNS_CACHE_TTL_MS = 60_000;
4343

44-
function cachedListGitRuns(repoDir: string) {
44+
function getResultsStorageRef(config: NormalizedResultsConfig): string | undefined {
45+
return config.branch ? `origin/${config.branch}` : undefined;
46+
}
47+
48+
function cachedListGitRuns(repoDir: string, ref?: string) {
4549
const now = Date.now();
46-
const cached = gitRunsCache.get(repoDir);
50+
const cacheKey = `${repoDir}\0${ref ?? ''}`;
51+
const cached = gitRunsCache.get(cacheKey);
4752
if (cached && cached.expiresAt > now) {
4853
return cached.data;
4954
}
50-
const promise = listGitRuns(repoDir);
51-
gitRunsCache.set(repoDir, { data: promise, expiresAt: now + GIT_RUNS_CACHE_TTL_MS });
55+
const promise = ref ? listGitRuns(repoDir, ref) : listGitRuns(repoDir);
56+
gitRunsCache.set(cacheKey, { data: promise, expiresAt: now + GIT_RUNS_CACHE_TTL_MS });
5257
// Evict stale entry once the promise settles so a fresh fetch replaces it
5358
promise
5459
.catch(() => {})
5560
.finally(() => {
56-
const entry = gitRunsCache.get(repoDir);
61+
const entry = gitRunsCache.get(cacheKey);
5762
if (entry && entry.expiresAt <= Date.now()) {
58-
gitRunsCache.delete(repoDir);
63+
gitRunsCache.delete(cacheKey);
5964
}
6065
});
6166
return promise;
6267
}
6368

6469
function invalidateGitRunsCache(repoDir: string): void {
65-
gitRunsCache.delete(repoDir);
70+
for (const key of gitRunsCache.keys()) {
71+
if (key.startsWith(`${repoDir}\0`)) {
72+
gitRunsCache.delete(key);
73+
}
74+
}
6675
}
6776

6877
export type RunSource = 'local' | 'remote';
@@ -144,7 +153,7 @@ async function maybeWarnLargeArtifact(runDir: string): Promise<void> {
144153
async function loadNormalizedResultsConfig(
145154
cwd: string,
146155
projectId?: string,
147-
): Promise<Required<ResultsConfig> | undefined> {
156+
): Promise<NormalizedResultsConfig | undefined> {
148157
const repoRoot = (await findRepoRoot(cwd)) ?? cwd;
149158
const config = await loadConfig(path.join(cwd, '_'), repoRoot);
150159
const project =
@@ -155,6 +164,7 @@ async function loadNormalizedResultsConfig(
155164
? {
156165
mode: 'github' as const,
157166
repo: project.results.repoUrl,
167+
branch: project.results.branch,
158168
path: project.results.path,
159169
auto_push: project.results.sync?.autoPush,
160170
branch_prefix: project.results.branchPrefix,
@@ -193,15 +203,17 @@ export async function getRemoteResultsStatus(
193203
}
194204

195205
async function getRemoteRunCount(
196-
config: Required<ResultsConfig> | undefined,
206+
config: NormalizedResultsConfig | undefined,
197207
status: ResultsRepoStatus,
198208
): Promise<number> {
199209
let runCount = 0;
200210
if (config && status.available) {
201211
try {
202-
runCount = (await cachedListGitRuns(config.path)).length;
212+
runCount = (await cachedListGitRuns(config.path, getResultsStorageRef(config))).length;
203213
} catch {
204-
runCount = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).length;
214+
if (!config.branch) {
215+
runCount = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).length;
216+
}
205217
}
206218
}
207219
return runCount;
@@ -277,7 +289,7 @@ export async function listMergedResultFiles(
277289
let remoteRuns: SourcedResultFileMeta[] = [];
278290
if (config.mode === 'github') {
279291
try {
280-
const gitRuns = await cachedListGitRuns(config.path);
292+
const gitRuns = await cachedListGitRuns(config.path, getResultsStorageRef(config));
281293
remoteRuns = gitRuns.map((r) => ({
282294
filename: encodeRemoteRunId(r.run_id),
283295
raw_filename: r.run_id,
@@ -291,16 +303,20 @@ export async function listMergedResultFiles(
291303
sizeBytes: r.size_bytes || 0,
292304
}));
293305
} catch (error) {
294-
console.error('git-native listing failed, falling back', error);
295-
remoteRuns = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).map(
296-
(meta) =>
297-
({
298-
...meta,
299-
filename: encodeRemoteRunId(meta.filename),
300-
raw_filename: meta.filename,
301-
source: 'remote' as const,
302-
}) satisfies SourcedResultFileMeta,
303-
);
306+
if (config.branch) {
307+
console.error('git-native listing failed for configured results branch', error);
308+
} else {
309+
console.error('git-native listing failed, falling back', error);
310+
remoteRuns = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).map(
311+
(meta) =>
312+
({
313+
...meta,
314+
filename: encodeRemoteRunId(meta.filename),
315+
raw_filename: meta.filename,
316+
source: 'remote' as const,
317+
}) satisfies SourcedResultFileMeta,
318+
);
319+
}
304320
}
305321
} else {
306322
remoteRuns = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).map(
@@ -359,7 +375,7 @@ export async function ensureRemoteRunAvailable(
359375
'.agentv/results/runs',
360376
path.posix.dirname(relativeManifestPath),
361377
);
362-
await materializeGitRun(config.path, relativeRunPath);
378+
await materializeGitRun(config.path, relativeRunPath, getResultsStorageRef(config));
363379
}
364380

365381
export async function readRemoteRunTagState(

apps/cli/test/commands/results/serve.test.ts

Lines changed: 93 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ function writeRemoteRunArtifact(
118118
experiment: string,
119119
timestamp: string,
120120
resultRecords: object | object[],
121+
branch = 'main',
121122
): string {
122123
const isoTimestamp = timestamp.replace(
123124
/^(\d{4}-\d{2}-\d{2})T(\d{2})-(\d{2})-(\d{2})-(\d{3})Z$/,
@@ -148,7 +149,7 @@ function writeRemoteRunArtifact(
148149
),
149150
);
150151
git(`git add "${runDir}" && git commit --quiet -m "add ${experiment}"`, cloneDir);
151-
git('git push --quiet origin main', cloneDir);
152+
git(`git push --quiet origin HEAD:${branch}`, cloneDir);
152153
git('git fetch --quiet origin --prune', cloneDir);
153154
return `${experiment}::${timestamp}`;
154155
}
@@ -926,6 +927,97 @@ describe('serve app', () => {
926927
expect(detailData.results[0]).toMatchObject({ testId: 'test-greeting' });
927928
}, 15000);
928929

930+
it('lists git-native remote runs from the configured storage branch', async () => {
931+
const { remoteDir, cloneDir } = initializeRemoteRepo(tempDir);
932+
const storageBranch = 'agentv-results';
933+
git(`git switch --quiet --orphan ${storageBranch}`, cloneDir);
934+
git('git rm -rf --quiet . 2>/dev/null || true', cloneDir);
935+
git('git commit --quiet --allow-empty -m "seed results branch"', cloneDir);
936+
git(`git push --quiet origin HEAD:${storageBranch}`, cloneDir);
937+
const runId = writeRemoteRunArtifact(
938+
cloneDir,
939+
'branch-green-uat',
940+
'2026-03-26T11-00-00-000Z',
941+
RESULT_A,
942+
storageBranch,
943+
);
944+
945+
mkdirSync(path.join(tempDir, '.agentv'), { recursive: true });
946+
writeFileSync(
947+
path.join(tempDir, '.agentv', 'config.yaml'),
948+
`results:
949+
mode: github
950+
repo: file://${remoteDir}
951+
branch: ${storageBranch}
952+
path: ${cloneDir}
953+
`,
954+
);
955+
956+
const app = createApp([], tempDir, tempDir, undefined, { studioDir });
957+
958+
const listRes = await app.request('/api/runs');
959+
expect(listRes.status).toBe(200);
960+
const listData = (await listRes.json()) as {
961+
runs: Array<{ filename: string; source: string; experiment?: string }>;
962+
};
963+
expect(listData.runs).toHaveLength(1);
964+
expect(listData.runs[0]).toMatchObject({
965+
filename: `remote::${runId}`,
966+
source: 'remote',
967+
experiment: 'branch-green-uat',
968+
});
969+
}, 15000);
970+
971+
it('does not fall back to checked-out default branch runs when the configured storage branch is missing', async () => {
972+
const { remoteDir, cloneDir } = initializeRemoteRepo(tempDir);
973+
const runId = writeRemoteRunArtifact(
974+
cloneDir,
975+
'main-only',
976+
'2026-03-26T11-30-00-000Z',
977+
RESULT_A,
978+
);
979+
980+
mkdirSync(path.join(tempDir, '.agentv'), { recursive: true });
981+
writeFileSync(
982+
path.join(tempDir, '.agentv', 'config.yaml'),
983+
`results:
984+
mode: github
985+
repo: file://${remoteDir}
986+
branch: agentv-results
987+
path: ${cloneDir}
988+
`,
989+
);
990+
991+
const app = createApp([], tempDir, tempDir, undefined, { studioDir });
992+
993+
const statusRes = await app.request('/api/remote/status');
994+
expect(statusRes.status).toBe(200);
995+
const statusData = (await statusRes.json()) as {
996+
available: boolean;
997+
sync_status?: string;
998+
run_count: number;
999+
last_error?: string;
1000+
};
1001+
expect(statusData).toMatchObject({
1002+
available: false,
1003+
sync_status: 'unavailable',
1004+
run_count: 0,
1005+
});
1006+
expect(statusData.last_error ?? '').toContain(
1007+
"Results repo remote branch 'agentv-results' does not exist",
1008+
);
1009+
1010+
const listRes = await app.request('/api/runs');
1011+
expect(listRes.status).toBe(200);
1012+
const listData = (await listRes.json()) as {
1013+
runs: Array<{ filename: string; source: string }>;
1014+
};
1015+
expect(listData.runs).toHaveLength(0);
1016+
1017+
const detailRes = await app.request(`/api/runs/${encodeURIComponent(`remote::${runId}`)}`);
1018+
expect(detailRes.status).toBe(404);
1019+
}, 15000);
1020+
9291021
it('dedupes synced local and remote run copies in favor of the local run', async () => {
9301022
const { remoteDir, cloneDir } = initializeRemoteRepo(tempDir);
9311023
const runId = writeRemoteRunArtifact(

apps/web/src/content/docs/docs/tools/dashboard.mdx

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -260,19 +260,21 @@ projects:
260260
ref: main
261261
results:
262262
repo_url: git@github.com:EntityProcess/agentv-examples-eval-results.git
263+
branch: agentv-results
263264
path: /home/entity/projects/EntityProcess/agentv-examples-eval-results
264265
sync:
265266
auto_push: true
266267
```
267268

268-
`results.path` is the filesystem location of the local clone AgentV manages for the results repo. It uses the same local-path field name as the source project. `results.repo_url` is the Git remote URL used for clone and push operations, so use HTTPS when credentials are HTTP-token based and SSH when the runtime has SSH keys configured.
269+
`results.path` is the filesystem location of the local clone AgentV manages for the results repo. It uses the same local-path field name as the source project. `results.repo_url` is the Git remote URL used for clone and push operations, so use HTTPS when credentials are HTTP-token based and SSH when the runtime has SSH keys configured. `results.branch` is optional; when present, AgentV reads and writes that existing storage branch instead of the results repo's default branch.
269270

270271
You can also set a top-level global fallback in the same file. This is used when the current project is not registered or its registry entry has no `results` block:
271272

272273
```yaml
273274
results:
274275
mode: github
275276
repo: EntityProcess/default-eval-results
277+
branch: agentv-results
276278
path: /home/entity/projects/EntityProcess/default-eval-results
277279
auto_push: true
278280
```
@@ -314,6 +316,7 @@ projects:
314316
ref: main
315317
results:
316318
repo_url: https://github.com/EntityProcess/agentv-eval-results.git
319+
branch: agentv-results
317320
path: /home/entity/projects/EntityProcess/agentv-eval-results
318321
sync:
319322
auto_push: true
@@ -325,7 +328,7 @@ Use project-level **Sync Project** as the results exchange workflow. It handles
325328

326329
There is no separate `agentv results remote status` or `agentv results remote sync` command. The `agentv results` CLI stays focused on local run workspaces; manual remote exchange is Dashboard/API-only, with eval auto-export covering the common CI/publisher path.
327330

328-
Each run writes to a unique timestamped directory, so concurrent pushes from multiple machines are safe — non-fast-forward conflicts are resolved automatically via rebase retry.
331+
Each run writes to a unique timestamped directory, so concurrent pushes from multiple machines are safe — non-fast-forward conflicts are resolved automatically via rebase retry. If `results.branch` is configured, create that remote branch once before syncing (for example with `git switch --orphan agentv-results`, then `git commit --allow-empty -m "seed results branch"`, then `git push origin HEAD:agentv-results`). The seed commit is required because a freshly orphaned branch has no commits and cannot be pushed until it has one. `branch_prefix` remains only the prefix for temporary result/PR branch names; it is not the storage branch.
329332

330333
### What happens to existing local runs?
331334

apps/web/src/content/docs/docs/tools/results.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ The CLI contract is deliberately narrow: `agentv results` manages local result a
9090

9191
Use these supported remote workflows instead:
9292

93-
- **Automatic publishing:** configure `projects[].results.auto_push: true`; new `agentv eval` and `agentv pipeline bench` runs push their artifacts after the run completes.
93+
- **Automatic publishing:** configure `projects[].results.sync.auto_push: true`; new `agentv eval` and `agentv pipeline bench` runs push their artifacts after the run completes. Add `projects[].results.branch` when the results repo stores artifacts on a non-default branch.
9494
- **Manual Dashboard sync:** run `agentv dashboard`, open the project, and use **Sync Project**.
9595
- **Manual API sync:** while Dashboard is running, call `GET /api/projects/:projectId/remote/status` or `POST /api/projects/:projectId/remote/sync` for project-scoped automation. Single-project sessions also expose `GET /api/remote/status` and `POST /api/remote/sync`.
9696
- **Git escape hatch:** for advanced recovery, inspect or repair the configured `projects[].results.path` clone with `git` directly, then sync again.

packages/core/src/evaluation/loaders/config-loader.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ export type ExecutionDefaults = {
4040
export type ResultsConfig = {
4141
readonly mode: 'github';
4242
readonly repo: string;
43+
/** Optional remote branch used as the canonical git-backed results store. */
44+
readonly branch?: string;
4345
/** Local filesystem path for the results clone. Optional; defaults to ~/.agentv/results/<slug>/. */
4446
readonly path?: string;
4547
readonly auto_push?: boolean;
@@ -610,6 +612,15 @@ export function parseResultsConfig(raw: unknown, configPath: string): ResultsCon
610612
return undefined;
611613
}
612614

615+
let branch: string | undefined;
616+
if (obj.branch !== undefined) {
617+
if (typeof obj.branch !== 'string' || obj.branch.trim().length === 0) {
618+
logWarning(`Invalid results.branch in ${configPath}, expected non-empty string`);
619+
return undefined;
620+
}
621+
branch = obj.branch.trim();
622+
}
623+
613624
let resultsPath: string | undefined;
614625
if (obj.path !== undefined) {
615626
if (typeof obj.path !== 'string' || obj.path.trim().length === 0) {
@@ -643,6 +654,7 @@ export function parseResultsConfig(raw: unknown, configPath: string): ResultsCon
643654
return {
644655
mode: 'github',
645656
repo,
657+
...(branch !== undefined && { branch }),
646658
...(resultsPath !== undefined && { path: resultsPath }),
647659
...(typeof obj.auto_push === 'boolean' && { auto_push: obj.auto_push }),
648660
...(branchPrefix && { branch_prefix: branchPrefix }),

0 commit comments

Comments
 (0)