Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 41 additions & 25 deletions apps/cli/src/commands/results/remote.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import {
DEFAULT_THRESHOLD,
type EvaluationResult,
type GitListedRun,
type ResultsConfig,
type NormalizedResultsConfig,
type ResultsRepoStatus,
directPushResults,
directorySizeBytes,
Expand Down Expand Up @@ -37,32 +37,41 @@ import {

// ── In-memory TTL cache for listGitRuns ────────────────────────────
// Avoids repeated expensive git ls-tree + git cat-file --batch operations
// on every API request. Cache key is repoDir, TTL is 60 seconds.
// on every API request. Cache key is repoDir + ref, TTL is 60 seconds.
const gitRunsCache = new Map<string, { data: Promise<GitListedRun[]>; expiresAt: number }>();
const GIT_RUNS_CACHE_TTL_MS = 60_000;

function cachedListGitRuns(repoDir: string) {
/**
 * Resolves the git ref that results are read from for a normalized config.
 *
 * @param config - Normalized results configuration; only `branch` is read.
 * @returns `origin/<branch>` when `branch` is truthy, otherwise `undefined`.
 */
function getResultsStorageRef(config: NormalizedResultsConfig): string | undefined {
  const { branch } = config;
  if (!branch) {
    return undefined;
  }
  return `origin/${branch}`;
}

/**
 * Lists git-backed runs for a results clone, sharing one promise per
 * `(repoDir, ref)` pair for up to `GIT_RUNS_CACHE_TTL_MS`.
 *
 * @param repoDir - Results clone directory, passed through to `listGitRuns`.
 * @param ref - Optional git ref to list from; when omitted `listGitRuns` is
 *   called with `repoDir` only.
 * @returns The (possibly shared) promise of listed runs. A rejection reaches
 *   every caller awaiting that promise, but is not kept in the cache.
 */
function cachedListGitRuns(repoDir: string, ref?: string): Promise<GitListedRun[]> {
  const now = Date.now();
  // Keys are `${repoDir}\0${ref}`; invalidateGitRunsCache matches on the
  // `${repoDir}\0` prefix, so the separator must stay in sync with it.
  const cacheKey = `${repoDir}\0${ref ?? ''}`;
  const cached = gitRunsCache.get(cacheKey);
  if (cached && cached.expiresAt > now) {
    return cached.data;
  }

  const promise = ref ? listGitRuns(repoDir, ref) : listGitRuns(repoDir);
  gitRunsCache.set(cacheKey, { data: promise, expiresAt: now + GIT_RUNS_CACHE_TTL_MS });

  // Both handlers return normally, so the derived promise never rejects and
  // this bookkeeping cannot surface as an unhandled rejection.
  void promise.then(
    () => {
      // Drop the entry if it is already past its TTL when the listing settles.
      const entry = gitRunsCache.get(cacheKey);
      if (entry && entry.expiresAt <= Date.now()) {
        gitRunsCache.delete(cacheKey);
      }
    },
    () => {
      // Do not replay a failed listing for the rest of the TTL: evict it so the
      // next call retries. Only remove the entry this call created.
      if (gitRunsCache.get(cacheKey)?.data === promise) {
        gitRunsCache.delete(cacheKey);
      }
    },
  );
  return promise;
}

/**
 * Drops every cached run listing for `repoDir`, regardless of which ref it was
 * listed from.
 *
 * @param repoDir - Results clone directory whose cache entries are removed.
 */
function invalidateGitRunsCache(repoDir: string): void {
  // Cache keys have the form `${repoDir}\0${ref}`; matching on the prefix that
  // includes the separator avoids touching repos whose path merely starts with
  // the same characters.
  const prefix = `${repoDir}\0`;
  for (const key of gitRunsCache.keys()) {
    if (key.startsWith(prefix)) {
      gitRunsCache.delete(key);
    }
  }
}

export type RunSource = 'local' | 'remote';
Expand Down Expand Up @@ -144,7 +153,7 @@ async function maybeWarnLargeArtifact(runDir: string): Promise<void> {
async function loadNormalizedResultsConfig(
cwd: string,
projectId?: string,
): Promise<Required<ResultsConfig> | undefined> {
): Promise<NormalizedResultsConfig | undefined> {
const repoRoot = (await findRepoRoot(cwd)) ?? cwd;
const config = await loadConfig(path.join(cwd, '_'), repoRoot);
const project =
Expand All @@ -155,6 +164,7 @@ async function loadNormalizedResultsConfig(
? {
mode: 'github' as const,
repo: project.results.repoUrl,
branch: project.results.branch,
path: project.results.path,
auto_push: project.results.sync?.autoPush,
branch_prefix: project.results.branchPrefix,
Expand Down Expand Up @@ -193,15 +203,17 @@ export async function getRemoteResultsStatus(
}

async function getRemoteRunCount(
config: Required<ResultsConfig> | undefined,
config: NormalizedResultsConfig | undefined,
status: ResultsRepoStatus,
): Promise<number> {
let runCount = 0;
if (config && status.available) {
try {
runCount = (await cachedListGitRuns(config.path)).length;
runCount = (await cachedListGitRuns(config.path, getResultsStorageRef(config))).length;
} catch {
runCount = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).length;
if (!config.branch) {
runCount = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).length;
}
}
}
return runCount;
Expand Down Expand Up @@ -277,7 +289,7 @@ export async function listMergedResultFiles(
let remoteRuns: SourcedResultFileMeta[] = [];
if (config.mode === 'github') {
try {
const gitRuns = await cachedListGitRuns(config.path);
const gitRuns = await cachedListGitRuns(config.path, getResultsStorageRef(config));
remoteRuns = gitRuns.map((r) => ({
filename: encodeRemoteRunId(r.run_id),
raw_filename: r.run_id,
Expand All @@ -291,16 +303,20 @@ export async function listMergedResultFiles(
sizeBytes: r.size_bytes || 0,
}));
} catch (error) {
console.error('git-native listing failed, falling back', error);
remoteRuns = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).map(
(meta) =>
({
...meta,
filename: encodeRemoteRunId(meta.filename),
raw_filename: meta.filename,
source: 'remote' as const,
}) satisfies SourcedResultFileMeta,
);
if (config.branch) {
console.error('git-native listing failed for configured results branch', error);
} else {
console.error('git-native listing failed, falling back', error);
remoteRuns = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).map(
(meta) =>
({
...meta,
filename: encodeRemoteRunId(meta.filename),
raw_filename: meta.filename,
source: 'remote' as const,
}) satisfies SourcedResultFileMeta,
);
}
}
} else {
remoteRuns = listResultFilesFromRunsDir(resolveResultsRepoRunsDir(config)).map(
Expand Down Expand Up @@ -359,7 +375,7 @@ export async function ensureRemoteRunAvailable(
'.agentv/results/runs',
path.posix.dirname(relativeManifestPath),
);
await materializeGitRun(config.path, relativeRunPath);
await materializeGitRun(config.path, relativeRunPath, getResultsStorageRef(config));
}

export async function readRemoteRunTagState(
Expand Down
94 changes: 93 additions & 1 deletion apps/cli/test/commands/results/serve.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ function writeRemoteRunArtifact(
experiment: string,
timestamp: string,
resultRecords: object | object[],
branch = 'main',
): string {
const isoTimestamp = timestamp.replace(
/^(\d{4}-\d{2}-\d{2})T(\d{2})-(\d{2})-(\d{2})-(\d{3})Z$/,
Expand Down Expand Up @@ -148,7 +149,7 @@ function writeRemoteRunArtifact(
),
);
git(`git add "${runDir}" && git commit --quiet -m "add ${experiment}"`, cloneDir);
git('git push --quiet origin main', cloneDir);
git(`git push --quiet origin HEAD:${branch}`, cloneDir);
git('git fetch --quiet origin --prune', cloneDir);
return `${experiment}::${timestamp}`;
}
Expand Down Expand Up @@ -926,6 +927,97 @@ describe('serve app', () => {
expect(detailData.results[0]).toMatchObject({ testId: 'test-greeting' });
}, 15000);

// Verifies that /api/runs lists a remote run that was pushed to the branch
// named by `results.branch` in the project config.
it('lists git-native remote runs from the configured storage branch', async () => {
const { remoteDir, cloneDir } = initializeRemoteRepo(tempDir);
const storageBranch = 'agentv-results';
// Create an orphan storage branch in the clone, clear any tracked files
// (failures ignored), seed it with an empty commit, and publish it.
git(`git switch --quiet --orphan ${storageBranch}`, cloneDir);
git('git rm -rf --quiet . 2>/dev/null || true', cloneDir);
git('git commit --quiet --allow-empty -m "seed results branch"', cloneDir);
git(`git push --quiet origin HEAD:${storageBranch}`, cloneDir);
// Commit a run artifact and push it to the storage branch (last argument).
const runId = writeRemoteRunArtifact(
cloneDir,
'branch-green-uat',
'2026-03-26T11-00-00-000Z',
RESULT_A,
storageBranch,
);

// Write a results config that names the remote, the storage branch, and the
// local clone path.
mkdirSync(path.join(tempDir, '.agentv'), { recursive: true });
writeFileSync(
path.join(tempDir, '.agentv', 'config.yaml'),
`results:
mode: github
repo: file://${remoteDir}
branch: ${storageBranch}
path: ${cloneDir}
`,
);

const app = createApp([], tempDir, tempDir, undefined, { studioDir });

// Exactly one run is expected, reported as a remote run with the `remote::`
// filename prefix and the experiment name used above.
const listRes = await app.request('/api/runs');
expect(listRes.status).toBe(200);
const listData = (await listRes.json()) as {
runs: Array<{ filename: string; source: string; experiment?: string }>;
};
expect(listData.runs).toHaveLength(1);
expect(listData.runs[0]).toMatchObject({
filename: `remote::${runId}`,
source: 'remote',
experiment: 'branch-green-uat',
});
}, 15000);

// Verifies that when `results.branch` names a branch that was never pushed,
// the API reports the remote as unavailable and exposes no runs, even though a
// run exists on the branch the artifact helper pushes to by default.
it('does not fall back to checked-out default branch runs when the configured storage branch is missing', async () => {
const { remoteDir, cloneDir } = initializeRemoteRepo(tempDir);
// No branch argument: the helper pushes this run to its default branch.
const runId = writeRemoteRunArtifact(
cloneDir,
'main-only',
'2026-03-26T11-30-00-000Z',
RESULT_A,
);

// Configure a storage branch that this test never creates on the remote.
mkdirSync(path.join(tempDir, '.agentv'), { recursive: true });
writeFileSync(
path.join(tempDir, '.agentv', 'config.yaml'),
`results:
mode: github
repo: file://${remoteDir}
branch: agentv-results
path: ${cloneDir}
`,
);

const app = createApp([], tempDir, tempDir, undefined, { studioDir });

// Status must report the remote as unavailable with zero runs and an error
// that names the missing branch.
const statusRes = await app.request('/api/remote/status');
expect(statusRes.status).toBe(200);
const statusData = (await statusRes.json()) as {
available: boolean;
sync_status?: string;
run_count: number;
last_error?: string;
};
expect(statusData).toMatchObject({
available: false,
sync_status: 'unavailable',
run_count: 0,
});
expect(statusData.last_error ?? '').toContain(
"Results repo remote branch 'agentv-results' does not exist",
);

// The run list must be empty.
const listRes = await app.request('/api/runs');
expect(listRes.status).toBe(200);
const listData = (await listRes.json()) as {
runs: Array<{ filename: string; source: string }>;
};
expect(listData.runs).toHaveLength(0);

// Requesting the run pushed above by its remote id must return 404.
const detailRes = await app.request(`/api/runs/${encodeURIComponent(`remote::${runId}`)}`);
expect(detailRes.status).toBe(404);
}, 15000);

it('dedupes synced local and remote run copies in favor of the local run', async () => {
const { remoteDir, cloneDir } = initializeRemoteRepo(tempDir);
const runId = writeRemoteRunArtifact(
Expand Down
7 changes: 5 additions & 2 deletions apps/web/src/content/docs/docs/tools/dashboard.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -260,19 +260,21 @@ projects:
ref: main
results:
repo_url: git@github.com:EntityProcess/agentv-examples-eval-results.git
branch: agentv-results
path: /home/entity/projects/EntityProcess/agentv-examples-eval-results
sync:
auto_push: true
```

`results.path` is the filesystem location of the local clone AgentV manages for the results repo. It uses the same local-path field name as the source project. `results.repo_url` is the Git remote URL used for clone and push operations, so use HTTPS when credentials are HTTP-token based and SSH when the runtime has SSH keys configured.
`results.path` is the filesystem location of the local clone AgentV manages for the results repo. It uses the same local-path field name as the source project. `results.repo_url` is the Git remote URL used for clone and push operations, so use HTTPS when credentials are HTTP-token based and SSH when the runtime has SSH keys configured. `results.branch` is optional; when present, AgentV reads and writes that existing storage branch instead of the results repo's default branch.

You can also set a top-level global fallback in the same file. This is used when the current project is not registered or its registry entry has no `results` block:

```yaml
results:
  mode: github
  repo: EntityProcess/default-eval-results
  branch: agentv-results
  path: /home/entity/projects/EntityProcess/default-eval-results
  auto_push: true
```
Expand Down Expand Up @@ -314,6 +316,7 @@ projects:
ref: main
results:
repo_url: https://github.com/EntityProcess/agentv-eval-results.git
branch: agentv-results
path: /home/entity/projects/EntityProcess/agentv-eval-results
sync:
auto_push: true
Expand All @@ -325,7 +328,7 @@ Use project-level **Sync Project** as the results exchange workflow. It handles

There is no separate `agentv results remote status` or `agentv results remote sync` command. The `agentv results` CLI stays focused on local run workspaces; manual remote exchange is Dashboard/API-only, with eval auto-export covering the common CI/publisher path.

Each run writes to a unique timestamped directory, so concurrent pushes from multiple machines are safe — non-fast-forward conflicts are resolved automatically via rebase retry.
Each run writes to a unique timestamped directory, so concurrent pushes from multiple machines are safe — non-fast-forward conflicts are resolved automatically via rebase retry. If `results.branch` is configured, create that remote branch once before syncing (for example with `git switch --orphan agentv-results`, then `git commit --allow-empty -m "seed results branch"`, then `git push origin HEAD:agentv-results`; the empty commit is required because a freshly created orphan branch has no commit to push). `branch_prefix` remains only the prefix for temporary result/PR branch names; it is not the storage branch.

### What happens to existing local runs?

Expand Down
2 changes: 1 addition & 1 deletion apps/web/src/content/docs/docs/tools/results.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ The CLI contract is deliberately narrow: `agentv results` manages local result a

Use these supported remote workflows instead:

- **Automatic publishing:** configure `projects[].results.auto_push: true`; new `agentv eval` and `agentv pipeline bench` runs push their artifacts after the run completes.
- **Automatic publishing:** configure `projects[].results.sync.auto_push: true`; new `agentv eval` and `agentv pipeline bench` runs push their artifacts after the run completes. Add `projects[].results.branch` when the results repo stores artifacts on a non-default branch.
- **Manual Dashboard sync:** run `agentv dashboard`, open the project, and use **Sync Project**.
- **Manual API sync:** while Dashboard is running, call `GET /api/projects/:projectId/remote/status` or `POST /api/projects/:projectId/remote/sync` for project-scoped automation. Single-project sessions also expose `GET /api/remote/status` and `POST /api/remote/sync`.
- **Git escape hatch:** for advanced recovery, inspect or repair the configured `projects[].results.path` clone with `git` directly, then sync again.
12 changes: 12 additions & 0 deletions packages/core/src/evaluation/loaders/config-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ export type ExecutionDefaults = {
export type ResultsConfig = {
readonly mode: 'github';
readonly repo: string;
/** Optional remote branch used as the canonical git-backed results store. */
readonly branch?: string;
/** Local filesystem path for the results clone. Optional; defaults to ~/.agentv/results/<slug>/. */
readonly path?: string;
readonly auto_push?: boolean;
Expand Down Expand Up @@ -610,6 +612,15 @@ export function parseResultsConfig(raw: unknown, configPath: string): ResultsCon
return undefined;
}

let branch: string | undefined;
if (obj.branch !== undefined) {
if (typeof obj.branch !== 'string' || obj.branch.trim().length === 0) {
logWarning(`Invalid results.branch in ${configPath}, expected non-empty string`);
return undefined;
}
branch = obj.branch.trim();
}

let resultsPath: string | undefined;
if (obj.path !== undefined) {
if (typeof obj.path !== 'string' || obj.path.trim().length === 0) {
Expand Down Expand Up @@ -643,6 +654,7 @@ export function parseResultsConfig(raw: unknown, configPath: string): ResultsCon
return {
mode: 'github',
repo,
...(branch !== undefined && { branch }),
...(resultsPath !== undefined && { path: resultsPath }),
...(typeof obj.auto_push === 'boolean' && { auto_push: obj.auto_push }),
...(branchPrefix && { branch_prefix: branchPrefix }),
Expand Down
Loading
Loading