Skip to content

Commit 900a591

Browse files
agent-relay-code[bot] and Proactive Runtime Bot
authored and committed
chore: apply pr-reviewer fixes for #1092
1 parent 750225b commit 900a591

7 files changed

Lines changed: 1115 additions & 509 deletions

File tree

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Trajectory: Review and fix PR #1092
2+
3+
> **Status:** ✅ Completed
4+
> **Confidence:** 70%
5+
> **Started:** June 11, 2026 at 08:29 AM
6+
> **Completed:** June 11, 2026 at 08:30 AM
7+
8+
---
9+
10+
## Summary
11+
12+
Fixed eval parser, runner, summary, executor, and relay-check issues for PR #1092. Scoped verification passed; full verification is blocked by an incomplete dependency install, and the PR's GitHub mergeability state is dirty.
13+
14+
**Approach:** Standard approach
15+
16+
---
17+
18+
## Key Decisions
19+
20+
### Kept fixes scoped to relay eval harness
21+
- **Chose:** Kept fixes scoped to relay eval harness
22+
- **Reasoning:** Validated the current PR comments and changed only the PR's eval scripts/checks; full repo verification is blocked because `npm ci` was killed, leaving dependencies missing.
23+
24+
---
25+
26+
## Chapters
27+
28+
### 1. Work
29+
*Agent: default*
30+
31+
- Decision: Kept fixes scoped to relay eval harness
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
{
2+
"id": "traj_o61z0ze6kvla",
3+
"version": 1,
4+
"task": {
5+
"title": "Review and fix PR #1092"
6+
},
7+
"status": "completed",
8+
"startedAt": "2026-06-11T08:29:58.788Z",
9+
"completedAt": "2026-06-11T08:30:00.119Z",
10+
"agents": [
11+
{
12+
"name": "default",
13+
"role": "lead",
14+
"joinedAt": "2026-06-11T08:29:59.481Z"
15+
}
16+
],
17+
"chapters": [
18+
{
19+
"id": "chap_f292s4revkwp",
20+
"title": "Work",
21+
"agentName": "default",
22+
"startedAt": "2026-06-11T08:29:59.481Z",
23+
"endedAt": "2026-06-11T08:30:00.119Z",
24+
"events": [
25+
{
26+
"ts": 1781166599482,
27+
"type": "decision",
28+
"content": "Kept fixes scoped to relay eval harness: Kept fixes scoped to relay eval harness",
29+
"raw": {
30+
"question": "Kept fixes scoped to relay eval harness",
31+
"chosen": "Kept fixes scoped to relay eval harness",
32+
"alternatives": [],
33+
"reasoning": "Validated current PR comments and changed only PR eval scripts/checks; full repo verification is blocked by killed npm ci leaving missing dependencies."
34+
},
35+
"significance": "high"
36+
}
37+
]
38+
}
39+
],
40+
"retrospective": {
41+
"summary": "Fixed eval parser, runner, summary, executor, and relay-check issues for PR #1092; scoped verification passed, full verification blocked by incomplete dependency install and GitHub mergeability is dirty.",
42+
"approach": "Standard approach",
43+
"confidence": 0.7
44+
},
45+
"commits": [],
46+
"filesChanged": [],
47+
"projectId": "/home/daytona/workspace",
48+
"tags": [],
49+
"_trace": {
50+
"startRef": "5e63ef398e6376b6c96d9cffeae9c1b668ab45cf",
51+
"endRef": "5e63ef398e6376b6c96d9cffeae9c1b668ab45cf"
52+
}
53+
}

scripts/evals/ci-summary.mjs

Lines changed: 32 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,54 @@
11
#!/usr/bin/env node
22

3-
import { existsSync, readdirSync, readFileSync, writeFileSync } from "node:fs";
4-
import path from "node:path";
5-
import { fileURLToPath } from "node:url";
3+
import { existsSync, readdirSync, readFileSync, writeFileSync } from 'node:fs';
4+
import path from 'node:path';
5+
import { fileURLToPath } from 'node:url';
66

7-
const ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../..");
8-
const RUNS_DIR = path.join(ROOT, ".relay", "evals", "runs");
7+
const ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '../..');
8+
const RUNS_DIR = path.join(ROOT, '.relay', 'evals', 'runs');
99

1010
const runDir = findLatestRunDir();
1111
if (!runDir) {
12-
const summary = "# Relay Eval CI Summary\n\nNo Relay eval run found.\n";
12+
const summary = '# Relay Eval CI Summary\n\nNo Relay eval run found.\n';
1313
console.log(summary);
14-
if (process.env.GITHUB_STEP_SUMMARY) writeFileSync(process.env.GITHUB_STEP_SUMMARY, summary, { flag: "a" });
14+
if (process.env.GITHUB_STEP_SUMMARY) writeFileSync(process.env.GITHUB_STEP_SUMMARY, summary, { flag: 'a' });
1515
process.exit(0);
1616
}
1717

18-
const resultPath = path.join(runDir, "result.json");
18+
const resultPath = path.join(runDir, 'result.json');
1919
const result = readResultJson(resultPath);
20-
const failed = result.tests.filter((test) => test.status === "failed");
21-
const skipped = result.tests.filter((test) => test.status === "skipped");
22-
const needsHuman = result.tests.filter((test) => test.status === "needs-human");
20+
const failed = result.tests.filter((test) => test.status === 'failed');
21+
const skipped = result.tests.filter((test) => test.status === 'skipped');
22+
const needsHuman = result.tests.filter((test) => test.status === 'needs-human');
2323

2424
const lines = [
25-
"# Relay Eval CI Summary",
26-
"",
25+
'# Relay Eval CI Summary',
26+
'',
2727
`- Run directory: \`${path.relative(ROOT, runDir)}\``,
2828
`- Mode: \`${result.mode}\``,
2929
`- Git SHA: \`${result.git_sha}\``,
3030
`- Passed: ${result.passed}`,
3131
`- Needs human review: ${result.needs_human}`,
3232
`- Failed: ${result.failed}`,
3333
`- Skipped: ${result.skipped}`,
34-
"",
34+
'',
3535
];
3636

37-
appendStatusSection(lines, "Failed", failed);
38-
appendStatusSection(lines, "Skipped", skipped);
37+
appendStatusSection(lines, 'Failed', failed);
38+
appendStatusSection(lines, 'Skipped', skipped);
3939
appendNeedsHumanSection(lines, needsHuman);
4040

41-
const summary = `${lines.join("\n")}\n`;
41+
const summary = `${lines.join('\n')}\n`;
4242
console.log(summary);
4343

44-
if (process.env.GITHUB_STEP_SUMMARY) writeFileSync(process.env.GITHUB_STEP_SUMMARY, summary, { flag: "a" });
45-
if (failed.length > 0 || skipped.length > 0) process.exitCode = 1;
44+
if (process.env.GITHUB_STEP_SUMMARY) writeFileSync(process.env.GITHUB_STEP_SUMMARY, summary, { flag: 'a' });
45+
const failOnSkipped =
46+
process.env.RELAY_EVAL_FAIL_ON_SKIPPED === '1' || process.env.HUMAN_EVAL_FAIL_ON_SKIPPED === '1';
47+
if (failed.length > 0 || (failOnSkipped && skipped.length > 0)) process.exitCode = 1;
4648

4749
function appendStatusSection(lines, title, tests) {
4850
if (tests.length === 0) return;
49-
lines.push(`## ${title}`, "");
51+
lines.push(`## ${title}`, '');
5052
for (const test of tests) {
5153
lines.push(`- \`${test.id}\` (${test.suite}/${test.executor})`);
5254
if (test.error) lines.push(` - ${test.error}`);
@@ -55,26 +57,26 @@ function appendStatusSection(lines, title, tests) {
5557
lines.push(` - FAIL ${check.name}: ${check.message}`);
5658
}
5759
}
58-
lines.push("");
60+
lines.push('');
5961
}
6062

6163
function appendNeedsHumanSection(lines, tests) {
62-
lines.push("## Human Review", "");
64+
lines.push('## Human Review', '');
6365
if (tests.length === 0) {
64-
lines.push("No cases require human review.", "");
66+
lines.push('No cases require human review.', '');
6567
return;
6668
}
6769
for (const test of tests) lines.push(`- \`${test.id}\` (${test.suite}/${test.executor})`);
68-
lines.push("");
70+
lines.push('');
6971
}
7072

7173
function findLatestRunDir() {
7274
if (!existsSync(RUNS_DIR)) return null;
7375
const runs = readdirSync(RUNS_DIR)
7476
.map((dir) => path.join(RUNS_DIR, dir))
75-
.filter((dir) => existsSync(path.join(dir, "result.json")))
77+
.filter((dir) => existsSync(path.join(dir, 'result.json')))
7678
.flatMap((dir) => {
77-
const result = safeReadResultJson(path.join(dir, "result.json"));
79+
const result = safeReadResultJson(path.join(dir, 'result.json'));
7880
return result ? [{ dir, result }] : [];
7981
})
8082
.sort((a, b) => String(b.result.timestamp).localeCompare(String(a.result.timestamp)));
@@ -89,9 +91,11 @@ function readResultJson(filePath) {
8991

9092
function safeReadResultJson(filePath) {
9193
try {
92-
return JSON.parse(readFileSync(filePath, "utf8"));
94+
return JSON.parse(readFileSync(filePath, 'utf8'));
9395
} catch (error) {
94-
console.warn(`Skipping malformed Relay eval result ${path.relative(ROOT, filePath)}: ${error instanceof Error ? error.message : String(error)}`);
96+
console.warn(
97+
`Skipping malformed Relay eval result ${path.relative(ROOT, filePath)}: ${error instanceof Error ? error.message : String(error)}`
98+
);
9599
return null;
96100
}
97101
}

0 commit comments

Comments
 (0)