Skip to content

Commit a607486

Browse files
committed
fix: resolve nightly evaluation failures
- Fix ReferenceError in gemini-scheduled-triage.eval.ts by assigning stdout
- Mitigate string length limit in test-rig.ts using Buffer for logs
- Increase telemetry log safety limit to 500MB
- Fix mock MCP server dependency resolution by using absolute paths and NODE_PATH
- Update gemini-plan-execute.eval.ts to mock issue comments for plan verification
1 parent 642deeb commit a607486

5 files changed

Lines changed: 60 additions & 13 deletions

File tree

evals/gemini-plan-execute.eval.ts

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,15 @@ describe('Gemini Plan Execution Workflow', () => {
2727
join(rig.testDir, '.gemini/commands/gemini-plan-execute.toml'),
2828
);
2929

30+
if (item.inputs.DESCRIPTION.includes('AI Assistant: Plan of Action')) {
31+
rig.createFile(
32+
'mock-data.json',
33+
JSON.stringify({
34+
'issue_read.get_comments': [{ body: item.inputs.DESCRIPTION }],
35+
}),
36+
);
37+
}
38+
3039
const stdout = await rig.run(
3140
['--prompt', '/gemini-plan-execute', '--yolo'],
3241
item.inputs,

evals/gemini-scheduled-triage.eval.ts

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,10 @@ describe('Scheduled Triage Workflow', () => {
3131
GITHUB_ENV: envFile,
3232
};
3333

34-
await rig.run(['--prompt', '/gemini-scheduled-triage', '--yolo'], env);
34+
const stdout = await rig.run(
35+
['--prompt', '/gemini-scheduled-triage', '--yolo'],
36+
env,
37+
);
3538

3639
const content = readFileSync(envFile, 'utf-8');
3740
const triagedLine = content

evals/mock-mcp-server.ts

Lines changed: 30 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -209,7 +209,36 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
209209

210210
server.setRequestHandler(CallToolRequestSchema, async (request) => {
211211
log(`Calling tool: ${request.params.name}`);
212-
const pull_number = (request.params.arguments as any)?.pull_number;
212+
const args = request.params.arguments as any;
213+
const pull_number = args?.pull_number;
214+
215+
// Try to read custom mock data from CWD
216+
let customMockData: Record<string, any> = {};
217+
try {
218+
if (fs.existsSync('mock-data.json')) {
219+
customMockData = JSON.parse(fs.readFileSync('mock-data.json', 'utf-8'));
220+
}
221+
} catch (err) {
222+
log(`Error reading custom mock data: ${err}`);
223+
}
224+
225+
if (customMockData[request.params.name]) {
226+
const customResult = customMockData[request.params.name];
227+
// If it's a function, it's not supported by JSON.
228+
// If it's an array, we might want to return a specific item based on args,
229+
// but for now let's just return the whole thing as content.
230+
return {
231+
content: [
232+
{
233+
type: 'text',
234+
text:
235+
typeof customResult === 'string'
236+
? customResult
237+
: JSON.stringify(customResult),
238+
},
239+
],
240+
};
241+
}
213242

214243
switch (request.params.name) {
215244
case 'search_code':

evals/test-rig.ts

Lines changed: 17 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@ import {
66
existsSync,
77
rmSync,
88
realpathSync,
9+
statSync,
910
} from 'node:fs';
1011
import { join, dirname, basename } from 'node:path';
1112
import * as os from 'node:os';
@@ -89,11 +90,16 @@ export class TestRig {
8990
}
9091

9192
setupMockMcp() {
93+
const rootDir = realpathSync(join(__dirname, '..'));
94+
const tsxPath = join(rootDir, 'node_modules', '.bin', 'tsx');
9295
const mockServerPath = realpathSync(join(__dirname, 'mock-mcp-server.ts'));
9396
this.mcpServers['github'] = {
94-
command: 'npx',
95-
args: ['tsx', mockServerPath],
97+
command: tsxPath,
98+
args: [mockServerPath],
9699
trust: true,
100+
env: {
101+
NODE_PATH: join(rootDir, 'node_modules'),
102+
},
97103
};
98104
this._setupSettings(); // Re-write with MCP config
99105
}
@@ -203,7 +209,15 @@ export class TestRig {
203209

204210
readToolLogs() {
205211
if (!existsSync(this.telemetryLog)) return [];
206-
const content = readFileSync(this.telemetryLog, 'utf-8');
212+
const stats = statSync(this.telemetryLog);
213+
if (stats.size > 500 * 1024 * 1024) {
214+
throw new Error(
215+
`Telemetry log file is too large (${stats.size} bytes). Possible infinite loop.`,
216+
);
217+
}
218+
// Use Buffer to avoid string length limits for very large logs
219+
const buffer = readFileSync(this.telemetryLog);
220+
const content = buffer.toString('utf-8');
207221
return content
208222
.split(/(?<=})\s*(?={)/)
209223
.map((obj) => {

package-lock.json

Lines changed: 0 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments (0)