Skip to content

Commit 26987f7

Browse files
committed
refactor(evals): migrate skill extraction to componentEvalTest
Replaces the evalTest approach (full CLI subprocess + loadCliConfig) with componentEvalTest (in-process makeFakeConfig + direct startMemoryService).

Key changes:
- ComponentRig now creates an isolated homeDir and stubs GEMINI_CLI_HOME after auth to isolate storage paths (sessions, skills, extraction state).
- ComponentRig.cleanup() calls config.dispose() and vi.unstubAllEnvs().
- Skill extraction evals pass approvalMode: YOLO to auto-approve tool calls (write_file/read_file) in non-interactive mode.
- Removes ~100 lines of boilerplate (withRigStorage, waitForExtractionState, loadCliConfig, loadSettings, process.chdir).
1 parent ef522bb commit 26987f7

2 files changed

Lines changed: 102 additions & 200 deletions

File tree

evals/component-test-helper.ts

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ import fs from 'node:fs';
1616
import path from 'node:path';
1717
import os from 'node:os';
1818
import { randomUUID } from 'node:crypto';
19+
import { vi } from 'vitest';
1920
import {
2021
Config,
2122
type ConfigParameters,
@@ -52,6 +53,7 @@ export interface ComponentEvalCase extends BaseEvalCase {
5253
export class ComponentRig {
5354
public config: Config | undefined;
5455
public testDir: string;
56+
public homeDir: string;
5557
public sessionId: string;
5658

5759
constructor(
@@ -61,6 +63,9 @@ export class ComponentRig {
6163
this.testDir = fs.mkdtempSync(
6264
path.join(os.tmpdir(), `gemini-component-rig-${uniqueId.slice(0, 8)}-`),
6365
);
66+
this.homeDir = fs.mkdtempSync(
67+
path.join(os.tmpdir(), `gemini-component-home-${uniqueId.slice(0, 8)}-`),
68+
);
6469
this.sessionId = `test-session-${uniqueId}`;
6570
}
6671

@@ -89,12 +94,23 @@ export class ComponentRig {
8994
this.config = makeFakeConfig(configParams);
9095
await this.config.initialize();
9196

92-
// Refresh auth using USE_GEMINI to initialize the real BaseLlmClient
97+
// Refresh auth using USE_GEMINI to initialize the real BaseLlmClient.
98+
// This must happen BEFORE stubbing GEMINI_CLI_HOME because OAuth credential
99+
// lookup resolves through homedir() → GEMINI_CLI_HOME.
93100
await this.config.refreshAuth(AuthType.USE_GEMINI);
101+
102+
// Isolate storage paths (session files, skills, extraction state) by
103+
// pointing GEMINI_CLI_HOME at a per-test temp directory. Storage resolves
104+
// global paths through `homedir()` which reads this env var. This is set
105+
// after auth so credential lookup uses the real home directory.
106+
vi.stubEnv('GEMINI_CLI_HOME', this.homeDir);
94107
}
95108

96109
async cleanup() {
110+
await this.config?.dispose();
111+
vi.unstubAllEnvs();
97112
fs.rmSync(this.testDir, { recursive: true, force: true });
113+
fs.rmSync(this.homeDir, { recursive: true, force: true });
98114
}
99115
}
100116

0 commit comments

Comments
 (0)