Skip to content

Commit 47f3e5e

Browse files
committed
Add test runner script, module loader bootstrap, and E2E suite
1 parent 54b8a3c commit 47f3e5e

6 files changed

Lines changed: 469 additions & 1 deletion

File tree

register-loader.mjs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// Bootstrap module for `--import` that registers the custom module loader.
2+
// Replaces the deprecated `--experimental-loader` flag.
3+
// Phase 1: uses module.register() — safe on Node <25.
4+
// Phase 2: migrate to module.registerHooks() when targeting Node >=25.
5+
import { register } from "node:module";
6+
import { dirname, resolve } from "node:path";
7+
import { fileURLToPath, pathToFileURL } from "node:url";
8+
9+
// Resolve the loader relative to this file so the bootstrap works from any cwd.
const HERE = dirname(fileURLToPath(import.meta.url));
const loaderUrl = pathToFileURL(resolve(HERE, "test-loader.mjs"));
// Second argument to register() is the parent URL: this file's own directory.
const parentUrl = pathToFileURL(HERE + "/");
register(loaderUrl, parentUrl);

scripts/run-node-test.mjs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import { spawnSync } from "node:child_process";
2+
import { dirname, resolve } from "node:path";
3+
import { fileURLToPath, pathToFileURL } from "node:url";
4+
5+
// Repository root is the parent of the directory holding this script.
const scriptDir = dirname(fileURLToPath(import.meta.url));
const root = resolve(scriptDir, "..");

// Split CLI arguments into the snapshot flag and the test file/glob patterns.
const SNAPSHOT_FLAG = "--update-snapshots";
const cliArgs = process.argv.slice(2);
const updateSnapshots = cliArgs.includes(SNAPSHOT_FLAG);
const patterns = cliArgs.filter((arg) => arg !== SNAPSHOT_FLAG);

if (patterns.length === 0) {
  throw new Error("Pass at least one test file or glob pattern.");
}

// The child inherits this environment; UPDATE_SNAPSHOTS=1 is added only on request.
const childEnv = updateSnapshots
  ? { ...process.env, UPDATE_SNAPSHOTS: "1" }
  : process.env;

// Run `node --import <register-loader> --test <patterns...>` from the repo root.
const loader = pathToFileURL(resolve(root, "register-loader.mjs")).href;
const nodeArgs = ["--import", loader, "--test", ...patterns];
const result = spawnSync(process.execPath, nodeArgs, {
  cwd: root,
  stdio: "inherit",
  env: childEnv,
});

// Propagate the child's outcome: spawn failure, terminating signal, or exit code.
if (result.error) throw result.error;
if (result.signal) process.kill(process.pid, result.signal);
process.exit(result.status ?? 1);

test-loader.mjs

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,28 @@
11
import { access } from "node:fs/promises";
22
import { fileURLToPath, pathToFileURL } from "node:url";
33
import path from "node:path";
4+
import { existsSync } from "node:fs";
45

5-
const PACKAGE_ROOT = "/Users/ofri/.nvm/versions/node/v24.14.1/lib/node_modules/@earendil-works/pi-coding-agent";
6+
/**
 * Locate an installed package by probing `node_modules/<name>` in `startDir`
 * and then in each ancestor directory, stopping at the filesystem root.
 *
 * @param name     Package name; may be scoped (e.g. "@scope/pkg").
 * @param startDir Directory where the upward search begins.
 * @returns Path of the first existing `node_modules/<name>` entry, or `null`
 *          when the root is reached without finding one.
 */
function findPackageRoot(name, startDir) {
  for (let current = startDir; ; ) {
    const hit = path.join(current, "node_modules", name);
    if (existsSync(hit)) {
      return hit;
    }
    const up = path.dirname(current);
    // dirname() of the filesystem root is the root itself: nothing left to search.
    if (up === current) {
      return null;
    }
    current = up;
  }
}
20+
21+
// Search upward from the directory that contains this loader file.
const PACKAGE_ROOT = findPackageRoot(
  "@earendil-works/pi-coding-agent",
  path.dirname(fileURLToPath(import.meta.url)),
);
// Abort module loading when the package cannot be located.
if (!PACKAGE_ROOT) {
  throw new Error("Cannot find @earendil-works/pi-coding-agent package root");
}
626
const PACKAGE_ALIASES = {
727
"@earendil-works/pi-coding-agent": `${PACKAGE_ROOT}/dist/index.js`,
828
"@earendil-works/pi-ai": `${PACKAGE_ROOT}/node_modules/@earendil-works/pi-ai/dist/index.js`,

tests/e2e/basic.test.ts

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
/**
2+
* Process-isolated E2E tests for the agenticoding extension.
3+
*
4+
* These tests spawn a fresh Node.js process per test case. Process isolation
5+
* means no shared singletons and no console races between test cases.
6+
*/
7+
8+
import { describe, it } from "node:test";
9+
import assert from "node:assert/strict";
10+
import { PytestHarness } from "./pty-harness.js";
11+
12+
/**
 * Spawn a fresh host process and wait until it prints READY.
 *
 * @returns The harness attached to the ready host.
 * @throws Whatever `waitForText` throws when READY never appears; the child
 *         is closed first so a failed startup does not leak a process.
 */
async function start(): Promise<PytestHarness> {
  const h = new PytestHarness();
  try {
    await h.waitForText("READY");
  } catch (err) {
    // withHarness only cleans up after start() resolves, so a startup
    // failure has to release the child here.
    h.close();
    throw err;
  }
  return h;
}
20+
21+
/**
 * Run `run` against a freshly started host and always tear the host down
 * afterwards, whether `run` resolves or rejects.
 */
async function withHarness(run: (h: PytestHarness) => Promise<void>): Promise<void> {
  const harness = await start();
  // Best-effort polite shutdown before the child is killed.
  const requestExit = (): void => {
    try {
      harness.write("exit");
    } catch {
      // already dead
    }
  };
  try {
    await run(harness);
  } finally {
    requestExit();
    harness.close();
  }
}
34+
35+
describe("agenticoding E2E", () => {
36+
it("host starts and extension registers", async () => withHarness(async (host) => {
  host.write("tools");
  await host.waitForText("OK:");

  // Every tool the extension provides must show up in the listing.
  const listing = host.snapshot();
  const expectedTools = [
    "notebook_write",
    "notebook_read",
    "notebook_index",
    "notebook_topic_set",
    "handoff",
    "spawn",
  ];
  for (const tool of expectedTools) {
    assert.ok(listing.includes(tool), `${tool} tool registered`);
  }
}));
48+
49+
it("notebook write/read round-trip", async () => withHarness(async (host) => {
  // Each step sends one command line, then blocks until the host acknowledges it.
  const steps: Array<[command: string, ack: string]> = [
    ['tool notebook_write {"name":"my-page","content":"Hello World"}', "OK:Saved notebook page"],
    ['tool notebook_read {"name":"my-page"}', "OK:--- my-page ---"],
  ];
  for (const [command, ack] of steps) {
    host.write(command);
    await host.waitForText(ack);
  }

  assert.ok(host.snapshot().includes("Hello World"), "content persisted");
}));
59+
60+
it("notebook index reflects written pages", async () => withHarness(async (host) => {
  // Write a page, request the index, and wait until the page name shows up.
  const writeThenIndex = async (name: string, content: string): Promise<void> => {
    host.write(`tool notebook_write {"name":"${name}","content":"${content}"}`);
    await host.waitForText("OK:");

    host.write("tool notebook_index {}");
    await host.waitForText(name);
  };

  await writeThenIndex("page-a", "Page A");
  // Second write should appear in index
  await writeThenIndex("page-b", "Page B");

  const output = host.snapshot();
  assert.ok(output.includes("page-a"), "page-a in index");
  assert.ok(output.includes("page-b"), "page-b in index");
}));
78+
79+
it("notebook_write overwrites existing page", async () => withHarness(async (host) => {
  const writePage = async (content: string): Promise<void> => {
    host.write(`tool notebook_write {"name":"page","content":"${content}"}`);
    await host.waitForText("OK:");
  };

  await writePage("v1");
  // Drop the accumulated output so later checks only see what follows.
  host.clear();
  await writePage("v2");

  // Clear again so the snapshot below covers only output from the read onwards.
  host.clear();
  host.write('tool notebook_read {"name":"page"}');
  await host.waitForText("OK:--- page ---");

  const readOutput = host.snapshot();
  assert.ok(readOutput.includes("v2"), "overwritten content present");
  assert.ok(!readOutput.includes("v1"), "old content absent from fresh output");
}));
96+
97+
it("notebook topic lifecycle: set via command, agent-set blocked", async () => withHarness(async (host) => {
  // A topic set through the notebook command counts as human-set.
  host.write("cmd notebook my-e2e-topic");
  await host.waitForText("OK");

  // The tool call that follows must be rejected: the human-set topic wins.
  host.write('tool notebook_topic_set {"topic":"agent-topic"}');
  await host.waitForText("ERR:");

  const output = host.snapshot();
  const rejectionPhrases = ["authoritative", "already exists"];
  assert.ok(
    rejectionPhrases.some((phrase) => output.includes(phrase)),
    "human-set topic blocks agent override",
  );
}));
111+
112+
it("agent-set topic works when unset", async () => withHarness(async (host) => {
  // Fresh host, so no topic exists yet and the tool call should succeed.
  const topic = "fresh-agent-topic";
  host.write(`tool notebook_topic_set {"topic":"${topic}"}`);
  await host.waitForText("OK:Active notebook topic:");
  assert.ok(host.snapshot().includes(topic));
}));
119+
120+
it("handoff tool queues handoff state", async () => withHarness(async (host) => {
  const payload = '{"task":"test handoff task","direction":"next-phase"}';
  host.write(`tool handoff ${payload}`);
  // Success is signalled solely by the acknowledgement line.
  await host.waitForText("OK:Handoff started");
}));
124+
125+
it("commands are registered", async () => withHarness(async (host) => {
  host.write("cmds");
  await host.waitForText("OK:");

  // Both slash commands must appear in the command listing.
  const listing = host.snapshot();
  for (const command of ["notebook", "handoff"]) {
    assert.ok(listing.includes(command), `/${command} command registered`);
  }
}));
133+
134+
it("spawn tool errors gracefully without model infrastructure", async () => withHarness(async (h) => {
  // Without a real model/session manager, spawn should throw immediately.
  h.write('tool spawn {"prompt":"any task"}');
  await h.waitForText("ERR:");

  const snap = h.snapshot();
  // waitForText already proved an "ERR:" reply exists, so re-checking for "ERR"
  // asserts nothing. Rule out the host's dispatch-level errors instead, so the
  // test fails if `spawn` is unregistered or its payload never reached the tool.
  assert.ok(!snap.includes("ERR:unknown tool"), "spawn tool is registered");
  assert.ok(!snap.includes("ERR:invalid json"), "spawn payload parsed");
}));
142+
143+
it("handles errors gracefully", async () => withHarness(async (host) => {
  // [input line, expected error reply]: unknown tool, invalid JSON, unknown command.
  const cases: Array<[input: string, reply: string]> = [
    ["tool nonexistent {}", "ERR:unknown tool"],
    ["tool notebook_write {bad json}", "ERR:invalid json"],
    ["cmd nonexistent", "ERR:unknown command"],
  ];
  for (const [input, reply] of cases) {
    host.write(input);
    await host.waitForText(reply);
  }
}));
156+
});

tests/e2e/pty-harness.ts

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
/**
2+
* pty-harness.ts — Process-isolated child-process harness for E2E tests.
3+
*
4+
* Spawns a fresh Node.js process and communicates over stdin/stdout. Process
5+
* isolation keeps runtime singletons and console output private per test case
6+
* without depending on PTY availability in CI.
7+
*/
8+
9+
import { spawn, type ChildProcessWithoutNullStreams } from "node:child_process";
10+
import { isAbsolute, dirname, resolve } from "node:path";
11+
import { fileURLToPath, pathToFileURL } from "node:url";
12+
13+
// Directory containing this harness file (tests/e2e).
const HERE = dirname(fileURLToPath(import.meta.url));
// Repository root: two directories above this file's directory.
const ROOT = resolve(HERE, "../..");
// `--import` takes a module specifier, so the loader path is passed as a file: URL.
const loaderPath = resolve(ROOT, "register-loader.mjs");
const LOADER = pathToFileURL(loaderPath).href;

/** Host script spawned when the harness is constructed without a script path. */
export const DEFAULT_SCRIPT = resolve(HERE, "test-host.ts");
18+
19+
/**
 * Drives one child Node.js process over stdin/stdout for a single test case.
 *
 * stdout and stderr are merged into one text buffer. `waitForText` consumes
 * that buffer with a cursor, so consecutive waits for the same text each
 * require a new occurrence.
 */
export class PytestHarness {
  private readonly child: ChildProcessWithoutNullStreams;
  private output = "";
  private readOffset = 0;
  private readonly timeoutMs: number;
  private readonly waiters = new Set<() => void>();
  /** Set once the child has closed or failed; explains why waiting is pointless. */
  private endReason: string | undefined;

  /**
   * @param scriptPath Script to run in the child; relative paths resolve against ROOT.
   * @param options    `timeoutMs` bounds each `waitForText` call (default 5000).
   */
  constructor(
    scriptPath = DEFAULT_SCRIPT,
    options?: { timeoutMs?: number },
  ) {
    this.timeoutMs = options?.timeoutMs ?? 5000;

    const entry = isAbsolute(scriptPath) ? scriptPath : resolve(ROOT, scriptPath);

    this.child = spawn(process.execPath, ["--import", LOADER, entry], {
      cwd: ROOT,
      stdio: ["pipe", "pipe", "pipe"],
      env: {
        ...process.env,
        FORCE_COLOR: "0",
        NODE_OPTIONS: "",
      },
    });

    // Copy before iterating: each wake() removes itself from the set.
    const wakeAll = (): void => {
      for (const wake of [...this.waiters]) wake();
      this.waiters.clear();
    };
    const append = (chunk: string | Buffer): void => {
      this.output += chunk.toString();
      wakeAll();
    };
    const markEnded = (reason: string): void => {
      if (this.endReason === undefined) this.endReason = reason;
      wakeAll();
    };

    // Decode as UTF-8 streams so a multi-byte character split across two
    // chunks is not corrupted by per-chunk Buffer#toString().
    this.child.stdout.setEncoding("utf8");
    this.child.stderr.setEncoding("utf8");
    this.child.stdout.on("data", append);
    this.child.stderr.on("data", append);

    // Without listeners, these "error" events surface as uncaught exceptions
    // in the test runner (spawn failure, or EPIPE when writing to a child
    // that has already exited).
    this.child.on("error", (err) => markEnded(`child process error: ${err.message}`));
    // Deliberately ignored: a dead child is reported through endReason instead.
    this.child.stdin.on("error", () => {});
    // "close" fires after the process ended and its stdio streams closed, so
    // no further output can arrive once it has been seen.
    this.child.on("close", (code, signal) => markEnded(`exit code ${code}, signal ${signal}`));
  }

  /** Resolve when new output (or child termination) is signalled, or after `ms`. */
  private async waitForOutput(ms: number): Promise<void> {
    if (ms <= 0) return;
    await new Promise<void>((resolve) => {
      const wake = () => {
        clearTimeout(timer);
        this.waiters.delete(wake);
        resolve();
      };
      const timer = setTimeout(wake, ms);
      this.waiters.add(wake);
    });
  }

  /**
   * Wait for a fresh substring to appear after the prior match.
   *
   * @throws Error when the child ends before the text appears, or when
   *         `timeoutMs` elapses; both messages include the output so far.
   */
  async waitForText(text: string): Promise<void> {
    const deadline = Date.now() + this.timeoutMs;
    for (;;) {
      // Check the buffer first on every pass, including the pass after the
      // deadline timer fired, so output that arrived at the last moment counts.
      const index = this.output.indexOf(text, this.readOffset);
      if (index !== -1) {
        this.readOffset = index + text.length;
        return;
      }
      if (this.endReason !== undefined) {
        // Fail fast instead of sitting out the timeout on a dead child.
        throw new Error(
          `waitForText: child ended (${this.endReason}) before fresh "${text}" appeared.\n` +
            `Output so far:\n${this.output}`,
        );
      }
      const remaining = deadline - Date.now();
      if (remaining <= 0) break;
      await this.waitForOutput(remaining);
    }
    throw new Error(
      `waitForText timeout after ${this.timeoutMs}ms looking for fresh \"${text}\".\n` +
        `Output so far:\n${this.output}`,
    );
  }

  /** Write a line of input to the child process. */
  write(input: string): void {
    this.child.stdin.write(input + "\n");
  }

  /** Return all accumulated output since creation or last clear(). */
  snapshot(): string {
    return this.output;
  }

  /** Clear accumulated output and match cursor, keeping the child running. */
  clear(): void {
    this.output = "";
    this.readOffset = 0;
  }

  /** Close the child's stdin and kill the child process. */
  close(): void {
    this.child.stdin.end();
    if (!this.child.killed) this.child.kill();
  }
}

0 commit comments

Comments
 (0)