Skip to content

Commit c321cb7

Browse files
DeDuckProject and claude committed
test: add LLM integration test for full pipeline
Real end-to-end test: diff → Anthropic analysis + script generation → Playwright recording → FFmpeg GIF conversion. No mocking — all layers are real. Skips gracefully when ANTHROPIC_API_KEY is not set.

- Add virtual-tryon-diff.ts fixture with JSX diff matching test server HTML
- Add llm-pipeline.test.ts with structural assertions on pipeline result
- Add vitest.llm.config.ts (separate config, explicit .env loading)
- Exclude llm-* from test:integration to keep it fast and free
- Add test:llm script to package.json

Result: 1 test, passes in ~27s, produces a 190KB GIF

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent bfc5d70 commit c321cb7

5 files changed

Lines changed: 156 additions & 0 deletions

File tree

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
"build": "pnpm -r build",
1313
"test": "vitest run",
1414
"test:integration": "vitest run --config vitest.integration.config.ts",
15+
"test:llm": "vitest run --config vitest.llm.config.ts",
1516
"test:all": "vitest run && vitest run --config vitest.integration.config.ts",
1617
"typecheck": "pnpm -r typecheck",
1718
"lint": "pnpm -r lint"
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/**
2+
* Fixture diff describing the addition of a "Virtual Try-On" button and modal
3+
* to a product page. The element IDs and text match the test server HTML exactly,
4+
* so LLM-generated Playwright scripts will find them.
5+
*/
6+
export const VIRTUAL_TRYON_DIFF = `diff --git a/src/app/products/page.tsx b/src/app/products/page.tsx
7+
index a1b2c3d..e4f5a6b 100644
8+
--- a/src/app/products/page.tsx
9+
+++ b/src/app/products/page.tsx
10+
@@ -8,10 +8,32 @@ export default function ProductPage() {
11+
return (
12+
<div>
13+
<h1>Wireless Headphones</h1>
14+
<p>Premium noise-cancelling headphones with 30-hour battery life.</p>
15+
<p id="counter">0</p>
16+
<button id="add-to-cart" onClick={handleAddToCart}>
17+
Add to Cart
18+
</button>
19+
+ <button
20+
+ id="try-on-btn"
21+
+ onClick={() => setShowTryOn(true)}
22+
+ style={{ marginLeft: '0.5rem', background: '#10b981', color: 'white', border: 'none', padding: '0.75rem 1.5rem', borderRadius: '6px', cursor: 'pointer' }}
23+
+ >
24+
+ Virtual Try-On
25+
+ </button>
26+
+
27+
+ {showTryOn && (
28+
+ <div className="modal open" id="modal">
29+
+ <div className="modal-content">
30+
+ <button
31+
+ className="close-btn"
32+
+ id="close-modal"
33+
+ aria-label="Close"
34+
+ onClick={() => setShowTryOn(false)}
35+
+ >
36+
+ &times;
37+
+ </button>
38+
+ <h2>Virtual Try-On</h2>
39+
+ <p>See how these headphones look on you using your camera.</p>
40+
+ <button id="start-tryon">Start Try-On</button>
41+
+ </div>
42+
+ </div>
43+
+ )}
44+
</div>
45+
);
46+
}
47+
`;
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
2+
import { existsSync, rmSync } from 'node:fs';
3+
import { join } from 'node:path';
4+
import { tmpdir } from 'node:os';
5+
import { startTestServer, type TestServer } from './fixtures/server.js';
6+
import { runPipeline } from '../../packages/core/src/pipeline.js';
7+
import { VIRTUAL_TRYON_DIFF } from './fixtures/virtual-tryon-diff.js';
8+
import type { GitGlimpseConfig } from '../../packages/core/src/config/schema.js';
9+
10+
const HAS_API_KEY = !!process.env['ANTHROPIC_API_KEY'];
11+
12+
let server: TestServer;
13+
let outputDir: string;
14+
15+
beforeAll(async () => {
16+
server = await startTestServer();
17+
outputDir = join(tmpdir(), `git-glimpse-llm-test-${Date.now()}`);
18+
}, 15000);
19+
20+
afterAll(async () => {
21+
if (server) await server.close();
22+
if (existsSync(outputDir)) {
23+
rmSync(outputDir, { recursive: true, force: true });
24+
}
25+
});
26+
27+
const config: GitGlimpseConfig = {
28+
app: {},
29+
// Map the diff's file path to / so the LLM navigates to the test server root
30+
routeMap: {
31+
'src/app/products/page.tsx': '/',
32+
},
33+
recording: {
34+
viewport: { width: 1280, height: 720 },
35+
format: 'gif',
36+
maxDuration: 30,
37+
deviceScaleFactor: 1,
38+
},
39+
llm: {
40+
provider: 'anthropic',
41+
model: 'claude-sonnet-4-6',
42+
},
43+
};
44+
45+
/**
 * End-to-end pipeline test: feeds the fixture diff to `runPipeline` against the
 * local test server and checks the analysis, the generated script, the attempt
 * count, and the recorded GIF. The whole suite is skipped when
 * ANTHROPIC_API_KEY is not set.
 */
describe.skipIf(!HAS_API_KEY)('LLM pipeline (real Anthropic + Playwright + FFmpeg)', () => {
  it('generates a working demo GIF from a fixture diff', async () => {
    const result = await runPipeline({
      diff: VIRTUAL_TRYON_DIFF,
      baseUrl: server.url,
      outputDir,
      config,
    });

    // Print diagnostics before any expectation runs: a failing `expect` throws,
    // so logging placed after the assertions would be skipped exactly when the
    // LLM/Playwright errors are most needed.
    if (!result.success) {
      console.error('Pipeline errors:', result.errors);
      console.error('Generated script:\n', result.script);
    }

    // LLM produced a meaningful analysis
    expect(result.analysis.changeDescription).toBeTruthy();
    expect(result.analysis.suggestedDemoFlow).toBeTruthy();
    expect(result.analysis.changedFiles).toContain('src/app/products/page.tsx');

    // LLM produced a structurally valid script
    expect(result.script).toContain('export async function demo');
    expect(result.script).toContain('page');

    // Attempt count is within retry budget
    expect(result.attempts).toBeGreaterThanOrEqual(1);
    expect(result.attempts).toBeLessThanOrEqual(3);

    // Full pipeline succeeded — GIF was recorded
    expect(result.success).toBe(true);
    const { recording } = result;
    expect(recording).toBeDefined();
    // Narrow once instead of repeating non-null assertions on every line below.
    if (!recording) {
      throw new Error('Pipeline reported success but returned no recording');
    }

    expect(recording.path).toMatch(/\.gif$/);
    expect(existsSync(recording.path)).toBe(true);
    expect(recording.sizeMB).toBeGreaterThan(0);
    expect(recording.sizeMB).toBeLessThan(10);
    expect(recording.duration).toBeGreaterThan(0);
    expect(recording.duration).toBeLessThan(30);
  }, 120000);
});

vitest.integration.config.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { defineConfig } from 'vitest/config';
33
export default defineConfig({
44
test: {
55
include: ['tests/integration/**/*.test.ts'],
6+
exclude: ['tests/integration/llm-*.test.ts', '**/node_modules/**'],
67
globals: true,
78
testTimeout: 60000,
89
hookTimeout: 30000,

vitest.llm.config.ts

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import { defineConfig } from 'vitest/config';
2+
import { readFileSync, existsSync } from 'node:fs';
3+
4+
/**
 * Reads `KEY=value` pairs from a `.env` file in the current working directory.
 *
 * Lines whose first character is `#` or whitespace, and lines without `=`,
 * are ignored. Keys and values are trimmed; a value wrapped in a matching pair
 * of single or double quotes has the quotes removed. Both LF and CRLF line
 * endings are accepted, and a leading UTF-8 BOM is ignored.
 *
 * @returns The parsed variables, or an empty object when `.env` does not exist.
 */
function loadDotenv(): Record<string, string> {
  const env: Record<string, string> = {};
  if (!existsSync('.env')) return env;

  // Drop a leading BOM: it counts as whitespace for `\s`, so it would make the
  // first line fail the `[^#\s]` check and silently lose the first variable.
  const content = readFileSync('.env', 'utf-8').replace(/^\uFEFF/, '');

  // Split on LF or CRLF. With a plain '\n' split, CRLF files leave a trailing
  // '\r' that `.` cannot match, so every line would fail the pattern below.
  for (const line of content.split(/\r?\n/)) {
    const match = /^([^#\s][^=]*)=(.*)$/.exec(line);
    if (!match) continue;

    const [, rawKey = '', rawValue = ''] = match;
    let value = rawValue.trim();

    // Strip one pair of matching surrounding quotes ("..." or '...').
    const quote = value.charAt(0);
    if (value.length >= 2 && (quote === '"' || quote === "'") && value.endsWith(quote)) {
      value = value.slice(1, -1);
    }

    env[rawKey.trim()] = value;
  }
  return env;
}
13+
14+
export default defineConfig({
15+
test: {
16+
include: ['tests/integration/llm-*.test.ts'],
17+
globals: true,
18+
testTimeout: 120000,
19+
hookTimeout: 30000,
20+
pool: 'forks',
21+
poolOptions: { forks: { singleFork: true } },
22+
env: loadDotenv(),
23+
},
24+
});

0 commit comments

Comments
 (0)