Skip to content

Commit 2559405

Browse files
committed
feat(04-02): add manifest-driven grammar CI test with fail-closed fallback
- Iterates CURATED_LANGUAGE_TO_WASM and verifies each grammar loads and parses its fixture
- Asserts wasm file exists on disk and extractTreeSitterSymbols returns non-empty symbols
- Negative test: corrupted wasm returns null without throwing
- GenericAnalyzer fallback test: verifies line-or-component chunking on corrupted wasm
1 parent be84c3a commit 2559405

File tree

1 file changed

+137
-0
lines changed

1 file changed

+137
-0
lines changed
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
import { describe, expect, it, afterAll } from 'vitest';
2+
import { promises as fs } from 'fs';
3+
import path from 'path';
4+
import os from 'os';
5+
import { randomBytes } from 'crypto';
6+
import { fileURLToPath } from 'url';
7+
8+
import { CURATED_LANGUAGE_TO_WASM, resolveGrammarDir } from '../src/grammars/manifest';
9+
import { extractTreeSitterSymbols } from '../src/utils/tree-sitter';
10+
import { GenericAnalyzer } from '../src/analyzers/generic/index';
11+
12+
// Derive file/dir paths for this test module from its module URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

/**
 * Directory holding the grammar wasm files, as resolved by the manifest module.
 *
 * resolveGrammarDir() is handed the URL of src/grammars/manifest.ts instead of
 * this test file's own import.meta.url: the test lives under tests/, which is
 * neither src/ nor dist/, so its own URL would not resolve correctly. Building
 * a URL that points inside src/ makes the resolver see the source module's
 * perspective.
 */
const GRAMMAR_DIR = resolveGrammarDir(new URL('../src/grammars/manifest.ts', import.meta.url).href);
21+
22+
/**
 * Maps each manifest language key to the name of its sample source file
 * under tests/fixtures/grammars/.
 *
 * Every key of CURATED_LANGUAGE_TO_WASM is expected to have an entry here;
 * the "manifest covers all fixture languages" test enforces that.
 */
const LANGUAGE_FIXTURE_FILE: Record<string, string> = {
  javascript: 'javascript.js',
  typescript: 'typescript.ts',
  typescriptreact: 'tsx.tsx',
  python: 'python.py',
  go: 'go.go',
  rust: 'rust.rs',
  java: 'java.java',
  c: 'c.c',
  cpp: 'cpp.cpp',
  csharp: 'csharp.cs'
};
37+
38+
// Directory of the per-language fixture files, located relative to this test file.
const fixturesDir = path.join(__dirname, 'fixtures', 'grammars');
39+
40+
/**
 * Negative / fail-closed tests. These MUST run before the positive load tests:
 * the tree-sitter module caches loaded grammars in-process, and once a grammar
 * has loaded successfully it stays cached. Running this suite first ensures the
 * corrupted wasm is the first thing the loader sees for "typescript".
 */
describe('Grammar assets: fail-closed fallback (runs first)', () => {
  const GRAMMAR_DIR_ENV = 'CODEBASE_CONTEXT_TS_GRAMMAR_DIR';

  // Value of the override before this suite touched it, restored in afterAll.
  const savedEnv = process.env[GRAMMAR_DIR_ENV];

  // Temp directory holding the corrupted grammar; undefined until a test creates it.
  let tmpDir: string | undefined;

  /**
   * Points the grammar-dir override at a temp directory whose "typescript"
   * wasm is 64 random bytes, creating that directory on first use.
   *
   * Shared by both tests so either one can run on its own.
   *
   * @returns the path of the temp directory now in effect
   */
  async function useCorruptedGrammarDir(): Promise<string> {
    let dir = tmpDir;
    if (dir === undefined) {
      dir = await fs.mkdtemp(path.join(os.tmpdir(), 'grammar-fallback-'));
      // Record the directory before writing so afterAll removes it even if the write fails.
      tmpDir = dir;
      await fs.writeFile(path.join(dir, CURATED_LANGUAGE_TO_WASM.typescript), randomBytes(64));
    }
    process.env[GRAMMAR_DIR_ENV] = dir;
    return dir;
  }

  afterAll(async () => {
    // Restore env so subsequent tests use real grammars.
    if (savedEnv !== undefined) {
      process.env[GRAMMAR_DIR_ENV] = savedEnv;
    } else {
      delete process.env[GRAMMAR_DIR_ENV];
    }

    // Best-effort cleanup: a failure to remove the temp dir must not fail the suite.
    if (tmpDir !== undefined) {
      await fs.rm(tmpDir, { recursive: true, force: true }).catch(() => {});
    }
  });

  it('returns null (no throw) for corrupted wasm', async () => {
    await useCorruptedGrammarDir();

    const fixtureText = await fs.readFile(path.join(fixturesDir, 'typescript.ts'), 'utf8');

    // Must not throw — should return null.
    const result = await extractTreeSitterSymbols(fixtureText, 'typescript');
    expect(result).toBeNull();
  });

  it('GenericAnalyzer falls back to line-or-component chunking on corrupted wasm', async () => {
    // Reuses the directory from the previous test, or creates it when run alone.
    const corruptedDir = await useCorruptedGrammarDir();

    const fixtureText = await fs.readFile(path.join(fixturesDir, 'typescript.ts'), 'utf8');

    const analyzer = new GenericAnalyzer();
    const result = await analyzer.analyze(path.join(corruptedDir, 'fixture.ts'), fixtureText);

    // Verify the fallback path was taken.
    expect(result.metadata.chunkStrategy).toBe('line-or-component');
    expect(result.metadata.treeSitterGrammar).toBeUndefined();
    expect(result.chunks.length).toBeGreaterThan(0);
  });
});
101+
102+
/**
 * Positive tests driven by the grammar manifest: one generated test per key of
 * CURATED_LANGUAGE_TO_WASM, plus a check that every key has a fixture mapping.
 */
describe('Grammar assets: manifest-driven load and parse', () => {
  const languages = Object.keys(CURATED_LANGUAGE_TO_WASM);

  // Checks that every manifest language has an entry in LANGUAGE_FIXTURE_FILE.
  it('manifest covers all fixture languages', () => {
    for (const lang of languages) {
      expect(
        LANGUAGE_FIXTURE_FILE[lang],
        `Missing fixture mapping for manifest language '${lang}'`
      ).toBeDefined();
    }
  });

  for (const language of languages) {
    it(`loads and parses fixture for ${language}`, async () => {
      // 1. Grammar wasm exists on disk.
      const wasmFile = CURATED_LANGUAGE_TO_WASM[language];
      const wasmPath = path.join(GRAMMAR_DIR, wasmFile);
      const wasmStat = await fs.stat(wasmPath).catch(() => null);
      expect(wasmStat, `wasm not found at ${wasmPath}`).not.toBeNull();

      // 2. Read fixture. Fail with a clear message when the mapping is missing,
      //    instead of letting path.join() reject an undefined segment.
      const fixtureFile = LANGUAGE_FIXTURE_FILE[language];
      if (fixtureFile === undefined) {
        throw new Error(`Missing fixture mapping for manifest language '${language}'`);
      }
      const fixturePath = path.join(fixturesDir, fixtureFile);
      const fixtureText = await fs.readFile(fixturePath, 'utf8');

      // 3. Extract symbols — this exercises the full load+parse pipeline.
      const result = await extractTreeSitterSymbols(fixtureText, language);
      expect(result, `extractTreeSitterSymbols returned null for '${language}'`).not.toBeNull();
      expect(
        result?.symbols.length ?? 0,
        `Expected at least one symbol for '${language}', got 0`
      ).toBeGreaterThan(0);
    });
  }
});

0 commit comments

Comments
 (0)