Skip to content

Commit 68a2d6d

Browse files
committed
feat(05-01): wire AST-aligned chunker into GenericAnalyzer with 21 unit tests
- Import createASTAlignedChunks in GenericAnalyzer for the Tree-sitter success path
- Change chunkStrategy from 'tree-sitter-symbol' to 'ast-aligned'
- Add 21 unit tests covering buildSymbolTree, generateASTChunks, mergeSmallSymbolChunks, splitOversizedChunks, and createASTAlignedChunks
- Update tree-sitter-symbols.test.ts to match the AST-aligned chunk structure
1 parent f865abc commit 68a2d6d

File tree

3 files changed

+469
-15
lines changed

3 files changed

+469
-15
lines changed

src/analyzers/generic/index.ts

Lines changed: 30 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@ import {
1717
Dependency
1818
} from '../../types/index.js';
1919
import { createChunksFromCode } from '../../utils/chunking.js';
20+
import { createASTAlignedChunks } from '../../utils/ast-chunker.js';
2021
import { detectLanguage } from '../../utils/language-detection.js';
2122
import { extractTreeSitterSymbols, type TreeSitterSymbol } from '../../utils/tree-sitter.js';
2223
import {
@@ -103,10 +104,13 @@ export class GenericAnalyzer implements FrameworkAnalyzer {
103104
let exports: ExportStatement[] = [];
104105
let treeSitterGrammar: string | undefined;
105106
let usesTreeSitterSymbols = false;
107+
let treeSitterSymbols: TreeSitterSymbol[] = [];
106108

107109
try {
108110
const treeSitterResult = await extractTreeSitterSymbols(content, language);
109111
if (treeSitterResult && treeSitterResult.symbols.length > 0) {
112+
treeSitterSymbols = treeSitterResult.symbols;
113+
// Legacy: replaced by createASTAlignedChunks for AST-aligned chunking
110114
components = this.convertTreeSitterSymbolsToComponents(treeSitterResult.symbols);
111115
treeSitterGrammar = treeSitterResult.grammarFile;
112116
usesTreeSitterSymbols = true;
@@ -137,23 +141,39 @@ export class GenericAnalyzer implements FrameworkAnalyzer {
137141
analyzer: this.name,
138142
fileSize: content.length,
139143
lineCount: content.split('\n').length,
140-
chunkStrategy: usesTreeSitterSymbols ? 'tree-sitter-symbol' : 'line-or-component'
144+
chunkStrategy: usesTreeSitterSymbols ? 'ast-aligned' : 'line-or-component'
141145
};
142146

143147
if (usesTreeSitterSymbols && treeSitterGrammar) {
144148
metadata.treeSitterGrammar = treeSitterGrammar;
145149
metadata.symbolAware = true;
146150
}
147151

148-
// Create chunks
149-
const chunks = await createChunksFromCode(
150-
content,
151-
filePath,
152-
relativePath,
153-
language,
154-
components,
155-
metadata
156-
);
152+
// Create chunks — use AST-aligned chunker when Tree-sitter symbols are available
153+
let chunks: CodeChunk[];
154+
if (usesTreeSitterSymbols && treeSitterSymbols.length > 0) {
155+
chunks = createASTAlignedChunks(content, treeSitterSymbols, {
156+
minChunkLines: 10,
157+
maxChunkLines: 150,
158+
filePath,
159+
language,
160+
framework: 'generic',
161+
componentType: 'module'
162+
});
163+
// Enrich AST chunks with the correct relativePath
164+
for (const chunk of chunks) {
165+
chunk.relativePath = relativePath;
166+
}
167+
} else {
168+
chunks = await createChunksFromCode(
169+
content,
170+
filePath,
171+
relativePath,
172+
language,
173+
components,
174+
metadata
175+
);
176+
}
157177

158178
return {
159179
filePath,

0 commit comments

Comments
 (0)