Skip to content

Commit 1e97927

Browse files
authored
perf: forward langId hint to native standalone analysis functions (#743)
* chore: release v3.8.0

* perf: forward langId hint to native standalone analysis functions

  Pass the JS-resolved langId as an explicit Option<String> to the three native standalone analysis functions (analyzeComplexity, buildCfgAnalysis, extractDataflowAnalysis). Rust now uses the hint as the primary language, falling back to extension detection only when None. This ensures files whose language is inferred by content rather than extension (e.g. .vue files tagged as "javascript", extensionless shebang files) use the native Rust path instead of silently falling back to WASM.

  Closes #739

* fix: use langId fallback in extension-set guards for content-inferred files (#743)

  The needsComplexity/needsCfg/needsDataflow guards gated solely on file-extension sets, so content-inferred files (.vue tagged as "javascript", extensionless shebang files) could never reach the native or WASM analysis paths despite having a valid langId. Add langId-based fallback checks to all guard sites: the native analysis dispatcher, the WASM pre-parse check, the per-file CFG visitor setup, and the per-file dataflow visitor setup.
1 parent 0f6e10d commit 1e97927

4 files changed

Lines changed: 69 additions & 33 deletions

File tree

crates/codegraph-core/src/analysis.rs

Lines changed: 18 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -44,8 +44,18 @@ fn collect_function_nodes<'a>(
4444
}
4545

4646
/// Parse source code and return a tree + language kind, or None if unsupported.
47-
fn parse_source(source: &str, file_path: &str) -> Option<(tree_sitter::Tree, LanguageKind)> {
48-
let lang = LanguageKind::from_extension(file_path)?;
47+
/// When `lang_id` is provided, it is used as the primary language hint (supports
48+
/// files whose language is inferred by content rather than extension, e.g. `.vue`
49+
/// files tagged as `"javascript"` or extension-less files with a shebang).
50+
/// Falls back to extension detection when `lang_id` is `None` or is not a recognized language id.
51+
fn parse_source(
52+
source: &str,
53+
file_path: &str,
54+
lang_id: Option<&str>,
55+
) -> Option<(tree_sitter::Tree, LanguageKind)> {
56+
let lang = lang_id
57+
.and_then(LanguageKind::from_lang_id)
58+
.or_else(|| LanguageKind::from_extension(file_path))?;
4959
let mut parser = Parser::new();
5060
parser.set_language(&lang.tree_sitter_language()).ok()?;
5161
let tree = parser.parse(source.as_bytes(), None)?;
@@ -57,8 +67,9 @@ fn parse_source(source: &str, file_path: &str) -> Option<(tree_sitter::Tree, Lan
5767
pub fn analyze_complexity_standalone(
5868
source: &str,
5969
file_path: &str,
70+
lang_id: Option<&str>,
6071
) -> Vec<FunctionComplexityResult> {
61-
let (tree, lang) = match parse_source(source, file_path) {
72+
let (tree, lang) = match parse_source(source, file_path, lang_id) {
6273
Some(v) => v,
6374
None => return Vec::new(),
6475
};
@@ -91,8 +102,8 @@ pub fn analyze_complexity_standalone(
91102

92103
/// Build control-flow graphs for all functions in the given source.
93104
/// Returns per-function results with name, line, and CFG data.
94-
pub fn build_cfg_standalone(source: &str, file_path: &str) -> Vec<FunctionCfgResult> {
95-
let (tree, lang) = match parse_source(source, file_path) {
105+
pub fn build_cfg_standalone(source: &str, file_path: &str, lang_id: Option<&str>) -> Vec<FunctionCfgResult> {
106+
let (tree, lang) = match parse_source(source, file_path, lang_id) {
96107
Some(v) => v,
97108
None => return Vec::new(),
98109
};
@@ -130,7 +141,7 @@ pub fn build_cfg_standalone(source: &str, file_path: &str) -> Vec<FunctionCfgRes
130141

131142
/// Extract dataflow analysis for the given source.
132143
/// Returns file-level dataflow result (parameters, returns, assignments, arg flows, mutations).
133-
pub fn extract_dataflow_standalone(source: &str, file_path: &str) -> Option<DataflowResult> {
134-
let (tree, lang) = parse_source(source, file_path)?;
144+
pub fn extract_dataflow_standalone(source: &str, file_path: &str, lang_id: Option<&str>) -> Option<DataflowResult> {
145+
let (tree, lang) = parse_source(source, file_path, lang_id)?;
135146
extract_dataflow(&tree, source.as_bytes(), lang.lang_id_str())
136147
}

crates/codegraph-core/src/lib.rs

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -129,30 +129,36 @@ pub fn engine_version() -> String {
129129

130130
/// Analyze complexity metrics for all functions in the given source.
131131
/// Returns per-function results (name, line, endLine, complexity metrics).
132-
/// Language is detected from the file extension or treated as a lang_id.
132+
/// When `lang_id` is provided, it takes priority over extension-based detection.
133133
#[napi]
134134
pub fn analyze_complexity(
135135
source: String,
136136
file_path: String,
137+
lang_id: Option<String>,
137138
) -> Vec<types::FunctionComplexityResult> {
138-
analysis::analyze_complexity_standalone(&source, &file_path)
139+
analysis::analyze_complexity_standalone(&source, &file_path, lang_id.as_deref())
139140
}
140141

141142
/// Build control-flow graphs for all functions in the given source.
142143
/// Returns per-function results (name, line, endLine, CFG blocks + edges).
143-
/// Language is detected from the file extension or treated as a lang_id.
144+
/// When `lang_id` is provided, it takes priority over extension-based detection.
144145
#[napi]
145-
pub fn build_cfg_analysis(source: String, file_path: String) -> Vec<types::FunctionCfgResult> {
146-
analysis::build_cfg_standalone(&source, &file_path)
146+
pub fn build_cfg_analysis(
147+
source: String,
148+
file_path: String,
149+
lang_id: Option<String>,
150+
) -> Vec<types::FunctionCfgResult> {
151+
analysis::build_cfg_standalone(&source, &file_path, lang_id.as_deref())
147152
}
148153

149154
/// Extract dataflow analysis for the given source.
150155
/// Returns file-level dataflow (parameters, returns, assignments, arg flows, mutations).
151-
/// Language is detected from the file extension or treated as a lang_id.
156+
/// When `lang_id` is provided, it takes priority over extension-based detection.
152157
#[napi]
153158
pub fn extract_dataflow_analysis(
154159
source: String,
155160
file_path: String,
161+
lang_id: Option<String>,
156162
) -> Option<types::DataflowResult> {
157-
analysis::extract_dataflow_standalone(&source, &file_path)
163+
analysis::extract_dataflow_standalone(&source, &file_path, lang_id.as_deref())
158164
}

src/ast-analysis/engine.ts

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -127,15 +127,17 @@ function runNativeAnalysis(
127127

128128
const defs = symbols.definitions || [];
129129

130+
const langSupportsComplexity = COMPLEXITY_EXTENSIONS.has(ext) || COMPLEXITY_RULES.has(langId);
131+
const langSupportsCfg = CFG_EXTENSIONS.has(ext) || CFG_RULES.has(langId);
132+
const langSupportsDataflow = DATAFLOW_EXTENSIONS.has(ext) || DATAFLOW_RULES.has(langId);
133+
130134
const needsComplexity =
131-
doComplexity &&
132-
COMPLEXITY_EXTENSIONS.has(ext) &&
133-
defs.some((d) => hasFuncBody(d) && !d.complexity);
135+
doComplexity && langSupportsComplexity && defs.some((d) => hasFuncBody(d) && !d.complexity);
134136
const needsCfg =
135137
doCfg &&
136-
CFG_EXTENSIONS.has(ext) &&
138+
langSupportsCfg &&
137139
defs.some((d) => hasFuncBody(d) && d.cfg !== null && !Array.isArray(d.cfg?.blocks));
138-
const needsDataflow = doDataflow && !symbols.dataflow && DATAFLOW_EXTENSIONS.has(ext);
140+
const needsDataflow = doDataflow && !symbols.dataflow && langSupportsDataflow;
139141

140142
if (!needsComplexity && !needsCfg && !needsDataflow) continue;
141143

@@ -151,7 +153,7 @@ function runNativeAnalysis(
151153
// Complexity
152154
if (needsComplexity && native.analyzeComplexity) {
153155
try {
154-
const results = native.analyzeComplexity(source, absPath);
156+
const results = native.analyzeComplexity(source, absPath, langId);
155157
storeNativeComplexityResults(results, defs);
156158
} catch (err: unknown) {
157159
debug(`native analyzeComplexity failed for ${relPath}: ${(err as Error).message}`);
@@ -161,7 +163,7 @@ function runNativeAnalysis(
161163
// CFG
162164
if (needsCfg && native.buildCfgAnalysis) {
163165
try {
164-
const results = native.buildCfgAnalysis(source, absPath);
166+
const results = native.buildCfgAnalysis(source, absPath, langId);
165167
storeNativeCfgResults(results, defs);
166168
} catch (err: unknown) {
167169
debug(`native buildCfgAnalysis failed for ${relPath}: ${(err as Error).message}`);
@@ -171,7 +173,7 @@ function runNativeAnalysis(
171173
// Dataflow
172174
if (needsDataflow && native.extractDataflowAnalysis) {
173175
try {
174-
const result = native.extractDataflowAnalysis(source, absPath);
176+
const result = native.extractDataflowAnalysis(source, absPath, langId);
175177
if (result) symbols.dataflow = result;
176178
} catch (err: unknown) {
177179
debug(`native extractDataflowAnalysis failed for ${relPath}: ${(err as Error).message}`);
@@ -305,16 +307,21 @@ async function ensureWasmTreesIfNeeded(
305307
!d.name.includes('.');
306308

307309
// AST: need tree when native didn't provide non-call astNodes
308-
const needsAst = doAst && !Array.isArray(symbols.astNodes) && WALK_EXTENSIONS.has(ext);
310+
const lid = symbols._langId || '';
311+
const needsAst =
312+
doAst &&
313+
!Array.isArray(symbols.astNodes) &&
314+
(WALK_EXTENSIONS.has(ext) || AST_TYPE_MAPS.has(lid));
309315
const needsComplexity =
310316
doComplexity &&
311-
COMPLEXITY_EXTENSIONS.has(ext) &&
317+
(COMPLEXITY_EXTENSIONS.has(ext) || COMPLEXITY_RULES.has(lid)) &&
312318
defs.some((d) => hasFuncBody(d) && !d.complexity);
313319
const needsCfg =
314320
doCfg &&
315-
CFG_EXTENSIONS.has(ext) &&
321+
(CFG_EXTENSIONS.has(ext) || CFG_RULES.has(lid)) &&
316322
defs.some((d) => hasFuncBody(d) && d.cfg !== null && !Array.isArray(d.cfg?.blocks));
317-
const needsDataflow = doDataflow && !symbols.dataflow && DATAFLOW_EXTENSIONS.has(ext);
323+
const needsDataflow =
324+
doDataflow && !symbols.dataflow && (DATAFLOW_EXTENSIONS.has(ext) || DATAFLOW_RULES.has(lid));
318325

319326
if (needsAst || needsComplexity || needsCfg || needsDataflow) {
320327
needsWasmTrees = true;
@@ -396,9 +403,9 @@ function setupComplexityVisitorForFile(
396403
}
397404

398405
/** Set up CFG visitor if any definitions need WASM CFG analysis. */
399-
function setupCfgVisitorForFile(defs: Definition[], langId: string, ext: string): Visitor | null {
406+
function setupCfgVisitorForFile(defs: Definition[], langId: string): Visitor | null {
400407
const cfgRulesForLang = CFG_RULES.get(langId);
401-
if (!cfgRulesForLang || !CFG_EXTENSIONS.has(ext)) return null;
408+
if (!cfgRulesForLang) return null;
402409

403410
const needsWasmCfg = defs.some(
404411
(d) => hasFuncBody(d) && d.cfg !== null && !Array.isArray(d.cfg?.blocks),
@@ -432,12 +439,12 @@ function setupVisitors(
432439
opts.complexity !== false ? setupComplexityVisitorForFile(defs, langId, walkerOpts) : null;
433440
if (complexityVisitor) visitors.push(complexityVisitor);
434441

435-
const cfgVisitor = opts.cfg !== false ? setupCfgVisitorForFile(defs, langId, ext) : null;
442+
const cfgVisitor = opts.cfg !== false ? setupCfgVisitorForFile(defs, langId) : null;
436443
if (cfgVisitor) visitors.push(cfgVisitor);
437444

438445
let dataflowVisitor: Visitor | null = null;
439446
const dfRules = DATAFLOW_RULES.get(langId);
440-
if (opts.dataflow !== false && dfRules && DATAFLOW_EXTENSIONS.has(ext) && !symbols.dataflow) {
447+
if (opts.dataflow !== false && dfRules && !symbols.dataflow) {
441448
dataflowVisitor = createDataflowVisitor(dfRules);
442449
visitors.push(dataflowVisitor);
443450
}

src/types.ts

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1874,9 +1874,21 @@ export interface NativeAddon {
18741874
rootDir: string,
18751875
): unknown[];
18761876
engineVersion(): string;
1877-
analyzeComplexity(source: string, filePath: string): NativeFunctionComplexityResult[];
1878-
buildCfgAnalysis(source: string, filePath: string): NativeFunctionCfgResult[];
1879-
extractDataflowAnalysis(source: string, filePath: string): DataflowResult | null;
1877+
analyzeComplexity(
1878+
source: string,
1879+
filePath: string,
1880+
langId?: string | null,
1881+
): NativeFunctionComplexityResult[];
1882+
buildCfgAnalysis(
1883+
source: string,
1884+
filePath: string,
1885+
langId?: string | null,
1886+
): NativeFunctionCfgResult[];
1887+
extractDataflowAnalysis(
1888+
source: string,
1889+
filePath: string,
1890+
langId?: string | null,
1891+
): DataflowResult | null;
18801892
ParseTreeCache: new () => NativeParseTreeCache;
18811893
NativeDatabase: {
18821894
openReadWrite(dbPath: string): NativeDatabase;

0 commit comments

Comments
 (0)