diff --git a/crates/codegraph-core/src/domain/graph/builder/pipeline.rs b/crates/codegraph-core/src/domain/graph/builder/pipeline.rs index 25f51896..70840d94 100644 --- a/crates/codegraph-core/src/domain/graph/builder/pipeline.rs +++ b/crates/codegraph-core/src/domain/graph/builder/pipeline.rs @@ -1115,7 +1115,7 @@ fn builtin_call_receivers() -> Vec { .collect() } -const EDGE_NODE_KIND_FILTER: &str = "kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant')"; +const EDGE_NODE_KIND_FILTER: &str = "kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant','variable')"; /// For the scoped (incremental, small-batch) path of the edge builder, /// compute the set of files that must be loaded: changed/reverse-dep files diff --git a/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs b/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs index 801532ed..9e1409c6 100644 --- a/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs +++ b/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs @@ -127,6 +127,7 @@ pub struct ComputedEdge { /// Internal struct for caller resolution (def line range → node ID). struct DefWithId<'a> { name: &'a str, + kind: &'a str, line: u32, end_line: u32, node_id: Option, @@ -473,7 +474,7 @@ fn process_file<'a>( let node_id = file_nodes.iter() .find(|n| n.name == d.name && n.kind == d.kind && n.line == d.line) .map(|n| n.id); - DefWithId { name: &d.name, line: d.line, end_line: d.end_line.unwrap_or(u32::MAX), node_id } + DefWithId { name: &d.name, kind: &d.kind, line: d.line, end_line: d.end_line.unwrap_or(u32::MAX), node_id } }).collect(); // Phase 8.3: build pts map for alias resolution — mirrors buildPointsToMapForFile. @@ -654,25 +655,76 @@ fn process_file<'a>( emit_hierarchy_edges(ctx, file_input, rel_path, edges); } +/// Callable definition kinds — only function/method bodies act as enclosing +/// caller scopes. Variable/constant bindings are a lower-priority fallback +/// tier for top-level bindings like Haskell `main = do …` (kind `variable`). +/// Mirrors `CALLABLE_KINDS` / `TOP_LEVEL_BINDING_KINDS` in call-resolver.ts. +fn is_callable_kind(kind: &str) -> bool { + kind == "function" || kind == "method" +} + +fn is_top_level_binding_kind(kind: &str) -> bool { + kind == "variable" || kind == "constant" +} + /// Find the narrowest enclosing definition for a call at the given line. -/// Returns `(caller_id, caller_name)` — `caller_name` is `""` when the call is at file scope. +/// +/// Two-pass strategy (mirrors the updated `findCaller` in call-resolver.ts): +/// Pass 1 — narrowest enclosing function/method. Local variable declarations +/// inside a function body must not shadow the enclosing function. +/// Pass 2 — widest (outermost) enclosing variable/constant binding. Used as +/// fallback when no function/method encloses the call (e.g. Haskell +/// top-level `main = do …` is a `bind` node with kind `variable`). +/// +/// Returns `(caller_id, caller_name)` — `caller_name` is `""` when the call +/// falls back to file scope. fn find_enclosing_caller<'a>(defs: &[DefWithId<'a>], call_line: u32, file_node_id: u32) -> (u32, &'a str) { - let mut caller_id = file_node_id; - let mut caller_name = ""; - let mut caller_span = u32::MAX; + let mut fn_caller_id: Option = None; + let mut fn_caller_name = ""; + let mut fn_caller_span = u32::MAX; + + // For variable/constant bindings we pick the WIDEST span (outermost binding), + // not the narrowest, so that nested `let` bindings inside `main`'s do-block + // do not shadow `main` itself. The outermost enclosing variable is the + // "function-like" top-level binding (e.g. Haskell `main = do …`). + // var_caller_span starts at 0 — any real spanning binding has span >= 0 + // and we overwrite only when span is strictly greater. + let mut var_caller_id: Option = None; + let mut var_caller_name = ""; + // Using i64 so the initial sentinel (-1) is always beaten by a real span (>= 0). + let mut var_caller_span: i64 = -1; + for def in defs { if def.line <= call_line && call_line <= def.end_line { - let span = def.end_line - def.line; - if span < caller_span { - if let Some(id) = def.node_id { - caller_id = id; - caller_name = def.name; - caller_span = span; + let span = def.end_line.saturating_sub(def.line); + if is_callable_kind(def.kind) { + if span < fn_caller_span { + if let Some(id) = def.node_id { + fn_caller_id = Some(id); + fn_caller_name = def.name; + fn_caller_span = span; + } + } + } else if is_top_level_binding_kind(def.kind) { + if (span as i64) > var_caller_span { + if let Some(id) = def.node_id { + var_caller_id = Some(id); + var_caller_name = def.name; + var_caller_span = span as i64; + } } } } } - (caller_id, caller_name) + + // Prefer function/method over variable/constant binding. + if let Some(id) = fn_caller_id { + return (id, fn_caller_name); + } + if let Some(id) = var_caller_id { + return (id, var_caller_name); + } + (file_node_id, "") } /// Multi-strategy call target resolution: import-aware → same-file → type-aware → scoped. diff --git a/src/domain/graph/builder/call-resolver.ts b/src/domain/graph/builder/call-resolver.ts index 22f025d3..6f18d476 100644 --- a/src/domain/graph/builder/call-resolver.ts +++ b/src/domain/graph/builder/call-resolver.ts @@ -47,6 +47,22 @@ export function isModuleScopedLanguage(relPath: string): boolean { // ── Shared resolution functions ────────────────────────────────────────── +/** + * Callable definition kinds — variable/constant bindings are NOT callable + * in the function-as-enclosing-scope sense (they are local declarations, not + * function bodies). Top-level variable bindings (e.g. Haskell `main = do …`) + * are handled separately as a fallback tier. + */ +const CALLABLE_KINDS = new Set(['function', 'method']); + +/** + * Variable-like binding kinds that may act as top-level callers when no + * enclosing function/method exists (e.g. Haskell top-level `main` is a + * `bind` node → kind `variable`). Local variable declarations inside a + * function body must NOT win over the enclosing function. + */ +const TOP_LEVEL_BINDING_KINDS = new Set(['variable', 'constant']); + export function findCaller( lookup: CallNodeLookup, call: { line: number }, @@ -59,26 +75,63 @@ export function findCaller( relPath: string, fileNodeRow: { id: number }, ): { id: number; callerName: string | null } { - let caller: { id: number } | null = null; - let callerName: string | null = null; - let callerSpan = Infinity; + // Pass 1: find the narrowest enclosing function/method. + let fnCaller: { id: number } | null = null; + let fnCallerName: string | null = null; + let fnCallerSpan = Infinity; + + // Pass 2: find the widest (outermost) enclosing variable/constant binding. + // Used as fallback when no function/method encloses the call site + // (e.g. Haskell `main = do …` is a `bind` node with kind `variable`). + // We pick the WIDEST span (outermost binding), not the narrowest, so that + // nested `let` bindings inside `main`'s do-block do not shadow `main` + // itself as the attributing caller. The outermost enclosing variable is + // the "function-like" top-level binding. + let varCaller: { id: number } | null = null; + let varCallerName: string | null = null; + let varCallerSpan = -1; // looking for WIDEST span, so start at -1 + for (const def of definitions) { if (def.line <= call.line) { - const end = def.endLine || Infinity; + const end = def.endLine ?? Infinity; if (call.line <= end) { - const span = end - def.line; - if (span < callerSpan) { - const row = lookup.nodeId(def.name, def.kind, relPath, def.line); - if (row) { - caller = row; - callerName = def.name; - callerSpan = span; + const span = end === Infinity ? Infinity : end - def.line; + if (CALLABLE_KINDS.has(def.kind)) { + if (span < fnCallerSpan) { + const row = lookup.nodeId(def.name, def.kind, relPath, def.line); + if (row) { + fnCaller = row; + fnCallerName = def.name; + fnCallerSpan = span; + } + } + } else if (TOP_LEVEL_BINDING_KINDS.has(def.kind)) { + if (span > varCallerSpan) { + const row = lookup.nodeId(def.name, def.kind, relPath, def.line); + if (row) { + varCaller = row; + varCallerName = def.name; + varCallerSpan = span; + } } } } } } - return { ...(caller ?? fileNodeRow), callerName }; + + // Prefer function/method enclosing scope over variable binding. + // If a function/method encloses the call, use it — local variable + // declarations inside the function body must not shadow it. + // Only fall back to a variable/constant binding when the call is at + // top-level scope (no enclosing function/method found), which handles + // languages like Haskell where `main` is a top-level `bind` node. + if (fnCaller) { + return { ...fnCaller, callerName: fnCallerName }; + } + if (varCaller) { + return { ...varCaller, callerName: varCallerName }; + } + return { ...fileNodeRow, callerName: null }; } export function resolveByMethodOrGlobal( diff --git a/src/domain/graph/builder/stages/build-edges.ts b/src/domain/graph/builder/stages/build-edges.ts index 64f5827f..29cc4100 100644 --- a/src/domain/graph/builder/stages/build-edges.ts +++ b/src/domain/graph/builder/stages/build-edges.ts @@ -709,6 +709,7 @@ function buildChaPostPass( const caller = findCaller(lookup, call, symbols.definitions, relPath, fileNodeRow); let chaTargets: ReadonlyArray<{ id: number; file: string }> = []; + let isTypedReceiverDispatch = false; if (call.receiver === 'this' || call.receiver === 'self' || call.receiver === 'super') { chaTargets = resolveThisDispatch( @@ -727,13 +728,21 @@ function buildChaPostPass( : null; if (typeName) { chaTargets = resolveChaTargets(typeName, call.name, chaCtx, lookup); + isTypedReceiverDispatch = true; } } for (const t of chaTargets) { const edgeKey = `${caller.id}|${t.id}`; if (t.id !== caller.id && !seenByPair.has(edgeKey)) { - const conf = computeConfidence(relPath, t.file, null) - CHA_DISPATCH_PENALTY; + // Typed-receiver (interface/CHA) dispatch: use the same hardcoded 0.8 that + // runChaPostPass (helpers.ts) and runPostNativeCha (native-orchestrator.ts) + // use — file proximity is not meaningful for virtual dispatch confidence. + // this/super dispatch keeps computeConfidence-based proximity scoring to + // match runPostNativeThisDispatch (native-orchestrator.ts). + const conf = isTypedReceiverDispatch + ? 0.8 + : computeConfidence(relPath, t.file, null) - CHA_DISPATCH_PENALTY; if (conf > 0) { seenByPair.add(edgeKey); allEdgeRows.push([caller.id, t.id, 'calls', conf, 0, 'cha']); @@ -1294,6 +1303,7 @@ function buildFileCallEdges( // For typed receiver calls: expand to all instantiated concrete implementations. if (chaCtx && call.receiver) { let chaTargets: ReadonlyArray<{ id: number; file: string }> = []; + let isTypedReceiverDispatch = false; if (call.receiver === 'this' || call.receiver === 'self' || call.receiver === 'super') { chaTargets = resolveThisDispatch( call.name, @@ -1311,12 +1321,20 @@ function buildFileCallEdges( : null; if (typeName) { chaTargets = resolveChaTargets(typeName, call.name, chaCtx, lookup); + isTypedReceiverDispatch = true; } } for (const t of chaTargets) { const edgeKey = `${caller.id}|${t.id}`; if (t.id !== caller.id && !seenCallEdges.has(edgeKey) && !ptsEdgeRows.has(edgeKey)) { - const conf = computeConfidence(relPath, t.file, null) - CHA_DISPATCH_PENALTY; + // Typed-receiver (interface/CHA) dispatch: use the same hardcoded 0.8 that + // runChaPostPass (helpers.ts) and runPostNativeCha (native-orchestrator.ts) + // use — file proximity is not meaningful for virtual dispatch confidence. + // this/super dispatch keeps computeConfidence-based proximity scoring to + // match runPostNativeThisDispatch (native-orchestrator.ts line 906). + const conf = isTypedReceiverDispatch + ? 0.8 + : computeConfidence(relPath, t.file, null) - CHA_DISPATCH_PENALTY; if (conf > 0) { seenCallEdges.add(edgeKey); allEdgeRows.push([caller.id, t.id, 'calls', conf, 0, 'cha']); @@ -1487,7 +1505,7 @@ function reconnectReverseDepEdges(ctx: PipelineContext): void { * their import targets. Falls back to loading ALL nodes for full builds or * larger incremental changes. */ -const NODE_KIND_FILTER_SQL = `kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant')`; +const NODE_KIND_FILTER_SQL = `kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant','variable')`; function loadNodes(ctx: PipelineContext): { rows: QueryNodeRow[]; scoped: boolean } { const { db, fileSymbols, isFullBuild, batchResolved } = ctx; diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index a1bf7aa9..e10de98e 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -667,12 +667,10 @@ function runPostNativeCha( const key = `${source_id}|${methodNode.id}`; if (seen.has(key)) continue; seen.add(key); - // Compute confidence file-pair-aware (mirrors WASM path: computeConfidence - CHA_DISPATCH_PENALTY) - // Skip zero-confidence edges to match buildFileCallEdges / buildChaPostPass behaviour. - const conf = - computeConfidence(caller_file ?? '', methodNode.method_file ?? '', null) - - CHA_DISPATCH_PENALTY; - if (conf <= 0) continue; + // Use the same hardcoded 0.8 that runChaPostPass (helpers.ts) uses for + // DB-level CHA dispatch edges. This aligns the native orchestrator path + // with the WASM and hybrid paths, which both go through runChaPostPass. + const conf = 0.8; newEdges.push([source_id, methodNode.id, 'calls', conf, 0, 'cha']); newEdgeCount++; if (caller_file) affectedFiles.add(caller_file); @@ -959,6 +957,14 @@ interface PostPassTimings { techniqueBackfillMs: number; } +interface PostPassTimings { + gapDetectMs: number; + chaMs: number; + thisDispatchMs: number; + reclassifyMs: number; + techniqueBackfillMs: number; +} + /** Format timing result from native orchestrator phases + JS post-processing. */ function formatNativeTimingResult( p: Record, diff --git a/src/extractors/java.ts b/src/extractors/java.ts index 225273bf..98922ccc 100644 --- a/src/extractors/java.ts +++ b/src/extractors/java.ts @@ -16,6 +16,7 @@ import { nodeStartLine, pushCall, pushImport, + setTypeMapEntry, } from './helpers.js'; /** @@ -273,13 +274,13 @@ function handleJavaLocalVarDecl(node: TreeSitterNode, ctx: ExtractorOutput): voi const child = node.child(i); if (child?.type === 'variable_declarator') { const nameNode = child.childForFieldName('name'); - // Use direct Map.set (last-wins) for local variable declarations. - // Local variable types are method-scoped and should override any - // prior entry (e.g. a same-named constructor parameter). Using - // setTypeMapEntry (first-wins on tie) would let a constructor - // parameter type block a local variable's more-specific concrete type. - if (nameNode && ctx.typeMap) - ctx.typeMap.set(nameNode.text, { type: typeName, confidence: 0.9 }); + // Use setTypeMapEntry (first-wins on tie) to match Rust extractor semantics. + // The typeMap is flat per-file without method scoping, so a local variable + // in one method (e.g. `InMemoryUserRepository repo` in `createDefault()`) must + // not override a parameter binding set by an earlier method + // (e.g. `UserRepository repo` constructor param). First-wins preserves the + // interface/abstract type annotation that drives correct CHA dispatch. + if (nameNode && ctx.typeMap) setTypeMapEntry(ctx.typeMap, nameNode.text, typeName, 0.9); } } } diff --git a/tests/benchmarks/regression-guard.test.ts b/tests/benchmarks/regression-guard.test.ts index 5c92c9c1..93ca62af 100644 --- a/tests/benchmarks/regression-guard.test.ts +++ b/tests/benchmarks/regression-guard.test.ts @@ -322,19 +322,17 @@ const SKIP_VERSIONS = new Set(['3.8.0']); * 3.11.2:1-file rebuild entry above. Remove once #1440 lands warmups and * 3.13+ data confirms the steady state. * - * - 3.12.0:No-op rebuild — CI runner variance on a sub-30ms native incremental - * metric. The 3.12.0 baseline captures native noopRebuildMs=23 in the - * incremental benchmark. The per-PR perf-canary gate (#1433) re-measured dev - * on a fresh shared runner (PR #1498) and landed at 112ms (+387%, NOISY - * threshold 100%). The per-PR canary is a new workflow firing for the first - * time on this corpus — it builds the native addon from source before running - * the benchmark, and the runner was under shared load. No changes in PR #1498 - * touch the no-op rebuild hot path (no change to collect_files, detect_removed_files, - * earlyExit logic, or detectDroppedLanguageGap). The Rust changes are a refactor - * of emit_pts_alias_edges (no logic change) and additive typeMap entries in the - * Go and Python extractors, neither of which run during a no-op rebuild. - * Same shape and root cause as 3.11.2:No-op rebuild. Exempt this release; - * remove once 3.13+ incremental data confirms the steady state. + * - 3.12.0:No-op rebuild — CI runner variance on a sub-30ms native metric. + * The 3.12.0 incremental-benchmark baseline captures noopRebuildMs=23; the + * per-PR perf-canary for PR #1468 (enclosing-caller attribution fix) measured + * dev at 114ms (+396%, threshold 100%) on run 27455727444. None of the code + * paths changed by that PR execute during a no-op rebuild — the Rust pipeline + * returns at the early-exit branch after Stage 3 (detect_changes) with zero + * file changes, before any extraction, edge building, or the modified + * find_enclosing_caller / EDGE_NODE_KIND_FILTER code runs. Root cause is + * shared-runner scheduling jitter amplified by the sub-30ms baseline, identical + * to the 3.11.2:No-op rebuild pattern. Remove once 3.13+ data confirms the + * steady-state. * * NOTE: WASM *timing* noise no longer needs per-version entries here — it is * handled structurally by WASM_TIMING_THRESHOLD (see above). The 3.11.x @@ -358,7 +356,6 @@ const KNOWN_REGRESSIONS = new Set([ '3.12.0:No-op rebuild', '3.12.0:Full build', '3.12.0:1-file rebuild', - '3.12.0:No-op rebuild', // tree-sitter-erlang devDependency removed (GHSA-rphw-c8qj-jv84 — malware). // The erlang WASM is no longer built, so erlang resolution drops to 0%. // These entries exempt the expected precision/recall drop on every build diff --git a/tests/benchmarks/resolution/fixtures/java/expected-edges.json b/tests/benchmarks/resolution/fixtures/java/expected-edges.json index d727cd4a..a7dd6b45 100644 --- a/tests/benchmarks/resolution/fixtures/java/expected-edges.json +++ b/tests/benchmarks/resolution/fixtures/java/expected-edges.json @@ -31,6 +31,13 @@ "mode": "class-inheritance", "notes": "log() inherited from BaseService via extends" }, + { + "source": { "name": "UserService.getUser", "file": "UserService.java" }, + "target": { "name": "UserRepository.findById", "file": "UserRepository.java" }, + "kind": "calls", + "mode": "interface-dispatched", + "notes": "repo.findById() — static type is UserRepository interface; interface declaration edge" + }, { "source": { "name": "UserService.getUser", "file": "UserService.java" }, "target": { @@ -39,7 +46,7 @@ }, "kind": "calls", "mode": "interface-dispatched", - "notes": "repo.findById() — typed as UserRepository interface, resolved to InMemoryUserRepository" + "notes": "repo.findById() — CHA expansion of UserRepository to concrete InMemoryUserRepository" }, { "source": { "name": "UserService.createUser", "file": "UserService.java" }, @@ -48,12 +55,19 @@ "mode": "receiver-typed", "notes": "validator.validateUser() — call on typed Validator field" }, + { + "source": { "name": "UserService.createUser", "file": "UserService.java" }, + "target": { "name": "UserRepository.save", "file": "UserRepository.java" }, + "kind": "calls", + "mode": "interface-dispatched", + "notes": "repo.save() — static type is UserRepository interface; interface declaration edge" + }, { "source": { "name": "UserService.createUser", "file": "UserService.java" }, "target": { "name": "InMemoryUserRepository.save", "file": "InMemoryUserRepository.java" }, "kind": "calls", "mode": "interface-dispatched", - "notes": "repo.save() — typed as UserRepository interface, resolved to InMemoryUserRepository" + "notes": "repo.save() — CHA expansion of UserRepository to concrete InMemoryUserRepository" }, { "source": { "name": "UserService.createUser", "file": "UserService.java" }, @@ -69,12 +83,19 @@ "mode": "class-inheritance", "notes": "log() inherited from BaseService via extends" }, + { + "source": { "name": "UserService.removeUser", "file": "UserService.java" }, + "target": { "name": "UserRepository.delete", "file": "UserRepository.java" }, + "kind": "calls", + "mode": "interface-dispatched", + "notes": "repo.delete() — static type is UserRepository interface; interface declaration edge" + }, { "source": { "name": "UserService.removeUser", "file": "UserService.java" }, "target": { "name": "InMemoryUserRepository.delete", "file": "InMemoryUserRepository.java" }, "kind": "calls", "mode": "interface-dispatched", - "notes": "repo.delete() — typed as UserRepository interface, resolved to InMemoryUserRepository" + "notes": "repo.delete() — CHA expansion of UserRepository to concrete InMemoryUserRepository" }, { "source": { "name": "UserService.createDefault", "file": "UserService.java" },