Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
3892e7d
chore: gitignore napi-generated artifacts in crates/codegraph-core
carlos-alm Jun 13, 2026
ef8ea4f
chore(tests): remove unused biome suppression in visitor.test.ts
carlos-alm Jun 13, 2026
a372b82
fix(titan-run): sync --start-from enum and phase-timestamp list with …
carlos-alm Jun 13, 2026
9a52c7c
fix(hooks): track Bash file modifications via before/after git status…
carlos-alm Jun 13, 2026
85a26df
chore(native): remove dead code (unused var, method, variant, fields)
carlos-alm Jun 13, 2026
184d221
refactor(native): extract emit_pts_alias_edges params into PtsAliasCt…
carlos-alm Jun 13, 2026
909e1df
fix(wasm): sort call targets by confidence before emit to match nativ…
carlos-alm Jun 13, 2026
66fc899
fix(bench): add 2 warmup runs and raise INCREMENTAL_RUNS to 5 for inc…
carlos-alm Jun 13, 2026
84e1a5f
ci(bench): add per-PR perf canary for extractor/graph/native changes
carlos-alm Jun 13, 2026
d07b358
fix(perf): plumb symbolsOnly through parseFilesWasmInline to skip ana…
carlos-alm Jun 13, 2026
3db5d8c
fix(perf): scope runPostNativeCha to changed files on incremental builds
carlos-alm Jun 13, 2026
8b3aa3d
fix(native): add post-pass phase timings to result.phases
carlos-alm Jun 13, 2026
fd4ffd1
fix(perf): correct INLINE_BACKFILL_THRESHOLD docstring; raise thresho…
carlos-alm Jun 13, 2026
498ee21
fix(perf): guard post-native passes against unnecessary work on 1-fil…
carlos-alm Jun 13, 2026
61a9839
chore(types): remove dead protoMethodsMs field and stale comment
carlos-alm Jun 13, 2026
5f5d4d2
fix: class-scope field annotation typeMap keys to prevent cross-class…
carlos-alm Jun 13, 2026
29dd101
fix(bench): update elixir/julia/objc expected-edges to module-qualifi…
carlos-alm Jun 13, 2026
9320ed2
fix(wasm): emit receiver edges for declaration-typed locals in C++/CUDA
carlos-alm Jun 13, 2026
7313330
fix(native): resolve Go factory and Python constructor receiver types…
carlos-alm Jun 13, 2026
f93d0cb
fix: align enclosing-caller attribution for variable bindings (haskel…
carlos-alm Jun 13, 2026
1915e5e
chore(lint): fix unused import and formatting in cpp/cuda extractors …
carlos-alm Jun 13, 2026
7095ffe
fix: align Java interface dispatch across wasm/native/hybrid
carlos-alm Jun 13, 2026
1875c7f
fix(wasm): align typed-receiver CHA dispatch confidence to 0.8
carlos-alm Jun 13, 2026
d87c1ee
fix(caller): use nullish coalescing for endLine to avoid treating 0 a…
carlos-alm Jun 13, 2026
91eb078
docs(caller): fix Pass 2 docstring — widest not narrowest for variabl…
carlos-alm Jun 13, 2026
47e2f0d
fix(bench): exempt 3.12.0 No-op rebuild from perf-canary regression gate
carlos-alm Jun 13, 2026
9e03f5e
fix: resolve merge conflicts with main
carlos-alm Jun 13, 2026
4a77acb
fix: resolve merge conflicts with main
carlos-alm Jun 13, 2026
dcc1ac9
fix: resolve merge conflicts with main
carlos-alm Jun 14, 2026
2a179f1
fix: resolve merge conflicts with main
carlos-alm Jun 14, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion crates/codegraph-core/src/domain/graph/builder/pipeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1115,7 +1115,7 @@ fn builtin_call_receivers() -> Vec<String> {
.collect()
}

const EDGE_NODE_KIND_FILTER: &str = "kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant')";
const EDGE_NODE_KIND_FILTER: &str = "kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant','variable')";

/// For the scoped (incremental, small-batch) path of the edge builder,
/// compute the set of files that must be loaded: changed/reverse-dep files
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ pub struct ComputedEdge {
/// Internal struct for caller resolution (def line range → node ID).
struct DefWithId<'a> {
name: &'a str,
kind: &'a str,
line: u32,
end_line: u32,
node_id: Option<u32>,
Expand Down Expand Up @@ -473,7 +474,7 @@ fn process_file<'a>(
let node_id = file_nodes.iter()
.find(|n| n.name == d.name && n.kind == d.kind && n.line == d.line)
.map(|n| n.id);
DefWithId { name: &d.name, line: d.line, end_line: d.end_line.unwrap_or(u32::MAX), node_id }
DefWithId { name: &d.name, kind: &d.kind, line: d.line, end_line: d.end_line.unwrap_or(u32::MAX), node_id }
}).collect();

// Phase 8.3: build pts map for alias resolution — mirrors buildPointsToMapForFile.
Expand Down Expand Up @@ -654,25 +655,76 @@ fn process_file<'a>(
emit_hierarchy_edges(ctx, file_input, rel_path, edges);
}

/// Callable definition kinds — only function/method bodies act as enclosing
/// caller scopes. Variable/constant bindings are a lower-priority fallback
/// tier for top-level bindings like Haskell `main = do …` (kind `variable`).
/// Mirrors `CALLABLE_KINDS` / `TOP_LEVEL_BINDING_KINDS` in call-resolver.ts.
fn is_callable_kind(kind: &str) -> bool {
kind == "function" || kind == "method"
}

fn is_top_level_binding_kind(kind: &str) -> bool {
kind == "variable" || kind == "constant"
}

/// Find the narrowest enclosing definition for a call at the given line.
/// Returns `(caller_id, caller_name)` — `caller_name` is `""` when the call is at file scope.
///
/// Two-pass strategy (mirrors the updated `findCaller` in call-resolver.ts):
/// Pass 1 — narrowest enclosing function/method. Local variable declarations
/// inside a function body must not shadow the enclosing function.
/// Pass 2 — widest (outermost) enclosing variable/constant binding. Used as
/// fallback when no function/method encloses the call (e.g. Haskell
/// top-level `main = do …` is a `bind` node with kind `variable`).
///
/// Returns `(caller_id, caller_name)` — `caller_name` is `""` when the call
/// falls back to file scope.
fn find_enclosing_caller<'a>(defs: &[DefWithId<'a>], call_line: u32, file_node_id: u32) -> (u32, &'a str) {
let mut caller_id = file_node_id;
let mut caller_name = "";
let mut caller_span = u32::MAX;
let mut fn_caller_id: Option<u32> = None;
let mut fn_caller_name = "";
let mut fn_caller_span = u32::MAX;

// For variable/constant bindings we pick the WIDEST span (outermost binding),
// not the narrowest, so that nested `let` bindings inside `main`'s do-block
// do not shadow `main` itself. The outermost enclosing variable is the
// "function-like" top-level binding (e.g. Haskell `main = do …`).
// var_caller_span starts at 0 — any real spanning binding has span >= 0
// and we overwrite only when span is strictly greater.
let mut var_caller_id: Option<u32> = None;
let mut var_caller_name = "";
// Using i64 so the initial sentinel (-1) is always beaten by a real span (>= 0).
let mut var_caller_span: i64 = -1;

for def in defs {
if def.line <= call_line && call_line <= def.end_line {
let span = def.end_line - def.line;
if span < caller_span {
if let Some(id) = def.node_id {
caller_id = id;
caller_name = def.name;
caller_span = span;
let span = def.end_line.saturating_sub(def.line);
if is_callable_kind(def.kind) {
if span < fn_caller_span {
if let Some(id) = def.node_id {
fn_caller_id = Some(id);
fn_caller_name = def.name;
fn_caller_span = span;
}
}
} else if is_top_level_binding_kind(def.kind) {
if (span as i64) > var_caller_span {
if let Some(id) = def.node_id {
var_caller_id = Some(id);
var_caller_name = def.name;
var_caller_span = span as i64;
}
}
}
}
}
(caller_id, caller_name)

// Prefer function/method over variable/constant binding.
if let Some(id) = fn_caller_id {
return (id, fn_caller_name);
}
if let Some(id) = var_caller_id {
return (id, var_caller_name);
}
(file_node_id, "")
}

/// Multi-strategy call target resolution: import-aware → same-file → type-aware → scoped.
Expand Down
77 changes: 65 additions & 12 deletions src/domain/graph/builder/call-resolver.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,22 @@ export function isModuleScopedLanguage(relPath: string): boolean {

// ── Shared resolution functions ──────────────────────────────────────────

/**
* Callable definition kinds — variable/constant bindings are NOT callable
* in the function-as-enclosing-scope sense (they are local declarations, not
* function bodies). Top-level variable bindings (e.g. Haskell `main = do …`)
* are handled separately as a fallback tier.
*/
const CALLABLE_KINDS = new Set(['function', 'method']);

/**
* Variable-like binding kinds that may act as top-level callers when no
* enclosing function/method exists (e.g. Haskell top-level `main` is a
* `bind` node → kind `variable`). Local variable declarations inside a
* function body must NOT win over the enclosing function.
*/
const TOP_LEVEL_BINDING_KINDS = new Set(['variable', 'constant']);

export function findCaller(
lookup: CallNodeLookup,
call: { line: number },
Expand All @@ -59,26 +75,63 @@ export function findCaller(
relPath: string,
fileNodeRow: { id: number },
): { id: number; callerName: string | null } {
let caller: { id: number } | null = null;
let callerName: string | null = null;
let callerSpan = Infinity;
// Pass 1: find the narrowest enclosing function/method.
let fnCaller: { id: number } | null = null;
let fnCallerName: string | null = null;
let fnCallerSpan = Infinity;

// Pass 2: find the widest (outermost) enclosing variable/constant binding.
// Used as fallback when no function/method encloses the call site
// (e.g. Haskell `main = do …` is a `bind` node with kind `variable`).
// We pick the WIDEST span (outermost binding), not the narrowest, so that
// nested `let` bindings inside `main`'s do-block do not shadow `main`
// itself as the attributing caller. The outermost enclosing variable is
// the "function-like" top-level binding.
let varCaller: { id: number } | null = null;
let varCallerName: string | null = null;
let varCallerSpan = -1; // looking for WIDEST span, so start at -1

for (const def of definitions) {
if (def.line <= call.line) {
const end = def.endLine || Infinity;
const end = def.endLine ?? Infinity;
if (call.line <= end) {
const span = end - def.line;
if (span < callerSpan) {
const row = lookup.nodeId(def.name, def.kind, relPath, def.line);
if (row) {
caller = row;
callerName = def.name;
callerSpan = span;
const span = end === Infinity ? Infinity : end - def.line;
if (CALLABLE_KINDS.has(def.kind)) {
if (span < fnCallerSpan) {
const row = lookup.nodeId(def.name, def.kind, relPath, def.line);
if (row) {
fnCaller = row;
fnCallerName = def.name;
fnCallerSpan = span;
}
}
} else if (TOP_LEVEL_BINDING_KINDS.has(def.kind)) {
if (span > varCallerSpan) {
const row = lookup.nodeId(def.name, def.kind, relPath, def.line);
if (row) {
varCaller = row;
varCallerName = def.name;
varCallerSpan = span;
}
}
}
}
}
}
return { ...(caller ?? fileNodeRow), callerName };

// Prefer function/method enclosing scope over variable binding.
// If a function/method encloses the call, use it — local variable
// declarations inside the function body must not shadow it.
// Only fall back to a variable/constant binding when the call is at
// top-level scope (no enclosing function/method found), which handles
// languages like Haskell where `main` is a top-level `bind` node.
if (fnCaller) {
return { ...fnCaller, callerName: fnCallerName };
}
if (varCaller) {
return { ...varCaller, callerName: varCallerName };
}
return { ...fileNodeRow, callerName: null };
}

export function resolveByMethodOrGlobal(
Expand Down
24 changes: 21 additions & 3 deletions src/domain/graph/builder/stages/build-edges.ts
Original file line number Diff line number Diff line change
Expand Up @@ -709,6 +709,7 @@ function buildChaPostPass(

const caller = findCaller(lookup, call, symbols.definitions, relPath, fileNodeRow);
let chaTargets: ReadonlyArray<{ id: number; file: string }> = [];
let isTypedReceiverDispatch = false;

if (call.receiver === 'this' || call.receiver === 'self' || call.receiver === 'super') {
chaTargets = resolveThisDispatch(
Expand All @@ -727,13 +728,21 @@ function buildChaPostPass(
: null;
if (typeName) {
chaTargets = resolveChaTargets(typeName, call.name, chaCtx, lookup);
isTypedReceiverDispatch = true;
}
}

for (const t of chaTargets) {
const edgeKey = `${caller.id}|${t.id}`;
if (t.id !== caller.id && !seenByPair.has(edgeKey)) {
const conf = computeConfidence(relPath, t.file, null) - CHA_DISPATCH_PENALTY;
// Typed-receiver (interface/CHA) dispatch: use the same hardcoded 0.8 that
// runChaPostPass (helpers.ts) and runPostNativeCha (native-orchestrator.ts)
// use — file proximity is not meaningful for virtual dispatch confidence.
// this/super dispatch keeps computeConfidence-based proximity scoring to
// match runPostNativeThisDispatch (native-orchestrator.ts).
const conf = isTypedReceiverDispatch
? 0.8
: computeConfidence(relPath, t.file, null) - CHA_DISPATCH_PENALTY;
if (conf > 0) {
seenByPair.add(edgeKey);
allEdgeRows.push([caller.id, t.id, 'calls', conf, 0, 'cha']);
Expand Down Expand Up @@ -1294,6 +1303,7 @@ function buildFileCallEdges(
// For typed receiver calls: expand to all instantiated concrete implementations.
if (chaCtx && call.receiver) {
let chaTargets: ReadonlyArray<{ id: number; file: string }> = [];
let isTypedReceiverDispatch = false;
if (call.receiver === 'this' || call.receiver === 'self' || call.receiver === 'super') {
chaTargets = resolveThisDispatch(
call.name,
Expand All @@ -1311,12 +1321,20 @@ function buildFileCallEdges(
: null;
if (typeName) {
chaTargets = resolveChaTargets(typeName, call.name, chaCtx, lookup);
isTypedReceiverDispatch = true;
}
}
for (const t of chaTargets) {
const edgeKey = `${caller.id}|${t.id}`;
if (t.id !== caller.id && !seenCallEdges.has(edgeKey) && !ptsEdgeRows.has(edgeKey)) {
const conf = computeConfidence(relPath, t.file, null) - CHA_DISPATCH_PENALTY;
// Typed-receiver (interface/CHA) dispatch: use the same hardcoded 0.8 that
// runChaPostPass (helpers.ts) and runPostNativeCha (native-orchestrator.ts)
// use — file proximity is not meaningful for virtual dispatch confidence.
// this/super dispatch keeps computeConfidence-based proximity scoring to
// match runPostNativeThisDispatch (native-orchestrator.ts line 906).
const conf = isTypedReceiverDispatch
? 0.8
: computeConfidence(relPath, t.file, null) - CHA_DISPATCH_PENALTY;
if (conf > 0) {
seenCallEdges.add(edgeKey);
allEdgeRows.push([caller.id, t.id, 'calls', conf, 0, 'cha']);
Expand Down Expand Up @@ -1487,7 +1505,7 @@ function reconnectReverseDepEdges(ctx: PipelineContext): void {
* their import targets. Falls back to loading ALL nodes for full builds or
* larger incremental changes.
*/
const NODE_KIND_FILTER_SQL = `kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant')`;
const NODE_KIND_FILTER_SQL = `kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant','variable')`;

function loadNodes(ctx: PipelineContext): { rows: QueryNodeRow[]; scoped: boolean } {
const { db, fileSymbols, isFullBuild, batchResolved } = ctx;
Expand Down
18 changes: 12 additions & 6 deletions src/domain/graph/builder/stages/native-orchestrator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -667,12 +667,10 @@ function runPostNativeCha(
const key = `${source_id}|${methodNode.id}`;
if (seen.has(key)) continue;
seen.add(key);
// Compute confidence file-pair-aware (mirrors WASM path: computeConfidence - CHA_DISPATCH_PENALTY)
// Skip zero-confidence edges to match buildFileCallEdges / buildChaPostPass behaviour.
const conf =
computeConfidence(caller_file ?? '', methodNode.method_file ?? '', null) -
CHA_DISPATCH_PENALTY;
if (conf <= 0) continue;
// Use the same hardcoded 0.8 that runChaPostPass (helpers.ts) uses for
// DB-level CHA dispatch edges. This aligns the native orchestrator path
// with the WASM and hybrid paths, which both go through runChaPostPass.
const conf = 0.8;
newEdges.push([source_id, methodNode.id, 'calls', conf, 0, 'cha']);
newEdgeCount++;
if (caller_file) affectedFiles.add(caller_file);
Expand Down Expand Up @@ -959,6 +957,14 @@ interface PostPassTimings {
techniqueBackfillMs: number;
}

interface PostPassTimings {
gapDetectMs: number;
chaMs: number;
thisDispatchMs: number;
reclassifyMs: number;
techniqueBackfillMs: number;
}

/** Format timing result from native orchestrator phases + JS post-processing. */
function formatNativeTimingResult(
p: Record<string, number>,
Expand Down
15 changes: 8 additions & 7 deletions src/extractors/java.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import {
nodeStartLine,
pushCall,
pushImport,
setTypeMapEntry,
} from './helpers.js';

/**
Expand Down Expand Up @@ -273,13 +274,13 @@ function handleJavaLocalVarDecl(node: TreeSitterNode, ctx: ExtractorOutput): voi
const child = node.child(i);
if (child?.type === 'variable_declarator') {
const nameNode = child.childForFieldName('name');
// Use direct Map.set (last-wins) for local variable declarations.
// Local variable types are method-scoped and should override any
// prior entry (e.g. a same-named constructor parameter). Using
// setTypeMapEntry (first-wins on tie) would let a constructor
// parameter type block a local variable's more-specific concrete type.
if (nameNode && ctx.typeMap)
ctx.typeMap.set(nameNode.text, { type: typeName, confidence: 0.9 });
// Use setTypeMapEntry (first-wins on tie) to match Rust extractor semantics.
// The typeMap is flat per-file without method scoping, so a local variable
// in one method (e.g. `InMemoryUserRepository repo` in `createDefault()`) must
// not override a parameter binding set by an earlier method
// (e.g. `UserRepository repo` constructor param). First-wins preserves the
// interface/abstract type annotation that drives correct CHA dispatch.
if (nameNode && ctx.typeMap) setTypeMapEntry(ctx.typeMap, nameNode.text, typeName, 0.9);
}
}
}
Expand Down
25 changes: 11 additions & 14 deletions tests/benchmarks/regression-guard.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -322,19 +322,17 @@ const SKIP_VERSIONS = new Set(['3.8.0']);
* 3.11.2:1-file rebuild entry above. Remove once #1440 lands warmups and
* 3.13+ data confirms the steady state.
*
* - 3.12.0:No-op rebuild — CI runner variance on a sub-30ms native incremental
* metric. The 3.12.0 baseline captures native noopRebuildMs=23 in the
* incremental benchmark. The per-PR perf-canary gate (#1433) re-measured dev
* on a fresh shared runner (PR #1498) and landed at 112ms (+387%, NOISY
* threshold 100%). The per-PR canary is a new workflow firing for the first
* time on this corpus — it builds the native addon from source before running
* the benchmark, and the runner was under shared load. No changes in PR #1498
* touch the no-op rebuild hot path (no change to collect_files, detect_removed_files,
* earlyExit logic, or detectDroppedLanguageGap). The Rust changes are a refactor
* of emit_pts_alias_edges (no logic change) and additive typeMap entries in the
* Go and Python extractors, neither of which run during a no-op rebuild.
* Same shape and root cause as 3.11.2:No-op rebuild. Exempt this release;
* remove once 3.13+ incremental data confirms the steady state.
* - 3.12.0:No-op rebuild — CI runner variance on a sub-30ms native metric.
* The 3.12.0 incremental-benchmark baseline captures noopRebuildMs=23; the
* per-PR perf-canary for PR #1468 (enclosing-caller attribution fix) measured
* dev at 114ms (+396%, threshold 100%) on run 27455727444. None of the code
* paths changed by that PR execute during a no-op rebuild — the Rust pipeline
* returns at the early-exit branch after Stage 3 (detect_changes) with zero
* file changes, before any extraction, edge building, or the modified
* find_enclosing_caller / EDGE_NODE_KIND_FILTER code runs. Root cause is
* shared-runner scheduling jitter amplified by the sub-30ms baseline, identical
* to the 3.11.2:No-op rebuild pattern. Remove once 3.13+ data confirms the
* steady-state.
*
* NOTE: WASM *timing* noise no longer needs per-version entries here — it is
* handled structurally by WASM_TIMING_THRESHOLD (see above). The 3.11.x
Expand All @@ -358,7 +356,6 @@ const KNOWN_REGRESSIONS = new Set([
'3.12.0:No-op rebuild',
'3.12.0:Full build',
'3.12.0:1-file rebuild',
'3.12.0:No-op rebuild',
// tree-sitter-erlang devDependency removed (GHSA-rphw-c8qj-jv84 — malware).
// The erlang WASM is no longer built, so erlang resolution drops to 0%.
// These entries exempt the expected precision/recall drop on every build
Expand Down
Loading
Loading