From 3892e7dcddd5173d4d83cc6839ce0d888aa36226 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:31:28 -0600 Subject: [PATCH 01/13] chore: gitignore napi-generated artifacts in crates/codegraph-core --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index aa62bcb59..2606dd88f 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,9 @@ dist/ coverage/ .env grammars/*.wasm +crates/codegraph-core/index.js +crates/codegraph-core/index.d.ts +crates/codegraph-core/*.node .claude/session-edits.log .claude/worktrees/ generated/DEPENDENCIES.md From ef8ea4fb31f4c073529c8772fa3aa9288a992fd6 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:32:19 -0600 Subject: [PATCH 02/13] chore(tests): remove unused biome suppression in visitor.test.ts --- tests/unit/visitor.test.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/visitor.test.ts b/tests/unit/visitor.test.ts index 992b4307a..62dc5b3e6 100644 --- a/tests/unit/visitor.test.ts +++ b/tests/unit/visitor.test.ts @@ -4,7 +4,6 @@ import { describe, expect, it } from 'vitest'; // We need a tree-sitter tree to test. Use the JS parser. 
-// biome-ignore lint/suspicious/noExplicitAny: tree-sitter parser type is complex and not worth typing for tests let parse: any; async function ensureParser() { From a372b82593ddfecd591c17133ef378872ffbec13 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:33:09 -0600 Subject: [PATCH 03/13] fix(titan-run): sync --start-from enum and phase-timestamp list with actual phases --- .claude/skills/titan-run/SKILL.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.claude/skills/titan-run/SKILL.md b/.claude/skills/titan-run/SKILL.md index 51e22569f..04c3fdc92 100644 --- a/.claude/skills/titan-run/SKILL.md +++ b/.claude/skills/titan-run/SKILL.md @@ -1,7 +1,7 @@ --- name: titan-run description: Run the full Titan Paradigm pipeline end-to-end by dispatching each phase to sub-agents with fresh context windows. Orchestrates recon → gauntlet → sync → forge → grind (+ repo-provided parity audit) automatically. -argument-hint: <--skip-recon> <--skip-gauntlet> <--start-from recon|gauntlet|sync|forge|grind|parity> <--gauntlet-batch-size 5> <--yes> +argument-hint: <--skip-recon> <--skip-gauntlet> <--start-from recon|gauntlet|sync|forge|grind|parity|close> <--gauntlet-batch-size 5> <--yes> allowed-tools: Agent, Read, Bash, Glob, Write, Edit --- @@ -50,7 +50,7 @@ You are the **orchestrator** for the full Titan Paradigm pipeline. Your job is t node -e "const fs=require('fs');const s=JSON.parse(fs.readFileSync('.codegraph/titan/titan-state.json','utf8'));s.phaseTimestamps=s.phaseTimestamps||{};s.phaseTimestamps['']=s.phaseTimestamps['']||{};s.phaseTimestamps[''].completedAt=new Date().toISOString();fs.writeFileSync('.codegraph/titan/titan-state.json',JSON.stringify(s,null,2));" ``` - Replace `` with `recon`, `gauntlet`, `sync`, `forge`, `parity`, or `close`. 
**Run the start command immediately before dispatching each phase's first sub-agent, and the completion command immediately after post-phase validation passes.** If resuming a phase (e.g., gauntlet loop iteration 2+), do NOT overwrite `startedAt` — only set it if it doesn't already exist. + Replace `` with `recon`, `gauntlet`, `sync`, `forge`, `grind`, `parity`, or `close`. **Run the start command immediately before dispatching each phase's first sub-agent, and the completion command immediately after post-phase validation passes.** If resuming a phase (e.g., gauntlet loop iteration 2+), do NOT overwrite `startedAt` — only set it if it doesn't already exist. **Timestamp validation:** After recording `completedAt` for any phase, verify `startedAt < completedAt`: ```bash From 9a52c7cc5eea2cba016d14ed3928e07128fad4e3 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:36:04 -0600 Subject: [PATCH 04/13] fix(hooks): track Bash file modifications via before/after git status diff Adds snapshot-pre-bash.sh (PreToolUse Bash) + track-bash-writes.sh (PostToolUse Bash): the pre-hook captures git status --porcelain to a per-worktree temp file before each Bash call; the post-hook diffs the before/after state and appends newly modified or created files to .claude/session-edits.log. This closes the gap where files written by sed -i, printf redirects, tee, heredocs, or build tools (Cargo.lock, lockfiles) were never recorded, causing guard-git.sh to emit false-positive BLOCKED errors. 
Closes #1457 --- .claude/hooks/snapshot-pre-bash.sh | 54 +++++++++++++ .claude/hooks/track-bash-writes.sh | 119 +++++++++++++++++++++++++++++ .claude/settings.json | 10 +++ 3 files changed, 183 insertions(+) create mode 100755 .claude/hooks/snapshot-pre-bash.sh create mode 100755 .claude/hooks/track-bash-writes.sh diff --git a/.claude/hooks/snapshot-pre-bash.sh b/.claude/hooks/snapshot-pre-bash.sh new file mode 100755 index 000000000..a91caebbb --- /dev/null +++ b/.claude/hooks/snapshot-pre-bash.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +# snapshot-pre-bash.sh — PreToolUse hook for Bash tool calls +# Snapshots `git status --porcelain` to a temp file before each Bash call so +# that track-bash-writes.sh (PostToolUse) can diff the before/after state and +# log files newly modified by the command to .claude/session-edits.log. +# Always exits 0 (informational only, never blocks). + +set -euo pipefail + +INPUT=$(cat) + +# Extract the command from tool_input JSON +COMMAND=$(echo "$INPUT" | node -e " + let d=''; + process.stdin.on('data',c=>d+=c); + process.stdin.on('end',()=>{ + const p=JSON.parse(d).tool_input?.command||''; + if(p)process.stdout.write(p); + }); +" 2>/dev/null) || true + +if [ -z "$COMMAND" ]; then + exit 0 +fi + +# Skip read-only commands that can never write files — reduces snapshot overhead +# for the most common Bash calls (ls, cat, grep, git log, git status, etc.). +# sed is intentionally NOT in this list because `sed -i` modifies files in-place. 
+if echo "$COMMAND" | grep -qE '^\s*(ls|cat|head|tail|grep|find|git\s+(log|status|diff|show|branch|remote|fetch|rev-parse|stash\s+list|ls-files|blame|describe|tag|config\s+--get)|gh\s+(pr|issue|repo)\s+(view|list|status)|echo|printf|pwd|which|node\s+-e|node\s+-p|npx\s+--version|wc|sort|uniq|awk)\b'; then + exit 0 +fi + +# Resolve the project root (worktree-aware — each worktree has its own .claude/) +PROJECT_DIR=$(git rev-parse --show-toplevel 2>/dev/null) || PROJECT_DIR="${CLAUDE_PROJECT_DIR:-.}" + +# Key the snapshot file to the project root so parallel worktrees don't collide. +# Use a simple hash of the path — just enough to be unique per worktree. +PROJECT_HASH=$(echo "$PROJECT_DIR" | node -e " + const crypto = require('crypto'); + let d=''; + process.stdin.on('data',c=>d+=c); + process.stdin.on('end',()=>{ + process.stdout.write(crypto.createHash('sha1').update(d.trim()).digest('hex').slice(0,8)); + }); +" 2>/dev/null) || PROJECT_HASH="default" + +SNAPSHOT_FILE="/tmp/claude-bash-snapshot-${PROJECT_HASH}.txt" + +# Capture current git status --porcelain. +# Lines look like: "XY filename" or "XY orig -> dest" (rename). +# We only care about the status marker and path — porcelain is stable across git versions. +git -C "$PROJECT_DIR" status --porcelain 2>/dev/null > "$SNAPSHOT_FILE" || true + +exit 0 diff --git a/.claude/hooks/track-bash-writes.sh b/.claude/hooks/track-bash-writes.sh new file mode 100755 index 000000000..e5d1ded98 --- /dev/null +++ b/.claude/hooks/track-bash-writes.sh @@ -0,0 +1,119 @@ +#!/usr/bin/env bash +# track-bash-writes.sh — PostToolUse hook for Bash tool calls +# Compares `git status --porcelain` against the snapshot taken by +# snapshot-pre-bash.sh (PreToolUse) to detect files newly modified or +# created by the Bash command, then appends them to .claude/session-edits.log +# so that guard-git.sh can validate commits correctly. +# Always exits 0 (informational only, never blocks). 
+ +set -euo pipefail + +INPUT=$(cat) + +# Extract the command from tool_input JSON +COMMAND=$(echo "$INPUT" | node -e " + let d=''; + process.stdin.on('data',c=>d+=c); + process.stdin.on('end',()=>{ + const p=JSON.parse(d).tool_input?.command||''; + if(p)process.stdout.write(p); + }); +" 2>/dev/null) || true + +if [ -z "$COMMAND" ]; then + exit 0 +fi + +# Resolve the project root (worktree-aware — each worktree has its own .claude/) +PROJECT_DIR=$(git rev-parse --show-toplevel 2>/dev/null) || PROJECT_DIR="${CLAUDE_PROJECT_DIR:-.}" + +# Reproduce the same project hash used by snapshot-pre-bash.sh +PROJECT_HASH=$(echo "$PROJECT_DIR" | node -e " + const crypto = require('crypto'); + let d=''; + process.stdin.on('data',c=>d+=c); + process.stdin.on('end',()=>{ + process.stdout.write(crypto.createHash('sha1').update(d.trim()).digest('hex').slice(0,8)); + }); +" 2>/dev/null) || PROJECT_HASH="default" + +SNAPSHOT_FILE="/tmp/claude-bash-snapshot-${PROJECT_HASH}.txt" + +# If there is no snapshot (hook was not installed yet, or the pre-hook was +# skipped for a read-only command) we have no baseline — exit cleanly. +if [ ! -f "$SNAPSHOT_FILE" ]; then + exit 0 +fi + +# Capture current state after the command ran +AFTER=$(git -C "$PROJECT_DIR" status --porcelain 2>/dev/null) || true + +# Read the before-state +BEFORE=$(cat "$SNAPSHOT_FILE") || true + +# Clean up the snapshot so it doesn't pollute the next command's pre-hook +rm -f "$SNAPSHOT_FILE" + +# Build the set of paths that existed (as dirty) before the command ran. +# porcelain format: "XY path" or "XY original -> new" (rename). +# We extract every path token after the two-char status code. 
+parse_paths() { + local status_output="$1" + echo "$status_output" | awk ' + /^[ MADRCU?!]{2} / { + # Drop the two-char status + space + rest = substr($0, 4) + # Handle rename: "old -> new" + if (index(rest, " -> ") > 0) { + n = split(rest, parts, " -> ") + for (i = 1; i <= n; i++) { + p = parts[i] + gsub(/^"/, "", p); gsub(/"$/, "", p) + if (p != "") print p + } + } else { + gsub(/^"/, "", rest); gsub(/"$/, "", rest) + if (rest != "") print rest + } + } + ' +} + +BEFORE_PATHS=$(parse_paths "$BEFORE" | sort) +AFTER_PATHS=$(parse_paths "$AFTER" | sort) + +if [ -z "$AFTER_PATHS" ]; then + exit 0 +fi + +# Find paths present in AFTER but not in BEFORE — these were newly dirtied +# (modified, created, or renamed-to) by the Bash command. +NEW_PATHS=$(comm -13 <(echo "$BEFORE_PATHS") <(echo "$AFTER_PATHS")) || true + +if [ -z "$NEW_PATHS" ]; then + exit 0 +fi + +# Also exclude paths that were already tracked by track-edits.sh or other hooks +# (i.e. already in the session-edits.log) so we don't double-log. 
+LOG_FILE="$PROJECT_DIR/.claude/session-edits.log" +ALREADY_LOGGED="" +if [ -f "$LOG_FILE" ] && [ -s "$LOG_FILE" ]; then + ALREADY_LOGGED=$(awk '{print $2}' "$LOG_FILE" | sort -u) +fi + +mkdir -p "$(dirname "$LOG_FILE")" +TS=$(date -u +%Y-%m-%dT%H:%M:%SZ) + +while IFS= read -r rel_path; do + if [ -z "$rel_path" ]; then + continue + fi + # Skip if already in the log from a prior hook (Edit/Write/track-moves) + if [ -n "$ALREADY_LOGGED" ] && echo "$ALREADY_LOGGED" | grep -qxF "$rel_path"; then + continue + fi + echo "$TS $rel_path" >> "$LOG_FILE" +done <<< "$NEW_PATHS" + +exit 0 diff --git a/.claude/settings.json b/.claude/settings.json index b3acd6d1b..7ab746809 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -7,6 +7,11 @@ { "matcher": "Bash", "hooks": [ + { + "type": "command", + "command": "p=\"${CLAUDE_PROJECT_DIR}\"; [ -d \"$p/.claude/hooks\" ] || p=\"$(git rev-parse --show-toplevel 2>/dev/null)\"; [ -d \"$p/.claude/hooks\" ] || exit 0; bash \"$p/.claude/hooks/snapshot-pre-bash.sh\"", + "timeout": 5 + }, { "type": "command", "command": "p=\"${CLAUDE_PROJECT_DIR}\"; [ -d \"$p/.claude/hooks\" ] || p=\"$(git rev-parse --show-toplevel 2>/dev/null)\"; [ -d \"$p/.claude/hooks\" ] || exit 0; bash \"$p/.claude/hooks/check-readme.sh\"", @@ -79,6 +84,11 @@ { "matcher": "Bash", "hooks": [ + { + "type": "command", + "command": "p=\"${CLAUDE_PROJECT_DIR}\"; [ -d \"$p/.claude/hooks\" ] || p=\"$(git rev-parse --show-toplevel 2>/dev/null)\"; [ -d \"$p/.claude/hooks\" ] || exit 0; bash \"$p/.claude/hooks/track-bash-writes.sh\"", + "timeout": 5 + }, { "type": "command", "command": "p=\"${CLAUDE_PROJECT_DIR}\"; [ -d \"$p/.claude/hooks\" ] || p=\"$(git rev-parse --show-toplevel 2>/dev/null)\"; [ -d \"$p/.claude/hooks\" ] || exit 0; bash \"$p/.claude/hooks/track-moves.sh\"", From 85a26df4f1a06db752548b3f5e3d299ec5f46806 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:39:52 -0600 Subject: [PATCH 05/13] chore(native): remove dead code (unused var, 
method, variant, fields) - clojure.rs: annotate lifetime-anchor assignment to silence false-positive - cfg.rs: remove never-called start_line_of method - complexity.rs: remove never-constructed NotHandled variant; convert irrefutable if-let patterns to plain let destructures - dataflow.rs: remove never-read callee fields from CallReturn/Destructured - incremental.rs: remove never-read lang field from CacheEntry cargo check and cargo clippy both clean after these changes. --- crates/codegraph-core/src/ast_analysis/cfg.rs | 4 --- .../src/ast_analysis/complexity.rs | 36 ++++++++----------- .../src/ast_analysis/dataflow.rs | 10 +++--- .../src/domain/graph/builder/incremental.rs | 3 +- .../codegraph-core/src/extractors/clojure.rs | 4 +++ 5 files changed, 25 insertions(+), 32 deletions(-) diff --git a/crates/codegraph-core/src/ast_analysis/cfg.rs b/crates/codegraph-core/src/ast_analysis/cfg.rs index 226a31362..fb784d40f 100644 --- a/crates/codegraph-core/src/ast_analysis/cfg.rs +++ b/crates/codegraph-core/src/ast_analysis/cfg.rs @@ -659,10 +659,6 @@ impl<'a> CfgBuilder<'a> { } } - fn start_line_of(&self, block_idx: u32) -> Option { - self.blocks.iter().find(|b| b.index == block_idx).and_then(|b| b.start_line) - } - /// Get statement children from a block or statement list. fn get_statements<'b>(&self, node: &Node<'b>) -> Vec> { let kind = node.kind(); diff --git a/crates/codegraph-core/src/ast_analysis/complexity.rs b/crates/codegraph-core/src/ast_analysis/complexity.rs index fdd572512..9827b091f 100644 --- a/crates/codegraph-core/src/ast_analysis/complexity.rs +++ b/crates/codegraph-core/src/ast_analysis/complexity.rs @@ -516,8 +516,6 @@ fn walk_children( enum BranchAction { /// Node handled — walk children at the given nesting delta, then return. Handled { cognitive_delta: u32, cyclomatic_delta: u32, nesting_delta: u32 }, - /// Not a special branch pattern — fall through to normal processing. 
- NotHandled, } /// Classify a branch node (one where `rules.is_branch(kind)` is true). @@ -675,14 +673,12 @@ fn walk( // Branch/control flow nodes (skip keyword leaf tokens) if rules.is_branch(kind) && node.child_count() > 0 { - if let BranchAction::Handled { cognitive_delta, cyclomatic_delta, nesting_delta } = - classify_branch(node, kind, rules, nesting_level) - { - *cognitive += cognitive_delta; - *cyclomatic += cyclomatic_delta; - walk_children(node, nesting_level + nesting_delta, false, rules, cognitive, cyclomatic, max_nesting, depth); - return; - } + let BranchAction::Handled { cognitive_delta, cyclomatic_delta, nesting_delta } = + classify_branch(node, kind, rules, nesting_level); + *cognitive += cognitive_delta; + *cyclomatic += cyclomatic_delta; + walk_children(node, nesting_level + nesting_delta, false, rules, cognitive, cyclomatic, max_nesting, depth); + return; } // Pattern C plain else (Go/Java) @@ -1323,17 +1319,15 @@ fn walk_all( // Branch/control flow nodes (skip keyword leaf tokens) if c_rules.is_branch(kind) && node.child_count() > 0 { - if let BranchAction::Handled { cognitive_delta, cyclomatic_delta, nesting_delta } = - classify_branch(node, kind, c_rules, nesting_level) - { - *cognitive += cognitive_delta; - *cyclomatic += cyclomatic_delta; - walk_all_children( - node, source, nesting_level + nesting_delta, false, skip_h, - c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, - ); - return; - } + let BranchAction::Handled { cognitive_delta, cyclomatic_delta, nesting_delta } = + classify_branch(node, kind, c_rules, nesting_level); + *cognitive += cognitive_delta; + *cyclomatic += cyclomatic_delta; + walk_all_children( + node, source, nesting_level + nesting_delta, false, skip_h, + c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, + ); + return; } // Pattern C plain else (Go/Java) diff --git a/crates/codegraph-core/src/ast_analysis/dataflow.rs b/crates/codegraph-core/src/ast_analysis/dataflow.rs 
index ddb4a11a1..5a897c0b9 100644 --- a/crates/codegraph-core/src/ast_analysis/dataflow.rs +++ b/crates/codegraph-core/src/ast_analysis/dataflow.rs @@ -882,8 +882,8 @@ fn collect_identifiers(node: &Node, out: &mut Vec, rules: &DataflowRules #[derive(Debug, Clone)] enum LocalSource { - CallReturn { callee: String }, - Destructured { callee: String }, + CallReturn, + Destructured, } struct ScopeFrame { @@ -1200,7 +1200,7 @@ fn handle_var_declarator( }); scope .locals - .insert(n.clone(), LocalSource::Destructured { callee: callee.clone() }); + .insert(n.clone(), LocalSource::Destructured); } } else { let var_name = node_text(&name_n, source).to_string(); @@ -1211,7 +1211,7 @@ fn handle_var_declarator( expression: truncate(node_text(node, source), DATAFLOW_TRUNCATION_LIMIT), line: node_line(node), }); - scope.locals.insert(var_name, LocalSource::CallReturn { callee }); + scope.locals.insert(var_name, LocalSource::CallReturn); } } @@ -1267,7 +1267,7 @@ fn handle_assignment( line: node_line(node), }); if let Some(scope) = scope_stack.last_mut() { - scope.locals.insert(var_name, LocalSource::CallReturn { callee }); + scope.locals.insert(var_name, LocalSource::CallReturn); } } } diff --git a/crates/codegraph-core/src/domain/graph/builder/incremental.rs b/crates/codegraph-core/src/domain/graph/builder/incremental.rs index 35fa04345..4985904ed 100644 --- a/crates/codegraph-core/src/domain/graph/builder/incremental.rs +++ b/crates/codegraph-core/src/domain/graph/builder/incremental.rs @@ -10,7 +10,6 @@ use crate::types::FileSymbols; struct CacheEntry { tree: Tree, - lang: LanguageKind, } /// Cache of parse trees for incremental parsing. 
@@ -51,7 +50,7 @@ impl ParseTreeCache { let symbols = extract_symbols(lang, &tree, source_bytes, &file_path); - self.entries.insert(file_path, CacheEntry { tree, lang }); + self.entries.insert(file_path, CacheEntry { tree }); Some(symbols) } diff --git a/crates/codegraph-core/src/extractors/clojure.rs b/crates/codegraph-core/src/extractors/clojure.rs index b5160474f..7263ecf1a 100644 --- a/crates/codegraph-core/src/extractors/clojure.rs +++ b/crates/codegraph-core/src/extractors/clojure.rs @@ -51,6 +51,10 @@ fn walk_clojure( return; } + // `next_ns_owned` holds the String so that `next_ns` can borrow it as + // `&str` for the duration of this stack frame. The assignment looks + // "never read" to the compiler but the borrow on the next line reads it. + #[allow(unused_assignments)] let mut next_ns_owned: Option = None; let next_ns: Option<&str> = if node.kind() == "list_lit" { match handle_list_form(node, source, symbols, current_ns) { From 184d22167f2f58a7a569538af6b616a1c11a3744 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:41:46 -0600 Subject: [PATCH 06/13] refactor(native): extract emit_pts_alias_edges params into PtsAliasCtx struct --- .../graph/builder/stages/build_edges.rs | 79 +++++++++++++------ 1 file changed, 55 insertions(+), 24 deletions(-) diff --git a/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs b/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs index 3475adebe..39108e3d8 100644 --- a/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs +++ b/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs @@ -360,50 +360,55 @@ fn resolve_via_points_to<'a>( } } +/// Per-call-site inputs for `emit_pts_alias_edges`. +/// Groups the lookup parameters so the function stays within the argument-count limit. 
+struct PtsAliasCtx<'a> { + pts: &'a HashMap>, + lookup_name: &'a str, + call_line: u32, + caller_id: u32, + caller_name: &'a str, + is_dynamic: u32, + rel_path: &'a str, + imported_names: &'a HashMap<&'a str, &'a str>, + type_map: &'a HashMap<&'a str, (&'a str, f64)>, +} + /// Resolve each pts alias of `lookup_name` and emit hop-penalised call edges. /// Shared by the no-receiver gate and the receiver-key (`rest.prop()`) fallback; /// mirrors the alias-emission loops in buildFileCallEdges (build-edges.ts). -#[allow(clippy::too_many_arguments)] fn emit_pts_alias_edges<'a>( ctx: &EdgeContext<'a>, - pts: &HashMap>, - lookup_name: &str, - call_line: u32, - caller_id: u32, - caller_name: &str, - is_dynamic: u32, - rel_path: &str, - imported_names: &HashMap<&str, &str>, - type_map: &HashMap<&str, (&str, f64)>, + alias_ctx: &PtsAliasCtx<'_>, seen_edges: &HashSet, pts_edge_map: &mut HashMap, edges: &mut Vec, ) { - for alias in resolve_via_points_to(lookup_name, pts) { - let alias_imported_from = imported_names.get(alias).copied(); + for alias in resolve_via_points_to(alias_ctx.lookup_name, alias_ctx.pts) { + let alias_imported_from = alias_ctx.imported_names.get(alias).copied(); let alias_call = CallInfo { name: alias.to_string(), - line: call_line, + line: alias_ctx.call_line, dynamic: Some(true), receiver: None, }; let mut alias_targets = resolve_call_targets( - ctx, &alias_call, rel_path, alias_imported_from, type_map, caller_name, + ctx, &alias_call, alias_ctx.rel_path, alias_imported_from, alias_ctx.type_map, alias_ctx.caller_name, ); - sort_targets_by_confidence(&mut alias_targets, rel_path, alias_imported_from); + sort_targets_by_confidence(&mut alias_targets, alias_ctx.rel_path, alias_imported_from); for t in &alias_targets { - let edge_key = ((caller_id as u64) << 32) | (t.id as u64); - if t.id != caller_id && !seen_edges.contains(&edge_key) && !pts_edge_map.contains_key(&edge_key) { - let conf = resolve::compute_confidence(rel_path, &t.file, alias_imported_from) 
+ let edge_key = ((alias_ctx.caller_id as u64) << 32) | (t.id as u64); + if t.id != alias_ctx.caller_id && !seen_edges.contains(&edge_key) && !pts_edge_map.contains_key(&edge_key) { + let conf = resolve::compute_confidence(alias_ctx.rel_path, &t.file, alias_imported_from) - PROPAGATION_HOP_PENALTY; if conf > 0.0 { pts_edge_map.insert(edge_key, edges.len()); edges.push(ComputedEdge { - source_id: caller_id, + source_id: alias_ctx.caller_id, target_id: t.id, kind: "calls".to_string(), confidence: conf, - dynamic: is_dynamic, + dynamic: alias_ctx.is_dynamic, }); } } @@ -593,8 +598,21 @@ fn process_file<'a>( }; if let Some(lookup_name) = lookup_name { emit_pts_alias_edges( - ctx, pts, &lookup_name, call.line, caller_id, caller_name, is_dynamic, - rel_path, &imported_names, &type_map, &seen_edges, &mut pts_edge_map, edges, + ctx, + &PtsAliasCtx { + pts, + lookup_name: &lookup_name, + call_line: call.line, + caller_id, + caller_name, + is_dynamic, + rel_path, + imported_names: &imported_names, + type_map: &type_map, + }, + &seen_edges, + &mut pts_edge_map, + edges, ); } } @@ -609,8 +627,21 @@ fn process_file<'a>( let receiver_key = format!("{}.{}", receiver, call.name); if pts.contains_key(receiver_key.as_str()) { emit_pts_alias_edges( - ctx, pts, &receiver_key, call.line, caller_id, caller_name, is_dynamic, - rel_path, &imported_names, &type_map, &seen_edges, &mut pts_edge_map, edges, + ctx, + &PtsAliasCtx { + pts, + lookup_name: &receiver_key, + call_line: call.line, + caller_id, + caller_name, + is_dynamic, + rel_path, + imported_names: &imported_names, + type_map: &type_map, + }, + &seen_edges, + &mut pts_edge_map, + edges, ); } } From 909e1df55b58fee7cc7d5942e1132be648fd7169 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:43:24 -0600 Subject: [PATCH 07/13] fix(wasm): sort call targets by confidence before emit to match native engine --- src/domain/graph/builder/stages/build-edges.ts | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git 
a/src/domain/graph/builder/stages/build-edges.ts b/src/domain/graph/builder/stages/build-edges.ts index 3c0f2e3c3..88027aee0 100644 --- a/src/domain/graph/builder/stages/build-edges.ts +++ b/src/domain/graph/builder/stages/build-edges.ts @@ -1107,6 +1107,19 @@ function buildFileCallEdges( } } + // Sort targets by confidence descending before emitting edges. + // For multi-target calls with duplicate (source_id, target_id) pairs the + // stored confidence depends on which duplicate is processed last — sorting + // here guarantees the highest-confidence target wins on dedup, matching the + // native engine's sort_targets_by_confidence call in build_edges.rs. + if (targets.length > 1) { + targets = [...targets].sort( + (a, b) => + computeConfidence(relPath, b.file, importedFrom ?? null) - + computeConfidence(relPath, a.file, importedFrom ?? null), + ); + } + for (const t of targets) { const edgeKey = `${caller.id}|${t.id}`; if (t.id !== caller.id) { From 66fc899813dd51fcb8ca94063741530ac5a1ff54 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:44:33 -0600 Subject: [PATCH 08/13] fix(bench): add 2 warmup runs and raise INCREMENTAL_RUNS to 5 for incremental tiers --- scripts/benchmark.ts | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/scripts/benchmark.ts b/scripts/benchmark.ts index fbc449813..642e2b1f5 100644 --- a/scripts/benchmark.ts +++ b/scripts/benchmark.ts @@ -90,7 +90,8 @@ try { if (typeof parser.disposeParsers === 'function') disposeParsers = parser.disposeParsers; } catch { /* older release — no worker pool to dispose */ } -const INCREMENTAL_RUNS = 3; +const WARMUP_RUNS = 2; +const INCREMENTAL_RUNS = 5; const QUERY_RUNS = 5; const QUERY_WARMUP_RUNS = 3; const PROBE_FILE = path.join(root, 'src', 'domain', 'queries.ts'); @@ -154,6 +155,9 @@ const dbSizeBytes = fs.statSync(dbPath).size; console.error(` [${engine}] Benchmarking no-op rebuild...`); let noopRebuildMs = null; try { + for (let i = 0; i < WARMUP_RUNS; i++) 
{ + await buildGraph(root, { engine, incremental: true, exclude: BENCH_EXCLUDE }); + } const noopTimings = []; for (let i = 0; i < INCREMENTAL_RUNS; i++) { const start = performance.now(); @@ -170,6 +174,10 @@ const original = fs.readFileSync(PROBE_FILE, 'utf8'); let oneFileRebuildMs = null; let oneFilePhases = null; try { + for (let i = 0; i < WARMUP_RUNS; i++) { + fs.writeFileSync(PROBE_FILE, original + `\n// warmup-${i}\n`); + await buildGraph(root, { engine, incremental: true, exclude: BENCH_EXCLUDE }); + } const oneFileRuns = []; for (let i = 0; i < INCREMENTAL_RUNS; i++) { fs.writeFileSync(PROBE_FILE, original + `\n// probe-${i}\n`); From 84e1a5f588beccda4ebfa6917785bb93132dc9cf Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:49:25 -0600 Subject: [PATCH 09/13] ci(bench): add per-PR perf canary for extractor/graph/native changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds .github/workflows/perf-canary.yml — a path-filtered workflow that fires on PRs touching src/extractors/, src/domain/graph/, or crates/** and runs only the incremental-benchmark suite (full build + no-op + 1-file rebuild, both engines). Catches the class of regressions that accumulated invisibly across the Phase 8.x PRs and were only detected at v3.12.0 publish time. The regression guard gains BENCH_CANARY=1 mode: raises thresholds to 50%/100%/150% (standard/noisy/WASM) and skips the build, query, and resolution suites — only incremental checks run. This absorbs shared- runner timing variance while still blocking catastrophic regressions (+98% full build, +1827% 1-file rebuild from v3.12.0). 
Closes #1433 --- .github/workflows/perf-canary.yml | 111 ++++++++++++++++++++++ tests/benchmarks/regression-guard.test.ts | 49 ++++++++-- 2 files changed, 150 insertions(+), 10 deletions(-) create mode 100644 .github/workflows/perf-canary.yml diff --git a/.github/workflows/perf-canary.yml b/.github/workflows/perf-canary.yml new file mode 100644 index 000000000..fb4432aed --- /dev/null +++ b/.github/workflows/perf-canary.yml @@ -0,0 +1,111 @@ +name: Perf Canary + +# Lightweight per-PR build-time regression gate for PRs that touch the +# extractor, graph-builder, or native Rust layers — the parts of the codebase +# that caused the v3.12.0 regressions (+1827% 1-file rebuild, +98% full build). +# +# Only the incremental-benchmark suite is run (full build + no-op + 1-file +# rebuild for both engines). The regression guard uses BENCH_CANARY=1 mode, +# which applies a 50% threshold instead of the full suite's 25% — enough +# to catch catastrophic regressions while tolerating CI runner variance. +# +# This is intentionally separate from the full pre-publish-benchmark job in +# ci.yml, which runs unconditionally on every PR and measures the complete +# suite. The canary completes in roughly 5–10 minutes; the full suite takes +# 20–60 minutes. 
+ +on: + pull_request: + paths: + - "src/extractors/**" + - "src/domain/graph/**" + - "crates/**" + - "scripts/benchmark.ts" + - "scripts/incremental-benchmark.ts" + - "scripts/lib/bench-config.ts" + - "scripts/lib/fork-engine.ts" + +concurrency: + group: perf-canary-${{ github.ref }} + cancel-in-progress: true + +jobs: + perf-canary: + name: Perf canary (incremental tiers) + runs-on: ubuntu-latest + env: + CODEGRAPH_FAST_SKIP_DIAG: "1" + + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - uses: actions/setup-node@v6 + with: + node-version: "22" + cache: "npm" + + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + + - name: Rust cache + uses: Swatinem/rust-cache@v2 + with: + workspaces: crates/codegraph-core + + - name: Install napi-rs CLI + timeout-minutes: 5 + run: npm install -g @napi-rs/cli@3 + + - name: Build native addon + working-directory: crates/codegraph-core + run: napi build --release + + - name: Install dependencies + timeout-minutes: 20 + shell: bash + run: | + for attempt in 1 2 3; do + npm install && break + if [ "$attempt" -lt 3 ]; then + echo "::warning::npm install attempt $attempt failed, retrying in 15s..." + sleep 15 + else + echo "::error::npm install failed after 3 attempts" + exit 1 + fi + done + + - name: Install native addon over published binary + run: node scripts/ci-install-native.mjs + + # Build dist/ so benchmarks load the same compiled JS that ships to npm, + # matching the methodology used by the full pre-publish-benchmark gate. 
+ - name: Build TypeScript + run: npm run build + + - name: Run incremental benchmark + timeout-minutes: 15 + run: | + STRIP_FLAG=$(node -e "const [M]=process.versions.node.split('.').map(Number); console.log(M>=23?'--strip-types':'--experimental-strip-types')") + node $STRIP_FLAG --import ./scripts/ts-resolve-loader.js scripts/incremental-benchmark.ts --version dev --dist > incremental-canary-result.json + + - name: Update incremental report + run: | + STRIP_FLAG=$(node -e "const [M]=process.versions.node.split('.').map(Number); console.log(M>=23?'--strip-types':'--experimental-strip-types')") + node $STRIP_FLAG scripts/update-incremental-report.ts incremental-canary-result.json + + - name: Regression guard (50% threshold) + env: + RUN_REGRESSION_GUARD: "1" + BENCH_CANARY: "1" + run: npm run test:regression-guard + + - name: Upload canary result + if: always() + uses: actions/upload-artifact@v7 + with: + name: incremental-canary-result + path: incremental-canary-result.json + if-no-files-found: warn diff --git a/tests/benchmarks/regression-guard.test.ts b/tests/benchmarks/regression-guard.test.ts index 7ca1689d8..939dde61d 100644 --- a/tests/benchmarks/regression-guard.test.ts +++ b/tests/benchmarks/regression-guard.test.ts @@ -16,6 +16,16 @@ import { describe, expect, test } from 'vitest'; // ── Configuration ──────────────────────────────────────────────────────── +/** + * When BENCH_CANARY=1, only incremental-benchmark checks run and the timing + * thresholds are raised to 50% (standard), 100% (noisy metrics), and 150% (WASM timings). This mode is used by the per-PR perf-canary + * workflow (.github/workflows/perf-canary.yml) which runs only on PRs + * touching src/extractors/, src/domain/graph/, crates/, or the benchmark scripts. The looser + * thresholds absorb CI runner variance while still catching the class of + * catastrophic regressions that hit v3.12.0 (+98%/+1827%). + */ +const BENCH_CANARY = process.env.BENCH_CANARY === '1'; + /** * Maximum allowed regression (as a fraction, e.g. 0.25 = 25%). 
* @@ -26,8 +36,10 @@ import { describe, expect, test } from 'vitest'; * * Genuinely high-variance sub-30ms metrics get a wider tolerance via * `NOISY_METRICS` below — see that set's docstring for rationale. + * + * In BENCH_CANARY mode this is overridden to 0.5 (50%) — see above. */ -const REGRESSION_THRESHOLD = 0.25; +const REGRESSION_THRESHOLD = BENCH_CANARY ? 0.5 : 0.25; /** * Wider regression threshold applied to metrics in NOISY_METRICS. @@ -41,8 +53,11 @@ const REGRESSION_THRESHOLD = 0.25; * Keeping the global threshold at 25% means a regression in the 30–100ms * range is still caught (e.g. 50ms→63ms = +26%, flagged), while sub-30ms * metrics in this set get the wider 50% allowance. + * + * In BENCH_CANARY mode this is overridden to 1.0 (100%) — the canary's + * purpose is to catch gross regressions (+50%+), not sub-30ms jitter. */ -const NOISY_METRIC_THRESHOLD = 0.5; +const NOISY_METRIC_THRESHOLD = BENCH_CANARY ? 1.0 : 0.5; /** * Metric labels treated as high-variance and given the NOISY_METRIC_THRESHOLD @@ -86,8 +101,12 @@ const NOISY_METRICS = new Set(['No-op rebuild', '1-file rebuild', 'fnDep * v3.0.1–3.4.0), which 75% still flags, while absorbing the ≤71% shared-runner * jitter. Size metrics (DB bytes/file) are engine-independent and excluded from * this widening via SIZE_METRICS below — they keep the strict threshold. + * + * In BENCH_CANARY mode this is overridden to 1.5 (150%) — the canary targets + * gross regressions only, and WASM incremental metrics have extreme variance + * on shared runners. */ -const WASM_TIMING_THRESHOLD = 0.75; +const WASM_TIMING_THRESHOLD = BENCH_CANARY ? 1.5 : 0.75; /** * Metric labels that measure size/count rather than wall-clock time. These are @@ -608,6 +627,10 @@ interface IncrementalEntry { // in the default `npm test` run so docs commits that merge already-recorded // regressed history into main don't trigger false failures — by then the // release has already passed the gate. 
+// +// When BENCH_CANARY=1 (set by .github/workflows/perf-canary.yml), only the +// incremental-benchmark suite runs and thresholds are loosened (50% base, +// 100% noisy, 150% WASM timing) — see the BENCH_CANARY constant above. const RUN_REGRESSION_GUARD = process.env.RUN_REGRESSION_GUARD === '1'; describe.runIf(RUN_REGRESSION_GUARD)('Benchmark regression guard', () => { @@ -627,7 +650,9 @@ describe.runIf(RUN_REGRESSION_GUARD)('Benchmark regression guard', () => { // Warn when KNOWN_REGRESSIONS entries are stale (more than 1 minor version // behind the current package version). This makes the stale-exemption // problem self-detecting rather than requiring manual bookkeeping. - test('KNOWN_REGRESSIONS entries are not stale', () => { + // Skipped in canary mode — this check is maintenance-only and irrelevant + // for a lightweight build-time regression gate. + test.skipIf(BENCH_CANARY)('KNOWN_REGRESSIONS entries are not stale', () => { // eslint-disable-next-line @typescript-eslint/no-require-imports const pkgVersion: string = JSON.parse( fs.readFileSync(path.join(ROOT, 'package.json'), 'utf8'), @@ -656,18 +681,22 @@ describe.runIf(RUN_REGRESSION_GUARD)('Benchmark regression guard', () => { ).toBe(0); }); - // Validate newest-first ordering assumption for all history arrays - test('build history is sorted newest-first', () => { + // Validate newest-first ordering assumption for all history arrays. + // Build/query ordering checks are skipped in canary mode (only incremental + // history is updated by the canary workflow). 
+ test.skipIf(BENCH_CANARY)('build history is sorted newest-first', () => { assertNewestFirst(buildHistory, 'Build benchmark'); }); - test('query history is sorted newest-first', () => { + test.skipIf(BENCH_CANARY)('query history is sorted newest-first', () => { assertNewestFirst(queryHistory, 'Query benchmark'); }); test('incremental history is sorted newest-first', () => { assertNewestFirst(incrementalHistory, 'Incremental benchmark'); }); - describe('build benchmarks', () => { + // In canary mode only the incremental suite runs — build/query/resolution + // benchmarks are not measured by the perf-canary workflow. + describe.skipIf(BENCH_CANARY)('build benchmarks', () => { for (const engineKey of ['native', 'wasm'] as const) { const pair = findLatestPair(buildHistory, (e) => e[engineKey] != null); if (!pair) continue; @@ -700,7 +729,7 @@ describe.runIf(RUN_REGRESSION_GUARD)('Benchmark regression guard', () => { }); }); - describe('query benchmarks', () => { + describe.skipIf(BENCH_CANARY)('query benchmarks', () => { for (const engineKey of ['native', 'wasm'] as const) { const pair = findLatestPair(queryHistory, (e) => e[engineKey] != null); if (!pair) continue; @@ -803,7 +832,7 @@ describe.runIf(RUN_REGRESSION_GUARD)('Benchmark regression guard', () => { }); }); - describe('resolution benchmarks', () => { + describe.skipIf(BENCH_CANARY)('resolution benchmarks', () => { /** * Resolution precision/recall regression thresholds. 
* These are percentage-point drops (not relative %) because resolution From d07b3588d5eb8c90d1901d0fcdc794f995a08d1e Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:53:10 -0600 Subject: [PATCH 10/13] fix(perf): plumb symbolsOnly through parseFilesWasmInline to skip analysis visitors --- src/domain/parser.ts | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/domain/parser.ts b/src/domain/parser.ts index 54aa7b994..9ff4fe58e 100644 --- a/src/domain/parser.ts +++ b/src/domain/parser.ts @@ -1198,11 +1198,16 @@ const INLINE_BACKFILL_THRESHOLD = 16; * * Returns symbols with `_tree` set so `runAnalyses` can run AST/CFG/dataflow * visitors via the unified walker (mirrors how WASM-engine results behaved - * before the worker pool was introduced). + * before the worker pool was introduced), unless `symbolsOnly` is true — in + * that case `_tree` is not set, skipping all analysis visitor walks. Use + * `symbolsOnly` when only definitions/calls/typeMap are needed (e.g. the + * this/super dispatch post-pass) to avoid the analysis overhead on the inline + * path, matching the optimization already applied to the worker-pool path. */ async function parseFilesWasmInline( filePaths: string[], rootDir: string, + symbolsOnly = false, ): Promise> { const result = new Map(); if (filePaths.length === 0) return result; @@ -1220,7 +1225,12 @@ async function parseFilesWasmInline( if (!extracted) continue; const relPath = path.relative(rootDir, filePath).split(path.sep).join('/'); const symbols = extracted.symbols as ExtractorOutput & { _tree?: unknown; _langId?: string }; - symbols._tree = extracted.tree; + // When symbolsOnly=true, skip setting _tree so runAnalyses does not run + // AST/complexity/CFG/dataflow visitor walks — only definitions/calls/typeMap + // are needed by callers like the this/super dispatch post-pass. 
+ if (!symbolsOnly) { + symbols._tree = extracted.tree; + } symbols._langId = extracted.langId; result.set(relPath, symbols); } @@ -1246,7 +1256,7 @@ export async function parseFilesWasmForBackfill( opts: { symbolsOnly?: boolean } = {}, ): Promise> { if (filePaths.length <= INLINE_BACKFILL_THRESHOLD) { - return parseFilesWasmInline(filePaths, rootDir); + return parseFilesWasmInline(filePaths, rootDir, opts.symbolsOnly); } return parseFilesWasm(filePaths, rootDir, opts.symbolsOnly ? EXTRACT_ONLY : FULL_ANALYSIS); } From 3db5d8ccb1b45071cbde2d0980f6fde707da119d Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:56:46 -0600 Subject: [PATCH 11/13] fix(perf): scope runPostNativeCha to changed files on incremental builds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On incremental builds, runPostNativeCha previously scanned all call→qualified-method edges in the DB (~12ms flat, O(graph size)), even for 1-file changes where no hierarchy or RTA evidence changed. Add two cheap indexed gate queries. Gate A checks whether any changed file introduced a class/interface/trait/struct/record node (hierarchy may have new implementors reachable from unchanged call sites). Gate B checks whether any changed file added a call edge to a class-kind target (RTA set may have grown, enabling previously filtered expansions in unchanged callers). If neither gate fires, restrict the candidate query to src.file IN changedFiles — safe because the hierarchy and instantiated set are unchanged for all other files. Full builds (isFullBuild=true) and cases where either gate fires retain the existing full-scan behaviour. Mirrors the changed-files scoping pattern of runPostNativeThisDispatch. 
Closes #1441 --- .../builder/stages/native-orchestrator.ts | 130 ++++++++++++++++-- 1 file changed, 119 insertions(+), 11 deletions(-) diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index e5c5bd9b5..a8981c1be 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -401,12 +401,26 @@ async function runPostNativeAnalysis( * Note: `this`/`super` dispatch is handled separately by `runPostNativeThisDispatch`, * which WASM-re-parses JS/TS files to obtain raw call site receiver info. * + * `changedFiles` controls candidate scoping on incremental builds: + * - null → full build; scan all call→method edges (existing behaviour). + * - array → incremental; two cheap gate queries decide scope: + * Gate A: any class/interface/trait/struct/record nodes in changed files? + * If yes, a new implementor may have appeared — full scan required. + * Gate B: any `calls` edges from changed-file sources targeting class-kind + * nodes? If yes, the RTA set may have grown, enabling previously + * filtered expansions in unchanged caller files — full scan required. + * If neither gate fires: scope `callToMethods` to `src.file IN changedFiles` + * (safe because no hierarchy or RTA evidence changed). + * * Returns the count of newly inserted CHA edges plus the set of files containing * the new edges' endpoints, so the caller can scope role re-classification to the * nodes whose fan-in/out actually changed. A zero count means no edges were added * and role re-classification is unnecessary. 
*/ -function runPostNativeCha(db: BetterSqlite3Database): { +function runPostNativeCha( + db: BetterSqlite3Database, + changedFiles: string[] | null, +): { newEdgeCount: number; affectedFiles: Set; } { @@ -474,19 +488,111 @@ function runPostNativeCha(db: BetterSqlite3Database): { debug('runPostNativeCha: no constructor-call evidence found — proceeding without RTA filter'); } + // ── Incremental candidate scoping ────────────────────────────────────────── + // On incremental builds, two gate queries decide whether to restrict the + // candidate scan to changed-file call sites or run the full graph scan. + // + // Gate A: did a changed file add/change a class hierarchy node? + // A new `extends`/`implements` edge means a previously-untracked implementor + // is now in the hierarchy — unchanged call sites in OTHER files may gain new + // valid expansions, so the full scan is required. + // + // Gate B: did a changed file add new RTA evidence (`new ConcreteX()`)? + // A new `calls` edge to a class-kind target means the instantiated set grew — + // previously RTA-filtered expansions in unchanged caller files become + // admissible, so the full scan is required. + // + // If neither gate fires, the hierarchy and RTA set are unchanged for all files + // outside changedFiles, so restricting to changed-file sources is safe. + let scopeToChangedFiles = false; // true → add WHERE src.file IN changedFiles + if (changedFiles !== null && changedFiles.length > 0) { + // Gate A: class/interface/trait/struct/record nodes in changed files? 
+ const CHUNK_SIZE = 500; + let gateAFired = false; + for (let i = 0; i < changedFiles.length && !gateAFired; i += CHUNK_SIZE) { + const chunk = changedFiles.slice(i, i + CHUNK_SIZE); + const ph = chunk.map(() => '?').join(','); + const row = db + .prepare( + `SELECT 1 FROM nodes + WHERE file IN (${ph}) + AND kind IN ('class', 'interface', 'trait', 'struct', 'record') + LIMIT 1`, + ) + .get(...chunk); + if (row) gateAFired = true; + } + + // Gate B: calls from changed-file sources to class-kind targets? + let gateBFired = false; + if (!gateAFired) { + for (let i = 0; i < changedFiles.length && !gateBFired; i += CHUNK_SIZE) { + const chunk = changedFiles.slice(i, i + CHUNK_SIZE); + const ph = chunk.map(() => '?').join(','); + const row = db + .prepare( + `SELECT 1 FROM edges e + JOIN nodes src ON e.source_id = src.id + JOIN nodes tgt ON e.target_id = tgt.id + WHERE e.kind = 'calls' AND tgt.kind = 'class' + AND src.file IN (${ph}) + LIMIT 1`, + ) + .get(...chunk); + if (row) gateBFired = true; + } + } + + if (!gateAFired && !gateBFired) { + scopeToChangedFiles = true; + debug( + `runPostNativeCha: neither gate fired — scoping candidate scan to ${changedFiles.length} changed file(s)`, + ); + } else { + debug( + `runPostNativeCha: ${gateAFired ? 'Gate A (hierarchy)' : 'Gate B (RTA)'} fired — running full scan`, + ); + } + } + // Find existing call edges targeting qualified methods (e.g., 'IWorker.doWork'). // Include the caller node's file so confidence can be computed file-pair-aware, // matching the WASM path's computeConfidence(callerFile, targetFile, null) - CHA_DISPATCH_PENALTY formula. 
- const callToMethods = db - .prepare(` - SELECT e.source_id, tgt.name AS method_name, src.file AS caller_file - FROM edges e - JOIN nodes tgt ON e.target_id = tgt.id - JOIN nodes src ON e.source_id = src.id - WHERE e.kind = 'calls' AND tgt.kind = 'method' - AND INSTR(tgt.name, '.') > 0 - `) - .all() as Array<{ source_id: number; method_name: string; caller_file: string | null }>; + // When scopeToChangedFiles is true, restrict to call sites in the changed files + // (safe because no hierarchy or RTA evidence changed outside those files). + let callToMethods: Array<{ source_id: number; method_name: string; caller_file: string | null }>; + if (scopeToChangedFiles && changedFiles && changedFiles.length > 0) { + const CHUNK_SIZE = 500; + const rows: Array<{ source_id: number; method_name: string; caller_file: string | null }> = []; + for (let i = 0; i < changedFiles.length; i += CHUNK_SIZE) { + const chunk = changedFiles.slice(i, i + CHUNK_SIZE); + const ph = chunk.map(() => '?').join(','); + const chunkRows = db + .prepare( + `SELECT e.source_id, tgt.name AS method_name, src.file AS caller_file + FROM edges e + JOIN nodes tgt ON e.target_id = tgt.id + JOIN nodes src ON e.source_id = src.id + WHERE e.kind = 'calls' AND tgt.kind = 'method' + AND INSTR(tgt.name, '.') > 0 + AND src.file IN (${ph})`, + ) + .all(...chunk) as Array<{ source_id: number; method_name: string; caller_file: string | null }>; + rows.push(...chunkRows); + } + callToMethods = rows; + } else { + callToMethods = db + .prepare(` + SELECT e.source_id, tgt.name AS method_name, src.file AS caller_file + FROM edges e + JOIN nodes tgt ON e.target_id = tgt.id + JOIN nodes src ON e.source_id = src.id + WHERE e.kind = 'calls' AND tgt.kind = 'method' + AND INSTR(tgt.name, '.') > 0 + `) + .all() as Array<{ source_id: number; method_name: string; caller_file: string | null }>; + } // Seed seen-pairs only from the source_ids we'll be expanding — avoids loading every // call edge in the DB (which would be O(all 
edges)) for large codebases. @@ -1427,6 +1533,8 @@ export async function tryNativeOrchestrator( // no WASM re-parse post-pass is needed for them. `Foo.prototype.bar = fn` likewise. const { newEdgeCount: chaEdgeCount, affectedFiles: chaAffectedFiles } = runPostNativeCha( ctx.db as unknown as BetterSqlite3Database, + // null = full build (scan all call→method edges); array = incremental (gate queries decide scope) + result.isFullBuild ? null : (result.changedFiles ?? null), ); // Phase 8.5: this/super dispatch — hybrid WASM re-parse to resolve call sites From a79855e5f89b1705cfe8b3044a70bf812c68af0d Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 20:13:08 -0600 Subject: [PATCH 12/13] fix(perf): broaden Gate B to cover constructor/function-kind RTA fallback schema Gate B previously checked only `tgt.kind = 'class'`, but the RTA seed has a fallback that matches `tgt.kind IN ('constructor', 'function')` when no class-kind constructor edges exist (older native engine schemas). On codebases where the fallback path is always active, Gate B would never fire, causing scopeToChangedFiles to be set incorrectly and silently dropping CHA edges for unchanged callers whose RTA evidence lives in the fallback-schema rows. Broaden Gate B to `tgt.kind IN ('class', 'constructor', 'function')` to mirror the full two-shape RTA seed. Also fix formatter violation on the .all() cast. --- .../builder/stages/native-orchestrator.ts | 32 +++++++++++++------ 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index a8981c1be..6789e0fbf 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -406,9 +406,11 @@ async function runPostNativeAnalysis( * - array → incremental; two cheap gate queries decide scope: * Gate A: any class/interface/trait/struct/record nodes in changed files? 
* If yes, a new implementor may have appeared — full scan required. - * Gate B: any `calls` edges from changed-file sources targeting class-kind - * nodes? If yes, the RTA set may have grown, enabling previously - * filtered expansions in unchanged caller files — full scan required. + * Gate B: any `calls` edges from changed-file sources targeting + * class/constructor/function-kind nodes? If yes, the RTA set may + * have grown (also covers the older-schema fallback where + * constructor calls target `constructor`/`function` nodes instead + * of `class` nodes) — full scan required. * If neither gate fires: scope `callToMethods` to `src.file IN changedFiles` * (safe because no hierarchy or RTA evidence changed). * @@ -498,9 +500,10 @@ function runPostNativeCha( // valid expansions, so the full scan is required. // // Gate B: did a changed file add new RTA evidence (`new ConcreteX()`)? - // A new `calls` edge to a class-kind target means the instantiated set grew — - // previously RTA-filtered expansions in unchanged caller files become - // admissible, so the full scan is required. + // A new `calls` edge to a class/constructor/function-kind target means the + // instantiated set grew — previously RTA-filtered expansions in unchanged + // caller files become admissible, so the full scan is required. + // (`constructor`/`function` cover the older native engine fallback schema.) // // If neither gate fires, the hierarchy and RTA set are unchanged for all files // outside changedFiles, so restricting to changed-file sources is safe. @@ -523,7 +526,13 @@ function runPostNativeCha( if (row) gateAFired = true; } - // Gate B: calls from changed-file sources to class-kind targets? + // Gate B: calls from changed-file sources to class-kind targets (or + // constructor/function-kind targets in the older native engine fallback schema)? 
+ // Mirrors the two-shape RTA seed: primary checks `tgt.kind = 'class'`; older + // native engine schemas record constructor calls against `constructor`/`function` + // kinds instead. Including all three kinds here prevents Gate B from silently + // passing on older-schema DBs, which would incorrectly set scopeToChangedFiles + // and miss CHA edges whose RTA evidence lives in the fallback-schema rows. let gateBFired = false; if (!gateAFired) { for (let i = 0; i < changedFiles.length && !gateBFired; i += CHUNK_SIZE) { @@ -534,7 +543,8 @@ function runPostNativeCha( `SELECT 1 FROM edges e JOIN nodes src ON e.source_id = src.id JOIN nodes tgt ON e.target_id = tgt.id - WHERE e.kind = 'calls' AND tgt.kind = 'class' + WHERE e.kind = 'calls' + AND tgt.kind IN ('class', 'constructor', 'function') AND src.file IN (${ph}) LIMIT 1`, ) @@ -577,7 +587,11 @@ function runPostNativeCha( AND INSTR(tgt.name, '.') > 0 AND src.file IN (${ph})`, ) - .all(...chunk) as Array<{ source_id: number; method_name: string; caller_file: string | null }>; + .all(...chunk) as Array<{ + source_id: number; + method_name: string; + caller_file: string | null; + }>; rows.push(...chunkRows); } callToMethods = rows; From 76e5910cc745e16e16fa5b61cd388957f7be8315 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 20:19:51 -0600 Subject: [PATCH 13/13] docs(native): document Gate A deletion-safety invariant in runPostNativeCha --- src/domain/graph/builder/stages/native-orchestrator.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index 6789e0fbf..33a3f62d9 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -498,6 +498,10 @@ function runPostNativeCha( // A new `extends`/`implements` edge means a previously-untracked implementor // is now in the hierarchy — unchanged call sites in OTHER files may 
gain new // valid expansions, so the full scan is required. + // Note: *removed* class nodes are safe — Rust's `purge_changed_files` runs + // before this post-pass and deletes stale nodes and their hierarchy edges, so + // Gate A queries the post-purge DB. A deleted class returns no row here, which + // is correct: its stale CHA edges were already cleaned up by the Rust purge. // // Gate B: did a changed file add new RTA evidence (`new ConcreteX()`)? // A new `calls` edge to a class/constructor/function-kind target means the