From 3892e7dcddd5173d4d83cc6839ce0d888aa36226 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:31:28 -0600 Subject: [PATCH 01/15] chore: gitignore napi-generated artifacts in crates/codegraph-core --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index aa62bcb59..2606dd88f 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,9 @@ dist/ coverage/ .env grammars/*.wasm +crates/codegraph-core/index.js +crates/codegraph-core/index.d.ts +crates/codegraph-core/*.node .claude/session-edits.log .claude/worktrees/ generated/DEPENDENCIES.md From ef8ea4fb31f4c073529c8772fa3aa9288a992fd6 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:32:19 -0600 Subject: [PATCH 02/15] chore(tests): remove unused biome suppression in visitor.test.ts --- tests/unit/visitor.test.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/visitor.test.ts b/tests/unit/visitor.test.ts index 992b4307a..62dc5b3e6 100644 --- a/tests/unit/visitor.test.ts +++ b/tests/unit/visitor.test.ts @@ -4,7 +4,6 @@ import { describe, expect, it } from 'vitest'; // We need a tree-sitter tree to test. Use the JS parser. 
-// biome-ignore lint/suspicious/noExplicitAny: tree-sitter parser type is complex and not worth typing for tests let parse: any; async function ensureParser() { From a372b82593ddfecd591c17133ef378872ffbec13 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:33:09 -0600 Subject: [PATCH 03/15] fix(titan-run): sync --start-from enum and phase-timestamp list with actual phases --- .claude/skills/titan-run/SKILL.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.claude/skills/titan-run/SKILL.md b/.claude/skills/titan-run/SKILL.md index 51e22569f..04c3fdc92 100644 --- a/.claude/skills/titan-run/SKILL.md +++ b/.claude/skills/titan-run/SKILL.md @@ -1,7 +1,7 @@ --- name: titan-run description: Run the full Titan Paradigm pipeline end-to-end by dispatching each phase to sub-agents with fresh context windows. Orchestrates recon → gauntlet → sync → forge → grind (+ repo-provided parity audit) automatically. -argument-hint: <--skip-recon> <--skip-gauntlet> <--start-from recon|gauntlet|sync|forge|grind|parity> <--gauntlet-batch-size 5> <--yes> +argument-hint: <--skip-recon> <--skip-gauntlet> <--start-from recon|gauntlet|sync|forge|grind|parity|close> <--gauntlet-batch-size 5> <--yes> allowed-tools: Agent, Read, Bash, Glob, Write, Edit --- @@ -50,7 +50,7 @@ You are the **orchestrator** for the full Titan Paradigm pipeline. Your job is t node -e "const fs=require('fs');const s=JSON.parse(fs.readFileSync('.codegraph/titan/titan-state.json','utf8'));s.phaseTimestamps=s.phaseTimestamps||{};s.phaseTimestamps['']=s.phaseTimestamps['']||{};s.phaseTimestamps[''].completedAt=new Date().toISOString();fs.writeFileSync('.codegraph/titan/titan-state.json',JSON.stringify(s,null,2));" ``` - Replace `` with `recon`, `gauntlet`, `sync`, `forge`, `parity`, or `close`. 
**Run the start command immediately before dispatching each phase's first sub-agent, and the completion command immediately after post-phase validation passes.** If resuming a phase (e.g., gauntlet loop iteration 2+), do NOT overwrite `startedAt` — only set it if it doesn't already exist. + Replace `` with `recon`, `gauntlet`, `sync`, `forge`, `grind`, `parity`, or `close`. **Run the start command immediately before dispatching each phase's first sub-agent, and the completion command immediately after post-phase validation passes.** If resuming a phase (e.g., gauntlet loop iteration 2+), do NOT overwrite `startedAt` — only set it if it doesn't already exist. **Timestamp validation:** After recording `completedAt` for any phase, verify `startedAt < completedAt`: ```bash From 9a52c7cc5eea2cba016d14ed3928e07128fad4e3 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:36:04 -0600 Subject: [PATCH 04/15] fix(hooks): track Bash file modifications via before/after git status diff Adds snapshot-pre-bash.sh (PreToolUse Bash) + track-bash-writes.sh (PostToolUse Bash): the pre-hook captures git status --porcelain to a per-worktree temp file before each Bash call; the post-hook diffs the before/after state and appends newly modified or created files to .claude/session-edits.log. This closes the gap where files written by sed -i, printf redirects, tee, heredocs, or build tools (Cargo.lock, lockfiles) were never recorded, causing guard-git.sh to emit false-positive BLOCKED errors. 
Closes #1457 --- .claude/hooks/snapshot-pre-bash.sh | 54 +++++++++++++ .claude/hooks/track-bash-writes.sh | 119 +++++++++++++++++++++++++++++ .claude/settings.json | 10 +++ 3 files changed, 183 insertions(+) create mode 100755 .claude/hooks/snapshot-pre-bash.sh create mode 100755 .claude/hooks/track-bash-writes.sh diff --git a/.claude/hooks/snapshot-pre-bash.sh b/.claude/hooks/snapshot-pre-bash.sh new file mode 100755 index 000000000..a91caebbb --- /dev/null +++ b/.claude/hooks/snapshot-pre-bash.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +# snapshot-pre-bash.sh — PreToolUse hook for Bash tool calls +# Snapshots `git status --porcelain` to a temp file before each Bash call so +# that track-bash-writes.sh (PostToolUse) can diff the before/after state and +# log files newly modified by the command to .claude/session-edits.log. +# Always exits 0 (informational only, never blocks). + +set -euo pipefail + +INPUT=$(cat) + +# Extract the command from tool_input JSON +COMMAND=$(echo "$INPUT" | node -e " + let d=''; + process.stdin.on('data',c=>d+=c); + process.stdin.on('end',()=>{ + const p=JSON.parse(d).tool_input?.command||''; + if(p)process.stdout.write(p); + }); +" 2>/dev/null) || true + +if [ -z "$COMMAND" ]; then + exit 0 +fi + +# Skip read-only commands that can never write files — reduces snapshot overhead +# for the most common Bash calls (ls, cat, grep, git log, git status, etc.). +# sed is intentionally NOT in this list because `sed -i` modifies files in-place. 
+if [[ "$COMMAND" != *$'\n'* ]] && ! grep -qE '[>|;&`]|\$\(' <<< "$COMMAND" && grep -qE '^\s*(ls|cat|head|tail|grep|git\s+(log|status|diff|show|branch|remote|fetch|rev-parse|stash\s+list|ls-files|blame|describe|tag|config\s+--get)|gh\s+(pr|issue|repo)\s+(view|list|status)|echo|printf|pwd|which|npx\s+--version|wc)\b' <<< "$COMMAND"; then # fast path only for one simple command: redirects, pipes, chaining and substitutions can write files + exit 0 +fi + +# Resolve the project root (worktree-aware — each worktree has its own .claude/) +PROJECT_DIR=$(git rev-parse --show-toplevel 2>/dev/null) || PROJECT_DIR="${CLAUDE_PROJECT_DIR:-.}" + +# Key the snapshot file to the project root so parallel worktrees don't collide. +# Use a simple hash of the path — just enough to be unique per worktree. +PROJECT_HASH=$(echo "$PROJECT_DIR" | node -e " + const crypto = require('crypto'); + let d=''; + process.stdin.on('data',c=>d+=c); + process.stdin.on('end',()=>{ + process.stdout.write(crypto.createHash('sha1').update(d.trim()).digest('hex').slice(0,8)); + }); +" 2>/dev/null) || PROJECT_HASH="default" + +SNAPSHOT_FILE="/tmp/claude-bash-snapshot-${PROJECT_HASH}.txt" + +# Capture current git status --porcelain. +# Lines look like: "XY filename" or "XY orig -> dest" (rename). +# We only care about the status marker and path — porcelain is stable across git versions. +git -C "$PROJECT_DIR" status --porcelain 2>/dev/null > "$SNAPSHOT_FILE" || true + +exit 0 diff --git a/.claude/hooks/track-bash-writes.sh b/.claude/hooks/track-bash-writes.sh new file mode 100755 index 000000000..e5d1ded98 --- /dev/null +++ b/.claude/hooks/track-bash-writes.sh @@ -0,0 +1,119 @@ +#!/usr/bin/env bash +# track-bash-writes.sh — PostToolUse hook for Bash tool calls +# Compares `git status --porcelain` against the snapshot taken by +# snapshot-pre-bash.sh (PreToolUse) to detect files newly modified or +# created by the Bash command, then appends them to .claude/session-edits.log +# so that guard-git.sh can validate commits correctly. +# Always exits 0 (informational only, never blocks). 
+ +set -euo pipefail + +INPUT=$(cat) + +# Extract the command from tool_input JSON +COMMAND=$(echo "$INPUT" | node -e " + let d=''; + process.stdin.on('data',c=>d+=c); + process.stdin.on('end',()=>{ + const p=JSON.parse(d).tool_input?.command||''; + if(p)process.stdout.write(p); + }); +" 2>/dev/null) || true + +if [ -z "$COMMAND" ]; then + exit 0 +fi + +# Resolve the project root (worktree-aware — each worktree has its own .claude/) +PROJECT_DIR=$(git rev-parse --show-toplevel 2>/dev/null) || PROJECT_DIR="${CLAUDE_PROJECT_DIR:-.}" + +# Reproduce the same project hash used by snapshot-pre-bash.sh +PROJECT_HASH=$(echo "$PROJECT_DIR" | node -e " + const crypto = require('crypto'); + let d=''; + process.stdin.on('data',c=>d+=c); + process.stdin.on('end',()=>{ + process.stdout.write(crypto.createHash('sha1').update(d.trim()).digest('hex').slice(0,8)); + }); +" 2>/dev/null) || PROJECT_HASH="default" + +SNAPSHOT_FILE="/tmp/claude-bash-snapshot-${PROJECT_HASH}.txt" + +# If there is no snapshot (hook was not installed yet, or the pre-hook was +# skipped for a read-only command) we have no baseline — exit cleanly. +if [ ! -f "$SNAPSHOT_FILE" ]; then + exit 0 +fi + +# Capture current state after the command ran +AFTER=$(git -C "$PROJECT_DIR" status --porcelain 2>/dev/null) || true + +# Read the before-state +BEFORE=$(cat "$SNAPSHOT_FILE") || true + +# Clean up the snapshot so it doesn't pollute the next command's pre-hook +rm -f "$SNAPSHOT_FILE" + +# Build the set of paths that existed (as dirty) before the command ran. +# porcelain format: "XY path" or "XY original -> new" (rename). +# We extract every path token after the two-char status code. 
+parse_paths() { + local status_output="$1" + echo "$status_output" | awk ' + /^[ MADRCUT?!][ MADRCUT?!] / { # XY status as two bracket expressions: {2} intervals are not supported by every awk; T is a type change + # Drop the two-char status + space + rest = substr($0, 4) + # Handle rename: "old -> new" + if (index(rest, " -> ") > 0) { + n = split(rest, parts, " -> ") + for (i = 1; i <= n; i++) { + p = parts[i] + gsub(/^"/, "", p); gsub(/"$/, "", p) + if (p != "") print p + } + } else { + gsub(/^"/, "", rest); gsub(/"$/, "", rest) + if (rest != "") print rest + } + } + ' +} + +BEFORE_PATHS=$(parse_paths "$BEFORE" | sort) +AFTER_PATHS=$(parse_paths "$AFTER" | sort) + +if [ -z "$AFTER_PATHS" ]; then + exit 0 +fi + +# Find paths present in AFTER but not in BEFORE — these were newly dirtied +# (modified, created, or renamed-to) by the Bash command. +NEW_PATHS=$(comm -13 <(echo "$BEFORE_PATHS") <(echo "$AFTER_PATHS")) || true + +if [ -z "$NEW_PATHS" ]; then + exit 0 +fi + +# Also exclude paths that were already tracked by track-edits.sh or other hooks +# (i.e. already in the session-edits.log) so we don't double-log. 
+LOG_FILE="$PROJECT_DIR/.claude/session-edits.log" +ALREADY_LOGGED="" +if [ -f "$LOG_FILE" ] && [ -s "$LOG_FILE" ]; then + ALREADY_LOGGED=$(awk '{print $2}' "$LOG_FILE" | sort -u) +fi + +mkdir -p "$(dirname "$LOG_FILE")" +TS=$(date -u +%Y-%m-%dT%H:%M:%SZ) + +while IFS= read -r rel_path; do + if [ -z "$rel_path" ]; then + continue + fi + # Skip if already in the log from a prior hook (Edit/Write/track-moves) + if [ -n "$ALREADY_LOGGED" ] && echo "$ALREADY_LOGGED" | grep -qxF "$rel_path"; then + continue + fi + echo "$TS $rel_path" >> "$LOG_FILE" +done <<< "$NEW_PATHS" + +exit 0 diff --git a/.claude/settings.json b/.claude/settings.json index b3acd6d1b..7ab746809 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -7,6 +7,11 @@ { "matcher": "Bash", "hooks": [ + { + "type": "command", + "command": "p=\"${CLAUDE_PROJECT_DIR}\"; [ -d \"$p/.claude/hooks\" ] || p=\"$(git rev-parse --show-toplevel 2>/dev/null)\"; [ -d \"$p/.claude/hooks\" ] || exit 0; bash \"$p/.claude/hooks/snapshot-pre-bash.sh\"", + "timeout": 5 + }, { "type": "command", "command": "p=\"${CLAUDE_PROJECT_DIR}\"; [ -d \"$p/.claude/hooks\" ] || p=\"$(git rev-parse --show-toplevel 2>/dev/null)\"; [ -d \"$p/.claude/hooks\" ] || exit 0; bash \"$p/.claude/hooks/check-readme.sh\"", @@ -79,6 +84,11 @@ { "matcher": "Bash", "hooks": [ + { + "type": "command", + "command": "p=\"${CLAUDE_PROJECT_DIR}\"; [ -d \"$p/.claude/hooks\" ] || p=\"$(git rev-parse --show-toplevel 2>/dev/null)\"; [ -d \"$p/.claude/hooks\" ] || exit 0; bash \"$p/.claude/hooks/track-bash-writes.sh\"", + "timeout": 5 + }, { "type": "command", "command": "p=\"${CLAUDE_PROJECT_DIR}\"; [ -d \"$p/.claude/hooks\" ] || p=\"$(git rev-parse --show-toplevel 2>/dev/null)\"; [ -d \"$p/.claude/hooks\" ] || exit 0; bash \"$p/.claude/hooks/track-moves.sh\"", From 85a26df4f1a06db752548b3f5e3d299ec5f46806 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:39:52 -0600 Subject: [PATCH 05/15] chore(native): remove dead code (unused var, 
method, variant, fields) - clojure.rs: annotate lifetime-anchor assignment to silence false-positive - cfg.rs: remove never-called start_line_of method - complexity.rs: remove never-constructed NotHandled variant; convert irrefutable if-let patterns to plain let destructures - dataflow.rs: remove never-read callee fields from CallReturn/Destructured - incremental.rs: remove never-read lang field from CacheEntry cargo check and cargo clippy both clean after these changes. --- crates/codegraph-core/src/ast_analysis/cfg.rs | 4 --- .../src/ast_analysis/complexity.rs | 36 ++++++++----------- .../src/ast_analysis/dataflow.rs | 10 +++--- .../src/domain/graph/builder/incremental.rs | 3 +- .../codegraph-core/src/extractors/clojure.rs | 4 +++ 5 files changed, 25 insertions(+), 32 deletions(-) diff --git a/crates/codegraph-core/src/ast_analysis/cfg.rs b/crates/codegraph-core/src/ast_analysis/cfg.rs index 226a31362..fb784d40f 100644 --- a/crates/codegraph-core/src/ast_analysis/cfg.rs +++ b/crates/codegraph-core/src/ast_analysis/cfg.rs @@ -659,10 +659,6 @@ impl<'a> CfgBuilder<'a> { } } - fn start_line_of(&self, block_idx: u32) -> Option { - self.blocks.iter().find(|b| b.index == block_idx).and_then(|b| b.start_line) - } - /// Get statement children from a block or statement list. fn get_statements<'b>(&self, node: &Node<'b>) -> Vec> { let kind = node.kind(); diff --git a/crates/codegraph-core/src/ast_analysis/complexity.rs b/crates/codegraph-core/src/ast_analysis/complexity.rs index fdd572512..9827b091f 100644 --- a/crates/codegraph-core/src/ast_analysis/complexity.rs +++ b/crates/codegraph-core/src/ast_analysis/complexity.rs @@ -516,8 +516,6 @@ fn walk_children( enum BranchAction { /// Node handled — walk children at the given nesting delta, then return. Handled { cognitive_delta: u32, cyclomatic_delta: u32, nesting_delta: u32 }, - /// Not a special branch pattern — fall through to normal processing. 
- NotHandled, } /// Classify a branch node (one where `rules.is_branch(kind)` is true). @@ -675,14 +673,12 @@ fn walk( // Branch/control flow nodes (skip keyword leaf tokens) if rules.is_branch(kind) && node.child_count() > 0 { - if let BranchAction::Handled { cognitive_delta, cyclomatic_delta, nesting_delta } = - classify_branch(node, kind, rules, nesting_level) - { - *cognitive += cognitive_delta; - *cyclomatic += cyclomatic_delta; - walk_children(node, nesting_level + nesting_delta, false, rules, cognitive, cyclomatic, max_nesting, depth); - return; - } + let BranchAction::Handled { cognitive_delta, cyclomatic_delta, nesting_delta } = + classify_branch(node, kind, rules, nesting_level); + *cognitive += cognitive_delta; + *cyclomatic += cyclomatic_delta; + walk_children(node, nesting_level + nesting_delta, false, rules, cognitive, cyclomatic, max_nesting, depth); + return; } // Pattern C plain else (Go/Java) @@ -1323,17 +1319,15 @@ fn walk_all( // Branch/control flow nodes (skip keyword leaf tokens) if c_rules.is_branch(kind) && node.child_count() > 0 { - if let BranchAction::Handled { cognitive_delta, cyclomatic_delta, nesting_delta } = - classify_branch(node, kind, c_rules, nesting_level) - { - *cognitive += cognitive_delta; - *cyclomatic += cyclomatic_delta; - walk_all_children( - node, source, nesting_level + nesting_delta, false, skip_h, - c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, - ); - return; - } + let BranchAction::Handled { cognitive_delta, cyclomatic_delta, nesting_delta } = + classify_branch(node, kind, c_rules, nesting_level); + *cognitive += cognitive_delta; + *cyclomatic += cyclomatic_delta; + walk_all_children( + node, source, nesting_level + nesting_delta, false, skip_h, + c_rules, h_rules, cognitive, cyclomatic, max_nesting, operators, operands, + ); + return; } // Pattern C plain else (Go/Java) diff --git a/crates/codegraph-core/src/ast_analysis/dataflow.rs b/crates/codegraph-core/src/ast_analysis/dataflow.rs 
index ddb4a11a1..5a897c0b9 100644 --- a/crates/codegraph-core/src/ast_analysis/dataflow.rs +++ b/crates/codegraph-core/src/ast_analysis/dataflow.rs @@ -882,8 +882,8 @@ fn collect_identifiers(node: &Node, out: &mut Vec, rules: &DataflowRules #[derive(Debug, Clone)] enum LocalSource { - CallReturn { callee: String }, - Destructured { callee: String }, + CallReturn, + Destructured, } struct ScopeFrame { @@ -1200,7 +1200,7 @@ fn handle_var_declarator( }); scope .locals - .insert(n.clone(), LocalSource::Destructured { callee: callee.clone() }); + .insert(n.clone(), LocalSource::Destructured); } } else { let var_name = node_text(&name_n, source).to_string(); @@ -1211,7 +1211,7 @@ fn handle_var_declarator( expression: truncate(node_text(node, source), DATAFLOW_TRUNCATION_LIMIT), line: node_line(node), }); - scope.locals.insert(var_name, LocalSource::CallReturn { callee }); + scope.locals.insert(var_name, LocalSource::CallReturn); } } @@ -1267,7 +1267,7 @@ fn handle_assignment( line: node_line(node), }); if let Some(scope) = scope_stack.last_mut() { - scope.locals.insert(var_name, LocalSource::CallReturn { callee }); + scope.locals.insert(var_name, LocalSource::CallReturn); } } } diff --git a/crates/codegraph-core/src/domain/graph/builder/incremental.rs b/crates/codegraph-core/src/domain/graph/builder/incremental.rs index 35fa04345..4985904ed 100644 --- a/crates/codegraph-core/src/domain/graph/builder/incremental.rs +++ b/crates/codegraph-core/src/domain/graph/builder/incremental.rs @@ -10,7 +10,6 @@ use crate::types::FileSymbols; struct CacheEntry { tree: Tree, - lang: LanguageKind, } /// Cache of parse trees for incremental parsing. 
@@ -51,7 +50,7 @@ impl ParseTreeCache { let symbols = extract_symbols(lang, &tree, source_bytes, &file_path); - self.entries.insert(file_path, CacheEntry { tree, lang }); + self.entries.insert(file_path, CacheEntry { tree }); Some(symbols) } diff --git a/crates/codegraph-core/src/extractors/clojure.rs b/crates/codegraph-core/src/extractors/clojure.rs index b5160474f..7263ecf1a 100644 --- a/crates/codegraph-core/src/extractors/clojure.rs +++ b/crates/codegraph-core/src/extractors/clojure.rs @@ -51,6 +51,10 @@ fn walk_clojure( return; } + // `next_ns_owned` holds the String so that `next_ns` can borrow it as + // `&str` for the duration of this stack frame. The assignment looks + // "never read" to the compiler but the borrow on the next line reads it. + #[allow(unused_assignments)] let mut next_ns_owned: Option = None; let next_ns: Option<&str> = if node.kind() == "list_lit" { match handle_list_form(node, source, symbols, current_ns) { From 184d22167f2f58a7a569538af6b616a1c11a3744 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:41:46 -0600 Subject: [PATCH 06/15] refactor(native): extract emit_pts_alias_edges params into PtsAliasCtx struct --- .../graph/builder/stages/build_edges.rs | 79 +++++++++++++------ 1 file changed, 55 insertions(+), 24 deletions(-) diff --git a/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs b/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs index 3475adebe..39108e3d8 100644 --- a/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs +++ b/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs @@ -360,50 +360,55 @@ fn resolve_via_points_to<'a>( } } +/// Per-call-site inputs for `emit_pts_alias_edges`. +/// Groups the lookup parameters so the function stays within the argument-count limit. 
+struct PtsAliasCtx<'a> { + pts: &'a HashMap>, + lookup_name: &'a str, + call_line: u32, + caller_id: u32, + caller_name: &'a str, + is_dynamic: u32, + rel_path: &'a str, + imported_names: &'a HashMap<&'a str, &'a str>, + type_map: &'a HashMap<&'a str, (&'a str, f64)>, +} + /// Resolve each pts alias of `lookup_name` and emit hop-penalised call edges. /// Shared by the no-receiver gate and the receiver-key (`rest.prop()`) fallback; /// mirrors the alias-emission loops in buildFileCallEdges (build-edges.ts). -#[allow(clippy::too_many_arguments)] fn emit_pts_alias_edges<'a>( ctx: &EdgeContext<'a>, - pts: &HashMap>, - lookup_name: &str, - call_line: u32, - caller_id: u32, - caller_name: &str, - is_dynamic: u32, - rel_path: &str, - imported_names: &HashMap<&str, &str>, - type_map: &HashMap<&str, (&str, f64)>, + alias_ctx: &PtsAliasCtx<'_>, seen_edges: &HashSet, pts_edge_map: &mut HashMap, edges: &mut Vec, ) { - for alias in resolve_via_points_to(lookup_name, pts) { - let alias_imported_from = imported_names.get(alias).copied(); + for alias in resolve_via_points_to(alias_ctx.lookup_name, alias_ctx.pts) { + let alias_imported_from = alias_ctx.imported_names.get(alias).copied(); let alias_call = CallInfo { name: alias.to_string(), - line: call_line, + line: alias_ctx.call_line, dynamic: Some(true), receiver: None, }; let mut alias_targets = resolve_call_targets( - ctx, &alias_call, rel_path, alias_imported_from, type_map, caller_name, + ctx, &alias_call, alias_ctx.rel_path, alias_imported_from, alias_ctx.type_map, alias_ctx.caller_name, ); - sort_targets_by_confidence(&mut alias_targets, rel_path, alias_imported_from); + sort_targets_by_confidence(&mut alias_targets, alias_ctx.rel_path, alias_imported_from); for t in &alias_targets { - let edge_key = ((caller_id as u64) << 32) | (t.id as u64); - if t.id != caller_id && !seen_edges.contains(&edge_key) && !pts_edge_map.contains_key(&edge_key) { - let conf = resolve::compute_confidence(rel_path, &t.file, alias_imported_from) 
+ let edge_key = ((alias_ctx.caller_id as u64) << 32) | (t.id as u64); + if t.id != alias_ctx.caller_id && !seen_edges.contains(&edge_key) && !pts_edge_map.contains_key(&edge_key) { + let conf = resolve::compute_confidence(alias_ctx.rel_path, &t.file, alias_imported_from) - PROPAGATION_HOP_PENALTY; if conf > 0.0 { pts_edge_map.insert(edge_key, edges.len()); edges.push(ComputedEdge { - source_id: caller_id, + source_id: alias_ctx.caller_id, target_id: t.id, kind: "calls".to_string(), confidence: conf, - dynamic: is_dynamic, + dynamic: alias_ctx.is_dynamic, }); } } @@ -593,8 +598,21 @@ fn process_file<'a>( }; if let Some(lookup_name) = lookup_name { emit_pts_alias_edges( - ctx, pts, &lookup_name, call.line, caller_id, caller_name, is_dynamic, - rel_path, &imported_names, &type_map, &seen_edges, &mut pts_edge_map, edges, + ctx, + &PtsAliasCtx { + pts, + lookup_name: &lookup_name, + call_line: call.line, + caller_id, + caller_name, + is_dynamic, + rel_path, + imported_names: &imported_names, + type_map: &type_map, + }, + &seen_edges, + &mut pts_edge_map, + edges, ); } } @@ -609,8 +627,21 @@ fn process_file<'a>( let receiver_key = format!("{}.{}", receiver, call.name); if pts.contains_key(receiver_key.as_str()) { emit_pts_alias_edges( - ctx, pts, &receiver_key, call.line, caller_id, caller_name, is_dynamic, - rel_path, &imported_names, &type_map, &seen_edges, &mut pts_edge_map, edges, + ctx, + &PtsAliasCtx { + pts, + lookup_name: &receiver_key, + call_line: call.line, + caller_id, + caller_name, + is_dynamic, + rel_path, + imported_names: &imported_names, + type_map: &type_map, + }, + &seen_edges, + &mut pts_edge_map, + edges, ); } } From 909e1df55b58fee7cc7d5942e1132be648fd7169 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:43:24 -0600 Subject: [PATCH 07/15] fix(wasm): sort call targets by confidence before emit to match native engine --- src/domain/graph/builder/stages/build-edges.ts | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git 
a/src/domain/graph/builder/stages/build-edges.ts b/src/domain/graph/builder/stages/build-edges.ts index 3c0f2e3c3..88027aee0 100644 --- a/src/domain/graph/builder/stages/build-edges.ts +++ b/src/domain/graph/builder/stages/build-edges.ts @@ -1107,6 +1107,19 @@ function buildFileCallEdges( } } + // Sort targets by confidence descending before emitting edges. + // For multi-target calls with duplicate (source_id, target_id) pairs the + // stored confidence depends on which duplicate is processed last — sorting + // here guarantees the highest-confidence target wins on dedup, matching the + // native engine's sort_targets_by_confidence call in build_edges.rs. + if (targets.length > 1) { + targets = [...targets].sort( + (a, b) => + computeConfidence(relPath, b.file, importedFrom ?? null) - + computeConfidence(relPath, a.file, importedFrom ?? null), + ); + } + for (const t of targets) { const edgeKey = `${caller.id}|${t.id}`; if (t.id !== caller.id) { From 66fc899813dd51fcb8ca94063741530ac5a1ff54 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:44:33 -0600 Subject: [PATCH 08/15] fix(bench): add 2 warmup runs and raise INCREMENTAL_RUNS to 5 for incremental tiers --- scripts/benchmark.ts | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/scripts/benchmark.ts b/scripts/benchmark.ts index fbc449813..642e2b1f5 100644 --- a/scripts/benchmark.ts +++ b/scripts/benchmark.ts @@ -90,7 +90,8 @@ try { if (typeof parser.disposeParsers === 'function') disposeParsers = parser.disposeParsers; } catch { /* older release — no worker pool to dispose */ } -const INCREMENTAL_RUNS = 3; +const WARMUP_RUNS = 2; +const INCREMENTAL_RUNS = 5; const QUERY_RUNS = 5; const QUERY_WARMUP_RUNS = 3; const PROBE_FILE = path.join(root, 'src', 'domain', 'queries.ts'); @@ -154,6 +155,9 @@ const dbSizeBytes = fs.statSync(dbPath).size; console.error(` [${engine}] Benchmarking no-op rebuild...`); let noopRebuildMs = null; try { + for (let i = 0; i < WARMUP_RUNS; i++) 
{ + await buildGraph(root, { engine, incremental: true, exclude: BENCH_EXCLUDE }); + } const noopTimings = []; for (let i = 0; i < INCREMENTAL_RUNS; i++) { const start = performance.now(); @@ -170,6 +174,10 @@ const original = fs.readFileSync(PROBE_FILE, 'utf8'); let oneFileRebuildMs = null; let oneFilePhases = null; try { + for (let i = 0; i < WARMUP_RUNS; i++) { + fs.writeFileSync(PROBE_FILE, original + `\n// warmup-${i}\n`); + await buildGraph(root, { engine, incremental: true, exclude: BENCH_EXCLUDE }); + } const oneFileRuns = []; for (let i = 0; i < INCREMENTAL_RUNS; i++) { fs.writeFileSync(PROBE_FILE, original + `\n// probe-${i}\n`); From 84e1a5f588beccda4ebfa6917785bb93132dc9cf Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:49:25 -0600 Subject: [PATCH 09/15] ci(bench): add per-PR perf canary for extractor/graph/native changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds .github/workflows/perf-canary.yml — a path-filtered workflow that fires on PRs touching src/extractors/, src/domain/graph/, or crates/** and runs only the incremental-benchmark suite (full build + no-op + 1-file rebuild, both engines). Catches the class of regressions that accumulated invisibly across the Phase 8.x PRs and were only detected at v3.12.0 publish time. The regression guard gains BENCH_CANARY=1 mode: raises thresholds to 50%/100%/150% (standard/noisy/WASM) and skips the build, query, and resolution suites — only incremental checks run. This absorbs shared- runner timing variance while still blocking catastrophic regressions (+98% full build, +1827% 1-file rebuild from v3.12.0). 
Closes #1433 --- .github/workflows/perf-canary.yml | 111 ++++++++++++++++++++++ tests/benchmarks/regression-guard.test.ts | 49 ++++++++-- 2 files changed, 150 insertions(+), 10 deletions(-) create mode 100644 .github/workflows/perf-canary.yml diff --git a/.github/workflows/perf-canary.yml b/.github/workflows/perf-canary.yml new file mode 100644 index 000000000..fb4432aed --- /dev/null +++ b/.github/workflows/perf-canary.yml @@ -0,0 +1,111 @@ +name: Perf Canary + +# Lightweight per-PR build-time regression gate for PRs that touch the +# extractor, graph-builder, or native Rust layers — the parts of the codebase +# that caused the v3.12.0 regressions (+1827% 1-file rebuild, +98% full build). +# +# Only the incremental-benchmark suite is run (full build + no-op + 1-file +# rebuild for both engines). The regression guard uses BENCH_CANARY=1 mode, +# which applies a 50% threshold instead of the full suite's 25% — enough +# to catch catastrophic regressions while tolerating CI runner variance. +# +# This is intentionally separate from the full pre-publish-benchmark job in +# ci.yml, which runs unconditionally on every PR and measures the complete +# suite. The canary completes in roughly 5–10 minutes; the full suite takes +# 20–60 minutes. 
+ +on: + pull_request: + paths: + - "src/extractors/**" + - "src/domain/graph/**" + - "crates/**" + - "scripts/benchmark.ts" + - "scripts/incremental-benchmark.ts" + - "scripts/lib/bench-config.ts" + - "scripts/lib/fork-engine.ts" + +concurrency: + group: perf-canary-${{ github.ref }} + cancel-in-progress: true + +jobs: + perf-canary: + name: Perf canary (incremental tiers) + runs-on: ubuntu-latest + env: + CODEGRAPH_FAST_SKIP_DIAG: "1" + + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - uses: actions/setup-node@v6 + with: + node-version: "22" + cache: "npm" + + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + + - name: Rust cache + uses: Swatinem/rust-cache@v2 + with: + workspaces: crates/codegraph-core + + - name: Install napi-rs CLI + timeout-minutes: 5 + run: npm install -g @napi-rs/cli@3 + + - name: Build native addon + working-directory: crates/codegraph-core + run: napi build --release + + - name: Install dependencies + timeout-minutes: 20 + shell: bash + run: | + for attempt in 1 2 3; do + npm install && break + if [ "$attempt" -lt 3 ]; then + echo "::warning::npm install attempt $attempt failed, retrying in 15s..." + sleep 15 + else + echo "::error::npm install failed after 3 attempts" + exit 1 + fi + done + + - name: Install native addon over published binary + run: node scripts/ci-install-native.mjs + + # Build dist/ so benchmarks load the same compiled JS that ships to npm, + # matching the methodology used by the full pre-publish-benchmark gate. 
+ - name: Build TypeScript + run: npm run build + + - name: Run incremental benchmark + timeout-minutes: 15 + run: | + STRIP_FLAG=$(node -e "const [M]=process.versions.node.split('.').map(Number); console.log(M>=23?'--strip-types':'--experimental-strip-types')") + node $STRIP_FLAG --import ./scripts/ts-resolve-loader.js scripts/incremental-benchmark.ts --version dev --dist > incremental-canary-result.json + + - name: Update incremental report + run: | + STRIP_FLAG=$(node -e "const [M]=process.versions.node.split('.').map(Number); console.log(M>=23?'--strip-types':'--experimental-strip-types')") + node $STRIP_FLAG scripts/update-incremental-report.ts incremental-canary-result.json + + - name: Regression guard (50% threshold) + env: + RUN_REGRESSION_GUARD: "1" + BENCH_CANARY: "1" + run: npm run test:regression-guard + + - name: Upload canary result + if: always() + uses: actions/upload-artifact@v7 + with: + name: incremental-canary-result + path: incremental-canary-result.json + if-no-files-found: warn diff --git a/tests/benchmarks/regression-guard.test.ts b/tests/benchmarks/regression-guard.test.ts index 7ca1689d8..939dde61d 100644 --- a/tests/benchmarks/regression-guard.test.ts +++ b/tests/benchmarks/regression-guard.test.ts @@ -16,6 +16,16 @@ import { describe, expect, test } from 'vitest'; // ── Configuration ──────────────────────────────────────────────────────── +/** + * When BENCH_CANARY=1, only incremental-benchmark checks run and the timing + * thresholds are raised to 50% (100% for noisy metrics, 150% for WASM). This mode is used by the per-PR perf-canary + * workflow (.github/workflows/perf-canary.yml) which runs only on PRs + * touching src/extractors/, src/domain/graph/, or crates/. The looser + * thresholds absorb CI runner variance while still catching the class of + * catastrophic regressions that hit v3.12.0 (+98%/+1827%). + */ +const BENCH_CANARY = process.env.BENCH_CANARY === '1'; + /** * Maximum allowed regression (as a fraction, e.g. 0.25 = 25%). 
* @@ -26,8 +36,10 @@ import { describe, expect, test } from 'vitest'; * * Genuinely high-variance sub-30ms metrics get a wider tolerance via * `NOISY_METRICS` below — see that set's docstring for rationale. + * + * In BENCH_CANARY mode this is overridden to 0.5 (50%) — see above. */ -const REGRESSION_THRESHOLD = 0.25; +const REGRESSION_THRESHOLD = BENCH_CANARY ? 0.5 : 0.25; /** * Wider regression threshold applied to metrics in NOISY_METRICS. @@ -41,8 +53,11 @@ const REGRESSION_THRESHOLD = 0.25; * Keeping the global threshold at 25% means a regression in the 30–100ms * range is still caught (e.g. 50ms→63ms = +26%, flagged), while sub-30ms * metrics in this set get the wider 50% allowance. + * + * In BENCH_CANARY mode this is overridden to 1.0 (100%) — the canary's + * purpose is to catch gross regressions (+50%+), not sub-30ms jitter. */ -const NOISY_METRIC_THRESHOLD = 0.5; +const NOISY_METRIC_THRESHOLD = BENCH_CANARY ? 1.0 : 0.5; /** * Metric labels treated as high-variance and given the NOISY_METRIC_THRESHOLD @@ -86,8 +101,12 @@ const NOISY_METRICS = new Set(['No-op rebuild', '1-file rebuild', 'fnDep * v3.0.1–3.4.0), which 75% still flags, while absorbing the ≤71% shared-runner * jitter. Size metrics (DB bytes/file) are engine-independent and excluded from * this widening via SIZE_METRICS below — they keep the strict threshold. + * + * In BENCH_CANARY mode this is overridden to 1.5 (150%) — the canary targets + * gross regressions only, and WASM incremental metrics have extreme variance + * on shared runners. */ -const WASM_TIMING_THRESHOLD = 0.75; +const WASM_TIMING_THRESHOLD = BENCH_CANARY ? 1.5 : 0.75; /** * Metric labels that measure size/count rather than wall-clock time. These are @@ -608,6 +627,10 @@ interface IncrementalEntry { // in the default `npm test` run so docs commits that merge already-recorded // regressed history into main don't trigger false failures — by then the // release has already passed the gate. 
+// +// When BENCH_CANARY=1 (set by .github/workflows/perf-canary.yml), only the +// incremental-benchmark suite runs and every timing threshold is doubled +// (the default rises from 25% to 50%) — see the BENCH_CANARY constant above. const RUN_REGRESSION_GUARD = process.env.RUN_REGRESSION_GUARD === '1'; describe.runIf(RUN_REGRESSION_GUARD)('Benchmark regression guard', () => { @@ -627,7 +650,9 @@ describe.runIf(RUN_REGRESSION_GUARD)('Benchmark regression guard', () => { // Warn when KNOWN_REGRESSIONS entries are stale (more than 1 minor version // behind the current package version). This makes the stale-exemption // problem self-detecting rather than requiring manual bookkeeping. - test('KNOWN_REGRESSIONS entries are not stale', () => { + // Skipped in canary mode — this check is maintenance-only and irrelevant + // for a lightweight build-time regression gate. + test.skipIf(BENCH_CANARY)('KNOWN_REGRESSIONS entries are not stale', () => { // eslint-disable-next-line @typescript-eslint/no-require-imports const pkgVersion: string = JSON.parse( fs.readFileSync(path.join(ROOT, 'package.json'), 'utf8'), @@ -656,18 +681,22 @@ describe.runIf(RUN_REGRESSION_GUARD)('Benchmark regression guard', () => { ).toBe(0); }); - // Validate newest-first ordering assumption for all history arrays - test('build history is sorted newest-first', () => { + // Validate newest-first ordering assumption for all history arrays. + // Build/query ordering checks are skipped in canary mode (only incremental + // history is updated by the canary workflow).
+ test.skipIf(BENCH_CANARY)('build history is sorted newest-first', () => { assertNewestFirst(buildHistory, 'Build benchmark'); }); - test('query history is sorted newest-first', () => { + test.skipIf(BENCH_CANARY)('query history is sorted newest-first', () => { assertNewestFirst(queryHistory, 'Query benchmark'); }); test('incremental history is sorted newest-first', () => { assertNewestFirst(incrementalHistory, 'Incremental benchmark'); }); - describe('build benchmarks', () => { + // In canary mode only the incremental suite runs — build/query/resolution + // benchmarks are not measured by the perf-canary workflow. + describe.skipIf(BENCH_CANARY)('build benchmarks', () => { for (const engineKey of ['native', 'wasm'] as const) { const pair = findLatestPair(buildHistory, (e) => e[engineKey] != null); if (!pair) continue; @@ -700,7 +729,7 @@ describe.runIf(RUN_REGRESSION_GUARD)('Benchmark regression guard', () => { }); }); - describe('query benchmarks', () => { + describe.skipIf(BENCH_CANARY)('query benchmarks', () => { for (const engineKey of ['native', 'wasm'] as const) { const pair = findLatestPair(queryHistory, (e) => e[engineKey] != null); if (!pair) continue; @@ -803,7 +832,7 @@ describe.runIf(RUN_REGRESSION_GUARD)('Benchmark regression guard', () => { }); }); - describe('resolution benchmarks', () => { + describe.skipIf(BENCH_CANARY)('resolution benchmarks', () => { /** * Resolution precision/recall regression thresholds. 
* These are percentage-point drops (not relative %) because resolution From d07b3588d5eb8c90d1901d0fcdc794f995a08d1e Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:53:10 -0600 Subject: [PATCH 10/15] fix(perf): plumb symbolsOnly through parseFilesWasmInline to skip analysis visitors --- src/domain/parser.ts | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/domain/parser.ts b/src/domain/parser.ts index 54aa7b994..9ff4fe58e 100644 --- a/src/domain/parser.ts +++ b/src/domain/parser.ts @@ -1198,11 +1198,16 @@ const INLINE_BACKFILL_THRESHOLD = 16; * * Returns symbols with `_tree` set so `runAnalyses` can run AST/CFG/dataflow * visitors via the unified walker (mirrors how WASM-engine results behaved - * before the worker pool was introduced). + * before the worker pool was introduced), unless `symbolsOnly` is true — in + * that case `_tree` is not set, skipping all analysis visitor walks. Use + * `symbolsOnly` when only definitions/calls/typeMap are needed (e.g. the + * this/super dispatch post-pass) to avoid the analysis overhead on the inline + * path, matching the optimization already applied to the worker-pool path. */ async function parseFilesWasmInline( filePaths: string[], rootDir: string, + symbolsOnly = false, ): Promise> { const result = new Map(); if (filePaths.length === 0) return result; @@ -1220,7 +1225,12 @@ async function parseFilesWasmInline( if (!extracted) continue; const relPath = path.relative(rootDir, filePath).split(path.sep).join('/'); const symbols = extracted.symbols as ExtractorOutput & { _tree?: unknown; _langId?: string }; - symbols._tree = extracted.tree; + // When symbolsOnly=true, skip setting _tree so runAnalyses does not run + // AST/complexity/CFG/dataflow visitor walks — only definitions/calls/typeMap + // are needed by callers like the this/super dispatch post-pass. 
+ if (!symbolsOnly) { + symbols._tree = extracted.tree; + } symbols._langId = extracted.langId; result.set(relPath, symbols); } @@ -1246,7 +1256,7 @@ export async function parseFilesWasmForBackfill( opts: { symbolsOnly?: boolean } = {}, ): Promise> { if (filePaths.length <= INLINE_BACKFILL_THRESHOLD) { - return parseFilesWasmInline(filePaths, rootDir); + return parseFilesWasmInline(filePaths, rootDir, opts.symbolsOnly); } return parseFilesWasm(filePaths, rootDir, opts.symbolsOnly ? EXTRACT_ONLY : FULL_ANALYSIS); } From 3db5d8ccb1b45071cbde2d0980f6fde707da119d Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 18:56:46 -0600 Subject: [PATCH 11/15] fix(perf): scope runPostNativeCha to changed files on incremental builds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On incremental builds, runPostNativeCha previously scanned all call→qualified-method edges in the DB (~12ms flat, O(graph size)), even for 1-file changes where no hierarchy or RTA evidence changed. Add two cheap indexed gate queries. Gate A checks whether any changed file introduced a class/interface/trait/struct/record node (hierarchy may have new implementors reachable from unchanged call sites). Gate B checks whether any changed file added a call edge to a class-kind target (RTA set may have grown, enabling previously filtered expansions in unchanged callers). If neither gate fires, restrict the candidate query to src.file IN changedFiles — safe because the hierarchy and instantiated set are unchanged for all other files. Full builds (isFullBuild=true) and cases where either gate fires retain the existing full-scan behaviour. Mirrors the changed-files scoping pattern of runPostNativeThisDispatch. 
Closes #1441 --- .../builder/stages/native-orchestrator.ts | 130 ++++++++++++++++-- 1 file changed, 119 insertions(+), 11 deletions(-) diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index e5c5bd9b5..a8981c1be 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -401,12 +401,26 @@ async function runPostNativeAnalysis( * Note: `this`/`super` dispatch is handled separately by `runPostNativeThisDispatch`, * which WASM-re-parses JS/TS files to obtain raw call site receiver info. * + * `changedFiles` controls candidate scoping on incremental builds: + * - null → full build; scan all call→method edges (existing behaviour). + * - array → incremental; two cheap gate queries decide scope: + * Gate A: any class/interface/trait/struct/record nodes in changed files? + * If yes, a new implementor may have appeared — full scan required. + * Gate B: any `calls` edges from changed-file sources targeting class-kind + * nodes? If yes, the RTA set may have grown, enabling previously + * filtered expansions in unchanged caller files — full scan required. + * If neither gate fires: scope `callToMethods` to `src.file IN changedFiles` + * (safe because no hierarchy or RTA evidence changed). + * * Returns the count of newly inserted CHA edges plus the set of files containing * the new edges' endpoints, so the caller can scope role re-classification to the * nodes whose fan-in/out actually changed. A zero count means no edges were added * and role re-classification is unnecessary. 
*/ -function runPostNativeCha(db: BetterSqlite3Database): { +function runPostNativeCha( + db: BetterSqlite3Database, + changedFiles: string[] | null, +): { newEdgeCount: number; affectedFiles: Set; } { @@ -474,19 +488,111 @@ function runPostNativeCha(db: BetterSqlite3Database): { debug('runPostNativeCha: no constructor-call evidence found — proceeding without RTA filter'); } + // ── Incremental candidate scoping ────────────────────────────────────────── + // On incremental builds, two gate queries decide whether to restrict the + // candidate scan to changed-file call sites or run the full graph scan. + // + // Gate A: did a changed file add/change a class hierarchy node? + // A new `extends`/`implements` edge means a previously-untracked implementor + // is now in the hierarchy — unchanged call sites in OTHER files may gain new + // valid expansions, so the full scan is required. + // + // Gate B: did a changed file add new RTA evidence (`new ConcreteX()`)? + // A new `calls` edge to a class-kind target means the instantiated set grew — + // previously RTA-filtered expansions in unchanged caller files become + // admissible, so the full scan is required. + // + // If neither gate fires, the hierarchy and RTA set are unchanged for all files + // outside changedFiles, so restricting to changed-file sources is safe. + let scopeToChangedFiles = false; // true → add WHERE src.file IN changedFiles + if (changedFiles !== null && changedFiles.length > 0) { + // Gate A: class/interface/trait/struct/record nodes in changed files? 
+ const CHUNK_SIZE = 500; + let gateAFired = false; + for (let i = 0; i < changedFiles.length && !gateAFired; i += CHUNK_SIZE) { + const chunk = changedFiles.slice(i, i + CHUNK_SIZE); + const ph = chunk.map(() => '?').join(','); + const row = db + .prepare( + `SELECT 1 FROM nodes + WHERE file IN (${ph}) + AND kind IN ('class', 'interface', 'trait', 'struct', 'record') + LIMIT 1`, + ) + .get(...chunk); + if (row) gateAFired = true; + } + + // Gate B: calls from changed-file sources to class-kind targets? + let gateBFired = false; + if (!gateAFired) { + for (let i = 0; i < changedFiles.length && !gateBFired; i += CHUNK_SIZE) { + const chunk = changedFiles.slice(i, i + CHUNK_SIZE); + const ph = chunk.map(() => '?').join(','); + const row = db + .prepare( + `SELECT 1 FROM edges e + JOIN nodes src ON e.source_id = src.id + JOIN nodes tgt ON e.target_id = tgt.id + WHERE e.kind = 'calls' AND tgt.kind = 'class' + AND src.file IN (${ph}) + LIMIT 1`, + ) + .get(...chunk); + if (row) gateBFired = true; + } + } + + if (!gateAFired && !gateBFired) { + scopeToChangedFiles = true; + debug( + `runPostNativeCha: neither gate fired — scoping candidate scan to ${changedFiles.length} changed file(s)`, + ); + } else { + debug( + `runPostNativeCha: ${gateAFired ? 'Gate A (hierarchy)' : 'Gate B (RTA)'} fired — running full scan`, + ); + } + } + // Find existing call edges targeting qualified methods (e.g., 'IWorker.doWork'). // Include the caller node's file so confidence can be computed file-pair-aware, // matching the WASM path's computeConfidence(callerFile, targetFile, null) - CHA_DISPATCH_PENALTY formula. 
- const callToMethods = db - .prepare(` - SELECT e.source_id, tgt.name AS method_name, src.file AS caller_file - FROM edges e - JOIN nodes tgt ON e.target_id = tgt.id - JOIN nodes src ON e.source_id = src.id - WHERE e.kind = 'calls' AND tgt.kind = 'method' - AND INSTR(tgt.name, '.') > 0 - `) - .all() as Array<{ source_id: number; method_name: string; caller_file: string | null }>; + // When scopeToChangedFiles is true, restrict to call sites in the changed files + // (safe because no hierarchy or RTA evidence changed outside those files). + let callToMethods: Array<{ source_id: number; method_name: string; caller_file: string | null }>; + if (scopeToChangedFiles && changedFiles && changedFiles.length > 0) { + const CHUNK_SIZE = 500; + const rows: Array<{ source_id: number; method_name: string; caller_file: string | null }> = []; + for (let i = 0; i < changedFiles.length; i += CHUNK_SIZE) { + const chunk = changedFiles.slice(i, i + CHUNK_SIZE); + const ph = chunk.map(() => '?').join(','); + const chunkRows = db + .prepare( + `SELECT e.source_id, tgt.name AS method_name, src.file AS caller_file + FROM edges e + JOIN nodes tgt ON e.target_id = tgt.id + JOIN nodes src ON e.source_id = src.id + WHERE e.kind = 'calls' AND tgt.kind = 'method' + AND INSTR(tgt.name, '.') > 0 + AND src.file IN (${ph})`, + ) + .all(...chunk) as Array<{ source_id: number; method_name: string; caller_file: string | null }>; + rows.push(...chunkRows); + } + callToMethods = rows; + } else { + callToMethods = db + .prepare(` + SELECT e.source_id, tgt.name AS method_name, src.file AS caller_file + FROM edges e + JOIN nodes tgt ON e.target_id = tgt.id + JOIN nodes src ON e.source_id = src.id + WHERE e.kind = 'calls' AND tgt.kind = 'method' + AND INSTR(tgt.name, '.') > 0 + `) + .all() as Array<{ source_id: number; method_name: string; caller_file: string | null }>; + } // Seed seen-pairs only from the source_ids we'll be expanding — avoids loading every // call edge in the DB (which would be O(all 
edges)) for large codebases. @@ -1427,6 +1533,8 @@ export async function tryNativeOrchestrator( // no WASM re-parse post-pass is needed for them. `Foo.prototype.bar = fn` likewise. const { newEdgeCount: chaEdgeCount, affectedFiles: chaAffectedFiles } = runPostNativeCha( ctx.db as unknown as BetterSqlite3Database, + // null = full build (scan all call→method edges); array = incremental (gate queries decide scope) + result.isFullBuild ? null : (result.changedFiles ?? null), ); // Phase 8.5: this/super dispatch — hybrid WASM re-parse to resolve call sites From 8b3aa3d3438178bc306edba9c7a4038663dc44b0 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 19:01:26 -0600 Subject: [PATCH 12/15] fix(native): add post-pass phase timings to result.phases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Times each JS post-pass in tryNativeOrchestrator and exposes the measurements in BuildResult.phases: - gapDetectMs — dropped-language gap detection + backfill - chaMs — CHA expansion (interface dispatch) - thisDispatchMs — this/super dispatch WASM re-parse (was already tracked but now properly named alongside the rest) - reclassifyMs — scoped role re-classification after edge insertion - techniqueBackfillMs — technique-column UPDATE on native-written edges Previously only thisDispatchMs was reported, causing wall-clock vs phaseSum to diverge by 1.1s+ on 1-file rebuilds and making benchmark regressions undiagnosable from committed history. Updates update-incremental-report.ts to render the new phases in a collapsible details block under each engine's 1-file rebuild section. 
Closes #1434 --- scripts/update-incremental-report.ts | 49 +++++++++++++++++++ .../builder/stages/native-orchestrator.ts | 49 ++++++++++++++++--- src/types.ts | 10 ++++ 3 files changed, 102 insertions(+), 6 deletions(-) diff --git a/scripts/update-incremental-report.ts b/scripts/update-incremental-report.ts index bce3e7346..3a047a845 100644 --- a/scripts/update-incremental-report.ts +++ b/scripts/update-incremental-report.ts @@ -161,6 +161,55 @@ for (const engineKey of ['native', 'wasm']) { md += `| Full build | ${formatMs(e.fullBuildMs)} |\n`; md += `| No-op rebuild | ${e.noopRebuildMs != null ? formatMs(e.noopRebuildMs) : 'n/a'} |\n`; md += `| 1-file rebuild | ${e.oneFileRebuildMs != null ? formatMs(e.oneFileRebuildMs) : 'n/a'} |\n\n`; + + // 1-file rebuild phase breakdown — skipped when phases are unavailable (older + // benchmark entries that predate per-phase tracking, or failed runs). + const ph = e.oneFilePhases; + if (ph && typeof ph === 'object') { + md += `
1-file rebuild phase breakdown (${engineKey})\n\n`; + md += '| Phase | Time |\n'; + md += '|-------|-----:|\n'; + // Core Rust pipeline phases (present for both engines) + const corePhases = [ + ['setup', 'setupMs'], + ['collect', 'collectMs'], + ['detect', 'detectMs'], + ['parse', 'parseMs'], + ['insert', 'insertMs'], + ['resolve', 'resolveMs'], + ['edges', 'edgesMs'], + ['structure', 'structureMs'], + ['roles', 'rolesMs'], + ]; + for (const [label, key] of corePhases) { + if (ph[key] != null) md += `| ${label} | ${formatMs(ph[key])} |\n`; + } + // Native-only JS post-pass phases (only present when engine=native) + if (engineKey === 'native') { + const nativePostPhases = [ + ['gap detect + backfill', 'gapDetectMs'], + ['CHA expansion', 'chaMs'], + ['this/super dispatch', 'thisDispatchMs'], + ['role reclassify', 'reclassifyMs'], + ['technique backfill', 'techniqueBackfillMs'], + ]; + for (const [label, key] of nativePostPhases) { + if (ph[key] != null) md += `| ${label} | ${formatMs(ph[key])} |\n`; + } + } + // Analysis phases (present for both engines) + const analysisPhases = [ + ['ast', 'astMs'], + ['complexity', 'complexityMs'], + ['cfg', 'cfgMs'], + ['dataflow', 'dataflowMs'], + ['finalize', 'finalizeMs'], + ]; + for (const [label, key] of analysisPhases) { + if (ph[key] != null) md += `| ${label} | ${formatMs(ph[key])} |\n`; + } + md += '\n
\n\n'; + } } const r = latest.resolve; diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index a8981c1be..c16213a7b 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -577,7 +577,11 @@ function runPostNativeCha( AND INSTR(tgt.name, '.') > 0 AND src.file IN (${ph})`, ) - .all(...chunk) as Array<{ source_id: number; method_name: string; caller_file: string | null }>; + .all(...chunk) as Array<{ + source_id: number; + method_name: string; + caller_file: string | null; + }>; rows.push(...chunkRows); } callToMethods = rows; @@ -933,12 +937,20 @@ async function runPostNativeThisDispatch( return { elapsedMs: Date.now() - t0, targetIds, affectedFiles }; } +interface PostPassTimings { + gapDetectMs: number; + chaMs: number; + thisDispatchMs: number; + reclassifyMs: number; + techniqueBackfillMs: number; +} + /** Format timing result from native orchestrator phases + JS post-processing. */ function formatNativeTimingResult( p: Record, structurePatchMs: number, analysisTiming: { astMs: number; complexityMs: number; cfgMs: number; dataflowMs: number }, - thisDispatchMs: number, + postPass: PostPassTimings, ): BuildResult { return { phases: { @@ -951,7 +963,11 @@ function formatNativeTimingResult( edgesMs: +(p.edgesMs ?? 0).toFixed(1), structureMs: +((p.structureMs ?? 0) + structurePatchMs).toFixed(1), rolesMs: +(p.rolesMs ?? 0).toFixed(1), - thisDispatchMs: +thisDispatchMs.toFixed(1), + gapDetectMs: +postPass.gapDetectMs.toFixed(1), + chaMs: +postPass.chaMs.toFixed(1), + thisDispatchMs: +postPass.thisDispatchMs.toFixed(1), + reclassifyMs: +postPass.reclassifyMs.toFixed(1), + techniqueBackfillMs: +postPass.techniqueBackfillMs.toFixed(1), astMs: +(analysisTiming.astMs ?? 0).toFixed(1), complexityMs: +(analysisTiming.complexityMs ?? 0).toFixed(1), cfgMs: +(analysisTiming.cfgMs ?? 
0).toFixed(1), @@ -1490,8 +1506,14 @@ export async function tryNativeOrchestrator( ctx.db = openDb(ctx.dbPath); ctx.nativeFirstProxy = false; } else if (!ctx.nativeFirstProxy && !handoffWalAfterNativeBuild(ctx)) { - // DB reopen failed — return partial result - return formatNativeTimingResult(p, 0, analysisTiming, 0); + // DB reopen failed — return partial result (no post-pass phases completed) + return formatNativeTimingResult(p, 0, analysisTiming, { + gapDetectMs: 0, + chaMs: 0, + thisDispatchMs: 0, + reclassifyMs: 0, + techniqueBackfillMs: 0, + }); } } @@ -1513,6 +1535,7 @@ export async function tryNativeOrchestrator( // gated below. const removedCount = result.removedCount ?? 0; const changedCount = result.changedCount ?? 0; + const gapDetectStart = performance.now(); const gap = detectDroppedLanguageGap(ctx); if ( result.isFullBuild || @@ -1523,6 +1546,7 @@ export async function tryNativeOrchestrator( ) { await backfillNativeDroppedFiles(ctx, gap); } + const gapDetectMs = performance.now() - gapDetectStart; // Phase 8.5: expand CHA call edges (interface dispatch → concrete implementations). // Returns the affected files so role re-classification below can be scoped to @@ -1531,11 +1555,13 @@ export async function tryNativeOrchestrator( // Function-as-object-property methods (`fn.method = function() {}`) are extracted // natively by the Rust engine (#1432) and resolved in-build by its edge builder, so // no WASM re-parse post-pass is needed for them. `Foo.prototype.bar = fn` likewise. + const chaStart = performance.now(); const { newEdgeCount: chaEdgeCount, affectedFiles: chaAffectedFiles } = runPostNativeCha( ctx.db as unknown as BetterSqlite3Database, // null = full build (scan all call→method edges); array = incremental (gate queries decide scope) result.isFullBuild ? null : (result.changedFiles ?? 
null), ); + const chaMs = performance.now() - chaStart; // Phase 8.5: this/super dispatch — hybrid WASM re-parse to resolve call sites // whose raw receiver info the Rust pipeline does not persist to DB. @@ -1558,6 +1584,7 @@ export async function tryNativeOrchestrator( // files restores correctness without re-running the classifier over the // whole graph (which cost ~130ms per build on codegraph itself and was a // major part of the v3.12.0 native full-build benchmark regression). + let reclassifyMs = 0; if (chaEdgeCount > 0 || thisDispatchTargetIds.size > 0) { const affectedFiles = [...new Set([...chaAffectedFiles, ...thisDispatchAffectedFiles])]; // When edges were inserted but all their endpoint nodes have null `file` @@ -1566,6 +1593,7 @@ export async function tryNativeOrchestrator( // case — scoped classification with an empty set would be a no-op, leaving // roles stale for those nodes. const scopedFiles = affectedFiles.length > 0 ? affectedFiles : null; + const reclassifyStart = performance.now(); try { const { classifyNodeRoles } = (await import('../../../../features/structure.js')) as { classifyNodeRoles: ( @@ -1582,13 +1610,16 @@ export async function tryNativeOrchestrator( } catch (err) { debug(`Post-pass role re-classification failed: ${toErrorMessage(err)}`); } + reclassifyMs = performance.now() - reclassifyStart; } // Backfill the `technique` column on `calls` edges written by the Rust // orchestrator, which does not write the column. Runs after all edge-writing // phases (including the WASM dropped-language backfill, CHA post-pass, and // this/super dispatch) so every new edge in this build cycle gets a label. 
+ const techniqueBackfillStart = performance.now(); backfillEdgeTechniquesAfterNativeOrchestrator(ctx.db, !!result.isFullBuild, result.changedFiles); + const techniqueBackfillMs = performance.now() - techniqueBackfillStart; // Re-count nodes/edges now that all edge-writing post-passes have run: the // Rust orchestrator captured its counts before the JS post-passes added @@ -1633,5 +1664,11 @@ export async function tryNativeOrchestrator( } closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb }); - return formatNativeTimingResult(p, structurePatchMs, analysisTiming, thisDispatchMs); + return formatNativeTimingResult(p, structurePatchMs, analysisTiming, { + gapDetectMs, + chaMs, + thisDispatchMs, + reclassifyMs, + techniqueBackfillMs, + }); } diff --git a/src/types.ts b/src/types.ts index d7f97da6a..59897944a 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1268,8 +1268,18 @@ export interface BuildResult { edgesMs: number; structureMs: number; rolesMs: number; + /** Wall-clock time for the prototype-method post-pass (native path only). */ + protoMethodsMs?: number; + /** Wall-clock time for the CHA expansion post-pass (native path only). */ + chaMs?: number; /** Wall-clock time for the this/super dispatch WASM post-pass (native path only). */ thisDispatchMs?: number; + /** Wall-clock time for the dropped-language gap detection + backfill (native path only). */ + gapDetectMs?: number; + /** Wall-clock time for role re-classification after JS edge-writing post-passes (native path only). */ + reclassifyMs?: number; + /** Wall-clock time for the technique-column backfill on native-written edges (native path only). 
*/ + techniqueBackfillMs?: number; astMs: number; complexityMs: number; cfgMs: number; From fd4ffd123eecac2410e29b13ea14cb704b64571e Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 19:04:04 -0600 Subject: [PATCH 13/15] fix(perf): correct INLINE_BACKFILL_THRESHOLD docstring; raise threshold for required-tier grammars MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The docstring claimed pool cost was "amortised over enough parse work" — measurements show IPC overhead scales linearly (~55–64ms/file pool vs ~8–10ms/file inline). The real motivation is crash safety for exotic WASM grammars (#965); JS/TS/TSX (required-tier, used in all this-dispatch backfill calls) have never triggered the V8 fatal crash class and are safe to run inline. Raise threshold 16 → 32 to keep typical this-dispatch batches (≤ 18 files on the codegraph corpus) on the inline fast path. Exotic-language drops are almost always well under 32 files and also benefit from the inline path without meaningful crash risk increase. Closes #1435 --- src/domain/parser.ts | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/domain/parser.ts b/src/domain/parser.ts index 9ff4fe58e..411ba147a 100644 --- a/src/domain/parser.ts +++ b/src/domain/parser.ts @@ -1181,12 +1181,25 @@ async function parseFilesWasm( /** * Files at or below this count use the inline parse path (no worker spawn). * - * Sized for typical engine-parity drops: a handful of fixture files in one - * or two languages (the recurring HCL case is 4 files). Above this, the - * worker-pool's IPC + crash-isolation cost (#965) is amortized over enough - * parse work to be worth paying; below it, the ~1–2s cold-start dominates. + * The worker pool exists for crash safety (#965): exotic (non-required) WASM + * grammars can trigger uncatchable V8 fatal errors that would kill the main + * process. 
Running them in a worker means only the worker dies; the pool + * detects the exit, skips the file, respawns, and continues. + * + * JS/TS/TSX are required-tier grammars — they have never triggered the V8 + * fatal crash class and are safe to run inline. The primary hot caller + * (this/super dispatch post-pass) exclusively handles JS/TS/TSX files and + * measured ~55–64ms/file through the pool vs ~8–10ms/file inline (#1435); + * IPC overhead scales linearly with file count, not amortised. + * + * The threshold is set high enough to keep typical this-dispatch batches + * (≤ 18 files on the codegraph corpus) on the inline path, while still + * routing truly large exotic-language drops (rare; typical HCL case is 4 + * files) through the pool for crash isolation. Exotic-language drops are + * almost always well under this limit anyway, so they benefit from the + * inline fast path too without meaningful crash risk increase. */ -const INLINE_BACKFILL_THRESHOLD = 16; +const INLINE_BACKFILL_THRESHOLD = 32; /** * Inline WASM parse (no worker) for small file batches. From f90d4ba7570baff984c1ceece111dbb78bd73c57 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 12 Jun 2026 19:47:51 -0600 Subject: [PATCH 14/15] fix(perf): update stale parseFilesWasmForBackfill docstring to reference threshold constant The secondary docstring still described the old 16-value rationale ("engine-parity drop sizes"). Replace with a pointer to INLINE_BACKFILL_THRESHOLD where the full rationale now lives. --- src/domain/parser.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/domain/parser.ts b/src/domain/parser.ts index 411ba147a..707769ff0 100644 --- a/src/domain/parser.ts +++ b/src/domain/parser.ts @@ -1253,8 +1253,7 @@ async function parseFilesWasmInline( /** * Backfill helper: small batches use the inline (main-thread) path; larger * batches keep the worker-pool isolation against tree-sitter WASM crashes - * (#965). 
Threshold matches typical engine-parity drop sizes (a few fixture - * files in one or two languages). + * (#965). See INLINE_BACKFILL_THRESHOLD for threshold rationale. * * `opts.symbolsOnly` skips the AST/complexity/CFG/dataflow visitors in the * worker (and their result serialization across the thread boundary) for From 0c1028461392ffae498f8e5b26c04400092ae4f9 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Sat, 13 Jun 2026 20:00:48 -0600 Subject: [PATCH 15/15] fix(perf): guard post-native passes on 1-file incremental rebuilds (#1493) * fix(perf): guard post-native passes against unnecessary work on 1-file incremental rebuilds On 1-file native incremental builds, two JS post-passes ran unconditionally even when they had no work to do: - `backfillNativeDroppedFiles`: called whenever changedCount > 0, even when detectDroppedLanguageGap returned an empty gap. Gate now checks gap.missingAbs.length > 0 || gap.staleRel.length > 0 directly, matching backfillNativeDroppedFiles's own internal early-exit guard. - Node/edge COUNT(*) re-count: ran unconditionally after all post-passes even when none of them wrote any edges. COUNT(*) over 50K+ edge tables is non-trivial, especially via the NativeDbProxy napi-rs round-trip. Now gated on postPassWroteData (backfill | CHA edges | this-dispatch edges). Closes #1454 * refactor(perf): hoist backfillHappened before if to avoid duplicate expression Greptile suggested hoisting the backfillHappened variable declaration above the conditional that guards backfillNativeDroppedFiles, so the boolean expression is written exactly once. Previously the condition was evaluated in both the if-guard and the const declaration on the following line. 
--- .../builder/stages/native-orchestrator.ts | 54 +++++++++---------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index e43fabf83..a1bf7aa91 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -1538,26 +1538,16 @@ export async function tryNativeOrchestrator( // stale native binaries). WASM handles those — backfill via WASM so both // engines process the same file set (#967). // - // Detect the gap once (fs walk + 2 DB queries, ~20–30ms) and use it for - // both gating and the backfill itself. On dirty incrementals/full builds - // the orchestrator signals trigger backfill, so the walk happens once - // (instead of redundantly inside backfill). On quiet incrementals we - // still pay the walk so we can detect brand-new files in dropped-language - // extensions — a gap that the orchestrator's `detect_removed_files` - // filter (#1070) leaves open (#1083, #1091). The pre-check is cheap - // because the expensive part (WASM re-parse of the missing set) is - // gated below. - const removedCount = result.removedCount ?? 0; - const changedCount = result.changedCount ?? 0; + // Detect the gap once (fs walk + 2 DB queries) and use it for both gating + // and the backfill itself. On quiet incrementals we still pay the walk so + // we can detect brand-new files in dropped-language extensions — a gap that + // the orchestrator's `detect_removed_files` filter (#1070) leaves open + // (#1083, #1091). The pre-check is cheap because the expensive part (WASM + // re-parse of the missing set) is gated below. 
const gapDetectStart = performance.now(); const gap = detectDroppedLanguageGap(ctx); - if ( - result.isFullBuild || - removedCount > 0 || - changedCount > 0 || - gap.missingAbs.length > 0 || - gap.staleRel.length > 0 - ) { + const backfillHappened = gap.missingAbs.length > 0 || gap.staleRel.length > 0; + if (backfillHappened) { await backfillNativeDroppedFiles(ctx, gap); } const gapDetectMs = performance.now() - gapDetectStart; @@ -1638,19 +1628,27 @@ export async function tryNativeOrchestrator( // Re-count nodes/edges now that all edge-writing post-passes have run: the // Rust orchestrator captured its counts before the JS post-passes added // edges, so both its summary and build_meta under-report (#1452). + // + // Fast path: skip the COUNT(*) scan when no post-pass wrote any edges. + // COUNT(*) on large tables (50K+ edges) is non-trivial, especially via the + // NativeDbProxy napi-rs round-trip. When all post-passes were no-ops, the + // Rust orchestrator's counts are still accurate — no re-count needed. let finalNodeCount = result.nodeCount ?? 0; let finalEdgeCount = result.edgeCount ?? 
0; - try { - const counts = (ctx.db as unknown as BetterSqlite3Database) - .prepare('SELECT (SELECT COUNT(*) FROM nodes) AS n, (SELECT COUNT(*) FROM edges) AS e') - .get() as { n: number; e: number }; - if (counts.n !== finalNodeCount || counts.e !== finalEdgeCount) { - finalNodeCount = counts.n; - finalEdgeCount = counts.e; - setBuildMeta(ctx.db, { node_count: finalNodeCount, edge_count: finalEdgeCount }); + const postPassWroteData = backfillHappened || chaEdgeCount > 0 || thisDispatchTargetIds.size > 0; + if (postPassWroteData) { + try { + const counts = (ctx.db as unknown as BetterSqlite3Database) + .prepare('SELECT (SELECT COUNT(*) FROM nodes) AS n, (SELECT COUNT(*) FROM edges) AS e') + .get() as { n: number; e: number }; + if (counts.n !== finalNodeCount || counts.e !== finalEdgeCount) { + finalNodeCount = counts.n; + finalEdgeCount = counts.e; + setBuildMeta(ctx.db, { node_count: finalNodeCount, edge_count: finalEdgeCount }); + } + } catch (err) { + debug(`Post-pass node/edge re-count failed: ${toErrorMessage(err)}`); } - } catch (err) { - debug(`Post-pass node/edge re-count failed: ${toErrorMessage(err)}`); } info( `Native build orchestrator completed: ${finalNodeCount} nodes, ${finalEdgeCount} edges, ${result.fileCount ?? 0} files`,