fix: resolve merge conflict with main in finalize.ts

carlos-alm · carlos-alm · commit 130697d05c9e · 2026-03-25T20:19:31.000-06:00
Impact: 3 functions changed, 8 affected
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -4,6 +4,8 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
 
 > **Hooks enforce code quality.** This project uses Claude Code hooks (`.claude/hooks/`) to automatically inject file-level dependency context on reads, rebuild the graph after edits, block commits with cycles or dead exports, run lint on staged files, and show diff-impact before commits. If codegraph reports an error or produces wrong results when analyzing itself, **fix the bug in the codebase**.
 
+> **Never document bugs as expected behavior.** If two engines (native vs WASM) produce different results, that is a bug in the less-accurate engine — not an acceptable "parity gap." Adding comments or tests that frame wrong output as "expected" blocks future agents from ever fixing it. Instead: identify the root cause, file an issue, and fix the extraction/resolution layer that produces incorrect results. The correct response to "engine A reports 8 cycles, engine B reports 11" is to fix the 3 false cycles in engine B, not to document why the difference is okay.
+
 ## Codegraph Workflow
 
 Hooks handle: file-level deps on reads, graph rebuild after edits, commit-time checks (cycles, dead exports, diff-impact, lint). **You must actively run these for function-level understanding:**
@@ -138,7 +140,7 @@ Source is TypeScript in `src/`, compiled via `tsup`. The Rust native engine live
 | `ast-analysis/` | Unified AST analysis framework: shared DFS walker (`visitor.ts`), engine orchestrator (`engine.ts`), extracted metrics (`metrics.ts`), and pluggable visitors for complexity, dataflow, and AST-store |
 
 **Key design decisions:**
-- **Dual-engine architecture:** Native Rust parsing via napi-rs (`crates/codegraph-core/`) with automatic fallback to WASM. Controlled by `--engine native|wasm|auto` (default: `auto`)
+- **Dual-engine architecture:** Native Rust parsing via napi-rs (`crates/codegraph-core/`) with automatic fallback to WASM. Controlled by `--engine native|wasm|auto` (default: `auto`). **Both engines must produce identical results.** If they diverge, the less-accurate engine has a bug — fix it, don't document the gap
 - Platform-specific prebuilt binaries published as optional npm packages (`@optave/codegraph-{platform}-{arch}`)
 - WASM grammars are built from devDeps on `npm install` (via `prepare` script) and not committed to git — used as fallback when native addon is unavailable
 - **Language parser registry:** `LANGUAGE_REGISTRY` in `domain/parser.ts` is the single source of truth for all supported languages — maps each language to `{ id, extensions, grammarFile, extractor, required }`. `EXTENSIONS` in `shared/constants.ts` is derived from the registry. Adding a new language requires one registry entry + extractor function
diff --git a/src/domain/graph/builder/incremental.ts b/src/domain/graph/builder/incremental.ts
@@ -412,9 +412,6 @@ function findCaller(
             callerSpan = span;
           }
         }
-      } else if (!caller) {
-        const row = stmts.getNodeId.get(def.name, def.kind, relPath, def.line);
-        if (row) caller = row;
       }
     }
   }
diff --git a/src/domain/graph/builder/stages/build-edges.ts b/src/domain/graph/builder/stages/build-edges.ts
@@ -361,9 +361,6 @@ function findCaller(
             callerSpan = span;
           }
         }
-      } else if (!caller) {
-        const row = getNodeIdStmt.get(def.name, def.kind, relPath, def.line);
-        if (row) caller = row;
       }
     }
   }
diff --git a/src/domain/graph/builder/stages/finalize.ts b/src/domain/graph/builder/stages/finalize.ts
@@ -37,6 +37,10 @@ export async function finalize(ctx: PipelineContext): Promise<void> {
     symbols._langId = undefined;
   }
 
+  // Capture a single wall-clock timestamp for the current build — used for
+  // both the stale-embeddings comparison and the persisted built_at metadata.
+  const buildNow = new Date();
+
   const nodeCount = (db.prepare('SELECT COUNT(*) as c FROM nodes').get() as { c: number }).c;
   const actualEdgeCount = (db.prepare('SELECT COUNT(*) as c FROM edges').get() as { c: number }).c;
   info(`Graph built: ${nodeCount} nodes, ${actualEdgeCount} edges`);
@@ -63,6 +67,22 @@ export async function finalize(ctx: PipelineContext): Promise<void> {
     }
   }
 
+  // Persist build metadata before the embedding checks below, so that anything
+  // reading built_at from the DB sees the *current* build, not the previous one.
+  try {
+    setBuildMeta(db, {
+      engine: ctx.engineName,
+      engine_version: ctx.engineVersion || '',
+      codegraph_version: CODEGRAPH_VERSION,
+      schema_version: String(schemaVersion),
+      built_at: buildNow.toISOString(),
+      node_count: nodeCount,
+      edge_count: actualEdgeCount,
+    });
+  } catch (err) {
+    warn(`Failed to write build metadata: ${(err as Error).message}`);
+  }
+
   // Orphaned embeddings warning
   if (hasEmbeddings) {
     try {
@@ -83,7 +103,7 @@ export async function finalize(ctx: PipelineContext): Promise<void> {
     }
   }
 
-  // Stale embeddings warning (built before last graph rebuild)
+  // Stale embeddings warning (built before current graph rebuild)
   if (hasEmbeddings) {
     try {
       const embedBuiltAt = (
@@ -93,17 +113,10 @@ export async function finalize(ctx: PipelineContext): Promise<void> {
       )?.value;
       if (embedBuiltAt) {
         const embedTime = new Date(embedBuiltAt).getTime();
-        const now = Date.now();
-        if (embedTime < now && !Number.isNaN(embedTime)) {
-          const prevBuildAt = getBuildMeta(db, 'built_at');
-          if (prevBuildAt) {
-            const prevBuildTime = new Date(prevBuildAt).getTime();
-            if (embedTime < prevBuildTime) {
-              warn(
-                'Embeddings were built before the last graph rebuild. Run "codegraph embed" to update.',
-              );
-            }
-          }
+        if (!Number.isNaN(embedTime) && embedTime < buildNow.getTime()) {
+          warn(
+            'Embeddings were built before the last graph rebuild. Run "codegraph embed" to update.',
+          );
         }
       }
     } catch {
@@ -136,21 +149,6 @@ export async function finalize(ctx: PipelineContext): Promise<void> {
     /* exported column may not exist on older DBs */
   }
 
-  // Persist build metadata
-  try {
-    setBuildMeta(db, {
-      engine: ctx.engineName,
-      engine_version: ctx.engineVersion || '',
-      codegraph_version: CODEGRAPH_VERSION,
-      schema_version: String(schemaVersion),
-      built_at: new Date().toISOString(),
-      node_count: nodeCount,
-      edge_count: actualEdgeCount,
-    });
-  } catch (err) {
-    warn(`Failed to write build metadata: ${(err as Error).message}`);
-  }
-
   closeDb(db);
 
   // Write journal header after successful build
diff --git a/src/domain/graph/cycles.ts b/src/domain/graph/cycles.ts
@@ -4,25 +4,6 @@ import { CodeGraph } from '../../graph/model.js';
 import { loadNative } from '../../infrastructure/native.js';
 import type { BetterSqlite3Database } from '../../types.js';
 
-/**
- * Engine parity note — function-level cycle counts
- *
- * The native (Rust) and WASM engines may report different function-level cycle
- * counts even on the same codebase. This is expected behavior, not a bug in
- * the cycle detection algorithm (Tarjan SCC is identical in both engines).
- *
- * Root cause: the native engine extracts slightly more symbols and resolves
- * more call edges than WASM (e.g. 10883 nodes / 4000 calls native vs 10857
- * nodes / 3986 calls WASM on the codegraph repo). The additional precision
- * can both create new edges and — more commonly — resolve previously ambiguous
- * calls to their correct targets, which breaks false cycles that WASM reports.
- *
- * For file-level cycles the engines are in parity because import edges are
- * resolved identically. The gap only manifests at function-level granularity
- * where call-site extraction differs between the Rust and WASM parsers.
- *
- * See: https://github.com/optave/codegraph/issues/597
- */
 export function findCycles(
   db: BetterSqlite3Database,
   opts: { fileLevel?: boolean; noTests?: boolean } = {},
diff --git a/tests/graph/cycles.test.ts b/tests/graph/cycles.test.ts
@@ -148,21 +148,6 @@ describe('formatCycles', () => {
   });
 });
 
-// ── Engine parity: extraction-level differences ────────────────────
-//
-// The native (Rust) and WASM engines produce slightly different function-level
-// graphs because the native extractor resolves more symbols and call edges.
-// This means function-level cycle *counts* can legitimately differ between
-// engines (e.g. 8 native vs 11 WASM on the codegraph repo itself).
-//
-// The Tarjan SCC algorithm is identical — given the SAME edge set, both
-// engines produce the same cycles. The tests below verify that invariant.
-//
-// File-level cycles are unaffected because import resolution is engine-
-// independent.
-//
-// See: https://github.com/optave/codegraph/issues/597
-
 // ── Native vs JS parity ────────────────────────────────────────────
 
 describe.skipIf(!hasNative)('Cycle detection: native vs JS parity', () => {
@@ -222,49 +207,3 @@ describe.skipIf(!hasNative)('Cycle detection: native vs JS parity', () => {
     expect(sortCycles(nativeResult)).toEqual(sortCycles(jsResult));
   });
 });
-
-// ── Extraction-level parity gap (issue #597) ───────────────────────
-
-describe('Cycle count sensitivity to edge differences', () => {
-  it('resolving an ambiguous call edge to its correct target can break a false cycle', () => {
-    // Demonstrates why native (more precise edge targets) can report FEWER cycles than WASM.
-    // With ambiguous resolution, a -> b -> c -> a forms a 3-node cycle.
-    // Resolving the ambiguous c -> a edge to its correct target c -> d
-    // breaks the cycle.
-    const ambiguousEdges = [
-      { source: 'a', target: 'b' },
-      { source: 'b', target: 'c' },
-      { source: 'c', target: 'a' },
-    ];
-    const ambiguousCycles = findCyclesJS(ambiguousEdges);
-    expect(ambiguousCycles).toHaveLength(1);
-
-    // After resolving: c -> a becomes c -> d (a different target).
-    // The cycle is broken.
-    const resolvedEdges = [
-      { source: 'a', target: 'b' },
-      { source: 'b', target: 'c' },
-      { source: 'c', target: 'd' },
-    ];
-    const resolvedCycles = findCyclesJS(resolvedEdges);
-    expect(resolvedCycles).toHaveLength(0);
-  });
-
-  it('JS cycle detection is deterministic on repeated calls', () => {
-    // The Tarjan SCC algorithm is deterministic: given the same edge set,
-    // repeated calls always produce the same result. Any cycle count
-    // difference between engines comes from the graph they are fed, not
-    // from the algorithm.
-    const edges = [
-      { source: 'a', target: 'b' },
-      { source: 'b', target: 'c' },
-      { source: 'c', target: 'a' },
-      { source: 'd', target: 'e' },
-      { source: 'e', target: 'd' },
-    ];
-    const result1 = findCyclesJS(edges);
-    const result2 = findCyclesJS(edges);
-    expect(result1).toEqual(result2);
-    expect(result1).toHaveLength(2);
-  });
-});

Original file line number	Diff line number	Diff line change
`@@ -412,9 +412,6 @@ function findCaller(`
`412`	`412`	`callerSpan = span;`
`413`	`413`	`}`
`414`	`414`	`}`
`415`		`- } else if (!caller) {`
`416`		`- const row = stmts.getNodeId.get(def.name, def.kind, relPath, def.line);`
`417`		`- if (row) caller = row;`
`418`	`415`	`}`
`419`	`416`	`}`
`420`	`417`	`}`
Original file line number	Diff line number	Diff line change
`@@ -361,9 +361,6 @@ function findCaller(`
`361`	`361`	`callerSpan = span;`
`362`	`362`	`}`
`363`	`363`	`}`
`364`		`- } else if (!caller) {`
`365`		`- const row = getNodeIdStmt.get(def.name, def.kind, relPath, def.line);`
`366`		`- if (row) caller = row;`
`367`	`364`	`}`
`368`	`365`	`}`
`369`	`366`	`}`