Skip to content

Commit 130697d

Browse files
committed
fix: resolve merge conflict with main in finalize.ts
Impact: 3 functions changed, 8 affected
2 parents b4f4ce2 + 820e9ff commit 130697d

6 files changed

Lines changed: 28 additions & 114 deletions

File tree

CLAUDE.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
44

55
> **Hooks enforce code quality.** This project uses Claude Code hooks (`.claude/hooks/`) to automatically inject file-level dependency context on reads, rebuild the graph after edits, block commits with cycles or dead exports, run lint on staged files, and show diff-impact before commits. If codegraph reports an error or produces wrong results when analyzing itself, **fix the bug in the codebase**.
66
7+
> **Never document bugs as expected behavior.** If two engines (native vs WASM) produce different results, that is a bug in the less-accurate engine — not an acceptable "parity gap." Adding comments or tests that frame wrong output as "expected" blocks future agents from ever fixing it. Instead: identify the root cause, file an issue, and fix the extraction/resolution layer that produces incorrect results. The correct response to "engine A reports 8 cycles, engine B reports 11" is to fix the 3 false cycles in engine B, not to document why the difference is okay.
8+
79
## Codegraph Workflow
810

911
Hooks handle: file-level deps on reads, graph rebuild after edits, commit-time checks (cycles, dead exports, diff-impact, lint). **You must actively run these for function-level understanding:**
@@ -138,7 +140,7 @@ Source is TypeScript in `src/`, compiled via `tsup`. The Rust native engine live
138140
| `ast-analysis/` | Unified AST analysis framework: shared DFS walker (`visitor.ts`), engine orchestrator (`engine.ts`), extracted metrics (`metrics.ts`), and pluggable visitors for complexity, dataflow, and AST-store |
139141

140142
**Key design decisions:**
141-
- **Dual-engine architecture:** Native Rust parsing via napi-rs (`crates/codegraph-core/`) with automatic fallback to WASM. Controlled by `--engine native|wasm|auto` (default: `auto`)
143+
- **Dual-engine architecture:** Native Rust parsing via napi-rs (`crates/codegraph-core/`) with automatic fallback to WASM. Controlled by `--engine native|wasm|auto` (default: `auto`). **Both engines must produce identical results.** If they diverge, the less-accurate engine has a bug — fix it, don't document the gap
142144
- Platform-specific prebuilt binaries published as optional npm packages (`@optave/codegraph-{platform}-{arch}`)
143145
- WASM grammars are built from devDeps on `npm install` (via `prepare` script) and not committed to git — used as fallback when native addon is unavailable
144146
- **Language parser registry:** `LANGUAGE_REGISTRY` in `domain/parser.ts` is the single source of truth for all supported languages — maps each language to `{ id, extensions, grammarFile, extractor, required }`. `EXTENSIONS` in `shared/constants.ts` is derived from the registry. Adding a new language requires one registry entry + extractor function

src/domain/graph/builder/incremental.ts

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -412,9 +412,6 @@ function findCaller(
412412
callerSpan = span;
413413
}
414414
}
415-
} else if (!caller) {
416-
const row = stmts.getNodeId.get(def.name, def.kind, relPath, def.line);
417-
if (row) caller = row;
418415
}
419416
}
420417
}

src/domain/graph/builder/stages/build-edges.ts

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -361,9 +361,6 @@ function findCaller(
361361
callerSpan = span;
362362
}
363363
}
364-
} else if (!caller) {
365-
const row = getNodeIdStmt.get(def.name, def.kind, relPath, def.line);
366-
if (row) caller = row;
367364
}
368365
}
369366
}

src/domain/graph/builder/stages/finalize.ts

Lines changed: 25 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ export async function finalize(ctx: PipelineContext): Promise<void> {
3737
symbols._langId = undefined;
3838
}
3939

40+
// Capture a single wall-clock timestamp for the current build — used for
41+
// both the stale-embeddings comparison and the persisted built_at metadata.
42+
const buildNow = new Date();
43+
4044
const nodeCount = (db.prepare('SELECT COUNT(*) as c FROM nodes').get() as { c: number }).c;
4145
const actualEdgeCount = (db.prepare('SELECT COUNT(*) as c FROM edges').get() as { c: number }).c;
4246
info(`Graph built: ${nodeCount} nodes, ${actualEdgeCount} edges`);
@@ -63,6 +67,22 @@ export async function finalize(ctx: PipelineContext): Promise<void> {
6367
}
6468
}
6569

70+
// Persist build metadata early so downstream checks (e.g. stale-embeddings)
71+
// can read the *current* build's built_at rather than the previous one.
72+
try {
73+
setBuildMeta(db, {
74+
engine: ctx.engineName,
75+
engine_version: ctx.engineVersion || '',
76+
codegraph_version: CODEGRAPH_VERSION,
77+
schema_version: String(schemaVersion),
78+
built_at: buildNow.toISOString(),
79+
node_count: nodeCount,
80+
edge_count: actualEdgeCount,
81+
});
82+
} catch (err) {
83+
warn(`Failed to write build metadata: ${(err as Error).message}`);
84+
}
85+
6686
// Orphaned embeddings warning
6787
if (hasEmbeddings) {
6888
try {
@@ -83,7 +103,7 @@ export async function finalize(ctx: PipelineContext): Promise<void> {
83103
}
84104
}
85105

86-
// Stale embeddings warning (built before last graph rebuild)
106+
// Stale embeddings warning (built before current graph rebuild)
87107
if (hasEmbeddings) {
88108
try {
89109
const embedBuiltAt = (
@@ -93,17 +113,10 @@ export async function finalize(ctx: PipelineContext): Promise<void> {
93113
)?.value;
94114
if (embedBuiltAt) {
95115
const embedTime = new Date(embedBuiltAt).getTime();
96-
const now = Date.now();
97-
if (embedTime < now && !Number.isNaN(embedTime)) {
98-
const prevBuildAt = getBuildMeta(db, 'built_at');
99-
if (prevBuildAt) {
100-
const prevBuildTime = new Date(prevBuildAt).getTime();
101-
if (embedTime < prevBuildTime) {
102-
warn(
103-
'Embeddings were built before the last graph rebuild. Run "codegraph embed" to update.',
104-
);
105-
}
106-
}
116+
if (!Number.isNaN(embedTime) && embedTime < buildNow.getTime()) {
117+
warn(
118+
'Embeddings were built before the last graph rebuild. Run "codegraph embed" to update.',
119+
);
107120
}
108121
}
109122
} catch {
@@ -136,21 +149,6 @@ export async function finalize(ctx: PipelineContext): Promise<void> {
136149
/* exported column may not exist on older DBs */
137150
}
138151

139-
// Persist build metadata
140-
try {
141-
setBuildMeta(db, {
142-
engine: ctx.engineName,
143-
engine_version: ctx.engineVersion || '',
144-
codegraph_version: CODEGRAPH_VERSION,
145-
schema_version: String(schemaVersion),
146-
built_at: new Date().toISOString(),
147-
node_count: nodeCount,
148-
edge_count: actualEdgeCount,
149-
});
150-
} catch (err) {
151-
warn(`Failed to write build metadata: ${(err as Error).message}`);
152-
}
153-
154152
closeDb(db);
155153

156154
// Write journal header after successful build

src/domain/graph/cycles.ts

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,25 +4,6 @@ import { CodeGraph } from '../../graph/model.js';
44
import { loadNative } from '../../infrastructure/native.js';
55
import type { BetterSqlite3Database } from '../../types.js';
66

7-
/**
8-
* Engine parity note — function-level cycle counts
9-
*
10-
* The native (Rust) and WASM engines may report different function-level cycle
11-
* counts even on the same codebase. This is expected behavior, not a bug in
12-
* the cycle detection algorithm (Tarjan SCC is identical in both engines).
13-
*
14-
* Root cause: the native engine extracts slightly more symbols and resolves
15-
* more call edges than WASM (e.g. 10883 nodes / 4000 calls native vs 10857
16-
* nodes / 3986 calls WASM on the codegraph repo). The additional precision
17-
* can both create new edges and — more commonly — resolve previously ambiguous
18-
* calls to their correct targets, which breaks false cycles that WASM reports.
19-
*
20-
* For file-level cycles the engines are in parity because import edges are
21-
* resolved identically. The gap only manifests at function-level granularity
22-
* where call-site extraction differs between the Rust and WASM parsers.
23-
*
24-
* See: https://github.com/optave/codegraph/issues/597
25-
*/
267
export function findCycles(
278
db: BetterSqlite3Database,
289
opts: { fileLevel?: boolean; noTests?: boolean } = {},

tests/graph/cycles.test.ts

Lines changed: 0 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -148,21 +148,6 @@ describe('formatCycles', () => {
148148
});
149149
});
150150

151-
// ── Engine parity: extraction-level differences ────────────────────
152-
//
153-
// The native (Rust) and WASM engines produce slightly different function-level
154-
// graphs because the native extractor resolves more symbols and call edges.
155-
// This means function-level cycle *counts* can legitimately differ between
156-
// engines (e.g. 8 native vs 11 WASM on the codegraph repo itself).
157-
//
158-
// The Tarjan SCC algorithm is identical — given the SAME edge set, both
159-
// engines produce the same cycles. The tests below verify that invariant.
160-
//
161-
// File-level cycles are unaffected because import resolution is engine-
162-
// independent.
163-
//
164-
// See: https://github.com/optave/codegraph/issues/597
165-
166151
// ── Native vs JS parity ────────────────────────────────────────────
167152

168153
describe.skipIf(!hasNative)('Cycle detection: native vs JS parity', () => {
@@ -222,49 +207,3 @@ describe.skipIf(!hasNative)('Cycle detection: native vs JS parity', () => {
222207
expect(sortCycles(nativeResult)).toEqual(sortCycles(jsResult));
223208
});
224209
});
225-
226-
// ── Extraction-level parity gap (issue #597) ───────────────────────
227-
228-
describe('Cycle count sensitivity to edge differences', () => {
229-
it('resolving an ambiguous call edge to its correct target can break a false cycle', () => {
230-
// Demonstrates why native (more precise edge targets) can report FEWER cycles than WASM.
231-
// With ambiguous resolution, a -> b -> c -> a forms a 3-node cycle.
232-
// Resolving the ambiguous c -> a edge to its correct target c -> d
233-
// breaks the cycle.
234-
const ambiguousEdges = [
235-
{ source: 'a', target: 'b' },
236-
{ source: 'b', target: 'c' },
237-
{ source: 'c', target: 'a' },
238-
];
239-
const ambiguousCycles = findCyclesJS(ambiguousEdges);
240-
expect(ambiguousCycles).toHaveLength(1);
241-
242-
// After resolving: c -> a becomes c -> d (a different target).
243-
// The cycle is broken.
244-
const resolvedEdges = [
245-
{ source: 'a', target: 'b' },
246-
{ source: 'b', target: 'c' },
247-
{ source: 'c', target: 'd' },
248-
];
249-
const resolvedCycles = findCyclesJS(resolvedEdges);
250-
expect(resolvedCycles).toHaveLength(0);
251-
});
252-
253-
it('JS cycle detection is deterministic on repeated calls', () => {
254-
// The Tarjan SCC algorithm is deterministic: given the same edge set,
255-
// repeated calls always produce the same result. Any cycle count
256-
// difference between engines comes from the graph they are fed, not
257-
// from the algorithm.
258-
const edges = [
259-
{ source: 'a', target: 'b' },
260-
{ source: 'b', target: 'c' },
261-
{ source: 'c', target: 'a' },
262-
{ source: 'd', target: 'e' },
263-
{ source: 'e', target: 'd' },
264-
];
265-
const result1 = findCyclesJS(edges);
266-
const result2 = findCyclesJS(edges);
267-
expect(result1).toEqual(result2);
268-
expect(result1).toHaveLength(2);
269-
});
270-
});

0 commit comments

Comments
 (0)