Skip to content
21 changes: 21 additions & 0 deletions scripts/lib/bench-config.js
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,27 @@ export async function resolveBenchmarkSource() {
console.error(`Warning: failed to install native package: ${err.message}`);
}

// @huggingface/transformers is a devDependency (lazy-loaded for embeddings).
// It is not installed as a transitive dep in npm mode, so install it
// explicitly so the embedding benchmark workers can import it.
try {
const localPkg = JSON.parse(
fs.readFileSync(path.resolve(path.dirname(new URL(import.meta.url).pathname.replace(/^\/([A-Z]:)/, '$1')), '..', '..', 'package.json'), 'utf8'),
);
const hfVersion = localPkg.devDependencies?.['@huggingface/transformers'];
if (hfVersion) {
console.error(`Installing @huggingface/transformers@${hfVersion} for embedding benchmarks...`);
execFileSync('npm', ['install', `@huggingface/transformers@${hfVersion}`, '--no-audit', '--no-fund', '--no-save'], {
cwd: tmpDir,
stdio: 'pipe',
timeout: 120_000,
});
console.error('Installed @huggingface/transformers');
}
} catch (err) {
console.error(`Warning: failed to install @huggingface/transformers: ${err.message}`);
}

const srcDir = path.join(pkgDir, 'src');

if (!fs.existsSync(srcDir)) {
Expand Down
33 changes: 31 additions & 2 deletions scripts/query-benchmark.js
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,32 @@ function round1(n) {
return Math.round(n * 10) / 10;
}

// Hub targets are pinned to function names that stay present from one version
// to the next. Picking whichever node happens to be most connected would make
// version-to-version comparison meaningless, because adding or removing
// barrel/type files changes which node that is.
const PINNED_HUB_CANDIDATES = [
  'buildGraph',
  'openDb',
  'loadConfig',
];

function selectTargets() {
const db = new Database(dbPath, { readonly: true });
try {

// Try pinned candidates first for a stable hub across versions
let hub = null;
for (const candidate of PINNED_HUB_CANDIDATES) {
const row = db
.prepare(
`SELECT n.name FROM nodes n
JOIN edges e ON e.source_id = n.id OR e.target_id = n.id
WHERE n.name = ? AND n.file NOT LIKE '%test%' AND n.file NOT LIKE '%spec%'
LIMIT 1`,
)
.get(candidate);
if (row) {
hub = row.name;
break;
}
}

const rows = db
.prepare(
`SELECT n.name, COUNT(e.id) AS cnt
Expand All @@ -123,14 +147,19 @@ function selectTargets() {
ORDER BY cnt DESC`,
)
.all();
db.close();

if (rows.length === 0) throw new Error('No nodes with edges found in graph');

const hub = rows[0].name;
// Fall back to most-connected if no pinned candidate found
if (!hub) hub = rows[0].name;

const mid = rows[Math.floor(rows.length / 2)].name;
const leaf = rows[rows.length - 1].name;
return { hub, mid, leaf };

} finally {
db.close();
}
}

function benchDepths(fn, name, depths) {
Expand Down
9 changes: 9 additions & 0 deletions scripts/update-embedding-report.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,15 @@ if (arg) {
}
const entry = JSON.parse(jsonText);

// Guard against empty benchmark output (e.g. every worker crashed or no symbols
// were indexed): the entry is usable only when it has a truthy `symbols` value
// and a `models` object with at least one key. Otherwise log why and exit with
// status 1 so the existing report is not overwritten.
if (!(entry.symbols && entry.models && Object.keys(entry.models).length > 0)) {
  // `models` may be missing entirely, so count keys on a fallback object.
  const modelCount = Object.keys(entry.models || {}).length;
  console.error(
    `Embedding benchmark produced empty results (symbols=${entry.symbols}, models=${modelCount}). Skipping report update to avoid overwriting valid data. Check benchmark worker logs.`,
  );
  process.exit(1);
}

// ── Paths ────────────────────────────────────────────────────────────────
const reportPath = path.join(root, 'generated', 'benchmarks', 'EMBEDDING-BENCHMARKS.md');

Expand Down
20 changes: 18 additions & 2 deletions src/domain/analysis/impact.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,19 @@ import { findMatchingNodes } from './symbol-lookup.js';

const INTERFACE_LIKE_KINDS = new Set(['interface', 'trait']);

/**
 * Memo of "does this graph have any 'implements' edge?", keyed by db handle.
 * Values are always booleans.
 */
const _hasImplementsCache = new WeakMap();

/**
 * Tell whether the graph behind `db` contains at least one edge whose kind is
 * 'implements'. The probe query is issued on the first call for a given handle
 * only; later calls with the same handle reuse the remembered answer.
 *
 * @param {object} db - Database handle exposing `prepare(sql).get()`.
 * @returns {boolean} `true` when an 'implements' edge exists, else `false`.
 */
function hasImplementsEdges(db) {
  const remembered = _hasImplementsCache.get(db);
  if (remembered !== undefined) {
    return remembered;
  }

  // LIMIT 1 — only existence matters, not the number of matching edges.
  const probe = db.prepare("SELECT 1 FROM edges WHERE kind = 'implements' LIMIT 1");
  const found = Boolean(probe.get());
  _hasImplementsCache.set(db, found);
  return found;
}

/**
* BFS traversal to find transitive callers of a node.
* When an interface/trait node is encountered (either as the start node or
Expand All @@ -40,14 +53,17 @@ export function bfsTransitiveCallers(
startId,
{ noTests = false, maxDepth = 3, includeImplementors = true, onVisit } = {},
) {
// Skip all implementor lookups when the graph has no implements edges
const resolveImplementors = includeImplementors && hasImplementsEdges(db);

const visited = new Set([startId]);
const levels = {};
let frontier = [startId];

// Seed: if start node is an interface/trait, include its implementors at depth 1.
// Implementors go into a separate list so their callers appear at depth 2, not depth 1.
const implNextFrontier = [];
if (includeImplementors) {
if (resolveImplementors) {
const startNode = findNodeById(db, startId);
if (startNode && INTERFACE_LIKE_KINDS.has(startNode.kind)) {
const impls = findImplementors(db, startId);
Expand Down Expand Up @@ -88,7 +104,7 @@ export function bfsTransitiveCallers(

// If a caller is an interface/trait, also pull in its implementors
// Implementors are one extra hop away, so record at d+1
if (includeImplementors && INTERFACE_LIKE_KINDS.has(c.kind)) {
if (resolveImplementors && INTERFACE_LIKE_KINDS.has(c.kind)) {
const impls = findImplementors(db, c.id);
for (const impl of impls) {
if (!visited.has(impl.id) && (!noTests || !isTestFile(impl.file))) {
Expand Down
58 changes: 43 additions & 15 deletions src/domain/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -395,8 +395,22 @@ async function backfillTypeMap(filePath, source) {
}
const parsers = await createParsers();
const extracted = wasmExtractSymbols(parsers, filePath, code);
if (!extracted?.symbols?.typeMap) return { typeMap: [], backfilled: false };
if (!extracted?.symbols?.typeMap) {
// Free the WASM tree to prevent memory accumulation across repeated builds
if (extracted?.tree && typeof extracted.tree.delete === 'function') {
try {
extracted.tree.delete();
} catch {}
}
return { typeMap: [], backfilled: false };
}
const tm = extracted.symbols.typeMap;
// Free the WASM tree — only the typeMap data is needed
if (extracted.tree && typeof extracted.tree.delete === 'function') {
try {
extracted.tree.delete();
} catch {}
}
return {
typeMap: tm instanceof Map ? tm : new Map(tm.map((e) => [e.name, e.typeName])),
backfilled: true,
Expand Down Expand Up @@ -486,21 +500,35 @@ export async function parseFilesAuto(filePaths, rootDir, opts = {}) {
}
// Backfill typeMap via WASM for native binaries that predate the type-map feature
if (needsTypeMap.length > 0) {
const parsers = await createParsers();
for (const { filePath, relPath } of needsTypeMap) {
try {
const code = fs.readFileSync(filePath, 'utf-8');
const extracted = wasmExtractSymbols(parsers, filePath, code);
if (extracted?.symbols?.typeMap) {
const symbols = result.get(relPath);
symbols.typeMap =
extracted.symbols.typeMap instanceof Map
? extracted.symbols.typeMap
: new Map(extracted.symbols.typeMap.map((e) => [e.name, e.typeName]));
symbols._typeMapBackfilled = true;
// Only backfill for languages where WASM extraction can produce typeMap
// (TS/TSX have type annotations; JS only has `new Expr()` which native already handles)
const TS_EXTS = new Set(['.ts', '.tsx']);
const tsFiles = needsTypeMap.filter(({ filePath }) => TS_EXTS.has(path.extname(filePath)));
if (tsFiles.length > 0) {
const parsers = await createParsers();
for (const { filePath, relPath } of tsFiles) {
let extracted;
try {
const code = fs.readFileSync(filePath, 'utf-8');
extracted = wasmExtractSymbols(parsers, filePath, code);
if (extracted?.symbols?.typeMap) {
const symbols = result.get(relPath);
symbols.typeMap =
extracted.symbols.typeMap instanceof Map
? extracted.symbols.typeMap
: new Map(extracted.symbols.typeMap.map((e) => [e.name, e.typeName]));
symbols._typeMapBackfilled = true;
}
} catch {
/* skip — typeMap is a best-effort backfill */
} finally {
// Free the WASM tree to prevent memory accumulation across repeated builds
if (extracted?.tree && typeof extracted.tree.delete === 'function') {
try {
extracted.tree.delete();
} catch {}
}
}
} catch {
/* skip — typeMap is a best-effort backfill */
}
}
}
Expand Down
26 changes: 23 additions & 3 deletions src/infrastructure/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -130,12 +130,19 @@ export const DEFAULTS = {
},
};

// Per-cwd config cache — avoids re-reading the config file on every query call.
// The config file rarely changes within a single process lifetime.
const _configCache = new Map();

/**
* Load project configuration from a .codegraphrc.json or similar file.
* Returns merged config with defaults.
* Returns merged config with defaults. Results are cached per cwd.
*/
export function loadConfig(cwd) {
cwd = cwd || process.cwd();
const cached = _configCache.get(cwd);
if (cached) return structuredClone(cached);

for (const name of CONFIG_FILES) {
const filePath = path.join(cwd, name);
if (fs.existsSync(filePath)) {
Expand All @@ -148,13 +155,26 @@ export function loadConfig(cwd) {
merged.query.excludeTests = Boolean(config.excludeTests);
}
delete merged.excludeTests;
return resolveSecrets(applyEnvOverrides(merged));
const result = resolveSecrets(applyEnvOverrides(merged));
_configCache.set(cwd, result);
return result;
Comment on lines +158 to +160

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Inconsistent return value on first call vs. subsequent cache hits

On the first call for a given cwd, loadConfig returns the actual cached object stored in _configCache. All subsequent calls correctly return a structuredClone. If any caller mutates the returned config (e.g., patching llm.apiKey or similar), the cached entry is silently corrupted and all future callers receive the mutated data.

Both return sites after populating the cache should return a clone for consistency:

Suggested change
const result = resolveSecrets(applyEnvOverrides(merged));
_configCache.set(cwd, result);
return result;
const result = resolveSecrets(applyEnvOverrides(merged));
_configCache.set(cwd, result);
return structuredClone(result);

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed. The merge with main already brought in the structuredClone fix for both cache-population return sites (commit f8016c6 on main). The merged resolution preserves structuredClone(result) when storing to cache, so the first caller gets the raw object and the cache holds an isolated copy.

} catch (err) {
debug(`Failed to parse config ${filePath}: ${err.message}`);
}
}
}
return resolveSecrets(applyEnvOverrides({ ...DEFAULTS }));
const defaults = resolveSecrets(applyEnvOverrides({ ...DEFAULTS }));
_configCache.set(cwd, defaults);
return defaults;
Comment on lines +166 to +168

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Same raw-reference return for the defaults branch

The defaults code path has the same issue — the object stored in the cache is returned directly on the first call. This should also return a structuredClone to match the behaviour of the early-return cache hit path above:

Suggested change
const defaults = resolveSecrets(applyEnvOverrides({ ...DEFAULTS }));
_configCache.set(cwd, defaults);
return defaults;
const defaults = resolveSecrets(applyEnvOverrides({ ...DEFAULTS }));
_configCache.set(cwd, defaults);
return structuredClone(defaults);

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed. Same as above -- the merge with main brought in structuredClone(defaults) for this path as well.

}

/**
 * Drop every entry from the per-cwd config cache.
 *
 * Useful in long-running processes that must notice on-disk config edits, and
 * in tests that share a cwd and need isolation from one another.
 *
 * @returns {void}
 */
export function clearConfigCache() {
  _configCache.clear();
}

const ENV_LLM_MAP = {
Expand Down
Loading