diff --git a/crates/codegraph-core/src/extractors/csharp.rs b/crates/codegraph-core/src/extractors/csharp.rs index 7d153092..5992dda0 100644 --- a/crates/codegraph-core/src/extractors/csharp.rs +++ b/crates/codegraph-core/src/extractors/csharp.rs @@ -438,33 +438,6 @@ fn extract_csharp_base_types( // ── Type map extraction ───────────────────────────────────────────────────── -/// Extract the constructor type from a `var x = new Foo()` initializer. -fn extract_var_init_type(declarator: &Node, source: &[u8]) -> Option { - for i in 0..declarator.child_count() { - let Some(child) = declarator.child(i) else { continue }; - // Defensive: handle object_creation_expression as a direct child of variable_declarator. - // The standard grammar always wraps it in equals_value_clause, but this guard is kept - // as a belt-and-suspenders fallback for edge cases or future grammar changes. - if child.kind() == "object_creation_expression" { - if let Some(t) = child.child_by_field_name("type") { - return extract_csharp_type_name(&t, source).map(|s| s.to_string()); - } - } - if child.kind() == "equals_value_clause" { - for j in 0..child.child_count() { - if let Some(expr) = child.child(j) { - if expr.kind() == "object_creation_expression" { - if let Some(t) = expr.child_by_field_name("type") { - return extract_csharp_type_name(&t, source).map(|s| s.to_string()); - } - } - } - } - } - } - None -} - fn extract_csharp_type_name<'a>(type_node: &Node<'a>, source: &'a [u8]) -> Option<&'a str> { match type_node.kind() { "identifier" | "qualified_name" => Some(node_text(type_node, source)), @@ -482,29 +455,38 @@ fn match_csharp_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, "variable_declaration" => { let type_node = node.child_by_field_name("type").or_else(|| node.child(0)); if let Some(type_node) = type_node { - let is_var = type_node.kind() == "implicit_type" || type_node.kind() == "var_keyword"; - let explicit_type_name: Option = if is_var { - None - } else { - 
extract_csharp_type_name(&type_node, source).map(|s| s.to_string()) - }; - if is_var || explicit_type_name.is_some() { + if type_node.kind() == "implicit_type" { + // var x = new Foo() — infer type from object_creation_expression initializer for i in 0..node.child_count() { - if let Some(child) = node.child(i) { - if child.kind() == "variable_declarator" { - let name_node = child.child_by_field_name("name") - .or_else(|| child.child(0)); - if let Some(name_node) = name_node { - if name_node.kind() == "identifier" { - let type_name = if is_var { - extract_var_init_type(&child, source) - } else { - explicit_type_name.clone() - }; - if let Some(type_name) = type_name { + if let Some(declarator) = node.child(i) { + if declarator.kind() != "variable_declarator" { continue; } + let name_node = declarator.child_by_field_name("name") + .or_else(|| declarator.child(0)); + let Some(name_node) = name_node else { continue }; + if name_node.kind() != "identifier" { continue; } + let Some(obj_creation) = find_child(&declarator, "object_creation_expression") else { continue }; + let Some(ctor_type_node) = obj_creation.child_by_field_name("type") else { continue }; + if let Some(ctor_type) = extract_csharp_type_name(&ctor_type_node, source) { + symbols.type_map.push(TypeMapEntry { + name: node_text(&name_node, source).to_string(), + type_name: ctor_type.to_string(), + confidence: 1.0, + }); + } + } + } + } else if type_node.kind() != "var_keyword" { + if let Some(type_name) = extract_csharp_type_name(&type_node, source) { + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if child.kind() == "variable_declarator" { + let name_node = child.child_by_field_name("name") + .or_else(|| child.child(0)); + if let Some(name_node) = name_node { + if name_node.kind() == "identifier" { symbols.type_map.push(TypeMapEntry { name: node_text(&name_node, source).to_string(), - type_name, + type_name: type_name.to_string(), confidence: 0.9, }); } diff --git 
a/src/domain/graph/builder/call-resolver.ts b/src/domain/graph/builder/call-resolver.ts index b3f4524d..16a834db 100644 --- a/src/domain/graph/builder/call-resolver.ts +++ b/src/domain/graph/builder/call-resolver.ts @@ -40,7 +40,7 @@ const MODULE_SCOPED_BARE_CALL_EXTENSIONS = new Set([ '.cts', ]); -function isModuleScopedLanguage(relPath: string): boolean { +export function isModuleScopedLanguage(relPath: string): boolean { const ext = relPath.slice(relPath.lastIndexOf('.')); return MODULE_SCOPED_BARE_CALL_EXTENSIONS.has(ext); } diff --git a/src/domain/graph/builder/stages/build-edges.ts b/src/domain/graph/builder/stages/build-edges.ts index f62e1122..fa1d0f44 100644 --- a/src/domain/graph/builder/stages/build-edges.ts +++ b/src/domain/graph/builder/stages/build-edges.ts @@ -30,6 +30,7 @@ import { enrichTypeMapWithTsc } from '../../resolver/ts-resolver.js'; import { type CallNodeLookup, findCaller, + isModuleScopedLanguage, resolveCallTargets, resolveReceiverEdge, } from '../call-resolver.js'; @@ -1340,9 +1341,35 @@ function buildFileCallEdges( // not the enclosing class, so qualifying with the child class name would // produce a false edge when the child also defines a same-named method. if (targets.length === 0 && call.receiver === 'this' && caller.callerName != null) { - const dotIdx = caller.callerName.indexOf('.'); - if (dotIdx > 0) { - const className = caller.callerName.slice(0, dotIdx); + const lastDot = caller.callerName.lastIndexOf('.'); + if (lastDot > 0) { + const prevDot = caller.callerName.lastIndexOf('.', lastDot - 1); + const className = caller.callerName.slice(prevDot + 1, lastDot); + const qualifiedName = `${className}.${call.name}`; + const qualified = lookup + .byNameAndFile(qualifiedName, relPath) + .filter((n) => n.kind === 'method'); + if (qualified.length > 0) { + targets = qualified; + } + } + } + + // Same-class bare-call fallback: when a no-receiver call can't be resolved + // globally, try the caller's own class as a qualifier. 
Handles C# static + // sibling calls: `IsValidEmail()` inside `Validators.ValidateUser` resolves + // to `Validators.IsValidEmail`. Skipped for JS/TS where bare calls are + // module-scoped, not class-scoped. + if ( + targets.length === 0 && + !call.receiver && + caller.callerName != null && + !isModuleScopedLanguage(relPath) + ) { + const lastDot = caller.callerName.lastIndexOf('.'); + if (lastDot > 0) { + const prevDot = caller.callerName.lastIndexOf('.', lastDot - 1); + const className = caller.callerName.slice(prevDot + 1, lastDot); const qualifiedName = `${className}.${call.name}`; const qualified = lookup .byNameAndFile(qualifiedName, relPath) diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index 59bb83fc..55c1902f 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -402,9 +402,9 @@ async function runPostNativeAnalysis( * Note: `this`/`super` dispatch is handled separately by `runPostNativeThisDispatch`, * which WASM-re-parses JS/TS files to obtain raw call site receiver info. * - * Returns the set of target node IDs for newly inserted CHA edges so the caller - * can re-classify roles for the affected implementation files. An empty set - * means no edges were added and role re-classification is unnecessary. + * Returns the count of newly inserted CHA edges so the caller can determine + * whether a full role re-classification is needed. Zero means no edges were + * added and role re-classification is unnecessary. */ function runPostNativeCha(db: BetterSqlite3Database): number { // Fast guard: no hierarchy edges → no CHA work @@ -1607,37 +1607,9 @@ export async function tryNativeOrchestrator( } // Phase 8.5: expand CHA call edges (interface dispatch → concrete implementations). 
- // The Rust orchestrator ran role classification BEFORE this post-pass, so without - // a re-run the newly-called implementor methods stay classified as `dead-ffi`. - // - // CHA also changes the global fan-out distribution (callee files gain fan_in, and - // new edges shift the median). A full re-classification is required — not just the - // callee files — because the median shift can change roles in unrelated files whose - // fan-out sits near the old median. (Example: a method that called two siblings - // pre-CHA might be near the median, but post-CHA the median is higher, changing - // its role from utility → core.) Using an incremental pass with a stale median - // cache would produce incorrect roles outside the CHA-affected file set. - // - // Performance: classifyNodeRoles is O(all_nodes). For most repos this is sub-100ms; - // on very large codebases (100k+ nodes) it may add a few hundred ms per build. - // If this becomes a bottleneck, consider a two-pass strategy: incremental first - // (fast, slightly inaccurate), then full only when the median shifts by >N%. + // Returns the count of newly inserted edges; used to determine whether + // a full role re-classification is needed after all edge-writing post-passes complete. const chaEdgeCount = runPostNativeCha(ctx.db as unknown as BetterSqlite3Database); - if (chaEdgeCount > 0) { - try { - const db = ctx.db as unknown as BetterSqlite3Database; - const { classifyNodeRoles } = (await import('../../../../features/structure.js')) as { - classifyNodeRoles: ( - db: BetterSqlite3Database, - changedFiles?: string[] | null, - ) => Record; - }; - classifyNodeRoles(db); - debug(`CHA post-pass: full role re-classification after ${chaEdgeCount} new CHA edges`); - } catch (err) { - debug(`CHA post-pass role re-classification failed: ${toErrorMessage(err)}`); - } - } // Function-as-object-property post-pass: the Rust engine does not yet recognise // `fn.method = function() {}` patterns. 
Re-parse only those JS/TS files via @@ -1659,51 +1631,23 @@ export async function tryNativeOrchestrator( !!result.isFullBuild, ); - // Re-classify roles for methods that gained incoming this/super dispatch edges. - // The Rust orchestrator classifies roles BEFORE this post-pass, so target methods - // (e.g. Animal.speak, ConcreteWorker.prepare) that had no callers at Rust time - // are classified `dead` or `dead-ffi`. Inserting the new call edges does not - // automatically update those role labels — without a re-run the stale labels - // propagate to dead-code detection and API boundary analysis. - if (thisDispatchTargetIds.size > 0) { + // Full role re-classification after JS edge-writing post-passes. + // The Rust orchestrator classifies roles before these post-passes (CHA, + // this-dispatch) add edges, so the Rust-computed roles and the cached + // fan-out medians are stale. A full re-classification ensures the final + // roles reflect the true fan-in/out with all edges in place. + if (chaEdgeCount > 0 || thisDispatchTargetIds.size > 0) { try { - const db = ctx.db as unknown as BetterSqlite3Database; - const idArray = Array.from(thisDispatchTargetIds); - const CHUNK_SIZE = 500; - const seenFiles = new Set(); - const affectedFiles: Array<{ file: string }> = []; - for (let i = 0; i < idArray.length; i += CHUNK_SIZE) { - const chunk = idArray.slice(i, i + CHUNK_SIZE); - const placeholders = chunk.map(() => '?').join(','); - const rows = db - .prepare( - `SELECT DISTINCT file FROM nodes WHERE id IN (${placeholders}) AND file IS NOT NULL`, - ) - .all(...chunk) as Array<{ file: string }>; - for (const row of rows) { - if (!seenFiles.has(row.file)) { - seenFiles.add(row.file); - affectedFiles.push(row); - } - } - } - if (affectedFiles.length > 0) { - const { classifyNodeRoles } = (await import('../../../../features/structure.js')) as { - classifyNodeRoles: ( - db: BetterSqlite3Database, - changedFiles?: string[] | null, - ) => Record; - }; - classifyNodeRoles( - db, 
- affectedFiles.map((r) => r.file), - ); - debug( - `this/super dispatch post-pass: re-classified roles for ${affectedFiles.length} target file(s)`, - ); - } + const { classifyNodeRoles } = (await import('../../../../features/structure.js')) as { + classifyNodeRoles: ( + db: BetterSqlite3Database, + changedFiles?: string[] | null, + ) => Record; + }; + classifyNodeRoles(ctx.db as unknown as BetterSqlite3Database, null); + debug(`Post-pass full role re-classification complete`); } catch (err) { - debug(`this/super dispatch post-pass role re-classification failed: ${toErrorMessage(err)}`); + debug(`Post-pass full role re-classification failed: ${toErrorMessage(err)}`); } } diff --git a/src/extractors/csharp.ts b/src/extractors/csharp.ts index 0bbfae87..850bb8a3 100644 --- a/src/extractors/csharp.ts +++ b/src/extractors/csharp.ts @@ -329,43 +329,36 @@ function extractCSharpTypeMap(node: TreeSitterNode, ctx: ExtractorOutput): void extractCSharpTypeMapDepth(node, ctx, 0); } -/** Extract the constructor type from a `var x = new Foo()` initializer. */ -function extractVarInitType(declarator: TreeSitterNode): string | null { - for (let i = 0; i < declarator.childCount; i++) { - const child = declarator.child(i); - // Defensive: handle object_creation_expression as a direct child of variable_declarator. - // The standard grammar always wraps it in equals_value_clause, but this guard is kept - // as a belt-and-suspenders fallback for edge cases or future grammar changes. 
- if (child?.type === 'object_creation_expression') { - const tNode = child.childForFieldName('type'); - if (tNode) return extractCSharpTypeName(tNode); - } - if (child?.type === 'equals_value_clause') { - for (let j = 0; j < child.childCount; j++) { - const expr = child.child(j); - if (expr?.type === 'object_creation_expression') { - const tNode = expr.childForFieldName('type'); - if (tNode) return extractCSharpTypeName(tNode); - } - } - } - } - return null; -} - -/** Extract type info from a variable_declaration node (local vars with explicit or inferred types). */ +/** Extract type info from a variable_declaration node (explicitly typed locals, plus `var` locals initialized with `new T()`). */ function handleCSharpVarDecl(node: TreeSitterNode, ctx: ExtractorOutput): void { const typeNode = node.childForFieldName('type') || node.child(0); if (!typeNode) return; - const isVar = typeNode.type === 'implicit_type' || typeNode.type === 'var_keyword'; - const explicitTypeName = isVar ? null : extractCSharpTypeName(typeNode); - if (!isVar && !explicitTypeName) return; + + if (typeNode.type === 'implicit_type') { + // var x = new Foo() — infer type from object_creation_expression initializer + if (!ctx.typeMap) return; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child?.type !== 'variable_declarator') continue; + const nameNode = child.childForFieldName('name') || child.child(0); + if (nameNode?.type !== 'identifier') continue; + const objCreation = findChild(child, 'object_creation_expression'); + if (!objCreation) continue; + const ctorTypeNode = objCreation.childForFieldName('type'); + if (!ctorTypeNode) continue; + const ctorType = extractCSharpTypeName(ctorTypeNode); + if (ctorType) setTypeMapEntry(ctx.typeMap, nameNode.text, ctorType, 1.0); + } + return; + } + + const typeName = extractCSharpTypeName(typeNode); + if (!typeName) return; for (let i = 0; i < node.childCount; i++) { const child = node.child(i); if (child?.type !== 'variable_declarator') continue; const 
nameNode = child.childForFieldName('name') || child.child(0); if (nameNode?.type !== 'identifier' || !ctx.typeMap) continue; - const typeName = isVar ? extractVarInitType(child) : explicitTypeName; if (typeName) setTypeMapEntry(ctx.typeMap, nameNode.text, typeName, 0.9); } } diff --git a/tests/parsers/csharp.test.ts b/tests/parsers/csharp.test.ts index 20170765..f0b9036d 100644 --- a/tests/parsers/csharp.test.ts +++ b/tests/parsers/csharp.test.ts @@ -160,8 +160,8 @@ public class Service : BaseService, IDisposable { service.AddUser(null); } }`); - expect(symbols.typeMap.get('service')).toEqual({ type: 'UserService', confidence: 0.9 }); - expect(symbols.typeMap.get('repo')).toEqual({ type: 'UserRepository', confidence: 0.9 }); + expect(symbols.typeMap.get('service')).toEqual({ type: 'UserService', confidence: 1.0 }); + expect(symbols.typeMap.get('repo')).toEqual({ type: 'UserRepository', confidence: 1.0 }); }); it('populates typeMap for explicitly-typed local variables', () => {