Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 29 additions & 47 deletions crates/codegraph-core/src/extractors/csharp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -438,33 +438,6 @@ fn extract_csharp_base_types(

// ── Type map extraction ─────────────────────────────────────────────────────

/// Infer the constructed type of a `var x = new Foo()` declarator from its initializer.
///
/// Scans the direct children of `declarator` in order, looking for an
/// `object_creation_expression` either as a direct child or one level down inside an
/// `equals_value_clause`. The first creation expression that carries a `type` field decides
/// the outcome: that field is handed to `extract_csharp_type_name` and its result (which may
/// itself be `None`) is returned as an owned `String`.
///
/// Returns `None` when no creation expression with a `type` field is present.
fn extract_var_init_type(declarator: &Node, source: &[u8]) -> Option<String> {
    let direct_children = (0..declarator.child_count()).filter_map(|idx| declarator.child(idx));
    for child in direct_children {
        match child.kind() {
            // Fallback shape: the creation expression hangs directly off the declarator.
            // Kept as a defensive path alongside the `equals_value_clause` wrapping below.
            "object_creation_expression" => {
                if let Some(type_node) = child.child_by_field_name("type") {
                    return extract_csharp_type_name(&type_node, source).map(str::to_owned);
                }
            }
            // Wrapped shape: `= new Foo()` lives inside an equals_value_clause.
            "equals_value_clause" => {
                let clause_parts = (0..child.child_count()).filter_map(|idx| child.child(idx));
                for expr in clause_parts {
                    if expr.kind() != "object_creation_expression" {
                        continue;
                    }
                    if let Some(type_node) = expr.child_by_field_name("type") {
                        return extract_csharp_type_name(&type_node, source).map(str::to_owned);
                    }
                }
            }
            _ => {}
        }
    }
    None
}

fn extract_csharp_type_name<'a>(type_node: &Node<'a>, source: &'a [u8]) -> Option<&'a str> {
match type_node.kind() {
"identifier" | "qualified_name" => Some(node_text(type_node, source)),
Expand All @@ -482,29 +455,38 @@ fn match_csharp_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols,
"variable_declaration" => {
let type_node = node.child_by_field_name("type").or_else(|| node.child(0));
if let Some(type_node) = type_node {
let is_var = type_node.kind() == "implicit_type" || type_node.kind() == "var_keyword";
let explicit_type_name: Option<String> = if is_var {
None
} else {
extract_csharp_type_name(&type_node, source).map(|s| s.to_string())
};
if is_var || explicit_type_name.is_some() {
if type_node.kind() == "implicit_type" {
// var x = new Foo() — infer type from object_creation_expression initializer
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
if child.kind() == "variable_declarator" {
let name_node = child.child_by_field_name("name")
.or_else(|| child.child(0));
if let Some(name_node) = name_node {
if name_node.kind() == "identifier" {
let type_name = if is_var {
extract_var_init_type(&child, source)
} else {
explicit_type_name.clone()
};
if let Some(type_name) = type_name {
if let Some(declarator) = node.child(i) {
if declarator.kind() != "variable_declarator" { continue; }
let name_node = declarator.child_by_field_name("name")
.or_else(|| declarator.child(0));
let Some(name_node) = name_node else { continue };
if name_node.kind() != "identifier" { continue; }
let Some(obj_creation) = find_child(&declarator, "object_creation_expression") else { continue };
let Some(ctor_type_node) = obj_creation.child_by_field_name("type") else { continue };
if let Some(ctor_type) = extract_csharp_type_name(&ctor_type_node, source) {
symbols.type_map.push(TypeMapEntry {
name: node_text(&name_node, source).to_string(),
type_name: ctor_type.to_string(),
confidence: 1.0,
});
}
}
}
} else if type_node.kind() != "var_keyword" {
if let Some(type_name) = extract_csharp_type_name(&type_node, source) {
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
if child.kind() == "variable_declarator" {
let name_node = child.child_by_field_name("name")
.or_else(|| child.child(0));
if let Some(name_node) = name_node {
if name_node.kind() == "identifier" {
symbols.type_map.push(TypeMapEntry {
name: node_text(&name_node, source).to_string(),
type_name,
type_name: type_name.to_string(),
confidence: 0.9,
});
}
Expand Down
2 changes: 1 addition & 1 deletion src/domain/graph/builder/call-resolver.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ const MODULE_SCOPED_BARE_CALL_EXTENSIONS = new Set([
'.cts',
]);

function isModuleScopedLanguage(relPath: string): boolean {
export function isModuleScopedLanguage(relPath: string): boolean {
const ext = relPath.slice(relPath.lastIndexOf('.'));
return MODULE_SCOPED_BARE_CALL_EXTENSIONS.has(ext);
}
Expand Down
33 changes: 30 additions & 3 deletions src/domain/graph/builder/stages/build-edges.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import { enrichTypeMapWithTsc } from '../../resolver/ts-resolver.js';
import {
type CallNodeLookup,
findCaller,
isModuleScopedLanguage,
resolveCallTargets,
resolveReceiverEdge,
} from '../call-resolver.js';
Expand Down Expand Up @@ -1340,9 +1341,35 @@ function buildFileCallEdges(
// not the enclosing class, so qualifying with the child class name would
// produce a false edge when the child also defines a same-named method.
if (targets.length === 0 && call.receiver === 'this' && caller.callerName != null) {
const dotIdx = caller.callerName.indexOf('.');
if (dotIdx > 0) {
const className = caller.callerName.slice(0, dotIdx);
const lastDot = caller.callerName.lastIndexOf('.');
if (lastDot > 0) {
const prevDot = caller.callerName.lastIndexOf('.', lastDot - 1);
const className = caller.callerName.slice(prevDot + 1, lastDot);
const qualifiedName = `${className}.${call.name}`;
const qualified = lookup
.byNameAndFile(qualifiedName, relPath)
.filter((n) => n.kind === 'method');
if (qualified.length > 0) {
targets = qualified;
}
}
}

// Same-class bare-call fallback: when a no-receiver call can't be resolved
// globally, try the caller's own class as a qualifier. Handles C# static
// sibling calls: `IsValidEmail()` inside `Validators.ValidateUser` resolves
// to `Validators.IsValidEmail`. Skipped for JS/TS where bare calls are
// module-scoped, not class-scoped.
if (
targets.length === 0 &&
!call.receiver &&
caller.callerName != null &&
!isModuleScopedLanguage(relPath)
) {
const lastDot = caller.callerName.lastIndexOf('.');
if (lastDot > 0) {
const prevDot = caller.callerName.lastIndexOf('.', lastDot - 1);
const className = caller.callerName.slice(prevDot + 1, lastDot);
const qualifiedName = `${className}.${call.name}`;
const qualified = lookup
.byNameAndFile(qualifiedName, relPath)
Expand Down
96 changes: 20 additions & 76 deletions src/domain/graph/builder/stages/native-orchestrator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -402,9 +402,9 @@ async function runPostNativeAnalysis(
* Note: `this`/`super` dispatch is handled separately by `runPostNativeThisDispatch`,
* which WASM-re-parses JS/TS files to obtain raw call site receiver info.
*
* Returns the set of target node IDs for newly inserted CHA edges so the caller
* can re-classify roles for the affected implementation files. An empty set
* means no edges were added and role re-classification is unnecessary.
* Returns the count of newly inserted CHA edges so the caller can determine
* whether a full role re-classification is needed. Zero means no edges were
* added and role re-classification is unnecessary.
*/
function runPostNativeCha(db: BetterSqlite3Database): number {
// Fast guard: no hierarchy edges → no CHA work
Expand Down Expand Up @@ -1607,37 +1607,9 @@ export async function tryNativeOrchestrator(
}

// Phase 8.5: expand CHA call edges (interface dispatch → concrete implementations).
// The Rust orchestrator ran role classification BEFORE this post-pass, so without
// a re-run the newly-called implementor methods stay classified as `dead-ffi`.
//
// CHA also changes the global fan-out distribution (callee files gain fan_in, and
// new edges shift the median). A full re-classification is required — not just the
// callee files — because the median shift can change roles in unrelated files whose
// fan-out sits near the old median. (Example: a method that called two siblings
// pre-CHA might be near the median, but post-CHA the median is higher, changing
// its role from utility → core.) Using an incremental pass with a stale median
// cache would produce incorrect roles outside the CHA-affected file set.
//
// Performance: classifyNodeRoles is O(all_nodes). For most repos this is sub-100ms;
// on very large codebases (100k+ nodes) it may add a few hundred ms per build.
// If this becomes a bottleneck, consider a two-pass strategy: incremental first
// (fast, slightly inaccurate), then full only when the median shifts by >N%.
// Returns the count of newly inserted edges; used to determine whether
// a full role re-classification is needed after all edge-writing post-passes complete.
const chaEdgeCount = runPostNativeCha(ctx.db as unknown as BetterSqlite3Database);
if (chaEdgeCount > 0) {
try {
const db = ctx.db as unknown as BetterSqlite3Database;
const { classifyNodeRoles } = (await import('../../../../features/structure.js')) as {
classifyNodeRoles: (
db: BetterSqlite3Database,
changedFiles?: string[] | null,
) => Record<string, number>;
};
classifyNodeRoles(db);
debug(`CHA post-pass: full role re-classification after ${chaEdgeCount} new CHA edges`);
} catch (err) {
debug(`CHA post-pass role re-classification failed: ${toErrorMessage(err)}`);
}
}

// Function-as-object-property post-pass: the Rust engine does not yet recognise
// `fn.method = function() {}` patterns. Re-parse only those JS/TS files via
Expand All @@ -1659,51 +1631,23 @@ export async function tryNativeOrchestrator(
!!result.isFullBuild,
);

// Re-classify roles for methods that gained incoming this/super dispatch edges.
// The Rust orchestrator classifies roles BEFORE this post-pass, so target methods
// (e.g. Animal.speak, ConcreteWorker.prepare) that had no callers at Rust time
// are classified `dead` or `dead-ffi`. Inserting the new call edges does not
// automatically update those role labels — without a re-run the stale labels
// propagate to dead-code detection and API boundary analysis.
if (thisDispatchTargetIds.size > 0) {
// Full role re-classification after JS edge-writing post-passes.
// The Rust orchestrator classifies roles before these post-passes (CHA,
// this-dispatch) add edges, so the Rust-computed roles and the cached
// fan-out medians are stale. A full re-classification ensures the final
// roles reflect the true fan-in/out with all edges in place.
if (chaEdgeCount > 0 || thisDispatchTargetIds.size > 0) {
try {
const db = ctx.db as unknown as BetterSqlite3Database;
const idArray = Array.from(thisDispatchTargetIds);
const CHUNK_SIZE = 500;
const seenFiles = new Set<string>();
const affectedFiles: Array<{ file: string }> = [];
for (let i = 0; i < idArray.length; i += CHUNK_SIZE) {
const chunk = idArray.slice(i, i + CHUNK_SIZE);
const placeholders = chunk.map(() => '?').join(',');
const rows = db
.prepare(
`SELECT DISTINCT file FROM nodes WHERE id IN (${placeholders}) AND file IS NOT NULL`,
)
.all(...chunk) as Array<{ file: string }>;
for (const row of rows) {
if (!seenFiles.has(row.file)) {
seenFiles.add(row.file);
affectedFiles.push(row);
}
}
}
if (affectedFiles.length > 0) {
const { classifyNodeRoles } = (await import('../../../../features/structure.js')) as {
classifyNodeRoles: (
db: BetterSqlite3Database,
changedFiles?: string[] | null,
) => Record<string, number>;
};
classifyNodeRoles(
db,
affectedFiles.map((r) => r.file),
);
debug(
`this/super dispatch post-pass: re-classified roles for ${affectedFiles.length} target file(s)`,
);
}
const { classifyNodeRoles } = (await import('../../../../features/structure.js')) as {
classifyNodeRoles: (
db: BetterSqlite3Database,
changedFiles?: string[] | null,
) => Record<string, number>;
};
classifyNodeRoles(ctx.db as unknown as BetterSqlite3Database, null);
debug(`Post-pass full role re-classification complete`);
} catch (err) {
debug(`this/super dispatch post-pass role re-classification failed: ${toErrorMessage(err)}`);
debug(`Post-pass full role re-classification failed: ${toErrorMessage(err)}`);
}
}

Expand Down
51 changes: 22 additions & 29 deletions src/extractors/csharp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -329,43 +329,36 @@ function extractCSharpTypeMap(node: TreeSitterNode, ctx: ExtractorOutput): void
extractCSharpTypeMapDepth(node, ctx, 0);
}

/**
 * Infer the constructed type of a `var x = new Foo()` declarator from its initializer.
 *
 * Scans the declarator's direct children in order, looking for an
 * `object_creation_expression` either as a direct child or one level down inside an
 * `equals_value_clause`. The first creation expression that has a `type` field decides the
 * result: whatever `extractCSharpTypeName` yields for that field is returned as-is.
 *
 * Returns `null` when no creation expression with a `type` field is found.
 */
function extractVarInitType(declarator: TreeSitterNode): string | null {
  for (let idx = 0; idx < declarator.childCount; idx++) {
    const candidate = declarator.child(idx);
    if (!candidate) continue;

    switch (candidate.type) {
      // Fallback shape: the creation expression hangs directly off the declarator.
      // Kept as a defensive path alongside the equals_value_clause wrapping below.
      case 'object_creation_expression': {
        const ctorType = candidate.childForFieldName('type');
        if (ctorType) return extractCSharpTypeName(ctorType);
        break;
      }
      // Wrapped shape: `= new Foo()` lives inside an equals_value_clause.
      case 'equals_value_clause': {
        for (let inner = 0; inner < candidate.childCount; inner++) {
          const expr = candidate.child(inner);
          if (expr?.type !== 'object_creation_expression') continue;
          const ctorType = expr.childForFieldName('type');
          if (ctorType) return extractCSharpTypeName(ctorType);
        }
        break;
      }
      default:
        break;
    }
  }
  return null;
}

/** Extract type info from a variable_declaration node (local vars with explicit or inferred types). */
/** Extract type info from a variable_declaration node (local vars with explicit types). */
function handleCSharpVarDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
const typeNode = node.childForFieldName('type') || node.child(0);
if (!typeNode) return;
const isVar = typeNode.type === 'implicit_type' || typeNode.type === 'var_keyword';
const explicitTypeName = isVar ? null : extractCSharpTypeName(typeNode);
if (!isVar && !explicitTypeName) return;

if (typeNode.type === 'implicit_type') {
// var x = new Foo() — infer type from object_creation_expression initializer
if (!ctx.typeMap) return;
for (let i = 0; i < node.childCount; i++) {
const child = node.child(i);
if (child?.type !== 'variable_declarator') continue;
const nameNode = child.childForFieldName('name') || child.child(0);
if (nameNode?.type !== 'identifier') continue;
const objCreation = findChild(child, 'object_creation_expression');
if (!objCreation) continue;
const ctorTypeNode = objCreation.childForFieldName('type');
if (!ctorTypeNode) continue;
const ctorType = extractCSharpTypeName(ctorTypeNode);
if (ctorType) setTypeMapEntry(ctx.typeMap, nameNode.text, ctorType, 1.0);
}
return;
}

const typeName = extractCSharpTypeName(typeNode);
if (!typeName) return;
for (let i = 0; i < node.childCount; i++) {
const child = node.child(i);
if (child?.type !== 'variable_declarator') continue;
const nameNode = child.childForFieldName('name') || child.child(0);
if (nameNode?.type !== 'identifier' || !ctx.typeMap) continue;
const typeName = isVar ? extractVarInitType(child) : explicitTypeName;
if (typeName) setTypeMapEntry(ctx.typeMap, nameNode.text, typeName, 0.9);
}
}
Expand Down
4 changes: 2 additions & 2 deletions tests/parsers/csharp.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,8 @@ public class Service : BaseService, IDisposable {
service.AddUser(null);
}
}`);
expect(symbols.typeMap.get('service')).toEqual({ type: 'UserService', confidence: 0.9 });
expect(symbols.typeMap.get('repo')).toEqual({ type: 'UserRepository', confidence: 0.9 });
expect(symbols.typeMap.get('service')).toEqual({ type: 'UserService', confidence: 1.0 });
expect(symbols.typeMap.get('repo')).toEqual({ type: 'UserRepository', confidence: 1.0 });
});

it('populates typeMap for explicitly-typed local variables', () => {
Expand Down
Loading