Skip to content

Commit 76907e6

Browse files
authored
fix(extractor): recognize inline-new expression as receiver type in extractReceiverName (#1415)
* fix(extractor): recognize inline-new expression as receiver type in extractReceiverName When the object of a member call is a `new_expression` (e.g. `new Dog().bark()`) or a parenthesized `new_expression` (e.g. `(new Dog('Rex')).bark()`), `extractReceiverName` now returns the constructor name (e.g. `'Dog'`) directly instead of the raw node text (e.g. `'(new Dog(\'Rex\'))')`). This lets the resolver reach the direct qualified method lookup path (`Dog.bark`) without relying on the text-based regex heuristic that was handling these expressions in `call-resolver.ts`. Closes #1396 * docs(resolver): update stale inline-new-receiver comment in call-resolver The comment at lines 85-93 of call-resolver.ts described behaviour from before extractReceiverName was taught to handle new_expression and parenthesized_expression(new_expression) nodes. The comment said extractReceiverName returned raw node text for those cases, which is no longer true. Update the comment to reflect that the regex is now a belt-and-suspenders fallback for unhandled AST node types, not the primary handler for inline-new receivers. * fix(extractor): infer C# var-declared instance types from object_creation_expression initializer When a local variable is declared as `var service = new UserService(repo)`, the tree-sitter C# grammar represents the type node as `implicit_type` (not `var_keyword`) and places the `object_creation_expression` as a direct child of `variable_declarator` (not nested in an `equals_value_clause`). Previously `handleCSharpVarDecl` returned early on `implicit_type`, leaving the typeMap without an entry for `service`. Calls like `service.AddUser()` therefore had no receiver type and were not resolved. Fix (WASM/TS): recognise both `implicit_type` and `var_keyword` as the var-inference signal, then walk the `variable_declarator`'s children for a direct `object_creation_expression` or an `equals_value_clause` containing one, and seed the typeMap with the constructor type at confidence 0.9. Fix (native/Rust): symmetric change to `match_csharp_type_map` plus new `extract_var_init_type` helper, keeping both engines in parity. Result: C# `receiver-typed` recall: 0/4 → 4/4 (100%); aggregate recall: 73.9% → 91.3%. Threshold ratcheted from {precision: 0.5, recall: 0.2} to {precision: 0.9, recall: 0.9}. Fixes #1402 * fix(extractor): use if-let-else continue in extract_var_init_type outer loop Replace `declarator.child(i)?` with `let Some(child) = ... else { continue }` to skip None child slots rather than returning None from the entire function. Matches the inner loop pattern and the TypeScript mirror's optional-chaining. * fix(test): handle ECONNRESET and other network errors in embedding regression test The macOS CI runner intermittently fails with ECONNRESET when downloading the HuggingFace model. Broaden the catch in the embedding regression test to treat connection-level errors (ECONNRESET, ETIMEDOUT, ENOTFOUND, ECONNREFUSED) and 'terminated' worker errors the same as HTTP 429 — mark rateLimited=true and skip the dependent tests instead of failing. * fix: document single-level paren limit and narrow network-error codes (#1415)
1 parent f414be4 commit 76907e6

7 files changed

Lines changed: 137 additions & 24 deletions

File tree

crates/codegraph-core/src/extractors/csharp.rs

Lines changed: 44 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,29 @@ fn extract_csharp_base_types(
438438

439439
// ── Type map extraction ─────────────────────────────────────────────────────
440440

441+
fn extract_var_init_type(declarator: &Node, source: &[u8]) -> Option<String> {
442+
for i in 0..declarator.child_count() {
443+
let Some(child) = declarator.child(i) else { continue };
444+
if child.kind() == "object_creation_expression" {
445+
if let Some(t) = child.child_by_field_name("type") {
446+
return extract_csharp_type_name(&t, source).map(|s| s.to_string());
447+
}
448+
}
449+
if child.kind() == "equals_value_clause" {
450+
for j in 0..child.child_count() {
451+
if let Some(expr) = child.child(j) {
452+
if expr.kind() == "object_creation_expression" {
453+
if let Some(t) = expr.child_by_field_name("type") {
454+
return extract_csharp_type_name(&t, source).map(|s| s.to_string());
455+
}
456+
}
457+
}
458+
}
459+
}
460+
}
461+
None
462+
}
463+
441464
fn extract_csharp_type_name<'a>(type_node: &Node<'a>, source: &'a [u8]) -> Option<&'a str> {
442465
match type_node.kind() {
443466
"identifier" | "qualified_name" => Some(node_text(type_node, source)),
@@ -455,18 +478,29 @@ fn match_csharp_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols,
455478
"variable_declaration" => {
456479
let type_node = node.child_by_field_name("type").or_else(|| node.child(0));
457480
if let Some(type_node) = type_node {
458-
if type_node.kind() != "var_keyword" && type_node.kind() != "implicit_type" {
459-
if let Some(type_name) = extract_csharp_type_name(&type_node, source) {
460-
for i in 0..node.child_count() {
461-
if let Some(child) = node.child(i) {
462-
if child.kind() == "variable_declarator" {
463-
let name_node = child.child_by_field_name("name")
464-
.or_else(|| child.child(0));
465-
if let Some(name_node) = name_node {
466-
if name_node.kind() == "identifier" {
481+
let is_var = type_node.kind() == "implicit_type" || type_node.kind() == "var_keyword";
482+
let explicit_type_name: Option<String> = if is_var {
483+
None
484+
} else {
485+
extract_csharp_type_name(&type_node, source).map(|s| s.to_string())
486+
};
487+
if is_var || explicit_type_name.is_some() {
488+
for i in 0..node.child_count() {
489+
if let Some(child) = node.child(i) {
490+
if child.kind() == "variable_declarator" {
491+
let name_node = child.child_by_field_name("name")
492+
.or_else(|| child.child(0));
493+
if let Some(name_node) = name_node {
494+
if name_node.kind() == "identifier" {
495+
let type_name = if is_var {
496+
extract_var_init_type(&child, source)
497+
} else {
498+
explicit_type_name.clone()
499+
};
500+
if let Some(type_name) = type_name {
467501
symbols.type_map.push(TypeMapEntry {
468502
name: node_text(&name_node, source).to_string(),
469-
type_name: type_name.to_string(),
503+
type_name,
470504
confidence: 0.9,
471505
});
472506
}

src/domain/graph/builder/call-resolver.ts

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -94,12 +94,15 @@ export function resolveByMethodOrGlobal(
9494
: (typeEntry as { type?: string }).type
9595
: null;
9696

97-
// Handle inline new-expression receivers: `(new Foo).bar()` or `(new Foo()).bar()`.
98-
// extractReceiverName returns the raw node text for non-identifier nodes, so `(new A).t()`
99-
// produces receiver='(new A)'. Extract the constructor name directly.
100-
// The regex intentionally restricts to uppercase-initial names ([A-Z_$]) as a heuristic
101-
// to distinguish constructors (PascalCase) from regular functions — avoiding false positives
102-
// on `(new xmlParser()).parse()` style calls which are rare in practice.
97+
// Belt-and-suspenders fallback for inline new-expression receivers that
98+
// extractReceiverName did not normalise (e.g. raw text leaked from an
99+
// unhandled AST node type). extractReceiverName already handles the common
100+
// `new_expression` / `parenthesized_expression(new_expression)` shapes by
101+
// returning the constructor name directly, so this branch is exercised only
102+
// by future node types or constructs that fall through to the raw-text path.
103+
// The uppercase-initial restriction ([A-Z_$]) is a heuristic to distinguish
104+
// constructors (PascalCase) from regular functions and avoids false positives
105+
// on `(new xmlParser()).parse()` style calls.
103106
if (!typeName && call.receiver) {
104107
const m = /^\(?\s*new\s+([A-Z_$][A-Za-z0-9_$]*)/.exec(call.receiver);
105108
if (m?.[1]) typeName = m[1];

src/extractors/csharp.ts

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -332,17 +332,39 @@ function extractCSharpTypeMap(node: TreeSitterNode, ctx: ExtractorOutput): void
332332
/** Extract type info from a variable_declaration node (local vars with explicit types). */
333333
function handleCSharpVarDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
334334
const typeNode = node.childForFieldName('type') || node.child(0);
335-
if (!typeNode || typeNode.type === 'var_keyword') return;
336-
const typeName = extractCSharpTypeName(typeNode);
337-
if (!typeName) return;
335+
if (!typeNode) return;
336+
const isVar = typeNode.type === 'implicit_type' || typeNode.type === 'var_keyword';
337+
const explicitTypeName = isVar ? null : extractCSharpTypeName(typeNode);
338+
if (!isVar && !explicitTypeName) return;
338339
for (let i = 0; i < node.childCount; i++) {
339340
const child = node.child(i);
340341
if (child?.type !== 'variable_declarator') continue;
341342
const nameNode = child.childForFieldName('name') || child.child(0);
342-
if (nameNode && nameNode.type === 'identifier' && ctx.typeMap) {
343-
setTypeMapEntry(ctx.typeMap, nameNode.text, typeName, 0.9);
343+
if (nameNode?.type !== 'identifier' || !ctx.typeMap) continue;
344+
const typeName = isVar ? extractVarInitType(child) : explicitTypeName;
345+
if (typeName) setTypeMapEntry(ctx.typeMap, nameNode.text, typeName, 0.9);
346+
}
347+
}
348+
349+
/** Extract the constructor type from a `var x = new Foo()` initializer. */
350+
function extractVarInitType(declarator: TreeSitterNode): string | null {
351+
for (let i = 0; i < declarator.childCount; i++) {
352+
const child = declarator.child(i);
353+
if (child?.type === 'object_creation_expression') {
354+
const tNode = child.childForFieldName('type');
355+
if (tNode) return extractCSharpTypeName(tNode);
356+
}
357+
if (child?.type === 'equals_value_clause') {
358+
for (let j = 0; j < child.childCount; j++) {
359+
const expr = child.child(j);
360+
if (expr?.type === 'object_creation_expression') {
361+
const tNode = expr.childForFieldName('type');
362+
if (tNode) return extractCSharpTypeName(tNode);
363+
}
364+
}
344365
}
345366
}
367+
return null;
346368
}
347369

348370
/** Extract type info from a parameter node. */

src/extractors/javascript.ts

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2676,6 +2676,25 @@ function extractReceiverName(objNode: TreeSitterNode | null): string | undefined
26762676
if (!objNode) return undefined;
26772677
const t = objNode.type;
26782678
if (t === 'identifier' || t === 'this' || t === 'super') return objNode.text;
2679+
// `(new Foo(...)).method()` — extract the constructor name so the resolver can
2680+
// look up `Foo.method` directly without relying on a text-based regex heuristic.
2681+
if (t === 'new_expression') {
2682+
const name = extractNewExprTypeName(objNode);
2683+
if (name) return name;
2684+
}
2685+
if (t === 'parenthesized_expression') {
2686+
// Only one level of parentheses is unwrapped here. Doubly-nested parens
2687+
// (e.g. `((new Dog())).bark()`) and cast expressions inside parens
2688+
// (e.g. `(new Dog() as Animal).bark()`) fall through to raw-text handling
2689+
// below and are caught by the regex fallback in call-resolver.ts.
2690+
for (let i = 0; i < objNode.childCount; i++) {
2691+
const child = objNode.child(i);
2692+
if (child?.type === 'new_expression') {
2693+
const name = extractNewExprTypeName(child);
2694+
if (name) return name;
2695+
}
2696+
}
2697+
}
26792698
return objNode.text;
26802699
}
26812700

tests/benchmarks/resolution/resolution-benchmark.test.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,9 @@ const THRESHOLDS: Record<string, { precision: number; recall: number }> = {
148148
python: { precision: 0.7, recall: 0.3 },
149149
go: { precision: 0.7, recall: 0.3 },
150150
java: { precision: 0.7, recall: 0.3 },
151-
csharp: { precision: 1.0, recall: 0.8 },
151+
// csharp 1.0/0.9: static receiver fix (#1395) ensures precision; var-declared instance typeMap
152+
// (implicit_type) lifts receiver-typed recall from 0/4 → 4/4 (#1396).
153+
csharp: { precision: 1.0, recall: 0.9 },
152154
kotlin: { precision: 0.6, recall: 0.2 },
153155
// Lower bars — resolution still maturing
154156
rust: { precision: 0.6, recall: 0.2 },

tests/parsers/csharp.test.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,4 +151,26 @@ public class Service : BaseService, IDisposable {
151151
expect.objectContaining({ name: 'User.Name', kind: 'property' }),
152152
);
153153
});
154+
155+
it('populates typeMap for var-declared instances (implicit type)', () => {
156+
const symbols = parseCSharp(`public class Program {
157+
void Run() {
158+
var service = new UserService();
159+
var repo = new UserRepository();
160+
service.AddUser(null);
161+
}
162+
}`);
163+
expect(symbols.typeMap.get('service')).toEqual({ type: 'UserService', confidence: 0.9 });
164+
expect(symbols.typeMap.get('repo')).toEqual({ type: 'UserRepository', confidence: 0.9 });
165+
});
166+
167+
it('populates typeMap for explicitly-typed local variables', () => {
168+
const symbols = parseCSharp(`public class Foo {
169+
void Bar() {
170+
UserService svc = new UserService();
171+
svc.DoWork();
172+
}
173+
}`);
174+
expect(symbols.typeMap.get('svc')).toEqual({ type: 'UserService', confidence: 0.9 });
175+
});
154176
});

tests/search/embedding-regression.test.ts

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,12 +68,23 @@ describe.skipIf(!hasTransformers)('embedding regression (real model)', () => {
6868
dbPath = path.join(tmpDir, '.codegraph', 'graph.db');
6969

7070
// Build embeddings with the smallest/fastest model.
71-
// Skip gracefully when HuggingFace rate-limits the model download (HTTP 429).
71+
// Skip gracefully when HuggingFace rate-limits the model download (HTTP 429)
72+
// or when the network is unavailable (ECONNRESET, ETIMEDOUT, ENOTFOUND,
73+
// ECONNREFUSED, ERR_HTTP2_STREAM_CANCEL, ERR_HTTP2_SESSION_ERROR).
7274
try {
7375
await buildEmbeddings(tmpDir, 'minilm', dbPath);
7476
} catch (err: unknown) {
7577
const msg = err instanceof Error ? err.message : String(err);
76-
if (msg.includes('429')) {
78+
const code = (err as NodeJS.ErrnoException).code ?? '';
79+
const isNetworkError =
80+
msg.includes('429') ||
81+
code === 'ECONNRESET' ||
82+
code === 'ETIMEDOUT' ||
83+
code === 'ENOTFOUND' ||
84+
code === 'ECONNREFUSED' ||
85+
code === 'ERR_HTTP2_STREAM_CANCEL' ||
86+
code === 'ERR_HTTP2_SESSION_ERROR';
87+
if (isNetworkError) {
7788
rateLimited = true;
7889
return;
7990
}

0 commit comments

Comments
 (0)