Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
3892e7d
chore: gitignore napi-generated artifacts in crates/codegraph-core
carlos-alm Jun 13, 2026
ef8ea4f
chore(tests): remove unused biome suppression in visitor.test.ts
carlos-alm Jun 13, 2026
a372b82
fix(titan-run): sync --start-from enum and phase-timestamp list with …
carlos-alm Jun 13, 2026
9a52c7c
fix(hooks): track Bash file modifications via before/after git status…
carlos-alm Jun 13, 2026
85a26df
chore(native): remove dead code (unused var, method, variant, fields)
carlos-alm Jun 13, 2026
184d221
refactor(native): extract emit_pts_alias_edges params into PtsAliasCt…
carlos-alm Jun 13, 2026
909e1df
fix(wasm): sort call targets by confidence before emit to match nativ…
carlos-alm Jun 13, 2026
66fc899
fix(bench): add 2 warmup runs and raise INCREMENTAL_RUNS to 5 for inc…
carlos-alm Jun 13, 2026
84e1a5f
ci(bench): add per-PR perf canary for extractor/graph/native changes
carlos-alm Jun 13, 2026
d07b358
fix(perf): plumb symbolsOnly through parseFilesWasmInline to skip ana…
carlos-alm Jun 13, 2026
3db5d8c
fix(perf): scope runPostNativeCha to changed files on incremental builds
carlos-alm Jun 13, 2026
8b3aa3d
fix(native): add post-pass phase timings to result.phases
carlos-alm Jun 13, 2026
fd4ffd1
fix(perf): correct INLINE_BACKFILL_THRESHOLD docstring; raise thresho…
carlos-alm Jun 13, 2026
498ee21
fix(perf): guard post-native passes against unnecessary work on 1-fil…
carlos-alm Jun 13, 2026
61a9839
chore(types): remove dead protoMethodsMs field and stale comment
carlos-alm Jun 13, 2026
5f5d4d2
fix: class-scope field annotation typeMap keys to prevent cross-class…
carlos-alm Jun 13, 2026
29dd101
fix(bench): update elixir/julia/objc expected-edges to module-qualifi…
carlos-alm Jun 13, 2026
9320ed2
fix(wasm): emit receiver edges for declaration-typed locals in C++/CUDA
carlos-alm Jun 13, 2026
7313330
fix(native): resolve Go factory and Python constructor receiver types…
carlos-alm Jun 13, 2026
f9608be
fix(review): address Greptile review comments and fix lint failures
carlos-alm Jun 13, 2026
d6c4d3c
fix: resolve merge conflicts with main
carlos-alm Jun 13, 2026
90df67b
fix(dataflow): remove stale struct-pattern syntax from unit variant m…
carlos-alm Jun 13, 2026
10bc11f
fix: resolve merge conflicts with main
carlos-alm Jun 13, 2026
6ea7b3f
fix: resolve merge conflicts with main
carlos-alm Jun 14, 2026
d6c13b0
fix: resolve merge conflicts with main
carlos-alm Jun 14, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
195 changes: 195 additions & 0 deletions crates/codegraph-core/src/extractors/go.rs
Original file line number Diff line number Diff line change
Expand Up @@ -317,10 +317,144 @@ fn match_go_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, _dep
"var_spec" | "parameter_declaration" => {
collect_go_typed_identifiers(node, source, &mut symbols.type_map);
}
// x := Struct{} / x := &Struct{} / x := NewFoo() — short variable declarations.
"short_var_declaration" => {
infer_short_var_types(node, source, &mut symbols.type_map);
}
_ => {}
}
}

/// Walk a Go `short_var_declaration` and seed `type_map` for every variable whose
/// initializer is `Struct{}`, `&Struct{}`, or a `New*` call.
///
/// Left-hand identifiers are paired with right-hand expressions by position; a
/// variable with no expression at the same position is skipped. Mirrors the JS
/// `inferShortVarType` → `inferCompositeLiteral` / `inferAddressOfComposite`
/// / `inferFactoryCall` chain in `src/extractors/go.ts`.
fn infer_short_var_types(node: &Node, source: &[u8], type_map: &mut Vec<TypeMapEntry>) {
    let (Some(left), Some(right)) = (
        node.child_by_field_name("left"),
        node.child_by_field_name("right"),
    ) else {
        return;
    };

    // Declared names: a lone identifier, or the identifiers of an expression_list.
    // Any other left-hand shape is not handled.
    let targets: Vec<Node> = match left.kind() {
        "expression_list" => (0..left.child_count())
            .filter_map(|i| left.child(i))
            .filter(|child| child.kind() == "identifier")
            .collect(),
        "identifier" => vec![left],
        _ => return,
    };

    // Initializers: every child of an expression_list except the `,` separators,
    // or the single right-hand node itself.
    let values: Vec<Node> = match right.kind() {
        "expression_list" => (0..right.child_count())
            .filter_map(|i| right.child(i))
            .filter(|child| child.kind() != ",")
            .collect(),
        _ => vec![right],
    };

    // `zip` stops at the shorter side, so surplus names are left without an entry.
    for (var_node, rhs) in targets.iter().zip(values.iter()) {
        infer_single_short_var(var_node, rhs, source, type_map);
    }
}

/// Run the three inference strategies for one variable/initializer pair, in order:
/// composite literal, address-of composite literal, then `New*` factory call.
/// The first strategy that reports success ends the chain.
fn infer_single_short_var(
    var_node: &Node,
    rhs: &Node,
    source: &[u8],
    type_map: &mut Vec<TypeMapEntry>,
) {
    // `||` short-circuits, so later strategies only run when earlier ones returned false.
    let _seeded = infer_composite_literal(var_node, rhs, source, type_map)
        || infer_address_of_composite(var_node, rhs, source, type_map)
        || infer_factory_call(var_node, rhs, source, type_map);
}

/// Handle `x := Struct{...}`: push `x : Struct` with confidence 1.0.
///
/// Returns `true` only when `rhs` is a `composite_literal` whose `type` field
/// yields a name through `extract_go_type_name`; otherwise nothing is pushed.
fn infer_composite_literal(
    var_node: &Node,
    rhs: &Node,
    source: &[u8],
    type_map: &mut Vec<TypeMapEntry>,
) -> bool {
    if rhs.kind() == "composite_literal" {
        if let Some(literal_type) = rhs.child_by_field_name("type") {
            if let Some(resolved) = extract_go_type_name(&literal_type, source) {
                let entry = TypeMapEntry {
                    name: node_text(var_node, source).to_string(),
                    type_name: resolved.to_string(),
                    confidence: 1.0,
                };
                type_map.push(entry);
                return true;
            }
        }
    }
    false
}

/// `x := &Struct{...}` → seed x : Struct at conf 1.0.
Comment on lines +382 to +393

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Unary operator not verified before treating as address-of

infer_address_of_composite only checks that the unary_expression's operand is a composite_literal, but never verifies the operator is &. In theory, any other unary operator applied to a composite literal (e.g. a hypothetical ^Struct{}) would still seed the typeMap. While valid Go syntax prevents non-& unary ops on composite literals from compiling, the function operates on the raw AST, so a defensive check on the operator node would make the intent explicit and guard against future grammar edge-cases.

Fix in Claude Code

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed — added a defensive check for the & operator before treating a unary_expression as address-of: if node_text(&op_node, source) != "&" { return false; }. This makes the intent explicit and guards against future grammar edge cases.

fn infer_address_of_composite(
var_node: &Node,
rhs: &Node,
source: &[u8],
type_map: &mut Vec<TypeMapEntry>,
) -> bool {
if rhs.kind() != "unary_expression" { return false; }
// Verify the operator is `&` — guards against any other unary operator
// applied to a composite literal on a raw AST.
let Some(op_node) = rhs.child(0) else { return false };
if node_text(&op_node, source) != "&" { return false; }
// The operand of `&` is a composite_literal.
let Some(operand) = rhs.child_by_field_name("operand") else { return false };
if operand.kind() != "composite_literal" { return false; }
let Some(type_node) = operand.child_by_field_name("type") else { return false };
let Some(type_name) = extract_go_type_name(&type_node, source) else { return false };
type_map.push(TypeMapEntry {
name: node_text(var_node, source).to_string(),
type_name: type_name.to_string(),
confidence: 1.0,
});
true
}

/// Handle `x := NewFoo(...)` or `x := pkg.NewFoo(...)`: push `x : Foo` with confidence 0.7.
///
/// The type name is whatever follows the literal prefix `New` in the called name
/// (for a selector call only the `field` part is inspected). Returns `false`
/// without pushing when `rhs` is not a call, the callee is neither an identifier
/// nor a selector expression, the name lacks the prefix, or nothing follows it.
fn infer_factory_call(
    var_node: &Node,
    rhs: &Node,
    source: &[u8],
    type_map: &mut Vec<TypeMapEntry>,
) -> bool {
    if rhs.kind() != "call_expression" {
        return false;
    }
    let Some(fn_node) = rhs.child_by_field_name("function") else { return false };

    // Pick the node that carries the function's own name; both supported callee
    // shapes share every step after this point.
    let name_node = match fn_node.kind() {
        // pkg.NewFoo(...) — use the field name only.
        "selector_expression" => {
            let Some(field) = fn_node.child_by_field_name("field") else { return false };
            field
        }
        "identifier" => fn_node,
        _ => return false,
    };

    let fn_text = node_text(&name_node, source);
    let Some(type_name) = fn_text.strip_prefix("New") else { return false };
    // A function named exactly `New` leaves no type name to record.
    if type_name.is_empty() {
        return false;
    }
    type_map.push(TypeMapEntry {
        name: node_text(var_node, source).to_string(),
        type_name: type_name.to_string(),
        confidence: 0.7,
    });
    true
}

fn collect_go_typed_identifiers(node: &Node, source: &[u8], type_map: &mut Vec<TypeMapEntry>) {
let Some(type_node) = node.child_by_field_name("type") else { return };
let Some(type_name) = extract_go_type_name(&type_node, source) else { return };
Expand Down Expand Up @@ -412,4 +546,65 @@ mod tests {
let c = s.definitions.iter().find(|d| d.name == "MaxRetries").unwrap();
assert_eq!(c.kind, "constant");
}

// ── Short-var-declaration typeMap tests ─────────────────────────────────

#[test]
fn infers_factory_call_new_prefix() {
    // A bare `New*` call seeds the variable with the text after `New`:
    // svc := NewUserService(repo) → svc : UserService at conf 0.7
    let src = "package main\nfunc main() {\n svc := NewUserService(repo)\n _ = svc\n}\n";
    let symbols = parse_go(src);
    let Some(svc) = symbols.type_map.iter().find(|e| e.name == "svc") else {
        panic!("expected svc in type_map");
    };
    assert_eq!(svc.type_name, "UserService");
    assert!((svc.confidence - 0.7).abs() < f64::EPSILON);
}

#[test]
fn infers_pkg_factory_call() {
    // svc := service.NewUserService(repo) → svc : UserService at conf 0.7.
    // Only the selector's field name is inspected; the package qualifier is ignored.
    let s = parse_go(
        "package main\nfunc main() {\n svc := service.NewUserService(repo)\n _ = svc\n}\n",
    );
    let entry = s
        .type_map
        .iter()
        .find(|e| e.name == "svc")
        .expect("expected svc in type_map for pkg.NewX");
    assert_eq!(entry.type_name, "UserService");
    // Pin the confidence as well, so the selector branch cannot drift away from
    // the bare-identifier branch unnoticed.
    assert!((entry.confidence - 0.7).abs() < f64::EPSILON);
}

#[test]
fn infers_composite_literal() {
    // A struct literal names its own type:
    // u := User{Name: "Alice"} → u : User at conf 1.0
    let src = "package main\nfunc main() {\n u := User{Name: \"Alice\"}\n _ = u\n}\n";
    let symbols = parse_go(src);
    let Some(user) = symbols.type_map.iter().find(|e| e.name == "u") else {
        panic!("expected u in type_map for composite literal");
    };
    assert_eq!(user.type_name, "User");
    assert!((user.confidence - 1.0).abs() < f64::EPSILON);
}

#[test]
fn infers_address_of_composite() {
    // u := &User{} → u : User at conf 1.0
    let s = parse_go(
        "package main\nfunc main() {\n u := &User{}\n _ = u\n}\n",
    );
    let entry = s
        .type_map
        .iter()
        .find(|e| e.name == "u")
        .expect("expected u in type_map for address-of composite literal");
    assert_eq!(entry.type_name, "User");
    // The address-of form carries the same certainty as the plain literal form.
    assert!((entry.confidence - 1.0).abs() < f64::EPSILON);
}

#[test]
fn non_new_prefix_not_inferred() {
    // Calls whose name does not start with `New` must leave the variable untyped:
    // srv := createServer() → no typeMap entry for srv
    let src = "package main\nfunc main() {\n srv := createServer()\n _ = srv\n}\n";
    let symbols = parse_go(src);
    let seeded = symbols.type_map.iter().any(|e| e.name == "srv");
    assert!(!seeded, "unexpected typeMap entry for non-New factory");
}
}
154 changes: 154 additions & 0 deletions crates/codegraph-core/src/extractors/python.rs
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,53 @@ fn extract_python_type_name<'a>(type_node: &Node<'a>, source: &'a [u8]) -> Optio
}
}

/// Report whether `name` is one of the capitalised Python builtin / stdlib class
/// names that the constructor-call heuristic must not treat as a project type.
/// The comparison is exact and case-sensitive. Mirrors `BUILTIN_GLOBALS_PY`
/// in `src/extractors/python.ts`.
fn is_python_builtin(name: &str) -> bool {
    // Exception hierarchy first, then the non-exception stdlib classes.
    const BUILTIN_NAMES: &[&str] = &[
        "Exception",
        "BaseException",
        "ValueError",
        "TypeError",
        "KeyError",
        "IndexError",
        "AttributeError",
        "RuntimeError",
        "OSError",
        "IOError",
        "FileNotFoundError",
        "PermissionError",
        "NotImplementedError",
        "StopIteration",
        "GeneratorExit",
        "SystemExit",
        "KeyboardInterrupt",
        "ArithmeticError",
        "LookupError",
        "UnicodeError",
        "UnicodeDecodeError",
        "UnicodeEncodeError",
        "ImportError",
        "ModuleNotFoundError",
        "ConnectionError",
        "TimeoutError",
        "OverflowError",
        "ZeroDivisionError",
        "NameError",
        "SyntaxError",
        "RecursionError",
        "MemoryError",
        "Path",
        "PurePath",
        "OrderedDict",
        "Counter",
        "Decimal",
        "Fraction",
    ];
    BUILTIN_NAMES.contains(&name)
}

fn match_python_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) {
match node.kind() {
"typed_parameter" => {
Expand Down Expand Up @@ -357,6 +404,52 @@ fn match_python_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols,
}
}
}
// `order = Order(...)` → seed order : Order at conf 1.0.
// `obj = Module.Class(...)` → seed obj : Module at conf 0.7 (only when the object name is uppercase and not a known builtin).
// Mirrors `handlePyAssignmentType` in `src/extractors/python.ts`.
"assignment" => {
infer_py_assignment_type(node, source, &mut symbols.type_map);
}
_ => {}
}
}

/// Seed typeMap from plain Python assignments where the RHS is a constructor or factory call.
///
/// Only the shape `identifier = call(...)` is handled; any other left- or right-hand
/// node kind returns without touching `type_map`.
/// - `name = Upper(...)`: pushes `name : Upper` at confidence 1.0. No builtin
///   filtering is applied on this path.
/// - `name = Upper.attr(...)`: pushes `name : Upper` (the object name, not the
///   attribute) at confidence 0.7, unless `is_python_builtin(Upper)` is true.
fn infer_py_assignment_type(node: &Node, source: &[u8], type_map: &mut Vec<TypeMapEntry>) {
let Some(left) = node.child_by_field_name("left") else { return };
let Some(right) = node.child_by_field_name("right") else { return };
// Tuple targets, attribute targets and non-call right-hand sides are ignored.
if left.kind() != "identifier" || right.kind() != "call" { return; }
let var_name = node_text(&left, source).to_string();
let Some(fn_node) = right.child_by_field_name("function") else { return };
match fn_node.kind() {
"identifier" => {
// `order = Order(...)` — uppercase first char → constructor, conf 1.0.
let name = node_text(&fn_node, source);
// An empty name yields `None` from `next()` and is treated as "not uppercase".
if name.chars().next().map(|c| c.is_uppercase()).unwrap_or(false) {
type_map.push(TypeMapEntry {
name: var_name,
type_name: name.to_string(),
confidence: 1.0,
});
}
}
"attribute" => {
// `obj = Module.Class(...)` — uppercase object name, not a builtin → conf 0.7.
if let Some(obj_node) = fn_node.child_by_field_name("object") {
// Only a bare identifier object qualifies; other object node kinds are skipped.
if obj_node.kind() == "identifier" {
let obj_name = node_text(&obj_node, source);
if obj_name.chars().next().map(|c| c.is_uppercase()).unwrap_or(false)
&& !is_python_builtin(obj_name)
{
// The recorded type is the object name, not the attribute being called.
type_map.push(TypeMapEntry {
name: var_name,
type_name: obj_name.to_string(),
confidence: 0.7,
});
Comment on lines +435 to +448

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 attribute branch seeds the module name, not the class name, as receiver type

For svc = Models.UserService(db), the typeMap entry is svc → "Models" at 0.7 confidence. When svc.create_user() is later resolved, the resolver looks up Models.create_user — not UserService.create_user. The test documents this as intentional parity with the JS extractor. Worth confirming the JS version also stores the module name, and whether the class name would produce more useful edges.

Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time! Can you confirm the JS extractor also stores the module name (not the class name) in the attribute case, and that downstream resolution benefits from this?

Fix in Claude Code

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Confirmed — the JS extractor (handlePyAssignmentType in src/extractors/python.ts line 397) also stores the module/object name at 0.7, not the class name. For svc = Models.UserService(db), the resolver sees svc → Models and resolves svc.create_user() as Models.create_user. Since import resolution maps Models to the imported module, this is consistent with how the WASM engine works. The Rust extractor intentionally mirrors this behavior.

}
}
}
}
_ => {}
}
}
Expand Down Expand Up @@ -455,4 +548,65 @@ mod tests {
let c = s.definitions.iter().find(|d| d.name == "MAX_RETRIES").unwrap();
assert_eq!(c.kind, "constant");
}

// ── Assignment typeMap tests ─────────────────────────────────────────────

#[test]
fn infers_constructor_call_uppercase() {
    // A capitalised callee is treated as a constructor:
    // order = Order("o1", 100.0) → order : Order at conf 1.0
    let src = "def run():\n order = Order(\"o1\", 100.0)\n order.validate()\n";
    let symbols = parse_py(src);
    let Some(order) = symbols.type_map.iter().find(|e| e.name == "order") else {
        panic!("expected order in type_map");
    };
    assert_eq!(order.type_name, "Order");
    assert!((order.confidence - 1.0).abs() < f64::EPSILON);
}

#[test]
fn infers_module_factory_call() {
    // svc = Models.UserService(db) → svc : Models at conf 0.7
    // The recorded type is the (uppercase) object name, matching the JS heuristic.
    let src = "def run():\n svc = Models.UserService(db)\n svc.create()\n";
    let symbols = parse_py(src);
    let Some(svc) = symbols.type_map.iter().find(|e| e.name == "svc") else {
        panic!("expected svc in type_map for Module.Class(...)");
    };
    assert_eq!(svc.type_name, "Models");
    assert!((svc.confidence - 0.7).abs() < f64::EPSILON);
}

#[test]
fn does_not_infer_lowercase_module_factory() {
    // A lowercase object name fails the uppercase check, so nothing is seeded (matches JS):
    // svc = models.UserService(db) → no typeMap entry for svc
    let src = "def run():\n svc = models.UserService(db)\n svc.create()\n";
    let symbols = parse_py(src);
    let seeded = symbols.type_map.iter().any(|e| e.name == "svc");
    assert!(!seeded, "should not seed typeMap for lowercase module prefix");
}

#[test]
fn does_not_infer_lowercase_constructor() {
    // A lowercase callee is an ordinary function call, not a constructor:
    // obj = create_thing() → no typeMap entry for obj
    let src = "def run():\n obj = create_thing()\n obj.work()\n";
    let symbols = parse_py(src);
    let seeded = symbols.type_map.iter().any(|e| e.name == "obj");
    assert!(!seeded, "should not seed typeMap for lowercase function call");
}

#[test]
fn does_not_infer_builtin_exception() {
    // err = ValueError("msg")
    //
    // Despite this test's name, builtins ARE seeded at conf 1.0 by the identifier
    // branch: only the attribute/factory branch (Module.Class) consults
    // is_python_builtin. The JS extractor behaves the same way, and this test pins
    // that parity. The assertions are unconditional so the test fails, rather than
    // passing vacuously, if the identifier branch ever stops producing the entry.
    let s = parse_py("def run():\n err = ValueError(\"msg\")\n");
    let entry = s
        .type_map
        .iter()
        .find(|e| e.name == "err")
        .expect("identifier branch should seed err even for a builtin exception name");
    assert_eq!(entry.type_name, "ValueError");
    assert!((entry.confidence - 1.0).abs() < f64::EPSILON);
}
}
Loading
Loading