From 4ef042e5b59f4d0047296bbe97af9ba7b3889630 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 11 Jun 2026 16:44:53 -0600 Subject: [PATCH 1/5] refactor(native): mirror crate module layout to the src/ TypeScript tree Reorganize crates/codegraph-core/src/ so every module sits at the path of its TypeScript counterpart (snake_case for kebab-case): shared/, infrastructure/, db/repository/, domain/graph/builder/stages/, ast_analysis/, graph/algorithms/, graph/classifiers/, features/. - Pure git mv moves; only graph_algorithms.rs is split (bfs, shortest_path, centrality, louvain) along its existing section boundaries - lib.rs doc comment carries the full Rust<->TypeScript mapping table - Cross-references in TS sources, tests, and docs updated to new paths - Cargo.lock version synced to 3.12.0 (Cargo.toml was already bumped) - cargo test: 360 passed; tsc build and drift-guard test green --- CLAUDE.md | 1 + Cargo.lock | 2 +- .../src/{ => ast_analysis}/cfg.rs | 2 +- .../src/{ => ast_analysis}/complexity.rs | 2 +- .../src/{ => ast_analysis}/dataflow.rs | 2 +- .../{analysis.rs => ast_analysis/engine.rs} | 10 +- crates/codegraph-core/src/ast_analysis/mod.rs | 4 + .../src/{native_db.rs => db/connection.rs} | 18 +- crates/codegraph-core/src/db/mod.rs | 2 + .../src/{ast_db.rs => db/repository/ast.rs} | 0 .../{edges_db.rs => db/repository/edges.rs} | 0 .../repository/graph_read.rs} | 4 +- .../codegraph-core/src/db/repository/mod.rs | 4 + .../src/{ => db/repository}/read_types.rs | 0 .../graph/builder}/barrel_resolution.rs | 0 .../{ => domain/graph/builder}/incremental.rs | 2 +- .../src/domain/graph/builder/mod.rs | 4 + .../graph/builder/pipeline.rs} | 134 ++++---- .../graph/builder/stages/build_edges.rs} | 22 +- .../graph/builder/stages/collect_files.rs} | 4 +- .../graph/builder/stages/detect_changes.rs} | 6 +- .../graph/builder/stages}/import_edges.rs | 8 +- .../graph/builder/stages}/insert_nodes.rs | 0 .../src/domain/graph/builder/stages/mod.rs | 5 + .../src/{ => 
domain/graph}/journal.rs | 0 crates/codegraph-core/src/domain/graph/mod.rs | 3 + .../graph/resolve.rs} | 0 crates/codegraph-core/src/domain/mod.rs | 3 + .../src/{ => domain}/parallel.rs | 4 +- .../{parser_registry.rs => domain/parser.rs} | 0 crates/codegraph-core/src/extractors/bash.rs | 4 +- crates/codegraph-core/src/extractors/c.rs | 4 +- .../codegraph-core/src/extractors/clojure.rs | 6 +- crates/codegraph-core/src/extractors/cpp.rs | 4 +- .../codegraph-core/src/extractors/csharp.rs | 4 +- crates/codegraph-core/src/extractors/cuda.rs | 4 +- crates/codegraph-core/src/extractors/dart.rs | 4 +- .../codegraph-core/src/extractors/elixir.rs | 4 +- .../codegraph-core/src/extractors/fsharp.rs | 4 +- crates/codegraph-core/src/extractors/gleam.rs | 4 +- crates/codegraph-core/src/extractors/go.rs | 4 +- .../codegraph-core/src/extractors/groovy.rs | 4 +- .../codegraph-core/src/extractors/haskell.rs | 4 +- .../codegraph-core/src/extractors/helpers.rs | 2 +- crates/codegraph-core/src/extractors/java.rs | 4 +- .../src/extractors/javascript.rs | 4 +- crates/codegraph-core/src/extractors/julia.rs | 4 +- .../codegraph-core/src/extractors/kotlin.rs | 6 +- crates/codegraph-core/src/extractors/lua.rs | 4 +- crates/codegraph-core/src/extractors/mod.rs | 2 +- crates/codegraph-core/src/extractors/objc.rs | 4 +- crates/codegraph-core/src/extractors/ocaml.rs | 4 +- crates/codegraph-core/src/extractors/php.rs | 4 +- .../codegraph-core/src/extractors/python.rs | 4 +- .../codegraph-core/src/extractors/r_lang.rs | 4 +- crates/codegraph-core/src/extractors/ruby.rs | 4 +- .../src/extractors/rust_lang.rs | 4 +- crates/codegraph-core/src/extractors/scala.rs | 4 +- .../codegraph-core/src/extractors/solidity.rs | 2 +- crates/codegraph-core/src/extractors/swift.rs | 4 +- crates/codegraph-core/src/extractors/zig.rs | 4 +- crates/codegraph-core/src/features/mod.rs | 1 + .../src/{ => features}/structure.rs | 0 .../src/graph/algorithms/bfs.rs | 137 ++++++++ .../src/graph/algorithms/centrality.rs | 65 
++++ .../algorithms/louvain.rs} | 297 +----------------- .../src/graph/algorithms/mod.rs | 48 +++ .../src/graph/algorithms/shortest_path.rs | 92 ++++++ .../{cycles.rs => graph/algorithms/tarjan.rs} | 0 .../src/graph/classifiers/mod.rs | 1 + .../classifiers/roles.rs} | 0 crates/codegraph-core/src/graph/mod.rs | 2 + .../src/{ => infrastructure}/config.rs | 0 .../codegraph-core/src/infrastructure/mod.rs | 1 + crates/codegraph-core/src/lib.rs | 84 ++--- .../src/{ => shared}/constants.rs | 0 crates/codegraph-core/src/shared/mod.rs | 1 + docs/contributing/adding-a-language.md | 4 +- src/db/migrations.ts | 2 +- .../builder/stages/native-orchestrator.ts | 2 +- .../graph/builder/stages/resolve-imports.ts | 2 +- src/domain/parser.ts | 2 +- src/infrastructure/config.ts | 2 +- tests/benchmarks/regression-guard.test.ts | 2 +- .../native-drop-classification.test.ts | 15 +- 85 files changed, 612 insertions(+), 516 deletions(-) rename crates/codegraph-core/src/{ => ast_analysis}/cfg.rs (99%) rename crates/codegraph-core/src/{ => ast_analysis}/complexity.rs (99%) rename crates/codegraph-core/src/{ => ast_analysis}/dataflow.rs (99%) rename crates/codegraph-core/src/{analysis.rs => ast_analysis/engine.rs} (94%) create mode 100644 crates/codegraph-core/src/ast_analysis/mod.rs rename crates/codegraph-core/src/{native_db.rs => db/connection.rs} (99%) create mode 100644 crates/codegraph-core/src/db/mod.rs rename crates/codegraph-core/src/{ast_db.rs => db/repository/ast.rs} (100%) rename crates/codegraph-core/src/{edges_db.rs => db/repository/edges.rs} (100%) rename crates/codegraph-core/src/{read_queries.rs => db/repository/graph_read.rs} (99%) create mode 100644 crates/codegraph-core/src/db/repository/mod.rs rename crates/codegraph-core/src/{ => db/repository}/read_types.rs (100%) rename crates/codegraph-core/src/{ => domain/graph/builder}/barrel_resolution.rs (100%) rename crates/codegraph-core/src/{ => domain/graph/builder}/incremental.rs (98%) create mode 100644 
crates/codegraph-core/src/domain/graph/builder/mod.rs rename crates/codegraph-core/src/{build_pipeline.rs => domain/graph/builder/pipeline.rs} (93%) rename crates/codegraph-core/src/{edge_builder.rs => domain/graph/builder/stages/build_edges.rs} (98%) rename crates/codegraph-core/src/{file_collector.rs => domain/graph/builder/stages/collect_files.rs} (99%) rename crates/codegraph-core/src/{change_detection.rs => domain/graph/builder/stages/detect_changes.rs} (99%) rename crates/codegraph-core/src/{ => domain/graph/builder/stages}/import_edges.rs (98%) rename crates/codegraph-core/src/{ => domain/graph/builder/stages}/insert_nodes.rs (100%) create mode 100644 crates/codegraph-core/src/domain/graph/builder/stages/mod.rs rename crates/codegraph-core/src/{ => domain/graph}/journal.rs (100%) create mode 100644 crates/codegraph-core/src/domain/graph/mod.rs rename crates/codegraph-core/src/{import_resolution.rs => domain/graph/resolve.rs} (100%) create mode 100644 crates/codegraph-core/src/domain/mod.rs rename crates/codegraph-core/src/{ => domain}/parallel.rs (97%) rename crates/codegraph-core/src/{parser_registry.rs => domain/parser.rs} (100%) create mode 100644 crates/codegraph-core/src/features/mod.rs rename crates/codegraph-core/src/{ => features}/structure.rs (100%) create mode 100644 crates/codegraph-core/src/graph/algorithms/bfs.rs create mode 100644 crates/codegraph-core/src/graph/algorithms/centrality.rs rename crates/codegraph-core/src/{graph_algorithms.rs => graph/algorithms/louvain.rs} (53%) create mode 100644 crates/codegraph-core/src/graph/algorithms/mod.rs create mode 100644 crates/codegraph-core/src/graph/algorithms/shortest_path.rs rename crates/codegraph-core/src/{cycles.rs => graph/algorithms/tarjan.rs} (100%) create mode 100644 crates/codegraph-core/src/graph/classifiers/mod.rs rename crates/codegraph-core/src/{roles_db.rs => graph/classifiers/roles.rs} (100%) create mode 100644 crates/codegraph-core/src/graph/mod.rs rename crates/codegraph-core/src/{ 
=> infrastructure}/config.rs (100%) create mode 100644 crates/codegraph-core/src/infrastructure/mod.rs rename crates/codegraph-core/src/{ => shared}/constants.rs (100%) create mode 100644 crates/codegraph-core/src/shared/mod.rs diff --git a/CLAUDE.md b/CLAUDE.md index 7bdc4c8db..cc8f2a65b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -152,6 +152,7 @@ Source is TypeScript in `src/`, compiled via `tsup`. The Rust native engine live **Key design decisions:** - **Dual-engine architecture:** Native Rust parsing via napi-rs (`crates/codegraph-core/`) with automatic fallback to WASM. Controlled by `--engine native|wasm|auto` (default: `auto`). **Both engines must produce identical results.** If they diverge, the less-accurate engine has a bug — fix it, don't document the gap +- **Mirrored engine layout:** `crates/codegraph-core/src/` mirrors the `src/` TypeScript tree (snake_case for kebab-case): `shared/`, `infrastructure/`, `db/repository/`, `domain/graph/builder/stages/`, `ast_analysis/`, `graph/algorithms/`, `graph/classifiers/`, `features/`, `extractors/`. The full module↔file mapping table lives in the `lib.rs` doc comment. When changing engine behavior in one language, make the equivalent change in the mirrored module of the other — new Rust modules must be placed at the path of their TS counterpart - Platform-specific prebuilt binaries published as optional npm packages (`@optave/codegraph-{platform}-{arch}`) - WASM grammars are built from devDeps on `npm install` (via `prepare` script) and not committed to git — used as fallback when native addon is unavailable - **Language parser registry:** `LANGUAGE_REGISTRY` in `domain/parser.ts` is the single source of truth for all supported languages — maps each language to `{ id, extensions, grammarFile, extractor, required }`. `EXTENSIONS` in `shared/constants.ts` is derived from the registry. 
Adding a new language requires one registry entry + extractor function diff --git a/Cargo.lock b/Cargo.lock index 629505d1e..caf7e334c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -66,7 +66,7 @@ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "codegraph-core" -version = "3.11.2" +version = "3.12.0" dependencies = [ "globset", "ignore", diff --git a/crates/codegraph-core/src/cfg.rs b/crates/codegraph-core/src/ast_analysis/cfg.rs similarity index 99% rename from crates/codegraph-core/src/cfg.rs rename to crates/codegraph-core/src/ast_analysis/cfg.rs index d69d4228d..226a31362 100644 --- a/crates/codegraph-core/src/cfg.rs +++ b/crates/codegraph-core/src/ast_analysis/cfg.rs @@ -1,5 +1,5 @@ use tree_sitter::Node; -use crate::constants::MAX_WALK_DEPTH; +use crate::shared::constants::MAX_WALK_DEPTH; use crate::types::{CfgBlock, CfgData, CfgEdge}; // ─── CFG Rules ────────────────────────────────────────────────────────── diff --git a/crates/codegraph-core/src/complexity.rs b/crates/codegraph-core/src/ast_analysis/complexity.rs similarity index 99% rename from crates/codegraph-core/src/complexity.rs rename to crates/codegraph-core/src/ast_analysis/complexity.rs index 3f5e41672..fdd572512 100644 --- a/crates/codegraph-core/src/complexity.rs +++ b/crates/codegraph-core/src/ast_analysis/complexity.rs @@ -1,6 +1,6 @@ use tree_sitter::Node; -use crate::constants::MAX_WALK_DEPTH; +use crate::shared::constants::MAX_WALK_DEPTH; use crate::types::ComplexityMetrics; // ─── Language-Configurable Complexity Rules ─────────────────────────────── diff --git a/crates/codegraph-core/src/dataflow.rs b/crates/codegraph-core/src/ast_analysis/dataflow.rs similarity index 99% rename from crates/codegraph-core/src/dataflow.rs rename to crates/codegraph-core/src/ast_analysis/dataflow.rs index 091b44dd0..ddb4a11a1 100644 --- a/crates/codegraph-core/src/dataflow.rs +++ b/crates/codegraph-core/src/ast_analysis/dataflow.rs @@ -1,7 +1,7 @@ use 
std::collections::HashMap; use tree_sitter::{Node, Tree}; -use crate::constants::{DATAFLOW_TRUNCATION_LIMIT, MAX_WALK_DEPTH}; +use crate::shared::constants::{DATAFLOW_TRUNCATION_LIMIT, MAX_WALK_DEPTH}; use crate::types::{ DataflowArgFlow, DataflowAssignment, DataflowMutation, DataflowParam, DataflowResult, DataflowReturn, diff --git a/crates/codegraph-core/src/analysis.rs b/crates/codegraph-core/src/ast_analysis/engine.rs similarity index 94% rename from crates/codegraph-core/src/analysis.rs rename to crates/codegraph-core/src/ast_analysis/engine.rs index a6541f6f9..3cfdb0a6c 100644 --- a/crates/codegraph-core/src/analysis.rs +++ b/crates/codegraph-core/src/ast_analysis/engine.rs @@ -7,11 +7,11 @@ use tree_sitter::{Node, Parser}; -use crate::cfg::{build_function_cfg, get_cfg_rules}; -use crate::complexity::{compute_all_metrics, lang_rules}; -use crate::constants::MAX_WALK_DEPTH; -use crate::dataflow::extract_dataflow; -use crate::parser_registry::LanguageKind; +use crate::ast_analysis::cfg::{build_function_cfg, get_cfg_rules}; +use crate::ast_analysis::complexity::{compute_all_metrics, lang_rules}; +use crate::shared::constants::MAX_WALK_DEPTH; +use crate::ast_analysis::dataflow::extract_dataflow; +use crate::domain::parser::LanguageKind; use crate::types::{DataflowResult, FunctionCfgResult, FunctionComplexityResult}; /// Extract the name of a function/method node via the "name" field. 
diff --git a/crates/codegraph-core/src/ast_analysis/mod.rs b/crates/codegraph-core/src/ast_analysis/mod.rs new file mode 100644 index 000000000..2c28126b8 --- /dev/null +++ b/crates/codegraph-core/src/ast_analysis/mod.rs @@ -0,0 +1,4 @@ +pub mod cfg; +pub mod complexity; +pub mod dataflow; +pub mod engine; diff --git a/crates/codegraph-core/src/native_db.rs b/crates/codegraph-core/src/db/connection.rs similarity index 99% rename from crates/codegraph-core/src/native_db.rs rename to crates/codegraph-core/src/db/connection.rs index 38869c282..a8d175dbd 100644 --- a/crates/codegraph-core/src/native_db.rs +++ b/crates/codegraph-core/src/db/connection.rs @@ -11,10 +11,10 @@ use napi_derive::napi; use rusqlite::{params, types::ValueRef, Connection, OpenFlags}; use send_wrapper::SendWrapper; -use crate::ast_db::{self, FileAstBatch}; -use crate::edges_db::{self, EdgeRow}; -use crate::insert_nodes::{self, FileHashEntry, InsertNodesBatch}; -use crate::roles_db::{self, RoleSummary}; +use crate::db::repository::ast::{self, FileAstBatch}; +use crate::db::repository::edges::{self, EdgeRow}; +use crate::domain::graph::builder::stages::insert_nodes::{self, FileHashEntry, InsertNodesBatch}; +use crate::graph::classifiers::roles::{self, RoleSummary}; // ── Migration DDL (mirrored from src/db/migrations.ts) ────────────────── @@ -819,7 +819,7 @@ impl NativeDatabase { return Ok(true); } let conn = self.conn()?; - Ok(edges_db::do_insert_edges(conn, &edges) + Ok(edges::do_insert_edges(conn, &edges) .inspect_err(|e| eprintln!("[NativeDatabase] bulk_insert_edges failed: {e}")) .is_ok()) } @@ -829,7 +829,7 @@ impl NativeDatabase { #[napi] pub fn bulk_insert_ast_nodes(&self, batches: Vec) -> napi::Result { let conn = self.conn()?; - Ok(ast_db::do_insert_ast_nodes(conn, &batches).unwrap_or(0)) + Ok(ast::do_insert_ast_nodes(conn, &batches).unwrap_or(0)) } /// Bulk-insert complexity metrics for functions/methods. 
@@ -1022,7 +1022,7 @@ impl NativeDatabase { #[napi] pub fn classify_roles_full(&self) -> napi::Result> { let conn = self.conn()?; - Ok(roles_db::do_classify_full(conn).ok()) + Ok(roles::do_classify_full(conn).ok()) } /// Incremental role classification: only reclassifies nodes from changed @@ -1033,7 +1033,7 @@ impl NativeDatabase { changed_files: Vec, ) -> napi::Result> { let conn = self.conn()?; - Ok(roles_db::do_classify_incremental(conn, &changed_files).ok()) + Ok(roles::do_classify_incremental(conn, &changed_files).ok()) } // ── Phase 6.18: Batched build-glue queries ────────────────────────── @@ -1363,7 +1363,7 @@ impl NativeDatabase { opts_json: String, ) -> napi::Result { let conn = self.conn()?; - let result = crate::build_pipeline::run_pipeline( + let result = crate::domain::graph::builder::pipeline::run_pipeline( conn, &root_dir, &config_json, diff --git a/crates/codegraph-core/src/db/mod.rs b/crates/codegraph-core/src/db/mod.rs new file mode 100644 index 000000000..74ba7d8d4 --- /dev/null +++ b/crates/codegraph-core/src/db/mod.rs @@ -0,0 +1,2 @@ +pub mod connection; +pub mod repository; diff --git a/crates/codegraph-core/src/ast_db.rs b/crates/codegraph-core/src/db/repository/ast.rs similarity index 100% rename from crates/codegraph-core/src/ast_db.rs rename to crates/codegraph-core/src/db/repository/ast.rs diff --git a/crates/codegraph-core/src/edges_db.rs b/crates/codegraph-core/src/db/repository/edges.rs similarity index 100% rename from crates/codegraph-core/src/edges_db.rs rename to crates/codegraph-core/src/db/repository/edges.rs diff --git a/crates/codegraph-core/src/read_queries.rs b/crates/codegraph-core/src/db/repository/graph_read.rs similarity index 99% rename from crates/codegraph-core/src/read_queries.rs rename to crates/codegraph-core/src/db/repository/graph_read.rs index c353390e1..26c7766eb 100644 --- a/crates/codegraph-core/src/read_queries.rs +++ b/crates/codegraph-core/src/db/repository/graph_read.rs @@ -8,8 +8,8 @@ use 
std::collections::{HashMap, HashSet, VecDeque}; use napi_derive::napi; use rusqlite::params; -use crate::native_db::{has_table, NativeDatabase}; -use crate::read_types::*; +use crate::db::connection::{has_table, NativeDatabase}; +use crate::db::repository::read_types::*; // ── Helpers ───────────────────────────────────────────────────────────── diff --git a/crates/codegraph-core/src/db/repository/mod.rs b/crates/codegraph-core/src/db/repository/mod.rs new file mode 100644 index 000000000..ef1af6936 --- /dev/null +++ b/crates/codegraph-core/src/db/repository/mod.rs @@ -0,0 +1,4 @@ +pub mod ast; +pub mod edges; +pub mod graph_read; +pub mod read_types; diff --git a/crates/codegraph-core/src/read_types.rs b/crates/codegraph-core/src/db/repository/read_types.rs similarity index 100% rename from crates/codegraph-core/src/read_types.rs rename to crates/codegraph-core/src/db/repository/read_types.rs diff --git a/crates/codegraph-core/src/barrel_resolution.rs b/crates/codegraph-core/src/domain/graph/builder/barrel_resolution.rs similarity index 100% rename from crates/codegraph-core/src/barrel_resolution.rs rename to crates/codegraph-core/src/domain/graph/builder/barrel_resolution.rs diff --git a/crates/codegraph-core/src/incremental.rs b/crates/codegraph-core/src/domain/graph/builder/incremental.rs similarity index 98% rename from crates/codegraph-core/src/incremental.rs rename to crates/codegraph-core/src/domain/graph/builder/incremental.rs index cf02d50a4..35fa04345 100644 --- a/crates/codegraph-core/src/incremental.rs +++ b/crates/codegraph-core/src/domain/graph/builder/incremental.rs @@ -5,7 +5,7 @@ use tree_sitter::{Parser, Tree}; use napi_derive::napi; use crate::extractors::extract_symbols; -use crate::parser_registry::LanguageKind; +use crate::domain::parser::LanguageKind; use crate::types::FileSymbols; struct CacheEntry { diff --git a/crates/codegraph-core/src/domain/graph/builder/mod.rs b/crates/codegraph-core/src/domain/graph/builder/mod.rs new file mode 
100644 index 000000000..36586205d --- /dev/null +++ b/crates/codegraph-core/src/domain/graph/builder/mod.rs @@ -0,0 +1,4 @@ +pub mod barrel_resolution; +pub mod incremental; +pub mod pipeline; +pub mod stages; diff --git a/crates/codegraph-core/src/build_pipeline.rs b/crates/codegraph-core/src/domain/graph/builder/pipeline.rs similarity index 93% rename from crates/codegraph-core/src/build_pipeline.rs rename to crates/codegraph-core/src/domain/graph/builder/pipeline.rs index 9c7ffc160..6ef4d2f2b 100644 --- a/crates/codegraph-core/src/build_pipeline.rs +++ b/crates/codegraph-core/src/domain/graph/builder/pipeline.rs @@ -10,23 +10,23 @@ //! 3. Detect changes (tiered: journal/mtime/hash) //! 4. Parse files in parallel (existing `parallel::parse_files_parallel`) //! 5. Insert nodes (existing `insert_nodes::do_insert_nodes`) -//! 6. Resolve imports (existing `import_resolution::resolve_imports_batch`) +//! 6. Resolve imports (existing `resolve::resolve_imports_batch`) //! 6b. Re-parse barrel candidates (incremental only) //! 7. Build import edges + call edges + barrel resolution //! 8. Structure metrics + role classification //! 9. 
Finalize (metadata, journal) -use crate::change_detection; -use crate::config::{BuildConfig, BuildOpts, BuildPathAliases}; -use crate::constants::{FAST_PATH_MAX_CHANGED_FILES, FAST_PATH_MIN_EXISTING_FILES}; -use crate::file_collector; -use crate::import_edges::{self, ImportEdgeContext}; -use crate::import_resolution; -use crate::journal; -use crate::parallel; -use crate::ast_db::{self, AstInsertNode, FileAstBatch}; -use crate::roles_db; -use crate::structure; +use crate::domain::graph::builder::stages::detect_changes; +use crate::infrastructure::config::{BuildConfig, BuildOpts, BuildPathAliases}; +use crate::shared::constants::{FAST_PATH_MAX_CHANGED_FILES, FAST_PATH_MIN_EXISTING_FILES}; +use crate::domain::graph::builder::stages::collect_files; +use crate::domain::graph::builder::stages::import_edges::{self, ImportEdgeContext}; +use crate::domain::graph::resolve; +use crate::domain::graph::journal; +use crate::domain::parallel; +use crate::db::repository::ast::{self, AstInsertNode, FileAstBatch}; +use crate::graph::classifiers::roles; +use crate::features::structure; use crate::types::{FileSymbols, ImportResolutionInput}; use rusqlite::Connection; use serde::Serialize; @@ -145,9 +145,9 @@ fn early_exit_result( timing: PipelineTiming, conn: &Connection, root_dir: &str, - metadata_updates: &[change_detection::MetadataUpdate], + metadata_updates: &[detect_changes::MetadataUpdate], ) -> BuildPipelineResult { - change_detection::heal_metadata(conn, metadata_updates); + detect_changes::heal_metadata(conn, metadata_updates); journal::write_journal_header(root_dir, now_ms()); BuildPipelineResult { phases: timing, @@ -170,29 +170,29 @@ fn early_exit_result( /// pipeline can reconnect them after Stage 5 and reclassify roles in Stage 8. 
fn save_and_purge_changed( conn: &Connection, - parse_changes: &[&change_detection::ChangedFile], - change_result: &change_detection::ChangeResult, + parse_changes: &[&detect_changes::ChangedFile], + change_result: &detect_changes::ChangeResult, opts: &BuildOpts, root_dir: &str, -) -> (Vec, Vec) { - let mut saved_reverse_dep_edges: Vec = Vec::new(); +) -> (Vec, Vec) { + let mut saved_reverse_dep_edges: Vec = Vec::new(); let mut removal_reverse_deps: Vec = Vec::new(); if change_result.is_full_build { - let has_embeddings = change_detection::has_embeddings(conn); - change_detection::clear_all_graph_data(conn, has_embeddings); + let has_embeddings = detect_changes::has_embeddings(conn); + detect_changes::clear_all_graph_data(conn, has_embeddings); return (saved_reverse_dep_edges, removal_reverse_deps); } let changed_paths: Vec = parse_changes.iter().map(|c| c.rel_path.clone()).collect(); if !opts.no_reverse_deps.unwrap_or(false) { - saved_reverse_dep_edges = change_detection::save_reverse_dep_edges(conn, &changed_paths); + saved_reverse_dep_edges = detect_changes::save_reverse_dep_edges(conn, &changed_paths); if !change_result.removed.is_empty() { let removed_set: HashSet = change_result.removed.iter().cloned().collect(); removal_reverse_deps = - change_detection::find_reverse_dependencies(conn, &removed_set, root_dir) + detect_changes::find_reverse_dependencies(conn, &removed_set, root_dir) .into_iter() .collect(); } @@ -204,14 +204,14 @@ fn save_and_purge_changed( .chain(parse_changes.iter().map(|c| &c.rel_path)) .cloned() .collect(); - change_detection::purge_changed_files(conn, &files_to_purge, &[]); + detect_changes::purge_changed_files(conn, &files_to_purge, &[]); (saved_reverse_dep_edges, removal_reverse_deps) } /// Parse a changed-file slice in parallel and key the results by relative path. 
fn parse_and_index_files( - parse_changes: &[&change_detection::ChangedFile], + parse_changes: &[&detect_changes::ChangedFile], root_dir: &str, include_dataflow: bool, include_ast: bool, @@ -251,7 +251,7 @@ fn resolve_pipeline_imports( let known_files: HashSet = collect_files.iter().map(|f| relative_path(root_dir, f)).collect(); let resolved = - import_resolution::resolve_imports_batch(&batch_inputs, root_dir, napi_aliases, Some(&known_files)); + resolve::resolve_imports_batch(&batch_inputs, root_dir, napi_aliases, Some(&known_files)); let mut batch_resolved: HashMap = HashMap::new(); for r in &resolved { let key = format!("{}|{}", r.from_file, r.import_source); @@ -263,12 +263,12 @@ fn resolve_pipeline_imports( /// Reconnect any saved reverse-dep edges to the new target node IDs (#1012). fn reconnect_saved_reverse_dep_edges( conn: &Connection, - saved: &[change_detection::SavedReverseDepEdge], + saved: &[detect_changes::SavedReverseDepEdge], ) { if saved.is_empty() { return; } - let (reconnected, dropped) = change_detection::reconnect_reverse_dep_edges(conn, saved); + let (reconnected, dropped) = detect_changes::reconnect_reverse_dep_edges(conn, saved); if dropped > 0 { eprintln!( "[codegraph] reconnect_reverse_dep_edges: {reconnected} reconnected, {dropped} dropped (target nodes not found)" @@ -341,10 +341,10 @@ fn run_role_classification( }; if let Some(ref files) = changed_file_list { if !files.is_empty() { - let _ = roles_db::do_classify_incremental(conn, files); + let _ = roles::do_classify_incremental(conn, files); } } else { - let _ = roles_db::do_classify_full(conn); + let _ = roles::do_classify_full(conn); } } @@ -386,7 +386,7 @@ fn run_analysis_persistence( if include_ast { let t0 = Instant::now(); let ast_batches = build_ast_batches(file_symbols, &analysis_file_set); - if ast_db::do_insert_ast_nodes(conn, &ast_batches).is_err() { + if ast::do_insert_ast_nodes(conn, &ast_batches).is_err() { analysis_ok = false; } timing.ast_ms = 
t0.elapsed().as_secs_f64() * 1000.0; @@ -458,7 +458,7 @@ pub fn run_pipeline( // ── Stage 3: Detect changes ──────────────────────────────────────── let t0 = Instant::now(); - let change_result = change_detection::detect_changes( + let change_result = detect_changes::detect_changes( conn, &collect_result.files, root_dir, @@ -469,7 +469,7 @@ pub fn run_pipeline( timing.detect_ms = t0.elapsed().as_secs_f64() * 1000.0; // Filter out metadata-only changes - let parse_changes: Vec<&change_detection::ChangedFile> = change_result + let parse_changes: Vec<&detect_changes::ChangedFile> = change_result .changed .iter() .filter(|c| !c.metadata_only) @@ -505,13 +505,13 @@ pub fn run_pipeline( let t0 = Instant::now(); let insert_batches = build_insert_batches(&file_symbols); let file_hashes = build_file_hash_entries(&parse_changes); - let _ = crate::insert_nodes::do_insert_nodes( + let _ = crate::domain::graph::builder::stages::insert_nodes::do_insert_nodes( conn, &insert_batches, &file_hashes, &change_result.removed, ); - change_detection::heal_metadata(conn, &change_result.metadata_updates); + detect_changes::heal_metadata(conn, &change_result.metadata_updates); timing.insert_ms = t0.elapsed().as_secs_f64() * 1000.0; // ── Stage 6: Resolve imports ─────────────────────────────────────── @@ -646,7 +646,7 @@ fn collect_source_files( opts: &BuildOpts, incremental: bool, force_full_rebuild: bool, -) -> file_collector::CollectResult { +) -> collect_files::CollectResult { if let Some(ref scope) = opts.scope { // Scoped rebuild let files: Vec = scope @@ -657,7 +657,7 @@ fn collect_source_files( }) .filter(|f| Path::new(f).exists()) .collect(); - file_collector::CollectResult { + collect_files::CollectResult { directories: files .iter() .filter_map(|f| { @@ -684,7 +684,7 @@ fn collect_source_files( .unwrap_or_default(); if !db_files.is_empty() { - file_collector::try_fast_collect( + collect_files::try_fast_collect( root_dir, &db_files, &journal.changed, @@ -693,7 +693,7 @@ fn 
collect_source_files( &config.exclude, ) } else { - file_collector::collect_files( + collect_files::collect_files( root_dir, &config.ignore_dirs, &config.include, @@ -701,7 +701,7 @@ fn collect_source_files( ) } } else { - file_collector::collect_files( + collect_files::collect_files( root_dir, &config.ignore_dirs, &config.include, @@ -709,7 +709,7 @@ fn collect_source_files( ) } } else { - file_collector::collect_files( + collect_files::collect_files( root_dir, &config.ignore_dirs, &config.include, @@ -820,7 +820,7 @@ fn reparse_barrel_candidates( from_file: abs_str.clone(), import_source: imp.source.clone(), }; - let resolved_batch = import_resolution::resolve_imports_batch( + let resolved_batch = resolve::resolve_imports_batch( &[input], root_dir, napi_aliases, @@ -982,16 +982,16 @@ fn check_version_mismatch(conn: &Connection) -> bool { /// Build InsertNodesBatch from parsed file symbols. fn build_insert_batches( file_symbols: &HashMap, -) -> Vec { +) -> Vec { file_symbols .iter() .map( - |(rel_path, symbols)| crate::insert_nodes::InsertNodesBatch { + |(rel_path, symbols)| crate::domain::graph::builder::stages::insert_nodes::InsertNodesBatch { file: rel_path.clone(), definitions: symbols .definitions .iter() - .map(|d| crate::insert_nodes::InsertNodesDefinition { + .map(|d| crate::domain::graph::builder::stages::insert_nodes::InsertNodesDefinition { name: d.name.clone(), kind: d.kind.clone(), line: d.line, @@ -1002,7 +1002,7 @@ fn build_insert_batches( .as_ref() .map(|kids| { kids.iter() - .map(|c| crate::insert_nodes::InsertNodesChild { + .map(|c| crate::domain::graph::builder::stages::insert_nodes::InsertNodesChild { name: c.name.clone(), kind: c.kind.clone(), line: c.line, @@ -1017,7 +1017,7 @@ fn build_insert_batches( exports: symbols .exports .iter() - .map(|e| crate::insert_nodes::InsertNodesExport { + .map(|e| crate::domain::graph::builder::stages::insert_nodes::InsertNodesExport { name: e.name.clone(), kind: e.kind.clone(), line: e.line, @@ -1034,8 
+1034,8 @@ fn build_insert_batches( /// reading file content. In that case we read and hash each file here so /// that `file_hashes` is populated for subsequent incremental builds. fn build_file_hash_entries( - changed: &[&change_detection::ChangedFile], -) -> Vec { + changed: &[&detect_changes::ChangedFile], +) -> Vec { changed .iter() .filter_map(|c| { @@ -1072,7 +1072,7 @@ fn build_file_hash_entries( } else { (c.mtime as f64, c.size as f64) }; - Some(crate::insert_nodes::FileHashEntry { + Some(crate::domain::graph::builder::stages::insert_nodes::FileHashEntry { file: c.rel_path.clone(), hash, mtime, @@ -1119,7 +1119,7 @@ const EDGE_NODE_KIND_FILTER: &str = "kind IN ('function','method','class','inter /// `relevantFiles` accumulation in `loadNodes` (#976, greptile P1). fn compute_edge_relevant_files( file_symbols: &HashMap, - import_ctx: &crate::import_edges::ImportEdgeContext, + import_ctx: &crate::domain::graph::builder::stages::import_edges::ImportEdgeContext, ) -> HashSet { let mut relevant_files: HashSet = file_symbols.keys().cloned().collect(); for (rel_path, symbols) in file_symbols { @@ -1156,10 +1156,10 @@ fn compute_edge_relevant_files( fn load_edge_node_set( conn: &Connection, file_symbols: &HashMap, - import_ctx: &crate::import_edges::ImportEdgeContext, + import_ctx: &crate::domain::graph::builder::stages::import_edges::ImportEdgeContext, is_incremental: bool, -) -> Vec { - use crate::edge_builder::NodeInfo; +) -> Vec { + use crate::domain::graph::builder::stages::build_edges::NodeInfo; let existing_file_count: i64 = conn .query_row( @@ -1169,8 +1169,8 @@ fn load_edge_node_set( ) .unwrap_or(0); let scope_eligible = is_incremental - && file_symbols.len() <= crate::constants::FAST_PATH_MAX_CHANGED_FILES - && existing_file_count > crate::constants::FAST_PATH_MIN_EXISTING_FILES; + && file_symbols.len() <= crate::shared::constants::FAST_PATH_MAX_CHANGED_FILES + && existing_file_count > crate::shared::constants::FAST_PATH_MIN_EXISTING_FILES; if 
!scope_eligible { return load_all_edge_nodes(conn); @@ -1212,7 +1212,7 @@ fn load_edge_node_set( } /// Load every candidate edge node from the DB (full-build path). -fn load_all_edge_nodes(conn: &Connection) -> Vec { +fn load_all_edge_nodes(conn: &Connection) -> Vec { let sql = format!( "SELECT id, name, kind, file, line FROM nodes WHERE {EDGE_NODE_KIND_FILTER}", ); @@ -1227,8 +1227,8 @@ fn load_all_edge_nodes(conn: &Connection) -> Vec /// Row-mapper for the `SELECT id, name, kind, file, line FROM nodes ...` /// shape used by both scoped and full edge-node loads. -fn read_edge_node_info(row: &rusqlite::Row) -> rusqlite::Result { - Ok(crate::edge_builder::NodeInfo { +fn read_edge_node_info(row: &rusqlite::Row) -> rusqlite::Result { + Ok(crate::domain::graph::builder::stages::build_edges::NodeInfo { id: row.get::<_, i64>(0)? as u32, name: row.get(1)?, kind: row.get(2)?, @@ -1262,9 +1262,9 @@ fn load_file_node_id_map(conn: &Connection) -> HashMap { fn collect_imported_names_for_file( abs_str: &str, symbols: &FileSymbols, - import_ctx: &crate::import_edges::ImportEdgeContext, -) -> Vec { - use crate::edge_builder::ImportedName; + import_ctx: &crate::domain::graph::builder::stages::import_edges::ImportEdgeContext, +) -> Vec { + use crate::domain::graph::builder::stages::build_edges::ImportedName; let mut imported_names: Vec = Vec::new(); for imp in &symbols.imports { let resolved_path = import_ctx.get_resolved(abs_str, &imp.source); @@ -1289,13 +1289,13 @@ fn collect_imported_names_for_file( } /// Insert the edges produced by the native edge builder into the edges table. 
-fn insert_call_edge_rows(conn: &Connection, edges: &[crate::edge_builder::ComputedEdge]) { +fn insert_call_edge_rows(conn: &Connection, edges: &[crate::domain::graph::builder::stages::build_edges::ComputedEdge]) { if edges.is_empty() { return; } - let edge_rows: Vec = edges + let edge_rows: Vec = edges .iter() - .map(|e| crate::edges_db::EdgeRow { + .map(|e| crate::db::repository::edges::EdgeRow { source_id: e.source_id, target_id: e.target_id, kind: e.kind.clone(), @@ -1303,7 +1303,7 @@ fn insert_call_edge_rows(conn: &Connection, edges: &[crate::edge_builder::Comput dynamic: e.dynamic, }) .collect(); - let _ = crate::edges_db::do_insert_edges(conn, &edge_rows); + let _ = crate::db::repository::edges::do_insert_edges(conn, &edge_rows); } /// Full builds always load every node — there is no smaller set anyway. @@ -1313,7 +1313,7 @@ fn build_and_insert_call_edges( import_ctx: &ImportEdgeContext, is_incremental: bool, ) { - use crate::edge_builder::*; + use crate::domain::graph::builder::stages::build_edges::*; let all_nodes = load_edge_node_set(conn, file_symbols, import_ctx, is_incremental); if all_nodes.is_empty() { @@ -1446,7 +1446,7 @@ fn build_analysis_node_map( map } -/// Convert FileSymbols AST nodes to FileAstBatch format for `ast_db::do_insert_ast_nodes`. +/// Convert FileSymbols AST nodes to FileAstBatch format for `ast::do_insert_ast_nodes`. 
fn build_ast_batches( file_symbols: &HashMap, analysis_files: &HashSet<&str>, diff --git a/crates/codegraph-core/src/edge_builder.rs b/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs similarity index 98% rename from crates/codegraph-core/src/edge_builder.rs rename to crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs index c3a769fe5..339e3ecc0 100644 --- a/crates/codegraph-core/src/edge_builder.rs +++ b/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs @@ -2,8 +2,8 @@ use std::collections::{HashMap, HashSet}; use napi_derive::napi; -use crate::barrel_resolution::{self, BarrelContext, ReexportRef}; -use crate::import_resolution; +use crate::domain::graph::builder::barrel_resolution::{self, BarrelContext, ReexportRef}; +use crate::domain::graph::resolve; use crate::types::FnRefBinding; /// Kind sets for hierarchy edge resolution -- mirrors the JS constants in @@ -317,7 +317,7 @@ fn process_file<'a>( for t in &alias_targets { let edge_key = ((caller_id as u64) << 32) | (t.id as u64); if t.id != caller_id && !seen_edges.contains(&edge_key) && !pts_edge_map.contains_key(&edge_key) { - let conf = import_resolution::compute_confidence( + let conf = resolve::compute_confidence( rel_path, &t.file, alias_imported_from, ) - PROPAGATION_HOP_PENALTY; if conf > 0.0 { @@ -432,7 +432,7 @@ fn resolve_call_targets<'a>( let resolved: Vec<&NodeInfo> = ctx.nodes_by_name .get(proto_target) .map(|v| v.iter() - .filter(|n| import_resolution::compute_confidence(rel_path, &n.file, None) >= 0.5) + .filter(|n| resolve::compute_confidence(rel_path, &n.file, None) >= 0.5) .copied().collect()) .unwrap_or_default(); if !resolved.is_empty() { return resolved; } @@ -460,7 +460,7 @@ fn resolve_call_targets<'a>( let resolved: Vec<&NodeInfo> = ctx.nodes_by_name .get(pts_target) .map(|v| v.iter() - .filter(|n| import_resolution::compute_confidence(rel_path, &n.file, None) >= 0.5) + .filter(|n| resolve::compute_confidence(rel_path, &n.file, None) 
>= 0.5) .copied().collect()) .unwrap_or_default(); if !resolved.is_empty() { return resolved; } @@ -486,7 +486,7 @@ fn resolve_call_targets<'a>( let accessor_resolved: Vec<&NodeInfo> = ctx.nodes_by_name .get(target_fn) .map(|v| v.iter() - .filter(|n| import_resolution::compute_confidence(rel_path, &n.file, None) >= 0.5) + .filter(|n| resolve::compute_confidence(rel_path, &n.file, None) >= 0.5) .copied().collect()) .unwrap_or_default(); if !accessor_resolved.is_empty() { return accessor_resolved; } @@ -498,7 +498,7 @@ fn resolve_call_targets<'a>( let exact: Vec<&NodeInfo> = ctx.nodes_by_name .get(call.name.as_str()) .map(|v| v.iter() - .filter(|n| import_resolution::compute_confidence(rel_path, &n.file, None) >= 0.5) + .filter(|n| resolve::compute_confidence(rel_path, &n.file, None) >= 0.5) .copied().collect()) .unwrap_or_default(); if !exact.is_empty() { return exact; } @@ -516,7 +516,7 @@ fn resolve_call_targets<'a>( .get(qualified.as_str()) .map(|v| v.iter() .filter(|n| n.kind == "method" - && import_resolution::compute_confidence(rel_path, &n.file, None) >= 0.5) + && resolve::compute_confidence(rel_path, &n.file, None) >= 0.5) .copied().collect()) .unwrap_or_default(); if !class_scoped.is_empty() { return class_scoped; } @@ -581,8 +581,8 @@ fn extract_inline_new_type(receiver: &str) -> Option { fn sort_targets_by_confidence(targets: &mut Vec<&NodeInfo>, rel_path: &str, imported_from: Option<&str>) { if targets.len() > 1 { targets.sort_by(|a, b| { - let conf_a = import_resolution::compute_confidence(rel_path, &a.file, imported_from); - let conf_b = import_resolution::compute_confidence(rel_path, &b.file, imported_from); + let conf_a = resolve::compute_confidence(rel_path, &a.file, imported_from); + let conf_b = resolve::compute_confidence(rel_path, &b.file, imported_from); conf_b.partial_cmp(&conf_a).unwrap_or(std::cmp::Ordering::Equal) }); } @@ -597,7 +597,7 @@ fn emit_call_edges( for t in targets { let edge_key = ((caller_id as u64) << 32) | (t.id as u64); if 
t.id != caller_id && !seen_edges.contains(&edge_key) { - let confidence = import_resolution::compute_confidence(rel_path, &t.file, imported_from); + let confidence = resolve::compute_confidence(rel_path, &t.file, imported_from); if let Some(&pts_idx) = pts_edge_map.get(&edge_key) { // A pts-resolved edge already exists for this caller→target pair with a // penalised confidence. Upgrade it to the direct-call confidence in-place, diff --git a/crates/codegraph-core/src/file_collector.rs b/crates/codegraph-core/src/domain/graph/builder/stages/collect_files.rs similarity index 99% rename from crates/codegraph-core/src/file_collector.rs rename to crates/codegraph-core/src/domain/graph/builder/stages/collect_files.rs index edf301787..cf127d92d 100644 --- a/crates/codegraph-core/src/file_collector.rs +++ b/crates/codegraph-core/src/domain/graph/builder/stages/collect_files.rs @@ -4,7 +4,7 @@ //! extension filters, and ignored directory names. Uses the `ignore` crate //! (from BurntSushi/ripgrep) for gitignore-aware traversal. -use crate::parser_registry::LanguageKind; +use crate::domain::parser::LanguageKind; use globset::{Glob, GlobSet, GlobSetBuilder}; use std::collections::{HashMap, HashSet, VecDeque}; use std::path::Path; @@ -60,7 +60,7 @@ const SUPPORTED_EXTENSIONS: &[&str] = &[ /// /// Mirrors the predicate at the heart of `collect_files`: a file is collected /// if `LanguageKind::from_extension` recognizes it OR its raw extension is in -/// `SUPPORTED_EXTENSIONS`. Exposed for `change_detection::detect_removed_files` +/// `SUPPORTED_EXTENSIONS`. Exposed for `detect_changes::detect_removed_files` /// so that files outside Rust's capability are not flagged as "removed" /// merely because the orchestrator's narrower collector never sees them. 
pub fn is_supported_extension(path: &str) -> bool { diff --git a/crates/codegraph-core/src/change_detection.rs b/crates/codegraph-core/src/domain/graph/builder/stages/detect_changes.rs similarity index 99% rename from crates/codegraph-core/src/change_detection.rs rename to crates/codegraph-core/src/domain/graph/builder/stages/detect_changes.rs index 8c27c027d..838b2937c 100644 --- a/crates/codegraph-core/src/change_detection.rs +++ b/crates/codegraph-core/src/domain/graph/builder/stages/detect_changes.rs @@ -10,8 +10,8 @@ //! when switching between JS and native engines, so hash format compatibility is //! not required. -use crate::file_collector::is_supported_extension; -use crate::journal; +use crate::domain::graph::builder::stages::collect_files::is_supported_extension; +use crate::domain::graph::journal; use rusqlite::Connection; use sha2::{Digest, Sha256}; use std::collections::{HashMap, HashSet}; @@ -547,7 +547,7 @@ pub fn find_reverse_dependencies( /// Purge graph data for changed/removed files and delete outgoing edges for reverse deps. /// -/// Deletion order: analysis dependents → edges → nodes (matches `native_db::purge_files_data`). +/// Deletion order: analysis dependents → edges → nodes (matches `connection::purge_files_data`). /// Analysis tables use join-based queries (node_id IN SELECT id FROM nodes) because they /// reference nodes by ID, not by file path directly. pub fn purge_changed_files( diff --git a/crates/codegraph-core/src/import_edges.rs b/crates/codegraph-core/src/domain/graph/builder/stages/import_edges.rs similarity index 98% rename from crates/codegraph-core/src/import_edges.rs rename to crates/codegraph-core/src/domain/graph/builder/stages/import_edges.rs index 1df73c9ea..7308ece75 100644 --- a/crates/codegraph-core/src/import_edges.rs +++ b/crates/codegraph-core/src/domain/graph/builder/stages/import_edges.rs @@ -4,8 +4,8 @@ //! the barrel detection from `resolve-imports.ts:isBarrelFile()`, and the //! 
recursive barrel export resolution from `resolveBarrelExport()`. -use crate::barrel_resolution::{self, BarrelContext, ReexportRef}; -use crate::import_resolution; +use crate::domain::graph::builder::barrel_resolution::{self, BarrelContext, ReexportRef}; +use crate::domain::graph::resolve; use crate::types::{FileSymbols, PathAliases}; use rusqlite::Connection; use std::collections::{HashMap, HashSet}; @@ -56,7 +56,7 @@ impl ImportEdgeContext { if let Some(hit) = self.batch_resolved.get(&key) { return hit.clone(); } - import_resolution::resolve_import_path( + resolve::resolve_import_path( abs_file, import_source, &self.root_dir, @@ -449,7 +449,7 @@ pub fn build_import_edges(conn: &Connection, ctx: &ImportEdgeContext) -> Vec(node: &Node, source: &'a [u8]) -> &'a str { diff --git a/crates/codegraph-core/src/extractors/java.rs b/crates/codegraph-core/src/extractors/java.rs index 94dd99e87..be3f98f31 100644 --- a/crates/codegraph-core/src/extractors/java.rs +++ b/crates/codegraph-core/src/extractors/java.rs @@ -1,7 +1,7 @@ use super::helpers::*; use super::SymbolExtractor; -use crate::cfg::build_function_cfg; -use crate::complexity::compute_all_metrics; +use crate::ast_analysis::cfg::build_function_cfg; +use crate::ast_analysis::complexity::compute_all_metrics; use crate::types::*; use tree_sitter::{Node, Tree}; diff --git a/crates/codegraph-core/src/extractors/javascript.rs b/crates/codegraph-core/src/extractors/javascript.rs index 04a2be97a..769bac06b 100644 --- a/crates/codegraph-core/src/extractors/javascript.rs +++ b/crates/codegraph-core/src/extractors/javascript.rs @@ -1,7 +1,7 @@ use super::helpers::*; use super::SymbolExtractor; -use crate::cfg::build_function_cfg; -use crate::complexity::compute_all_metrics; +use crate::ast_analysis::cfg::build_function_cfg; +use crate::ast_analysis::complexity::compute_all_metrics; use crate::types::*; use tree_sitter::{Node, Tree}; diff --git a/crates/codegraph-core/src/extractors/julia.rs 
b/crates/codegraph-core/src/extractors/julia.rs index f8ceeb6c1..237bbd91a 100644 --- a/crates/codegraph-core/src/extractors/julia.rs +++ b/crates/codegraph-core/src/extractors/julia.rs @@ -1,6 +1,6 @@ use tree_sitter::{Node, Tree}; -use crate::cfg::build_function_cfg; -use crate::complexity::compute_all_metrics; +use crate::ast_analysis::cfg::build_function_cfg; +use crate::ast_analysis::complexity::compute_all_metrics; use crate::types::*; use super::helpers::*; use super::SymbolExtractor; diff --git a/crates/codegraph-core/src/extractors/kotlin.rs b/crates/codegraph-core/src/extractors/kotlin.rs index d057111c5..bb85de51b 100644 --- a/crates/codegraph-core/src/extractors/kotlin.rs +++ b/crates/codegraph-core/src/extractors/kotlin.rs @@ -1,6 +1,6 @@ use tree_sitter::{Node, Tree}; -use crate::cfg::build_function_cfg; -use crate::complexity::compute_all_metrics; +use crate::ast_analysis::cfg::build_function_cfg; +use crate::ast_analysis::complexity::compute_all_metrics; use crate::types::*; use super::helpers::*; use super::SymbolExtractor; @@ -373,7 +373,7 @@ mod tests { fn parse_kotlin(code: &str) -> FileSymbols { let mut parser = Parser::new(); parser - .set_language(&crate::parser_registry::LanguageKind::Kotlin.tree_sitter_language()) + .set_language(&crate::domain::parser::LanguageKind::Kotlin.tree_sitter_language()) .unwrap(); let tree = parser.parse(code.as_bytes(), None).unwrap(); KotlinExtractor.extract(&tree, code.as_bytes(), "Test.kt") diff --git a/crates/codegraph-core/src/extractors/lua.rs b/crates/codegraph-core/src/extractors/lua.rs index 2dccded38..e0f68b83c 100644 --- a/crates/codegraph-core/src/extractors/lua.rs +++ b/crates/codegraph-core/src/extractors/lua.rs @@ -1,6 +1,6 @@ use tree_sitter::{Node, Tree}; -use crate::cfg::build_function_cfg; -use crate::complexity::compute_all_metrics; +use crate::ast_analysis::cfg::build_function_cfg; +use crate::ast_analysis::complexity::compute_all_metrics; use crate::types::*; use super::helpers::*; use 
super::SymbolExtractor; diff --git a/crates/codegraph-core/src/extractors/mod.rs b/crates/codegraph-core/src/extractors/mod.rs index 35ddf11a2..0f0a0d7e9 100644 --- a/crates/codegraph-core/src/extractors/mod.rs +++ b/crates/codegraph-core/src/extractors/mod.rs @@ -32,7 +32,7 @@ pub mod swift; pub mod verilog; pub mod zig; -use crate::parser_registry::LanguageKind; +use crate::domain::parser::LanguageKind; use crate::types::FileSymbols; use tree_sitter::Tree; diff --git a/crates/codegraph-core/src/extractors/objc.rs b/crates/codegraph-core/src/extractors/objc.rs index edd029f88..e8538d6c7 100644 --- a/crates/codegraph-core/src/extractors/objc.rs +++ b/crates/codegraph-core/src/extractors/objc.rs @@ -1,6 +1,6 @@ use tree_sitter::{Node, Tree}; -use crate::cfg::build_function_cfg; -use crate::complexity::compute_all_metrics; +use crate::ast_analysis::cfg::build_function_cfg; +use crate::ast_analysis::complexity::compute_all_metrics; use crate::types::*; use super::helpers::*; use super::SymbolExtractor; diff --git a/crates/codegraph-core/src/extractors/ocaml.rs b/crates/codegraph-core/src/extractors/ocaml.rs index 5693dee11..2774bc56f 100644 --- a/crates/codegraph-core/src/extractors/ocaml.rs +++ b/crates/codegraph-core/src/extractors/ocaml.rs @@ -1,6 +1,6 @@ use tree_sitter::{Node, Tree}; -use crate::cfg::build_function_cfg; -use crate::complexity::compute_all_metrics; +use crate::ast_analysis::cfg::build_function_cfg; +use crate::ast_analysis::complexity::compute_all_metrics; use crate::types::*; use super::helpers::*; use super::SymbolExtractor; diff --git a/crates/codegraph-core/src/extractors/php.rs b/crates/codegraph-core/src/extractors/php.rs index 008069877..707eab10b 100644 --- a/crates/codegraph-core/src/extractors/php.rs +++ b/crates/codegraph-core/src/extractors/php.rs @@ -1,7 +1,7 @@ use super::helpers::*; use super::SymbolExtractor; -use crate::cfg::build_function_cfg; -use crate::complexity::compute_all_metrics; +use 
crate::ast_analysis::cfg::build_function_cfg; +use crate::ast_analysis::complexity::compute_all_metrics; use crate::types::*; use tree_sitter::{Node, Tree}; diff --git a/crates/codegraph-core/src/extractors/python.rs b/crates/codegraph-core/src/extractors/python.rs index 108fb0998..bd72eef92 100644 --- a/crates/codegraph-core/src/extractors/python.rs +++ b/crates/codegraph-core/src/extractors/python.rs @@ -1,6 +1,6 @@ use tree_sitter::{Node, Tree}; -use crate::cfg::build_function_cfg; -use crate::complexity::compute_all_metrics; +use crate::ast_analysis::cfg::build_function_cfg; +use crate::ast_analysis::complexity::compute_all_metrics; use crate::types::*; use super::helpers::*; use super::SymbolExtractor; diff --git a/crates/codegraph-core/src/extractors/r_lang.rs b/crates/codegraph-core/src/extractors/r_lang.rs index 3686c562e..4a7081013 100644 --- a/crates/codegraph-core/src/extractors/r_lang.rs +++ b/crates/codegraph-core/src/extractors/r_lang.rs @@ -1,6 +1,6 @@ use tree_sitter::{Node, Tree}; -use crate::cfg::build_function_cfg; -use crate::complexity::compute_all_metrics; +use crate::ast_analysis::cfg::build_function_cfg; +use crate::ast_analysis::complexity::compute_all_metrics; use crate::types::*; use super::helpers::*; use super::SymbolExtractor; diff --git a/crates/codegraph-core/src/extractors/ruby.rs b/crates/codegraph-core/src/extractors/ruby.rs index 2a871568a..d8d6ebad1 100644 --- a/crates/codegraph-core/src/extractors/ruby.rs +++ b/crates/codegraph-core/src/extractors/ruby.rs @@ -1,7 +1,7 @@ use super::helpers::*; use super::SymbolExtractor; -use crate::cfg::build_function_cfg; -use crate::complexity::compute_all_metrics; +use crate::ast_analysis::cfg::build_function_cfg; +use crate::ast_analysis::complexity::compute_all_metrics; use crate::types::*; use tree_sitter::{Node, Tree}; diff --git a/crates/codegraph-core/src/extractors/rust_lang.rs b/crates/codegraph-core/src/extractors/rust_lang.rs index a8397a4fa..119010ace 100644 --- 
a/crates/codegraph-core/src/extractors/rust_lang.rs +++ b/crates/codegraph-core/src/extractors/rust_lang.rs @@ -1,7 +1,7 @@ use super::helpers::*; use super::SymbolExtractor; -use crate::cfg::build_function_cfg; -use crate::complexity::compute_all_metrics; +use crate::ast_analysis::cfg::build_function_cfg; +use crate::ast_analysis::complexity::compute_all_metrics; use crate::types::*; use tree_sitter::{Node, Tree}; diff --git a/crates/codegraph-core/src/extractors/scala.rs b/crates/codegraph-core/src/extractors/scala.rs index f57185c1b..4811c95c7 100644 --- a/crates/codegraph-core/src/extractors/scala.rs +++ b/crates/codegraph-core/src/extractors/scala.rs @@ -1,6 +1,6 @@ use tree_sitter::{Node, Tree}; -use crate::cfg::build_function_cfg; -use crate::complexity::compute_all_metrics; +use crate::ast_analysis::cfg::build_function_cfg; +use crate::ast_analysis::complexity::compute_all_metrics; use crate::types::*; use super::helpers::*; use super::SymbolExtractor; diff --git a/crates/codegraph-core/src/extractors/solidity.rs b/crates/codegraph-core/src/extractors/solidity.rs index 313b259fa..6aa6e9694 100644 --- a/crates/codegraph-core/src/extractors/solidity.rs +++ b/crates/codegraph-core/src/extractors/solidity.rs @@ -500,7 +500,7 @@ mod tests { fn parse_sol(code: &str) -> FileSymbols { let mut parser = Parser::new(); parser - .set_language(&crate::parser_registry::LanguageKind::Solidity.tree_sitter_language()) + .set_language(&crate::domain::parser::LanguageKind::Solidity.tree_sitter_language()) .unwrap(); let tree = parser.parse(code.as_bytes(), None).unwrap(); SolidityExtractor.extract(&tree, code.as_bytes(), "Test.sol") diff --git a/crates/codegraph-core/src/extractors/swift.rs b/crates/codegraph-core/src/extractors/swift.rs index b53d8f950..f85e12d38 100644 --- a/crates/codegraph-core/src/extractors/swift.rs +++ b/crates/codegraph-core/src/extractors/swift.rs @@ -1,6 +1,6 @@ use tree_sitter::{Node, Tree}; -use crate::cfg::build_function_cfg; -use 
crate::complexity::compute_all_metrics; +use crate::ast_analysis::cfg::build_function_cfg; +use crate::ast_analysis::complexity::compute_all_metrics; use crate::types::*; use super::helpers::*; use super::SymbolExtractor; diff --git a/crates/codegraph-core/src/extractors/zig.rs b/crates/codegraph-core/src/extractors/zig.rs index 9fb08272f..555a4f570 100644 --- a/crates/codegraph-core/src/extractors/zig.rs +++ b/crates/codegraph-core/src/extractors/zig.rs @@ -1,6 +1,6 @@ use tree_sitter::{Node, Tree}; -use crate::cfg::build_function_cfg; -use crate::complexity::compute_all_metrics; +use crate::ast_analysis::cfg::build_function_cfg; +use crate::ast_analysis::complexity::compute_all_metrics; use crate::types::*; use super::helpers::*; use super::SymbolExtractor; diff --git a/crates/codegraph-core/src/features/mod.rs b/crates/codegraph-core/src/features/mod.rs new file mode 100644 index 000000000..26a4f5f42 --- /dev/null +++ b/crates/codegraph-core/src/features/mod.rs @@ -0,0 +1 @@ +pub mod structure; diff --git a/crates/codegraph-core/src/structure.rs b/crates/codegraph-core/src/features/structure.rs similarity index 100% rename from crates/codegraph-core/src/structure.rs rename to crates/codegraph-core/src/features/structure.rs diff --git a/crates/codegraph-core/src/graph/algorithms/bfs.rs b/crates/codegraph-core/src/graph/algorithms/bfs.rs new file mode 100644 index 000000000..7d561cd6c --- /dev/null +++ b/crates/codegraph-core/src/graph/algorithms/bfs.rs @@ -0,0 +1,137 @@ +use std::collections::{HashMap, VecDeque}; + +use super::DirectedGraph; +use crate::types::GraphEdge; +use napi_derive::napi; + +#[napi(object)] +#[derive(Debug, Clone)] +pub struct BfsEntry { + pub node: String, + pub depth: i32, +} + +/// Pick the neighbor set used by `bfs_traversal` for the requested direction. +/// "backward" → predecessors, "both" → predecessors + successors, +/// anything else → successors. Mirrors the JS direction enum. 
+fn bfs_neighbors_for_direction<'a>( + graph: &'a DirectedGraph<'a>, + current: &str, + direction: &str, +) -> Vec<&'a str> { + match direction { + "backward" => graph + .predecessors + .get(current) + .map(|v| v.as_slice()) + .unwrap_or(&[]) + .to_vec(), + "both" => { + let mut all: Vec<&str> = Vec::new(); + if let Some(succ) = graph.successors.get(current) { + all.extend(succ.iter()); + } + if let Some(pred) = graph.predecessors.get(current) { + all.extend(pred.iter()); + } + all + } + _ => graph + .successors + .get(current) + .map(|v| v.as_slice()) + .unwrap_or(&[]) + .to_vec(), + } +} + +/// BFS traversal on a directed graph built from edges. +/// `direction`: "forward" (default), "backward", or "both". +/// Returns node→depth pairs for all reachable nodes. +#[napi] +pub fn bfs_traversal( + edges: Vec, + start_ids: Vec, + max_depth: Option, + direction: Option, +) -> Vec { + let graph = DirectedGraph::from_edges(&edges); + let max_depth = max_depth.unwrap_or(i32::MAX); + let dir = direction.as_deref().unwrap_or("forward"); + + let mut depths: HashMap<&str, i32> = HashMap::new(); + let mut queue: VecDeque<&str> = VecDeque::new(); + + for id in &start_ids { + let key = id.as_str(); + if graph.nodes.contains(key) && !depths.contains_key(key) { + depths.insert(key, 0); + queue.push_back(key); + } + } + + while let Some(current) = queue.pop_front() { + let depth = depths[current]; + if depth >= max_depth { + continue; + } + for n in bfs_neighbors_for_direction(&graph, current, dir) { + if !depths.contains_key(n) { + depths.insert(n, depth + 1); + queue.push_back(n); + } + } + } + + depths + .into_iter() + .map(|(node, depth)| BfsEntry { + node: node.to_string(), + depth, + }) + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + + fn edge(src: &str, tgt: &str) -> GraphEdge { + GraphEdge { + source: src.to_string(), + target: tgt.to_string(), + } + } + + #[test] + fn test_bfs_forward() { + let edges = vec![edge("a", "b"), edge("b", "c"), edge("c", "d")]; + 
let result = bfs_traversal(edges, vec!["a".into()], None, None); + let map: HashMap = result.into_iter().map(|e| (e.node, e.depth)).collect(); + assert_eq!(map["a"], 0); + assert_eq!(map["b"], 1); + assert_eq!(map["c"], 2); + assert_eq!(map["d"], 3); + } + + #[test] + fn test_bfs_max_depth() { + let edges = vec![edge("a", "b"), edge("b", "c"), edge("c", "d")]; + let result = bfs_traversal(edges, vec!["a".into()], Some(2), None); + let map: HashMap = result.into_iter().map(|e| (e.node, e.depth)).collect(); + assert_eq!(map.get("a"), Some(&0)); + assert_eq!(map.get("b"), Some(&1)); + assert_eq!(map.get("c"), Some(&2)); + assert_eq!(map.get("d"), None); + } + + #[test] + fn test_bfs_backward() { + let edges = vec![edge("a", "b"), edge("b", "c")]; + let result = bfs_traversal(edges, vec!["c".into()], None, Some("backward".into())); + let map: HashMap = result.into_iter().map(|e| (e.node, e.depth)).collect(); + assert_eq!(map["c"], 0); + assert_eq!(map["b"], 1); + assert_eq!(map["a"], 2); + } +} diff --git a/crates/codegraph-core/src/graph/algorithms/centrality.rs b/crates/codegraph-core/src/graph/algorithms/centrality.rs new file mode 100644 index 000000000..fdd1fe610 --- /dev/null +++ b/crates/codegraph-core/src/graph/algorithms/centrality.rs @@ -0,0 +1,65 @@ +use std::collections::{HashMap, HashSet}; + +use crate::types::GraphEdge; +use napi_derive::napi; + +#[napi(object)] +#[derive(Debug, Clone)] +pub struct FanInOutEntry { + pub node: String, + #[napi(js_name = "fanIn")] + pub fan_in: i32, + #[napi(js_name = "fanOut")] + pub fan_out: i32, +} + +/// Compute fan-in (in-degree) and fan-out (out-degree) for all nodes. 
+#[napi] +pub fn fan_in_out(edges: Vec) -> Vec { + let mut in_degree: HashMap<&str, i32> = HashMap::new(); + let mut out_degree: HashMap<&str, i32> = HashMap::new(); + let mut nodes: HashSet<&str> = HashSet::new(); + + for edge in &edges { + let src = edge.source.as_str(); + let tgt = edge.target.as_str(); + nodes.insert(src); + nodes.insert(tgt); + *out_degree.entry(src).or_insert(0) += 1; + *in_degree.entry(tgt).or_insert(0) += 1; + } + + nodes + .into_iter() + .map(|node| FanInOutEntry { + node: node.to_string(), + fan_in: *in_degree.get(node).unwrap_or(&0), + fan_out: *out_degree.get(node).unwrap_or(&0), + }) + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + + fn edge(src: &str, tgt: &str) -> GraphEdge { + GraphEdge { + source: src.to_string(), + target: tgt.to_string(), + } + } + + #[test] + fn test_fan_in_out() { + let edges = vec![edge("a", "b"), edge("a", "c"), edge("b", "c")]; + let result = fan_in_out(edges); + let map: HashMap = result + .into_iter() + .map(|e| (e.node, (e.fan_in, e.fan_out))) + .collect(); + assert_eq!(map["a"], (0, 2)); + assert_eq!(map["b"], (1, 1)); + assert_eq!(map["c"], (2, 0)); + } +} diff --git a/crates/codegraph-core/src/graph_algorithms.rs b/crates/codegraph-core/src/graph/algorithms/louvain.rs similarity index 53% rename from crates/codegraph-core/src/graph_algorithms.rs rename to crates/codegraph-core/src/graph/algorithms/louvain.rs index 4d08a4d67..165a5c27b 100644 --- a/crates/codegraph-core/src/graph_algorithms.rs +++ b/crates/codegraph-core/src/graph/algorithms/louvain.rs @@ -1,28 +1,11 @@ -use std::collections::{HashMap, HashSet, VecDeque}; +use std::collections::HashMap; -use crate::constants::{DEFAULT_RANDOM_SEED, LOUVAIN_MAX_LEVELS, LOUVAIN_MAX_PASSES, LOUVAIN_MIN_GAIN}; +use crate::shared::constants::{ + DEFAULT_RANDOM_SEED, LOUVAIN_MAX_LEVELS, LOUVAIN_MAX_PASSES, LOUVAIN_MIN_GAIN, +}; use crate::types::GraphEdge; use napi_derive::napi; -// ─── Result types 
──────────────────────────────────────────────────── - -#[napi(object)] -#[derive(Debug, Clone)] -pub struct BfsEntry { - pub node: String, - pub depth: i32, -} - -#[napi(object)] -#[derive(Debug, Clone)] -pub struct FanInOutEntry { - pub node: String, - #[napi(js_name = "fanIn")] - pub fan_in: i32, - #[napi(js_name = "fanOut")] - pub fan_out: i32, -} - #[napi(object)] #[derive(Debug, Clone)] pub struct CommunityAssignment { @@ -37,210 +20,6 @@ pub struct LouvainResult { pub modularity: f64, } -// ─── Adjacency builder ────────────────────────────────────────────── - -struct DirectedGraph<'a> { - successors: HashMap<&'a str, Vec<&'a str>>, - predecessors: HashMap<&'a str, Vec<&'a str>>, - nodes: HashSet<&'a str>, -} - -impl<'a> DirectedGraph<'a> { - fn from_edges(edges: &'a [GraphEdge]) -> Self { - let mut successors: HashMap<&str, Vec<&str>> = HashMap::new(); - let mut predecessors: HashMap<&str, Vec<&str>> = HashMap::new(); - let mut nodes: HashSet<&str> = HashSet::new(); - - for edge in edges { - let src = edge.source.as_str(); - let tgt = edge.target.as_str(); - nodes.insert(src); - nodes.insert(tgt); - successors.entry(src).or_default().push(tgt); - predecessors.entry(tgt).or_default().push(src); - successors.entry(tgt).or_default(); - predecessors.entry(src).or_default(); - } - - Self { - successors, - predecessors, - nodes, - } - } -} - -// ─── Traversal helpers ─────────────────────────────────────────────── - -/// Pick the neighbor set used by `bfs_traversal` for the requested direction. -/// "backward" → predecessors, "both" → predecessors + successors, -/// anything else → successors. Mirrors the JS direction enum. 
-fn bfs_neighbors_for_direction<'a>( - graph: &'a DirectedGraph<'a>, - current: &str, - direction: &str, -) -> Vec<&'a str> { - match direction { - "backward" => graph - .predecessors - .get(current) - .map(|v| v.as_slice()) - .unwrap_or(&[]) - .to_vec(), - "both" => { - let mut all: Vec<&str> = Vec::new(); - if let Some(succ) = graph.successors.get(current) { - all.extend(succ.iter()); - } - if let Some(pred) = graph.predecessors.get(current) { - all.extend(pred.iter()); - } - all - } - _ => graph - .successors - .get(current) - .map(|v| v.as_slice()) - .unwrap_or(&[]) - .to_vec(), - } -} - -/// Walk the parent pointers produced by a BFS back from `terminal` to the -/// start node and return the path as a `Vec` (start → terminal). -fn reconstruct_bfs_path<'a>( - parent: &HashMap<&'a str, Option<&'a str>>, - terminal: &'a str, -) -> Vec { - let mut path: Vec = Vec::new(); - let mut node: Option<&str> = Some(terminal); - while let Some(n) = node { - path.push(n.to_string()); - node = parent.get(n).copied().flatten(); - } - path.reverse(); - path -} - -// ─── BFS ───────────────────────────────────────────────────────────── - -/// BFS traversal on a directed graph built from edges. -/// `direction`: "forward" (default), "backward", or "both". -/// Returns node→depth pairs for all reachable nodes. 
-#[napi] -pub fn bfs_traversal( - edges: Vec, - start_ids: Vec, - max_depth: Option, - direction: Option, -) -> Vec { - let graph = DirectedGraph::from_edges(&edges); - let max_depth = max_depth.unwrap_or(i32::MAX); - let dir = direction.as_deref().unwrap_or("forward"); - - let mut depths: HashMap<&str, i32> = HashMap::new(); - let mut queue: VecDeque<&str> = VecDeque::new(); - - for id in &start_ids { - let key = id.as_str(); - if graph.nodes.contains(key) && !depths.contains_key(key) { - depths.insert(key, 0); - queue.push_back(key); - } - } - - while let Some(current) = queue.pop_front() { - let depth = depths[current]; - if depth >= max_depth { - continue; - } - for n in bfs_neighbors_for_direction(&graph, current, dir) { - if !depths.contains_key(n) { - depths.insert(n, depth + 1); - queue.push_back(n); - } - } - } - - depths - .into_iter() - .map(|(node, depth)| BfsEntry { - node: node.to_string(), - depth, - }) - .collect() -} - -// ─── Shortest path ─────────────────────────────────────────────────── - -/// BFS-based shortest path on a directed graph. -/// Returns the path from `from_id` to `to_id` (inclusive), or empty if unreachable. 
-#[napi] -pub fn shortest_path(edges: Vec, from_id: String, to_id: String) -> Vec { - let graph = DirectedGraph::from_edges(&edges); - - if !graph.nodes.contains(from_id.as_str()) || !graph.nodes.contains(to_id.as_str()) { - return vec![]; - } - if from_id == to_id { - return vec![from_id]; - } - - let mut parent: HashMap<&str, Option<&str>> = HashMap::new(); - parent.insert(from_id.as_str(), None); - let mut queue: VecDeque<&str> = VecDeque::new(); - queue.push_back(from_id.as_str()); - - while let Some(current) = queue.pop_front() { - let neighbors = match graph.successors.get(current) { - Some(n) => n, - None => continue, - }; - for &neighbor in neighbors { - if parent.contains_key(neighbor) { - continue; - } - parent.insert(neighbor, Some(current)); - if neighbor == to_id.as_str() { - return reconstruct_bfs_path(&parent, neighbor); - } - queue.push_back(neighbor); - } - } - - vec![] -} - -// ─── Fan-in / Fan-out centrality ───────────────────────────────────── - -/// Compute fan-in (in-degree) and fan-out (out-degree) for all nodes. -#[napi] -pub fn fan_in_out(edges: Vec) -> Vec { - let mut in_degree: HashMap<&str, i32> = HashMap::new(); - let mut out_degree: HashMap<&str, i32> = HashMap::new(); - let mut nodes: HashSet<&str> = HashSet::new(); - - for edge in &edges { - let src = edge.source.as_str(); - let tgt = edge.target.as_str(); - nodes.insert(src); - nodes.insert(tgt); - *out_degree.entry(src).or_insert(0) += 1; - *in_degree.entry(tgt).or_insert(0) += 1; - } - - nodes - .into_iter() - .map(|node| FanInOutEntry { - node: node.to_string(), - fan_in: *in_degree.get(node).unwrap_or(&0), - fan_out: *out_degree.get(node).unwrap_or(&0), - }) - .collect() -} - -// ─── Louvain community detection ───────────────────────────────────── - /// Classic Louvain algorithm for undirected community detection. /// /// Takes an edge list and treats it as undirected. 
Optimizes modularity @@ -559,8 +338,6 @@ fn louvain_impl( } } -// ─── Tests ─────────────────────────────────────────────────────────── - #[cfg(test)] mod tests { use super::*; @@ -572,72 +349,6 @@ mod tests { } } - #[test] - fn test_bfs_forward() { - let edges = vec![edge("a", "b"), edge("b", "c"), edge("c", "d")]; - let result = bfs_traversal(edges, vec!["a".into()], None, None); - let map: HashMap = result.into_iter().map(|e| (e.node, e.depth)).collect(); - assert_eq!(map["a"], 0); - assert_eq!(map["b"], 1); - assert_eq!(map["c"], 2); - assert_eq!(map["d"], 3); - } - - #[test] - fn test_bfs_max_depth() { - let edges = vec![edge("a", "b"), edge("b", "c"), edge("c", "d")]; - let result = bfs_traversal(edges, vec!["a".into()], Some(2), None); - let map: HashMap = result.into_iter().map(|e| (e.node, e.depth)).collect(); - assert_eq!(map.get("a"), Some(&0)); - assert_eq!(map.get("b"), Some(&1)); - assert_eq!(map.get("c"), Some(&2)); - assert_eq!(map.get("d"), None); - } - - #[test] - fn test_bfs_backward() { - let edges = vec![edge("a", "b"), edge("b", "c")]; - let result = bfs_traversal(edges, vec!["c".into()], None, Some("backward".into())); - let map: HashMap = result.into_iter().map(|e| (e.node, e.depth)).collect(); - assert_eq!(map["c"], 0); - assert_eq!(map["b"], 1); - assert_eq!(map["a"], 2); - } - - #[test] - fn test_shortest_path_found() { - let edges = vec![edge("a", "b"), edge("b", "c"), edge("a", "c")]; - let path = shortest_path(edges, "a".into(), "c".into()); - assert_eq!(path, vec!["a", "c"]); - } - - #[test] - fn test_shortest_path_not_found() { - let edges = vec![edge("a", "b")]; - let path = shortest_path(edges, "b".into(), "a".into()); - assert!(path.is_empty()); - } - - #[test] - fn test_shortest_path_same_node() { - let edges = vec![edge("a", "b")]; - let path = shortest_path(edges, "a".into(), "a".into()); - assert_eq!(path, vec!["a"]); - } - - #[test] - fn test_fan_in_out() { - let edges = vec![edge("a", "b"), edge("a", "c"), edge("b", "c")]; - 
let result = fan_in_out(edges); - let map: HashMap = result - .into_iter() - .map(|e| (e.node, (e.fan_in, e.fan_out))) - .collect(); - assert_eq!(map["a"], (0, 2)); - assert_eq!(map["b"], (1, 1)); - assert_eq!(map["c"], (2, 0)); - } - #[test] fn test_louvain_empty() { let result = louvain_communities(vec![], vec![], None, None); diff --git a/crates/codegraph-core/src/graph/algorithms/mod.rs b/crates/codegraph-core/src/graph/algorithms/mod.rs new file mode 100644 index 000000000..2479ba1c2 --- /dev/null +++ b/crates/codegraph-core/src/graph/algorithms/mod.rs @@ -0,0 +1,48 @@ +//! Graph algorithms — mirrors `src/graph/algorithms/`. + +pub mod bfs; +pub mod centrality; +pub mod louvain; +pub mod shortest_path; +pub mod tarjan; + +pub use bfs::{bfs_traversal, BfsEntry}; +pub use centrality::{fan_in_out, FanInOutEntry}; +pub use louvain::{louvain_communities, CommunityAssignment, LouvainResult}; +pub use shortest_path::shortest_path; + +use std::collections::{HashMap, HashSet}; + +use crate::types::GraphEdge; + +/// Directed adjacency representation shared by the traversal algorithms. 
+pub(crate) struct DirectedGraph<'a> { + pub(crate) successors: HashMap<&'a str, Vec<&'a str>>, + pub(crate) predecessors: HashMap<&'a str, Vec<&'a str>>, + pub(crate) nodes: HashSet<&'a str>, +} + +impl<'a> DirectedGraph<'a> { + pub(crate) fn from_edges(edges: &'a [GraphEdge]) -> Self { + let mut successors: HashMap<&str, Vec<&str>> = HashMap::new(); + let mut predecessors: HashMap<&str, Vec<&str>> = HashMap::new(); + let mut nodes: HashSet<&str> = HashSet::new(); + + for edge in edges { + let src = edge.source.as_str(); + let tgt = edge.target.as_str(); + nodes.insert(src); + nodes.insert(tgt); + successors.entry(src).or_default().push(tgt); + predecessors.entry(tgt).or_default().push(src); + successors.entry(tgt).or_default(); + predecessors.entry(src).or_default(); + } + + Self { + successors, + predecessors, + nodes, + } + } +} diff --git a/crates/codegraph-core/src/graph/algorithms/shortest_path.rs b/crates/codegraph-core/src/graph/algorithms/shortest_path.rs new file mode 100644 index 000000000..e9fc12c94 --- /dev/null +++ b/crates/codegraph-core/src/graph/algorithms/shortest_path.rs @@ -0,0 +1,92 @@ +use std::collections::{HashMap, VecDeque}; + +use super::DirectedGraph; +use crate::types::GraphEdge; +use napi_derive::napi; + +/// Walk the parent pointers produced by a BFS back from `terminal` to the +/// start node and return the path as a `Vec` (start → terminal). +fn reconstruct_bfs_path<'a>( + parent: &HashMap<&'a str, Option<&'a str>>, + terminal: &'a str, +) -> Vec { + let mut path: Vec = Vec::new(); + let mut node: Option<&str> = Some(terminal); + while let Some(n) = node { + path.push(n.to_string()); + node = parent.get(n).copied().flatten(); + } + path.reverse(); + path +} + +/// BFS-based shortest path on a directed graph. +/// Returns the path from `from_id` to `to_id` (inclusive), or empty if unreachable. 
+#[napi] +pub fn shortest_path(edges: Vec, from_id: String, to_id: String) -> Vec { + let graph = DirectedGraph::from_edges(&edges); + + if !graph.nodes.contains(from_id.as_str()) || !graph.nodes.contains(to_id.as_str()) { + return vec![]; + } + if from_id == to_id { + return vec![from_id]; + } + + let mut parent: HashMap<&str, Option<&str>> = HashMap::new(); + parent.insert(from_id.as_str(), None); + let mut queue: VecDeque<&str> = VecDeque::new(); + queue.push_back(from_id.as_str()); + + while let Some(current) = queue.pop_front() { + let neighbors = match graph.successors.get(current) { + Some(n) => n, + None => continue, + }; + for &neighbor in neighbors { + if parent.contains_key(neighbor) { + continue; + } + parent.insert(neighbor, Some(current)); + if neighbor == to_id.as_str() { + return reconstruct_bfs_path(&parent, neighbor); + } + queue.push_back(neighbor); + } + } + + vec![] +} + +#[cfg(test)] +mod tests { + use super::*; + + fn edge(src: &str, tgt: &str) -> GraphEdge { + GraphEdge { + source: src.to_string(), + target: tgt.to_string(), + } + } + + #[test] + fn test_shortest_path_found() { + let edges = vec![edge("a", "b"), edge("b", "c"), edge("a", "c")]; + let path = shortest_path(edges, "a".into(), "c".into()); + assert_eq!(path, vec!["a", "c"]); + } + + #[test] + fn test_shortest_path_not_found() { + let edges = vec![edge("a", "b")]; + let path = shortest_path(edges, "b".into(), "a".into()); + assert!(path.is_empty()); + } + + #[test] + fn test_shortest_path_same_node() { + let edges = vec![edge("a", "b")]; + let path = shortest_path(edges, "a".into(), "a".into()); + assert_eq!(path, vec!["a"]); + } +} diff --git a/crates/codegraph-core/src/cycles.rs b/crates/codegraph-core/src/graph/algorithms/tarjan.rs similarity index 100% rename from crates/codegraph-core/src/cycles.rs rename to crates/codegraph-core/src/graph/algorithms/tarjan.rs diff --git a/crates/codegraph-core/src/graph/classifiers/mod.rs b/crates/codegraph-core/src/graph/classifiers/mod.rs 
new file mode 100644 index 000000000..ad3e620aa --- /dev/null +++ b/crates/codegraph-core/src/graph/classifiers/mod.rs @@ -0,0 +1 @@ +pub mod roles; diff --git a/crates/codegraph-core/src/roles_db.rs b/crates/codegraph-core/src/graph/classifiers/roles.rs similarity index 100% rename from crates/codegraph-core/src/roles_db.rs rename to crates/codegraph-core/src/graph/classifiers/roles.rs diff --git a/crates/codegraph-core/src/graph/mod.rs b/crates/codegraph-core/src/graph/mod.rs new file mode 100644 index 000000000..1a19dee1e --- /dev/null +++ b/crates/codegraph-core/src/graph/mod.rs @@ -0,0 +1,2 @@ +pub mod algorithms; +pub mod classifiers; diff --git a/crates/codegraph-core/src/config.rs b/crates/codegraph-core/src/infrastructure/config.rs similarity index 100% rename from crates/codegraph-core/src/config.rs rename to crates/codegraph-core/src/infrastructure/config.rs diff --git a/crates/codegraph-core/src/infrastructure/mod.rs b/crates/codegraph-core/src/infrastructure/mod.rs new file mode 100644 index 000000000..ef68c3694 --- /dev/null +++ b/crates/codegraph-core/src/infrastructure/mod.rs @@ -0,0 +1 @@ +pub mod config; diff --git a/crates/codegraph-core/src/lib.rs b/crates/codegraph-core/src/lib.rs index 52f9ae41f..a40b9d77f 100644 --- a/crates/codegraph-core/src/lib.rs +++ b/crates/codegraph-core/src/lib.rs @@ -1,31 +1,41 @@ -pub mod analysis; -pub mod ast_db; -pub mod barrel_resolution; -pub mod build_pipeline; -pub mod change_detection; -pub mod cfg; -pub mod complexity; -pub mod config; -pub mod constants; -pub mod cycles; -pub mod dataflow; -pub mod edge_builder; -pub mod edges_db; +//! codegraph-core — native (napi-rs) engine. +//! +//! # Structure parity with the TypeScript engine +//! +//! This crate mirrors the `src/` TypeScript tree so each module has a +//! predictable counterpart in the other engine. When changing resolution or +//! extraction behavior in one engine, apply the equivalent change to the +//! 
mirrored module in the other (both engines must produce identical results). +//! +//! | Rust module | TypeScript counterpart | +//! |--------------------------------------|-------------------------------------------------| +//! | `lib.rs` | `src/index.ts` (public API surface) | +//! | `types.rs` | `src/types.ts` | +//! | `shared/constants.rs` | `src/shared/constants.ts` | +//! | `infrastructure/config.rs` | `src/infrastructure/config.ts` | +//! | `db/connection.rs` | `src/db/connection.ts` + `src/db/migrations.ts` | +//! | `db/repository/*` | `src/db/repository/*` | +//! | `domain/parser.rs` | `src/domain/parser.ts` | +//! | `domain/parallel.rs` | `src/domain/wasm-worker-pool.ts` | +//! | `domain/graph/resolve.rs` | `src/domain/graph/resolve.ts` | +//! | `domain/graph/journal.rs` | `src/domain/graph/journal.ts` | +//! | `domain/graph/builder/pipeline.rs` | `src/domain/graph/builder/pipeline.ts` | +//! | `domain/graph/builder/incremental.rs`| `src/domain/graph/builder/incremental.ts` | +//! | `domain/graph/builder/stages/*` | `src/domain/graph/builder/stages/*` | +//! | `ast_analysis/*` | `src/ast-analysis/*` | +//! | `graph/algorithms/*` | `src/graph/algorithms/*` | +//! | `graph/classifiers/roles.rs` | `src/graph/classifiers/roles.ts` | +//! | `features/structure.rs` | `src/features/structure.ts` | +//! 
| `extractors/*` | `src/extractors/*` | + +pub mod ast_analysis; +pub mod db; +pub mod domain; pub mod extractors; -pub mod file_collector; -pub mod graph_algorithms; -pub mod import_edges; -pub mod import_resolution; -pub mod incremental; -pub mod insert_nodes; -pub mod journal; -pub mod native_db; -pub mod parallel; -pub mod parser_registry; -pub mod read_queries; -pub mod read_types; -pub mod roles_db; -pub mod structure; +pub mod features; +pub mod graph; +pub mod infrastructure; +pub mod shared; pub mod types; use napi_derive::napi; @@ -41,7 +51,7 @@ pub fn parse_file( include_dataflow: Option, include_ast_nodes: Option, ) -> Option { - parallel::parse_file( + domain::parallel::parse_file( &file_path, &source, include_dataflow.unwrap_or(false), @@ -59,7 +69,7 @@ pub fn parse_files( include_dataflow: Option, include_ast_nodes: Option, ) -> Vec { - parallel::parse_files_parallel( + domain::parallel::parse_files_parallel( &file_paths, &root_dir, include_dataflow.unwrap_or(false), @@ -75,7 +85,7 @@ pub fn parse_files_full( file_paths: Vec, root_dir: String, ) -> Vec { - parallel::parse_files_parallel_full( + domain::parallel::parse_files_parallel_full( &file_paths, &root_dir, ) @@ -93,7 +103,7 @@ pub fn resolve_import( base_url: None, paths: vec![], }); - import_resolution::resolve_import_path(&from_file, &import_source, &root_dir, &aliases) + domain::graph::resolve::resolve_import_path(&from_file, &import_source, &root_dir, &aliases) } /// Batch resolve multiple imports. @@ -110,7 +120,7 @@ pub fn resolve_imports( }); let known_set = known_files.map(|v| v.into_iter().collect::>()); - import_resolution::resolve_imports_batch(&inputs, &root_dir, &aliases, known_set.as_ref()) + domain::graph::resolve::resolve_imports_batch(&inputs, &root_dir, &aliases, known_set.as_ref()) } /// Compute proximity-based confidence for call resolution. 
@@ -120,14 +130,14 @@ pub fn compute_confidence( target_file: String, imported_from: Option, ) -> f64 { - import_resolution::compute_confidence(&caller_file, &target_file, imported_from.as_deref()) + domain::graph::resolve::compute_confidence(&caller_file, &target_file, imported_from.as_deref()) } /// Detect cycles using Tarjan's SCC algorithm. /// Returns arrays of node names forming each cycle. #[napi] pub fn detect_cycles(edges: Vec) -> Vec> { - cycles::detect_cycles(&edges) + graph::algorithms::tarjan::detect_cycles(&edges) } /// Returns the engine name. @@ -151,7 +161,7 @@ pub fn analyze_complexity( file_path: String, lang_id: Option, ) -> Vec { - analysis::analyze_complexity_standalone(&source, &file_path, lang_id.as_deref()) + ast_analysis::engine::analyze_complexity_standalone(&source, &file_path, lang_id.as_deref()) } /// Build control-flow graphs for all functions in the given source. @@ -163,7 +173,7 @@ pub fn build_cfg_analysis( file_path: String, lang_id: Option, ) -> Vec { - analysis::build_cfg_standalone(&source, &file_path, lang_id.as_deref()) + ast_analysis::engine::build_cfg_standalone(&source, &file_path, lang_id.as_deref()) } /// Extract dataflow analysis for the given source. 
@@ -175,5 +185,5 @@ pub fn extract_dataflow_analysis( file_path: String, lang_id: Option, ) -> Option { - analysis::extract_dataflow_standalone(&source, &file_path, lang_id.as_deref()) + ast_analysis::engine::extract_dataflow_standalone(&source, &file_path, lang_id.as_deref()) } diff --git a/crates/codegraph-core/src/constants.rs b/crates/codegraph-core/src/shared/constants.rs similarity index 100% rename from crates/codegraph-core/src/constants.rs rename to crates/codegraph-core/src/shared/constants.rs diff --git a/crates/codegraph-core/src/shared/mod.rs b/crates/codegraph-core/src/shared/mod.rs new file mode 100644 index 000000000..0b6d2c4ea --- /dev/null +++ b/crates/codegraph-core/src/shared/mod.rs @@ -0,0 +1 @@ +pub mod constants; diff --git a/docs/contributing/adding-a-language.md b/docs/contributing/adding-a-language.md index 07a4090cc..4c03d76b5 100644 --- a/docs/contributing/adding-a-language.md +++ b/docs/contributing/adding-a-language.md @@ -302,7 +302,7 @@ if (i. === undefined) i. = i._; tree-sitter- = "0.x" ``` -### 6. `crates/codegraph-core/src/parser_registry.rs` — register the language +### 6. 
`crates/codegraph-core/src/domain/parser.rs` — register the language Four changes in this file: @@ -518,7 +518,7 @@ codegraph query someFunction | 4 | `src/types.ts` | Both | Add `''` to the `LanguageId` union; add language-specific flag to `Import` if needed | | 5 | `src/domain/parser.ts` | WASM | Update `patchNativeResult` (if language flag needed) | | 6 | `crates/codegraph-core/Cargo.toml` | Native | Add tree-sitter crate | -| 7 | `crates/.../parser_registry.rs` | Native | Register enum + extension + grammar + `lang_id_str` | +| 7 | `crates/.../domain/parser.rs` | Native | Register enum + extension + grammar + `lang_id_str` | | 8 | `crates/.../extractors/.rs` | Native | Implement `SymbolExtractor` trait | | 9 | `crates/.../extractors/mod.rs` | Native | Declare module + dispatch arm in `extract_symbols_with_opts()` | | 10 | `crates/.../types.rs` | Native | Add language flag to `Import` (if needed) | diff --git a/src/db/migrations.ts b/src/db/migrations.ts index afcab0e51..6ee37d61d 100644 --- a/src/db/migrations.ts +++ b/src/db/migrations.ts @@ -8,7 +8,7 @@ interface Migration { up: string; } -// IMPORTANT: Migration DDL is mirrored in crates/codegraph-core/src/native_db.rs. +// IMPORTANT: Migration DDL is mirrored in crates/codegraph-core/src/db/connection.rs. // Any changes here MUST be reflected there (and vice-versa). export const MIGRATIONS: Migration[] = [ { diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index e6f2fe763..c89f9e2f0 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -1302,7 +1302,7 @@ export async function tryNativeOrchestrator( // Even on no-op rebuilds, dropped-language files added since the last // full build are still missing from `nodes`/`file_hashes` (#1083), and // WASM-only files deleted from disk leave stale rows behind (#1073). 
- // The orchestrator's file_collector skipped them, so its earlyExit + // The orchestrator's collect_files skipped them, so its earlyExit // doesn't imply DB consistency. Run the gap repair before returning. const gap = detectDroppedLanguageGap(ctx); if (gap.missingAbs.length > 0 || gap.staleRel.length > 0) { diff --git a/src/domain/graph/builder/stages/resolve-imports.ts b/src/domain/graph/builder/stages/resolve-imports.ts index 64df0e1a3..5799030a6 100644 --- a/src/domain/graph/builder/stages/resolve-imports.ts +++ b/src/domain/graph/builder/stages/resolve-imports.ts @@ -135,7 +135,7 @@ async function reparseBarrelFiles( // which only runs on the original (changed + reverse-dep) fileSymbols. Barrel // candidates are merged here *after* insertNodes, so wiping those kinds // would permanently drop them (mirrors the Rust orchestrator's Stage 6b - // delete in build_pipeline.rs). + // delete in domain/graph/builder/pipeline.rs). const deleteOutgoingEdges = db.prepare( `DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?) AND kind NOT IN ('contains', 'parameter_of')`, diff --git a/src/domain/parser.ts b/src/domain/parser.ts index 3b05ae450..54aa7b994 100644 --- a/src/domain/parser.ts +++ b/src/domain/parser.ts @@ -465,7 +465,7 @@ export function getInstalledWasmExtensions(): Set { * Lowercase file extensions covered by the native Rust addon. * * Mirrors `LanguageKind::from_extension` in - * `crates/codegraph-core/src/parser_registry.rs`. Used to classify why the + * `crates/codegraph-core/src/domain/parser.rs`. Used to classify why the * native orchestrator dropped a file: extensions outside this set are a * legitimate parser limit (no Rust extractor exists), while extensions inside * it indicate a real native bug (parse/read/extract failure). 
diff --git a/src/infrastructure/config.ts b/src/infrastructure/config.ts index bb537bfe6..261870084 100644 --- a/src/infrastructure/config.ts +++ b/src/infrastructure/config.ts @@ -90,7 +90,7 @@ export const DEFAULTS = { * Maximum fixed-point iterations for the Phase 8.3 points-to solver. * @reserved — currently not wired to either the WASM solver * (`MAX_SOLVER_ITERATIONS` in `points-to.ts`) or the native Rust solver - * (`MAX_SOLVER_ITERATIONS` in `edge_builder.rs`), both of which use the + * (`MAX_SOLVER_ITERATIONS` in `stages/build_edges.rs`), both of which use the * same hardcoded value of 50. See the TODO comment above. */ pointsToMaxIterations: 50, diff --git a/tests/benchmarks/regression-guard.test.ts b/tests/benchmarks/regression-guard.test.ts index 9068e81b6..03bc70b4c 100644 --- a/tests/benchmarks/regression-guard.test.ts +++ b/tests/benchmarks/regression-guard.test.ts @@ -54,7 +54,7 @@ const NOISY_METRIC_THRESHOLD = 0.5; * implementation, fnDepsData JS wrapper, and DB schema/indexes are all * byte-for-byte unchanged since v3.9.6 (verified by `git log v3.9.6..HEAD` - * on crates/codegraph-core/src/read_queries.rs, src/domain/analysis/ - * dependencies.ts, src/db/, crates/codegraph-core/src/native_db.rs). + * on crates/codegraph-core/src/db/repository/graph_read.rs, src/domain/analysis/ + * dependencies.ts, src/db/, crates/codegraph-core/src/db/connection.rs). * CI consistently measures +40–60% on this sub-30ms metric while the * absolute delta (~13ms) is at the noise floor for shared runners. * Methodology already discards 3 warmup runs (#1077). Same pattern as diff --git a/tests/parsers/native-drop-classification.test.ts b/tests/parsers/native-drop-classification.test.ts index 0eb89c854..ded76e70f 100644 --- a/tests/parsers/native-drop-classification.test.ts +++ b/tests/parsers/native-drop-classification.test.ts @@ -161,21 +161,22 @@ describe('formatDropExtensionSummary', () => { * the JS and Rust sides would mis-classify drops (real native failures shown * as info, parser-limit gaps shown as warn). 
The native addon doesn't expose * its own metadata, so we parse the Rust source instead and assert the two - * lists agree at build time. If `parser_registry.rs` is ever refactored, this + * lists agree at build time. If `domain/parser.rs` is ever refactored, this * test fails loudly so the maintainer notices. */ describe('NATIVE_SUPPORTED_EXTENSIONS drift guard', () => { - it('matches the extension set in crates/codegraph-core/src/parser_registry.rs', () => { + it('matches the extension set in crates/codegraph-core/src/domain/parser.rs', () => { const registryPath = path.join( REPO_ROOT, 'crates', 'codegraph-core', 'src', - 'parser_registry.rs', + 'domain', + 'parser.rs', ); const src = fs.readFileSync(registryPath, 'utf8'); const fromExtStart = src.indexOf('pub fn from_extension'); - expect(fromExtStart, 'from_extension not found in parser_registry.rs').toBeGreaterThan(-1); + expect(fromExtStart, 'from_extension not found in domain/parser.rs').toBeGreaterThan(-1); // Slice from `pub fn from_extension` to the next `pub fn` (boundary of // the next method) so we don't accidentally pick up extensions from // unrelated functions like `from_lang_id` (which contains lang_id @@ -207,11 +208,11 @@ describe('NATIVE_SUPPORTED_EXTENSIONS drift guard', () => { const onlyInJs = [...jsExts].filter((e) => !normalized.has(e)); expect( onlyInRust, - `Extensions in parser_registry.rs but missing from NATIVE_SUPPORTED_EXTENSIONS: ${onlyInRust.join(', ')}`, + `Extensions in domain/parser.rs but missing from NATIVE_SUPPORTED_EXTENSIONS: ${onlyInRust.join(', ')}`, ).toEqual([]); expect( onlyInJs, - `Extensions in NATIVE_SUPPORTED_EXTENSIONS but missing from parser_registry.rs: ${onlyInJs.join(', ')}`, + `Extensions in NATIVE_SUPPORTED_EXTENSIONS but missing from domain/parser.rs: ${onlyInJs.join(', ')}`, ).toEqual([]); }); }); @@ -222,7 +223,7 @@ describe('NATIVE_SUPPORTED_EXTENSIONS drift guard', () => { * Acceptance criterion from #1071 (tracked in #1121): a CI gate prevents * 
future drift between the JS `LANGUAGE_REGISTRY` and the Rust extractor * coverage. The existing drift guard above covers - * `NATIVE_SUPPORTED_EXTENSIONS ↔ parser_registry.rs`, but the link from + * `NATIVE_SUPPORTED_EXTENSIONS ↔ domain/parser.rs`, but the link from * `LANGUAGE_REGISTRY` (the source of truth for languages we support at all) * to `NATIVE_SUPPORTED_EXTENSIONS` (the hand-maintained mirror of the Rust * enum) had no test — silently adding a WASM-only language would degrade the From 477222bd7e0691aff3e066daf34fed27ea1f9b8c Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 11 Jun 2026 18:33:06 -0600 Subject: [PATCH 2/5] feat(parity): port the JS points-to solver to the native engine and unify per-engine resolution Native (Rust): - Extract all eight pts binding kinds in the JS extractor (param, this-call, array-element, spread-arg, for-of, array-callback, object-rest-param, object-prop) and surface them on FileSymbols - Run the same fixed-point points-to solver as the WASM path inside build_call_edges: thisCall-to-fnRef conversion, four-case key gate with receiver-key fallback, hop-penalised alias edges with pts upgrade - Normalize inline-new receivers (new A().t() -> receiver "A") in extract_receiver_name, mirroring extractReceiverName in the TS extractor - Apply the >=0.5 confidence filter on exact cross-file lookups (#1439) WASM/TS: - Plumb params and all eight binding arrays through NativeFileEntry so the hybrid path feeds the native solver - Serialize thisCallBindings across the WASM worker boundary - Backport the native engine's class-field type-annotation extraction (private repo: Repository seeds typeMap "repo"/"this.repo") - Remove the four JS pts post-passes that duplicated the native solver on the hybrid path (param-flow, fnRef, thisCall, object-rest) - Report the native build summary and build_meta counts after the JS edge-writing post-passes so they include CHA/this-dispatch edges (#1452) WASM, full-native orchestrator, and hybrid builds 
now produce identical edge multisets on the javascript fixture (155 rows each, including confidence and dynamic flags); javascript 42/42 and pts-javascript 13/13 expected edges on both engines; 392 Rust tests, 3043 JS tests, and the 176-test resolution benchmark are green. Closes #1453 Closes #1452 Closes #1439 --- .../src/domain/graph/builder/pipeline.rs | 27 +- .../graph/builder/stages/build_edges.rs | 922 ++++++++++++-- .../graph/builder/stages/import_edges.rs | 8 + .../src/extractors/javascript.rs | 1083 ++++++++++++++++- .../codegraph-core/src/features/structure.rs | 17 +- crates/codegraph-core/src/types.rs | 128 ++ .../graph/builder/stages/build-edges.ts | 443 +------ .../builder/stages/native-orchestrator.ts | 30 +- src/domain/wasm-worker-entry.ts | 1 + src/domain/wasm-worker-pool.ts | 1 + src/domain/wasm-worker-protocol.ts | 1 + src/extractors/javascript.ts | 30 + tests/parsers/javascript.test.ts | 11 + 13 files changed, 2150 insertions(+), 552 deletions(-) diff --git a/crates/codegraph-core/src/domain/graph/builder/pipeline.rs b/crates/codegraph-core/src/domain/graph/builder/pipeline.rs index 6ef4d2f2b..36b9d7059 100644 --- a/crates/codegraph-core/src/domain/graph/builder/pipeline.rs +++ b/crates/codegraph-core/src/domain/graph/builder/pipeline.rs @@ -1348,6 +1348,10 @@ fn build_and_insert_call_edges( }) .collect(); + fn non_empty(v: &[T]) -> Option> { + if v.is_empty() { None } else { Some(v.to_vec()) } + } + file_entries.push(FileEdgeInput { file: rel_path.clone(), file_node_id, @@ -1359,6 +1363,15 @@ fn build_and_insert_call_edges( kind: d.kind.clone(), line: d.line, end_line: d.end_line, + // Phase 8.3c: ordered parameter names for parameter-flow pts — + // mirrors buildDefinitionParamsMap reading def.children. 
+ params: d.children.as_ref().map(|children| { + children + .iter() + .filter(|c| c.kind == "parameter") + .map(|c| c.name.clone()) + .collect() + }), }) .collect(), calls: symbols @@ -1382,11 +1395,15 @@ fn build_and_insert_call_edges( }) .collect(), type_map, - fn_ref_bindings: if symbols.fn_ref_bindings.is_empty() { - None - } else { - Some(symbols.fn_ref_bindings.clone()) - }, + fn_ref_bindings: non_empty(&symbols.fn_ref_bindings), + param_bindings: non_empty(&symbols.param_bindings), + this_call_bindings: non_empty(&symbols.this_call_bindings), + array_elem_bindings: non_empty(&symbols.array_elem_bindings), + spread_arg_bindings: non_empty(&symbols.spread_arg_bindings), + for_of_bindings: non_empty(&symbols.for_of_bindings), + array_callback_bindings: non_empty(&symbols.array_callback_bindings), + object_rest_param_bindings: non_empty(&symbols.object_rest_param_bindings), + object_prop_bindings: non_empty(&symbols.object_prop_bindings), }); } diff --git a/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs b/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs index 339e3ecc0..2aedf7fdd 100644 --- a/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs +++ b/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs @@ -4,7 +4,10 @@ use napi_derive::napi; use crate::domain::graph::builder::barrel_resolution::{self, BarrelContext, ReexportRef}; use crate::domain::graph::resolve; -use crate::types::FnRefBinding; +use crate::types::{ + ArrayCallbackBinding, ArrayElemBinding, FnRefBinding, ForOfBinding, ObjectPropBinding, + ObjectRestParamBinding, ParamBinding, SpreadArgBinding, ThisCallBinding, +}; /// Kind sets for hierarchy edge resolution -- mirrors the JS constants in /// `build-edges.js` (`HIERARCHY_SOURCE_KINDS`, `EXTENDS_TARGET_KINDS`, @@ -55,6 +58,9 @@ pub struct DefInfo { pub line: u32, #[napi(js_name = "endLine")] pub end_line: Option, + /// Ordered parameter names for Phase 8.3c parameter-flow 
pts + /// (mirrors `buildDefinitionParamsMap` input in build-edges.ts). + pub params: Option>, } #[napi(object)] @@ -81,6 +87,30 @@ pub struct FileEdgeInput { /// Function-reference bindings for Phase 8.3 pts analysis (optional). #[napi(js_name = "fnRefBindings")] pub fn_ref_bindings: Option>, + /// Phase 8.3c: argument-to-parameter bindings. + #[napi(js_name = "paramBindings")] + pub param_bindings: Option>, + /// This-context bindings from `fn.call(ctx)` / `fn.apply(ctx)`. + #[napi(js_name = "thisCallBindings")] + pub this_call_bindings: Option>, + /// Phase 8.3e: array-element bindings. + #[napi(js_name = "arrayElemBindings")] + pub array_elem_bindings: Option>, + /// Phase 8.3e: spread-argument bindings. + #[napi(js_name = "spreadArgBindings")] + pub spread_arg_bindings: Option>, + /// Phase 8.3e: for-of iteration bindings. + #[napi(js_name = "forOfBindings")] + pub for_of_bindings: Option>, + /// Phase 8.3e: Array.from callback bindings. + #[napi(js_name = "arrayCallbackBindings")] + pub array_callback_bindings: Option>, + /// Phase 8.3f: object-rest parameter bindings. + #[napi(js_name = "objectRestParamBindings")] + pub object_rest_param_bindings: Option>, + /// Phase 8.3f: object-property bindings. + #[napi(js_name = "objectPropBindings")] + pub object_prop_bindings: Option>, } #[napi(object)] @@ -106,8 +136,6 @@ struct DefWithId<'a> { struct EdgeContext<'a> { nodes_by_name: HashMap<&'a str, Vec<&'a NodeInfo>>, nodes_by_name_and_file: HashMap<(&'a str, &'a str), Vec<&'a NodeInfo>>, - /// All nodes grouped by file — used for same-file method resolution (CHA this-dispatch). 
- nodes_by_file: HashMap<&'a str, Vec<&'a NodeInfo>>, builtin_set: HashSet<&'a str>, receiver_kinds: HashSet<&'a str>, } @@ -116,19 +144,17 @@ impl<'a> EdgeContext<'a> { fn new(all_nodes: &'a [NodeInfo], builtin_receivers: &'a [String]) -> Self { let mut nodes_by_name: HashMap<&str, Vec<&NodeInfo>> = HashMap::new(); let mut nodes_by_name_and_file: HashMap<(&str, &str), Vec<&NodeInfo>> = HashMap::new(); - let mut nodes_by_file: HashMap<&str, Vec<&NodeInfo>> = HashMap::new(); for node in all_nodes { nodes_by_name.entry(&node.name).or_default().push(node); nodes_by_name_and_file .entry((&node.name, &node.file)) .or_default() .push(node); - nodes_by_file.entry(&node.file).or_default().push(node); } let builtin_set: HashSet<&str> = builtin_receivers.iter().map(|s| s.as_str()).collect(); let receiver_kinds: HashSet<&str> = ["class", "struct", "interface", "type", "module"] .iter().copied().collect(); - Self { nodes_by_name, nodes_by_name_and_file, nodes_by_file, builtin_set, receiver_kinds } + Self { nodes_by_name, nodes_by_name_and_file, builtin_set, receiver_kinds } } } @@ -140,16 +166,31 @@ impl<'a> EdgeContext<'a> { /// config plumbing is in place (same pattern as `typePropagationDepth`). const MAX_SOLVER_ITERATIONS: usize = 50; +/// Per-file points-to binding inputs, borrowed from a `FileEdgeInput`. +/// `fn_ref_bindings` must already include the `fn::this → ctx` conversions +/// from `this_call_bindings` (see `process_file`). +struct PtsBindings<'a> { + fn_ref_bindings: &'a [FnRefBinding], + param_bindings: &'a [ParamBinding], + array_elem_bindings: &'a [ArrayElemBinding], + spread_arg_bindings: &'a [SpreadArgBinding], + for_of_bindings: &'a [ForOfBinding], + array_callback_bindings: &'a [ArrayCallbackBinding], + object_rest_param_bindings: &'a [ObjectRestParamBinding], + object_prop_bindings: &'a [ObjectPropBinding], +} + /// Build a per-file points-to map. Mirrors `buildPointsToMap` in -/// `src/domain/graph/resolver/points-to.ts`. 
+/// `src/domain/graph/resolver/points-to.ts` (full Phase 8.3–8.3f model). /// -/// Seeds every locally-defined callable and every imported name as -/// pointing to itself, then propagates assignments (`pts(lhs) ⊇ pts(rhs)`) -/// via fixed-point iteration. +/// Seeds every locally-defined callable and every imported name as pointing +/// to itself, generates inclusion constraints (`pts(lhs) ⊇ pts(rhsKey)`) +/// from every binding kind, then solves by fixed-point iteration. fn build_points_to_map( - fn_ref_bindings: &[FnRefBinding], + bindings: &PtsBindings, def_names: &HashSet<&str>, imported_names: &HashMap<&str, &str>, + definition_params: &HashMap<&str, Vec<&str>>, ) -> HashMap> { let mut pts: HashMap> = HashMap::new(); for name in def_names { @@ -158,20 +199,134 @@ fn build_points_to_map( for name in imported_names.keys() { pts.entry(name.to_string()).or_default().insert(name.to_string()); } - if fn_ref_bindings.is_empty() { - return pts; - } - let constraints: Vec<(String, String)> = fn_ref_bindings.iter().map(|b| { + + // Constraint list: pts(lhs) ⊇ pts(rhsKey). Member-expression rhs keys are + // composite ("obj.method") and only flow when a prior seed exists — safe. + let mut constraints: Vec<(String, String)> = bindings.fn_ref_bindings.iter().map(|b| { let rhs_key = match &b.rhs_receiver { Some(recv) => format!("{}.{}", recv, b.rhs), None => b.rhs.clone(), }; (b.lhs.clone(), rhs_key) }).collect(); + + // Phase 8.3c: parameter-flow constraints — `f(x)` at argIndex i adds + // pts(f::param_i) ⊇ pts(x). Keys are scoped "callee::paramName" to prevent + // collisions between same-named params across functions in one file. 
+ for pb in bindings.param_bindings { + if let Some(params) = definition_params.get(pb.callee.as_str()) { + if let Some(param_name) = params.get(pb.arg_index as usize) { + constraints.push((format!("{}::{}", pb.callee, param_name), pb.arg_name.clone())); + } + } + } + + // Phase 8.3e: array-element bindings — seed per-index entries, wildcard + // `arr[*]` collects all elements via constraints. + for ab in bindings.array_elem_bindings { + let elem_key = format!("{}[{}]", ab.array_name, ab.index); + pts.entry(elem_key.clone()).or_default().insert(ab.elem_name.clone()); + constraints.push((format!("{}[*]", ab.array_name), elem_key)); + } + + // Phase 8.3e: spread-argument constraints — `f(...arr)` maps known array + // elements onto parameter slots; unknown sizes fall back to the wildcard. + if !bindings.spread_arg_bindings.is_empty() { + let mut array_max_index: HashMap<&str, i64> = HashMap::new(); + for ab in bindings.array_elem_bindings { + let cur = array_max_index.entry(ab.array_name.as_str()).or_insert(-1); + if i64::from(ab.index) > *cur { *cur = i64::from(ab.index); } + } + for sb in bindings.spread_arg_bindings { + let Some(params) = definition_params.get(sb.callee.as_str()) else { continue }; + let max_idx = array_max_index.get(sb.array_name.as_str()).copied().unwrap_or(-1); + if max_idx >= 0 { + for i in 0..=(max_idx as usize) { + let param_idx = sb.start_index as usize + i; + let Some(param) = params.get(param_idx) else { break }; + constraints.push(( + format!("{}::{}", sb.callee, param), + format!("{}[{}]", sb.array_name, i), + )); + } + } else { + for param in params.iter().skip(sb.start_index as usize) { + constraints.push(( + format!("{}::{}", sb.callee, param), + format!("{}[*]", sb.array_name), + )); + } + } + } + } + + // Phase 8.3e: for-of constraints — `for (const x of arr)` inside `outer` + // adds pts(outer::x) ⊇ pts(arr[*]). 
+ for fb in bindings.for_of_bindings { + constraints.push(( + format!("{}::{}", fb.enclosing_func, fb.var_name), + format!("{}[*]", fb.source_name), + )); + } + + // Phase 8.3e: Array.from(source, cb) — pts(cb::param0) ⊇ pts(source[*]). + for cb in bindings.array_callback_bindings { + if let Some(param0) = definition_params.get(cb.callee_name.as_str()).and_then(|p| p.first()) { + constraints.push(( + format!("{}::{}", cb.callee_name, param0), + format!("{}[*]", cb.source_name), + )); + } + } + + // Phase 8.3f: object-rest dispatch — `function f({ ...rest })` + `f(obj)` + + // `const obj = { prop: fn }` seeds pts("rest.prop") = {"fn"}. + if !bindings.object_rest_param_bindings.is_empty() + && !bindings.object_prop_bindings.is_empty() + && !bindings.param_bindings.is_empty() + { + let mut param_by_callee_idx: HashMap<(&str, u32), Vec<&str>> = HashMap::new(); + for pb in bindings.param_bindings { + param_by_callee_idx + .entry((pb.callee.as_str(), pb.arg_index)) + .or_default() + .push(pb.arg_name.as_str()); + } + let mut props_by_object: HashMap<&str, Vec<(&str, &str)>> = HashMap::new(); + for ob in bindings.object_prop_bindings { + props_by_object + .entry(ob.object_name.as_str()) + .or_default() + .push((ob.prop_name.as_str(), ob.value_name.as_str())); + } + for rb in bindings.object_rest_param_bindings { + let Some(arg_names) = param_by_callee_idx.get(&(rb.callee.as_str(), rb.arg_index)) else { + continue; + }; + for arg_name in arg_names { + let Some(props) = props_by_object.get(arg_name) else { continue }; + for (prop_name, value_name) in props { + if !def_names.contains(value_name) && !imported_names.contains_key(value_name) { + continue; + } + pts.entry(format!("{}.{}", rb.rest_name, prop_name)) + .or_default() + .insert((*value_name).to_string()); + } + } + } + } + + if constraints.is_empty() { + return pts; + } + + // Fixed-point iteration: propagate pts sets until no new information flows. 
for _ in 0..MAX_SOLVER_ITERATIONS { let mut changed = false; for (lhs, rhs_key) in &constraints { let rhs_pts: Option> = pts.get(rhs_key.as_str()) + .filter(|s| !s.is_empty()) .map(|s| s.iter().cloned().collect()); if let Some(targets) = rhs_pts { let entry = pts.entry(lhs.clone()).or_default(); @@ -200,6 +355,57 @@ fn resolve_via_points_to<'a>( } } +/// Resolve each pts alias of `lookup_name` and emit hop-penalised call edges. +/// Shared by the no-receiver gate and the receiver-key (`rest.prop()`) fallback; +/// mirrors the alias-emission loops in buildFileCallEdges (build-edges.ts). +#[allow(clippy::too_many_arguments)] +fn emit_pts_alias_edges<'a>( + ctx: &EdgeContext<'a>, + pts: &HashMap>, + lookup_name: &str, + call_line: u32, + caller_id: u32, + caller_name: &str, + is_dynamic: u32, + rel_path: &str, + imported_names: &HashMap<&str, &str>, + type_map: &HashMap<&str, (&str, f64)>, + seen_edges: &HashSet, + pts_edge_map: &mut HashMap, + edges: &mut Vec, +) { + for alias in resolve_via_points_to(lookup_name, pts) { + let alias_imported_from = imported_names.get(alias).copied(); + let alias_call = CallInfo { + name: alias.to_string(), + line: call_line, + dynamic: Some(true), + receiver: None, + }; + let mut alias_targets = resolve_call_targets( + ctx, &alias_call, rel_path, alias_imported_from, type_map, caller_name, + ); + sort_targets_by_confidence(&mut alias_targets, rel_path, alias_imported_from); + for t in &alias_targets { + let edge_key = ((caller_id as u64) << 32) | (t.id as u64); + if t.id != caller_id && !seen_edges.contains(&edge_key) && !pts_edge_map.contains_key(&edge_key) { + let conf = resolve::compute_confidence(rel_path, &t.file, alias_imported_from) + - PROPAGATION_HOP_PENALTY; + if conf > 0.0 { + pts_edge_map.insert(edge_key, edges.len()); + edges.push(ComputedEdge { + source_id: caller_id, + target_id: t.id, + kind: "calls".to_string(), + confidence: conf, + dynamic: is_dynamic, + }); + } + } + } + } +} + /// Build call, receiver, extends, 
and implements edges in Rust. /// /// Mirrors the algorithm in builder.js `buildEdges` transaction (call edges @@ -260,16 +466,64 @@ fn process_file<'a>( DefWithId { name: &d.name, line: d.line, end_line: d.end_line.unwrap_or(u32::MAX), node_id } }).collect(); - // Phase 8.3: build pts map for alias resolution. - // Only callable (function/method) defs are seeded — mirrors JS buildPointsToMapForFile. - let pts_map: Option>> = - file_input.fn_ref_bindings.as_deref().filter(|b| !b.is_empty()).map(|bindings| { - let def_names: HashSet<&str> = file_input.definitions.iter() - .filter(|d| d.kind == "function" || d.kind == "method") - .map(|d| d.name.as_str()) - .collect(); - build_points_to_map(bindings, &def_names, &imported_names) - }); + // Phase 8.3: build pts map for alias resolution — mirrors buildPointsToMapForFile. + // Only callable (function/method) defs are seeded as concrete targets. + let raw_fn_ref: &[FnRefBinding] = file_input.fn_ref_bindings.as_deref().unwrap_or(&[]); + let this_calls: &[ThisCallBinding] = file_input.this_call_bindings.as_deref().unwrap_or(&[]); + let bindings = PtsBindings { + fn_ref_bindings: raw_fn_ref, + param_bindings: file_input.param_bindings.as_deref().unwrap_or(&[]), + array_elem_bindings: file_input.array_elem_bindings.as_deref().unwrap_or(&[]), + spread_arg_bindings: file_input.spread_arg_bindings.as_deref().unwrap_or(&[]), + for_of_bindings: file_input.for_of_bindings.as_deref().unwrap_or(&[]), + array_callback_bindings: file_input.array_callback_bindings.as_deref().unwrap_or(&[]), + object_rest_param_bindings: file_input.object_rest_param_bindings.as_deref().unwrap_or(&[]), + object_prop_bindings: file_input.object_prop_bindings.as_deref().unwrap_or(&[]), + }; + let has_pts_inputs = !bindings.fn_ref_bindings.is_empty() + || !bindings.param_bindings.is_empty() + || !bindings.array_elem_bindings.is_empty() + || !bindings.spread_arg_bindings.is_empty() + || !bindings.for_of_bindings.is_empty() + || 
!bindings.array_callback_bindings.is_empty() + || !bindings.object_rest_param_bindings.is_empty() + || !bindings.object_prop_bindings.is_empty() + || !this_calls.is_empty(); + // Convert thisCallBindings into scoped fnRefBindings (`fn::this → ctx`) so + // `this()` calls inside `fn` resolve via the scoped key `fn::this`. + let all_fn_ref_bindings: Vec; + let pts_map: Option>> = if has_pts_inputs { + let def_names: HashSet<&str> = file_input.definitions.iter() + .filter(|d| d.kind == "function" || d.kind == "method") + .map(|d| d.name.as_str()) + .collect(); + // First-wins on duplicate names — mirrors buildDefinitionParamsMap. + let mut definition_params: HashMap<&str, Vec<&str>> = HashMap::new(); + for d in &file_input.definitions { + if d.kind != "function" && d.kind != "method" { continue; } + let Some(params) = d.params.as_ref().filter(|p| !p.is_empty()) else { continue }; + definition_params.entry(d.name.as_str()) + .or_insert_with(|| params.iter().map(|s| s.as_str()).collect()); + } + let bindings = if this_calls.is_empty() { + bindings + } else { + let mut merged = raw_fn_ref.to_vec(); + merged.extend(this_calls.iter().map(|b| FnRefBinding { + lhs: format!("{}::this", b.callee), + rhs: b.this_arg.clone(), + rhs_receiver: None, + })); + all_fn_ref_bindings = merged; + PtsBindings { fn_ref_bindings: &all_fn_ref_bindings, ..bindings } + }; + Some(build_points_to_map(&bindings, &def_names, &imported_names, &definition_params)) + } else { + None + }; + // Case (c) flat-key gate set: lhs names from the *raw* fnRefBindings only + // (thisCall conversions are scoped keys and never flat-matched). 
+ let fn_ref_binding_lhs: HashSet<&str> = raw_fn_ref.iter().map(|b| b.lhs.as_str()).collect(); let mut seen_edges: HashSet = HashSet::new(); // Phase 8.3: tracks pts-resolved edges separately from seen_edges so that a @@ -292,45 +546,67 @@ fn process_file<'a>( sort_targets_by_confidence(&mut targets, rel_path, imported_from); emit_call_edges(&targets, caller_id, is_dynamic, rel_path, imported_from, &mut seen_edges, &mut pts_edge_map, edges); - // Phase 8.3: pts fallback for unresolved dynamic identifier calls. - // When primary resolution finds nothing and the call is dynamic with no receiver, - // look up the call name in the pts map and retry resolution for each alias target. - // Confidence is penalised by one hop to reflect the extra indirection. + // Phase 8.3 / 8.3c / 8.3e: points-to fallback for unresolved calls. + // Mirrors the four-case gate in buildFileCallEdges (build-edges.ts): + // (a) dynamic alias calls — flat `call.name` lookup; + // (b) parameter / this-rebinding / for-of variable calls — scoped key + // `caller::name`, with the `::name` sentinel for + // top-level for-of loops; + // (c) module-level alias bindings (`const f = handler`, `f = fn.bind(ctx)`) + // — flat key, gated on fnRefBindingLhs so self-seeded local + // definitions never fire. + // Confidence is penalised by one hop to reflect the indirection. // - // Pts edges go into pts_edge_map (not seen_edges) so a later direct call to the - // same target in the same function body can upgrade confidence in-place — mirroring - // the ptsEdgeRows mechanism on the JS/WASM path. - if targets.is_empty() && call.dynamic.unwrap_or(false) && call.receiver.is_none() { + // Pts edges go into pts_edge_map (not seen_edges) so a later direct call + // to the same target can upgrade confidence in-place — mirroring ptsEdgeRows. 
+ if targets.is_empty() && call.receiver.is_none() { if let Some(ref pts) = pts_map { - for alias in resolve_via_points_to(call.name.as_str(), pts) { - let alias_imported_from = imported_names.get(alias).copied(); - let alias_call = CallInfo { - name: alias.to_string(), - line: call.line, - dynamic: Some(true), - receiver: None, - }; - let mut alias_targets = resolve_call_targets( - ctx, &alias_call, rel_path, alias_imported_from, &type_map, caller_name, + let is_dyn_call = call.dynamic.unwrap_or(false); + let scoped_key = if caller_name.is_empty() { None } else { + Some(format!("{}::{}", caller_name, call.name)) + .filter(|k| pts.contains_key(k.as_str())) + }; + let module_key = if caller_name.is_empty() { + Some(format!("::{}", call.name)) + .filter(|k| pts.contains_key(k.as_str())) + } else { + None + }; + let flat_ok = !is_dyn_call + && fn_ref_binding_lhs.contains(call.name.as_str()) + && pts.contains_key(call.name.as_str()); + let lookup_name: Option = if is_dyn_call { + Some(call.name.clone()) + } else if let Some(k) = scoped_key { + Some(k) + } else if let Some(k) = module_key { + Some(k) + } else if flat_ok { + Some(call.name.clone()) + } else { + None + }; + if let Some(lookup_name) = lookup_name { + emit_pts_alias_edges( + ctx, pts, &lookup_name, call.line, caller_id, caller_name, is_dynamic, + rel_path, &imported_names, &type_map, &seen_edges, &mut pts_edge_map, edges, ); - sort_targets_by_confidence(&mut alias_targets, rel_path, alias_imported_from); - for t in &alias_targets { - let edge_key = ((caller_id as u64) << 32) | (t.id as u64); - if t.id != caller_id && !seen_edges.contains(&edge_key) && !pts_edge_map.contains_key(&edge_key) { - let conf = resolve::compute_confidence( - rel_path, &t.file, alias_imported_from, - ) - PROPAGATION_HOP_PENALTY; - if conf > 0.0 { - pts_edge_map.insert(edge_key, edges.len()); - edges.push(ComputedEdge { - source_id: caller_id, - target_id: t.id, - kind: "calls".to_string(), - confidence: conf, - dynamic: is_dynamic, 
- }); - } - } + } + } + } + + // Phase 8.3f: pts fallback for receiver calls via object-rest bindings. + // `rest.prop()` resolves when pts["rest.prop"] was seeded by the + // rest-dispatch chain. Builtin receivers were skipped at loop top. + if targets.is_empty() { + if let (Some(receiver), Some(pts)) = (call.receiver.as_deref(), pts_map.as_ref()) { + if receiver != "this" && receiver != "self" && receiver != "super" { + let receiver_key = format!("{}.{}", receiver, call.name); + if pts.contains_key(receiver_key.as_str()) { + emit_pts_alias_edges( + ctx, pts, &receiver_key, call.line, caller_id, caller_name, is_dynamic, + rel_path, &imported_names, &type_map, &seen_edges, &mut pts_edge_map, edges, + ); } } } @@ -363,9 +639,10 @@ fn find_enclosing_caller<'a>(defs: &[DefWithId<'a>], call_line: u32, file_node_i (caller_id, caller_name) } -/// Multi-strategy call target resolution: import-aware → same-file → method → type-aware → scoped. +/// Multi-strategy call target resolution: import-aware → same-file → type-aware → scoped. /// `caller_name` is the enclosing function/method name (e.g. `"Shape.describe"`) used to scope /// `this`/`self`/`super` dispatch to the caller's own class before falling back to a broader scan. +/// Mirrors `resolveCallTargets` / `resolveByMethodOrGlobal` in call-resolver.ts. fn resolve_call_targets<'a>( ctx: &EdgeContext<'a>, call: &CallInfo, @@ -388,15 +665,7 @@ fn resolve_call_targets<'a>( .cloned().unwrap_or_default(); if !targets.is_empty() { return targets; } - // 3. Method name match - let suffix = format!(".{}", call.name); - let method_candidates: Vec<&NodeInfo> = ctx.nodes_by_name - .get(call.name.as_str()) - .map(|v| v.iter().filter(|n| n.kind == "method" && n.name.ends_with(&suffix)).copied().collect()) - .unwrap_or_default(); - if !method_candidates.is_empty() { return method_candidates; } - - // 4. Type-aware resolution via receiver → type map. + // 3. Type-aware resolution via receiver → type map. // Strips "this." 
prefix so `this.repo.method()` resolves via typeMap["repo"] // or typeMap["this.repo"] (both seeded by the class-field extractor). if let Some(ref receiver) = call.receiver { @@ -405,8 +674,23 @@ fn resolve_call_targets<'a>( } else { receiver.as_str() }; + // Phase 8.3f: callee-scoped rest-param key (`callee::restName`) avoids + // same-name rest-binding collisions across functions in the same file (#1358). + let rest_param_key = format!("{}::{}", caller_name, effective_receiver); + // Class-scoped key (`ClassName.prop`) seeded by `this.prop = new Ctor()` + // property writes — prevents false edges when multiple classes define the + // same property name (issue #1323). Only consulted for `this.` receivers. + let class_scoped_key = if receiver.starts_with("this.") && !caller_name.is_empty() { + caller_name + .rfind('.') + .map(|dot| format!("{}.{}", &caller_name[..dot], effective_receiver)) + } else { + None + }; let type_lookup = type_map.get(effective_receiver) - .or_else(|| type_map.get(receiver.as_str())); + .or_else(|| type_map.get(receiver.as_str())) + .or_else(|| if caller_name.is_empty() { None } else { type_map.get(rest_param_key.as_str()) }) + .or_else(|| class_scoped_key.as_deref().and_then(|k| type_map.get(k))); // Inline new-expression receiver: `(new Foo).bar()` — extract the constructor name // when no typeMap entry exists for the complex receiver expression. // Mirrors the regex `/^\(?\s*new\s+([A-Z_$][A-Za-z0-9_$]*)/` in call-resolver.ts. 
@@ -421,7 +705,10 @@ fn resolve_call_targets<'a>( let qualified = format!("{}.{}", type_name, call.name); let typed: Vec<&NodeInfo> = ctx.nodes_by_name .get(qualified.as_str()) - .map(|v| v.iter().filter(|n| n.kind == "method").copied().collect()) + .map(|v| v.iter() + .filter(|n| n.kind == "method" + && resolve::compute_confidence(rel_path, &n.file, None) >= 0.5) + .copied().collect()) .unwrap_or_default(); if !typed.is_empty() { return typed; } // Prototype alias: `Foo.prototype.bar = identifier` seeds typeMap['Foo.bar'] = identifier. @@ -438,23 +725,26 @@ fn resolve_call_targets<'a>( if !resolved.is_empty() { return resolved; } } } - // 4.5. Direct qualified method lookup: ClassName.staticMethod() or ClassName.instanceMethod() + // 3.5. Direct qualified method lookup: ClassName.staticMethod() or ClassName.instanceMethod() // when the receiver is a class name with no typeMap entry. Handles static method calls // like `Validators.IsValidEmail()` where the receiver IS the class. // Matches both "method" and "function" kinds to cover field-initializer synthetic defs. - // ORDER: must run before composite pts lookup (4.6) to match WASM call-resolver.ts ordering. + // ORDER: must run before composite pts lookup (3.6) to match WASM call-resolver.ts ordering. // Guard: skip when inline_new_type is Some — mirrors TS `!typeName` which is false when the // inline-new regex extracted a type (e.g. `(new Foo).bar()` → typeName='Foo' → skip). if type_lookup.is_none() && inline_new_type.is_none() { let qualified = format!("{}.{}", effective_receiver, call.name); let direct: Vec<&NodeInfo> = ctx.nodes_by_name .get(qualified.as_str()) - .map(|v| v.iter().filter(|n| n.kind == "method" || n.kind == "function").copied().collect()) + .map(|v| v.iter() + .filter(|n| (n.kind == "method" || n.kind == "function") + && resolve::compute_confidence(rel_path, &n.file, None) >= 0.5) + .copied().collect()) .unwrap_or_default(); if !direct.is_empty() { return direct; } } - // 4.6. 
Phase 8.3d: composite pts key — `obj.prop = fn` seeds typeMap['obj.prop'] + // 3.6. Phase 8.3d: composite pts key — `obj.prop = fn` seeds typeMap['obj.prop'] let composite_key = format!("{}.{}", receiver, call.name); if let Some(&(pts_target, _)) = type_map.get(composite_key.as_str()) { let resolved: Vec<&NodeInfo> = ctx.nodes_by_name @@ -467,7 +757,7 @@ fn resolve_call_targets<'a>( } } - // 5. Scoped fallback (this/self/super or no receiver) + // 4. Scoped fallback (this/self/super or no receiver) if call.receiver.is_none() || call.receiver.as_deref() == Some("this") || call.receiver.as_deref() == Some("self") @@ -509,41 +799,28 @@ fn resolve_call_targets<'a>( // `IsValidEmail()` inside `Validators.ValidateUser` → `Validators.IsValidEmail`). // This avoids false edges to unrelated classes that happen to have a method with the // same name in the same file. - if let Some(dot_pos) = caller_name.find('.') { - let class_prefix = &caller_name[..dot_pos]; - let qualified = format!("{}.{}", class_prefix, call.name); - let class_scoped: Vec<&NodeInfo> = ctx.nodes_by_name - .get(qualified.as_str()) - .map(|v| v.iter() - .filter(|n| n.kind == "method" - && resolve::compute_confidence(rel_path, &n.file, None) >= 0.5) - .copied().collect()) - .unwrap_or_default(); - if !class_scoped.is_empty() { return class_scoped; } - } - - // Broader fallback: same-file suffix scan. Only for this/self/super (not no-receiver - // plain calls) to avoid false positives on global function calls inside class methods. - // Always restricts to the caller's own class prefix to avoid false edges to unrelated - // classes in the same file (e.g. this.area() inside Shape.describe must never yield - // Calculator.area, even when Calculator.area is the only method with that name). 
- if call.receiver.is_some() { - let suffix = format!(".{}", call.name); - if let Some(file_nodes) = ctx.nodes_by_file.get(rel_path) { - let same_file_methods: Vec<&NodeInfo> = file_nodes.iter() - .filter(|n| n.kind == "method" && n.name.ends_with(&suffix)) - .copied() - .collect(); - if !same_file_methods.is_empty() { - if let Some(dot_pos) = caller_name.find('.') { - let caller_prefix = format!("{}.", &caller_name[..dot_pos]); - let caller_scoped: Vec<&NodeInfo> = same_file_methods.iter() - .filter(|n| n.name.starts_with(&caller_prefix)) - .copied() - .collect(); - if !caller_scoped.is_empty() { return caller_scoped; } - } - } + // + // For JS/TS, bare (no-receiver) calls are module-scoped — there is no implicit class + // binding. Skip the same-class fallback for bare calls in those languages to prevent + // false positives (e.g. `flush()` inside `Processor.run` must not resolve to + // `Processor.flush`). this/self/super calls are unaffected. + let is_bare_call = call.receiver.is_none(); + if !caller_name.is_empty() && !(is_bare_call && is_module_scoped_language(rel_path)) { + if let Some(dot_idx) = caller_name.rfind('.') { + // Extract only the segment immediately before the method name so that + // 'Namespace.ClassName.method' yields 'ClassName', not 'Namespace.ClassName'. + // Symbols are stored under their bare class name, not their qualified path. 
+ let seg_start = caller_name[..dot_idx].rfind('.').map(|p| p + 1).unwrap_or(0); + let class_prefix = &caller_name[seg_start..dot_idx]; + let qualified = format!("{}.{}", class_prefix, call.name); + let class_scoped: Vec<&NodeInfo> = ctx.nodes_by_name + .get(qualified.as_str()) + .map(|v| v.iter() + .filter(|n| n.kind == "method" + && resolve::compute_confidence(rel_path, &n.file, None) >= 0.5) + .copied().collect()) + .unwrap_or_default(); + if !class_scoped.is_empty() { return class_scoped; } } } return exact; // empty @@ -552,6 +829,16 @@ fn resolve_call_targets<'a>( Vec::new() } +/// Languages where bare `foo()` calls inside a class method are lexically scoped +/// to the module, not the class — there is no implicit this/class binding. +/// Mirrors `MODULE_SCOPED_BARE_CALL_EXTENSIONS` in call-resolver.ts. +fn is_module_scoped_language(rel_path: &str) -> bool { + match rel_path.rsplit_once('.') { + Some((_, ext)) => matches!(ext, "js" | "mjs" | "cjs" | "jsx" | "ts" | "tsx" | "mts" | "cts"), + None => false, + } +} + /// Extract the constructor name from an inline `new` receiver expression. /// /// Mirrors the regex `/^\(?\s*new\s+([A-Z_$][A-Za-z0-9_$]*)/` used in call-resolver.ts. 
@@ -631,8 +918,8 @@ fn emit_receiver_edge( || receiver == "this" || receiver == "self" || receiver == "super" { return; } - let effective_receiver = type_map.get(receiver.as_str()).map(|&(t, _)| t).unwrap_or(receiver.as_str()); - let type_resolved = effective_receiver != receiver.as_str(); + let type_entry = type_map.get(receiver.as_str()); + let effective_receiver = type_entry.map(|&(t, _)| t).unwrap_or(receiver.as_str()); // Filter-before: apply receiver_kinds to same-file candidates first, then // fall back to global candidates (also filtered) only when same-file yields @@ -659,7 +946,9 @@ fn emit_receiver_edge( let recv_key = (1u64 << 63) | ((caller_id as u64) << 32) | (recv_target.id as u64); if !seen_edges.contains(&recv_key) { seen_edges.insert(recv_key); - let confidence = if type_resolved { 0.9 } else { 0.7 }; + // Use the stored typeMap confidence when the receiver was type-resolved, + // mirroring `typeConfidence ?? (typeName ? 0.9 : 0.7)` in resolveReceiverEdge. + let confidence = type_entry.map(|&(_, c)| c).unwrap_or(0.7); edges.push(ComputedEdge { source_id: caller_id, target_id: recv_target.id, kind: "receiver".to_string(), confidence, dynamic: 0, @@ -1334,7 +1623,13 @@ mod call_edge_tests { } fn def(name: &str, kind: &str, line: u32, end_line: u32) -> DefInfo { - DefInfo { name: name.to_string(), kind: kind.to_string(), line, end_line: Some(end_line) } + DefInfo { + name: name.to_string(), + kind: kind.to_string(), + line, + end_line: Some(end_line), + params: None, + } } fn call(name: &str, line: u32, receiver: Option<&str>) -> CallInfo { @@ -1362,6 +1657,14 @@ mod call_edge_tests { classes, type_map, fn_ref_bindings: None, + param_bindings: None, + this_call_bindings: None, + array_elem_bindings: None, + spread_arg_bindings: None, + for_of_bindings: None, + array_callback_bindings: None, + object_rest_param_bindings: None, + object_prop_bindings: None, } } @@ -1433,6 +1736,155 @@ mod call_edge_tests { assert_eq!(re.target_id, 2, "receiver edge 
must point to Calculator class (id=2), not function (id=4)"); } + /// Issue #1453: `this.logger.error()` inside `UserService.create` where the + /// constructor seeded the class-scoped key `UserService.logger → Logger`. + /// The resolver must fall back to the `ClassName.prop` typeMap key (#1323). + #[test] + fn class_scoped_type_map_key_resolves_this_prop_receiver() { + let all_nodes = vec![ + node(1, "UserService.create", "method", "svc.js", 10), + node(2, "Logger.error", "method", "logger.js", 5), + node(3, "Logger", "class", "logger.js", 1), + ]; + let files = vec![make_file( + "svc.js", + 10, + vec![def("UserService.create", "method", 10, 20)], + vec![call("error", 15, Some("this.logger"))], + vec![type_map_entry("UserService.logger", "Logger", 1.0)], + vec![], + )]; + let edges = build_call_edges(files, all_nodes, vec![]); + assert!( + edges.iter().any(|e| e.kind == "calls" && e.source_id == 1 && e.target_id == 2), + "expected calls edge UserService.create → Logger.error; got: {:?}", + edges.iter().map(|e| (&e.kind, e.source_id, e.target_id)).collect::>() + ); + } + + /// Phase 8.3f (#1358): callee-scoped rest-param key `callee::restName` must + /// be consulted when the bare receiver has no typeMap entry. 
+ #[test] + fn rest_param_scoped_type_map_key() { + let all_nodes = vec![ + node(1, "useRest", "function", "a.js", 1), + node(2, "E4.e4", "method", "a.js", 30), + ]; + let files = vec![make_file( + "a.js", + 10, + vec![def("useRest", "function", 1, 10)], + vec![call("e4", 5, Some("eerest"))], + vec![type_map_entry("useRest::eerest", "E4", 0.85)], + vec![], + )]; + let edges = build_call_edges(files, all_nodes, vec![]); + assert!( + edges.iter().any(|e| e.kind == "calls" && e.source_id == 1 && e.target_id == 2), + "expected calls edge useRest → E4.e4 via rest-param key; got: {:?}", + edges.iter().map(|e| (&e.kind, e.source_id, e.target_id)).collect::>() + ); + } + + /// Bare (no-receiver) calls in JS/TS are module-scoped: `flush()` inside + /// `Processor.run` must NOT resolve to `Processor.flush` (#1422 parity). + #[test] + fn bare_call_in_js_skips_same_class_fallback() { + let all_nodes = vec![ + node(1, "Processor.run", "method", "proc.js", 10), + node(2, "Processor.flush", "method", "proc.js", 30), + ]; + let files = vec![make_file( + "proc.js", + 10, + vec![def("Processor.run", "method", 10, 20)], + vec![call("flush", 15, None)], + vec![], + vec![], + )]; + let edges = build_call_edges(files, all_nodes, vec![]); + assert!( + !edges.iter().any(|e| e.kind == "calls" && e.source_id == 1 && e.target_id == 2), + "bare call must not resolve to same-class sibling in a module-scoped language" + ); + } + + /// In class-scoped languages (e.g. C#), bare sibling calls DO resolve: + /// `IsValidEmail()` inside `Validators.ValidateUser` → `Validators.IsValidEmail`. 
+ #[test] + fn bare_call_in_class_scoped_language_resolves_sibling() { + let all_nodes = vec![ + node(1, "Validators.ValidateUser", "method", "v.cs", 10), + node(2, "Validators.IsValidEmail", "method", "v.cs", 30), + ]; + let files = vec![make_file( + "v.cs", + 10, + vec![def("Validators.ValidateUser", "method", 10, 20)], + vec![call("IsValidEmail", 15, None)], + vec![], + vec![], + )]; + let edges = build_call_edges(files, all_nodes, vec![]); + assert!( + edges.iter().any(|e| e.kind == "calls" && e.source_id == 1 && e.target_id == 2), + "bare sibling call must resolve in a class-scoped language; got: {:?}", + edges.iter().map(|e| (&e.kind, e.source_id, e.target_id)).collect::>() + ); + } + + /// `self.area()` inside a namespace-qualified method `Geo.Shape.describe` + /// must resolve via the bare class segment (`Shape.area`), not the full + /// prefix (`Geo.Shape.area`) — symbols are stored under their bare class name. + #[test] + fn class_scoped_fallback_uses_segment_before_method() { + let all_nodes = vec![ + node(1, "Geo.Shape.describe", "method", "s.py", 10), + node(2, "Shape.area", "method", "s.py", 30), + ]; + let files = vec![make_file( + "s.py", + 10, + vec![def("Geo.Shape.describe", "method", 10, 20)], + vec![call("area", 15, Some("self"))], + vec![], + vec![], + )]; + let edges = build_call_edges(files, all_nodes, vec![]); + assert!( + edges.iter().any(|e| e.kind == "calls" && e.source_id == 1 && e.target_id == 2), + "expected Geo.Shape.describe → Shape.area via bare class segment; got: {:?}", + edges.iter().map(|e| (&e.kind, e.source_id, e.target_id)).collect::>() + ); + } + + /// Receiver-edge confidence must propagate the stored typeMap confidence + /// (e.g. 0.85 from a pts property-write) instead of a flat 0.9 — mirrors + /// `typeConfidence ?? (typeName ? 0.9 : 0.7)` in resolveReceiverEdge. 
+ #[test] + fn receiver_edge_uses_stored_type_map_confidence() { + let all_nodes = vec![ + node(1, "main", "function", "index.js", 3), + node(2, "Calculator", "class", "utils.js", 1), + node(3, "Calculator.compute", "method", "utils.js", 3), + ]; + let files = vec![make_file( + "index.js", + 10, + vec![def("main", "function", 3, 8)], + vec![call("compute", 7, Some("calc"))], + vec![type_map_entry("calc", "Calculator", 0.85)], + vec![], + )]; + let edges = build_call_edges(files, all_nodes, vec![]); + let re = edges.iter().find(|e| e.kind == "receiver").expect("receiver edge"); + assert!( + (re.confidence - 0.85).abs() < 1e-9, + "expected stored confidence 0.85, got {}", + re.confidence + ); + } + /// When the receiver name is already a class (not a variable), the edge /// should still be emitted using the raw receiver name as lookup key. #[test] @@ -1457,6 +1909,242 @@ mod call_edge_tests { assert!(receiver_edge.is_some(), "expected receiver edge for direct class-name receiver"); assert_eq!(receiver_edge.unwrap().target_id, 2); } + + // ── Points-to constraint solver (parity with buildPointsToMap) ────────── + + fn def_with_params(name: &str, line: u32, end_line: u32, params: &[&str]) -> DefInfo { + DefInfo { + name: name.to_string(), + kind: "function".to_string(), + line, + end_line: Some(end_line), + params: Some(params.iter().map(|s| s.to_string()).collect()), + } + } + + /// `hof(target)` + `cb()` inside `hof(cb)` must emit hof→target via the + /// param-flow constraint `hof::cb ⊇ target`. 
+ #[test] + fn pts_param_flow_resolves_callback_through_parameter() { + let all_nodes = vec![ + node(1, "hof", "function", "lib.js", 1), + node(2, "target", "function", "lib.js", 5), + node(3, "main", "function", "lib.js", 8), + ]; + let mut file = make_file( + "lib.js", + 10, + vec![ + def_with_params("hof", 1, 3, &["cb"]), + def("target", "function", 5, 6), + def("main", "function", 8, 10), + ], + vec![call("cb", 2, None), call("hof", 9, None)], + vec![], + vec![], + ); + file.param_bindings = Some(vec![ParamBinding { + callee: "hof".to_string(), + arg_index: 0, + arg_name: "target".to_string(), + }]); + + let edges = build_call_edges(vec![file], all_nodes, vec![]); + + assert!( + edges.iter().any(|e| e.source_id == 1 && e.target_id == 2 && e.kind == "calls"), + "expected pts edge hof→target; got: {:?}", + edges.iter().map(|e| (e.source_id, e.target_id, &e.kind)).collect::>() + ); + assert!( + edges.iter().any(|e| e.source_id == 3 && e.target_id == 1 && e.kind == "calls"), + "expected direct edge main→hof; got: {:?}", + edges.iter().map(|e| (e.source_id, e.target_id, &e.kind)).collect::>() + ); + } + + /// `invoker.call(handler, 10)` + `this()` inside `invoker` must emit + /// invoker→handler via the thisCallBinding conversion `invoker::this ⊇ handler`. 
+ #[test] + fn pts_this_call_binding_resolves_this_invocation() { + let all_nodes = vec![ + node(1, "invoker", "function", "lib.js", 1), + node(2, "handler", "function", "lib.js", 5), + node(3, "runCallThis", "function", "lib.js", 8), + ]; + let mut file = make_file( + "lib.js", + 10, + vec![ + def("invoker", "function", 1, 3), + def("handler", "function", 5, 6), + def("runCallThis", "function", 8, 10), + ], + vec![ + // this() inside invoker + call("this", 2, None), + // invoker.call(handler, 10) — extractor emits dynamic call to invoker + CallInfo { name: "invoker".to_string(), line: 9, dynamic: Some(true), receiver: None }, + ], + vec![], + vec![], + ); + file.this_call_bindings = Some(vec![ThisCallBinding { + callee: "invoker".to_string(), + this_arg: "handler".to_string(), + }]); + + let edges = build_call_edges(vec![file], all_nodes, vec![]); + + assert!( + edges.iter().any(|e| e.source_id == 1 && e.target_id == 2 && e.kind == "calls"), + "expected pts edge invoker→handler; got: {:?}", + edges.iter().map(|e| (e.source_id, e.target_id, &e.kind)).collect::>() + ); + assert!( + edges.iter().any(|e| e.source_id == 3 && e.target_id == 1 && e.kind == "calls"), + "expected direct edge runCallThis→invoker; got: {:?}", + edges.iter().map(|e| (e.source_id, e.target_id, &e.kind)).collect::>() + ); + } + + /// for-of over a function array: `for (const cb of arr) cb()` must emit + /// iterPlain→forOf1 and iterPlain→forOf2 through the wildcard constraint + /// `iterPlain::cb ⊇ arr[*]`. 
+ #[test] + fn pts_for_of_over_array_elements_resolves_all_elements() { + let all_nodes = vec![ + node(1, "forOf1", "function", "for-of.js", 1), + node(2, "forOf2", "function", "for-of.js", 3), + node(3, "iterPlain", "function", "for-of.js", 6), + ]; + let mut file = make_file( + "for-of.js", + 10, + vec![ + def("forOf1", "function", 1, 2), + def("forOf2", "function", 3, 4), + def("iterPlain", "function", 6, 9), + ], + vec![call("cb", 8, None)], + vec![], + vec![], + ); + file.array_elem_bindings = Some(vec![ + ArrayElemBinding { array_name: "arr".to_string(), index: 0, elem_name: "forOf1".to_string() }, + ArrayElemBinding { array_name: "arr".to_string(), index: 1, elem_name: "forOf2".to_string() }, + ]); + file.for_of_bindings = Some(vec![ForOfBinding { + var_name: "cb".to_string(), + source_name: "arr".to_string(), + enclosing_func: "iterPlain".to_string(), + }]); + + let edges = build_call_edges(vec![file], all_nodes, vec![]); + + for target in [1u32, 2u32] { + assert!( + edges.iter().any(|e| e.source_id == 3 && e.target_id == target && e.kind == "calls"), + "expected pts edge iterPlain→node{}; got: {:?}", + target, + edges.iter().map(|e| (e.source_id, e.target_id, &e.kind)).collect::>() + ); + } + } + + /// Object-rest dispatch: `f3(obj)` where `obj = {{ e4 }}` and `f3({{...rest}})` + /// calls `rest.e4()` — resolves via the seeded pts key `rest.e4`. 
+ #[test] + fn pts_object_rest_receiver_call_resolves_via_seeded_prop() { + let all_nodes = vec![ + node(1, "f3", "function", "lib.js", 1), + node(2, "e4", "function", "other.js", 1), + node(3, "main", "function", "lib.js", 8), + ]; + let mut file = make_file( + "lib.js", + 10, + vec![def("f3", "function", 1, 3), def("main", "function", 8, 10)], + vec![ + // eerest.e4() inside f3 + CallInfo { name: "e4".to_string(), line: 2, dynamic: None, receiver: Some("eerest".to_string()) }, + call("f3", 9, None), + ], + vec![], + vec![], + ); + file.imported_names = vec![ImportedName { name: "e4".to_string(), file: "other.js".to_string() }]; + file.param_bindings = Some(vec![ParamBinding { + callee: "f3".to_string(), + arg_index: 0, + arg_name: "obj".to_string(), + }]); + file.object_rest_param_bindings = Some(vec![ObjectRestParamBinding { + callee: "f3".to_string(), + rest_name: "eerest".to_string(), + arg_index: 0, + }]); + file.object_prop_bindings = Some(vec![ObjectPropBinding { + object_name: "obj".to_string(), + prop_name: "e4".to_string(), + value_name: "e4".to_string(), + }]); + + let edges = build_call_edges(vec![file], all_nodes, vec![]); + + assert!( + edges.iter().any(|e| e.source_id == 1 && e.target_id == 2 && e.kind == "calls"), + "expected pts edge f3→e4 via rest receiver; got: {:?}", + edges.iter().map(|e| (e.source_id, e.target_id, &e.kind)).collect::>() + ); + } + + /// Spread dispatch: `callAll(...fns)` with `fns = [x, y]` flows the array + /// elements into callAll's parameters positionally. 
+ #[test] + fn pts_spread_args_flow_array_elements_into_params() { + let all_nodes = vec![ + node(1, "callAll", "function", "spread.js", 1), + node(2, "x", "function", "spread.js", 5), + node(3, "y", "function", "spread.js", 6), + node(4, "main", "function", "spread.js", 8), + ]; + let mut file = make_file( + "spread.js", + 10, + vec![ + def_with_params("callAll", 1, 3, &["a", "b"]), + def("x", "function", 5, 5), + def("y", "function", 6, 6), + def("main", "function", 8, 10), + ], + vec![call("a", 2, None), call("b", 2, None), call("callAll", 9, None)], + vec![], + vec![], + ); + file.array_elem_bindings = Some(vec![ + ArrayElemBinding { array_name: "fns".to_string(), index: 0, elem_name: "x".to_string() }, + ArrayElemBinding { array_name: "fns".to_string(), index: 1, elem_name: "y".to_string() }, + ]); + file.spread_arg_bindings = Some(vec![SpreadArgBinding { + callee: "callAll".to_string(), + array_name: "fns".to_string(), + start_index: 0, + }]); + + let edges = build_call_edges(vec![file], all_nodes, vec![]); + + assert!( + edges.iter().any(|e| e.source_id == 1 && e.target_id == 2 && e.kind == "calls"), + "expected pts edge callAll→x; got: {:?}", + edges.iter().map(|e| (e.source_id, e.target_id, &e.kind)).collect::>() + ); + assert!( + edges.iter().any(|e| e.source_id == 1 && e.target_id == 3 && e.kind == "calls"), + "expected pts edge callAll→y; got: {:?}", + edges.iter().map(|e| (e.source_id, e.target_id, &e.kind)).collect::>() + ); + } } #[cfg(test)] diff --git a/crates/codegraph-core/src/domain/graph/builder/stages/import_edges.rs b/crates/codegraph-core/src/domain/graph/builder/stages/import_edges.rs index 7308ece75..7471e8a16 100644 --- a/crates/codegraph-core/src/domain/graph/builder/stages/import_edges.rs +++ b/crates/codegraph-core/src/domain/graph/builder/stages/import_edges.rs @@ -566,6 +566,14 @@ mod tests { dataflow: None, line_count: None, fn_ref_bindings: vec![], + param_bindings: vec![], + this_call_bindings: vec![], + array_elem_bindings: 
vec![], + spread_arg_bindings: vec![], + for_of_bindings: vec![], + array_callback_bindings: vec![], + object_rest_param_bindings: vec![], + object_prop_bindings: vec![], } } diff --git a/crates/codegraph-core/src/extractors/javascript.rs b/crates/codegraph-core/src/extractors/javascript.rs index 769bac06b..260df4dec 100644 --- a/crates/codegraph-core/src/extractors/javascript.rs +++ b/crates/codegraph-core/src/extractors/javascript.rs @@ -33,6 +33,9 @@ impl SymbolExtractor for JsExtractor { walk_tree(&tree.root_node(), source, &mut symbols, match_js_prototype_methods); // call_assignments runs after type_map is populated (needs receiver types) walk_tree(&tree.root_node(), source, &mut symbols, match_js_call_assignments); + // Phase 8.3c–8.3f: points-to bindings (params, this-rebinding, arrays, + // spread, for-of, object rest/props) for the pts constraint solver. + walk_tree(&tree.root_node(), source, &mut symbols, match_js_pts_bindings); symbols } } @@ -72,6 +75,27 @@ fn extract_new_expr_type_name<'a>(node: &Node<'a>, source: &'a [u8]) -> Option<& } } +/// Nearest enclosing class context for class-scoped typeMap keys. +/// +/// Mirrors the TS walk's `childTypeMapClass` propagation: a `class_declaration` +/// (or `abstract_class_declaration`) provides its name; a `class` *expression* +/// resets the context to None because the expression-internal name is never +/// visible to the resolver, preserving the `this.prop` key fallback. 
+fn enclosing_type_map_class<'a>(node: &Node<'a>, source: &'a [u8]) -> Option<&'a str> { + let mut cur = node.parent(); + while let Some(n) = cur { + match n.kind() { + "class_declaration" | "abstract_class_declaration" => { + return n.child_by_field_name("name").map(|name| node_text(&name, source)); + } + "class" => return None, + _ => {} + } + cur = n.parent(); + } + None +} + fn match_js_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { match node.kind() { "variable_declarator" => { @@ -126,24 +150,46 @@ fn match_js_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, _dep } } // Phase 8.3d: property-write pts tracking — `obj.prop = fn` seeds composite key. + // Also seeds `this.prop = new Ctor()` constructor-assigned property types, + // keyed as `ClassName.prop` (class-scoped) so two classes with identically-named + // properties don't overwrite each other's typeMap entry (issue #1323). + // Mirrors handlePropWriteTypeMap in src/extractors/javascript.ts. "assignment_expression" => { let lhs = node.child_by_field_name("left"); let rhs = node.child_by_field_name("right"); if let (Some(lhs), Some(rhs)) = (lhs, rhs) { - if lhs.kind() == "member_expression" && rhs.kind() == "identifier" { + if lhs.kind() == "member_expression" { let obj = lhs.child_by_field_name("object"); let prop = lhs.child_by_field_name("property"); if let (Some(obj), Some(prop)) = (obj, prop) { - if obj.kind() == "identifier" { - let obj_name = node_text(&obj, source); - if !is_js_builtin_global(obj_name) { - let key = format!("{}.{}", obj_name, node_text(&prop, source)); - let rhs_name = node_text(&rhs, source).to_string(); - symbols.type_map.push(TypeMapEntry { - name: key, - type_name: rhs_name, - confidence: 0.85, - }); + // Guard: only static property access, not computed subscripts. 
+ let prop_kind = prop.kind(); + if prop_kind == "property_identifier" || prop_kind == "identifier" { + if obj.kind() == "this" && rhs.kind() == "new_expression" { + if let Some(ctor_type) = extract_new_expr_type_name(&rhs, source) { + let key = match enclosing_type_map_class(node, source) { + Some(class_name) => { + format!("{}.{}", class_name, node_text(&prop, source)) + } + None => format!("this.{}", node_text(&prop, source)), + }; + symbols.type_map.push(TypeMapEntry { + name: key, + type_name: ctor_type.to_string(), + confidence: 1.0, + }); + } + } else if obj.kind() == "identifier" && rhs.kind() == "identifier" { + let obj_name = node_text(&obj, source); + if !is_js_builtin_global(obj_name) { + let key = format!("{}.{}", obj_name, node_text(&prop, source)); + let rhs_name = node_text(&rhs, source).to_string(); + symbols.type_map.push(TypeMapEntry { + name: key, + type_name: rhs_name, + confidence: 0.85, + }); + } } } } @@ -1084,17 +1130,52 @@ fn handle_var_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { // Phase 8.3: `const alias = obj.method` — record for pts analysis. // Mirror the JS BUILTIN_GLOBALS guard: skip bindings where the // receiver object is a well-known JS global (e.g. `const fn = Math.random`). + // Guards mirror the TS extractor: only static property access on a plain + // identifier receiver — chained `a.b.method` and computed subscripts are + // skipped because they can never match pts keys. 
if let (Some(obj), Some(prop)) = ( value_n.child_by_field_name("object"), value_n.child_by_field_name("property"), ) { - let obj_text = node_text(&obj, source); - if !JS_BUILTIN_GLOBALS.contains(&obj_text) { - symbols.fn_ref_bindings.push(FnRefBinding { - lhs: node_text(&name_n, source).to_string(), - rhs: node_text(&prop, source).to_string(), - rhs_receiver: Some(obj_text.to_string()), - }); + let prop_kind = prop.kind(); + if (prop_kind == "property_identifier" || prop_kind == "identifier") + && obj.kind() == "identifier" + { + let obj_text = node_text(&obj, source); + if !JS_BUILTIN_GLOBALS.contains(&obj_text) { + symbols.fn_ref_bindings.push(FnRefBinding { + lhs: node_text(&name_n, source).to_string(), + rhs: node_text(&prop, source).to_string(), + rhs_receiver: Some(obj_text.to_string()), + }); + } + } + } + } else if name_n.kind() == "identifier" && value_n.kind() == "call_expression" { + // Phase 8.3: `const f = fn.bind(ctx)` — bind returns a bound copy of fn; + // track f → fn so pts(f) ⊇ pts(fn) and subsequent `f(args)` calls resolve + // to fn. Only flat-identifier binds (fn.bind) are tracked, mirroring the + // TS extractor; method-receiver binds like `obj.method.bind(ctx)` are not. 
+ if let Some(call_fn) = value_n.child_by_field_name("function") { + if call_fn.kind() == "member_expression" { + let is_bind = call_fn + .child_by_field_name("property") + .map(|p| node_text(&p, source) == "bind") + .unwrap_or(false); + if is_bind { + if let Some(bound_fn) = call_fn.child_by_field_name("object") { + if bound_fn.kind() == "identifier" { + let bound_name = node_text(&bound_fn, source); + if !JS_BUILTIN_GLOBALS.contains(&bound_name) { + symbols.fn_ref_bindings.push(FnRefBinding { + lhs: node_text(&name_n, source).to_string(), + rhs: bound_name.to_string(), + rhs_receiver: None, + }); + } + } + } + } } } } @@ -1741,6 +1822,14 @@ fn extract_callback_reference_calls(call_node: &Node, source: &[u8], calls: &mut let call_line = start_line(call_node); let callee_name = extract_callee_name(call_node, source); + // .call() / .apply() / .bind() — the first arg is the `this` context (not a + // callback of the enclosing function) and subsequent args flow into the + // delegated function's parameters. Emitting them here would produce + // false-positive edges from the *calling* function. This-rebinding + // (fn::this → ctx) is handled separately by collect_this_call_and_bindings. 
+ if matches!(callee_name, Some("call") | Some("apply") | Some("bind")) { + return; + } let mut member_expr_args_allowed = callee_name .map(|n| CALLBACK_ACCEPTING_CALLEES.contains(&n)) .unwrap_or(false); @@ -1769,7 +1858,7 @@ fn extract_callback_reference_calls(call_node: &Node, source: &[u8], calls: &mut "member_expression" if member_expr_args_allowed => { if let Some(prop) = child.child_by_field_name("property") { let receiver = child.child_by_field_name("object") - .map(|obj| node_text(&obj, source).to_string()); + .map(|obj| extract_receiver_name(&obj, source)); calls.push(Call { name: node_text(&prop, source).to_string(), line: call_line, @@ -1828,6 +1917,34 @@ fn extract_destructured_bindings( } } +/// Mirrors `extractReceiverName` in src/extractors/javascript.ts: normalize a +/// call receiver node to a resolvable name. Inline-new (`new Foo().method()`) +/// and single-paren-wrapped new (`(new Foo()).method()`) yield the constructor +/// name so the resolver can look up `Foo.method` directly. +fn extract_receiver_name(obj: &Node, source: &[u8]) -> String { + match obj.kind() { + "new_expression" => { + if let Some(name) = extract_new_expr_type_name(obj, source) { + return name.to_string(); + } + } + "parenthesized_expression" => { + // Only one level of parentheses is unwrapped, matching the TS + // extractor; deeper nesting falls through to raw-text handling. 
+ for i in 0..obj.child_count() { + let Some(child) = obj.child(i) else { continue }; + if child.kind() == "new_expression" { + if let Some(name) = extract_new_expr_type_name(&child, source) { + return name.to_string(); + } + } + } + } + _ => {} + } + node_text(obj, source).to_string() +} + fn extract_call_info(fn_node: &Node, call_node: &Node, source: &[u8]) -> Option { match fn_node.kind() { "identifier" => Some(Call { @@ -1868,8 +1985,7 @@ fn extract_call_info(fn_node: &Node, call_node: &Node, source: &[u8]) -> Option< if prop.kind() == "string" || prop.kind() == "string_fragment" { let method_name = node_text(&prop, source).replace(&['\'', '"'][..], ""); if !method_name.is_empty() { - let receiver = named_child_text(&fn_node, "object", source) - .map(|s| s.to_string()); + let receiver = obj.as_ref().map(|o| extract_receiver_name(o, source)); return Some(Call { name: method_name, line: start_line(call_node), @@ -1879,8 +1995,7 @@ fn extract_call_info(fn_node: &Node, call_node: &Node, source: &[u8]) -> Option< } } - let receiver = named_child_text(&fn_node, "object", source) - .map(|s| s.to_string()); + let receiver = obj.as_ref().map(|o| extract_receiver_name(o, source)); Some(Call { name: prop_text.to_string(), line: start_line(call_node), @@ -1895,8 +2010,8 @@ fn extract_call_info(fn_node: &Node, call_node: &Node, source: &[u8]) -> Option< let method_name = node_text(&index, source) .replace(&['\'', '"', '`'][..], ""); if !method_name.is_empty() && !method_name.contains('$') { - let receiver = named_child_text(&fn_node, "object", source) - .map(|s| s.to_string()); + let receiver = fn_node.child_by_field_name("object") + .map(|o| extract_receiver_name(&o, source)); return Some(Call { name: method_name, line: start_line(call_node), @@ -2224,6 +2339,594 @@ fn scan_import_names_depth(node: &Node, source: &[u8], names: &mut Vec, } } +// ── Points-to binding collectors (Phase 8.3c–8.3f) ────────────────────────── +// Mirror the TS collectors invoked from 
runCollectorWalk / runContextCollectorWalk +// in `src/extractors/javascript.ts`. Each collector records bindings consumed by +// the pts constraint solver in `build_edges.rs`. + +/// Collectors whose interest spans multiple node kinds, dispatched per node. +fn match_js_pts_bindings(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { + match node.kind() { + "call_expression" => { + collect_this_call_and_bindings(node, source, symbols); + collect_param_bindings(node, source, symbols); + collect_spread_and_array_from_bindings(node, source, symbols); + } + "variable_declarator" => { + collect_array_elem_bindings(node, source, symbols); + collect_object_prop_bindings(node, source, symbols); + collect_collection_wrap_binding(node, source, symbols); + } + "for_in_statement" => collect_for_of_binding(node, source, symbols), + _ => {} + } + collect_object_rest_params(node, source, symbols); +} + +/// Nearest enclosing *named* callable for for-of binding context. +/// +/// Mirrors the TS `funcStack` in runContextCollectorWalk: named function +/// declarations, class methods (qualified `Class.method` when the class name +/// parses as `identifier` — TS class names are `type_identifier` and stay +/// unqualified), variables initialized with arrow/function expressions, and +/// `obj.method = function()` property assignments. Anonymous callables are +/// skipped so the outer context wins. Top level → ``. 
+fn enclosing_func_context(node: &Node, source: &[u8]) -> String { + let mut cur = node.parent(); + while let Some(n) = cur { + match n.kind() { + "function_declaration" | "generator_function_declaration" => { + if let Some(name_n) = n.child_by_field_name("name") { + if name_n.kind() == "identifier" { + return node_text(&name_n, source).to_string(); + } + } + } + "method_definition" => { + if let Some(name_n) = n.child_by_field_name("name") { + let method = node_text(&name_n, source); + let class_name = find_parent_of_types( + &n, + &["class_declaration", "abstract_class_declaration", "class"], + ) + .and_then(|c| c.child_by_field_name("name")) + .filter(|name| name.kind() == "identifier") + .map(|name| node_text(&name, source)); + return match class_name { + Some(c) => format!("{c}.{method}"), + None => method.to_string(), + }; + } + } + "arrow_function" | "function_expression" | "generator_function" => { + if let Some(parent) = n.parent() { + if parent.kind() == "variable_declarator" { + if let Some(name_n) = parent.child_by_field_name("name") { + if name_n.kind() == "identifier" { + return node_text(&name_n, source).to_string(); + } + } + } else if parent.kind() == "assignment_expression" { + // `obj.method = function() { ... }` — func-prop assignment. 
+ if let Some(lhs) = parent.child_by_field_name("left") { + if lhs.kind() == "member_expression" { + if let (Some(obj), Some(prop)) = ( + lhs.child_by_field_name("object"), + lhs.child_by_field_name("property"), + ) { + let prop_kind = prop.kind(); + let obj_text = node_text(&obj, source); + let prop_text = node_text(&prop, source); + if obj.kind() == "identifier" + && (prop_kind == "property_identifier" + || prop_kind == "identifier") + && !JS_BUILTIN_GLOBALS.contains(&obj_text) + && prop_text != "prototype" + { + return format!("{obj_text}.{prop_text}"); + } + } + } + } + } + } + } + _ => {} + } + cur = n.parent(); + } + "".to_string() +} + +/// Collect from a call_expression node: +/// - `this(args)` → `Call { name: "this" }` (this used as a function) +/// - `fn.call(ctx, ...)` / `fn.apply(ctx, ...)` → ThisCallBinding +fn collect_this_call_and_bindings(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(fn_node) = node.child_by_field_name("function") else { return }; + if fn_node.kind() == "this" { + symbols.calls.push(Call { + name: "this".to_string(), + line: start_line(node), + dynamic: None, + receiver: None, + }); + return; + } + if fn_node.kind() != "member_expression" { + return; + } + let (Some(obj), Some(prop)) = ( + fn_node.child_by_field_name("object"), + fn_node.child_by_field_name("property"), + ) else { + return; + }; + let prop_text = node_text(&prop, source); + let obj_text = node_text(&obj, source); + if obj.kind() != "identifier" + || (prop_text != "call" && prop_text != "apply") + || JS_BUILTIN_GLOBALS.contains(&obj_text) + { + return; + } + let args = node + .child_by_field_name("arguments") + .or_else(|| find_child(node, "arguments")); + let Some(args) = args else { return }; + // First real argument: only bind if it's a plain identifier. 
+ for i in 0..args.child_count() { + let Some(child) = args.child(i) else { continue }; + let t = child.kind(); + if t == "(" || t == ")" || t == "," { + continue; + } + if t == "identifier" { + let arg_text = node_text(&child, source); + if !JS_BUILTIN_GLOBALS.contains(&arg_text) && arg_text != "undefined" && arg_text != "null" { + symbols.this_call_bindings.push(ThisCallBinding { + callee: obj_text.to_string(), + this_arg: arg_text.to_string(), + }); + } + } + break; + } +} + +/// Phase 8.3c: `f(x)` identifier-argument bindings, including inline +/// `f(...[a, b])` array-literal spread expansion. +fn collect_param_bindings(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(fn_node) = node.child_by_field_name("function") else { return }; + if fn_node.kind() != "identifier" { + return; + } + let fn_text = node_text(&fn_node, source); + if JS_BUILTIN_GLOBALS.contains(&fn_text) { + return; + } + let args = node + .child_by_field_name("arguments") + .or_else(|| find_child(node, "arguments")); + let Some(args) = args else { return }; + let mut arg_idx: u32 = 0; + for i in 0..args.child_count() { + let Some(child) = args.child(i) else { continue }; + let ct = child.kind(); + if ct == "," || ct == "(" || ct == ")" { + continue; + } + if ct == "identifier" { + let arg_text = node_text(&child, source); + if !JS_BUILTIN_GLOBALS.contains(&arg_text) { + symbols.param_bindings.push(ParamBinding { + callee: fn_text.to_string(), + arg_index: arg_idx, + arg_name: arg_text.to_string(), + }); + } + } else if ct == "spread_element" { + // f(...[a, b]) — inline array literal: expand each element as a direct binding. 
+ let inner = child + .child_by_field_name("argument") + .or_else(|| if child.child_count() > 1 { child.child(1) } else { None }); + if let Some(inner) = inner { + if inner.kind() == "array" { + let mut elem_count: u32 = 0; + for j in 0..inner.child_count() { + let Some(elem) = inner.child(j) else { continue }; + let et = elem.kind(); + if et == "," || et == "[" || et == "]" { + continue; + } + if et == "identifier" { + let elem_text = node_text(&elem, source); + if !JS_BUILTIN_GLOBALS.contains(&elem_text) { + symbols.param_bindings.push(ParamBinding { + callee: fn_text.to_string(), + arg_index: arg_idx + elem_count, + arg_name: elem_text.to_string(), + }); + } + } + elem_count += 1; + } + // Advance by the exact number of slots this spread occupies so + // zero-element spreads (...[]) don't shift subsequent indices. + arg_idx += elem_count; + continue; + } + } + } + arg_idx += 1; + } +} + +/// Phase 8.3e: `f(...arr)` spread bindings and `Array.from(src, cb)` callbacks. +fn collect_spread_and_array_from_bindings(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(fn_node) = node.child_by_field_name("function") else { return }; + let args = node + .child_by_field_name("arguments") + .or_else(|| find_child(node, "arguments")); + let Some(args) = args else { return }; + + // Spread: f(...arr) + if fn_node.kind() == "identifier" { + let fn_text = node_text(&fn_node, source); + if !JS_BUILTIN_GLOBALS.contains(&fn_text) { + let mut arg_idx: u32 = 0; + for i in 0..args.child_count() { + let Some(child) = args.child(i) else { continue }; + let ct = child.kind(); + if ct == "," || ct == "(" || ct == ")" { + continue; + } + if ct == "spread_element" { + let target = child + .child_by_field_name("argument") + .or_else(|| if child.child_count() > 1 { child.child(1) } else { None }); + if let Some(target) = target { + if target.kind() == "identifier" { + let target_text = node_text(&target, source); + if !JS_BUILTIN_GLOBALS.contains(&target_text) { + 
symbols.spread_arg_bindings.push(SpreadArgBinding { + callee: fn_text.to_string(), + array_name: target_text.to_string(), + start_index: arg_idx, + }); + } + } + } + } + arg_idx += 1; + } + } + } + + // Array.from(source, cb) + if fn_node.kind() == "member_expression" { + let (Some(obj), Some(prop)) = ( + fn_node.child_by_field_name("object"), + fn_node.child_by_field_name("property"), + ) else { + return; + }; + if node_text(&obj, source) != "Array" || node_text(&prop, source) != "from" { + return; + } + let mut fn_args: Vec = Vec::new(); + for i in 0..args.child_count() { + let Some(child) = args.child(i) else { continue }; + let ct = child.kind(); + if ct == "," || ct == "(" || ct == ")" { + continue; + } + fn_args.push(child); + } + if fn_args.len() >= 2 { + let src_arg = &fn_args[0]; + let cb_arg = &fn_args[1]; + let src_text = node_text(src_arg, source); + let cb_text = node_text(cb_arg, source); + if src_arg.kind() == "identifier" + && !JS_BUILTIN_GLOBALS.contains(&src_text) + && cb_arg.kind() == "identifier" + && !JS_BUILTIN_GLOBALS.contains(&cb_text) + { + symbols.array_callback_bindings.push(ArrayCallbackBinding { + source_name: src_text.to_string(), + callee_name: cb_text.to_string(), + }); + } + } + } +} + +/// Phase 8.3e: `const arr = [fn1, fn2]` array-element bindings. 
+fn collect_array_elem_bindings(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let (Some(name_n), Some(value_n)) = ( + node.child_by_field_name("name"), + node.child_by_field_name("value"), + ) else { + return; + }; + if name_n.kind() != "identifier" || value_n.kind() != "array" { + return; + } + let array_name = node_text(&name_n, source); + let mut idx: u32 = 0; + for i in 0..value_n.child_count() { + let Some(elem) = value_n.child(i) else { continue }; + let et = elem.kind(); + if et == "," || et == "[" || et == "]" { + continue; + } + if et == "identifier" { + let elem_text = node_text(&elem, source); + if !JS_BUILTIN_GLOBALS.contains(&elem_text) { + symbols.array_elem_bindings.push(ArrayElemBinding { + array_name: array_name.to_string(), + index: idx, + elem_name: elem_text.to_string(), + }); + } + } + idx += 1; + } +} + +/// Phase 8.3e: collection wrap `const s = new Set(arr)` / `new Map(arr)` → +/// FnRefBinding `s[*] ⊇ arr[*]`. +fn collect_collection_wrap_binding(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let (Some(name_n), Some(value_n)) = ( + node.child_by_field_name("name"), + node.child_by_field_name("value"), + ) else { + return; + }; + if name_n.kind() != "identifier" || value_n.kind() != "new_expression" { + return; + } + let (Some(ctor), Some(args)) = ( + value_n.child_by_field_name("constructor"), + value_n.child_by_field_name("arguments"), + ) else { + return; + }; + let ctor_text = node_text(&ctor, source); + if ctor_text != "Set" && ctor_text != "Map" { + return; + } + for i in 0..args.child_count() { + let Some(arg) = args.child(i) else { continue }; + let at = arg.kind(); + if at == "(" || at == ")" { + continue; + } + if at == "identifier" { + let arg_text = node_text(&arg, source); + if !JS_BUILTIN_GLOBALS.contains(&arg_text) { + symbols.fn_ref_bindings.push(FnRefBinding { + lhs: format!("{}[*]", node_text(&name_n, source)), + rhs: format!("{arg_text}[*]"), + rhs_receiver: None, + }); + break; + } + } + break; + } 
+} + +/// Phase 8.3e: `for (const x of arr)` iteration bindings +/// (for_in_statement with an `of` keyword). +fn collect_for_of_binding(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let mut is_for_of = false; + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if node_text(&child, source) == "of" { + is_for_of = true; + break; + } + } + } + if !is_for_of { + return; + } + let Some(right) = node.child_by_field_name("right") else { return }; + let right_text = node_text(&right, source); + if right.kind() != "identifier" || JS_BUILTIN_GLOBALS.contains(&right_text) { + return; + } + let Some(left) = node.child_by_field_name("left") else { return }; + let mut var_name: Option<&str> = None; + if left.kind() == "identifier" { + var_name = Some(node_text(&left, source)); + } else { + for i in 0..left.child_count() { + let Some(lc) = left.child(i) else { continue }; + if lc.kind() == "variable_declarator" { + if let Some(nc) = lc.child_by_field_name("name") { + if nc.kind() == "identifier" { + var_name = Some(node_text(&nc, source)); + break; + } + } + } else if lc.kind() == "identifier" { + let lc_text = node_text(&lc, source); + if lc_text != "const" && lc_text != "let" && lc_text != "var" { + var_name = Some(lc_text); + break; + } + } + } + } + if let Some(var_name) = var_name { + if !JS_BUILTIN_GLOBALS.contains(&var_name) { + let enclosing_func = enclosing_func_context(node, source); + symbols.for_of_bindings.push(ForOfBinding { + var_name: var_name.to_string(), + source_name: right_text.to_string(), + enclosing_func, + }); + } + } +} + +/// Phase 8.3f: object-destructuring rest-parameter bindings from function +/// definitions (`function f({ a, ...rest })` → callee "f", restName "rest"). +/// Class methods are qualified `ClassName.method`, mirroring the TS +/// `objectRestClass` propagation (class_declaration|class → class_body → +/// method_definition; abstract classes intentionally excluded). 
+fn collect_object_rest_params(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let t = node.kind(); + let mut fn_name: Option = None; + let mut params_node: Option = None; + + match t { + "function_declaration" | "generator_function_declaration" => { + if let Some(name_n) = node.child_by_field_name("name") { + if name_n.kind() == "identifier" { + fn_name = Some(node_text(&name_n, source).to_string()); + } + } + params_node = node + .child_by_field_name("parameters") + .or_else(|| find_child(node, "formal_parameters")); + } + "variable_declarator" => { + if let (Some(name_n), Some(value_n)) = ( + node.child_by_field_name("name"), + node.child_by_field_name("value"), + ) { + let vt = value_n.kind(); + if name_n.kind() == "identifier" + && (vt == "arrow_function" || vt == "function_expression" || vt == "generator_function") + { + fn_name = Some(node_text(&name_n, source).to_string()); + params_node = value_n + .child_by_field_name("parameters") + .or_else(|| find_child(&value_n, "formal_parameters")); + } + } + } + "method_definition" => { + // class method `class Foo { bar({ ...rest }) {} }` or object-literal + // shorthand method `{ bar({ ...rest }) {} }`. + if let Some(name_n) = node.child_by_field_name("name") { + let method = node_text(&name_n, source); + let current_class = node + .parent() + .filter(|p| p.kind() == "class_body") + .and_then(|p| p.parent()) + .filter(|c| c.kind() == "class_declaration" || c.kind() == "class") + .and_then(|c| c.child_by_field_name("name").map(|n| node_text(&n, source).to_string())); + fn_name = Some(match current_class { + Some(c) => format!("{c}.{method}"), + None => method.to_string(), + }); + params_node = node + .child_by_field_name("parameters") + .or_else(|| find_child(node, "formal_parameters")); + } + } + "pair" => { + // object-literal method: `{ bar: function({ ...rest }) {} }`. + // Computed keys are skipped — they can never match a paramBinding callee. 
+ if let (Some(key_n), Some(value_n)) = ( + node.child_by_field_name("key"), + node.child_by_field_name("value"), + ) { + let vt = value_n.kind(); + if key_n.kind() != "computed_property_name" + && (vt == "arrow_function" || vt == "function_expression" || vt == "generator_function") + { + let key_text = node_text(&key_n, source); + fn_name = Some(if key_n.kind() == "string" { + key_text[1..key_text.len() - 1].to_string() + } else { + key_text.to_string() + }); + params_node = value_n + .child_by_field_name("parameters") + .or_else(|| find_child(&value_n, "formal_parameters")); + } + } + } + _ => {} + } + + let (Some(fn_name), Some(params_node)) = (fn_name, params_node) else { return }; + let mut param_idx: u32 = 0; + for i in 0..params_node.child_count() { + let Some(child) = params_node.child(i) else { continue }; + let ct = child.kind(); + if ct == "," || ct == "(" || ct == ")" { + continue; + } + if ct == "object_pattern" { + for j in 0..child.child_count() { + let Some(inner) = child.child(j) else { continue }; + if inner.kind() == "rest_pattern" || inner.kind() == "rest_element" { + let rest_id = inner.child(1).or_else(|| inner.child_by_field_name("name")); + if let Some(rest_id) = rest_id { + if rest_id.kind() == "identifier" { + symbols.object_rest_param_bindings.push(ObjectRestParamBinding { + callee: fn_name.clone(), + rest_name: node_text(&rest_id, source).to_string(), + arg_index: param_idx, + }); + } + } + } + } + } + param_idx += 1; + } +} + +/// Phase 8.3f: object-property bindings from object literals. +/// `const obj = { e4 }` and `const obj = { e1: fn }` (identifier values only). 
fn collect_object_prop_bindings(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
    // Handles declarators of the form `<identifier> = { ... }`; anything else is
    // ignored. Each shorthand member (`{ e4 }`) and each `key: identifier` pair
    // whose value is not a JS builtin global yields one `ObjectPropBinding`.
    let name_field = node.child_by_field_name("name");
    let value_field = node.child_by_field_name("value");
    let (Some(target), Some(literal)) = (name_field, value_field) else {
        return;
    };
    if !(target.kind() == "identifier" && literal.kind() == "object") {
        return;
    }

    let object_name = node_text(&target, source);
    for idx in 0..literal.child_count() {
        let Some(member) = literal.child(idx) else { continue };
        match member.kind() {
            // `{ e4 }` — property name and value name are the same identifier.
            "shorthand_property_identifier" => {
                let prop = node_text(&member, source);
                symbols.object_prop_bindings.push(ObjectPropBinding {
                    object_name: object_name.to_string(),
                    prop_name: prop.to_string(),
                    value_name: prop.to_string(),
                });
            }
            // `{ key: value }` — only plain property keys bound to identifiers.
            "pair" => {
                let (Some(key_n), Some(val_n)) = (
                    member.child_by_field_name("key"),
                    member.child_by_field_name("value"),
                ) else {
                    continue;
                };
                let val_text = node_text(&val_n, source);
                let is_trackable = key_n.kind() == "property_identifier"
                    && val_n.kind() == "identifier"
                    && !JS_BUILTIN_GLOBALS.contains(&val_text);
                if is_trackable {
                    symbols.object_prop_bindings.push(ObjectPropBinding {
                        object_name: object_name.to_string(),
                        prop_name: node_text(&key_n, source).to_string(),
                        value_name: val_text.to_string(),
                    });
                }
            }
            _ => {}
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
@@ -2824,6 +3527,76 @@ mod tests {
         );
     }

    /// Issue #1453: `this.prop = new Ctor()` inside a class must seed a
    /// class-scoped typeMap key `ClassName.prop` (mirrors issue #1323 in TS).
+ #[test] + fn this_prop_constructor_assignment_seeds_class_scoped_type_map() { + let s = parse_js( + "class Logger { error(m) {} }\n\ + class UserService {\n\ + constructor() { this.logger = new Logger(); }\n\ + run() { this.logger.error('x'); }\n\ + }", + ); + let tm = s.type_map.iter().find(|t| t.name == "UserService.logger"); + assert!( + tm.is_some(), + "type_map should contain 'UserService.logger'; got: {:?}", + s.type_map + ); + assert_eq!(tm.unwrap().type_name, "Logger"); + assert_eq!(tm.unwrap().confidence, 1.0); + } + + /// `this.prop = new Ctor()` outside any class declaration (function-style + /// constructor) falls back to the un-scoped `this.prop` key. + #[test] + fn this_prop_constructor_assignment_outside_class_uses_this_key() { + let s = parse_js( + "function Service() { this.client = new HttpClient(); }", + ); + let tm = s.type_map.iter().find(|t| t.name == "this.client"); + assert!( + tm.is_some(), + "type_map should contain 'this.client'; got: {:?}", + s.type_map + ); + assert_eq!(tm.unwrap().type_name, "HttpClient"); + } + + /// Issue #1453 (edge 4): `const f = fn.bind(ctx)` must record a + /// fnRefBinding f → fn so later `f()` calls resolve through pts. + #[test] + fn bind_call_records_fn_ref_binding() { + let s = parse_js( + "function doWork() {}\n\ + const f = doWork.bind(null);", + ); + let b = s.fn_ref_bindings.iter().find(|b| b.lhs == "f"); + assert!( + b.is_some(), + "fn_ref_bindings should contain lhs 'f'; got: {:?}", + s.fn_ref_bindings + ); + assert_eq!(b.unwrap().rhs, "doWork"); + assert!(b.unwrap().rhs_receiver.is_none()); + } + + /// Method-receiver binds (`obj.method.bind`) and builtin-global binds + /// (`Math.max.bind`) are not tracked, mirroring the TS extractor. 
+ #[test] + fn bind_call_skips_method_receiver_and_builtins() { + let s = parse_js( + "const a = obj.method.bind(ctx);\n\ + const b = Math.bind(null);", + ); + assert!( + s.fn_ref_bindings.iter().all(|b| b.lhs != "a" && b.lhs != "b"), + "method-receiver and builtin binds must not be tracked; got: {:?}", + s.fn_ref_bindings + ); + } + // ── Prototype-method extraction ───────────────────────────────────────── #[test] @@ -3147,4 +3920,264 @@ mod tests { s.calls.iter().map(|c| (&c.name, &c.receiver)).collect::>() ); } + + // ── Pts binding collectors (parity with src/extractors/javascript.ts) ─── + + #[test] + fn param_binding_recorded_for_identifier_args() { + let s = parse_js( + "function target() {}\n\ + function hof(cb) { cb(); }\n\ + hof(target);", + ); + let b = s + .param_bindings + .iter() + .find(|b| b.callee == "hof" && b.arg_name == "target"); + assert!(b.is_some(), "param_bindings should contain hof←target; got: {:?}", s.param_bindings); + assert_eq!(b.unwrap().arg_index, 0); + } + + #[test] + fn param_binding_inline_spread_array_expands_elements() { + let s = parse_js( + "function a() {}\n\ + function b() {}\n\ + function pair(x, y) { x(); y(); }\n\ + pair(...[a, b]);", + ); + let idx: Vec<(u32, &str)> = s + .param_bindings + .iter() + .filter(|p| p.callee == "pair") + .map(|p| (p.arg_index, p.arg_name.as_str())) + .collect(); + assert!(idx.contains(&(0, "a")), "expected (0, a); got: {:?}", idx); + assert!(idx.contains(&(1, "b")), "expected (1, b); got: {:?}", idx); + } + + #[test] + fn this_call_binding_recorded_for_call_and_apply() { + let s = parse_js( + "function f() { this(); }\n\ + function ctx() {}\n\ + f.call(ctx);\n\ + f.apply(ctx);", + ); + let bindings: Vec<(&str, &str)> = s + .this_call_bindings + .iter() + .map(|b| (b.callee.as_str(), b.this_arg.as_str())) + .collect(); + assert_eq!( + bindings.iter().filter(|b| **b == ("f", "ctx")).count(), + 2, + "expected f→ctx from both .call and .apply; got: {:?}", + bindings + ); + // `this()` inside f 
must be recorded as a call named "this". + assert!( + s.calls.iter().any(|c| c.name == "this"), + "calls should contain bare this(); got: {:?}", + s.calls.iter().map(|c| &c.name).collect::>() + ); + } + + #[test] + fn this_call_binding_skips_null_and_undefined() { + let s = parse_js( + "function f() {}\n\ + f.call(null);\n\ + f.apply(undefined);", + ); + assert!( + s.this_call_bindings.is_empty(), + "null/undefined this-args must not bind; got: {:?}", + s.this_call_bindings + ); + } + + /// `invoker.call(handler, 10)` must emit a dynamic call to `invoker` only. + /// Emitting the identifier args too would create a false runCallThis→handler + /// edge; the handler flow is covered by the ThisCallBinding (invoker::this). + #[test] + fn call_apply_bind_args_do_not_emit_callback_reference_calls() { + let s = parse_js( + "function invoker(x) { return this(x); }\n\ + function handler(n) { return n * 2; }\n\ + function runCallThis() { return invoker.call(handler, 10); }", + ); + assert!( + s.calls.iter().any(|c| c.name == "invoker" && c.dynamic == Some(true)), + "invoker.call() should emit a dynamic call to invoker; got: {:?}", + s.calls.iter().map(|c| (&c.name, c.dynamic)).collect::>() + ); + assert!( + !s.calls.iter().any(|c| c.name == "handler"), + ".call() args must not become callback-reference calls; got: {:?}", + s.calls.iter().map(|c| (&c.name, c.dynamic)).collect::>() + ); + let b = s.this_call_bindings.iter().find(|b| b.callee == "invoker"); + assert!(b.is_some(), "this_call_bindings should contain invoker→handler; got: {:?}", s.this_call_bindings); + assert_eq!(b.unwrap().this_arg, "handler"); + } + + #[test] + fn array_elem_bindings_recorded() { + let s = parse_js( + "function fn1() {}\n\ + function fn2() {}\n\ + const arr = [fn1, fn2];", + ); + let got: Vec<(u32, &str)> = s + .array_elem_bindings + .iter() + .filter(|b| b.array_name == "arr") + .map(|b| (b.index, b.elem_name.as_str())) + .collect(); + assert!(got.contains(&(0, "fn1")), "expected (0, fn1); got: 
{:?}", got); + assert!(got.contains(&(1, "fn2")), "expected (1, fn2); got: {:?}", got); + } + + #[test] + fn spread_arg_binding_recorded() { + let s = parse_js( + "function callAll(a, b) { a(); b(); }\n\ + const fns = [x, y];\n\ + callAll(...fns);", + ); + let b = s.spread_arg_bindings.iter().find(|b| b.callee == "callAll"); + assert!(b.is_some(), "spread_arg_bindings missing; got: {:?}", s.spread_arg_bindings); + let b = b.unwrap(); + assert_eq!(b.array_name, "fns"); + assert_eq!(b.start_index, 0); + } + + #[test] + fn collection_wrap_set_emits_wildcard_fn_ref_binding() { + let s = parse_js( + "const arr = [f1];\n\ + const wrapped = new Set(arr);", + ); + let b = s.fn_ref_bindings.iter().find(|b| b.lhs == "wrapped[*]"); + assert!(b.is_some(), "Set wrap should bind wrapped[*] ⊇ arr[*]; got: {:?}", s.fn_ref_bindings); + assert_eq!(b.unwrap().rhs, "arr[*]"); + } + + #[test] + fn for_of_binding_records_enclosing_func() { + let s = parse_js( + "function run(handlers) {\n\ + for (const h of handlers) { h(); }\n\ + }", + ); + let b = s.for_of_bindings.iter().find(|b| b.var_name == "h"); + assert!(b.is_some(), "for_of_bindings missing; got: {:?}", s.for_of_bindings); + let b = b.unwrap(); + assert_eq!(b.source_name, "handlers"); + assert_eq!(b.enclosing_func, "run"); + } + + #[test] + fn for_of_binding_in_method_uses_class_qualified_context() { + let s = parse_js( + "class Runner {\n\ + runAll() { for (const h of this.handlers) {} const x = 1; for (const g of list) { g(); } }\n\ + }", + ); + let b = s.for_of_bindings.iter().find(|b| b.var_name == "g"); + assert!(b.is_some(), "for_of_bindings missing for g; got: {:?}", s.for_of_bindings); + assert_eq!(b.unwrap().enclosing_func, "Runner.runAll"); + } + + #[test] + fn for_of_binding_at_module_level_uses_module_context() { + let s = parse_js("for (const cb of callbacks) { cb(); }"); + let b = s.for_of_bindings.iter().find(|b| b.var_name == "cb"); + assert!(b.is_some(), "for_of_bindings missing; got: {:?}", s.for_of_bindings); 
+ assert_eq!(b.unwrap().enclosing_func, ""); + } + + #[test] + fn array_from_callback_binding_recorded() { + let s = parse_js( + "function makeThing(x) { return x; }\n\ + const things = Array.from(items, makeThing);", + ); + let b = s + .array_callback_bindings + .iter() + .find(|b| b.callee_name == "makeThing"); + assert!(b.is_some(), "array_callback_bindings missing; got: {:?}", s.array_callback_bindings); + assert_eq!(b.unwrap().source_name, "items"); + } + + #[test] + fn object_rest_param_binding_recorded() { + let s = parse_js("function f3({ e1, ...eerest }) { eerest.e4(); }"); + let b = s + .object_rest_param_bindings + .iter() + .find(|b| b.callee == "f3"); + assert!(b.is_some(), "object_rest_param_bindings missing; got: {:?}", s.object_rest_param_bindings); + let b = b.unwrap(); + assert_eq!(b.rest_name, "eerest"); + assert_eq!(b.arg_index, 0); + } + + #[test] + fn object_rest_param_binding_in_method_uses_class_context() { + let s = parse_js( + "class Svc {\n\ + handle({ id, ...rest }) { rest.go(); }\n\ + }", + ); + let b = s.object_rest_param_bindings.iter().find(|b| b.rest_name == "rest"); + assert!(b.is_some(), "object_rest_param_bindings missing; got: {:?}", s.object_rest_param_bindings); + assert_eq!(b.unwrap().callee, "Svc.handle"); + } + + #[test] + fn object_prop_bindings_recorded_for_shorthand_and_pair() { + let s = parse_js( + "function e4() {}\n\ + function named() {}\n\ + const obj = { e4, alias: named };", + ); + let shorthand = s + .object_prop_bindings + .iter() + .find(|b| b.object_name == "obj" && b.prop_name == "e4"); + assert!(shorthand.is_some(), "shorthand binding missing; got: {:?}", s.object_prop_bindings); + assert_eq!(shorthand.unwrap().value_name, "e4"); + + let pair = s + .object_prop_bindings + .iter() + .find(|b| b.object_name == "obj" && b.prop_name == "alias"); + assert!(pair.is_some(), "pair binding missing; got: {:?}", s.object_prop_bindings); + assert_eq!(pair.unwrap().value_name, "named"); + } + + #[test] + fn 
inline_new_receiver_normalized_to_constructor_name() { + let s = parse_js( + "class A { t() {} }\n\ + export function testPrototypeAlias() { new A().t(); }", + ); + let call = s.calls.iter().find(|c| c.name == "t"); + assert!(call.is_some(), "t() call missing; got: {:?}", s.calls); + assert_eq!(call.unwrap().receiver.as_deref(), Some("A")); + } + + #[test] + fn paren_wrapped_new_receiver_normalized_to_constructor_name() { + let s = parse_js( + "class Dog { bark() {} }\n\ + export function run() { (new Dog()).bark(); }", + ); + let call = s.calls.iter().find(|c| c.name == "bark"); + assert!(call.is_some(), "bark() call missing; got: {:?}", s.calls); + assert_eq!(call.unwrap().receiver.as_deref(), Some("Dog")); + } } diff --git a/crates/codegraph-core/src/features/structure.rs b/crates/codegraph-core/src/features/structure.rs index 8343881e3..7b9897cb7 100644 --- a/crates/codegraph-core/src/features/structure.rs +++ b/crates/codegraph-core/src/features/structure.rs @@ -921,21 +921,8 @@ mod tests { #[test] fn line_count_map_from_symbols() { let mut file_symbols = HashMap::new(); - let mut sym = FileSymbols { - file: "src/a.ts".to_string(), - definitions: vec![], - imports: vec![], - calls: vec![], - classes: vec![], - exports: vec![], - type_map: vec![], - return_type_map: vec![], - call_assignments: vec![], - ast_nodes: vec![], - dataflow: None, - line_count: Some(42), - fn_ref_bindings: vec![], - }; + let mut sym = FileSymbols::new("src/a.ts".to_string()); + sym.line_count = Some(42); file_symbols.insert("src/a.ts".to_string(), sym.clone()); sym.file = "src/b.ts".to_string(); diff --git a/crates/codegraph-core/src/types.rs b/crates/codegraph-core/src/types.rs index ae40af751..5884cc77a 100644 --- a/crates/codegraph-core/src/types.rs +++ b/crates/codegraph-core/src/types.rs @@ -318,6 +318,102 @@ pub struct FnRefBinding { pub rhs_receiver: Option, } +/// Argument-to-parameter binding at a call site (Phase 8.3c). 
/// Records `f(x)` where `x` is an identifier that may carry a function reference.
/// Mirrors the `ParamBinding` interface in `src/types.ts`.
#[napi(object)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParamBinding {
    /// Name of the function being called (`hof` in `hof(target)`).
    pub callee: String,
    /// Zero-based position of the argument at the call site; elements of an
    /// inline spread array (`pair(...[a, b])`) receive consecutive indices.
    #[napi(js_name = "argIndex")]
    pub arg_index: u32,
    /// Identifier passed as the argument (`target` in `hof(target)`).
    #[napi(js_name = "argName")]
    pub arg_name: String,
}

/// This-context binding from `fn.call(ctx, ...)` / `fn.apply(ctx, ...)`.
/// Mirrors the `ThisCallBinding` interface in `src/types.ts`.
#[napi(object)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ThisCallBinding {
    /// Function on which `.call` / `.apply` is invoked (`f` in `f.call(ctx)`).
    pub callee: String,
    /// Identifier supplied as the `this` argument (`ctx` in `f.call(ctx)`).
    /// The extractor tests expect no binding for `null` / `undefined`.
    #[napi(js_name = "thisArg")]
    pub this_arg: String,
}

/// Array-element binding from `const arr = [fn1, fn2]` (Phase 8.3e).
/// Mirrors the `ArrayElemBinding` interface in `src/types.ts`.
#[napi(object)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ArrayElemBinding {
    /// Name of the variable holding the array literal (`arr`).
    #[napi(js_name = "arrayName")]
    pub array_name: String,
    /// Zero-based position of the element within the literal.
    pub index: u32,
    /// Identifier stored at that position (`fn1`, `fn2`).
    #[napi(js_name = "elemName")]
    pub elem_name: String,
}

/// Spread-argument binding from `f(...arr)` (Phase 8.3e).
/// Mirrors the `SpreadArgBinding` interface in `src/types.ts`.
#[napi(object)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SpreadArgBinding {
    /// Name of the function receiving the spread (`callAll` in `callAll(...fns)`).
    pub callee: String,
    /// Name of the array being spread (`fns`).
    #[napi(js_name = "arrayName")]
    pub array_name: String,
    /// 0 for `callAll(...fns)`. NOTE(review): presumably the argument position
    /// at which the spread begins — confirm against the extractor.
    #[napi(js_name = "startIndex")]
    pub start_index: u32,
}

/// For-of iteration binding from `for (const x of arr)` (Phase 8.3e).
/// Mirrors the `ForOfBinding` interface in `src/types.ts`.
#[napi(object)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ForOfBinding {
    /// Loop variable name (`h` in `for (const h of handlers)`).
    #[napi(js_name = "varName")]
    pub var_name: String,
    /// Name of the iterated collection (`handlers`).
    #[napi(js_name = "sourceName")]
    pub source_name: String,
    /// Function containing the loop: a plain name (`run`) or a class-qualified
    /// method (`Runner.runAll`). Module-level loops use a module-context marker
    /// whose exact value is defined by the extractor — TODO confirm.
    #[napi(js_name = "enclosingFunc")]
    pub enclosing_func: String,
}

/// Array-callback binding from `Array.from(arr, cb)` (Phase 8.3e).
/// Mirrors the `ArrayCallbackBinding` interface in `src/types.ts`.
+#[napi(object)] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ArrayCallbackBinding { + #[napi(js_name = "sourceName")] + pub source_name: String, + #[napi(js_name = "calleeName")] + pub callee_name: String, +} + +/// Object-rest parameter binding from `function f({ a, ...rest })` (Phase 8.3f). +/// Mirrors the `ObjectRestParamBinding` interface in `src/types.ts`. +#[napi(object)] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ObjectRestParamBinding { + pub callee: String, + #[napi(js_name = "restName")] + pub rest_name: String, + #[napi(js_name = "argIndex")] + pub arg_index: u32, +} + +/// Object-property binding from `const obj = { e4 }` / `{ e4: fn }` (Phase 8.3f). +/// Mirrors the `ObjectPropBinding` interface in `src/types.ts`. +#[napi(object)] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ObjectPropBinding { + #[napi(js_name = "objectName")] + pub object_name: String, + #[napi(js_name = "propName")] + pub prop_name: String, + #[napi(js_name = "valueName")] + pub value_name: String, +} + #[napi(object)] #[derive(Debug, Clone, Serialize, Deserialize)] pub struct FileSymbols { @@ -341,6 +437,30 @@ pub struct FileSymbols { /// Phase 8.3: function-reference bindings for points-to analysis. #[napi(js_name = "fnRefBindings")] pub fn_ref_bindings: Vec, + /// Phase 8.3c: argument-to-parameter bindings for parameter-flow pts. + #[napi(js_name = "paramBindings")] + pub param_bindings: Vec, + /// This-context bindings from `fn.call(ctx)` / `fn.apply(ctx)`. + #[napi(js_name = "thisCallBindings")] + pub this_call_bindings: Vec, + /// Phase 8.3e: array-element bindings from `const arr = [fn1, fn2]`. + #[napi(js_name = "arrayElemBindings")] + pub array_elem_bindings: Vec, + /// Phase 8.3e: spread-argument bindings from `f(...arr)`. + #[napi(js_name = "spreadArgBindings")] + pub spread_arg_bindings: Vec, + /// Phase 8.3e: for-of iteration variable bindings. 
+ #[napi(js_name = "forOfBindings")] + pub for_of_bindings: Vec, + /// Phase 8.3e: array callback bindings from `Array.from(arr, cb)`. + #[napi(js_name = "arrayCallbackBindings")] + pub array_callback_bindings: Vec, + /// Phase 8.3f: object-rest parameter bindings from `function f({ ...rest })`. + #[napi(js_name = "objectRestParamBindings")] + pub object_rest_param_bindings: Vec, + /// Phase 8.3f: object-property bindings from `const obj = { fn }`. + #[napi(js_name = "objectPropBindings")] + pub object_prop_bindings: Vec, } impl FileSymbols { @@ -359,6 +479,14 @@ impl FileSymbols { return_type_map: Vec::new(), call_assignments: Vec::new(), fn_ref_bindings: Vec::new(), + param_bindings: Vec::new(), + this_call_bindings: Vec::new(), + array_elem_bindings: Vec::new(), + spread_arg_bindings: Vec::new(), + for_of_bindings: Vec::new(), + array_callback_bindings: Vec::new(), + object_rest_param_bindings: Vec::new(), + object_prop_bindings: Vec::new(), } } } diff --git a/src/domain/graph/builder/stages/build-edges.ts b/src/domain/graph/builder/stages/build-edges.ts index fa1d0f44a..3c0f2e3c3 100644 --- a/src/domain/graph/builder/stages/build-edges.ts +++ b/src/domain/graph/builder/stages/build-edges.ts @@ -12,15 +12,23 @@ import { PROPAGATION_HOP_PENALTY } from '../../../../extractors/javascript.js'; import { debug } from '../../../../infrastructure/logger.js'; import { loadNative } from '../../../../infrastructure/native.js'; import type { + ArrayCallbackBinding, + ArrayElemBinding, BetterSqlite3Database, Call, ClassRelation, Definition, ExtractorOutput, FnRefBinding, + ForOfBinding, Import, NativeAddon, NodeRow, + ObjectPropBinding, + ObjectRestParamBinding, + ParamBinding, + SpreadArgBinding, + ThisCallBinding, TypeMapEntry, } from '../../../../types.js'; import { computeConfidence } from '../../resolve.js'; @@ -61,13 +69,27 @@ interface QueryNodeRow { interface NativeFileEntry { file: string; fileNodeId: number; - definitions: Array<{ name: string; kind: string; line: 
number; endLine: number | null }>; + definitions: Array<{ + name: string; + kind: string; + line: number; + endLine: number | null; + params?: string[]; + }>; calls: Call[]; importedNames: Array<{ name: string; file: string }>; classes: ClassRelation[]; typeMap: Array<{ name: string; typeName: string; confidence: number }>; /** Phase 8.3: function-reference bindings for pts analysis. */ fnRefBindings?: Array<{ lhs: string; rhs: string; rhsReceiver?: string }>; + paramBindings?: ParamBinding[]; + thisCallBindings?: ThisCallBinding[]; + arrayElemBindings?: ArrayElemBinding[]; + spreadArgBindings?: SpreadArgBinding[]; + forOfBindings?: ForOfBinding[]; + arrayCallbackBindings?: ArrayCallbackBinding[]; + objectRestParamBindings?: ObjectRestParamBinding[]; + objectPropBindings?: ObjectPropBinding[]; } /** Shape returned by native buildCallEdges. */ @@ -509,17 +531,35 @@ function buildCallEdgesNative( nativeFiles.push({ file: relPath, fileNodeId: fileNodeRow.id, - definitions: symbols.definitions.map((d) => ({ - name: d.name, - kind: d.kind, - line: d.line, - endLine: d.endLine ?? null, - })), + definitions: symbols.definitions.map((d) => { + const params = d.children?.filter((c) => c.kind === 'parameter').map((c) => c.name); + return { + name: d.name, + kind: d.kind, + line: d.line, + endLine: d.endLine ?? null, + params: params?.length ? params : undefined, + }; + }), calls: symbols.calls, importedNames, classes: symbols.classes, typeMap, fnRefBindings: symbols.fnRefBindings?.length ? symbols.fnRefBindings : undefined, + paramBindings: symbols.paramBindings?.length ? symbols.paramBindings : undefined, + thisCallBindings: symbols.thisCallBindings?.length ? symbols.thisCallBindings : undefined, + arrayElemBindings: symbols.arrayElemBindings?.length ? symbols.arrayElemBindings : undefined, + spreadArgBindings: symbols.spreadArgBindings?.length ? symbols.spreadArgBindings : undefined, + forOfBindings: symbols.forOfBindings?.length ? 
symbols.forOfBindings : undefined, + arrayCallbackBindings: symbols.arrayCallbackBindings?.length + ? symbols.arrayCallbackBindings + : undefined, + objectRestParamBindings: symbols.objectRestParamBindings?.length + ? symbols.objectRestParamBindings + : undefined, + objectPropBindings: symbols.objectPropBindings?.length + ? symbols.objectPropBindings + : undefined, }); } @@ -538,363 +578,6 @@ function buildCallEdgesNative( } } -/** - * Phase 8.3c pts post-pass for the native call-edge path. - * - * The native Rust engine builds call edges without knowledge of paramBindings, - * so `fn()` calls inside higher-order functions are not resolved to their - * concrete targets. This JS post-pass runs after the native edge pass and adds - * only the parameter-flow pts edges that the native engine missed. - * - * To avoid duplicating edges already emitted by the native engine, the current - * allEdgeRows snapshot is used to seed a seenByPair set before processing each - * file. - */ -function buildParamFlowPtsPostPass( - ctx: PipelineContext, - getNodeIdStmt: NodeIdStmt, - allEdgeRows: EdgeRowTuple[], - sharedLookup?: CallNodeLookup, -): void { - // Only process files that actually have paramBindings (avoid useless work). - const filesWithParams = [...ctx.fileSymbols].filter( - ([, symbols]) => symbols.paramBindings && symbols.paramBindings.length > 0, - ); - if (filesWithParams.length === 0) return; - - // Seed seenByPair from the existing rows so we don't duplicate native edges. - // This is O(|allEdgeRows|) once per post-pass, which is acceptable. - const seenByPair = new Set(); - for (const [srcId, tgtId] of allEdgeRows) { - seenByPair.add(`${srcId}|${tgtId}`); - } - - const { barrelOnlyFiles, rootDir } = ctx; - const lookup = sharedLookup ?? 
makeContextLookup(ctx, getNodeIdStmt); - - for (const [relPath, symbols] of filesWithParams) { - if (barrelOnlyFiles.has(relPath)) continue; - const fileNodeRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); - if (!fileNodeRow) continue; - - const importedNames = buildImportedNamesMap(ctx, relPath, symbols, rootDir); - const typeMap: Map = symbols.typeMap || new Map(); - const ptsMap = buildPointsToMapForFile(symbols, importedNames); - if (!ptsMap) continue; - - for (const call of symbols.calls) { - if (call.receiver || call.dynamic) continue; // pts post-pass handles only param-flow (non-dynamic) - - const caller = findCaller(lookup, call, symbols.definitions, relPath, fileNodeRow); - const scopedKey = caller.callerName != null ? `${caller.callerName}::${call.name}` : null; - if (!scopedKey || !ptsMap.has(scopedKey)) continue; - - // Only resolve calls that had no direct targets (same guard as buildFileCallEdges). - const { targets } = resolveCallTargets( - lookup, - call, - relPath, - importedNames, - typeMap as Map, - ); - if (targets.length > 0) continue; - - for (const alias of resolveViaPointsTo(scopedKey, ptsMap)) { - const { targets: aliasTargets, importedFrom: aliasFrom } = resolveCallTargets( - lookup, - { name: alias }, - relPath, - importedNames, - typeMap as Map, - ); - for (const t of aliasTargets) { - const edgeKey = `${caller.id}|${t.id}`; - if (t.id !== caller.id && !seenByPair.has(edgeKey)) { - const conf = - computeConfidence(relPath, t.file, aliasFrom ?? null) - PROPAGATION_HOP_PENALTY; - if (conf > 0) { - seenByPair.add(edgeKey); - allEdgeRows.push([caller.id, t.id, 'calls', conf, 0, 'points-to']); - } - } - } - } - } - } -} - -/** - * bind/alias pts post-pass for the native call-edge path. - * - * The native Rust engine has no knowledge of JS-layer fnRefBindings (e.g. - * `const f = fn.bind(ctx)`), so calls to bind-created aliases are not resolved - * to their original function on the native path. 
This JS post-pass runs after - * the native edge pass and adds only the fnRefBindings-seeded pts edges that the - * native engine missed. - * - * Uses the same seenByPair dedup guard as buildParamFlowPtsPostPass to avoid - * duplicating edges already emitted by the native engine. - */ -function buildFnRefBindingsPtsPostPass( - ctx: PipelineContext, - getNodeIdStmt: NodeIdStmt, - allEdgeRows: EdgeRowTuple[], - sharedLookup?: CallNodeLookup, -): void { - // Only process files that actually have fnRefBindings. - const filesWithBindings = [...ctx.fileSymbols].filter( - ([, symbols]) => symbols.fnRefBindings && symbols.fnRefBindings.length > 0, - ); - if (filesWithBindings.length === 0) return; - - // Seed seenByPair from the existing rows so we don't duplicate native edges. - const seenByPair = new Set(); - for (const [srcId, tgtId] of allEdgeRows) { - seenByPair.add(`${srcId}|${tgtId}`); - } - - const { barrelOnlyFiles, rootDir } = ctx; - const lookup = sharedLookup ?? makeContextLookup(ctx, getNodeIdStmt); - - for (const [relPath, symbols] of filesWithBindings) { - if (barrelOnlyFiles.has(relPath)) continue; - const fileNodeRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); - if (!fileNodeRow) continue; - - const importedNames = buildImportedNamesMap(ctx, relPath, symbols, rootDir); - const typeMap: Map = symbols.typeMap || new Map(); - const ptsMap = buildPointsToMapForFile(symbols, importedNames); - if (!ptsMap) continue; - - // Only resolve calls whose name is an lhs in fnRefBindings — the same - // narrowed guard used in buildFileCallEdges case (c). 
- const fnRefBindingLhs = new Set(symbols.fnRefBindings!.map((b) => b.lhs)); - - for (const call of symbols.calls) { - if (call.receiver || call.dynamic) continue; // bind aliases are flat-keyed, never dynamic - if (!fnRefBindingLhs.has(call.name)) continue; - if (!ptsMap.has(call.name)) continue; - - const caller = findCaller(lookup, call, symbols.definitions, relPath, fileNodeRow); - - // Only resolve calls that had no direct targets (same guard as buildFileCallEdges). - const { targets } = resolveCallTargets( - lookup, - call, - relPath, - importedNames, - typeMap as Map, - ); - if (targets.length > 0) continue; - - for (const alias of resolveViaPointsTo(call.name, ptsMap)) { - const { targets: aliasTargets, importedFrom: aliasFrom } = resolveCallTargets( - lookup, - { name: alias }, - relPath, - importedNames, - typeMap as Map, - ); - for (const t of aliasTargets) { - const edgeKey = `${caller.id}|${t.id}`; - if (t.id !== caller.id && !seenByPair.has(edgeKey)) { - const conf = - computeConfidence(relPath, t.file, aliasFrom ?? null) - PROPAGATION_HOP_PENALTY; - if (conf > 0) { - seenByPair.add(edgeKey); - allEdgeRows.push([caller.id, t.id, 'calls', conf, 0, 'points-to']); - } - } - } - } - } - } -} - -/** - * this-rebinding post-pass for the native call-edge path. - * - * When `fn.call(namedCtx, ...)` or `fn.apply(namedCtx, ...)` is extracted by the - * WASM layer, `thisCallBindings` records `{ callee: 'fn', thisArg: 'namedCtx' }`. - * The native Rust engine has no knowledge of these bindings, so `this()` calls - * inside `fn` remain unresolved. This JS post-pass adds the missing edges by - * resolving `this()` calls inside each `fn` that has a thisCallBinding. 
- */ -function buildThisCallBindingsPtsPostPass( - ctx: PipelineContext, - getNodeIdStmt: NodeIdStmt, - allEdgeRows: EdgeRowTuple[], - sharedLookup?: CallNodeLookup, -): void { - const filesWithBindings = [...ctx.fileSymbols].filter( - ([, symbols]) => symbols.thisCallBindings && symbols.thisCallBindings.length > 0, - ); - if (filesWithBindings.length === 0) return; - - const seenByPair = new Set(); - for (const [srcId, tgtId] of allEdgeRows) { - seenByPair.add(`${srcId}|${tgtId}`); - } - - const { barrelOnlyFiles, rootDir } = ctx; - const lookup = sharedLookup ?? makeContextLookup(ctx, getNodeIdStmt); - - for (const [relPath, symbols] of filesWithBindings) { - if (barrelOnlyFiles.has(relPath)) continue; - const fileNodeRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); - if (!fileNodeRow) continue; - - const importedNames = buildImportedNamesMap(ctx, relPath, symbols, rootDir); - const typeMap: Map = symbols.typeMap || new Map(); - const ptsMap = buildPointsToMapForFile(symbols, importedNames); - if (!ptsMap) continue; - - // Only process calls named 'this' (callee-not-receiver usage) - for (const call of symbols.calls) { - if (call.name !== 'this' || call.receiver) continue; - - const caller = findCaller(lookup, call, symbols.definitions, relPath, fileNodeRow); - if (caller.callerName == null) continue; - - const scopedKey = `${caller.callerName}::this`; - if (!ptsMap.has(scopedKey)) continue; - - for (const alias of resolveViaPointsTo(scopedKey, ptsMap)) { - const { targets: aliasTargets, importedFrom: aliasFrom } = resolveCallTargets( - lookup, - { name: alias }, - relPath, - importedNames, - typeMap as Map, - ); - for (const t of aliasTargets) { - const edgeKey = `${caller.id}|${t.id}`; - if (t.id !== caller.id && !seenByPair.has(edgeKey)) { - const conf = - computeConfidence(relPath, t.file, aliasFrom ?? 
null) - PROPAGATION_HOP_PENALTY; - if (conf > 0) { - seenByPair.add(edgeKey); - allEdgeRows.push([caller.id, t.id, 'calls', conf, 0, 'points-to']); - } - } - } - } - } - } -} - -/** - * Phase 8.3f post-pass for the native call-edge path. - * - * The native Rust engine builds call edges without knowledge of - * objectRestParamBindings, so `rest.method()` calls inside functions with - * object-destructuring rest parameters are not resolved via the typeMap chain. - * The Rust engine already resolves same-file and directly-imported callees - * (via steps 1–2 of its resolution logic), so this post-pass only adds edges - * that require the typeMap-chain path: - * typeMap[restName] → argName → typeMap[argName.method] → target - * - * Mirrors the seeding in buildCallEdgesJS (Phase 8.3f) to ensure both engine - * paths produce identical results for receiver-typed rest-param calls. - */ -function buildObjectRestParamPostPass( - ctx: PipelineContext, - getNodeIdStmt: NodeIdStmt, - allEdgeRows: EdgeRowTuple[], - sharedLookup?: CallNodeLookup, -): void { - const filesWithRestBindings = [...ctx.fileSymbols].filter( - ([, symbols]) => - symbols.objectRestParamBindings && - symbols.objectRestParamBindings.length > 0 && - symbols.paramBindings && - symbols.paramBindings.length > 0, - ); - if (filesWithRestBindings.length === 0) return; - - const seenByPair = new Set(); - for (const [srcId, tgtId] of allEdgeRows) { - seenByPair.add(`${srcId}|${tgtId}`); - } - - const { barrelOnlyFiles, rootDir } = ctx; - const lookup = sharedLookup ?? makeContextLookup(ctx, getNodeIdStmt); - - for (const [relPath, symbols] of filesWithRestBindings) { - if (barrelOnlyFiles.has(relPath)) continue; - const fileNodeRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); - if (!fileNodeRow) continue; - - const importedNames = buildImportedNamesMap(ctx, relPath, symbols, rootDir); - const typeMap: Map = new Map( - symbols.typeMap instanceof Map ? 
symbols.typeMap : [], - ); - - // Seed typeMap[callee::restName] = { type: argName } for each matching pair. - // Mirrors the seeding in buildCallEdgesJS Phase 8.3f. Keys are scoped by - // callee so two functions with the same rest-param name (e.g. `...rest`) in - // the same file don't collide (#1358). - // When only one callee uses a given rest name, also seed the unscoped key - // as a null-callerName fallback so edges aren't silently dropped if - // findCaller can't identify the enclosing function (#1358). - const restNameCallees = new Map>(); - for (const orpb of symbols.objectRestParamBindings!) { - if (!restNameCallees.has(orpb.restName)) restNameCallees.set(orpb.restName, new Set()); - restNameCallees.get(orpb.restName)!.add(orpb.callee); - } - const restNames = new Set(); - for (const orpb of symbols.objectRestParamBindings!) { - for (const pb of symbols.paramBindings!) { - if (pb.callee === orpb.callee && pb.argIndex === orpb.argIndex) { - const scopedKey = `${orpb.callee}::${orpb.restName}`; - if (!typeMap.has(scopedKey)) { - typeMap.set(scopedKey, { type: pb.argName, confidence: 0.65 }); - if (restNameCallees.get(orpb.restName)!.size === 1 && !typeMap.has(orpb.restName)) { - typeMap.set(orpb.restName, { type: pb.argName, confidence: 0.65 }); - } - } - // restNames tracks every rest-parameter name found, regardless of whether the - // scoped key was already in typeMap. This ensures the post-pass (below) processes - // all calls whose receiver matches a known rest binding — not just those whose - // typeMap entry was seeded in this iteration. - restNames.add(orpb.restName); - } - } - } - if (restNames.size === 0) continue; - - for (const call of symbols.calls) { - // Only process calls whose receiver is a known rest-binding name. - if (!call.receiver || !restNames.has(call.receiver)) continue; - - const caller = findCaller(lookup, call, symbols.definitions, relPath, fileNodeRow); - - // Resolve with the enriched typeMap. 
callerName is passed so - // resolveByMethodOrGlobal can look up the scoped key callee::restName (#1358). - // seenByPair deduplicates edges the native engine already emitted. - const { targets, importedFrom } = resolveCallTargets( - lookup, - call, - relPath, - importedNames, - typeMap as Map, - caller.callerName, - ); - for (const t of targets) { - const edgeKey = `${caller.id}|${t.id}`; - if (t.id !== caller.id && !seenByPair.has(edgeKey)) { - const conf = - computeConfidence(relPath, t.file, importedFrom ?? null) - PROPAGATION_HOP_PENALTY; - if (conf > 0) { - seenByPair.add(edgeKey); - allEdgeRows.push([caller.id, t.id, 'calls', conf, 0, 'points-to']); - } - } - } - } - } -} - /** * Object.defineProperty accessor post-pass for the native call-edge path. * @@ -988,11 +671,11 @@ function buildDefinePropertyPostPass( * * The native Rust engine has no knowledge of the CHA context, so `this.method()` * calls and interface method dispatches are not expanded to their concrete - * implementations. This JS post-pass runs after the native edges (and the pts - * post-pass) and adds only the CHA-resolved edges that the native engine missed. + * implementations. This JS post-pass runs after the native edges and adds only + * the CHA-resolved edges that the native engine missed. * - * Like buildParamFlowPtsPostPass, it seeds seenByPair from the current allEdgeRows - * snapshot to avoid duplicating edges the native engine already produced. + * Seeds seenByPair from the current allEdgeRows snapshot to avoid duplicating + * edges the native engine already produced. 
*/ function buildChaPostPass( ctx: PipelineContext, @@ -1921,26 +1604,12 @@ export async function buildEdges(ctx: PipelineContext): Promise { (ctx.isFullBuild || ctx.fileSymbols.size > ctx.config.build.smallFilesThreshold); if (useNativeCallEdges) { buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodesBefore, native!); - // Build the shared lookup once — both pts post-passes use it, avoiding - // redundant construction of the same context closure. + // The native engine receives all pts bindings (paramBindings, + // fnRefBindings, thisCallBindings, objectRestParamBindings, …) through + // NativeFileEntry and runs the same points-to solver as the JS path, so + // no pts post-passes are needed here. Only capabilities that remain + // JS-only run as post-passes below. const sharedLookup = makeContextLookup(ctx, getNodeIdStmt); - // Phase 8.3c post-pass: augment native call edges with parameter-flow pts - // edges. The native Rust engine has no knowledge of paramBindings, so any - // `fn()` call inside a higher-order function would be missed. This JS pass - // runs on top of the native edges and adds only the pts-resolved edges that - // the native engine could not produce. - buildParamFlowPtsPostPass(ctx, getNodeIdStmt, allEdgeRows, sharedLookup); - // bind/alias post-pass: augment native call edges with fnRefBindings-seeded - // pts edges. The native Rust engine has no knowledge of JS fnRefBindings - // (e.g. `const f = fn.bind(ctx)`), so calls to bind-created aliases are - // not resolved to their original function on the native path. - buildFnRefBindingsPtsPostPass(ctx, getNodeIdStmt, allEdgeRows, sharedLookup); - // this-rebinding post-pass: resolve `this()` calls inside functions that - // were invoked via `.call(namedCtx, ...)` / `.apply(namedCtx, ...)`. 
- buildThisCallBindingsPtsPostPass(ctx, getNodeIdStmt, allEdgeRows, sharedLookup); - // Phase 8.3f post-pass: augment native call edges with object rest-param - // receiver resolution — typeMap[restName] → argName → typeMap[argName.method]. - buildObjectRestParamPostPass(ctx, getNodeIdStmt, allEdgeRows, sharedLookup); // Object.defineProperty accessor post-pass: resolve this-dispatch inside // getter/setter functions registered via Object.defineProperty. buildDefinePropertyPostPass(ctx, getNodeIdStmt, allEdgeRows, sharedLookup); diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index c89f9e2f0..e5c5bd9b5 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -564,6 +564,9 @@ function runPostNativeCha(db: BetterSqlite3Database): { if (newEdges.length > 0) { db.transaction(() => batchInsertEdges(db, newEdges))(); + // Account for post-pass edges excluded from the build summary line (#1452), + // mirroring the this/super dispatch post-pass insertion log. + debug(`CHA expansion post-pass: inserted ${newEdgeCount} edge(s)`); } return { newEdgeCount, affectedFiles }; } @@ -1344,9 +1347,9 @@ export async function tryNativeOrchestrator( built_at: new Date().toISOString(), }); - info( - `Native build orchestrator completed: ${result.nodeCount ?? 0} nodes, ${result.edgeCount ?? 0} edges, ${result.fileCount ?? 0} files`, - ); + // The build summary is logged after the JS edge-writing post-passes below + // (dropped-language backfill, CHA, this/super dispatch) so the reported + // counts include their edges (#1452). // ── Post-native structure + analysis ────────────────────────────── let analysisTiming = { @@ -1479,6 +1482,27 @@ export async function tryNativeOrchestrator( // this/super dispatch) so every new edge in this build cycle gets a label. 
backfillEdgeTechniquesAfterNativeOrchestrator(ctx.db, !!result.isFullBuild, result.changedFiles); + // Re-count nodes/edges now that all edge-writing post-passes have run: the + // Rust orchestrator captured its counts before the JS post-passes added + // edges, so both its summary and build_meta under-report (#1452). + let finalNodeCount = result.nodeCount ?? 0; + let finalEdgeCount = result.edgeCount ?? 0; + try { + const counts = (ctx.db as unknown as BetterSqlite3Database) + .prepare('SELECT (SELECT COUNT(*) FROM nodes) AS n, (SELECT COUNT(*) FROM edges) AS e') + .get() as { n: number; e: number }; + if (counts.n !== finalNodeCount || counts.e !== finalEdgeCount) { + finalNodeCount = counts.n; + finalEdgeCount = counts.e; + setBuildMeta(ctx.db, { node_count: finalNodeCount, edge_count: finalEdgeCount }); + } + } catch (err) { + debug(`Post-pass node/edge re-count failed: ${toErrorMessage(err)}`); + } + info( + `Native build orchestrator completed: ${finalNodeCount} nodes, ${finalEdgeCount} edges, ${result.fileCount ?? 0} files`, + ); + // ── Structure and analysis fallback (run after edge-writing so roles see full graph) ── // Reconstruct fileSymbols once for both structure and analysis to avoid two // expensive DB scans. The DB handoff above already ensured ctx.db is a proper diff --git a/src/domain/wasm-worker-entry.ts b/src/domain/wasm-worker-entry.ts index c9dcc300a..25c05829b 100644 --- a/src/domain/wasm-worker-entry.ts +++ b/src/domain/wasm-worker-entry.ts @@ -818,6 +818,7 @@ function serializeExtractorOutput( ...(symbols.objectPropBindings?.length ? { objectPropBindings: symbols.objectPropBindings } : {}), + ...(symbols.thisCallBindings?.length ? { thisCallBindings: symbols.thisCallBindings } : {}), ...(symbols.newExpressions?.length ? { newExpressions: symbols.newExpressions } : {}), ...(symbols.definePropertyReceivers?.size ? 
{ definePropertyReceivers: Array.from(symbols.definePropertyReceivers.entries()) } diff --git a/src/domain/wasm-worker-pool.ts b/src/domain/wasm-worker-pool.ts index e4a7f1c2f..4f4413318 100644 --- a/src/domain/wasm-worker-pool.ts +++ b/src/domain/wasm-worker-pool.ts @@ -115,6 +115,7 @@ function deserializeResult(ser: SerializedExtractorOutput | null): ExtractorOutp if (ser.objectRestParamBindings?.length) out.objectRestParamBindings = ser.objectRestParamBindings; if (ser.objectPropBindings?.length) out.objectPropBindings = ser.objectPropBindings; + if (ser.thisCallBindings?.length) out.thisCallBindings = ser.thisCallBindings; if (ser.newExpressions?.length) out.newExpressions = ser.newExpressions; if (ser.definePropertyReceivers?.length) { const m = new Map(); diff --git a/src/domain/wasm-worker-protocol.ts b/src/domain/wasm-worker-protocol.ts index 71c1e9c97..6302f81ee 100644 --- a/src/domain/wasm-worker-protocol.ts +++ b/src/domain/wasm-worker-protocol.ts @@ -71,6 +71,7 @@ export interface SerializedExtractorOutput { objectRestParamBindings?: import('../types.js').ObjectRestParamBinding[]; objectPropBindings?: import('../types.js').ObjectPropBinding[]; paramBindings?: import('../types.js').ParamBinding[]; + thisCallBindings?: import('../types.js').ThisCallBinding[]; newExpressions?: readonly string[]; /** Serialized definePropertyReceivers map (funcName → receiverVarName) as tuple array. 
*/ definePropertyReceivers?: Array<[string, string]>; diff --git a/src/extractors/javascript.ts b/src/extractors/javascript.ts index d422ff5cd..2be0ac4b3 100644 --- a/src/extractors/javascript.ts +++ b/src/extractors/javascript.ts @@ -1866,6 +1866,8 @@ function runContextCollectorWalk(rootNode: TreeSitterNode, out: ContextCollector collectCollectionWrapBinding(node, out.fnRefBindings); } else if (t === 'required_parameter' || t === 'optional_parameter') { handleParamTypeMap(node, out.typeMap); + } else if (t === 'public_field_definition' || t === 'field_definition') { + handleFieldDefTypeMap(node, out.typeMap); } else if (t === 'assignment_expression') { handlePropWriteTypeMap(node, out.typeMap, typeMapClass); } else if (t === 'call_expression') { @@ -2090,6 +2092,34 @@ function handleParamTypeMap(node: TreeSitterNode, typeMap: Map`. + * Seeds both "repo" and "this.repo" so `this.repo.method()` calls resolve to the + * declared type via the type map. Mirrors the field_definition branch of + * match_js_type_map in crates/codegraph-core/src/extractors/javascript.rs. + */ +function handleFieldDefTypeMap(node: TreeSitterNode, typeMap: Map): void { + const nameNode = + node.childForFieldName('name') || + node.childForFieldName('property') || + findChild(node, 'property_identifier'); + if (!nameNode) return; + const kind = nameNode.type; + if ( + kind !== 'property_identifier' && + kind !== 'identifier' && + kind !== 'private_property_identifier' + ) + return; + const typeAnno = findChild(node, 'type_annotation'); + if (!typeAnno) return; + const typeName = extractSimpleTypeName(typeAnno); + if (!typeName) return; + setTypeMapEntry(typeMap, nameNode.text, typeName, 0.9); + // "this.fieldName" key resolves `this.repo.method()` calls. + setTypeMapEntry(typeMap, `this.${nameNode.text}`, typeName, 0.9); +} + /** * Phase 8.3d: seed the pts map from object property writes. 
* diff --git a/tests/parsers/javascript.test.ts b/tests/parsers/javascript.test.ts index c666fe7e1..1ab62c16c 100644 --- a/tests/parsers/javascript.test.ts +++ b/tests/parsers/javascript.test.ts @@ -217,6 +217,17 @@ describe('JavaScript parser', () => { expect(symbols.typeMap.get('res')).toEqual({ type: 'Response', confidence: 0.9 }); }); + it('extracts class field annotations into typeMap with confidence 0.9', () => { + const symbols = parseTS(` + class UserService { + private repo: Repository; + run() { this.repo.save(); } + } + `); + expect(symbols.typeMap.get('repo')).toEqual({ type: 'Repository', confidence: 0.9 }); + expect(symbols.typeMap.get('this.repo')).toEqual({ type: 'Repository', confidence: 0.9 }); + }); + it('returns empty typeMap when no annotations', () => { const symbols = parseJS(`const x = 42; function foo(a, b) {}`); expect(symbols.typeMap).toBeInstanceOf(Map); From aa88fdada8c677d8b03706098a0da12ffa40b050 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 11 Jun 2026 19:01:46 -0600 Subject: [PATCH 3/5] fix(native): port Phase 8.2 cross-file return-type propagation to the Rust orchestrator The JS pipeline seeds each file's typeMap with the return types of imported factory functions (propagateReturnTypesAcrossFiles) before edge resolution, so `const svc = buildService(); svc.createUser()` resolves across files. The Rust orchestrator extracted returnTypeMap and callAssignments but never consumed them, dropping those calls and receiver edges on the native path (hybrid was unaffected because the JS pipeline pre-seeds the typeMap it sends over napi). Mirror the JS pass in pipeline.rs: build a per-file + global return-type index, resolve each call assignment through the file's imports (or the qualified Type.method global map), and inject typeMap entries at confidence minus PROPAGATION_HOP_PENALTY, never overwriting locally typed variables. 
Verified with scripts/parity-compare.mjs: the javascript fixture now matches exactly across wasm/native/hybrid (180 edges incl. driver.mjs conf=0.7 calls + conf=0.75 receiver edges). --- .../src/domain/graph/builder/pipeline.rs | 227 +++++++++++++++++- .../graph/builder/stages/build_edges.rs | 2 +- 2 files changed, 227 insertions(+), 2 deletions(-) diff --git a/crates/codegraph-core/src/domain/graph/builder/pipeline.rs b/crates/codegraph-core/src/domain/graph/builder/pipeline.rs index 36b9d7059..25f518965 100644 --- a/crates/codegraph-core/src/domain/graph/builder/pipeline.rs +++ b/crates/codegraph-core/src/domain/graph/builder/pipeline.rs @@ -27,7 +27,7 @@ use crate::domain::parallel; use crate::db::repository::ast::{self, AstInsertNode, FileAstBatch}; use crate::graph::classifiers::roles; use crate::features::structure; -use crate::types::{FileSymbols, ImportResolutionInput}; +use crate::types::{FileSymbols, ImportResolutionInput, TypeMapEntry}; use rusqlite::Connection; use serde::Serialize; use std::collections::{HashMap, HashSet}; @@ -550,6 +550,11 @@ pub fn run_pipeline( let import_edge_rows = import_edges::build_import_edges(conn, &import_ctx); import_edges::insert_edges(conn, &import_edge_rows); + // Phase 8.2: cross-file return-type propagation — seed each file's + // type_map with the return types of imported functions before call-edge + // building, mirroring propagateReturnTypesAcrossFiles in build-edges.ts. + propagate_return_types_across_files(&mut file_symbols, &import_ctx); + // Build call edges using existing Rust edge_builder (internal path) // For now, call edges are built via the existing napi-exported function's // internal logic. We load nodes from DB and pass to the edge builder. @@ -1288,6 +1293,106 @@ fn collect_imported_names_for_file( imported_names } +/// Phase 8.2: cross-file return-type propagation. 
+/// +/// Mirrors `propagateReturnTypesAcrossFiles` in `build-edges.ts`: when a file +/// assigns the return value of an imported function to a variable +/// (`const svc = buildService()`), look up the callee's return type in the +/// defining file's `return_type_map` and seed the assigning file's `type_map` +/// so method calls and receiver edges on that variable resolve. Must run +/// before `build_and_insert_call_edges`. +fn propagate_return_types_across_files( + file_symbols: &mut HashMap, + import_ctx: &ImportEdgeContext, +) { + use crate::domain::graph::builder::stages::build_edges::PROPAGATION_HOP_PENALTY; + + // rel_path → (fn_name → (type_name, confidence)) + let mut return_type_index: HashMap> = HashMap::new(); + for (rel_path, symbols) in file_symbols.iter() { + if symbols.return_type_map.is_empty() { + continue; + } + let per_file = return_type_index.entry(rel_path.clone()).or_default(); + for e in &symbols.return_type_map { + per_file.insert(e.name.clone(), (e.type_name.clone(), e.confidence)); + } + } + if return_type_index.is_empty() { + return; + } + + // Flat map for qualified `Type.method` lookups. Higher confidence wins; + // ties keep the first writer. Files are visited in sorted order so the + // tie-break is deterministic (HashMap iteration order is not). 
+ let mut global_return_types: HashMap = HashMap::new(); + let mut sorted_paths: Vec<&String> = return_type_index.keys().collect(); + sorted_paths.sort(); + for rel_path in sorted_paths { + for (name, entry) in &return_type_index[rel_path] { + let replace = match global_return_types.get(name) { + Some(existing) => entry.1 > existing.1, + None => true, + }; + if replace { + global_return_types.insert(name.clone(), entry.clone()); + } + } + } + + for (rel_path, symbols) in file_symbols.iter_mut() { + if symbols.call_assignments.is_empty() { + continue; + } + + let abs_file = Path::new(&import_ctx.root_dir).join(rel_path.as_str()); + let abs_str = abs_file.to_str().unwrap_or(""); + let imported_names = collect_imported_names_for_file(abs_str, symbols, import_ctx); + // Later entries overwrite earlier ones on duplicate names — same as the + // HashMap collect in build_call_edges. + let imported_map: HashMap = imported_names + .into_iter() + .map(|e| (e.name, e.file)) + .collect(); + + let mut injections: Vec = Vec::new(); + let mut injected: HashSet = HashSet::new(); + for ca in &symbols.call_assignments { + // Already resolved locally (JS: `typeMap.has(varName)`); first + // successful injection wins for repeated assignments to one name. 
+ if injected.contains(&ca.var_name) + || symbols.type_map.iter().any(|t| t.name == ca.var_name) + { + continue; + } + + let found = match &ca.receiver_type_name { + Some(receiver) => { + global_return_types.get(&format!("{receiver}.{}", ca.callee_name)) + } + None => imported_map.get(&ca.callee_name).and_then(|from| { + return_type_index + .get(from) + .and_then(|m| m.get(&ca.callee_name)) + }), + }; + + if let Some((type_name, confidence)) = found { + let propagated = confidence - PROPAGATION_HOP_PENALTY; + if propagated > 0.0 { + injections.push(TypeMapEntry { + name: ca.var_name.clone(), + type_name: type_name.clone(), + confidence: propagated, + }); + injected.insert(ca.var_name.clone()); + } + } + } + symbols.type_map.extend(injections); + } +} + /// Insert the edges produced by the native edge builder into the edges table. fn insert_call_edge_rows(conn: &Connection, edges: &[crate::domain::graph::builder::stages::build_edges::ComputedEdge]) { if edges.is_empty() { @@ -1815,3 +1920,123 @@ fn now_ms() -> f64 { .map(|d| d.as_millis() as f64) .unwrap_or(0.0) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::{Import, PathAliases}; + + fn make_import_ctx(file_symbols: &HashMap) -> ImportEdgeContext { + let mut batch_resolved = HashMap::new(); + batch_resolved.insert("/repo/driver.js|./service.js".to_string(), "service.js".to_string()); + ImportEdgeContext { + batch_resolved, + reexport_map: HashMap::new(), + barrel_only_files: HashSet::new(), + file_symbols: file_symbols.clone(), + root_dir: "/repo".to_string(), + aliases: PathAliases { base_url: None, paths: vec![] }, + known_files: HashSet::new(), + } + } + + fn entry(name: &str, type_name: &str, confidence: f64) -> TypeMapEntry { + TypeMapEntry { + name: name.to_string(), + type_name: type_name.to_string(), + confidence, + } + } + + #[test] + fn propagates_imported_factory_return_type_into_type_map() { + let mut service = FileSymbols::new("service.js".to_string()); + 
service.return_type_map.push(entry("buildService", "UserService", 0.85)); + + let mut driver = FileSymbols::new("driver.js".to_string()); + driver.imports.push(Import::new( + "./service.js".to_string(), + vec!["buildService".to_string()], + 1, + )); + driver.call_assignments.push(crate::types::NativeCallAssignment { + var_name: "svc".to_string(), + callee_name: "buildService".to_string(), + receiver_type_name: None, + }); + + let mut file_symbols = HashMap::new(); + file_symbols.insert("service.js".to_string(), service); + file_symbols.insert("driver.js".to_string(), driver); + let import_ctx = make_import_ctx(&file_symbols); + + propagate_return_types_across_files(&mut file_symbols, &import_ctx); + + let driver = &file_symbols["driver.js"]; + let seeded = driver + .type_map + .iter() + .find(|t| t.name == "svc") + .expect("svc should be seeded from buildService's return type"); + assert_eq!(seeded.type_name, "UserService"); + // 0.85 (inferred `return new X()`) minus one propagation hop. 
+ assert!((seeded.confidence - 0.75).abs() < 1e-9); + } + + #[test] + fn qualified_receiver_lookup_uses_global_return_type_map() { + let mut factory = FileSymbols::new("factory.js".to_string()); + factory.return_type_map.push(entry("Factory.create", "Widget", 1.0)); + + let mut driver = FileSymbols::new("driver.js".to_string()); + driver.type_map.push(entry("factory", "Factory", 0.9)); + driver.call_assignments.push(crate::types::NativeCallAssignment { + var_name: "w".to_string(), + callee_name: "create".to_string(), + receiver_type_name: Some("Factory".to_string()), + }); + + let mut file_symbols = HashMap::new(); + file_symbols.insert("factory.js".to_string(), factory); + file_symbols.insert("driver.js".to_string(), driver); + let import_ctx = make_import_ctx(&file_symbols); + + propagate_return_types_across_files(&mut file_symbols, &import_ctx); + + let driver = &file_symbols["driver.js"]; + let seeded = driver.type_map.iter().find(|t| t.name == "w").expect("w seeded"); + assert_eq!(seeded.type_name, "Widget"); + assert!((seeded.confidence - 0.9).abs() < 1e-9); + } + + #[test] + fn locally_typed_variables_are_not_overwritten() { + let mut service = FileSymbols::new("service.js".to_string()); + service.return_type_map.push(entry("buildService", "UserService", 0.85)); + + let mut driver = FileSymbols::new("driver.js".to_string()); + driver.imports.push(Import::new( + "./service.js".to_string(), + vec!["buildService".to_string()], + 1, + )); + driver.type_map.push(entry("svc", "LocalOverride", 1.0)); + driver.call_assignments.push(crate::types::NativeCallAssignment { + var_name: "svc".to_string(), + callee_name: "buildService".to_string(), + receiver_type_name: None, + }); + + let mut file_symbols = HashMap::new(); + file_symbols.insert("service.js".to_string(), service); + file_symbols.insert("driver.js".to_string(), driver); + let import_ctx = make_import_ctx(&file_symbols); + + propagate_return_types_across_files(&mut file_symbols, &import_ctx); + + let driver = 
&file_symbols["driver.js"]; + let svc_entries: Vec<_> = driver.type_map.iter().filter(|t| t.name == "svc").collect(); + assert_eq!(svc_entries.len(), 1, "no duplicate entry should be injected"); + assert_eq!(svc_entries[0].type_name, "LocalOverride"); + } +} diff --git a/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs b/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs index 2aedf7fdd..f22d87ded 100644 --- a/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs +++ b/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs @@ -19,7 +19,7 @@ const IMPLEMENTS_TARGET_KINDS: &[&str] = &["interface", "trait", "class"]; /// Confidence penalty per alias hop — mirrors `PROPAGATION_HOP_PENALTY` in /// `src/extractors/javascript.ts`. -const PROPAGATION_HOP_PENALTY: f64 = 0.1; +pub(crate) const PROPAGATION_HOP_PENALTY: f64 = 0.1; #[napi(object)] pub struct NodeInfo { From 9075c122865a86e17510a8f6808b6c47f76ce4c3 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 11 Jun 2026 19:23:06 -0600 Subject: [PATCH 4/5] fix(native): add process/window/document/globalThis to JS_BUILTIN_GLOBALS (#1465) --- crates/codegraph-core/src/extractors/javascript.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/crates/codegraph-core/src/extractors/javascript.rs b/crates/codegraph-core/src/extractors/javascript.rs index 260df4dec..2c00a615e 100644 --- a/crates/codegraph-core/src/extractors/javascript.rs +++ b/crates/codegraph-core/src/extractors/javascript.rs @@ -6,7 +6,8 @@ use crate::types::*; use tree_sitter::{Node, Tree}; /// Well-known JS globals that must not be recorded as pts targets. -/// Mirrors the `BUILTIN_GLOBALS` set in `src/extractors/javascript.ts`. +/// Mirrors the `BUILTIN_GLOBALS` set in `src/extractors/javascript.ts` +/// and must be identical to the set tested in `is_js_builtin_global`. 
const JS_BUILTIN_GLOBALS: &[&str] = &[ "Math", "JSON", "Promise", "Array", "Object", "Date", "Error", "Symbol", "Map", "Set", "RegExp", "Number", "String", "Boolean", @@ -17,7 +18,11 @@ const JS_BUILTIN_GLOBALS: &[&str] = &[ "URL", "URLSearchParams", "TextEncoder", "TextDecoder", "AbortController", "AbortSignal", "Headers", "Request", "Response", "FormData", "Blob", "File", "ReadableStream", "WritableStream", - "TransformStream", "console", "Buffer", "EventEmitter", "Stream", + "TransformStream", + // Browser/runtime globals — must match is_js_builtin_global below + "console", "process", "window", "document", "globalThis", + // Node.js built-ins + "Buffer", "EventEmitter", "Stream", ]; pub struct JsExtractor; From ad2d191e00c51a8edf561d62158d37bf1aaea3e6 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 11 Jun 2026 21:10:35 -0600 Subject: [PATCH 5/5] fix(native): add safety comment on max_idx usize cast guard (#1465) --- .../src/domain/graph/builder/stages/build_edges.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs b/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs index 98b8ad90f..3475adebe 100644 --- a/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs +++ b/crates/codegraph-core/src/domain/graph/builder/stages/build_edges.rs @@ -243,6 +243,8 @@ fn build_points_to_map( for sb in bindings.spread_arg_bindings { let Some(params) = definition_params.get(sb.callee.as_str()) else { continue }; let max_idx = array_max_index.get(sb.array_name.as_str()).copied().unwrap_or(-1); + // Safety: the cast to usize is only reached inside the `max_idx >= 0` guard, + // so max_idx is non-negative here and cannot wrap to usize::MAX. if max_idx >= 0 { for i in 0..=(max_idx as usize) { let param_idx = sb.start_index as usize + i;