From 244621daefbbb8ffc5813ccd4e6fb56c446db7c7 Mon Sep 17 00:00:00 2001 From: Stan Lo Date: Thu, 5 Feb 2026 16:17:49 +0000 Subject: [PATCH] Add --report-orphans flag to produce orphan definition report Add a CLI flag that writes a TSV report of all orphan definitions (definitions not linked to any declaration) to a specified file. Each line contains: kind, qualified name, and source location. The qualified name is reconstructed by walking either the Name system's parent_scope chain (for constant-like definitions) or the lexical_nesting_id chain (for method-like definitions). Ungate Offset#to_display_range so it can be reused for location formatting outside of tests. --- rust/rubydex/src/main.rs | 24 +++ rust/rubydex/src/model/definitions.rs | 11 +- rust/rubydex/src/offset.rs | 2 - rust/rubydex/src/stats.rs | 2 + rust/rubydex/src/stats/orphan_report.rs | 262 ++++++++++++++++++++++++ 5 files changed, 298 insertions(+), 3 deletions(-) create mode 100644 rust/rubydex/src/stats/orphan_report.rs diff --git a/rust/rubydex/src/main.rs b/rust/rubydex/src/main.rs index c863391a..ffe4098c 100644 --- a/rust/rubydex/src/main.rs +++ b/rust/rubydex/src/main.rs @@ -27,6 +27,16 @@ struct Args { #[arg(long = "stats", help = "Show detailed performance statistics")] stats: bool, + + #[arg( + long = "report-orphans", + value_name = "PATH", + num_args = 0..=1, + require_equals = true, + default_missing_value = "/tmp/rubydex-orphan-report.txt", + help = "Write orphan definitions report to specified file" + )] + report_orphans: Option, } #[derive(Debug, Clone, ValueEnum)] @@ -101,6 +111,20 @@ fn main() { MemoryStats::print_memory_usage(); } + // Orphan report + if let Some(ref path) = args.report_orphans { + match std::fs::File::create(path) { + Ok(mut file) => { + if let Err(e) = graph.write_orphan_report(&mut file) { + eprintln!("Failed to write orphan report: {e}"); + } else { + println!("Orphan report written to {path}"); + } + } + Err(e) => eprintln!("Failed to create orphan report file: {e}"), + } + } + // Generate visualization or print statistics if args.visualize { println!("{}", dot::generate(&graph)); diff --git a/rust/rubydex/src/model/definitions.rs b/rust/rubydex/src/model/definitions.rs index 4fe7dd38..3f0d72c0 100644 --- a/rust/rubydex/src/model/definitions.rs +++ b/rust/rubydex/src/model/definitions.rs @@ -142,7 +142,16 @@ impl Definition { Definition::SingletonClass(d) => Some(d.name_id()), Definition::Module(d) => Some(d.name_id()), Definition::Constant(d) => Some(d.name_id()), - _ => None, + Definition::ConstantAlias(d) => Some(d.name_id()), + Definition::GlobalVariable(_) + | Definition::InstanceVariable(_) + | Definition::ClassVariable(_) + | Definition::AttrAccessor(_) + | Definition::AttrReader(_) + | Definition::AttrWriter(_) + | Definition::Method(_) + | Definition::MethodAlias(_) + | Definition::GlobalVariableAlias(_) => None, } } diff --git a/rust/rubydex/src/offset.rs b/rust/rubydex/src/offset.rs index 0e975eba..640c7c58 100644 --- a/rust/rubydex/src/offset.rs +++ b/rust/rubydex/src/offset.rs @@ -4,7 +4,6 @@ //! within a file. It can be used to track positions in source code and convert //! between byte offsets and line/column positions. -#[cfg(any(test, feature = "test_utils"))] use crate::model::document::Document; /// Represents a byte offset range within a specific file. @@ -59,7 +58,6 @@ impl Offset { } /// Converts an offset to a display range like `1:1-1:5` - #[cfg(any(test, feature = "test_utils"))] #[must_use] pub fn to_display_range(&self, document: &Document) -> String { let line_index = document.line_index(); diff --git a/rust/rubydex/src/stats.rs b/rust/rubydex/src/stats.rs index 61abaf13..a0683057 100644 --- a/rust/rubydex/src/stats.rs +++ b/rust/rubydex/src/stats.rs @@ -1,4 +1,6 @@ pub mod memory; +// TODO: When the rubydex is stable enough, turn this into a debug-only feature or revisit if we still need it. +pub mod orphan_report; pub mod timer; /// Helper function to compute percentage diff --git a/rust/rubydex/src/stats/orphan_report.rs b/rust/rubydex/src/stats/orphan_report.rs new file mode 100644 index 00000000..b95f96df --- /dev/null +++ b/rust/rubydex/src/stats/orphan_report.rs @@ -0,0 +1,262 @@ +use std::collections::HashSet; +use std::io::Write; + +use crate::model::declaration::Declaration; +use crate::model::definitions::Definition; +use crate::model::graph::Graph; +use crate::model::ids::{DefinitionId, NameId, StringId}; +use crate::model::name::{NameRef, ParentScope}; + +impl Graph { + /// Writes a report of orphan definitions (definitions not linked to any declaration). + /// + /// Format: `type\tconcatenated_name\tlocation` (TSV) + /// + /// # Errors + /// + /// Returns an error if writing fails. + pub fn write_orphan_report(&self, writer: &mut impl Write) -> std::io::Result<()> { + // Collect all definition IDs that are linked to declarations + let linked_definition_ids: HashSet<&DefinitionId> = self + .declarations() + .values() + .flat_map(Declaration::definitions) + .collect(); + + // Find orphan definitions + let mut orphans: Vec<_> = self + .definitions() + .iter() + .filter(|(id, _)| !linked_definition_ids.contains(id)) + .collect(); + + // Sort by type, then by location for consistent output + orphans.sort_by(|(_, a), (_, b)| { + a.kind() + .cmp(b.kind()) + .then_with(|| a.uri_id().cmp(b.uri_id())) + .then_with(|| a.offset().cmp(b.offset())) + }); + + for (_, definition) in orphans { + let kind = definition.kind(); + let name = match definition.name_id().copied() { + Some(id) => self.build_concatenated_name_from_name(id), + None => self.build_concatenated_name_from_lexical_nesting(definition), + }; + let location = self.definition_location(definition); + + writeln!(writer, "{kind}\t{name}\t{location}")?; + } + + Ok(()) + } + + /// Walks the Name system's `parent_scope` chain to reconstruct the constant path. + /// Falls back to `nesting` for enclosing scope context when there is no explicit parent scope. + /// + /// Note: this produces a concatenated name by piecing together name parts, not a properly + /// resolved qualified name. + pub(crate) fn build_concatenated_name_from_name(&self, name_id: NameId) -> String { + let Some(name_ref) = self.names().get(&name_id) else { + return "".to_string(); + }; + let simple_name = self.string_id_to_string(*name_ref.str()); + + match name_ref.parent_scope() { + ParentScope::Some(parent_id) | ParentScope::Attached(parent_id) => { + let parent_name = self.build_concatenated_name_from_name(*parent_id); + format!("{parent_name}::{simple_name}") + } + ParentScope::TopLevel => format!("::{simple_name}"), + ParentScope::None => { + let prefix = name_ref + .nesting() + .as_ref() + .map(|nesting_id| self.build_nesting_prefix(*nesting_id)) + .unwrap_or_default(); + + if prefix.is_empty() { + simple_name + } else { + format!("{prefix}::{simple_name}") + } + } + } + } + + /// Resolves the enclosing nesting `NameId` to a string prefix. + /// For resolved names, uses the declaration's fully qualified name. + /// For unresolved names, recursively walks the name chain. + fn build_nesting_prefix(&self, nesting_id: NameId) -> String { + let Some(name_ref) = self.names().get(&nesting_id) else { + return String::new(); + }; + match name_ref { + NameRef::Resolved(resolved) => self + .declarations() + .get(resolved.declaration_id()) + .map_or_else(String::new, |decl| decl.name().to_string()), + NameRef::Unresolved(_) => self.build_concatenated_name_from_name(nesting_id), + } + } + + /// Builds a concatenated name for non-constant definitions by walking the `lexical_nesting_id` chain. + /// + /// Note: this pieces together name parts from the lexical nesting, not a properly resolved + /// qualified name. + pub(crate) fn build_concatenated_name_from_lexical_nesting(&self, definition: &Definition) -> String { + let simple_name = self.string_id_to_string(self.definition_string_id(definition)); + + // Collect enclosing nesting names from inner to outer + let mut nesting_parts = Vec::new(); + let mut current_nesting = *definition.lexical_nesting_id(); + + while let Some(nesting_id) = current_nesting { + let Some(nesting_def) = self.definitions().get(&nesting_id) else { + break; + }; + nesting_parts.push(self.string_id_to_string(self.definition_string_id(nesting_def))); + current_nesting = *nesting_def.lexical_nesting_id(); + } + + if nesting_parts.is_empty() { + return simple_name; + } + + // Reverse to get outer-to-inner order for the prefix + nesting_parts.reverse(); + let prefix = nesting_parts.join("::"); + + let separator = match definition { + Definition::Method(_) + | Definition::AttrAccessor(_) + | Definition::AttrReader(_) + | Definition::AttrWriter(_) + | Definition::MethodAlias(_) + | Definition::InstanceVariable(_) => "#", + Definition::Class(_) + | Definition::SingletonClass(_) + | Definition::Module(_) + | Definition::Constant(_) + | Definition::ConstantAlias(_) + | Definition::GlobalVariable(_) + | Definition::ClassVariable(_) + | Definition::GlobalVariableAlias(_) => "::", + }; + + format!("{prefix}{separator}{simple_name}") + } + + /// Converts a `StringId` to its string value. + fn string_id_to_string(&self, string_id: StringId) -> String { + self.strings().get(&string_id).unwrap().to_string() + } + + /// Get location in the format of `uri#L` for a definition. + /// The format is clickable in VS Code. + pub(crate) fn definition_location(&self, definition: &Definition) -> String { + let uri_id = definition.uri_id(); + + let Some(document) = self.documents().get(uri_id) else { + return format!("{uri_id}:"); + }; + + let uri = document.uri(); + let line_index = document.line_index(); + let start = line_index.line_col(definition.offset().start().into()); + format!("{uri}#L{}", start.line + 1) + } +} + +#[cfg(test)] +mod tests { + use crate::test_utils::GraphTest; + + #[test] + fn build_concatenated_name_from_name_for_constants() { + let cases = vec![ + ("class Foo; end", "Foo"), + ("module Foo; class Bar; end; end", "Foo::Bar"), + ("module Foo; module Bar; class Baz; end; end; end", "Foo::Bar::Baz"), + ]; + + for (source, expected_name) in cases { + let mut context = GraphTest::new(); + context.index_uri("file:///test.rb", source); + context.resolve(); + + let definitions = context.graph().get(expected_name).unwrap(); + let definition = definitions.first().unwrap(); + let name_id = *definition.name_id().unwrap(); + let actual = context.graph().build_concatenated_name_from_name(name_id); + + assert_eq!(actual, expected_name, "For source: {source}"); + } + } + + #[test] + fn build_concatenated_name_from_lexical_nesting_for_methods() { + let cases = vec![ + ("class Foo; def bar; end; end", "Foo#bar()"), + ("module Foo; class Bar; def baz; end; end; end", "Foo::Bar#baz()"), + ("def bar; end", "bar()"), + ]; + + for (source, expected_name) in cases { + let mut context = GraphTest::new(); + // Index without resolution so methods remain orphans + context.index_uri("file:///test.rb", source); + + let definition = context + .graph() + .definitions() + .values() + .find(|d| d.kind() == "Method" && d.name_id().is_none()) + .unwrap_or_else(|| panic!("No Method definition without name_id found for source: {source}")); + + let actual = context.graph().build_concatenated_name_from_lexical_nesting(definition); + assert_eq!(actual, expected_name, "For source: {source}"); + } + } + + #[test] + fn build_concatenated_name_from_lexical_nesting_for_instance_variables() { + let mut context = GraphTest::new(); + context.index_uri("file:///test.rb", "class Foo; def initialize; @ivar = 1; end; end"); + + let definition = context + .graph() + .definitions() + .values() + .find(|d| d.kind() == "InstanceVariable") + .unwrap(); + + let actual = context.graph().build_concatenated_name_from_lexical_nesting(definition); + assert_eq!(actual, "Foo::initialize()#@ivar"); + } + + #[test] + fn definition_location_uses_clickable_uri_fragment() { + let mut context = GraphTest::new(); + context.index_uri( + "file:///foo.rb", + " + class Foo + def bar + end + end + ", + ); + + let definition = context + .graph() + .definitions() + .values() + .find(|d| d.kind() == "Method") + .unwrap(); + + let actual = context.graph().definition_location(definition); + assert_eq!(actual, "file:///foo.rb#L2"); + } +}