Skip to content

Commit 244621d

Browse files
committed
Add --report-orphans flag to produce orphan definition report
Add a CLI flag that writes a TSV report of all orphan definitions (definitions not linked to any declaration) to a specified file. Each line contains: kind, qualified name, and source location. The qualified name is reconstructed by walking either the Name system's parent_scope chain (for constant-like definitions) or the lexical_nesting_id chain (for method-like definitions). Ungate Offset#to_display_range so it can be reused for location formatting outside of tests.
1 parent f5f2eee commit 244621d

5 files changed

Lines changed: 298 additions & 3 deletions

File tree

rust/rubydex/src/main.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,16 @@ struct Args {
2727

2828
#[arg(long = "stats", help = "Show detailed performance statistics")]
2929
stats: bool,
30+
31+
#[arg(
32+
long = "report-orphans",
33+
value_name = "PATH",
34+
num_args = 0..=1,
35+
require_equals = true,
36+
default_missing_value = "/tmp/rubydex-orphan-report.txt",
37+
help = "Write orphan definitions report to specified file"
38+
)]
39+
report_orphans: Option<String>,
3040
}
3141

3242
#[derive(Debug, Clone, ValueEnum)]
@@ -101,6 +111,20 @@ fn main() {
101111
MemoryStats::print_memory_usage();
102112
}
103113

114+
// Orphan report
115+
if let Some(ref path) = args.report_orphans {
116+
match std::fs::File::create(path) {
117+
Ok(mut file) => {
118+
if let Err(e) = graph.write_orphan_report(&mut file) {
119+
eprintln!("Failed to write orphan report: {e}");
120+
} else {
121+
println!("Orphan report written to {path}");
122+
}
123+
}
124+
Err(e) => eprintln!("Failed to create orphan report file: {e}"),
125+
}
126+
}
127+
104128
// Generate visualization or print statistics
105129
if args.visualize {
106130
println!("{}", dot::generate(&graph));

rust/rubydex/src/model/definitions.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,16 @@ impl Definition {
142142
Definition::SingletonClass(d) => Some(d.name_id()),
143143
Definition::Module(d) => Some(d.name_id()),
144144
Definition::Constant(d) => Some(d.name_id()),
145-
_ => None,
145+
Definition::ConstantAlias(d) => Some(d.name_id()),
146+
Definition::GlobalVariable(_)
147+
| Definition::InstanceVariable(_)
148+
| Definition::ClassVariable(_)
149+
| Definition::AttrAccessor(_)
150+
| Definition::AttrReader(_)
151+
| Definition::AttrWriter(_)
152+
| Definition::Method(_)
153+
| Definition::MethodAlias(_)
154+
| Definition::GlobalVariableAlias(_) => None,
146155
}
147156
}
148157

rust/rubydex/src/offset.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
//! within a file. It can be used to track positions in source code and convert
55
//! between byte offsets and line/column positions.
66
7-
#[cfg(any(test, feature = "test_utils"))]
87
use crate::model::document::Document;
98

109
/// Represents a byte offset range within a specific file.
@@ -59,7 +58,6 @@ impl Offset {
5958
}
6059

6160
/// Converts an offset to a display range like `1:1-1:5`
62-
#[cfg(any(test, feature = "test_utils"))]
6361
#[must_use]
6462
pub fn to_display_range(&self, document: &Document) -> String {
6563
let line_index = document.line_index();

rust/rubydex/src/stats.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
pub mod memory;
2+
// TODO: Once rubydex is stable enough, turn this into a debug-only feature or revisit whether we still need it.
3+
pub mod orphan_report;
24
pub mod timer;
35

46
/// Helper function to compute percentage
Lines changed: 262 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,262 @@
1+
use std::collections::HashSet;
2+
use std::io::Write;
3+
4+
use crate::model::declaration::Declaration;
5+
use crate::model::definitions::Definition;
6+
use crate::model::graph::Graph;
7+
use crate::model::ids::{DefinitionId, NameId, StringId};
8+
use crate::model::name::{NameRef, ParentScope};
9+
10+
impl Graph {
11+
/// Writes a report of orphan definitions (definitions not linked to any declaration).
12+
///
13+
/// Format: `type\tconcatenated_name\tlocation` (TSV)
14+
///
15+
/// # Errors
16+
///
17+
/// Returns an error if writing fails.
18+
pub fn write_orphan_report(&self, writer: &mut impl Write) -> std::io::Result<()> {
19+
// Collect all definition IDs that are linked to declarations
20+
let linked_definition_ids: HashSet<&DefinitionId> = self
21+
.declarations()
22+
.values()
23+
.flat_map(Declaration::definitions)
24+
.collect();
25+
26+
// Find orphan definitions
27+
let mut orphans: Vec<_> = self
28+
.definitions()
29+
.iter()
30+
.filter(|(id, _)| !linked_definition_ids.contains(id))
31+
.collect();
32+
33+
// Sort by type, then by location for consistent output
34+
orphans.sort_by(|(_, a), (_, b)| {
35+
a.kind()
36+
.cmp(b.kind())
37+
.then_with(|| a.uri_id().cmp(b.uri_id()))
38+
.then_with(|| a.offset().cmp(b.offset()))
39+
});
40+
41+
for (_, definition) in orphans {
42+
let kind = definition.kind();
43+
let name = match definition.name_id().copied() {
44+
Some(id) => self.build_concatenated_name_from_name(id),
45+
None => self.build_concatenated_name_from_lexical_nesting(definition),
46+
};
47+
let location = self.definition_location(definition);
48+
49+
writeln!(writer, "{kind}\t{name}\t{location}")?;
50+
}
51+
52+
Ok(())
53+
}
54+
55+
/// Walks the Name system's `parent_scope` chain to reconstruct the constant path.
56+
/// Falls back to `nesting` for enclosing scope context when there is no explicit parent scope.
57+
///
58+
/// Note: this produces a concatenated name by piecing together name parts, not a properly
59+
/// resolved qualified name.
60+
pub(crate) fn build_concatenated_name_from_name(&self, name_id: NameId) -> String {
61+
let Some(name_ref) = self.names().get(&name_id) else {
62+
return "<unknown>".to_string();
63+
};
64+
let simple_name = self.string_id_to_string(*name_ref.str());
65+
66+
match name_ref.parent_scope() {
67+
ParentScope::Some(parent_id) | ParentScope::Attached(parent_id) => {
68+
let parent_name = self.build_concatenated_name_from_name(*parent_id);
69+
format!("{parent_name}::{simple_name}")
70+
}
71+
ParentScope::TopLevel => format!("::{simple_name}"),
72+
ParentScope::None => {
73+
let prefix = name_ref
74+
.nesting()
75+
.as_ref()
76+
.map(|nesting_id| self.build_nesting_prefix(*nesting_id))
77+
.unwrap_or_default();
78+
79+
if prefix.is_empty() {
80+
simple_name
81+
} else {
82+
format!("{prefix}::{simple_name}")
83+
}
84+
}
85+
}
86+
}
87+
88+
/// Resolves the enclosing nesting `NameId` to a string prefix.
89+
/// For resolved names, uses the declaration's fully qualified name.
90+
/// For unresolved names, recursively walks the name chain.
91+
fn build_nesting_prefix(&self, nesting_id: NameId) -> String {
92+
let Some(name_ref) = self.names().get(&nesting_id) else {
93+
return String::new();
94+
};
95+
match name_ref {
96+
NameRef::Resolved(resolved) => self
97+
.declarations()
98+
.get(resolved.declaration_id())
99+
.map_or_else(String::new, |decl| decl.name().to_string()),
100+
NameRef::Unresolved(_) => self.build_concatenated_name_from_name(nesting_id),
101+
}
102+
}
103+
104+
/// Builds a concatenated name for non-constant definitions by walking the `lexical_nesting_id` chain.
105+
///
106+
/// Note: this pieces together name parts from the lexical nesting, not a properly resolved
107+
/// qualified name.
108+
pub(crate) fn build_concatenated_name_from_lexical_nesting(&self, definition: &Definition) -> String {
109+
let simple_name = self.string_id_to_string(self.definition_string_id(definition));
110+
111+
// Collect enclosing nesting names from inner to outer
112+
let mut nesting_parts = Vec::new();
113+
let mut current_nesting = *definition.lexical_nesting_id();
114+
115+
while let Some(nesting_id) = current_nesting {
116+
let Some(nesting_def) = self.definitions().get(&nesting_id) else {
117+
break;
118+
};
119+
nesting_parts.push(self.string_id_to_string(self.definition_string_id(nesting_def)));
120+
current_nesting = *nesting_def.lexical_nesting_id();
121+
}
122+
123+
if nesting_parts.is_empty() {
124+
return simple_name;
125+
}
126+
127+
// Reverse to get outer-to-inner order for the prefix
128+
nesting_parts.reverse();
129+
let prefix = nesting_parts.join("::");
130+
131+
let separator = match definition {
132+
Definition::Method(_)
133+
| Definition::AttrAccessor(_)
134+
| Definition::AttrReader(_)
135+
| Definition::AttrWriter(_)
136+
| Definition::MethodAlias(_)
137+
| Definition::InstanceVariable(_) => "#",
138+
Definition::Class(_)
139+
| Definition::SingletonClass(_)
140+
| Definition::Module(_)
141+
| Definition::Constant(_)
142+
| Definition::ConstantAlias(_)
143+
| Definition::GlobalVariable(_)
144+
| Definition::ClassVariable(_)
145+
| Definition::GlobalVariableAlias(_) => "::",
146+
};
147+
148+
format!("{prefix}{separator}{simple_name}")
149+
}
150+
151+
/// Converts a `StringId` to its string value.
152+
fn string_id_to_string(&self, string_id: StringId) -> String {
153+
self.strings().get(&string_id).unwrap().to_string()
154+
}
155+
156+
/// Get location in the format of `uri#L<line>` for a definition.
157+
/// The format is clickable in VS Code.
158+
pub(crate) fn definition_location(&self, definition: &Definition) -> String {
159+
let uri_id = definition.uri_id();
160+
161+
let Some(document) = self.documents().get(uri_id) else {
162+
return format!("{uri_id}:<unknown>");
163+
};
164+
165+
let uri = document.uri();
166+
let line_index = document.line_index();
167+
let start = line_index.line_col(definition.offset().start().into());
168+
format!("{uri}#L{}", start.line + 1)
169+
}
170+
}
171+
172+
#[cfg(test)]
173+
mod tests {
174+
use crate::test_utils::GraphTest;
175+
176+
/// After resolution, the name chain of a (possibly nested) class reproduces the same
/// `::`-joined path that was used to look the definition up.
#[test]
fn build_concatenated_name_from_name_for_constants() {
    // (Ruby source, expected concatenated name of the innermost constant)
    let cases = [
        ("class Foo; end", "Foo"),
        ("module Foo; class Bar; end; end", "Foo::Bar"),
        ("module Foo; module Bar; class Baz; end; end; end", "Foo::Bar::Baz"),
    ];

    for (source, expected_name) in cases {
        let mut context = GraphTest::new();
        context.index_uri("file:///test.rb", source);
        context.resolve();

        let graph = context.graph();
        let found = graph.get(expected_name).unwrap();
        let name_id = *found.first().unwrap().name_id().unwrap();

        let actual = graph.build_concatenated_name_from_name(name_id);
        assert_eq!(actual, expected_name, "For source: {source}");
    }
}
197+
198+
/// Methods are rendered as `<nesting>#<method>`, or as the bare method name when they are
/// defined outside of any nesting.
#[test]
fn build_concatenated_name_from_lexical_nesting_for_methods() {
    // (Ruby source, expected concatenated name of the method)
    let cases = [
        ("class Foo; def bar; end; end", "Foo#bar()"),
        ("module Foo; class Bar; def baz; end; end; end", "Foo::Bar#baz()"),
        ("def bar; end", "bar()"),
    ];

    for (source, expected_name) in cases {
        let mut context = GraphTest::new();
        // Resolution is deliberately skipped so that the methods stay orphans.
        context.index_uri("file:///test.rb", source);

        let graph = context.graph();
        let method = graph
            .definitions()
            .values()
            .find(|candidate| candidate.kind() == "Method" && candidate.name_id().is_none())
            .unwrap_or_else(|| panic!("No Method definition without name_id found for source: {source}"));

        let actual = graph.build_concatenated_name_from_lexical_nesting(method);
        assert_eq!(actual, expected_name, "For source: {source}");
    }
}
222+
223+
/// An instance variable assigned inside a method is rendered with its full lexical nesting
/// (class, then method) followed by `#` and the variable name.
#[test]
fn build_concatenated_name_from_lexical_nesting_for_instance_variables() {
    let mut context = GraphTest::new();
    context.index_uri("file:///test.rb", "class Foo; def initialize; @ivar = 1; end; end");

    let graph = context.graph();
    let ivar = graph
        .definitions()
        .values()
        .find(|candidate| candidate.kind() == "InstanceVariable")
        .unwrap();

    let actual = graph.build_concatenated_name_from_lexical_nesting(ivar);
    assert_eq!(actual, "Foo::initialize()#@ivar");
}
238+
239+
/// The location of a definition is its document URI followed by a `#L<line>` fragment.
#[test]
fn definition_location_uses_clickable_uri_fragment() {
    let mut context = GraphTest::new();
    context.index_uri(
        "file:///foo.rb",
        "
class Foo
def bar
end
end
",
    );

    let graph = context.graph();
    let method = graph
        .definitions()
        .values()
        .find(|candidate| candidate.kind() == "Method")
        .unwrap();

    let actual = graph.definition_location(method);
    assert_eq!(actual, "file:///foo.rb#L2");
}
262+
}

0 commit comments

Comments
 (0)