Skip to content

Commit e688ae1

Browse files
committed
Add integrity check for owner relationships
1 parent 4c602d8 commit e688ae1

10 files changed

Lines changed: 257 additions & 4 deletions

File tree

ext/rubydex/graph.c

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "utils.h"
1010

1111
static VALUE cGraph;
12+
static VALUE mRubydex;
1213

1314
// Free function for the custom Graph allocator. We always have to call into Rust to free data allocated by it
1415
static void graph_free(void *ptr) {
@@ -412,6 +413,32 @@ static VALUE rdxr_graph_require_paths(VALUE self, VALUE load_path) {
412413
return array;
413414
}
414415

416+
// Graph#check_integrity: () -> Array[Rubydex::IntegrityError]
417+
// Returns an array of IntegrityError objects, empty if no issues found
418+
static VALUE rdxr_graph_check_integrity(VALUE self) {
419+
void *graph;
420+
TypedData_Get_Struct(self, void *, &graph_type, graph);
421+
422+
size_t error_count = 0;
423+
const char *const *errors = rdx_check_integrity(graph, &error_count);
424+
425+
if (errors == NULL) {
426+
return rb_ary_new();
427+
}
428+
429+
VALUE cIntegrityError = rb_const_get(mRubydex, rb_intern("IntegrityError"));
430+
VALUE array = rb_ary_new_capa((long)error_count);
431+
432+
for (size_t i = 0; i < error_count; i++) {
433+
VALUE argv[] = {rb_utf8_str_new_cstr(errors[i])};
434+
VALUE error = rb_class_new_instance(1, argv, cIntegrityError);
435+
rb_ary_push(array, error);
436+
}
437+
438+
free_c_string_array(errors, error_count);
439+
return array;
440+
}
441+
415442
// Graph#diagnostics -> Array[Rubydex::Diagnostic]
416443
static VALUE rdxr_graph_diagnostics(VALUE self) {
417444
void *graph;
@@ -445,7 +472,8 @@ static VALUE rdxr_graph_diagnostics(VALUE self) {
445472
return diagnostics;
446473
}
447474

448-
void rdxi_initialize_graph(VALUE mRubydex) {
475+
void rdxi_initialize_graph(VALUE moduleRubydex) {
476+
mRubydex = moduleRubydex;
449477
cGraph = rb_define_class_under(mRubydex, "Graph", rb_cObject);
450478
rb_define_alloc_func(cGraph, rdxr_graph_alloc);
451479
rb_define_method(cGraph, "index_all", rdxr_graph_index_all, 1);
@@ -457,6 +485,7 @@ void rdxi_initialize_graph(VALUE mRubydex) {
457485
rb_define_method(cGraph, "constant_references", rdxr_graph_constant_references, 0);
458486
rb_define_method(cGraph, "method_references", rdxr_graph_method_references, 0);
459487
rb_define_method(cGraph, "diagnostics", rdxr_graph_diagnostics, 0);
488+
rb_define_method(cGraph, "check_integrity", rdxr_graph_check_integrity, 0);
460489
rb_define_method(cGraph, "[]", rdxr_graph_aref, 1);
461490
rb_define_method(cGraph, "search", rdxr_graph_search, 1);
462491
rb_define_method(cGraph, "set_encoding", rdxr_graph_set_encoding, 1);

lib/rubydex.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ class Error < StandardError; end
1818
require "rubydex/rubydex"
1919
end
2020

21+
require "rubydex/errors"
2122
require "rubydex/location"
2223
require "rubydex/comment"
2324
require "rubydex/diagnostic"

lib/rubydex/errors.rb

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# frozen_string_literal: true
2+
3+
module Rubydex
4+
class Error
5+
#: String
6+
attr_reader :message
7+
8+
#: (String) -> void
9+
def initialize(message)
10+
@message = message
11+
end
12+
end
13+
14+
class IntegrityError < Error; end
15+
end

rakelib/index_top_gems.rake

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,10 @@ task index_top_gems: :compile_release do
5151

5252
# Index the gem's files and yield errors back to the main Ractor
5353
graph = Rubydex::Graph.new
54-
errors = graph.index_all(Dir.glob("#{gem_dir}/**/*.rb"))
54+
indexing_errors = graph.index_all(Dir.glob("#{gem_dir}/**/*.rb"))
55+
graph.resolve
56+
57+
errors = indexing_errors + graph.check_integrity.map(&:message)
5558
next if errors.empty?
5659

5760
@mutex.synchronize { @errors << "#{gem} => #{errors.join(", ")}" }

rust/rubydex-sys/src/graph_api.rs

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use rubydex::model::encoding::Encoding;
99
use rubydex::model::graph::Graph;
1010
use rubydex::model::ids::DeclarationId;
1111
use rubydex::resolution::Resolver;
12-
use rubydex::{indexing, listing, query};
12+
use rubydex::{indexing, integrity, listing, query};
1313
use std::ffi::CString;
1414
use std::path::PathBuf;
1515
use std::{mem, ptr};
@@ -195,6 +195,42 @@ pub extern "C" fn rdx_graph_resolve(pointer: GraphPointer) {
195195
});
196196
}
197197

198+
/// Checks the integrity of the graph and returns an array of error message strings. Returns NULL if there are no
199+
/// errors. Caller must free with `free_c_string_array`.
200+
///
201+
/// # Safety
202+
///
203+
/// - `pointer` must be a valid `GraphPointer` previously returned by this crate.
204+
/// - `out_error_count` must be a valid, writable pointer.
205+
#[unsafe(no_mangle)]
206+
pub unsafe extern "C" fn rdx_check_integrity(
207+
pointer: GraphPointer,
208+
out_error_count: *mut usize,
209+
) -> *const *const c_char {
210+
with_graph(pointer, |graph| {
211+
let errors = integrity::check_integrity(graph);
212+
213+
if errors.is_empty() {
214+
unsafe { *out_error_count = 0 };
215+
return ptr::null();
216+
}
217+
218+
let c_strings: Vec<*const c_char> = errors
219+
.into_iter()
220+
.filter_map(|error| {
221+
CString::new(error.to_string())
222+
.ok()
223+
.map(|c_string| c_string.into_raw().cast_const())
224+
})
225+
.collect();
226+
227+
unsafe { *out_error_count = c_strings.len() };
228+
229+
let boxed = c_strings.into_boxed_slice();
230+
Box::into_raw(boxed).cast::<*const c_char>()
231+
})
232+
}
233+
198234
/// # Safety
199235
///
200236
/// Expects both the graph pointer and encoding string pointer to be valid

rust/rubydex/src/integrity.rs

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
use std::fmt;
2+
3+
use crate::model::{
4+
declaration::{Declaration, Namespace},
5+
graph::{BASIC_OBJECT_ID, Graph, OBJECT_ID},
6+
ids::DeclarationId,
7+
};
8+
9+
#[derive(Debug, PartialEq, Eq)]
10+
pub enum IntegrityErrorKind {
11+
/// A declaration's owner is not a namespace (module, class, or singleton class)
12+
OwnerIsNotNamespace,
13+
/// A declaration's owner does not exist in the graph
14+
OwnerDoesNotExist,
15+
/// A singleton class chain never resolves to a non-singleton namespace
16+
SingletonClassChainDoesNotTerminate,
17+
/// A non-root declaration unexpectedly owns itself
18+
UnexpectedSelfOwnership,
19+
}
20+
21+
/// An integrity error found during graph validation
22+
#[derive(Debug, PartialEq, Eq)]
23+
pub struct IntegrityError {
24+
kind: IntegrityErrorKind,
25+
declaration_name: String,
26+
uris: Vec<String>,
27+
}
28+
29+
impl fmt::Display for IntegrityError {
30+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
31+
let message = match self.kind {
32+
IntegrityErrorKind::OwnerIsNotNamespace => {
33+
format!("Declaration `{}` is owned by a non-namespace", self.declaration_name)
34+
}
35+
IntegrityErrorKind::OwnerDoesNotExist => {
36+
format!(
37+
"Declaration `{}` has an owner that does not exist in the graph",
38+
self.declaration_name
39+
)
40+
}
41+
IntegrityErrorKind::SingletonClassChainDoesNotTerminate => {
42+
format!(
43+
"Singleton class `{}` does not eventually attach to a non-singleton namespace",
44+
self.declaration_name
45+
)
46+
}
47+
IntegrityErrorKind::UnexpectedSelfOwnership => {
48+
format!("Declaration `{}` unexpectedly owns itself", self.declaration_name)
49+
}
50+
};
51+
52+
write!(f, "{message}. Defined in: {}", self.uris.join(", "))
53+
}
54+
}
55+
56+
impl std::error::Error for IntegrityError {}
57+
58+
/// Checks the integrity of the graph data
59+
#[must_use]
60+
pub fn check_integrity(graph: &Graph) -> Vec<IntegrityError> {
61+
let mut errors = Vec::new();
62+
let self_owners = [*OBJECT_ID, *BASIC_OBJECT_ID];
63+
64+
for (id, declaration) in graph.declarations() {
65+
let owner_id = declaration.owner_id();
66+
67+
// Check for constants that own themselves. Only `Object` and `BasicObject` own themselves and no other constant
68+
if *id == *owner_id {
69+
if self_owners.contains(id) {
70+
continue;
71+
}
72+
errors.push(IntegrityError {
73+
kind: IntegrityErrorKind::UnexpectedSelfOwnership,
74+
declaration_name: declaration.name().to_string(),
75+
uris: collect_uris(graph, declaration),
76+
});
77+
continue;
78+
}
79+
80+
// Check that the owner exists
81+
let Some(owner) = graph.declarations().get(owner_id) else {
82+
errors.push(IntegrityError {
83+
kind: IntegrityErrorKind::OwnerDoesNotExist,
84+
declaration_name: declaration.name().to_string(),
85+
uris: collect_uris(graph, declaration),
86+
});
87+
continue;
88+
};
89+
90+
// Check that the owner is a namespace
91+
if owner.as_namespace().is_none() {
92+
errors.push(IntegrityError {
93+
kind: IntegrityErrorKind::OwnerIsNotNamespace,
94+
declaration_name: declaration.name().to_string(),
95+
uris: collect_uris(graph, declaration),
96+
});
97+
continue;
98+
}
99+
100+
// Check singleton class chain termination
101+
if let Declaration::Namespace(Namespace::SingletonClass(_)) = declaration
102+
&& !singleton_chain_terminates(graph, *owner_id)
103+
{
104+
errors.push(IntegrityError {
105+
kind: IntegrityErrorKind::SingletonClassChainDoesNotTerminate,
106+
declaration_name: declaration.name().to_string(),
107+
uris: collect_uris(graph, declaration),
108+
});
109+
}
110+
}
111+
112+
errors
113+
}
114+
115+
/// Collects the URIs where a declaration is defined, sorted and deduplicated
116+
fn collect_uris(graph: &Graph, declaration: &Declaration) -> Vec<String> {
117+
declaration
118+
.definitions()
119+
.iter()
120+
.map(|def_id| {
121+
let definition = graph.definitions().get(def_id).unwrap();
122+
let document = graph.documents().get(definition.uri_id()).unwrap();
123+
document.uri().to_string()
124+
})
125+
.collect()
126+
}
127+
128+
/// Walks the singleton class chain to verify that it eventually finds a module or class as its attached object
129+
fn singleton_chain_terminates(graph: &Graph, start_owner_id: DeclarationId) -> bool {
130+
const MAX_SINGLETON_DEPTH: usize = 128;
131+
let mut current_id = start_owner_id;
132+
133+
for _ in 0..MAX_SINGLETON_DEPTH {
134+
let Some(current) = graph.declarations().get(&current_id) else {
135+
return false;
136+
};
137+
138+
match current {
139+
Declaration::Namespace(Namespace::SingletonClass(_)) => {
140+
current_id = *current.owner_id();
141+
}
142+
Declaration::Namespace(_) => return true,
143+
_ => return false,
144+
}
145+
}
146+
147+
false
148+
}

rust/rubydex/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ pub mod compile_assertions;
22
pub mod diagnostic;
33
pub mod errors;
44
pub mod indexing;
5+
pub mod integrity;
56
pub mod job_queue;
67
pub mod listing;
78
pub mod model;

rust/rubydex/src/main.rs

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use clap::{Parser, ValueEnum};
22
use std::mem;
33

44
use rubydex::{
5-
indexing, listing,
5+
indexing, integrity, listing,
66
model::graph::Graph,
77
resolution::Resolver,
88
stats::{
@@ -28,6 +28,9 @@ struct Args {
2828
#[arg(long = "stats", help = "Show detailed performance statistics")]
2929
stats: bool,
3030

31+
#[arg(long = "check-integrity", help = "Check the integrity of the graph after resolution")]
32+
check_integrity: bool,
33+
3134
#[arg(
3235
long = "report-orphans",
3336
value_name = "PATH",
@@ -98,6 +101,21 @@ fn main() {
98101
return exit(args.stats);
99102
}
100103

104+
// Integrity check
105+
if args.check_integrity {
106+
let errors = time_it!(integrity_check, { integrity::check_integrity(&graph) });
107+
108+
if errors.is_empty() {
109+
println!("Integrity check passed: no issues found");
110+
} else {
111+
eprintln!("Integrity check found {} issue(s):", errors.len());
112+
113+
for error in &errors {
114+
eprintln!(" - {error}");
115+
}
116+
}
117+
}
118+
101119
// Querying
102120

103121
if args.stats {

rust/rubydex/src/model/graph.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ use crate::model::references::{ConstantReference, MethodRef};
1414
use crate::model::string_ref::StringRef;
1515
use crate::stats;
1616

17+
pub static BASIC_OBJECT_ID: LazyLock<DeclarationId> = LazyLock::new(|| DeclarationId::from("BasicObject"));
1718
pub static OBJECT_ID: LazyLock<DeclarationId> = LazyLock::new(|| DeclarationId::from("Object"));
1819
pub static MODULE_ID: LazyLock<DeclarationId> = LazyLock::new(|| DeclarationId::from("Module"));
1920
pub static CLASS_ID: LazyLock<DeclarationId> = LazyLock::new(|| DeclarationId::from("Class"));

rust/rubydex/src/stats/timer.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,5 +122,6 @@ make_timer! {
122122
listing, "Listing";
123123
indexing, "Indexing";
124124
resolution, "Resolution";
125+
integrity_check, "Integrity check";
125126
querying, "Querying";
126127
}

0 commit comments

Comments
 (0)