From 418846341d63c0c937848fde5a655a3f191f6d4d Mon Sep 17 00:00:00 2001 From: Perry Hertler Date: Wed, 13 Aug 2025 07:34:37 -0500 Subject: [PATCH 1/3] filter out ignore globs --- Cargo.lock | 17 +++++--- Cargo.toml | 1 + src/config.rs | 20 ++++++++++ src/ignore.rs | 19 +++++++++ src/lib.rs | 1 + src/ownership/for_file_fast.rs | 1 + src/project_builder.rs | 72 ++++++++++++++++++++++------------ src/project_file_builder.rs | 2 +- 8 files changed, 103 insertions(+), 30 deletions(-) create mode 100644 src/ignore.rs diff --git a/Cargo.lock b/Cargo.lock index 4a62460..1abed4f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -184,6 +184,7 @@ dependencies = [ "assert_cmd", "clap", "clap_derive", + "crossbeam-channel", "enum_dispatch", "error-stack", "fast-glob", @@ -213,6 +214,15 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-deque" version = "0.8.3" @@ -239,12 +249,9 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.15" +version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c063cd8cc95f5c377ed0d4b49a4b21f632396ff690e8470c29b3359b346984b" -dependencies = [ - "cfg-if", -] +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "diff" diff --git a/Cargo.toml b/Cargo.toml index 3637047..be0ecb4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ path = "src/lib.rs" [dependencies] clap = { version = "4.5.20", features = ["derive"] } clap_derive = "4.5.18" +crossbeam-channel = "0.5.15" error-stack = "0.5.0" enum_dispatch = "0.3.13" fast-glob = "1.0.0" diff --git a/src/config.rs b/src/config.rs index abb54c6..86aa5df 100644 --- a/src/config.rs +++ b/src/config.rs @@ -21,6 +21,9 @@ pub struct Config { #[serde(default = "default_cache_directory")] pub cache_directory: String, + + #[serde(default = "default_ignore_globs")] + pub ignore_globs: Vec, } #[allow(dead_code)] @@ -57,6 +60,23 @@ fn vendored_gems_path() -> String { "vendored/".to_string() } +fn default_ignore_globs() -> Vec { + vec![ + "/.cursor".to_owned(), + "/.git".to_owned(), + "/.idea".to_owned(), + "/.vscode".to_owned(), + "/.yarn".to_owned(), + "/ar_doc".to_owned(), + "/db".to_owned(), + "/helm".to_owned(), + "/log".to_owned(), + "/node_modules".to_owned(), + "/sorbet".to_owned(), + "/tmp".to_owned(), + ] +} + #[cfg(test)] mod tests { use std::{ diff --git a/src/ignore.rs b/src/ignore.rs new file mode 100644 index 0000000..de1d27a --- /dev/null +++ b/src/ignore.rs @@ -0,0 +1,19 @@ +pub(crate) fn matches_path(ignore_globs: &[String], path: &str) -> bool { + ignore_globs.iter().any(|rule| glob_match(rule, path)) +} + +fn glob_match(glob: &str, path: &str) -> bool { + let glob = glob::Pattern::new(glob).unwrap(); + glob.matches(path) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_matches_path() { + assert!(matches_path(&["**/*.rs".to_string()], "src/main.rs")); + assert!(matches_path(&["src/**".to_string()], "src/main.rs")); + } +} \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index e769652..7382645 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,7 @@ pub mod cache; pub(crate) mod common_test; pub mod config; +pub(crate) mod ignore; pub mod ownership; pub(crate) mod project; pub mod project_builder; diff --git a/src/ownership/for_file_fast.rs b/src/ownership/for_file_fast.rs index 97babc7..8aef51d 100644 --- a/src/ownership/for_file_fast.rs +++ b/src/ownership/for_file_fast.rs @@ -303,6 +303,7 @@ mod tests { unowned_globs: vec![], vendored_gems_path: vendored_path.to_string(), cache_directory: "tmp/cache/codeowners".to_string(), + ignore_globs: vec![], } } diff --git a/src/project_builder.rs b/src/project_builder.rs index 41bdc54..5e9f596 100644 --- a/src/project_builder.rs +++ b/src/project_builder.rs @@ -4,9 +4,9 @@ use std::{ sync::{Arc, Mutex}, }; -use error_stack::{Result, ResultExt}; +use error_stack::{Report, Result, ResultExt}; use fast_glob::glob_match; -use ignore::{WalkBuilder, WalkParallel, WalkState}; +use ignore::{DirEntry, WalkBuilder, WalkParallel, WalkState}; use rayon::iter::{IntoParallelIterator, ParallelIterator}; use tracing::{instrument, warn}; @@ -51,53 +51,77 @@ impl<'a> ProjectBuilder<'a> { #[instrument(level = "debug", skip_all)] pub fn build(&mut self) -> Result { - let mut entry_types = Vec::with_capacity(INITIAL_VECTOR_CAPACITY); let mut builder = WalkBuilder::new(&self.base_path); builder.hidden(false); + builder.follow_links(false); + // Prune traversal early: skip heavy and irrelevant directories + let ignore_globs = self.config.ignore_globs.clone(); + let base_path = self.base_path.clone(); + + builder.filter_entry(move |entry: &DirEntry| { + if let Ok(relative_path) = entry.path().strip_prefix(&base_path) { + if let Some(relative_path_str) = relative_path.to_str() { + if crate::ignore::matches_path(&ignore_globs, relative_path_str) { + return false; + } + } + } + + true + }); + let walk_parallel: WalkParallel = builder.build_parallel(); - let collected = Arc::new(Mutex::new(Vec::with_capacity(INITIAL_VECTOR_CAPACITY))); - let collected_for_threads = Arc::clone(&collected); + let (tx, rx) = crossbeam_channel::unbounded::(); + let error_holder: Arc>>> = Arc::new(Mutex::new(None)); + let error_holder_for_threads = Arc::clone(&error_holder); + + let this: &ProjectBuilder<'a> = self; walk_parallel.run(move || { - let collected = Arc::clone(&collected_for_threads); + let error_holder = Arc::clone(&error_holder_for_threads); + let tx = tx.clone(); Box::new(move |res| { if let Ok(entry) = res { - if let Ok(mut v) = collected.lock() { - v.push(entry); + match this.build_entry_type(entry) { + Ok(entry_type) => { + let _ = tx.send(entry_type); + } + Err(report) => { + if let Ok(mut slot) = error_holder.lock() { + if slot.is_none() { + *slot = Some(report); + } + } + } } } WalkState::Continue }) }); - // Process sequentially with &mut self without panicking on Arc/Mutex unwraps - let collected_entries = match Arc::try_unwrap(collected) { - // We are the sole owner of the Arc + // Take ownership of the collected entry types + let entry_types: Vec = rx.iter().collect(); + + // If any error occurred while building entry types, return it + let maybe_error = match Arc::try_unwrap(error_holder) { Ok(mutex) => match mutex.into_inner() { - // Mutex not poisoned - Ok(entries) => entries, - // Recover entries even if the mutex was poisoned + Ok(err_opt) => err_opt, Err(poisoned) => poisoned.into_inner(), }, - // There are still other Arc references; lock and take the contents Err(arc) => match arc.lock() { - Ok(mut guard) => std::mem::take(&mut *guard), - // Recover guard even if poisoned, then take contents - Err(poisoned) => { - let mut guard = poisoned.into_inner(); - std::mem::take(&mut *guard) - } + Ok(mut guard) => guard.take(), + Err(poisoned) => poisoned.into_inner().take(), }, }; - for entry in collected_entries { - entry_types.push(self.build_entry_type(entry)?); + if let Some(report) = maybe_error { + return Err(report); } self.build_project_from_entry_types(entry_types) } - fn build_entry_type(&mut self, entry: ignore::DirEntry) -> Result { + fn build_entry_type(&self, entry: ignore::DirEntry) -> Result { let absolute_path = entry.path(); let is_dir = entry.file_type().ok_or(Error::Io).change_context(Error::Io)?.is_dir(); diff --git a/src/project_file_builder.rs b/src/project_file_builder.rs index ba4c0ed..9a8e79d 100644 --- a/src/project_file_builder.rs +++ b/src/project_file_builder.rs @@ -21,7 +21,7 @@ impl<'a> ProjectFileBuilder<'a> { Self { global_cache } } - pub(crate) fn build(&mut self, path: PathBuf) -> ProjectFile { + pub(crate) fn build(&self, path: PathBuf) -> ProjectFile { if let Ok(Some(cached_project_file)) = self.get_project_file_from_cache(&path) { return cached_project_file; } From 1e629f85e8f4a86c3395fd1c0715307599fdea78 Mon Sep 17 00:00:00 2001 From: Perry Hertler Date: Wed, 13 Aug 2025 07:42:40 -0500 Subject: [PATCH 2/3] ignore dirs --- src/config.rs | 30 +++++++++++++++--------------- src/ignore.rs | 19 ------------------- src/lib.rs | 1 - src/ownership/for_file_fast.rs | 2 +- src/project_builder.rs | 14 +++++++++----- 5 files changed, 25 insertions(+), 41 deletions(-) delete mode 100644 src/ignore.rs diff --git a/src/config.rs b/src/config.rs index 86aa5df..aa087df 100644 --- a/src/config.rs +++ b/src/config.rs @@ -22,8 +22,8 @@ pub struct Config { #[serde(default = "default_cache_directory")] pub cache_directory: String, - #[serde(default = "default_ignore_globs")] - pub ignore_globs: Vec, + #[serde(default = "default_ignore_dirs")] + pub ignore_dirs: Vec, } #[allow(dead_code)] @@ -60,20 +60,20 @@ fn vendored_gems_path() -> String { "vendored/".to_string() } -fn default_ignore_globs() -> Vec { +fn default_ignore_dirs() -> Vec { vec![ - "/.cursor".to_owned(), - "/.git".to_owned(), - "/.idea".to_owned(), - "/.vscode".to_owned(), - "/.yarn".to_owned(), - "/ar_doc".to_owned(), - "/db".to_owned(), - "/helm".to_owned(), - "/log".to_owned(), - "/node_modules".to_owned(), - "/sorbet".to_owned(), - "/tmp".to_owned(), + ".cursor".to_owned(), + ".git".to_owned(), + ".idea".to_owned(), + ".vscode".to_owned(), + ".yarn".to_owned(), + "ar_doc".to_owned(), + "db".to_owned(), + "helm".to_owned(), + "log".to_owned(), + "node_modules".to_owned(), + "sorbet".to_owned(), + "tmp".to_owned(), ] } diff --git a/src/ignore.rs b/src/ignore.rs deleted file mode 100644 index de1d27a..0000000 --- a/src/ignore.rs +++ /dev/null @@ -1,19 +0,0 @@ -pub(crate) fn matches_path(ignore_globs: &[String], path: &str) -> bool { - ignore_globs.iter().any(|rule| glob_match(rule, path)) -} - -fn glob_match(glob: &str, path: &str) -> bool { - let glob = glob::Pattern::new(glob).unwrap(); - glob.matches(path) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_matches_path() { - assert!(matches_path(&["**/*.rs".to_string()], "src/main.rs")); - assert!(matches_path(&["src/**".to_string()], "src/main.rs")); - } -} \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 7382645..e769652 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,6 @@ pub mod cache; pub(crate) mod common_test; pub mod config; -pub(crate) mod ignore; pub mod ownership; pub(crate) mod project; pub mod project_builder; diff --git a/src/ownership/for_file_fast.rs b/src/ownership/for_file_fast.rs index 8aef51d..081cb8a 100644 --- a/src/ownership/for_file_fast.rs +++ b/src/ownership/for_file_fast.rs @@ -303,7 +303,7 @@ mod tests { unowned_globs: vec![], vendored_gems_path: vendored_path.to_string(), cache_directory: "tmp/cache/codeowners".to_string(), - ignore_globs: vec![], + ignore_dirs: vec![], } } diff --git a/src/project_builder.rs b/src/project_builder.rs index 5e9f596..3fecf05 100644 --- a/src/project_builder.rs +++ b/src/project_builder.rs @@ -55,14 +55,18 @@ impl<'a> ProjectBuilder<'a> { builder.hidden(false); builder.follow_links(false); // Prune traversal early: skip heavy and irrelevant directories - let ignore_globs = self.config.ignore_globs.clone(); + let ignore_dirs = self.config.ignore_dirs.clone(); let base_path = self.base_path.clone(); builder.filter_entry(move |entry: &DirEntry| { - if let Ok(relative_path) = entry.path().strip_prefix(&base_path) { - if let Some(relative_path_str) = relative_path.to_str() { - if crate::ignore::matches_path(&ignore_globs, relative_path_str) { - return false; + let path = entry.path(); + let file_name = entry.file_name().to_str().unwrap_or(""); + if let Some(ft) = entry.file_type() { + if ft.is_dir() { + if let Ok(rel) = path.strip_prefix(&base_path) { + if rel.components().count() == 1 && ignore_dirs.iter().any(|d| *d == file_name) { + return false; + } } } } From 6374df694080c884d296b2f7fca310b0541d8b41 Mon Sep 17 00:00:00 2001 From: Perry Hertler Date: Wed, 13 Aug 2025 08:04:12 -0500 Subject: [PATCH 3/3] bumping version --- Cargo.lock | 2 +- Cargo.toml | 2 +- dev/run_benchmarks_for_gv.sh | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1abed4f..88a434e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -179,7 +179,7 @@ checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" [[package]] name = "codeowners" -version = "0.2.6" +version = "0.2.7" dependencies = [ "assert_cmd", "clap", diff --git a/Cargo.toml b/Cargo.toml index be0ecb4..7b14a05 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "codeowners" -version = "0.2.6" +version = "0.2.7" edition = "2024" [profile.release] diff --git a/dev/run_benchmarks_for_gv.sh b/dev/run_benchmarks_for_gv.sh index 1277351..460f24c 100755 --- a/dev/run_benchmarks_for_gv.sh +++ b/dev/run_benchmarks_for_gv.sh @@ -9,4 +9,5 @@ echo "To run these benchmarks on your application, you can place this repo next hyperfine --warmup=2 --runs=3 --export-markdown tmp/codeowners_benchmarks_gv.md \ '../rubyatscale/codeowners-rs/target/release/codeowners gv' \ - 'bin/codeownership validate' \ No newline at end of file + 'bin/codeownership validate' \ + 'bin/codeowners-rs gv' \ No newline at end of file