diff --git a/Cargo.lock b/Cargo.lock index 4a62460..88a434e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -179,11 +179,12 @@ checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" [[package]] name = "codeowners" -version = "0.2.6" +version = "0.2.7" dependencies = [ "assert_cmd", "clap", "clap_derive", + "crossbeam-channel", "enum_dispatch", "error-stack", "fast-glob", @@ -213,6 +214,15 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-deque" version = "0.8.3" @@ -239,12 +249,9 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.15" +version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c063cd8cc95f5c377ed0d4b49a4b21f632396ff690e8470c29b3359b346984b" -dependencies = [ - "cfg-if", -] +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "diff" diff --git a/Cargo.toml b/Cargo.toml index 3637047..7b14a05 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "codeowners" -version = "0.2.6" +version = "0.2.7" edition = "2024" [profile.release] @@ -12,6 +12,7 @@ path = "src/lib.rs" [dependencies] clap = { version = "4.5.20", features = ["derive"] } clap_derive = "4.5.18" +crossbeam-channel = "0.5.15" error-stack = "0.5.0" enum_dispatch = "0.3.13" fast-glob = "1.0.0" diff --git a/dev/run_benchmarks_for_gv.sh b/dev/run_benchmarks_for_gv.sh index 1277351..460f24c 100755 --- a/dev/run_benchmarks_for_gv.sh +++ b/dev/run_benchmarks_for_gv.sh @@ -9,4 +9,5 @@ echo "To run these benchmarks on your application, you can place this repo next hyperfine --warmup=2 --runs=3 --export-markdown tmp/codeowners_benchmarks_gv.md \ '../rubyatscale/codeowners-rs/target/release/codeowners gv' \ - 'bin/codeownership validate' \ No newline at end of file + 'bin/codeownership validate' \ + 'bin/codeowners-rs gv' \ No newline at end of file diff --git a/src/config.rs b/src/config.rs index abb54c6..aa087df 100644 --- a/src/config.rs +++ b/src/config.rs @@ -21,6 +21,9 @@ pub struct Config { #[serde(default = "default_cache_directory")] pub cache_directory: String, + + #[serde(default = "default_ignore_dirs")] + pub ignore_dirs: Vec, } #[allow(dead_code)] @@ -57,6 +60,23 @@ fn vendored_gems_path() -> String { "vendored/".to_string() } +fn default_ignore_dirs() -> Vec { + vec![ + ".cursor".to_owned(), + ".git".to_owned(), + ".idea".to_owned(), + ".vscode".to_owned(), + ".yarn".to_owned(), + "ar_doc".to_owned(), + "db".to_owned(), + "helm".to_owned(), + "log".to_owned(), + "node_modules".to_owned(), + "sorbet".to_owned(), + "tmp".to_owned(), + ] +} + #[cfg(test)] mod tests { use std::{ diff --git a/src/ownership/for_file_fast.rs b/src/ownership/for_file_fast.rs index 97babc7..081cb8a 100644 --- a/src/ownership/for_file_fast.rs +++ b/src/ownership/for_file_fast.rs @@ -303,6 +303,7 @@ mod tests { unowned_globs: vec![], vendored_gems_path: vendored_path.to_string(), cache_directory: "tmp/cache/codeowners".to_string(), + ignore_dirs: vec![], } } diff --git a/src/project_builder.rs b/src/project_builder.rs index 41bdc54..3fecf05 100644 --- a/src/project_builder.rs +++ b/src/project_builder.rs @@ -4,9 +4,9 @@ use std::{ sync::{Arc, Mutex}, }; -use error_stack::{Result, ResultExt}; +use error_stack::{Report, Result, ResultExt}; use fast_glob::glob_match; -use ignore::{WalkBuilder, WalkParallel, WalkState}; +use ignore::{DirEntry, WalkBuilder, WalkParallel, WalkState}; use rayon::iter::{IntoParallelIterator, ParallelIterator}; use tracing::{instrument, warn}; @@ -51,53 +51,81 @@ impl<'a> ProjectBuilder<'a> { #[instrument(level = "debug", skip_all)] pub fn build(&mut self) -> Result { - let mut entry_types = Vec::with_capacity(INITIAL_VECTOR_CAPACITY); let mut builder = WalkBuilder::new(&self.base_path); builder.hidden(false); + builder.follow_links(false); + // Prune traversal early: skip heavy and irrelevant directories + let ignore_dirs = self.config.ignore_dirs.clone(); + let base_path = self.base_path.clone(); + + builder.filter_entry(move |entry: &DirEntry| { + let path = entry.path(); + let file_name = entry.file_name().to_str().unwrap_or(""); + if let Some(ft) = entry.file_type() { + if ft.is_dir() { + if let Ok(rel) = path.strip_prefix(&base_path) { + if rel.components().count() == 1 && ignore_dirs.iter().any(|d| *d == file_name) { + return false; + } + } + } + } + + true + }); + let walk_parallel: WalkParallel = builder.build_parallel(); - let collected = Arc::new(Mutex::new(Vec::with_capacity(INITIAL_VECTOR_CAPACITY))); - let collected_for_threads = Arc::clone(&collected); + let (tx, rx) = crossbeam_channel::unbounded::(); + let error_holder: Arc>>> = Arc::new(Mutex::new(None)); + let error_holder_for_threads = Arc::clone(&error_holder); + + let this: &ProjectBuilder<'a> = self; walk_parallel.run(move || { - let collected = Arc::clone(&collected_for_threads); + let error_holder = Arc::clone(&error_holder_for_threads); + let tx = tx.clone(); Box::new(move |res| { if let Ok(entry) = res { - if let Ok(mut v) = collected.lock() { - v.push(entry); + match this.build_entry_type(entry) { + Ok(entry_type) => { + let _ = tx.send(entry_type); + } + Err(report) => { + if let Ok(mut slot) = error_holder.lock() { + if slot.is_none() { + *slot = Some(report); + } + } + } } } WalkState::Continue }) }); - // Process sequentially with &mut self without panicking on Arc/Mutex unwraps - let collected_entries = match Arc::try_unwrap(collected) { - // We are the sole owner of the Arc + // Take ownership of the collected entry types + let entry_types: Vec = rx.iter().collect(); + + // If any error occurred while building entry types, return it + let maybe_error = match Arc::try_unwrap(error_holder) { Ok(mutex) => match mutex.into_inner() { - // Mutex not poisoned - Ok(entries) => entries, - // Recover entries even if the mutex was poisoned + Ok(err_opt) => err_opt, Err(poisoned) => poisoned.into_inner(), }, - // There are still other Arc references; lock and take the contents Err(arc) => match arc.lock() { - Ok(mut guard) => std::mem::take(&mut *guard), - // Recover guard even if poisoned, then take contents - Err(poisoned) => { - let mut guard = poisoned.into_inner(); - std::mem::take(&mut *guard) - } + Ok(mut guard) => guard.take(), + Err(poisoned) => poisoned.into_inner().take(), }, }; - for entry in collected_entries { - entry_types.push(self.build_entry_type(entry)?); + if let Some(report) = maybe_error { + return Err(report); } self.build_project_from_entry_types(entry_types) } - fn build_entry_type(&mut self, entry: ignore::DirEntry) -> Result { + fn build_entry_type(&self, entry: ignore::DirEntry) -> Result { let absolute_path = entry.path(); let is_dir = entry.file_type().ok_or(Error::Io).change_context(Error::Io)?.is_dir(); diff --git a/src/project_file_builder.rs b/src/project_file_builder.rs index ba4c0ed..9a8e79d 100644 --- a/src/project_file_builder.rs +++ b/src/project_file_builder.rs @@ -21,7 +21,7 @@ impl<'a> ProjectFileBuilder<'a> { Self { global_cache } } - pub(crate) fn build(&mut self, path: PathBuf) -> ProjectFile { + pub(crate) fn build(&self, path: PathBuf) -> ProjectFile { if let Ok(Some(cached_project_file)) = self.get_project_file_from_cache(&path) { return cached_project_file; }