From a1be7426df622cfb4c2627a35e8d0da5109c81f6 Mon Sep 17 00:00:00 2001 From: Darren Schroeder <343840+fdncred@users.noreply.github.com> Date: Thu, 18 Dec 2025 17:15:32 -0600 Subject: [PATCH 01/15] refactor uu_ls so that crate users can call the command without having the internal print stdout output --- src/uu/ls/src/ls.rs | 757 ++++++++++++++++++++++++++++------------ src/uu/ls/src/output.rs | 358 +++++++++++++++++++ 2 files changed, 900 insertions(+), 215 deletions(-) create mode 100644 src/uu/ls/src/output.rs diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 632d6ae591d..714beee7205 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -15,6 +15,7 @@ use std::cell::RefCell; use std::os::unix::fs::{FileTypeExt, MetadataExt}; use std::{ cell::OnceCell, + collections::HashSet, cmp::Reverse, ffi::{OsStr, OsString}, fs::{self, DirEntry, FileType, Metadata, ReadDir}, @@ -50,6 +51,8 @@ mod config; mod dired; mod display; +pub mod output; +pub use output::{CollectorOutput, EntryInfo, LsOutput}; pub use config::{Config, options}; pub use display::Format; @@ -787,7 +790,7 @@ pub fn uu_app() -> Command { /// Represents the possible values of [`PathData::display_name`]. The reason this is a /// separate enum is to avoid a self-referential struct, as it is moved in hot loops. -#[derive(Debug)] +#[derive(Debug, Clone)] enum PathDataDisplayName<'a> { SelfReferential, Custom(Cow<'a, OsStr>), @@ -797,7 +800,11 @@ enum PathDataDisplayName<'a> { /// Any data that will be reused several times makes sense to be added to this structure. /// Caching data here helps eliminate redundant syscalls to fetch same information. #[derive(Debug)] -struct PathData<'a> { +/// Internal representation of file/directory entry data. +/// +/// This struct is used internally for file enumeration. It can be converted +/// to [`EntryInfo`] for programmatic access via the [`LsOutput`] trait. 
+pub struct PathData<'a> { // Result got from symlink_metadata() or metadata() based on config md: OnceCell>, ft: OnceCell>, @@ -814,6 +821,19 @@ struct PathData<'a> { } impl<'a> PathData<'a> { + /// Convert this PathData to an EntryInfo for programmatic access + pub fn to_entry_info(&self, config: &Config) -> EntryInfo { + EntryInfo { + path: self.p_buf.clone().into_owned(), + display_name: self.display_name.clone(), + file_type: self.file_type().copied(), + metadata: self.metadata().cloned(), + security_context: self.security_context(config).to_string(), + command_line: self.command_line, + must_dereference: self.must_dereference, + } + } + fn new( p_buf: Cow<'a, Path>, dir_entry: Option, @@ -1002,39 +1022,145 @@ struct ListState<'a> { display_buf: Vec, } -#[allow(clippy::cognitive_complexity)] -pub fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> { +/// Text output implementation that formats entries for terminal display. +/// +/// This is the default output sink used by [`list`] for standard ls behavior. +/// It handles all text formatting including colors, columns, long format, etc. 
+struct TextOutput<'a> { + state: ListState<'a>, + dired: DiredOutput, +} + +impl<'a> TextOutput<'a> { + fn new(config: &'a Config) -> Self { + Self { + state: ListState { + out: BufWriter::new(stdout()), + style_manager: config.color.as_ref().map(StyleManager::new), + #[cfg(unix)] + uid_cache: HashMap::default(), + #[cfg(unix)] + gid_cache: HashMap::default(), + recent_time_range: (SystemTime::now() - Duration::new(31_556_952 / 2, 0)) + ..=SystemTime::now(), + }, + dired: DiredOutput::default(), + } + } +} + +impl<'a> LsOutput for TextOutput<'a> { + fn write_entries(&mut self, entries: &[PathData], config: &Config) -> UResult<()> { + display_items(entries, config, &mut self.state, &mut self.dired) + } + + fn write_dir_header( + &mut self, + path_data: &PathData, + config: &Config, + is_first: bool, + ) -> UResult<()> { + if is_first { + if config.dired { + dired::indent(&mut self.state.out)?; + } + show_dir_name(path_data, &mut self.state.out, config)?; + writeln!(self.state.out)?; + if config.dired { + let dir_len = path_data.display_name().len(); + dired::calculate_subdired(&mut self.dired, dir_len); + dired::add_dir_name(&mut self.dired, dir_len); + } + } else { + writeln!(self.state.out)?; + if config.dired { + self.dired.padding = 2; + dired::indent(&mut self.state.out)?; + let dir_name_size = path_data.path().to_string_lossy().len(); + dired::calculate_subdired(&mut self.dired, dir_name_size); + dired::add_dir_name(&mut self.dired, dir_name_size); + } + show_dir_name(path_data, &mut self.state.out, config)?; + writeln!(self.state.out)?; + } + Ok(()) + } + + fn write_total(&mut self, total_size: u64, config: &Config) -> UResult<()> { + if config.dired { + dired::indent(&mut self.state.out)?; + } + let total_str = format!( + "{}{}", + translate!("ls-total", "size" => display_size(total_size, config)), + config.line_ending + ); + write!(self.state.out, "{}", total_str)?; + if config.dired { + dired::add_total(&mut self.dired, total_str.len()); + } + Ok(()) + } 
+ + fn flush(&mut self) -> UResult<()> { + self.state.out.flush()?; + Ok(()) + } + + fn finalize(&mut self, config: &Config) -> UResult<()> { + if config.dired && !config.hyperlink { + dired::print_dired_output(config, &self.dired, &mut self.state.out)?; + } + Ok(()) + } + + fn initialize(&mut self, _config: &Config) -> UResult<()> { + if let Some(style_manager) = self.state.style_manager.as_mut() { + if style_manager.get_normal_style().is_some() { + let to_write = style_manager.reset(true); + write!(self.state.out, "{to_write}")?; + } + } + Ok(()) + } +} + +/// Lists files and directories, sending structured output to a custom sink. +/// +/// This function provides programmatic access to ls functionality without +/// requiring text parsing. It enumerates files and directories according +/// to the provided configuration and sends each entry to the output sink. +/// +/// # Arguments +/// +/// * `locs` - Paths to list +/// * `config` - Configuration controlling listing behavior +/// * `output` - A sink implementing [`LsOutput`] to receive entries +/// +/// # Example +/// +/// ```ignore +/// use uu_ls::{Config, list_with_output, CollectorOutput}; +/// use std::path::Path; +/// +/// let config = Config::from(&matches)?; +/// let mut output = CollectorOutput::new(); +/// list_with_output(vec![Path::new(".")], &config, &mut output)?; +/// +/// for entry in output.entries() { +/// println!("{}: {:?}", entry.display_name.to_string_lossy(), entry.file_type); +/// } +/// ``` +pub fn list_with_output( + locs: Vec<&Path>, + config: &Config, + output: &mut O, +) -> UResult<()> { let mut files = Vec::::new(); let mut dirs = Vec::::new(); - let mut dired = DiredOutput::default(); let initial_locs_len = locs.len(); let now = SystemTime::now(); - let mut state = ListState { - out: BufWriter::new(stdout()), - style_manager: config.color.as_ref().map(StyleManager::new), - #[cfg(unix)] - uid_cache: FxHashMap::default(), - #[cfg(unix)] - gid_cache: FxHashMap::default(), - 
#[cfg(not(unix))] - uid_cache: (), - #[cfg(not(unix))] - gid_cache: (), - // Time range for which to use the "recent" format. Anything from 0.5 year in the past to now - // (files with modification time in the future use "old" format). - // According to GNU a Gregorian year has 365.2425 * 24 * 60 * 60 == 31556952 seconds on the average. - recent_time_range: (now - Duration::new(31_556_952 / 2, 0))..=now, - stack: Vec::new(), - listed_ancestors: FxHashSet::default(), - initial_locs_len, - display_buf: Vec::with_capacity(if config.format == Format::Long { - 128 - } else { - 0 - }), - }; - for loc in locs { let path_data = PathData::new(loc.into(), None, None, config, true); @@ -1065,16 +1191,10 @@ pub fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> { sort_entries(&mut files, config); sort_entries(&mut dirs, config); - if let Some(style_manager) = state.style_manager.as_mut() { - // ls will try to write a reset before anything is written if normal - // color is given - if style_manager.get_normal_style().is_some() { - let to_write = style_manager.reset(true); - write!(state.out, "{to_write}")?; - } - } + output.initialize(config)?; - display_items(&files, config, &mut state, &mut dired)?; + // Write file entries + output.write_entries(&files, config)?; for (pos, path_data) in dirs.iter().enumerate() { let needs_blank_line = pos != 0 || !files.is_empty(); @@ -1083,7 +1203,7 @@ pub fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> { let read_dir = match fs::read_dir(path_data.path()) { Err(err) => { // flush stdout buffer before the error to preserve formatting and order - state.out.flush()?; + output.flush()?; show!(LsError::IOErrorContext( path_data.path().to_path_buf(), err, @@ -1094,56 +1214,134 @@ pub fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> { Ok(rd) => rd, }; - state.listed_ancestors.insert(FileInformation::from_path( + // Write dir heading for multiple arguments or recursive mode + if initial_locs_len > 1 || config.recursive { + 
let is_first = pos == 0 && files.is_empty(); + output.write_dir_header(path_data, config, is_first)?; + } + + let mut listed_ancestors = HashSet::default(); + listed_ancestors.insert(FileInformation::from_path( path_data.path(), path_data.must_dereference, )?); + enter_directory(path_data, read_dir, config, &mut listed_ancestors, output)?; + } + + output.finalize(config)?; + Ok(()) +} - // List each of the arguments to ls first. - depth_first_list( - (path_data.path().to_path_buf(), needs_blank_line), - read_dir, - config, - &mut state, - &mut dired, - true, - )?; - - // Only runs if it must list recursively. - while let Some(dir_data) = state.stack.pop() { - let read_dir = match fs::read_dir(&dir_data.0) { +fn enter_directory( + path_data: &PathData, + mut read_dir: ReadDir, + config: &Config, + listed_ancestors: &mut HashSet, + output: &mut O, +) -> UResult<()> { + // Create vec of entries with initial dot files + let mut entries: Vec = if config.files == Files::All { + vec![ + PathData::new( + path_data.path().to_path_buf(), + None, + Some(".".into()), + config, + false, + ), + PathData::new( + path_data.path().join(".."), + None, + Some("..".into()), + config, + false, + ), + ] + } else { + vec![] + }; + + // Convert those entries to the PathData struct + for raw_entry in read_dir.by_ref() { + let dir_entry = match raw_entry { + Ok(path) => path, + Err(err) => { + output.flush()?; + show!(LsError::IOError(err)); + continue; + } + }; + + if should_display(&dir_entry, config) { + let entry_path_data = + PathData::new(dir_entry.path(), Some(dir_entry), None, config, false); + entries.push(entry_path_data); + } + } + + sort_entries(&mut entries, config); + + // Print total after any error display + if config.format == Format::Long || config.alloc_size { + let mut total_size = 0u64; + for item in &entries { + total_size += item + .metadata() + .as_ref() + .map_or(0, |md| get_block_size(md, config)); + } + output.write_total(total_size, config)?; + } + + 
output.write_entries(&entries, config)?; + + if config.recursive { + for e in entries + .iter() + .skip(if config.files == Files::All { 2 } else { 0 }) + .filter(|p| p.file_type().is_some_and(|ft| ft.is_dir())) + { + match fs::read_dir(e.path()) { Err(err) => { - // flush stdout buffer before the error to preserve formatting and order - state.out.flush()?; + output.flush()?; show!(LsError::IOErrorContext( - path_data.path().to_path_buf(), + e.path().to_path_buf(), err, - path_data.command_line + e.command_line )); - continue; } - Ok(rd) => rd, - }; - - depth_first_list(dir_data, read_dir, config, &mut state, &mut dired, false)?; - - // Heuristic to ensure stack does not keep its capacity forever if there is - // combinatorial explosion; we decrease it logarithmically here. - let (cap, len) = (state.stack.capacity(), state.stack.len()); - if cap > (len + 4) * 2 { - state.stack.shrink_to(len + (cap - len) / 2); + Ok(rd) => { + if listed_ancestors + .insert(FileInformation::from_path(e.path(), e.must_dereference)?) + { + // when listing several directories in recursive mode, we show + // "dirname:" at the beginning of the file list + output.write_dir_header(e, config, false)?; + enter_directory(e, rd, config, listed_ancestors, output)?; + listed_ancestors + .remove(&FileInformation::from_path(e.path(), e.must_dereference)?); + } else { + output.flush()?; + show!(LsError::AlreadyListedError(e.path().to_path_buf())); + } + } } } - - // No need to clear state.buf since [`enter_directory`] drains it. - state.listed_ancestors.clear(); - } - if config.dired && !config.hyperlink { - dired::print_dired_output(config, &dired, &mut state.out)?; } + Ok(()) } +/// Lists files and directories with text output to stdout. +/// +/// This is the standard ls entry point that formats output as text. +/// It uses [`list_with_output`] internally with a text formatter. 
+#[allow(clippy::cognitive_complexity)] +pub fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> { + let mut output = TextOutput::new(config); + list_with_output(locs, config, &mut output) +} + fn sort_entries(entries: &mut [PathData], config: &Config) { match config.sort { Sort::Time => entries.sort_unstable_by_key(|k| { @@ -1208,155 +1406,56 @@ fn sort_entries(entries: &mut [PathData], config: &Config) { } } -fn depth_first_list( - (dir_path, needs_blank_line): DirData, - mut read_dir: ReadDir, - config: &Config, - state: &mut ListState, - dired: &mut DiredOutput, - is_top_level: bool, -) -> UResult<()> { - let path_data = PathData::new(dir_path.as_path().into(), None, None, config, false); - - // Print dir heading - name... 'total' comes after error display - if state.initial_locs_len > 1 || config.recursive { - if is_top_level { - if needs_blank_line { - writeln!(state.out)?; - if config.dired { - dired.padding += 1; - } - } - if config.dired { - dired::indent(&mut state.out)?; - } - show_dir_name(&path_data, &mut state.out, config)?; - writeln!(state.out)?; - if config.dired { - let dir_len = path_data.path().as_os_str().len(); - // add the //SUBDIRED// coordinates - dired::calculate_subdired(dired, dir_len); - // Add the padding for the dir name - dired::add_dir_name(dired, dir_len); - } - } else { - writeln!(state.out)?; - if config.dired { - dired.padding += 1; - dired::indent(&mut state.out)?; - let dir_name_size = path_data.path().as_os_str().len(); - dired::calculate_subdired(dired, dir_name_size); - dired::add_dir_name(dired, dir_name_size); - } - show_dir_name(&path_data, &mut state.out, config)?; - writeln!(state.out)?; - } +fn is_hidden(file_path: &DirEntry) -> bool { + #[cfg(windows)] + { + let metadata = file_path.metadata().unwrap(); + let attr = metadata.file_attributes(); + (attr & 0x2) > 0 } + #[cfg(not(windows))] + { + file_path + .file_name() + .to_str() + .is_some_and(|res| res.starts_with('.')) + } +} - // Append entries with initial 
dot files and record their existence - let (ref mut buf, trim) = if config.files == Files::All { - const DOT_DIRECTORIES: usize = 2; - let v = vec![ - PathData::new( - path_data.path().into(), - None, - Some(OsStr::new(".").into()), - config, - false, - ), - PathData::new( - // On WASI the sandbox may block access to ".." at the - // preopened root. Fall back to "." so the entry still - // appears with valid metadata instead of an error. - { - let dotdot = path_data.path().join(".."); - #[cfg(target_os = "wasi")] - let dotdot = if dotdot.metadata().is_err() { - path_data.path().into() - } else { - dotdot - }; - dotdot.into() - }, - None, - Some(OsStr::new("..").into()), - config, - false, - ), - ]; - (v, DOT_DIRECTORIES) - } else { - (Vec::new(), 0) - }; - - // Convert those entries to the PathData struct - for raw_entry in read_dir.by_ref() { - match raw_entry { - Ok(dir_entry) => { - if should_display(&dir_entry, config) { - buf.push(PathData::new( - dir_entry.path().into(), - Some(dir_entry), - None, - config, - false, - )); - } - } - Err(err) => { - state.out.flush()?; - show!(LsError::IOError(err)); - } - } +fn should_display(entry: &DirEntry, config: &Config) -> bool { + // check if hidden + if config.files == Files::Normal && is_hidden(entry) { + return false; } - // Relinquish unused space since we won't need it anymore. 
- buf.shrink_to_fit(); - sort_entries(buf, config); + // check if it is among ignore_patterns + let options = MatchOptions { + // setting require_literal_leading_dot to match behavior in GNU ls + require_literal_leading_dot: true, + require_literal_separator: false, + case_sensitive: true, + }; - if config.format == Format::Long || config.alloc_size { - let total = write_total(buf, config, &mut state.out)?; - if config.dired { - dired::add_total(dired, total); - } - } + let file_name = entry.file_name(); + // If the decoding fails, still match best we can + // FIXME: use OsStrings or Paths once we have a glob crate that supports it: + // https://github.com/rust-lang/glob/issues/23 + // https://github.com/rust-lang/glob/issues/78 + // https://github.com/BurntSushi/ripgrep/issues/1250 - display_items(buf, config, state, dired)?; + let file_name = match file_name.to_str() { + Some(s) => Cow::Borrowed(s), + None => file_name.to_string_lossy(), + }; - if config.recursive { - for e in buf - .iter() - .skip(trim) - .filter(|p| p.file_type().is_some_and(FileType::is_dir)) - .rev() - { - // Try to open only to report any errors in order to match GNU semantics. - if let Err(err) = fs::read_dir(e.path()) { - state.out.flush()?; - show!(LsError::IOErrorContext( - e.path().to_path_buf(), - err, - e.command_line - )); - } else { - let fi = FileInformation::from_path(e.path(), e.must_dereference)?; - if state.listed_ancestors.insert(fi) { - // Push to stack, but with a less aggressive growth curve. 
- let (cap, len) = (state.stack.capacity(), state.stack.len()); - if cap == len { - state.stack.reserve_exact(len / 4 + 4); - } - state.stack.push((e.path().to_path_buf(), true)); - } else { - state.out.flush()?; - show!(LsError::AlreadyListedError(e.path().to_path_buf())); - } - } - } - } - Ok(()) + !config + .ignore_patterns + .iter() + .any(|p| p.matches_with(&file_name, options)) } +#[allow(clippy::cognitive_complexity)] + fn get_metadata_with_deref_opt(p_buf: &Path, dereference: bool) -> std::io::Result { if dereference { p_buf.metadata() @@ -1380,6 +1479,234 @@ fn write_total(items: &[PathData], config: &Config, out: &mut BufWriter) out.write_all(total.as_bytes())?; out.write_all(&[config.line_ending as u8])?; Ok(total.len() + 1) + +fn display_dir_entry_size( + entry: &PathData, + config: &Config, + state: &mut ListState, +) -> (usize, usize, usize, usize, usize, usize) { + // TODO: Cache/memorize the display_* results so we don't have to recalculate them. + if let Some(md) = entry.metadata() { + let (size_len, major_len, minor_len) = match display_len_or_rdev(md, config) { + SizeOrDeviceId::Device(major, minor) => { + (major.len() + minor.len() + 2usize, major.len(), minor.len()) + } + SizeOrDeviceId::Size(size) => (size.len(), 0usize, 0usize), + }; + ( + display_symlink_count(md).len(), + display_uname(md, config, state).len(), + display_group(md, config, state).len(), + size_len, + major_len, + minor_len, + ) + } else { + (0, 0, 0, 0, 0, 0) + } +} + +// A simple, performant, ExtendPad trait to add a string to a Vec, padding with spaces +// on the left or right, without making additional copies, or using formatting functions. 
+trait ExtendPad { + fn extend_pad_left(&mut self, string: &str, count: usize); + fn extend_pad_right(&mut self, string: &str, count: usize); +} + +impl ExtendPad for Vec { + fn extend_pad_left(&mut self, string: &str, count: usize) { + if string.len() < count { + self.extend(iter::repeat_n(b' ', count - string.len())); + } + self.extend(string.as_bytes()); + } + + fn extend_pad_right(&mut self, string: &str, count: usize) { + self.extend(string.as_bytes()); + if string.len() < count { + self.extend(iter::repeat_n(b' ', count - string.len())); + } + } +} + +// TODO: Consider converting callers to use ExtendPad instead, as it avoids +// additional copies. +fn pad_left(string: &str, count: usize) -> String { + format!("{string:>count$}") +} + +fn display_additional_leading_info( + item: &PathData, + padding: &PaddingCollection, + config: &Config, +) -> UResult { + let mut result = String::new(); + #[cfg(unix)] + { + if config.inode { + let i = if let Some(md) = item.metadata() { + get_inode(md) + } else { + "?".to_owned() + }; + write!(result, "{} ", pad_left(&i, padding.inode)).unwrap(); + } + } + + if config.alloc_size { + let s = if let Some(md) = item.metadata() { + display_size(get_block_size(md, config), config) + } else { + "?".to_owned() + }; + // extra space is insert to align the sizes, as needed for all formats, except for the comma format. + if config.format == Format::Commas { + write!(result, "{s} ").unwrap(); + } else { + write!(result, "{} ", pad_left(&s, padding.block_size)).unwrap(); + } + } + Ok(result) +} + +#[allow(clippy::cognitive_complexity)] +fn display_items( + items: &[PathData], + config: &Config, + state: &mut ListState, + dired: &mut DiredOutput, +) -> UResult<()> { + // `-Z`, `--context`: + // Display the SELinux security context or '?' if none is found. When used with the `-l` + // option, print the security context to the left of the size column. 
+ + let quoted = items.iter().any(|item| { + let name = locale_aware_escape_name(item.display_name(), config.quoting_style); + os_str_starts_with(&name, b"'") + }); + + if config.format == Format::Long { + let padding_collection = calculate_padding_collection(items, config, state); + + for item in items { + #[cfg(unix)] + let should_display_leading_info = config.inode || config.alloc_size; + #[cfg(not(unix))] + let should_display_leading_info = config.alloc_size; + + if should_display_leading_info { + let more_info = display_additional_leading_info(item, &padding_collection, config)?; + + write!(state.out, "{more_info}")?; + } + + display_item_long(item, &padding_collection, config, state, dired, quoted)?; + } + } else { + let mut longest_context_len = 1; + let prefix_context = if config.context { + for item in items { + let context_len = item.security_context(config).len(); + longest_context_len = context_len.max(longest_context_len); + } + Some(longest_context_len) + } else { + None + }; + + let padding = calculate_padding_collection(items, config, state); + + // we need to apply normal color to non filename output + if let Some(style_manager) = &mut state.style_manager { + write!(state.out, "{}", style_manager.apply_normal())?; + } + + let mut names_vec = Vec::new(); + + #[cfg(unix)] + let should_display_leading_info = config.inode || config.alloc_size; + #[cfg(not(unix))] + let should_display_leading_info = config.alloc_size; + + for i in items { + let more_info = if should_display_leading_info { + Some(display_additional_leading_info(i, &padding, config)?) + } else { + None + }; + // it's okay to set current column to zero which is used to decide + // whether text will wrap or not, because when format is grid or + // column ls will try to place the item name in a new line if it + // wraps. 
+ let cell = display_item_name( + i, + config, + prefix_context, + more_info, + state, + LazyCell::new(Box::new(|| 0)), + ); + + names_vec.push(cell); + } + + let mut names = names_vec.into_iter(); + + match config.format { + Format::Columns => { + display_grid( + names, + config.width, + Direction::TopToBottom, + &mut state.out, + quoted, + config.tab_size, + )?; + } + Format::Across => { + display_grid( + names, + config.width, + Direction::LeftToRight, + &mut state.out, + quoted, + config.tab_size, + )?; + } + Format::Commas => { + let mut current_col = 0; + if let Some(name) = names.next() { + write_os_str(&mut state.out, &name)?; + current_col = ansi_width(&name.to_string_lossy()) as u16 + 2; + } + for name in names { + let name_width = ansi_width(&name.to_string_lossy()) as u16; + // If the width is 0 we print one single line + if config.width != 0 && current_col + name_width + 1 > config.width { + current_col = name_width + 2; + writeln!(state.out, ",")?; + } else { + current_col += name_width + 2; + write!(state.out, ", ")?; + } + write_os_str(&mut state.out, &name)?; + } + // Current col is never zero again if names have been printed. + // So we print a newline. + if current_col > 0 { + write!(state.out, "{}", config.line_ending)?; + } + } + _ => { + for name in names { + write_os_str(&mut state.out, &name)?; + write!(state.out, "{}", config.line_ending)?; + } + } + } + } + + Ok(()) } #[allow(unused_variables)] diff --git a/src/uu/ls/src/output.rs b/src/uu/ls/src/output.rs new file mode 100644 index 00000000000..f5d780f5909 --- /dev/null +++ b/src/uu/ls/src/output.rs @@ -0,0 +1,358 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +//! Output traits and types for programmatic access to ls functionality. +//! +//! This module provides a visitor/sink pattern that separates file enumeration +//! 
logic from output formatting. This allows programmatic consumers (shells, +//! file managers, etc.) to receive structured data without parsing text output. +//! +//! # Example +//! +//! ```ignore +//! use uu_ls::{Config, list_with_output, CollectorOutput}; +//! use std::path::Path; +//! +//! let config = Config::from(&matches)?; +//! let mut output = CollectorOutput::new(); +//! list_with_output(vec![Path::new(".")], &config, &mut output)?; +//! +//! for entry in output.entries() { +//! println!("{}: {} bytes", +//! entry.display_name.to_string_lossy(), +//! entry.size().unwrap_or(0)); +//! } +//! ``` + +use std::ffi::OsString; +use std::fs::{FileType, Metadata}; +use std::path::PathBuf; + +use uucore::error::UResult; + +use crate::{Config, PathData}; + +/// Information about a single file/directory entry. +/// +/// This struct provides programmatic access to file metadata without +/// requiring text parsing. All fields are pre-computed and ready for use. +#[derive(Debug, Clone)] +pub struct EntryInfo { + /// The full path to the file + pub path: PathBuf, + /// The display name (file name portion, may differ from path for . and ..) + pub display_name: OsString, + /// The file type (file, directory, symlink, etc.) + pub file_type: Option, + /// File metadata (size, permissions, timestamps, etc.) 
+ pub metadata: Option, + /// Security context (SELinux) if available + pub security_context: String, + /// Whether this entry was specified on the command line + pub command_line: bool, + /// Whether symlinks should be dereferenced for this entry + pub must_dereference: bool, +} + +impl EntryInfo { + /// Returns true if this entry represents a directory + pub fn is_dir(&self) -> bool { + self.file_type.as_ref().is_some_and(|ft| ft.is_dir()) + } + + /// Returns true if this entry represents a regular file + pub fn is_file(&self) -> bool { + self.file_type.as_ref().is_some_and(|ft| ft.is_file()) + } + + /// Returns true if this entry represents a symbolic link + pub fn is_symlink(&self) -> bool { + self.file_type.as_ref().is_some_and(|ft| ft.is_symlink()) + } + + /// Returns the file size in bytes, if metadata is available + pub fn size(&self) -> Option { + self.metadata.as_ref().map(|m| m.len()) + } + + /// Returns the file name as a string slice, if valid UTF-8 + pub fn file_name(&self) -> Option<&str> { + self.display_name.to_str() + } +} + +/// Trait for receiving ls output entries. +/// +/// Implement this trait to receive structured data from the ls enumeration +/// process. The trait is designed to support both streaming (one entry at a time) +/// and batched (all entries at once) use cases. +/// +/// For programmatic access, implement [`write_entry`](LsOutput::write_entry) to +/// receive each entry individually. +/// +/// The internal `TextOutput` implementation uses [`write_entries`](LsOutput::write_entries) +/// to receive batches for proper column alignment and grid formatting. +pub trait LsOutput { + /// Called for each file/directory entry (streaming mode). + /// + /// Default implementation does nothing. Override this for programmatic access + /// where you want to process entries one at a time. + fn write_entry(&mut self, _entry: &EntryInfo) -> UResult<()> { + Ok(()) + } + + /// Called with a batch of entries for a directory. 
+ /// + /// Default implementation calls `write_entry` for each entry. + /// Override this for text output that needs all entries for formatting. + fn write_entries(&mut self, entries: &[PathData], config: &Config) -> UResult<()> { + for entry in entries { + self.write_entry(&entry.to_entry_info(config))?; + } + Ok(()) + } + + /// Called when entering a directory (for recursive listings or multiple arguments). + /// + /// # Arguments + /// * `path_data` - The directory being entered + /// * `config` - The ls configuration + /// * `is_first` - Whether this is the first directory (affects newline handling) + fn write_dir_header( + &mut self, + _path_data: &PathData, + _config: &Config, + _is_first: bool, + ) -> UResult<()> { + Ok(()) + } + + /// Called to report the total blocks for a directory in long format. + /// + /// The `total_size` parameter is the total number of blocks used by + /// files in the directory. + fn write_total(&mut self, _total_size: u64, _config: &Config) -> UResult<()> { + Ok(()) + } + + /// Called to flush any buffered output (e.g., before error messages). + fn flush(&mut self) -> UResult<()> { + Ok(()) + } + + /// Called when all entries have been written. + /// + /// Use this for final cleanup, printing dired output, etc. + fn finalize(&mut self, _config: &Config) -> UResult<()> { + Ok(()) + } + + /// Called at the start of listing, before any entries are processed. + /// + /// Use this for initialization that needs the config (e.g., color reset). + fn initialize(&mut self, _config: &Config) -> UResult<()> { + Ok(()) + } +} + +/// A simple output sink that collects all entries into a Vec. +/// +/// This is useful for programmatic access where you want to collect +/// all entries and process them after enumeration is complete. 
+/// +/// # Example +/// +/// ```ignore +/// use uu_ls::{Config, list_with_output, CollectorOutput}; +/// use std::path::Path; +/// +/// let mut output = CollectorOutput::new(); +/// list_with_output(vec![Path::new(".")], &config, &mut output)?; +/// +/// for entry in output.entries() { +/// println!("{}: {} bytes", +/// entry.display_name.to_string_lossy(), +/// entry.size().unwrap_or(0)); +/// } +/// ``` +#[derive(Debug, Default)] +pub struct CollectorOutput { + entries: Vec, + directories: Vec, + totals: Vec, +} + +impl CollectorOutput { + /// Create a new empty collector + pub fn new() -> Self { + Self::default() + } + + /// Get all collected entries + pub fn entries(&self) -> &[EntryInfo] { + &self.entries + } + + /// Consume the collector and return all entries + pub fn into_entries(self) -> Vec { + self.entries + } + + /// Get all directory headers that were encountered + pub fn directories(&self) -> &[PathBuf] { + &self.directories + } + + /// Get all totals that were written + pub fn totals(&self) -> &[u64] { + &self.totals + } + + /// Clear all collected data + pub fn clear(&mut self) { + self.entries.clear(); + self.directories.clear(); + self.totals.clear(); + } +} + +impl LsOutput for CollectorOutput { + fn write_entry(&mut self, entry: &EntryInfo) -> UResult<()> { + self.entries.push(entry.clone()); + Ok(()) + } + + fn write_dir_header( + &mut self, + path_data: &PathData, + _config: &Config, + _is_first: bool, + ) -> UResult<()> { + self.directories.push(path_data.path().to_path_buf()); + Ok(()) + } + + fn write_total(&mut self, total_size: u64, _config: &Config) -> UResult<()> { + self.totals.push(total_size); + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_entry_info_is_dir() { + let entry = EntryInfo { + path: PathBuf::from("/test/dir"), + display_name: OsString::from("dir"), + file_type: None, + metadata: None, + security_context: String::new(), + command_line: false, + must_dereference: false, + }; + 
assert!(!entry.is_dir()); + } + + #[test] + fn test_entry_info_size() { + let entry = EntryInfo { + path: PathBuf::from("/test/file"), + display_name: OsString::from("file"), + file_type: None, + metadata: None, + security_context: String::new(), + command_line: false, + must_dereference: false, + }; + assert_eq!(entry.size(), None); + } + + #[test] + fn test_entry_info_file_name() { + let entry = EntryInfo { + path: PathBuf::from("/test/file.txt"), + display_name: OsString::from("file.txt"), + file_type: None, + metadata: None, + security_context: String::new(), + command_line: false, + must_dereference: false, + }; + assert_eq!(entry.file_name(), Some("file.txt")); + } + + #[test] + fn test_collector_output_new() { + let collector = CollectorOutput::new(); + assert!(collector.entries().is_empty()); + assert!(collector.directories().is_empty()); + assert!(collector.totals().is_empty()); + } + + #[test] + fn test_collector_output_write_entry() { + let mut collector = CollectorOutput::new(); + let entry = EntryInfo { + path: PathBuf::from("/test/file"), + display_name: OsString::from("file"), + file_type: None, + metadata: None, + security_context: String::new(), + command_line: false, + must_dereference: false, + }; + collector.write_entry(&entry).unwrap(); + assert_eq!(collector.entries().len(), 1); + assert_eq!(collector.entries()[0].display_name, OsString::from("file")); + } + + #[test] + fn test_collector_output_clear() { + let mut collector = CollectorOutput::new(); + let entry = EntryInfo { + path: PathBuf::from("/test/file"), + display_name: OsString::from("file"), + file_type: None, + metadata: None, + security_context: String::new(), + command_line: false, + must_dereference: false, + }; + collector.write_entry(&entry).unwrap(); + + collector.clear(); + assert!(collector.entries().is_empty()); + assert!(collector.directories().is_empty()); + assert!(collector.totals().is_empty()); + } + + #[test] + fn test_collector_output_into_entries() { + let mut 
collector = CollectorOutput::new(); + let entry = EntryInfo { + path: PathBuf::from("/test/file"), + display_name: OsString::from("file"), + file_type: None, + metadata: None, + security_context: String::new(), + command_line: false, + must_dereference: false, + }; + collector.write_entry(&entry).unwrap(); + + let entries = collector.into_entries(); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].display_name, OsString::from("file")); + } + + #[test] + fn test_collector_output_flush() { + let mut collector = CollectorOutput::new(); + assert!(collector.flush().is_ok()); + } +} From f7ce22c462e7db2f6ee9c28fd0c98f8a72fee632 Mon Sep 17 00:00:00 2001 From: Darren Schroeder <343840+fdncred@users.noreply.github.com> Date: Mon, 29 Dec 2025 07:32:09 -0600 Subject: [PATCH 02/15] redo changes --- src/uu/ls/src/ls.rs | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 714beee7205..cc6700c80ed 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -1161,6 +1161,20 @@ pub fn list_with_output( let initial_locs_len = locs.len(); let now = SystemTime::now(); + let mut state = ListState { + out: BufWriter::new(stdout()), + style_manager: config.color.as_ref().map(StyleManager::new), + #[cfg(unix)] + uid_cache: HashMap::default(), + #[cfg(unix)] + gid_cache: HashMap::default(), + // Time range for which to use the "recent" format. Anything from 0.5 year in the past to now + // (files with modification time in the future use "old" format). + // According to GNU a Gregorian year has 365.2425 * 24 * 60 * 60 == 31556952 seconds on the average. 
+ recent_time_range: (SystemTime::now() - Duration::new(31_556_952 / 2, 0)) + ..=SystemTime::now(), + }; + for loc in locs { let path_data = PathData::new(loc.into(), None, None, config, true); @@ -1225,7 +1239,14 @@ pub fn list_with_output( path_data.path(), path_data.must_dereference, )?); - enter_directory(path_data, read_dir, config, &mut listed_ancestors, output)?; + enter_directory( + path_data, + read_dir, + config, + &mut state, + &mut listed_ancestors, + output, + )?; } output.finalize(config)?; @@ -1236,6 +1257,7 @@ fn enter_directory( path_data: &PathData, mut read_dir: ReadDir, config: &Config, + state: &mut ListState, listed_ancestors: &mut HashSet, output: &mut O, ) -> UResult<()> { @@ -1317,7 +1339,7 @@ fn enter_directory( // when listing several directories in recursive mode, we show // "dirname:" at the beginning of the file list output.write_dir_header(e, config, false)?; - enter_directory(e, rd, config, listed_ancestors, output)?; + enter_directory(e, rd, config, state, listed_ancestors, output)?; listed_ancestors .remove(&FileInformation::from_path(e.path(), e.must_dereference)?); } else { @@ -1455,7 +1477,6 @@ fn should_display(entry: &DirEntry, config: &Config) -> bool { } #[allow(clippy::cognitive_complexity)] - fn get_metadata_with_deref_opt(p_buf: &Path, dereference: bool) -> std::io::Result { if dereference { p_buf.metadata() @@ -1479,6 +1500,7 @@ fn write_total(items: &[PathData], config: &Config, out: &mut BufWriter) out.write_all(total.as_bytes())?; out.write_all(&[config.line_ending as u8])?; Ok(total.len() + 1) +} fn display_dir_entry_size( entry: &PathData, From 440ca9fc4af10f5f0465601032710a70c983d40a Mon Sep 17 00:00:00 2001 From: Darren Schroeder <343840+fdncred@users.noreply.github.com> Date: Mon, 29 Dec 2025 18:37:54 -0600 Subject: [PATCH 03/15] clippy --- src/uu/ls/src/ls.rs | 30 ++++-------------------------- 1 file changed, 4 insertions(+), 26 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 
cc6700c80ed..4d9977e4a84 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -1049,7 +1049,7 @@ impl<'a> TextOutput<'a> { } } -impl<'a> LsOutput for TextOutput<'a> { +impl LsOutput for TextOutput<'_> { fn write_entries(&mut self, entries: &[PathData], config: &Config) -> UResult<()> { display_items(entries, config, &mut self.state, &mut self.dired) } @@ -1095,7 +1095,7 @@ impl<'a> LsOutput for TextOutput<'a> { translate!("ls-total", "size" => display_size(total_size, config)), config.line_ending ); - write!(self.state.out, "{}", total_str)?; + write!(self.state.out, "{total_str}")?; if config.dired { dired::add_total(&mut self.dired, total_str.len()); } @@ -1161,20 +1161,6 @@ pub fn list_with_output( let initial_locs_len = locs.len(); let now = SystemTime::now(); - let mut state = ListState { - out: BufWriter::new(stdout()), - style_manager: config.color.as_ref().map(StyleManager::new), - #[cfg(unix)] - uid_cache: HashMap::default(), - #[cfg(unix)] - gid_cache: HashMap::default(), - // Time range for which to use the "recent" format. Anything from 0.5 year in the past to now - // (files with modification time in the future use "old" format). - // According to GNU a Gregorian year has 365.2425 * 24 * 60 * 60 == 31556952 seconds on the average. 
- recent_time_range: (SystemTime::now() - Duration::new(31_556_952 / 2, 0)) - ..=SystemTime::now(), - }; - for loc in locs { let path_data = PathData::new(loc.into(), None, None, config, true); @@ -1239,14 +1225,7 @@ pub fn list_with_output( path_data.path(), path_data.must_dereference, )?); - enter_directory( - path_data, - read_dir, - config, - &mut state, - &mut listed_ancestors, - output, - )?; + enter_directory(path_data, read_dir, config, &mut listed_ancestors, output)?; } output.finalize(config)?; @@ -1257,7 +1236,6 @@ fn enter_directory( path_data: &PathData, mut read_dir: ReadDir, config: &Config, - state: &mut ListState, listed_ancestors: &mut HashSet, output: &mut O, ) -> UResult<()> { @@ -1339,7 +1317,7 @@ fn enter_directory( // when listing several directories in recursive mode, we show // "dirname:" at the beginning of the file list output.write_dir_header(e, config, false)?; - enter_directory(e, rd, config, state, listed_ancestors, output)?; + enter_directory(e, rd, config, listed_ancestors, output)?; listed_ancestors .remove(&FileInformation::from_path(e.path(), e.must_dereference)?); } else { From fcf657af8a3826fc0171555a64981edc9b129b79 Mon Sep 17 00:00:00 2001 From: Darren Schroeder <343840+fdncred@users.noreply.github.com> Date: Tue, 30 Dec 2025 10:45:53 -0600 Subject: [PATCH 04/15] help cspell understand dired --- .vscode/cspell.dictionaries/jargon.wordlist.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/.vscode/cspell.dictionaries/jargon.wordlist.txt b/.vscode/cspell.dictionaries/jargon.wordlist.txt index 289875c1766..c468ef0ea74 100644 --- a/.vscode/cspell.dictionaries/jargon.wordlist.txt +++ b/.vscode/cspell.dictionaries/jargon.wordlist.txt @@ -26,6 +26,7 @@ conv cyclomatic dedup deduplication +dired demangle denoland deque From 11f7f6cb6b6bd7c991b56ab7703742be78a3624c Mon Sep 17 00:00:00 2001 From: Darren Schroeder <343840+fdncred@users.noreply.github.com> Date: Thu, 9 Apr 2026 11:30:06 -0500 Subject: [PATCH 05/15] rebase --- 
src/uu/ls/src/ls.rs | 300 ++------------------------------------------ 1 file changed, 13 insertions(+), 287 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 4d9977e4a84..6c87c0af69b 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -15,7 +15,7 @@ use std::cell::RefCell; use std::os::unix::fs::{FileTypeExt, MetadataExt}; use std::{ cell::OnceCell, - collections::HashSet, + collections::{HashMap, HashSet}, cmp::Reverse, ffi::{OsStr, OsString}, fs::{self, DirEntry, FileType, Metadata, ReadDir}, @@ -825,7 +825,7 @@ impl<'a> PathData<'a> { pub fn to_entry_info(&self, config: &Config) -> EntryInfo { EntryInfo { path: self.p_buf.clone().into_owned(), - display_name: self.display_name.clone(), + display_name: self.display_name().to_os_string(), file_type: self.file_type().copied(), metadata: self.metadata().cloned(), security_context: self.security_context(config).to_string(), @@ -1043,6 +1043,10 @@ impl<'a> TextOutput<'a> { gid_cache: HashMap::default(), recent_time_range: (SystemTime::now() - Duration::new(31_556_952 / 2, 0)) ..=SystemTime::now(), + stack: Vec::new(), + listed_ancestors: FxHashSet::default(), + initial_locs_len: 0, + display_buf: Vec::new(), }, dired: DiredOutput::default(), } @@ -1159,7 +1163,7 @@ pub fn list_with_output( let mut files = Vec::::new(); let mut dirs = Vec::::new(); let initial_locs_len = locs.len(); - let now = SystemTime::now(); + let _now = SystemTime::now(); for loc in locs { let path_data = PathData::new(loc.into(), None, None, config, true); @@ -1197,7 +1201,7 @@ pub fn list_with_output( output.write_entries(&files, config)?; for (pos, path_data) in dirs.iter().enumerate() { - let needs_blank_line = pos != 0 || !files.is_empty(); + let _needs_blank_line = pos != 0 || !files.is_empty(); // Do read_dir call here to match GNU semantics by printing // read_dir errors before directory headings, names and totals let read_dir = match fs::read_dir(path_data.path()) { @@ -1243,16 +1247,16 @@ fn 
enter_directory( let mut entries: Vec = if config.files == Files::All { vec![ PathData::new( - path_data.path().to_path_buf(), + path_data.path().to_path_buf().into(), None, - Some(".".into()), + Some(OsStr::new(".").into()), config, false, ), PathData::new( - path_data.path().join(".."), + path_data.path().join("..").into(), None, - Some("..".into()), + Some(OsStr::new("..").into()), config, false, ), @@ -1274,7 +1278,7 @@ fn enter_directory( if should_display(&dir_entry, config) { let entry_path_data = - PathData::new(dir_entry.path(), Some(dir_entry), None, config, false); + PathData::new(dir_entry.path().into(), Some(dir_entry), None, config, false); entries.push(entry_path_data); } } @@ -1422,38 +1426,6 @@ fn is_hidden(file_path: &DirEntry) -> bool { } } -fn should_display(entry: &DirEntry, config: &Config) -> bool { - // check if hidden - if config.files == Files::Normal && is_hidden(entry) { - return false; - } - - // check if it is among ignore_patterns - let options = MatchOptions { - // setting require_literal_leading_dot to match behavior in GNU ls - require_literal_leading_dot: true, - require_literal_separator: false, - case_sensitive: true, - }; - - let file_name = entry.file_name(); - // If the decoding fails, still match best we can - // FIXME: use OsStrings or Paths once we have a glob crate that supports it: - // https://github.com/rust-lang/glob/issues/23 - // https://github.com/rust-lang/glob/issues/78 - // https://github.com/BurntSushi/ripgrep/issues/1250 - - let file_name = match file_name.to_str() { - Some(s) => Cow::Borrowed(s), - None => file_name.to_string_lossy(), - }; - - !config - .ignore_patterns - .iter() - .any(|p| p.matches_with(&file_name, options)) -} - #[allow(clippy::cognitive_complexity)] fn get_metadata_with_deref_opt(p_buf: &Path, dereference: bool) -> std::io::Result { if dereference { @@ -1463,252 +1435,6 @@ fn get_metadata_with_deref_opt(p_buf: &Path, dereference: bool) -> std::io::Resu } } -fn write_total(items: 
&[PathData], config: &Config, out: &mut BufWriter) -> UResult { - let mut total_size = 0; - for item in items { - total_size += item - .metadata() - .as_ref() - .map_or(0, |md| get_block_size(md, config)); - } - if config.dired { - dired::indent(out)?; - } - let total = translate!("ls-total", "size" => display_size(total_size, config)); - out.write_all(total.as_bytes())?; - out.write_all(&[config.line_ending as u8])?; - Ok(total.len() + 1) -} - -fn display_dir_entry_size( - entry: &PathData, - config: &Config, - state: &mut ListState, -) -> (usize, usize, usize, usize, usize, usize) { - // TODO: Cache/memorize the display_* results so we don't have to recalculate them. - if let Some(md) = entry.metadata() { - let (size_len, major_len, minor_len) = match display_len_or_rdev(md, config) { - SizeOrDeviceId::Device(major, minor) => { - (major.len() + minor.len() + 2usize, major.len(), minor.len()) - } - SizeOrDeviceId::Size(size) => (size.len(), 0usize, 0usize), - }; - ( - display_symlink_count(md).len(), - display_uname(md, config, state).len(), - display_group(md, config, state).len(), - size_len, - major_len, - minor_len, - ) - } else { - (0, 0, 0, 0, 0, 0) - } -} - -// A simple, performant, ExtendPad trait to add a string to a Vec, padding with spaces -// on the left or right, without making additional copies, or using formatting functions. 
-trait ExtendPad { - fn extend_pad_left(&mut self, string: &str, count: usize); - fn extend_pad_right(&mut self, string: &str, count: usize); -} - -impl ExtendPad for Vec { - fn extend_pad_left(&mut self, string: &str, count: usize) { - if string.len() < count { - self.extend(iter::repeat_n(b' ', count - string.len())); - } - self.extend(string.as_bytes()); - } - - fn extend_pad_right(&mut self, string: &str, count: usize) { - self.extend(string.as_bytes()); - if string.len() < count { - self.extend(iter::repeat_n(b' ', count - string.len())); - } - } -} - -// TODO: Consider converting callers to use ExtendPad instead, as it avoids -// additional copies. -fn pad_left(string: &str, count: usize) -> String { - format!("{string:>count$}") -} - -fn display_additional_leading_info( - item: &PathData, - padding: &PaddingCollection, - config: &Config, -) -> UResult { - let mut result = String::new(); - #[cfg(unix)] - { - if config.inode { - let i = if let Some(md) = item.metadata() { - get_inode(md) - } else { - "?".to_owned() - }; - write!(result, "{} ", pad_left(&i, padding.inode)).unwrap(); - } - } - - if config.alloc_size { - let s = if let Some(md) = item.metadata() { - display_size(get_block_size(md, config), config) - } else { - "?".to_owned() - }; - // extra space is insert to align the sizes, as needed for all formats, except for the comma format. - if config.format == Format::Commas { - write!(result, "{s} ").unwrap(); - } else { - write!(result, "{} ", pad_left(&s, padding.block_size)).unwrap(); - } - } - Ok(result) -} - -#[allow(clippy::cognitive_complexity)] -fn display_items( - items: &[PathData], - config: &Config, - state: &mut ListState, - dired: &mut DiredOutput, -) -> UResult<()> { - // `-Z`, `--context`: - // Display the SELinux security context or '?' if none is found. When used with the `-l` - // option, print the security context to the left of the size column. 
- - let quoted = items.iter().any(|item| { - let name = locale_aware_escape_name(item.display_name(), config.quoting_style); - os_str_starts_with(&name, b"'") - }); - - if config.format == Format::Long { - let padding_collection = calculate_padding_collection(items, config, state); - - for item in items { - #[cfg(unix)] - let should_display_leading_info = config.inode || config.alloc_size; - #[cfg(not(unix))] - let should_display_leading_info = config.alloc_size; - - if should_display_leading_info { - let more_info = display_additional_leading_info(item, &padding_collection, config)?; - - write!(state.out, "{more_info}")?; - } - - display_item_long(item, &padding_collection, config, state, dired, quoted)?; - } - } else { - let mut longest_context_len = 1; - let prefix_context = if config.context { - for item in items { - let context_len = item.security_context(config).len(); - longest_context_len = context_len.max(longest_context_len); - } - Some(longest_context_len) - } else { - None - }; - - let padding = calculate_padding_collection(items, config, state); - - // we need to apply normal color to non filename output - if let Some(style_manager) = &mut state.style_manager { - write!(state.out, "{}", style_manager.apply_normal())?; - } - - let mut names_vec = Vec::new(); - - #[cfg(unix)] - let should_display_leading_info = config.inode || config.alloc_size; - #[cfg(not(unix))] - let should_display_leading_info = config.alloc_size; - - for i in items { - let more_info = if should_display_leading_info { - Some(display_additional_leading_info(i, &padding, config)?) - } else { - None - }; - // it's okay to set current column to zero which is used to decide - // whether text will wrap or not, because when format is grid or - // column ls will try to place the item name in a new line if it - // wraps. 
- let cell = display_item_name( - i, - config, - prefix_context, - more_info, - state, - LazyCell::new(Box::new(|| 0)), - ); - - names_vec.push(cell); - } - - let mut names = names_vec.into_iter(); - - match config.format { - Format::Columns => { - display_grid( - names, - config.width, - Direction::TopToBottom, - &mut state.out, - quoted, - config.tab_size, - )?; - } - Format::Across => { - display_grid( - names, - config.width, - Direction::LeftToRight, - &mut state.out, - quoted, - config.tab_size, - )?; - } - Format::Commas => { - let mut current_col = 0; - if let Some(name) = names.next() { - write_os_str(&mut state.out, &name)?; - current_col = ansi_width(&name.to_string_lossy()) as u16 + 2; - } - for name in names { - let name_width = ansi_width(&name.to_string_lossy()) as u16; - // If the width is 0 we print one single line - if config.width != 0 && current_col + name_width + 1 > config.width { - current_col = name_width + 2; - writeln!(state.out, ",")?; - } else { - current_col += name_width + 2; - write!(state.out, ", ")?; - } - write_os_str(&mut state.out, &name)?; - } - // Current col is never zero again if names have been printed. - // So we print a newline. - if current_col > 0 { - write!(state.out, "{}", config.line_ending)?; - } - } - _ => { - for name in names { - write_os_str(&mut state.out, &name)?; - write!(state.out, "{}", config.line_ending)?; - } - } - } - } - - Ok(()) -} - #[allow(unused_variables)] fn get_block_size(md: &Metadata, config: &Config) -> u64 { /* GNU ls will display sizes in terms of block size From 77a5546446fd21fe04d8e16b3b468f6cfcc3de35 Mon Sep 17 00:00:00 2001 From: Darren Schroeder <343840+fdncred@users.noreply.github.com> Date: Fri, 10 Apr 2026 09:20:18 -0500 Subject: [PATCH 06/15] try and optimize for memory and speed performance. update output handling to add streaming mode. 
--- src/uu/ls/src/ls.rs | 284 ++++++++++++++++++++++++---------------- src/uu/ls/src/output.rs | 110 +++++++++++----- 2 files changed, 249 insertions(+), 145 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 6c87c0af69b..a7d1fd36d4b 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -6,17 +6,21 @@ // spell-checker:ignore (ToDO) somegroup nlink tabsize dired subdired dtype colorterm stringly // spell-checker:ignore nohash strtime clocale +use clap::{ + Arg, ArgAction, Command, + builder::{NonEmptyStringValueParser, PossibleValue, ValueParser}, +}; +use lscolors::Colorable; #[cfg(unix)] use rustc_hash::FxHashMap; -use rustc_hash::FxHashSet; use std::borrow::Cow; use std::cell::RefCell; #[cfg(unix)] use std::os::unix::fs::{FileTypeExt, MetadataExt}; use std::{ cell::OnceCell, - collections::{HashMap, HashSet}, cmp::Reverse, + collections::{HashMap, HashSet}, ffi::{OsStr, OsString}, fs::{self, DirEntry, FileType, Metadata, ReadDir}, io::{BufWriter, ErrorKind, Stdout, Write, stdout}, @@ -24,12 +28,6 @@ use std::{ path::{Path, PathBuf}, time::{Duration, SystemTime, UNIX_EPOCH}, }; - -use clap::{ - Arg, ArgAction, Command, - builder::{NonEmptyStringValueParser, PossibleValue, ValueParser}, -}; -use lscolors::Colorable; use thiserror::Error; #[cfg(unix)] @@ -52,9 +50,9 @@ mod dired; mod display; pub mod output; -pub use output::{CollectorOutput, EntryInfo, LsOutput}; pub use config::{Config, options}; pub use display::Format; +pub use output::{EntryInfo, LsOutput, StreamMode, StreamingOutput}; use colors::StyleManager; use config::options::QUOTING_STYLE; @@ -995,8 +993,6 @@ impl Colorable for PathData<'_> { } } -type DirData = (PathBuf, bool); - // A struct to encapsulate state that is passed around from `list` functions. 
#[cfg_attr(not(unix), allow(dead_code))] struct ListState<'a> { @@ -1016,9 +1012,6 @@ struct ListState<'a> { #[cfg(not(unix))] gid_cache: (), recent_time_range: RangeInclusive, - stack: Vec, - listed_ancestors: FxHashSet, - initial_locs_len: usize, display_buf: Vec, } @@ -1026,13 +1019,13 @@ struct ListState<'a> { /// /// This is the default output sink used by [`list`] for standard ls behavior. /// It handles all text formatting including colors, columns, long format, etc. -struct TextOutput<'a> { +pub struct TextOutput<'a> { state: ListState<'a>, dired: DiredOutput, } impl<'a> TextOutput<'a> { - fn new(config: &'a Config) -> Self { + pub fn new(config: &'a Config) -> Self { Self { state: ListState { out: BufWriter::new(stdout()), @@ -1043,10 +1036,11 @@ impl<'a> TextOutput<'a> { gid_cache: HashMap::default(), recent_time_range: (SystemTime::now() - Duration::new(31_556_952 / 2, 0)) ..=SystemTime::now(), - stack: Vec::new(), - listed_ancestors: FxHashSet::default(), - initial_locs_len: 0, - display_buf: Vec::new(), + display_buf: Vec::with_capacity(if config.format == Format::Long { + 128 + } else { + 0 + }), }, dired: DiredOutput::default(), } @@ -1144,11 +1138,11 @@ impl LsOutput for TextOutput<'_> { /// # Example /// /// ```ignore -/// use uu_ls::{Config, list_with_output, CollectorOutput}; +/// use uu_ls::{Config, list_with_output, StreamingOutput}; /// use std::path::Path; /// /// let config = Config::from(&matches)?; -/// let mut output = CollectorOutput::new(); +/// let mut output = StreamingOutput::new(); /// list_with_output(vec![Path::new(".")], &config, &mut output)?; /// /// for entry in output.entries() { @@ -1160,10 +1154,9 @@ pub fn list_with_output( config: &Config, output: &mut O, ) -> UResult<()> { - let mut files = Vec::::new(); - let mut dirs = Vec::::new(); + let mut files = Vec::with_capacity(locs.len()); + let mut dirs = Vec::with_capacity(locs.len()); let initial_locs_len = locs.len(); - let _now = SystemTime::now(); for loc in locs { let 
path_data = PathData::new(loc.into(), None, None, config, true); @@ -1197,11 +1190,18 @@ pub fn list_with_output( output.initialize(config)?; - // Write file entries - output.write_entries(&files, config)?; + // Write file entries. + if matches!(output.stream_mode(), StreamMode::Streaming) { + for file_entry in &files { + output.write_entry(&file_entry.to_entry_info(config))?; + } + } else { + output.write_entries(&files, config)?; + } + + let mut entries = Vec::::with_capacity(2); for (pos, path_data) in dirs.iter().enumerate() { - let _needs_blank_line = pos != 0 || !files.is_empty(); // Do read_dir call here to match GNU semantics by printing // read_dir errors before directory headings, names and totals let read_dir = match fs::read_dir(path_data.path()) { @@ -1224,48 +1224,51 @@ pub fn list_with_output( output.write_dir_header(path_data, config, is_first)?; } - let mut listed_ancestors = HashSet::default(); + let mut listed_ancestors = HashSet::with_capacity(1); listed_ancestors.insert(FileInformation::from_path( path_data.path(), path_data.must_dereference, )?); - enter_directory(path_data, read_dir, config, &mut listed_ancestors, output)?; + enter_directory( + path_data, + read_dir, + config, + &mut listed_ancestors, + output, + &mut entries, + )?; } output.finalize(config)?; Ok(()) } -fn enter_directory( +fn collect_directory_entries( + entries: &mut Vec, path_data: &PathData, - mut read_dir: ReadDir, config: &Config, - listed_ancestors: &mut HashSet, output: &mut O, + mut read_dir: ReadDir, ) -> UResult<()> { - // Create vec of entries with initial dot files - let mut entries: Vec = if config.files == Files::All { - vec![ - PathData::new( - path_data.path().to_path_buf().into(), - None, - Some(OsStr::new(".").into()), - config, - false, - ), - PathData::new( - path_data.path().join("..").into(), - None, - Some(OsStr::new("..").into()), - config, - false, - ), - ] - } else { - vec![] - }; + entries.clear(); + + if config.files == Files::All { + 
entries.push(PathData::new( + path_data.path().to_path_buf().into(), + None, + Some(OsStr::new(".").into()), + config, + false, + )); + entries.push(PathData::new( + path_data.path().join("..").into(), + None, + Some(OsStr::new("..").into()), + config, + false, + )); + } - // Convert those entries to the PathData struct for raw_entry in read_dir.by_ref() { let dir_entry = match raw_entry { Ok(path) => path, @@ -1277,56 +1280,131 @@ fn enter_directory( }; if should_display(&dir_entry, config) { - let entry_path_data = - PathData::new(dir_entry.path().into(), Some(dir_entry), None, config, false); - entries.push(entry_path_data); + entries.push(PathData::new( + dir_entry.path().into(), + Some(dir_entry), + None, + config, + false, + )); } } - sort_entries(&mut entries, config); + sort_entries(entries, config); + if entries.capacity() > entries.len().saturating_mul(2) { + entries.shrink_to_fit(); + } + + Ok(()) +} - // Print total after any error display +fn write_directory_entries( + entries: &[PathData], + config: &Config, + output: &mut O, +) -> UResult<()> { if config.format == Format::Long || config.alloc_size { - let mut total_size = 0u64; - for item in &entries { - total_size += item - .metadata() - .as_ref() - .map_or(0, |md| get_block_size(md, config)); - } + let total_size: u64 = entries + .iter() + .map(|item| { + item.metadata() + .as_ref() + .map_or(0, |md| get_block_size(md, config)) + }) + .sum(); output.write_total(total_size, config)?; } - output.write_entries(&entries, config)?; + if matches!(output.stream_mode(), StreamMode::Streaming) { + for entry in entries { + output.write_entry(&entry.to_entry_info(config))?; + } + Ok(()) + } else { + output.write_entries(entries, config) + } +} - if config.recursive { - for e in entries - .iter() - .skip(if config.files == Files::All { 2 } else { 0 }) - .filter(|p| p.file_type().is_some_and(|ft| ft.is_dir())) - { - match fs::read_dir(e.path()) { - Err(err) => { - output.flush()?; - 
show!(LsError::IOErrorContext( - e.path().to_path_buf(), - err, - e.command_line - )); - } - Ok(rd) => { - if listed_ancestors - .insert(FileInformation::from_path(e.path(), e.must_dereference)?) - { - // when listing several directories in recursive mode, we show - // "dirname:" at the beginning of the file list - output.write_dir_header(e, config, false)?; - enter_directory(e, rd, config, listed_ancestors, output)?; - listed_ancestors - .remove(&FileInformation::from_path(e.path(), e.must_dereference)?); - } else { +/// Recursively traverse directories using an explicit stack. +/// +/// This avoids deep recursive call chains while preserving GNU-style +/// directory traversal order and ancestor detection. +fn enter_directory( + path_data: &PathData, + read_dir: ReadDir, + config: &Config, + listed_ancestors: &mut HashSet, + output: &mut O, + entries: &mut Vec, +) -> UResult<()> { + struct StackEntry { + path: PathBuf, + command_line: bool, + is_first: bool, + read_dir: ReadDir, + } + + let mut stack = Vec::new(); + stack.push(StackEntry { + path: path_data.path().to_path_buf(), + command_line: path_data.command_line, + is_first: true, + read_dir, + }); + + while let Some(entry) = stack.pop() { + let path_data = PathData::new( + entry.path.clone().into(), + None, + None, + config, + entry.command_line, + ); + + if !entry.is_first { + output.write_dir_header(&path_data, config, false)?; + } + + collect_directory_entries(entries, &path_data, config, output, entry.read_dir)?; + write_directory_entries(entries, config, output)?; + + if config.recursive { + let start = if config.files == Files::All { 2 } else { 0 }; + + for child in entries + .iter() + .skip(start) + .filter(|p| p.file_type().is_some_and(FileType::is_dir)) + .rev() + { + let child_path = child.path().to_path_buf(); + let child_must_dereference = child.must_dereference; + let child_command_line = child.command_line; + + match fs::read_dir(&child_path) { + Err(err) => { output.flush()?; - 
show!(LsError::AlreadyListedError(e.path().to_path_buf())); + show!(LsError::IOErrorContext( + child_path.clone(), + err, + child_command_line, + )); + } + Ok(read_dir) => { + if listed_ancestors.insert(FileInformation::from_path( + &child_path, + child_must_dereference, + )?) { + stack.push(StackEntry { + path: child_path, + command_line: child_command_line, + is_first: false, + read_dir, + }); + } else { + output.flush()?; + show!(LsError::AlreadyListedError(child_path)); + } } } } @@ -1410,22 +1488,6 @@ fn sort_entries(entries: &mut [PathData], config: &Config) { } } -fn is_hidden(file_path: &DirEntry) -> bool { - #[cfg(windows)] - { - let metadata = file_path.metadata().unwrap(); - let attr = metadata.file_attributes(); - (attr & 0x2) > 0 - } - #[cfg(not(windows))] - { - file_path - .file_name() - .to_str() - .is_some_and(|res| res.starts_with('.')) - } -} - #[allow(clippy::cognitive_complexity)] fn get_metadata_with_deref_opt(p_buf: &Path, dereference: bool) -> std::io::Result { if dereference { diff --git a/src/uu/ls/src/output.rs b/src/uu/ls/src/output.rs index f5d780f5909..f022277bdcf 100644 --- a/src/uu/ls/src/output.rs +++ b/src/uu/ls/src/output.rs @@ -12,28 +12,46 @@ //! # Example //! //! ```ignore -//! use uu_ls::{Config, list_with_output, CollectorOutput}; //! use std::path::Path; +//! use uu_ls::{Config, list_with_output, EntryInfo, LsOutput, StreamMode}; //! -//! let config = Config::from(&matches)?; -//! let mut output = CollectorOutput::new(); -//! list_with_output(vec![Path::new(".")], &config, &mut output)?; +//! struct MySink { +//! pub count: usize, +//! } +//! +//! impl MySink { +//! fn new() -> Self { +//! Self { count: 0 } +//! } +//! } +//! +//! impl LsOutput for MySink { +//! fn stream_mode(&self) -> StreamMode { +//! StreamMode::Streaming +//! } //! -//! for entry in output.entries() { -//! println!("{}: {} bytes", -//! entry.display_name.to_string_lossy(), -//! entry.size().unwrap_or(0)); +//!
fn write_entry(&mut self, entry: &EntryInfo) -> uucore::error::UResult<()> { +//! println!("{} -> {:?}", entry.path.display(), entry.file_type); +//! self.count += 1; +//! Ok(()) +//! } //! } +//! +//! let config = Config::from(&matches)?; +//! let mut output = MySink::new(); +//! list_with_output(vec![Path::new(".")], &config, &mut output)?; +//! println!("processed {} entries", output.count); //! ``` +//! Alternatively, use [`StreamingOutput`] when you want a reusable streaming sink +//! that collects `EntryInfo` objects as they arrive. + +use crate::{Config, PathData}; use std::ffi::OsString; use std::fs::{FileType, Metadata}; use std::path::PathBuf; - use uucore::error::UResult; -use crate::{Config, PathData}; - /// Information about a single file/directory entry. /// /// This struct provides programmatic access to file metadata without @@ -59,22 +77,22 @@ pub struct EntryInfo { impl EntryInfo { /// Returns true if this entry represents a directory pub fn is_dir(&self) -> bool { - self.file_type.as_ref().is_some_and(|ft| ft.is_dir()) + self.file_type.as_ref().is_some_and(FileType::is_dir) } /// Returns true if this entry represents a regular file pub fn is_file(&self) -> bool { - self.file_type.as_ref().is_some_and(|ft| ft.is_file()) + self.file_type.as_ref().is_some_and(FileType::is_file) } /// Returns true if this entry represents a symbolic link pub fn is_symlink(&self) -> bool { - self.file_type.as_ref().is_some_and(|ft| ft.is_symlink()) + self.file_type.as_ref().is_some_and(FileType::is_symlink) } /// Returns the file size in bytes, if metadata is available pub fn size(&self) -> Option { - self.metadata.as_ref().map(|m| m.len()) + self.metadata.as_ref().map(Metadata::len) } /// Returns the file name as a string slice, if valid UTF-8 @@ -83,6 +101,16 @@ impl EntryInfo { } } +/// Streaming mode for `LsOutput` sinks. +/// +/// `Batch` sinks receive a directory's entries all at once via +/// [`write_entries`]. 
`Streaming` sinks receive one entry at a time via +/// [`write_entry`]. +pub enum StreamMode { + Batch, + Streaming, +} + /// Trait for receiving ls output entries. /// /// Implement this trait to receive structured data from the ls enumeration @@ -95,6 +123,15 @@ impl EntryInfo { /// The internal `TextOutput` implementation uses [`write_entries`](LsOutput::write_entries) /// to receive batches for proper column alignment and grid formatting. pub trait LsOutput { + /// Returns the preferred output mode for this sink. + /// + /// Default is `Batch` so text-formatting sinks can continue to receive full + /// directory batches. Streaming sinks should override this to return + /// `StreamMode::Streaming`. + fn stream_mode(&self) -> StreamMode { + StreamMode::Batch + } + /// Called for each file/directory entry (streaming mode). /// /// Default implementation does nothing. Override this for programmatic access @@ -157,18 +194,19 @@ pub trait LsOutput { } } -/// A simple output sink that collects all entries into a Vec. +/// A dedicated streaming output sink. /// -/// This is useful for programmatic access where you want to collect -/// all entries and process them after enumeration is complete. +/// This sink is intended for programmatic consumers that want to receive +/// `EntryInfo` objects one at a time as they are emitted. It implements +/// [`LsOutput::stream_mode`] as `StreamMode::Streaming`. 
/// /// # Example /// /// ```ignore -/// use uu_ls::{Config, list_with_output, CollectorOutput}; +/// use uu_ls::{Config, list_with_output, StreamingOutput}; /// use std::path::Path; /// -/// let mut output = CollectorOutput::new(); +/// let mut output = StreamingOutput::new(); /// list_with_output(vec![Path::new(".")], &config, &mut output)?; /// /// for entry in output.entries() { @@ -178,14 +216,14 @@ pub trait LsOutput { /// } /// ``` #[derive(Debug, Default)] -pub struct CollectorOutput { +pub struct StreamingOutput { entries: Vec, directories: Vec, totals: Vec, } -impl CollectorOutput { - /// Create a new empty collector +impl StreamingOutput { + /// Create a new empty streaming sink. pub fn new() -> Self { Self::default() } @@ -218,7 +256,11 @@ impl CollectorOutput { } } -impl LsOutput for CollectorOutput { +impl LsOutput for StreamingOutput { + fn stream_mode(&self) -> StreamMode { + StreamMode::Streaming + } + fn write_entry(&mut self, entry: &EntryInfo) -> UResult<()> { self.entries.push(entry.clone()); Ok(()) @@ -287,16 +329,16 @@ mod tests { } #[test] - fn test_collector_output_new() { - let collector = CollectorOutput::new(); + fn test_streaming_output_new() { + let collector = StreamingOutput::new(); assert!(collector.entries().is_empty()); assert!(collector.directories().is_empty()); assert!(collector.totals().is_empty()); } #[test] - fn test_collector_output_write_entry() { - let mut collector = CollectorOutput::new(); + fn test_streaming_output_write_entry() { + let mut collector = StreamingOutput::new(); let entry = EntryInfo { path: PathBuf::from("/test/file"), display_name: OsString::from("file"), @@ -312,8 +354,8 @@ mod tests { } #[test] - fn test_collector_output_clear() { - let mut collector = CollectorOutput::new(); + fn test_streaming_output_clear() { + let mut collector = StreamingOutput::new(); let entry = EntryInfo { path: PathBuf::from("/test/file"), display_name: OsString::from("file"), @@ -332,8 +374,8 @@ mod tests { } #[test] - fn 
test_collector_output_into_entries() { - let mut collector = CollectorOutput::new(); + fn test_streaming_output_into_entries() { + let mut collector = StreamingOutput::new(); let entry = EntryInfo { path: PathBuf::from("/test/file"), display_name: OsString::from("file"), @@ -351,8 +393,8 @@ mod tests { } #[test] - fn test_collector_output_flush() { - let mut collector = CollectorOutput::new(); + fn test_streaming_output_flush() { + let mut collector = StreamingOutput::new(); assert!(collector.flush().is_ok()); } } From b4b2de4d50b9404b7f346483dac5469c488edbb9 Mon Sep 17 00:00:00 2001 From: Darren Schroeder <343840+fdncred@users.noreply.github.com> Date: Fri, 10 Apr 2026 12:20:09 -0500 Subject: [PATCH 07/15] work on ci issues, optimize uid and gid cache handling, improve dired output calculations --- src/uu/ls/src/ls.rs | 48 +++++++++++++++++++++++++++++----------- tests/by-util/test_ls.rs | 4 ++-- 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index a7d1fd36d4b..3e713caaa41 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -20,7 +20,7 @@ use std::os::unix::fs::{FileTypeExt, MetadataExt}; use std::{ cell::OnceCell, cmp::Reverse, - collections::{HashMap, HashSet}, + collections::HashSet, ffi::{OsStr, OsString}, fs::{self, DirEntry, FileType, Metadata, ReadDir}, io::{BufWriter, ErrorKind, Stdout, Write, stdout}, @@ -1031,9 +1031,13 @@ impl<'a> TextOutput<'a> { out: BufWriter::new(stdout()), style_manager: config.color.as_ref().map(StyleManager::new), #[cfg(unix)] - uid_cache: HashMap::default(), + uid_cache: FxHashMap::default(), #[cfg(unix)] - gid_cache: HashMap::default(), + gid_cache: FxHashMap::default(), + #[cfg(not(unix))] + uid_cache: (), + #[cfg(not(unix))] + gid_cache: (), recent_time_range: (SystemTime::now() - Duration::new(31_556_952 / 2, 0)) ..=SystemTime::now(), display_buf: Vec::with_capacity(if config.format == Format::Long { @@ -1065,16 +1069,17 @@ impl LsOutput for TextOutput<'_> 
{ show_dir_name(path_data, &mut self.state.out, config)?; writeln!(self.state.out)?; if config.dired { - let dir_len = path_data.display_name().len(); + let dir_len = path_data.path().as_os_str().len(); dired::calculate_subdired(&mut self.dired, dir_len); dired::add_dir_name(&mut self.dired, dir_len); } } else { writeln!(self.state.out)?; if config.dired { - self.dired.padding = 2; + self.dired.line_offset += 1; // account for the blank line before recursive directory headings + self.dired.padding = 0; dired::indent(&mut self.state.out)?; - let dir_name_size = path_data.path().to_string_lossy().len(); + let dir_name_size = path_data.path().as_os_str().len(); dired::calculate_subdired(&mut self.dired, dir_name_size); dired::add_dir_name(&mut self.dired, dir_name_size); } @@ -1341,18 +1346,17 @@ fn enter_directory( path: PathBuf, command_line: bool, is_first: bool, - read_dir: ReadDir, } let mut stack = Vec::new(); - stack.push(StackEntry { + let mut current = Some(StackEntry { path: path_data.path().to_path_buf(), command_line: path_data.command_line, is_first: true, - read_dir, }); + let mut initial_read_dir = Some(read_dir); - while let Some(entry) = stack.pop() { + while let Some(entry) = current.take().or_else(|| stack.pop()) { let path_data = PathData::new( entry.path.clone().into(), None, @@ -1365,7 +1369,26 @@ fn enter_directory( output.write_dir_header(&path_data, config, false)?; } - collect_directory_entries(entries, &path_data, config, output, entry.read_dir)?; + let current_read_dir = if entry.is_first { + initial_read_dir + .take() + .expect("initial read_dir is present for first entry") + } else { + match fs::read_dir(&entry.path) { + Err(err) => { + output.flush()?; + show!(LsError::IOErrorContext( + entry.path.clone(), + err, + entry.command_line, + )); + continue; + } + Ok(rd) => rd, + } + }; + + collect_directory_entries(entries, &path_data, config, output, current_read_dir)?; write_directory_entries(entries, config, output)?; if config.recursive { 
@@ -1390,7 +1413,7 @@ fn enter_directory( child_command_line, )); } - Ok(read_dir) => { + Ok(_) => { if listed_ancestors.insert(FileInformation::from_path( &child_path, child_must_dereference, @@ -1399,7 +1422,6 @@ fn enter_directory( path: child_path, command_line: child_command_line, is_first: false, - read_dir, }); } else { output.flush()?; diff --git a/tests/by-util/test_ls.rs b/tests/by-util/test_ls.rs index 5058de3e08e..3372741a867 100644 --- a/tests/by-util/test_ls.rs +++ b/tests/by-util/test_ls.rs @@ -5340,7 +5340,7 @@ fn test_ls_dired_recursive_multiple() { .map(|chunk| { let start_pos = chunk[0]; let end_pos = chunk[1]; - let filename = String::from_utf8(output.as_bytes()[start_pos..=end_pos].to_vec()) + let filename = String::from_utf8(output.as_bytes()[start_pos..end_pos].to_vec()) .unwrap() .trim() .to_string(); @@ -5486,7 +5486,7 @@ fn test_ls_dired_complex() { .map(|chunk| { let start_pos = chunk[0]; let end_pos = chunk[1]; - let filename = String::from_utf8(output.as_bytes()[start_pos..=end_pos].to_vec()) + let filename = String::from_utf8(output.as_bytes()[start_pos..end_pos].to_vec()) .unwrap() .trim() .to_string(); From a4aa2a86e1f3b99ca43f21a71a03b38e71fad0bc Mon Sep 17 00:00:00 2001 From: Darren Schroeder <343840+fdncred@users.noreply.github.com> Date: Sat, 11 Apr 2026 08:52:05 -0500 Subject: [PATCH 08/15] test fx: change read_dir parameter to mutable reference in collect_directory_entries and enter_directory functions --- src/uu/ls/src/ls.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 3e713caaa41..ea65702f605 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -1253,7 +1253,7 @@ fn collect_directory_entries( path_data: &PathData, config: &Config, output: &mut O, - mut read_dir: ReadDir, + read_dir: &mut ReadDir, ) -> UResult<()> { entries.clear(); @@ -1369,7 +1369,7 @@ fn enter_directory( output.write_dir_header(&path_data, config, false)?; } - let 
current_read_dir = if entry.is_first { + let mut current_read_dir = if entry.is_first { initial_read_dir .take() .expect("initial read_dir is present for first entry") @@ -1388,7 +1388,7 @@ fn enter_directory( } }; - collect_directory_entries(entries, &path_data, config, output, current_read_dir)?; + collect_directory_entries(entries, &path_data, config, output, &mut current_read_dir)?; write_directory_entries(entries, config, output)?; if config.recursive { From 7ef4f68f9d434e8444948bfefd98f411153555a2 Mon Sep 17 00:00:00 2001 From: Darren Schroeder <343840+fdncred@users.noreply.github.com> Date: Sat, 11 Apr 2026 09:15:01 -0500 Subject: [PATCH 09/15] update documentation --- src/uu/ls/src/output.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/uu/ls/src/output.rs b/src/uu/ls/src/output.rs index f022277bdcf..530ba26c23b 100644 --- a/src/uu/ls/src/output.rs +++ b/src/uu/ls/src/output.rs @@ -104,8 +104,8 @@ impl EntryInfo { /// Streaming mode for `LsOutput` sinks. /// /// `Batch` sinks receive a directory's entries all at once via -/// [`write_entries`]. `Streaming` sinks receive one entry at a time via -/// [`write_entry`]. +/// [`LsOutput::write_entries`]. `Streaming` sinks receive one entry at a time via +/// [`LsOutput::write_entry`]. 
pub enum StreamMode { Batch, Streaming, From a8ed2ea5c51c1ea961dff53eb7ffcf00a34f1b86 Mon Sep 17 00:00:00 2001 From: Darren Schroeder <343840+fdncred@users.noreply.github.com> Date: Sat, 11 Apr 2026 11:39:06 -0500 Subject: [PATCH 10/15] more perf tweaks --- src/uu/ls/src/ls.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index ea65702f605..d61062a7a89 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -13,6 +13,7 @@ use clap::{ use lscolors::Colorable; #[cfg(unix)] use rustc_hash::FxHashMap; +use rustc_hash::FxHashSet; use std::borrow::Cow; use std::cell::RefCell; #[cfg(unix)] @@ -20,7 +21,7 @@ use std::os::unix::fs::{FileTypeExt, MetadataExt}; use std::{ cell::OnceCell, cmp::Reverse, - collections::HashSet, + ffi::{OsStr, OsString}, fs::{self, DirEntry, FileType, Metadata, ReadDir}, io::{BufWriter, ErrorKind, Stdout, Write, stdout}, @@ -1229,7 +1230,7 @@ pub fn list_with_output( output.write_dir_header(path_data, config, is_first)?; } - let mut listed_ancestors = HashSet::with_capacity(1); + let mut listed_ancestors = FxHashSet::default(); listed_ancestors.insert(FileInformation::from_path( path_data.path(), path_data.must_dereference, @@ -1338,7 +1339,7 @@ fn enter_directory( path_data: &PathData, read_dir: ReadDir, config: &Config, - listed_ancestors: &mut HashSet, + listed_ancestors: &mut FxHashSet, output: &mut O, entries: &mut Vec, ) -> UResult<()> { @@ -1358,7 +1359,7 @@ fn enter_directory( while let Some(entry) = current.take().or_else(|| stack.pop()) { let path_data = PathData::new( - entry.path.clone().into(), + entry.path.as_path().into(), None, None, config, From 6781aa22aa19d24c9b6c3747a2ea053878a9da07 Mon Sep 17 00:00:00 2001 From: Darren Schroeder <343840+fdncred@users.noreply.github.com> Date: Sat, 11 Apr 2026 13:43:47 -0500 Subject: [PATCH 11/15] fmt --- src/uu/ls/src/ls.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs 
index d61062a7a89..444418115c6 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -21,7 +21,6 @@ use std::os::unix::fs::{FileTypeExt, MetadataExt}; use std::{ cell::OnceCell, cmp::Reverse, - ffi::{OsStr, OsString}, fs::{self, DirEntry, FileType, Metadata, ReadDir}, io::{BufWriter, ErrorKind, Stdout, Write, stdout}, From a5204c46c2b5ed65bdae14c594327a4a673798b0 Mon Sep 17 00:00:00 2001 From: Darren Schroeder <343840+fdncred@users.noreply.github.com> Date: Sun, 12 Apr 2026 15:39:55 -0500 Subject: [PATCH 12/15] tweak perf --- src/uu/ls/src/ls.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 444418115c6..8923b0c6962 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -1093,14 +1093,12 @@ impl LsOutput for TextOutput<'_> { if config.dired { dired::indent(&mut self.state.out)?; } - let total_str = format!( - "{}{}", - translate!("ls-total", "size" => display_size(total_size, config)), - config.line_ending - ); - write!(self.state.out, "{total_str}")?; + let total = translate!("ls-total", "size" => display_size(total_size, config)); + let total_len = total.len() + 1; + self.state.out.write_all(total.as_bytes())?; + self.state.out.write_all(&[config.line_ending as u8])?; if config.dired { - dired::add_total(&mut self.dired, total_str.len()); + dired::add_total(&mut self.dired, total_len); } Ok(()) } @@ -1296,9 +1294,7 @@ fn collect_directory_entries( } sort_entries(entries, config); - if entries.capacity() > entries.len().saturating_mul(2) { - entries.shrink_to_fit(); - } + entries.shrink_to_fit(); Ok(()) } From dbed28ef0de969d5b26b0899126e4dd8f49cd8e0 Mon Sep 17 00:00:00 2001 From: Darren Schroeder <343840+fdncred@users.noreply.github.com> Date: Thu, 16 Apr 2026 06:31:47 -0500 Subject: [PATCH 13/15] Add back (and shorten) comment for 31_556_952 --- src/uu/ls/src/ls.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 
8923b0c6962..f17b1f7166f 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -1038,6 +1038,8 @@ impl<'a> TextOutput<'a> { uid_cache: (), #[cfg(not(unix))] gid_cache: (), + // Use "recent" format for files modified within the last ~0.5 years (31556952s). + // According to GNU a Gregorian year has 365.2425 * 24 * 60 * 60 == 31556952 seconds on the average. recent_time_range: (SystemTime::now() - Duration::new(31_556_952 / 2, 0)) ..=SystemTime::now(), display_buf: Vec::with_capacity(if config.format == Format::Long { From a4548758e4bbabfbcda574ad9ed73f01c9fe77d3 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Fri, 17 Apr 2026 11:25:03 +0200 Subject: [PATCH 14/15] Update src/uu/ls/src/ls.rs Co-authored-by: Daniel Hofstetter --- src/uu/ls/src/ls.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index f17b1f7166f..a150349a74c 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -1438,7 +1438,6 @@ fn enter_directory( /// /// This is the standard ls entry point that formats output as text. /// It uses [`list_with_output`] internally with a text formatter. -#[allow(clippy::cognitive_complexity)] pub fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> { let mut output = TextOutput::new(config); list_with_output(locs, config, &mut output) From 0f506deae258673b1bc1d7c82ab1fbe90ec9aa1c Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Fri, 17 Apr 2026 11:25:14 +0200 Subject: [PATCH 15/15] Update src/uu/ls/src/ls.rs Co-authored-by: Daniel Hofstetter --- src/uu/ls/src/ls.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index a150349a74c..1f8889acfac 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -1507,7 +1507,6 @@ fn sort_entries(entries: &mut [PathData], config: &Config) { } } -#[allow(clippy::cognitive_complexity)] fn get_metadata_with_deref_opt(p_buf: &Path, dereference: bool) -> std::io::Result { if dereference { p_buf.metadata()