diff --git a/.vscode/cspell.dictionaries/jargon.wordlist.txt b/.vscode/cspell.dictionaries/jargon.wordlist.txt index 289875c1766..c468ef0ea74 100644 --- a/.vscode/cspell.dictionaries/jargon.wordlist.txt +++ b/.vscode/cspell.dictionaries/jargon.wordlist.txt @@ -26,6 +26,7 @@ conv cyclomatic dedup deduplication +dired demangle denoland deque diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 632d6ae591d..1f8889acfac 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -6,6 +6,11 @@ // spell-checker:ignore (ToDO) somegroup nlink tabsize dired subdired dtype colorterm stringly // spell-checker:ignore nohash strtime clocale +use clap::{ + Arg, ArgAction, Command, + builder::{NonEmptyStringValueParser, PossibleValue, ValueParser}, +}; +use lscolors::Colorable; #[cfg(unix)] use rustc_hash::FxHashMap; use rustc_hash::FxHashSet; @@ -23,12 +28,6 @@ use std::{ path::{Path, PathBuf}, time::{Duration, SystemTime, UNIX_EPOCH}, }; - -use clap::{ - Arg, ArgAction, Command, - builder::{NonEmptyStringValueParser, PossibleValue, ValueParser}, -}; -use lscolors::Colorable; use thiserror::Error; #[cfg(unix)] @@ -50,8 +49,10 @@ mod config; mod dired; mod display; +pub mod output; pub use config::{Config, options}; pub use display::Format; +pub use output::{EntryInfo, LsOutput, StreamMode, StreamingOutput}; use colors::StyleManager; use config::options::QUOTING_STYLE; @@ -787,7 +788,7 @@ pub fn uu_app() -> Command { /// Represents the possible values of [`PathData::display_name`]. The reason this is a /// separate enum is to avoid a self-referential struct, as it is moved in hot loops. -#[derive(Debug)] +#[derive(Debug, Clone)] enum PathDataDisplayName<'a> { SelfReferential, Custom(Cow<'a, OsStr>), @@ -797,7 +798,11 @@ enum PathDataDisplayName<'a> { /// Any data that will be reused several times makes sense to be added to this structure. /// Caching data here helps eliminate redundant syscalls to fetch same information. 
#[derive(Debug)] -struct PathData<'a> { +/// Internal representation of file/directory entry data. +/// +/// This struct is used internally for file enumeration. It can be converted +/// to [`EntryInfo`] for programmatic access via the [`LsOutput`] trait. +pub struct PathData<'a> { // Result got from symlink_metadata() or metadata() based on config md: OnceCell>, ft: OnceCell>, @@ -814,6 +819,19 @@ struct PathData<'a> { } impl<'a> PathData<'a> { + /// Convert this PathData to an EntryInfo for programmatic access + pub fn to_entry_info(&self, config: &Config) -> EntryInfo { + EntryInfo { + path: self.p_buf.clone().into_owned(), + display_name: self.display_name().to_os_string(), + file_type: self.file_type().copied(), + metadata: self.metadata().cloned(), + security_context: self.security_context(config).to_string(), + command_line: self.command_line, + must_dereference: self.must_dereference, + } + } + fn new( p_buf: Cow<'a, Path>, dir_entry: Option, @@ -975,8 +993,6 @@ impl Colorable for PathData<'_> { } } -type DirData = (PathBuf, bool); - // A struct to encapsulate state that is passed around from `list` functions. 
#[cfg_attr(not(unix), allow(dead_code))] struct ListState<'a> { @@ -996,44 +1012,156 @@ struct ListState<'a> { #[cfg(not(unix))] gid_cache: (), recent_time_range: RangeInclusive, - stack: Vec, - listed_ancestors: FxHashSet, - initial_locs_len: usize, display_buf: Vec, } -#[allow(clippy::cognitive_complexity)] -pub fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> { - let mut files = Vec::::new(); - let mut dirs = Vec::::new(); - let mut dired = DiredOutput::default(); - let initial_locs_len = locs.len(); - let now = SystemTime::now(); - - let mut state = ListState { - out: BufWriter::new(stdout()), - style_manager: config.color.as_ref().map(StyleManager::new), - #[cfg(unix)] - uid_cache: FxHashMap::default(), - #[cfg(unix)] - gid_cache: FxHashMap::default(), - #[cfg(not(unix))] - uid_cache: (), - #[cfg(not(unix))] - gid_cache: (), - // Time range for which to use the "recent" format. Anything from 0.5 year in the past to now - // (files with modification time in the future use "old" format). - // According to GNU a Gregorian year has 365.2425 * 24 * 60 * 60 == 31556952 seconds on the average. - recent_time_range: (now - Duration::new(31_556_952 / 2, 0))..=now, - stack: Vec::new(), - listed_ancestors: FxHashSet::default(), - initial_locs_len, - display_buf: Vec::with_capacity(if config.format == Format::Long { - 128 +/// Text output implementation that formats entries for terminal display. +/// +/// This is the default output sink used by [`list`] for standard ls behavior. +/// It handles all text formatting including colors, columns, long format, etc. 
+pub struct TextOutput<'a> { + state: ListState<'a>, + dired: DiredOutput, +} + +impl<'a> TextOutput<'a> { + pub fn new(config: &'a Config) -> Self { + Self { + state: ListState { + out: BufWriter::new(stdout()), + style_manager: config.color.as_ref().map(StyleManager::new), + #[cfg(unix)] + uid_cache: FxHashMap::default(), + #[cfg(unix)] + gid_cache: FxHashMap::default(), + #[cfg(not(unix))] + uid_cache: (), + #[cfg(not(unix))] + gid_cache: (), + // Use the "recent" format for files modified within the last half year (31556952s / 2). + // According to GNU a Gregorian year has 365.2425 * 24 * 60 * 60 == 31556952 seconds on the average. + recent_time_range: (SystemTime::now() - Duration::new(31_556_952 / 2, 0)) + ..=SystemTime::now(), + display_buf: Vec::with_capacity(if config.format == Format::Long { + 128 + } else { + 0 + }), + }, + dired: DiredOutput::default(), + } + } +} + +impl LsOutput for TextOutput<'_> { + fn write_entries(&mut self, entries: &[PathData], config: &Config) -> UResult<()> { + display_items(entries, config, &mut self.state, &mut self.dired) + } + + fn write_dir_header( + &mut self, + path_data: &PathData, + config: &Config, + is_first: bool, + ) -> UResult<()> { + if is_first { + if config.dired { + dired::indent(&mut self.state.out)?; + } + show_dir_name(path_data, &mut self.state.out, config)?; + writeln!(self.state.out)?; + if config.dired { + let dir_len = path_data.path().as_os_str().len(); + dired::calculate_subdired(&mut self.dired, dir_len); + dired::add_dir_name(&mut self.dired, dir_len); + } } else { + writeln!(self.state.out)?; + if config.dired { + self.dired.line_offset += 1; // account for the blank line before recursive directory headings + self.dired.padding = 0; + dired::indent(&mut self.state.out)?; + let dir_name_size = path_data.path().as_os_str().len(); + dired::calculate_subdired(&mut self.dired, dir_name_size); + dired::add_dir_name(&mut self.dired, dir_name_size); + } + show_dir_name(path_data, &mut 
self.state.out, config)?; + writeln!(self.state.out)?; + } + Ok(()) + } + + fn write_total(&mut self, total_size: u64, config: &Config) -> UResult<()> { + if config.dired { + dired::indent(&mut self.state.out)?; + } + let total = translate!("ls-total", "size" => display_size(total_size, config)); + let total_len = total.len() + 1; + self.state.out.write_all(total.as_bytes())?; + self.state.out.write_all(&[config.line_ending as u8])?; + if config.dired { + dired::add_total(&mut self.dired, total_len); + } + Ok(()) + } + + fn flush(&mut self) -> UResult<()> { + self.state.out.flush()?; + Ok(()) + } + + fn finalize(&mut self, config: &Config) -> UResult<()> { + if config.dired && !config.hyperlink { + dired::print_dired_output(config, &self.dired, &mut self.state.out)?; + } + Ok(()) + } + + fn initialize(&mut self, _config: &Config) -> UResult<()> { + if let Some(style_manager) = self.state.style_manager.as_mut() { + if style_manager.get_normal_style().is_some() { + let to_write = style_manager.reset(true); + write!(self.state.out, "{to_write}")?; + } + } + Ok(()) + } +} + +/// Lists files and directories, sending structured output to a custom sink. +/// +/// This function provides programmatic access to ls functionality without +/// requiring text parsing. It enumerates files and directories according +/// to the provided configuration and sends each entry to the output sink. 
+/// +/// # Arguments +/// +/// * `locs` - Paths to list +/// * `config` - Configuration controlling listing behavior +/// * `output` - A sink implementing [`LsOutput`] to receive entries +/// +/// # Example +/// +/// ```ignore +/// use uu_ls::{Config, list_with_output, StreamingOutput}; +/// use std::path::Path; +/// +/// let config = Config::from(&matches)?; +/// let mut output = StreamingOutput::new(); +/// list_with_output(vec![Path::new(".")], &config, &mut output)?; +/// +/// for entry in output.entries() { +/// println!("{}: {:?}", entry.display_name.to_string_lossy(), entry.file_type); +/// } +/// ``` +pub fn list_with_output( + locs: Vec<&Path>, + config: &Config, + output: &mut O, +) -> UResult<()> { + let mut files = Vec::with_capacity(locs.len()); + let mut dirs = Vec::with_capacity(locs.len()); + let initial_locs_len = locs.len(); for loc in locs { let path_data = PathData::new(loc.into(), None, None, config, true); @@ -1065,25 +1193,26 @@ pub fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> { sort_entries(&mut files, config); sort_entries(&mut dirs, config); - if let Some(style_manager) = state.style_manager.as_mut() { - // ls will try to write a reset before anything is written if normal - // color is given - if style_manager.get_normal_style().is_some() { - let to_write = style_manager.reset(true); - write!(state.out, "{to_write}")?; + output.initialize(config)?; + + // Write file entries. 
+ if matches!(output.stream_mode(), StreamMode::Streaming) { + for file_entry in &files { + output.write_entry(&file_entry.to_entry_info(config))?; } + } else { + output.write_entries(&files, config)?; } - display_items(&files, config, &mut state, &mut dired)?; + let mut entries = Vec::::with_capacity(2); for (pos, path_data) in dirs.iter().enumerate() { - let needs_blank_line = pos != 0 || !files.is_empty(); // Do read_dir call here to match GNU semantics by printing // read_dir errors before directory headings, names and totals let read_dir = match fs::read_dir(path_data.path()) { Err(err) => { // flush stdout buffer before the error to preserve formatting and order - state.out.flush()?; + output.flush()?; show!(LsError::IOErrorContext( path_data.path().to_path_buf(), err, @@ -1094,56 +1223,226 @@ pub fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> { Ok(rd) => rd, }; - state.listed_ancestors.insert(FileInformation::from_path( + // Write dir heading for multiple arguments or recursive mode + if initial_locs_len > 1 || config.recursive { + let is_first = pos == 0 && files.is_empty(); + output.write_dir_header(path_data, config, is_first)?; + } + + let mut listed_ancestors = FxHashSet::default(); + listed_ancestors.insert(FileInformation::from_path( path_data.path(), path_data.must_dereference, )?); - - // List each of the arguments to ls first. - depth_first_list( - (path_data.path().to_path_buf(), needs_blank_line), + enter_directory( + path_data, read_dir, config, - &mut state, - &mut dired, - true, + &mut listed_ancestors, + output, + &mut entries, )?; + } + + output.finalize(config)?; + Ok(()) +} + +fn collect_directory_entries( + entries: &mut Vec, + path_data: &PathData, + config: &Config, + output: &mut O, + read_dir: &mut ReadDir, +) -> UResult<()> { + entries.clear(); - // Only runs if it must list recursively. 
- while let Some(dir_data) = state.stack.pop() { - let read_dir = match fs::read_dir(&dir_data.0) { + if config.files == Files::All { + entries.push(PathData::new( + path_data.path().to_path_buf().into(), + None, + Some(OsStr::new(".").into()), + config, + false, + )); + entries.push(PathData::new( + path_data.path().join("..").into(), + None, + Some(OsStr::new("..").into()), + config, + false, + )); + } + + for raw_entry in read_dir.by_ref() { + let dir_entry = match raw_entry { + Ok(path) => path, + Err(err) => { + output.flush()?; + show!(LsError::IOError(err)); + continue; + } + }; + + if should_display(&dir_entry, config) { + entries.push(PathData::new( + dir_entry.path().into(), + Some(dir_entry), + None, + config, + false, + )); + } + } + + sort_entries(entries, config); + entries.shrink_to_fit(); + + Ok(()) +} + +fn write_directory_entries( + entries: &[PathData], + config: &Config, + output: &mut O, +) -> UResult<()> { + if config.format == Format::Long || config.alloc_size { + let total_size: u64 = entries + .iter() + .map(|item| { + item.metadata() + .as_ref() + .map_or(0, |md| get_block_size(md, config)) + }) + .sum(); + output.write_total(total_size, config)?; + } + + if matches!(output.stream_mode(), StreamMode::Streaming) { + for entry in entries { + output.write_entry(&entry.to_entry_info(config))?; + } + Ok(()) + } else { + output.write_entries(entries, config) + } +} + +/// Recursively traverse directories using an explicit stack. +/// +/// This avoids deep recursive call chains while preserving GNU-style +/// directory traversal order and ancestor detection. 
+fn enter_directory( + path_data: &PathData, + read_dir: ReadDir, + config: &Config, + listed_ancestors: &mut FxHashSet, + output: &mut O, + entries: &mut Vec, +) -> UResult<()> { + struct StackEntry { + path: PathBuf, + command_line: bool, + is_first: bool, + } + + let mut stack = Vec::new(); + let mut current = Some(StackEntry { + path: path_data.path().to_path_buf(), + command_line: path_data.command_line, + is_first: true, + }); + let mut initial_read_dir = Some(read_dir); + + while let Some(entry) = current.take().or_else(|| stack.pop()) { + let path_data = PathData::new( + entry.path.as_path().into(), + None, + None, + config, + entry.command_line, + ); + + if !entry.is_first { + output.write_dir_header(&path_data, config, false)?; + } + + let mut current_read_dir = if entry.is_first { + initial_read_dir + .take() + .expect("initial read_dir is present for first entry") + } else { + match fs::read_dir(&entry.path) { Err(err) => { - // flush stdout buffer before the error to preserve formatting and order - state.out.flush()?; + output.flush()?; show!(LsError::IOErrorContext( - path_data.path().to_path_buf(), + entry.path.clone(), err, - path_data.command_line + entry.command_line, )); continue; } Ok(rd) => rd, - }; + } + }; + + collect_directory_entries(entries, &path_data, config, output, &mut current_read_dir)?; + write_directory_entries(entries, config, output)?; - depth_first_list(dir_data, read_dir, config, &mut state, &mut dired, false)?; + if config.recursive { + let start = if config.files == Files::All { 2 } else { 0 }; - // Heuristic to ensure stack does not keep its capacity forever if there is - // combinatorial explosion; we decrease it logarithmically here. 
- let (cap, len) = (state.stack.capacity(), state.stack.len()); - if cap > (len + 4) * 2 { - state.stack.shrink_to(len + (cap - len) / 2); + for child in entries + .iter() + .skip(start) + .filter(|p| p.file_type().is_some_and(FileType::is_dir)) + .rev() + { + let child_path = child.path().to_path_buf(); + let child_must_dereference = child.must_dereference; + let child_command_line = child.command_line; + + match fs::read_dir(&child_path) { + Err(err) => { + output.flush()?; + show!(LsError::IOErrorContext( + child_path.clone(), + err, + child_command_line, + )); + } + Ok(_) => { + if listed_ancestors.insert(FileInformation::from_path( + &child_path, + child_must_dereference, + )?) { + stack.push(StackEntry { + path: child_path, + command_line: child_command_line, + is_first: false, + }); + } else { + output.flush()?; + show!(LsError::AlreadyListedError(child_path)); + } + } + } } } - - // No need to clear state.buf since [`enter_directory`] drains it. - state.listed_ancestors.clear(); - } - if config.dired && !config.hyperlink { - dired::print_dired_output(config, &dired, &mut state.out)?; } + Ok(()) } +/// Lists files and directories with text output to stdout. +/// +/// This is the standard ls entry point that formats output as text. +/// It uses [`list_with_output`] internally with a text formatter. 
+pub fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> { + let mut output = TextOutput::new(config); + list_with_output(locs, config, &mut output) +} + fn sort_entries(entries: &mut [PathData], config: &Config) { match config.sort { Sort::Time => entries.sort_unstable_by_key(|k| { @@ -1208,155 +1507,6 @@ fn sort_entries(entries: &mut [PathData], config: &Config) { } } -fn depth_first_list( - (dir_path, needs_blank_line): DirData, - mut read_dir: ReadDir, - config: &Config, - state: &mut ListState, - dired: &mut DiredOutput, - is_top_level: bool, -) -> UResult<()> { - let path_data = PathData::new(dir_path.as_path().into(), None, None, config, false); - - // Print dir heading - name... 'total' comes after error display - if state.initial_locs_len > 1 || config.recursive { - if is_top_level { - if needs_blank_line { - writeln!(state.out)?; - if config.dired { - dired.padding += 1; - } - } - if config.dired { - dired::indent(&mut state.out)?; - } - show_dir_name(&path_data, &mut state.out, config)?; - writeln!(state.out)?; - if config.dired { - let dir_len = path_data.path().as_os_str().len(); - // add the //SUBDIRED// coordinates - dired::calculate_subdired(dired, dir_len); - // Add the padding for the dir name - dired::add_dir_name(dired, dir_len); - } - } else { - writeln!(state.out)?; - if config.dired { - dired.padding += 1; - dired::indent(&mut state.out)?; - let dir_name_size = path_data.path().as_os_str().len(); - dired::calculate_subdired(dired, dir_name_size); - dired::add_dir_name(dired, dir_name_size); - } - show_dir_name(&path_data, &mut state.out, config)?; - writeln!(state.out)?; - } - } - - // Append entries with initial dot files and record their existence - let (ref mut buf, trim) = if config.files == Files::All { - const DOT_DIRECTORIES: usize = 2; - let v = vec![ - PathData::new( - path_data.path().into(), - None, - Some(OsStr::new(".").into()), - config, - false, - ), - PathData::new( - // On WASI the sandbox may block access to ".." 
at the - // preopened root. Fall back to "." so the entry still - // appears with valid metadata instead of an error. - { - let dotdot = path_data.path().join(".."); - #[cfg(target_os = "wasi")] - let dotdot = if dotdot.metadata().is_err() { - path_data.path().into() - } else { - dotdot - }; - dotdot.into() - }, - None, - Some(OsStr::new("..").into()), - config, - false, - ), - ]; - (v, DOT_DIRECTORIES) - } else { - (Vec::new(), 0) - }; - - // Convert those entries to the PathData struct - for raw_entry in read_dir.by_ref() { - match raw_entry { - Ok(dir_entry) => { - if should_display(&dir_entry, config) { - buf.push(PathData::new( - dir_entry.path().into(), - Some(dir_entry), - None, - config, - false, - )); - } - } - Err(err) => { - state.out.flush()?; - show!(LsError::IOError(err)); - } - } - } - // Relinquish unused space since we won't need it anymore. - buf.shrink_to_fit(); - - sort_entries(buf, config); - - if config.format == Format::Long || config.alloc_size { - let total = write_total(buf, config, &mut state.out)?; - if config.dired { - dired::add_total(dired, total); - } - } - - display_items(buf, config, state, dired)?; - - if config.recursive { - for e in buf - .iter() - .skip(trim) - .filter(|p| p.file_type().is_some_and(FileType::is_dir)) - .rev() - { - // Try to open only to report any errors in order to match GNU semantics. - if let Err(err) = fs::read_dir(e.path()) { - state.out.flush()?; - show!(LsError::IOErrorContext( - e.path().to_path_buf(), - err, - e.command_line - )); - } else { - let fi = FileInformation::from_path(e.path(), e.must_dereference)?; - if state.listed_ancestors.insert(fi) { - // Push to stack, but with a less aggressive growth curve. 
- let (cap, len) = (state.stack.capacity(), state.stack.len()); - if cap == len { - state.stack.reserve_exact(len / 4 + 4); - } - state.stack.push((e.path().to_path_buf(), true)); - } else { - state.out.flush()?; - show!(LsError::AlreadyListedError(e.path().to_path_buf())); - } - } - } - } - Ok(()) -} - fn get_metadata_with_deref_opt(p_buf: &Path, dereference: bool) -> std::io::Result { if dereference { p_buf.metadata() @@ -1365,23 +1515,6 @@ fn get_metadata_with_deref_opt(p_buf: &Path, dereference: bool) -> std::io::Resu } } -fn write_total(items: &[PathData], config: &Config, out: &mut BufWriter) -> UResult { - let mut total_size = 0; - for item in items { - total_size += item - .metadata() - .as_ref() - .map_or(0, |md| get_block_size(md, config)); - } - if config.dired { - dired::indent(out)?; - } - let total = translate!("ls-total", "size" => display_size(total_size, config)); - out.write_all(total.as_bytes())?; - out.write_all(&[config.line_ending as u8])?; - Ok(total.len() + 1) -} - #[allow(unused_variables)] fn get_block_size(md: &Metadata, config: &Config) -> u64 { /* GNU ls will display sizes in terms of block size diff --git a/src/uu/ls/src/output.rs b/src/uu/ls/src/output.rs new file mode 100644 index 00000000000..530ba26c23b --- /dev/null +++ b/src/uu/ls/src/output.rs @@ -0,0 +1,400 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +//! Output traits and types for programmatic access to ls functionality. +//! +//! This module provides a visitor/sink pattern that separates file enumeration +//! logic from output formatting. This allows programmatic consumers (shells, +//! file managers, etc.) to receive structured data without parsing text output. +//! +//! # Example +//! +//! ```ignore +//! use std::path::Path; +//! use uu_ls::{Config, list_with_output, EntryInfo, LsOutput, StreamMode}; +//! +//! 
struct MySink { +//! pub count: usize, +//! } +//! +//! impl MySink { +//! fn new() -> Self { +//! Self { count: 0 } +//! } +//! } +//! +//! impl LsOutput for MySink { +//! fn stream_mode(&self) -> StreamMode { +//! StreamMode::Streaming +//! } +//! +//! fn write_entry(&mut self, entry: &EntryInfo) -> uucore::error::UResult<()> { +//! println!("{} -> {:?}", entry.path.display(), entry.file_type); +//! self.count += 1; +//! Ok(()) +//! } +//! } +//! +//! let config = Config::from(&matches)?; +//! let mut output = MySink::new(); +//! list_with_output(vec![Path::new(".")], &config, &mut output)?; +//! println!("processed {} entries", output.count); +//! ``` + +//! Alternatively, use [`StreamingOutput`] when you want a reusable streaming sink +//! that collects `EntryInfo` objects as they arrive. + +use crate::{Config, PathData}; +use std::ffi::OsString; +use std::fs::{FileType, Metadata}; +use std::path::PathBuf; +use uucore::error::UResult; + +/// Information about a single file/directory entry. +/// +/// This struct provides programmatic access to file metadata without +/// requiring text parsing. All fields are pre-computed and ready for use. +#[derive(Debug, Clone)] +pub struct EntryInfo { + /// The full path to the file + pub path: PathBuf, + /// The display name (file name portion, may differ from path for . and ..) + pub display_name: OsString, + /// The file type (file, directory, symlink, etc.) + pub file_type: Option, + /// File metadata (size, permissions, timestamps, etc.) 
+ pub metadata: Option, + /// Security context (SELinux) if available + pub security_context: String, + /// Whether this entry was specified on the command line + pub command_line: bool, + /// Whether symlinks should be dereferenced for this entry + pub must_dereference: bool, +} + +impl EntryInfo { + /// Returns true if this entry represents a directory + pub fn is_dir(&self) -> bool { + self.file_type.as_ref().is_some_and(FileType::is_dir) + } + + /// Returns true if this entry represents a regular file + pub fn is_file(&self) -> bool { + self.file_type.as_ref().is_some_and(FileType::is_file) + } + + /// Returns true if this entry represents a symbolic link + pub fn is_symlink(&self) -> bool { + self.file_type.as_ref().is_some_and(FileType::is_symlink) + } + + /// Returns the file size in bytes, if metadata is available + pub fn size(&self) -> Option { + self.metadata.as_ref().map(Metadata::len) + } + + /// Returns the file name as a string slice, if valid UTF-8 + pub fn file_name(&self) -> Option<&str> { + self.display_name.to_str() + } +} + +/// Streaming mode for `LsOutput` sinks. +/// +/// `Batch` sinks receive a directory's entries all at once via +/// [`LsOutput::write_entries`]. `Streaming` sinks receive one entry at a time via +/// [`LsOutput::write_entry`]. +pub enum StreamMode { + Batch, + Streaming, +} + +/// Trait for receiving ls output entries. +/// +/// Implement this trait to receive structured data from the ls enumeration +/// process. The trait is designed to support both streaming (one entry at a time) +/// and batched (all entries at once) use cases. +/// +/// For programmatic access, implement [`write_entry`](LsOutput::write_entry) to +/// receive each entry individually. +/// +/// The internal `TextOutput` implementation uses [`write_entries`](LsOutput::write_entries) +/// to receive batches for proper column alignment and grid formatting. +pub trait LsOutput { + /// Returns the preferred output mode for this sink. 
+ /// + /// Default is `Batch` so text-formatting sinks can continue to receive full + /// directory batches. Streaming sinks should override this to return + /// `StreamMode::Streaming`. + fn stream_mode(&self) -> StreamMode { + StreamMode::Batch + } + + /// Called for each file/directory entry (streaming mode). + /// + /// Default implementation does nothing. Override this for programmatic access + /// where you want to process entries one at a time. + fn write_entry(&mut self, _entry: &EntryInfo) -> UResult<()> { + Ok(()) + } + + /// Called with a batch of entries for a directory. + /// + /// Default implementation calls `write_entry` for each entry. + /// Override this for text output that needs all entries for formatting. + fn write_entries(&mut self, entries: &[PathData], config: &Config) -> UResult<()> { + for entry in entries { + self.write_entry(&entry.to_entry_info(config))?; + } + Ok(()) + } + + /// Called when entering a directory (for recursive listings or multiple arguments). + /// + /// # Arguments + /// * `path_data` - The directory being entered + /// * `config` - The ls configuration + /// * `is_first` - Whether this is the first directory (affects newline handling) + fn write_dir_header( + &mut self, + _path_data: &PathData, + _config: &Config, + _is_first: bool, + ) -> UResult<()> { + Ok(()) + } + + /// Called to report the total blocks for a directory in long format. + /// + /// The `total_size` parameter is the total number of blocks used by + /// files in the directory. + fn write_total(&mut self, _total_size: u64, _config: &Config) -> UResult<()> { + Ok(()) + } + + /// Called to flush any buffered output (e.g., before error messages). + fn flush(&mut self) -> UResult<()> { + Ok(()) + } + + /// Called when all entries have been written. + /// + /// Use this for final cleanup, printing dired output, etc. 
+ fn finalize(&mut self, _config: &Config) -> UResult<()> { + Ok(()) + } + + /// Called at the start of listing, before any entries are processed. + /// + /// Use this for initialization that needs the config (e.g., color reset). + fn initialize(&mut self, _config: &Config) -> UResult<()> { + Ok(()) + } +} + +/// A dedicated streaming output sink. +/// +/// This sink is intended for programmatic consumers that want to receive +/// `EntryInfo` objects one at a time as they are emitted. It implements +/// [`LsOutput::stream_mode`] as `StreamMode::Streaming`. +/// +/// # Example +/// +/// ```ignore +/// use uu_ls::{Config, list_with_output, StreamingOutput}; +/// use std::path::Path; +/// +/// let mut output = StreamingOutput::new(); +/// list_with_output(vec![Path::new(".")], &config, &mut output)?; +/// +/// for entry in output.entries() { +/// println!("{}: {} bytes", +/// entry.display_name.to_string_lossy(), +/// entry.size().unwrap_or(0)); +/// } +/// ``` +#[derive(Debug, Default)] +pub struct StreamingOutput { + entries: Vec, + directories: Vec, + totals: Vec, +} + +impl StreamingOutput { + /// Create a new empty streaming sink. 
+ pub fn new() -> Self { + Self::default() + } + + /// Get all collected entries + pub fn entries(&self) -> &[EntryInfo] { + &self.entries + } + + /// Consume the collector and return all entries + pub fn into_entries(self) -> Vec { + self.entries + } + + /// Get all directory headers that were encountered + pub fn directories(&self) -> &[PathBuf] { + &self.directories + } + + /// Get all totals that were written + pub fn totals(&self) -> &[u64] { + &self.totals + } + + /// Clear all collected data + pub fn clear(&mut self) { + self.entries.clear(); + self.directories.clear(); + self.totals.clear(); + } +} + +impl LsOutput for StreamingOutput { + fn stream_mode(&self) -> StreamMode { + StreamMode::Streaming + } + + fn write_entry(&mut self, entry: &EntryInfo) -> UResult<()> { + self.entries.push(entry.clone()); + Ok(()) + } + + fn write_dir_header( + &mut self, + path_data: &PathData, + _config: &Config, + _is_first: bool, + ) -> UResult<()> { + self.directories.push(path_data.path().to_path_buf()); + Ok(()) + } + + fn write_total(&mut self, total_size: u64, _config: &Config) -> UResult<()> { + self.totals.push(total_size); + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_entry_info_is_dir() { + let entry = EntryInfo { + path: PathBuf::from("/test/dir"), + display_name: OsString::from("dir"), + file_type: None, + metadata: None, + security_context: String::new(), + command_line: false, + must_dereference: false, + }; + assert!(!entry.is_dir()); + } + + #[test] + fn test_entry_info_size() { + let entry = EntryInfo { + path: PathBuf::from("/test/file"), + display_name: OsString::from("file"), + file_type: None, + metadata: None, + security_context: String::new(), + command_line: false, + must_dereference: false, + }; + assert_eq!(entry.size(), None); + } + + #[test] + fn test_entry_info_file_name() { + let entry = EntryInfo { + path: PathBuf::from("/test/file.txt"), + display_name: OsString::from("file.txt"), + file_type: None, + 
metadata: None, + security_context: String::new(), + command_line: false, + must_dereference: false, + }; + assert_eq!(entry.file_name(), Some("file.txt")); + } + + #[test] + fn test_streaming_output_new() { + let collector = StreamingOutput::new(); + assert!(collector.entries().is_empty()); + assert!(collector.directories().is_empty()); + assert!(collector.totals().is_empty()); + } + + #[test] + fn test_streaming_output_write_entry() { + let mut collector = StreamingOutput::new(); + let entry = EntryInfo { + path: PathBuf::from("/test/file"), + display_name: OsString::from("file"), + file_type: None, + metadata: None, + security_context: String::new(), + command_line: false, + must_dereference: false, + }; + collector.write_entry(&entry).unwrap(); + assert_eq!(collector.entries().len(), 1); + assert_eq!(collector.entries()[0].display_name, OsString::from("file")); + } + + #[test] + fn test_streaming_output_clear() { + let mut collector = StreamingOutput::new(); + let entry = EntryInfo { + path: PathBuf::from("/test/file"), + display_name: OsString::from("file"), + file_type: None, + metadata: None, + security_context: String::new(), + command_line: false, + must_dereference: false, + }; + collector.write_entry(&entry).unwrap(); + + collector.clear(); + assert!(collector.entries().is_empty()); + assert!(collector.directories().is_empty()); + assert!(collector.totals().is_empty()); + } + + #[test] + fn test_streaming_output_into_entries() { + let mut collector = StreamingOutput::new(); + let entry = EntryInfo { + path: PathBuf::from("/test/file"), + display_name: OsString::from("file"), + file_type: None, + metadata: None, + security_context: String::new(), + command_line: false, + must_dereference: false, + }; + collector.write_entry(&entry).unwrap(); + + let entries = collector.into_entries(); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].display_name, OsString::from("file")); + } + + #[test] + fn test_streaming_output_flush() { + let mut collector = 
StreamingOutput::new(); + assert!(collector.flush().is_ok()); + } +} diff --git a/tests/by-util/test_ls.rs b/tests/by-util/test_ls.rs index 5058de3e08e..3372741a867 100644 --- a/tests/by-util/test_ls.rs +++ b/tests/by-util/test_ls.rs @@ -5340,7 +5340,7 @@ fn test_ls_dired_recursive_multiple() { .map(|chunk| { let start_pos = chunk[0]; let end_pos = chunk[1]; - let filename = String::from_utf8(output.as_bytes()[start_pos..=end_pos].to_vec()) + let filename = String::from_utf8(output.as_bytes()[start_pos..end_pos].to_vec()) .unwrap() .trim() .to_string(); @@ -5486,7 +5486,7 @@ fn test_ls_dired_complex() { .map(|chunk| { let start_pos = chunk[0]; let end_pos = chunk[1]; - let filename = String::from_utf8(output.as_bytes()[start_pos..=end_pos].to_vec()) + let filename = String::from_utf8(output.as_bytes()[start_pos..end_pos].to_vec()) .unwrap() .trim() .to_string();