diff --git a/.vscode/cspell.dictionaries/jargon.wordlist.txt b/.vscode/cspell.dictionaries/jargon.wordlist.txt index c468ef0ea74..020cf8aed7b 100644 --- a/.vscode/cspell.dictionaries/jargon.wordlist.txt +++ b/.vscode/cspell.dictionaries/jargon.wordlist.txt @@ -34,6 +34,15 @@ dequeue dev EINTR eintr +EMFILE +emfile +ENFILE +ENOMEM +getdents +btrfs +diriter +NOFILE +readdir nextest SIGUSR nonprinting diff --git a/src/uu/rm/src/platform/unix.rs b/src/uu/rm/src/platform/unix.rs index d2bcf6e2f46..89312128394 100644 --- a/src/uu/rm/src/platform/unix.rs +++ b/src/uu/rm/src/platform/unix.rs @@ -8,15 +8,15 @@ // spell-checker:ignore fstatat unlinkat statx behaviour use indicatif::ProgressBar; -use std::ffi::OsStr; +use std::ffi::{OsStr, OsString}; use std::fs; -use std::io::{IsTerminal, stdin}; +use std::io::{self, IsTerminal, stdin}; use std::os::unix::fs::PermissionsExt; -use std::path::Path; +use std::path::{Path, PathBuf}; use uucore::display::Quotable; use uucore::error::FromIo; use uucore::prompt_yes; -use uucore::safe_traversal::{DirFd, SymlinkBehavior}; +use uucore::safe_traversal::{DirEntry, DirEntryType, DirFd, DirIter, SymlinkBehavior}; use uucore::show_error; use uucore::translate; @@ -132,7 +132,7 @@ pub fn safe_remove_file( Some(false) } Err(e) => { - if e.kind() == std::io::ErrorKind::PermissionDenied { + if e.kind() == io::ErrorKind::PermissionDenied { show_error!("cannot remove {}: Permission denied", path.quote()); } else { let _ = show_removal_error(e, path); @@ -172,10 +172,10 @@ pub fn safe_remove_empty_dir( } /// Helper to handle errors with force mode consideration -fn handle_error_with_force(e: std::io::Error, path: &Path, options: &Options) -> bool { +fn handle_error_with_force(e: io::Error, path: &Path, options: &Options) -> bool { // Permission denied errors should be shown even in force mode // This matches GNU rm behavior - if e.kind() == std::io::ErrorKind::PermissionDenied { + if e.kind() == io::ErrorKind::PermissionDenied { show_permission_denied_error(path); return true; } @@ -187,17 +187,20 @@ fn handle_error_with_force(e: std::io::Error, path: &Path, options: &Options) -> !options.force } -/// Helper to handle permission denied errors +/// Helper to handle permission denied errors. +/// +/// `unlink_result` is the result of a prior `unlink_at` attempt on the entry. +/// The caller must perform the unlink before calling this function. fn handle_permission_denied( - dir_fd: &DirFd, - entry_name: &OsStr, + unlink_result: io::Result<()>, entry_path: &Path, options: &Options, + progress_bar: Option<&ProgressBar>, ) -> bool { // When we can't open a subdirectory due to permission denied, // try to remove it directly (it might be empty). // This matches GNU rm behavior with -f flag. - if let Err(_remove_err) = dir_fd.unlink_at(entry_name, true) { + if unlink_result.is_err() { // The directory is not empty (or another error) and we can't read it // to remove its contents. Report the original permission denied error. // This matches GNU rm behavior — the real problem is we lack @@ -206,24 +209,33 @@ fn handle_permission_denied( return true; } // Successfully removed empty directory + if let Some(pb) = progress_bar { + pb.inc(1); + } verbose_removed_directory(entry_path, options); false } -/// Helper to handle unlink operation with error reporting +/// Helper to handle unlink operation with error reporting. +/// +/// `unlink_result` is the result of a prior `unlink_at` attempt on the entry. +/// The caller must perform the unlink before calling this function. fn handle_unlink( - dir_fd: &DirFd, - entry_name: &OsStr, + unlink_result: io::Result<()>, entry_path: &Path, is_dir: bool, options: &Options, + progress_bar: Option<&ProgressBar>, ) -> bool { - if let Err(e) = dir_fd.unlink_at(entry_name, is_dir) { + if let Err(e) = unlink_result { let e = e .map_err_context(|| translate!("rm-error-cannot-remove", "file" => entry_path.quote())); show_error!("{e}"); true } else { + if let Some(pb) = progress_bar { + pb.inc(1); + } if is_dir { verbose_removed_directory(entry_path, options); } else { @@ -287,7 +299,7 @@ pub fn safe_remove_dir_recursive( Err(e) => { // If we can't open the directory for safe traversal, // handle the error appropriately and try to remove if possible - if e.kind() == std::io::ErrorKind::PermissionDenied { + if e.kind() == io::ErrorKind::PermissionDenied { // Try to remove the directory directly if it's empty if fs::remove_dir(path).is_ok() { verbose_removed_directory(path, options); @@ -301,7 +313,8 @@ pub fn safe_remove_dir_recursive( } }; - let error = safe_remove_dir_recursive_impl(path, &dir_fd, options); + let mut current_path = path.to_path_buf(); + let error = safe_remove_dir_recursive_impl(&mut current_path, dir_fd, options, progress_bar); // After processing all children, remove the directory itself if error { @@ -338,96 +351,516 @@ pub fn safe_remove_dir_recursive( } #[cfg(not(target_os = "redox"))] -pub fn safe_remove_dir_recursive_impl(path: &Path, dir_fd: &DirFd, options: &Options) -> bool { - // Read directory entries using safe traversal - let entries = match dir_fd.read_dir() { - Ok(entries) => entries, - Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => { - if !options.force { - show_permission_denied_error(path); - } - return !options.force; +/// Returns `true` if the I/O error is EMFILE or ENFILE (too many open files). +fn is_emfile(e: &io::Error) -> bool { + matches!(e.raw_os_error(), Some(libc::EMFILE) | Some(libc::ENFILE)) +} + +#[cfg(not(target_os = "redox"))] +/// Source of directory entries for a [`StackFrame`]. +/// +/// A frame starts as `Live` (fd open, lazy iteration via `DirIter`). When +/// the process runs out of file descriptors — which limits tree depth to +/// approximately `RLIMIT_NOFILE` (≈ 1 024) — `try_reclaim_fd` demotes the +/// oldest live frame to `Drained` by materializing its remaining entries into +/// a `Vec` and closing the fd. Subsequent entry-yielding uses the vec; fd +/// operations (stat, unlink, open-child) temporarily re-open the directory +/// from `StackFrame::dir_path`. +enum FrameIter { + /// The directory fd is open. All operations go through the [`DirIter`]. + Live(DirIter), + /// The fd was closed to reclaim it for a deeper level. + /// Remaining entries are pre-collected. Operations that need a directory + /// fd temporarily re-open `StackFrame::dir_path`. + Drained(std::vec::IntoIter), +} + +#[cfg(not(target_os = "redox"))] +/// One level of the directory stack used by the iterative traversal. +/// +/// # File-descriptor budget +/// +/// Each frame ordinarily holds exactly **one** open file descriptor: `iter` +/// owns the fd for the directory being traversed and exposes it for +/// `stat_at`, `open_child_iter`, and `unlink_at` operations as well. +/// +/// When `RLIMIT_NOFILE` is exhausted, [`try_reclaim_fd`] demotes the oldest +/// `Live` frame to [`FrameIter::Drained`], closing its fd. Operations on a +/// `Drained` frame re-open the directory from `dir_path` on demand; this +/// costs one extra `openat` per entry in that frame but allows trees of +/// arbitrary depth. +struct StackFrame { + /// Entry source and directory-operation handle. + iter: FrameIter, + /// Accumulates whether any child removal failed. + had_error: bool, + /// Permission bits of this directory, stored for the interactive prompt + /// that fires when the directory is about to be unlinked from its parent. + /// Not meaningful for the root frame (handled by `safe_remove_dir_recursive`). + mode: libc::mode_t, + /// Name of this directory inside its parent, used for `unlinkat` once all + /// children have been processed. Empty string for the root frame. + entry_name: OsString, + /// Full path to this directory. Used to re-open the directory when the + /// frame has been demoted to [`FrameIter::Drained`] and a fd-requiring + /// operation (stat, unlink, open-child) is needed. + /// + /// `None` for child frames that have never been demoted; populated lazily + /// by [`try_reclaim_fd`] at demotion time, so no path allocation is + /// incurred for frames that remain `Live` for their entire lifetime. + /// The root frame always has `Some(path)` set at construction time. + dir_path: Option, +} + +#[cfg(not(target_os = "redox"))] +/// Demote the oldest `Live` frame (excluding the top) to `Drained`, freeing +/// one file descriptor. +/// +/// Returns `true` if a frame was demoted; `false` if all non-top frames are +/// already `Drained` (which means we are genuinely out of fds). +/// +/// Materializing the oldest frame first minimizes the chance of collecting a +/// large sibling list: for deep linear chains the bottom frames have already +/// exhausted their children before a new level is pushed. +/// +/// # Security note +/// +/// Once a frame is demoted to `Drained`, subsequent fd-requiring operations +/// (`frame_stat_at`, `frame_open_child`, `frame_unlink_at`) re-open the +/// directory via its stored `dir_path` with `O_NOFOLLOW`. This protects the +/// **final path component** against a concurrent symlink swap. Intermediate +/// components of the path are resolved normally — the same limitation present +/// in GNU `rm` under fd pressure. The window only exists in the EMFILE +/// fallback path, never during normal traversal. +fn try_reclaim_fd(stack: &mut [StackFrame]) -> bool { + // Leave the top frame (currently active) alone. + let limit = stack.len().saturating_sub(1); + for i in 0..limit { + if !matches!(stack[i].iter, FrameIter::Live(_)) { + continue; } - Err(e) => { - return handle_error_with_force(e, path, options); + // Reconstruct this frame's full path from the root frame's stored + // path plus the `entry_name` of every intermediate frame. This is + // computed before the mutable borrow of `stack[i]` below so that the + // immutable borrows of `stack[0..=i]` are released first. + let dir_path = { + // stack[0] is the root frame and always has `dir_path` set. + let root = stack[0] + .dir_path + .as_ref() + .expect("root StackFrame always has dir_path set"); + let mut p = root.clone(); + for frame in &stack[1..=i] { + p.push(&frame.entry_name); + } + p + }; + // Drain the remaining entries into a Vec so the fd can be closed. + // If readdir itself fails mid-drain we cannot enumerate the rest of + // this directory's children. Report the error and mark the frame as + // failed so the directory is not unlinked (it would be non-empty). + let mut remaining = Vec::new(); + let mut had_error = false; + if let FrameIter::Live(ref mut di) = stack[i].iter { + for result in di.by_ref() { + match result { + Ok(entry) => remaining.push(entry), + Err(e) => { + show_error!( + "{}", + e.map_err_context(|| translate!( + "rm-error-cannot-remove", + "file" => dir_path.quote() + )) + ); + had_error = true; + break; + } + } + } } - }; + stack[i].had_error |= had_error; + stack[i].iter = FrameIter::Drained(remaining.into_iter()); + stack[i].dir_path = Some(dir_path); + return true; + } + false +} - let mut error = false; +#[cfg(not(target_os = "redox"))] +/// Stat an entry relative to the given frame's directory. +/// +/// If the frame is [`FrameIter::Live`] the existing fd is used. If it is +/// [`FrameIter::Drained`] the directory is temporarily re-opened from +/// `frame.dir_path` with `O_NOFOLLOW`, which protects the final path +/// component against a concurrent symlink swap. +fn frame_stat_at(frame: &StackFrame, name: &OsStr) -> io::Result { + match &frame.iter { + FrameIter::Live(di) => di.stat_at(name, SymlinkBehavior::NoFollow), + FrameIter::Drained(_) => DirFd::open( + frame + .dir_path + .as_deref() + .expect("Drained frame always has dir_path set"), + SymlinkBehavior::NoFollow, + ) + .and_then(|fd| fd.stat_at(name, SymlinkBehavior::NoFollow)), + } +} - // Process each entry - for entry_name in entries { - let entry_path = path.join(&entry_name); +#[cfg(not(target_os = "redox"))] +/// Open a child directory relative to the given frame's directory, returning +/// a new [`DirIter`] that owns exactly one fd. +/// +/// If the frame is [`FrameIter::Live`] the existing fd is used via +/// `open_child_iter` (no dup). If it is [`FrameIter::Drained`] the parent +/// directory is temporarily re-opened from `frame.dir_path` with `O_NOFOLLOW`, +/// which protects the final path component against a concurrent symlink swap. +/// +/// In both cases the child is opened with `O_NOFOLLOW` to prevent symlink +/// substitution attacks. +fn frame_open_child(frame: &StackFrame, name: &OsStr) -> io::Result { + match &frame.iter { + FrameIter::Live(di) => di.open_child_iter(name, SymlinkBehavior::NoFollow), + FrameIter::Drained(_) => DirFd::open( + frame + .dir_path + .as_deref() + .expect("Drained frame always has dir_path set"), + SymlinkBehavior::NoFollow, + ) + .and_then(|fd| fd.open_subdir(name, SymlinkBehavior::NoFollow)) + .and_then(DirFd::into_iter_dir), + } +} + +#[cfg(not(target_os = "redox"))] +/// Unlink an entry relative to the given frame's directory. +/// +/// If the frame is [`FrameIter::Live`] the existing fd is used. If it is +/// [`FrameIter::Drained`] the directory is temporarily re-opened from +/// `frame.dir_path` with `O_NOFOLLOW`, which protects the final path +/// component against a concurrent symlink swap. +fn frame_unlink_at(frame: &StackFrame, name: &OsStr, is_dir: bool) -> io::Result<()> { + match &frame.iter { + FrameIter::Live(di) => di.unlink_at(name, is_dir), + FrameIter::Drained(_) => DirFd::open( + frame + .dir_path + .as_deref() + .expect("Drained frame always has dir_path set"), + SymlinkBehavior::NoFollow, + ) + .and_then(|fd| fd.unlink_at(name, is_dir)), + } +} - // Get metadata for the entry using fstatat - let entry_stat = match dir_fd.stat_at(&entry_name, SymlinkBehavior::NoFollow) { - Ok(stat) => stat, - Err(e) => { - error |= handle_error_with_force(e, &entry_path, options); - continue; +#[cfg(not(target_os = "redox"))] +pub(super) fn safe_remove_dir_recursive_impl( + current_path: &mut PathBuf, + dir_fd: DirFd, + options: &Options, + progress_bar: Option<&ProgressBar>, +) -> bool { + // Iterative, allocation-minimizing implementation. + // + // Design goals (see https://github.com/uutils/coreutils/issues/11222): + // • One shared `PathBuf` for the entire traversal instead of one `join()` + // allocation per child entry. + // • Explicit `Vec` work-stack instead of recursive call frames, + // removing one heap allocation per directory level and preventing stack + // overflow on deep trees. + // • Lazy `DirIter` (backed by `nix::dir::OwningIter`) instead of the eager + // `read_dir()` helper, avoiding the intermediate `Vec` per level. + // • Single fd per stack frame: `DirFd::into_iter_dir` transfers ownership + // directly into the iterator without `dup(2)`. + // + // Fd budget: each `Live` frame holds exactly one open fd. When the process + // runs out of file descriptors (`EMFILE`/`ENFILE`), `try_reclaim_fd` demotes + // the oldest live frame to `Drained` — its remaining entries are materialized + // into a `Vec` and its fd is closed. Subsequent fd-requiring + // operations on a `Drained` frame re-open the directory from `dir_path` on + // demand. This allows traversal of trees of arbitrary depth at the cost of + // one extra `openat(2)` per entry in a drained frame. + + // Obtain the initial iterator via into_iter_dir (consuming, no dup). + let root_iter = match dir_fd.into_iter_dir() { + Ok(it) => it, + Err(e) if e.kind() == io::ErrorKind::PermissionDenied => { + if !options.force { + show_permission_denied_error(current_path.as_path()); } + return !options.force; + } + Err(e) => return handle_error_with_force(e, current_path.as_path(), options), + }; + + // Pre-size to a reasonable depth to avoid early reallocations. + let mut stack: Vec = Vec::with_capacity(32); + stack.push(StackFrame { + iter: FrameIter::Live(root_iter), + had_error: false, + mode: 0, // root mode is not used here; handled by safe_remove_dir_recursive + entry_name: OsString::new(), + dir_path: Some(current_path.clone()), + }); + + loop { + // Pull the next child from the top frame's iterator. + // The mutable borrow of `stack` is released as soon as `next()` returns + // an owned value, so subsequent borrows within the match arms are allowed. + let entry = match stack.last_mut() { + None => unreachable!( + "stack is non-empty at every iteration: the root frame is only \ + removed in the `None` arm below, which returns immediately" + ), + Some(frame) => match &mut frame.iter { + FrameIter::Live(di) => di.next(), + FrameIter::Drained(it) => it.next().map(Ok), + }, }; - // Check if it's a directory - let is_dir = ((entry_stat.st_mode as libc::mode_t) & libc::S_IFMT) == libc::S_IFDIR; + match entry { + // ── All children of the current directory have been processed ────── + None => { + let completed = stack.pop().unwrap(); - if is_dir { - // Ask user if they want to descend into this directory - if options.interactive == InteractiveMode::Always - && !is_dir_empty(&entry_path) - && !prompt_descend(&entry_path) - { - continue; - } + if stack.is_empty() { + // Root frame exhausted — return its error flag to the caller. + // current_path is still the root path; no pop needed. + return completed.had_error; + } - // Recursively remove subdirectory using safe traversal - let child_dir_fd = match dir_fd.open_subdir(&entry_name, SymlinkBehavior::Follow) { - Ok(fd) => fd, - Err(e) => { - // If we can't open the subdirectory for safe traversal, - // try to handle it as best we can with safe operations - if e.kind() == std::io::ErrorKind::PermissionDenied { - error |= handle_permission_denied( - dir_fd, - entry_name.as_ref(), - &entry_path, - options, - ); - } else { - error |= handle_error_with_force(e, &entry_path, options); + // current_path == path of the completed (child) directory here. + let child_error = completed.had_error; + stack.last_mut().unwrap().had_error |= child_error; + + // Only unlink this directory if all its children were removed. + if !child_error { + if options.interactive == InteractiveMode::Always + && !prompt_dir_with_mode(current_path.as_path(), completed.mode, options) + { + // User declined this subdirectory. We intentionally do + // NOT propagate an error to the parent frame here: GNU rm + // only fails when an *unlink* syscall fails, not when the + // user explicitly skips a removal. If the parent itself + // cannot be removed later (because it is now non-empty), + // the caller `safe_remove_dir_recursive` detects the + // non-empty condition via `is_dir_empty` and silently + // skips it — matching GNU rm's exit-0 behaviour in that + // situation. + current_path.pop(); + continue; } - continue; + // Unlink the completed child directory from its parent. + // `stack.last()` is now the parent frame; `frame_unlink_at` + // handles both Live (fd already open) and Drained (re-open + // from dir_path) cases. + let unlink_result = + frame_unlink_at(stack.last().unwrap(), completed.entry_name.as_ref(), true); + let unlink_err = handle_unlink( + unlink_result, + current_path.as_path(), + true, + options, + progress_bar, + ); + stack.last_mut().unwrap().had_error |= unlink_err; } - }; - let child_error = safe_remove_dir_recursive_impl(&entry_path, &child_dir_fd, options); - error |= child_error; - - // Ask user permission if needed for this subdirectory - if !child_error - && options.interactive == InteractiveMode::Always - && !prompt_dir_with_mode(&entry_path, entry_stat.st_mode as libc::mode_t, options) - { - continue; + // Restore current_path to the parent directory. + current_path.pop(); } - // Remove the now-empty subdirectory using safe unlinkat - if !child_error { - error |= handle_unlink(dir_fd, entry_name.as_ref(), &entry_path, true, options); + // ── readdir returned an I/O error mid-iteration ─────────────────── + Some(Err(e)) => { + // current_path is the directory being iterated; no child was pushed. + let err = handle_error_with_force(e, current_path.as_path(), options); + stack.last_mut().unwrap().had_error |= err; } - } else { - // Remove file - check if user wants to remove it first - if prompt_file_with_stat(&entry_path, &entry_stat, options) { - error |= handle_unlink(dir_fd, entry_name.as_ref(), &entry_path, false, options); + + // ── Normal child entry ──────────────────────────────────────────── + Some(Ok(entry)) => { + // Extend the shared path accumulator in-place — amortized O(1), + // no heap allocation if there is spare capacity. + current_path.push(&entry.name); + + // Determine whether this entry is a directory and, if needed, + // obtain its full stat. + // + // Fast path (non-interactive, d_type known): use the file-type + // hint from `getdents`/`d_type` to skip the `fstatat` syscall. + // On modern Linux filesystems (ext4, xfs, btrfs, tmpfs, …) this + // is always available, eliminating one syscall per entry in the + // common case. + // + // Fallback: stat when in interactive mode (need mode/size for + // the prompt) or when the filesystem reports DT_UNKNOWN. + let needs_stat = + options.interactive != InteractiveMode::Never || entry.file_type.is_none(); + + let (is_dir, stat_opt) = if needs_stat { + match frame_stat_at(stack.last().unwrap(), entry.name.as_ref()) { + Ok(s) => { + let is_dir = + ((s.st_mode as libc::mode_t) & libc::S_IFMT) == libc::S_IFDIR; + (is_dir, Some(s)) + } + Err(e) => { + let err = handle_error_with_force(e, current_path.as_path(), options); + stack.last_mut().unwrap().had_error |= err; + current_path.pop(); + continue; + } + } + } else { + ( + matches!(entry.file_type, Some(DirEntryType::Directory)), + None, + ) + }; + + if is_dir { + // Interactive: ask whether to descend into this directory. + // + // Note: is_dir_empty resolves `current_path` through a fresh + // path-based read_dir(2) call, then closes the fd before + // open_child_iter opens the directory again below. There is a + // narrow TOCTOU window between these two operations where a + // concurrent process could modify the directory's contents, + // causing the prompt to be shown (or skipped) incorrectly. + // This is pre-existing behaviour inherited from GNU rm, which + // performs the same non-atomic check. The window is limited to + // interactive mode and carries no security consequence: even if + // the directory is swapped, open_child_iter uses O_NOFOLLOW so + // symlink substitution is rejected by the kernel. + if options.interactive == InteractiveMode::Always + && !is_dir_empty(current_path.as_path()) + && !prompt_descend(current_path.as_path()) + { + current_path.pop(); + continue; + } + + // Open the child directory for safe traversal. + // Use NoFollow: the entry was already confirmed to be a real + // directory (not a symlink) by stat_at(NoFollow) above, or by + // d_type == DT_DIR (which the kernel sets from the inode, not + // the symlink target). Using Follow would re-introduce a TOCTOU + // window — a concurrent process could replace the directory with + // a symlink between the stat and the open, causing rm to traverse + // an unintended target. + // + // EMFILE / ENFILE recovery: if the process has run out of file + // descriptors, demote the oldest Live frame to Drained (freeing + // one fd) and retry. This allows trees deeper than RLIMIT_NOFILE + // to be traversed at the cost of one extra openat(2) per entry in + // the drained frame. + let child_iter = + match frame_open_child(stack.last().unwrap(), entry.name.as_ref()) { + Ok(it) => it, + Err(e) if is_emfile(&e) => { + if try_reclaim_fd(&mut stack) { + // Retry with the freed fd. + match frame_open_child( + stack.last().unwrap(), + entry.name.as_ref(), + ) { + Ok(it) => it, + Err(e) => { + let err = handle_error_with_force( + e, + current_path.as_path(), + options, + ); + stack.last_mut().unwrap().had_error |= err; + current_path.pop(); + continue; + } + } + } else { + // No frames to reclaim — genuinely out of fds. + let err = + handle_error_with_force(e, current_path.as_path(), options); + stack.last_mut().unwrap().had_error |= err; + current_path.pop(); + continue; + } + } + Err(e) if e.kind() == io::ErrorKind::PermissionDenied => { + // Attempt to remove it directly in case it is empty. + let unlink_result = frame_unlink_at( + stack.last().unwrap(), + entry.name.as_ref(), + true, + ); + let err = handle_permission_denied( + unlink_result, + current_path.as_path(), + options, + progress_bar, + ); + stack.last_mut().unwrap().had_error |= err; + current_path.pop(); + continue; + } + Err(e) => { + let err = + handle_error_with_force(e, current_path.as_path(), options); + stack.last_mut().unwrap().had_error |= err; + current_path.pop(); + continue; + } + }; + + // Push the child frame. current_path now ends with entry_name + // and will be popped when this frame is exhausted (None arm). + // dir_path is None — populated lazily by try_reclaim_fd only + // if this frame is ever demoted to Drained (EMFILE fallback). + // mode is 0 when stat_opt is None (non-interactive path); it is + // only consumed by prompt_dir_with_mode which is guarded by + // `interactive == Always`, so the zero is never observed. + stack.push(StackFrame { + iter: FrameIter::Live(child_iter), + had_error: false, + mode: stat_opt.map_or(0, |s| s.st_mode as libc::mode_t), + entry_name: entry.name, + dir_path: None, + }); + } else { + // File or symlink: prompt then unlink. + // In Never mode stat_opt is None and we always remove. + let should_remove = match stat_opt { + Some(ref s) => prompt_file_with_stat(current_path.as_path(), s, options), + None => true, + }; + if should_remove { + let unlink_result = + frame_unlink_at(stack.last().unwrap(), entry.name.as_ref(), false); + let err = handle_unlink( + unlink_result, + current_path.as_path(), + false, + options, + progress_bar, + ); + stack.last_mut().unwrap().had_error |= err; + } + // Restore path after processing the file/symlink. + current_path.pop(); + } } } } - - error } #[cfg(target_os = "redox")] -pub fn safe_remove_dir_recursive_impl(_path: &Path, _dir_fd: &DirFd, _options: &Options) -> bool { +pub(super) fn safe_remove_dir_recursive_impl( + _current_path: &mut PathBuf, + _dir_fd: DirFd, + _options: &Options, + _progress_bar: Option<&ProgressBar>, +) -> bool { // safe_traversal stat_at is not supported on Redox // This shouldn't be called on Redox, but provide a stub for compilation true // Return error diff --git a/src/uucore/src/lib/features/safe_traversal.rs b/src/uucore/src/lib/features/safe_traversal.rs index 98fc81dd928..5b71e21575e 100644 --- a/src/uucore/src/lib/features/safe_traversal.rs +++ b/src/uucore/src/lib/features/safe_traversal.rs @@ -22,7 +22,7 @@ use std::os::unix::ffi::OsStrExt; use std::os::unix::io::{AsFd, AsRawFd, BorrowedFd, FromRawFd, IntoRawFd, OwnedFd, RawFd}; use std::path::{Path, PathBuf}; -use nix::dir::Dir; +use nix::dir::{Dir, OwningIter}; use nix::fcntl::{OFlag, openat}; use nix::libc; use nix::sys::stat::{FchmodatFlags, FileStat, Mode, fchmodat, fstatat, mkdirat}; @@ -66,6 +66,12 @@ pub enum SafeTraversalError { #[error("{}", translate!("safe-traversal-error-open-failed", "path" => path.quote(), "source" => source))] OpenFailed { + /// The path that could not be opened. + /// + /// When produced by [`DirFd::open_subdir`] or [`DirIter::open_child_iter`] this is the + /// **relative entry name** (e.g. `"foo"`), not a full absolute path. Callers that + /// display errors to the user should reconstruct the full path themselves (e.g. from a + /// shared `current_path` accumulator) before formatting the message. path: PathBuf, #[source] source: io::Error, @@ -108,6 +114,167 @@ impl From for io::Error { } } +/// A lazy iterator over the names of entries in a directory. +/// File-type hint obtained from `getdents`/`d_type` without a separate `stat`. +/// +/// This is the file type as reported in the directory entry itself. On most +/// modern Linux filesystems (ext4, xfs, btrfs, tmpfs, …) this is always +/// populated. On some network filesystems (NFS v2/v3, some FUSE mounts) the +/// kernel may report `DT_UNKNOWN`, in which case [`DirIter`] yields `None` and +/// callers must fall back to [`DirIter::stat_at`]. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DirEntryType { + /// Regular file. + File, + /// Directory. + Directory, + /// Symbolic link. + Symlink, + /// Any other type (FIFO, socket, block/char device, …). + Other, +} + +/// A directory entry yielded by [`DirIter`]. +#[derive(Debug)] +pub struct DirEntry { + /// Name of the entry (never `"."` or `".."`). + pub name: OsString, + /// File-type hint from the directory entry itself (`d_type`). + /// `None` when the filesystem reports `DT_UNKNOWN`. + pub file_type: Option, +} + +/// +/// Obtained from [`DirFd::iter_dir`] or [`DirFd::into_iter_dir`]. Yields one +/// [`DirEntry`] per entry, skipping `"."` and `".."`. Iteration stops on the +/// first I/O error, which is returned as `Some(Err(_))`. +/// +/// Uses [`nix::dir::OwningIter`] internally, which does **not** rewind the +/// directory on drop (unlike the borrowing [`nix::dir::Iter`]). +/// +/// In addition to iteration, `DirIter` exposes [`stat_at`], [`open_child_iter`], +/// and [`unlink_at`] so that callers can perform all directory operations +/// through a single object — and therefore a single open file descriptor — +/// instead of keeping a separate [`DirFd`] alongside the iterator. +/// +/// [`stat_at`]: DirIter::stat_at +/// [`open_child_iter`]: DirIter::open_child_iter +/// [`unlink_at`]: DirIter::unlink_at +pub struct DirIter { + inner: OwningIter, +} + +impl DirIter { + /// Borrow the underlying directory fd for the duration of `self`. + /// + /// # Safety invariant + /// `OwningIter` owns the fd and keeps it open for its entire lifetime, + /// so the returned `BorrowedFd` is valid as long as `self` is alive. + fn borrowed_fd(&self) -> BorrowedFd<'_> { + // SAFETY: the fd is owned by `inner` (OwningIter) and remains valid + // for `'_` (the lifetime of `self`). + unsafe { BorrowedFd::borrow_raw(self.inner.as_raw_fd()) } + } + + /// Stat a file relative to this directory. + pub fn stat_at(&self, name: &OsStr, symlink_behavior: SymlinkBehavior) -> io::Result { + let name_cstr = + CString::new(name.as_bytes()).map_err(|_| SafeTraversalError::PathContainsNull)?; + let flags = if symlink_behavior.should_follow() { + nix::fcntl::AtFlags::empty() + } else { + nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW + }; + fstatat(self.borrowed_fd(), name_cstr.as_c_str(), flags).map_err(|e| { + SafeTraversalError::StatFailed { + path: name.into(), + source: io::Error::from_raw_os_error(e as i32), + } + .into() + }) + } + + /// Open a subdirectory relative to this directory, returning a new `DirIter`. + /// + /// The returned iterator owns a freshly opened file descriptor; no `dup` is + /// performed. Together with [`DirFd::into_iter_dir`], this means an iterative + /// traversal needs only **one** open fd per directory level (instead of two + /// with the borrowing [`DirFd::iter_dir`]). + pub fn open_child_iter( + &self, + name: &OsStr, + symlink_behavior: SymlinkBehavior, + ) -> io::Result { + let name_cstr = + CString::new(name.as_bytes()).map_err(|_| SafeTraversalError::PathContainsNull)?; + let mut flags = OFlag::O_RDONLY | OFlag::O_DIRECTORY | OFlag::O_CLOEXEC; + if !symlink_behavior.should_follow() { + flags |= OFlag::O_NOFOLLOW; + } + let new_fd: OwnedFd = openat( + self.borrowed_fd(), + name_cstr.as_c_str(), + flags, + Mode::empty(), + ) + .map_err(|e| SafeTraversalError::OpenFailed { + path: name.into(), + source: io::Error::from_raw_os_error(e as i32), + })?; + // new_fd is an OwnedFd; RAII closes it automatically if from_fd returns Err. + let dir = Dir::from_fd(new_fd).map_err(|e| io::Error::from_raw_os_error(e as i32))?; + Ok(Self { + inner: dir.into_iter(), + }) + } + + /// Remove a file or empty directory relative to this directory. + pub fn unlink_at(&self, name: &OsStr, is_dir: bool) -> io::Result<()> { + let name_cstr = + CString::new(name.as_bytes()).map_err(|_| SafeTraversalError::PathContainsNull)?; + let flags = if is_dir { + UnlinkatFlags::RemoveDir + } else { + UnlinkatFlags::NoRemoveDir + }; + unlinkat(self.borrowed_fd(), name_cstr.as_c_str(), flags).map_err(|e| { + SafeTraversalError::UnlinkFailed { + path: name.into(), + source: io::Error::from_raw_os_error(e as i32), + } + })?; + Ok(()) + } +} + +impl Iterator for DirIter { + type Item = io::Result; + + fn next(&mut self) -> Option { + loop { + match self.inner.next()? { + Err(e) => return Some(Err(io::Error::from_raw_os_error(e as i32))), + Ok(entry) => { + let name = OsStr::from_bytes(entry.file_name().to_bytes()); + if name == "." || name == ".." { + continue; + } + let file_type = entry.file_type().map(|t| match t { + nix::dir::Type::Directory => DirEntryType::Directory, + nix::dir::Type::Symlink => DirEntryType::Symlink, + nix::dir::Type::File => DirEntryType::File, + _ => DirEntryType::Other, + }); + return Some(Ok(DirEntry { + name: name.to_os_string(), + file_type, + })); + } + } + } + } +} + // Helper function to read directory entries using nix fn read_dir_entries(fd: &OwnedFd) -> io::Result> { let mut entries = Vec::new(); @@ -229,6 +396,57 @@ impl DirFd { }) } + /// Return a lazy iterator over the names of entries in this directory. + /// + /// Entries are yielded in kernel order (same as `getdents64`). `"."` and + /// `".."` are never yielded. The underlying file descriptor is duplicated + /// so the iterator is independent of `self`. + /// + /// Prefer this over [`DirFd::read_dir`] in hot paths to avoid allocating a + /// `Vec` upfront when only a streaming pass over the entries is needed. + /// + /// # File-descriptor cost + /// + /// This method calls `dup(2)` to transfer fd ownership into [`DirIter`]. + /// While the returned iterator is alive, **two** file descriptors are open + /// for the same directory: the original one in `self` and the duplicated + /// one inside the iterator. Callers that keep both alive simultaneously + /// (e.g. one per level of a directory stack) should be aware that the + /// effective traversal depth is bounded by half the process's open-file + /// limit (`RLIMIT_NOFILE`, commonly 1024 on Linux, giving ~510 levels). + pub fn iter_dir(&self) -> io::Result { + let dup_fd = + nix::unistd::dup(&self.fd).map_err(|e| io::Error::from_raw_os_error(e as i32))?; + // dup_fd is an OwnedFd; RAII closes it automatically if from_fd returns Err. + let dir = Dir::from_fd(dup_fd).map_err(|e| io::Error::from_raw_os_error(e as i32))?; + Ok(DirIter { + inner: dir.into_iter(), + }) + } + + /// Consume this `DirFd` and return a lazy iterator over the directory's entries. + /// + /// Unlike [`iter_dir`], this method transfers fd ownership directly to + /// [`DirIter`] without calling `dup(2)`. The resulting iterator therefore + /// holds exactly **one** open file descriptor. Prefer this over `iter_dir` + /// in iterative traversal code where the `DirFd` is no longer needed after + /// the iterator is created (e.g. when all subsequent operations — `stat_at`, + /// `unlink_at`, opening child directories — are routed through [`DirIter`]'s + /// own methods). + /// + /// [`iter_dir`]: DirFd::iter_dir + pub fn into_iter_dir(self) -> io::Result { + // self.fd is OwnedFd; Dir::from_fd takes ownership — no dup needed. + // If from_fd fails (essentially only on ENOMEM), the fd is leaked. + // This is an upstream nix limitation: Dir::from_fd consumes the OwnedFd + // before it can fail, leaving no Rust-safe way to recover it. An fd + // leak on OOM is acceptable — the process is already in a degraded state. + let dir = Dir::from_fd(self.fd).map_err(|e| io::Error::from_raw_os_error(e as i32))?; + Ok(DirIter { + inner: dir.into_iter(), + }) + } + /// Remove a file or empty directory relative to this directory pub fn unlink_at(&self, name: &OsStr, is_dir: bool) -> io::Result<()> { let name_cstr = @@ -1244,4 +1462,108 @@ mod tests { let result_nofollow = DirFd::open(&link, SymlinkBehavior::NoFollow); assert!(result_nofollow.is_err()); } + + #[test] + fn test_dirfd_iter_dir_basic() { + let temp_dir = TempDir::new().unwrap(); + fs::write(temp_dir.path().join("alpha"), "a").unwrap(); + fs::write(temp_dir.path().join("beta"), "b").unwrap(); + fs::write(temp_dir.path().join("gamma"), "c").unwrap(); + + let dir_fd = DirFd::open(temp_dir.path(), SymlinkBehavior::Follow).unwrap(); + let mut names: Vec = dir_fd + .iter_dir() + .unwrap() + .map(|r| r.unwrap().name) + .collect(); + names.sort(); + + assert_eq!( + names, + vec![ + OsString::from("alpha"), + OsString::from("beta"), + OsString::from("gamma"), + ] + ); + } + + #[test] + fn test_dirfd_iter_dir_empty() { + let temp_dir = TempDir::new().unwrap(); + let dir_fd = DirFd::open(temp_dir.path(), SymlinkBehavior::Follow).unwrap(); + let entries: Vec<_> = dir_fd.iter_dir().unwrap().collect(); + assert!(entries.is_empty()); + } + + #[test] + fn test_dirfd_iter_dir_skips_dots() { + let temp_dir = TempDir::new().unwrap(); + fs::write(temp_dir.path().join("file"), "x").unwrap(); + + let dir_fd = DirFd::open(temp_dir.path(), SymlinkBehavior::Follow).unwrap(); + let names: Vec = dir_fd + .iter_dir() + .unwrap() + .map(|r| r.unwrap().name) + .collect(); + + assert!(!names.contains(&OsString::from("."))); + assert!(!names.contains(&OsString::from(".."))); + assert_eq!(names, vec![OsString::from("file")]); + } + + #[test] + fn test_diriter_open_child_iter() { + let temp_dir = TempDir::new().unwrap(); + let sub = temp_dir.path().join("sub"); + fs::create_dir(&sub).unwrap(); + fs::write(sub.join("x"), "hello").unwrap(); + fs::write(sub.join("y"), "world").unwrap(); + + let dir_fd = DirFd::open(temp_dir.path(), SymlinkBehavior::Follow).unwrap(); + let parent_iter = dir_fd.into_iter_dir().unwrap(); + + // open_child_iter should open "sub" and expose its entries. + let child_iter = parent_iter + .open_child_iter(OsStr::new("sub"), SymlinkBehavior::Follow) + .unwrap(); + let mut names: Vec = child_iter.map(|r| r.unwrap().name).collect(); + names.sort(); + + assert_eq!(names, vec![OsString::from("x"), OsString::from("y")]); + } + + #[test] + fn test_dirfd_iter_dir_propagates_error() { + let temp_dir = TempDir::new().unwrap(); + + // A sentinel file ensures the directory is non-empty, so the first + // `getdents64` call is deferred until after we close the fd below. + // Without it, an empty directory may return EOF (0) during Dir::from_fd + // initialization, making iter.next() return None instead of Some(Err(_)). + fs::write(temp_dir.path().join("sentinel"), "x").unwrap(); + + let dir_fd = DirFd::open(temp_dir.path(), SymlinkBehavior::Follow).unwrap(); + + // Wrap in ManuallyDrop immediately so the destructor never runs, regardless + // of how the test exits. This is the correct, safe Rust mechanism for + // suppressing a destructor — no mem::forget or unsafe needed. + let mut iter = std::mem::ManuallyDrop::new(dir_fd.iter_dir().unwrap()); + + // `inner` is private but accessible here because this test lives in the + // same module as DirIter. Close the fd using nix's safe wrapper so that + // the next readdir(3) call returns EBADF. + let raw_fd = iter.inner.as_raw_fd(); + let _ = nix::unistd::close(raw_fd); + + // `DerefMut` on `ManuallyDrop` gives `&mut DirIter` without + // scheduling the destructor to run — exactly what we want here: advance + // the iterator to observe the EBADF error, but skip the `close(2)` that + // the destructor would attempt on the already-closed fd. + match iter.next() { + Some(Err(_)) => {} // expected: EBADF propagated as io::Error + other => panic!("expected Some(Err(_)) after fd close, got {other:?}"), + } + } } diff --git a/tests/by-util/test_rm.rs b/tests/by-util/test_rm.rs index c2b77ee6d3d..1cbfc0d13e8 100644 --- a/tests/by-util/test_rm.rs +++ b/tests/by-util/test_rm.rs @@ -236,6 +236,64 @@ fn test_recursive_long_filepath() { assert!(!at.file_exists(file_a)); } +// Regression test: the iterative traversal must not overflow the call stack on +// deeply-nested directory trees. The old recursive implementation would panic +// (stack overflow) at depths around 10 000 with default stack sizes. +// +// We use a single-character child name "d" at every level (valid because each +// directory contains exactly one child) so the absolute path stays well below +// PATH_MAX even at depth 800. +// +// Depth is capped at 800 to stay comfortably within the fd-budget: each active +// StackFrame now holds exactly one open file descriptor (the DirIter fd — +// DirFd::into_iter_dir transfers ownership without dup), so the effective +// traversal depth is bounded by RLIMIT_NOFILE (≈ 1 024 on systems with a +// default 1 024 soft limit). +// +// Marked `#[ignore]` because creating 800 directories is slow in CI. +// Run manually with: cargo test -- --ignored test_recursive_deep_tree +#[cfg(not(windows))] +#[test] +#[ignore = "slow: creates 800 nested directories; run manually with --ignored"] +fn test_recursive_deep_tree_no_stack_overflow() { + let (at, mut ucmd) = at_and_ucmd!(); + // "deep/d/d/.../d" — 800 single-char levels, one mkdir_all call. + let deep_path = "deep/".to_string() + &"d/".repeat(799) + "d"; + at.mkdir_all(&deep_path); + ucmd.arg("-r").arg("deep").succeeds().no_stderr(); + assert!(!at.dir_exists("deep")); +} + +// Regression test: FrameIter::Drained / try_reclaim_fd must be exercised when +// the process runs out of file descriptors mid-traversal. +// +// We cap RLIMIT_NOFILE at 30 for the child process. stdin/stdout/stderr +// consume 3 fds; rm opens a handful more internally. A tree of depth 40 +// guarantees that EMFILE is hit before the leaf is reached, forcing +// try_reclaim_fd to demote a Live frame to Drained. The full tree must still +// be removed successfully. +// +// Only runs on Unix (not Redox) because the iterative traversal and fd +// recycling live in platform/unix.rs. +#[cfg(all(unix, not(target_os = "redox")))] +#[test] +fn test_recursive_emfile_fd_recycling() { + use rlimit::Resource; + + let (at, mut ucmd) = at_and_ucmd!(); + // 40-level chain: "deep/d/d/.../d" + let deep_path = "deep/".to_string() + &"d/".repeat(39) + "d"; + at.mkdir_all(&deep_path); + + ucmd.arg("-r") + .arg("deep") + .limit(Resource::NOFILE, 30, 30) + .succeeds() + .no_stderr(); + + assert!(!at.dir_exists("deep")); +} + #[test] fn test_directory_without_flag() { let (at, mut ucmd) = at_and_ucmd!(); @@ -952,7 +1010,29 @@ fn test_recursive_interactive() { assert!(!at.dir_exists("a")); } -// Avoid an infinite recursion due to a symbolic link to the current directory. +// When the user declines to remove a subdirectory in interactive mode, rm +// should NOT report an error. The parent directory is left behind (because it +// is non-empty), but the process should exit 0 — matching GNU rm behavior. +#[cfg(not(windows))] +#[test] +fn test_recursive_interactive_decline_child_no_error() { + let (at, mut ucmd) = at_and_ucmd!(); + at.mkdir("parent"); + at.mkdir("parent/child"); + // Inputs: descend parent? y, remove child? n, remove parent? y + // 'parent' still contains 'child', so rm silently skips removing 'parent'. + let expected = "rm: descend into directory 'parent'? \ + rm: remove directory 'parent/child'? \ + rm: remove directory 'parent'? "; + ucmd.args(&["-r", "-i", "parent"]) + .pipe_in("y\nn\ny\n") + .succeeds() + .stderr_only(expected); + // Both directories must still be present. + assert!(at.dir_exists("parent/child")); + assert!(at.dir_exists("parent")); +} + #[test] fn test_recursive_symlink_loop() { let (at, mut ucmd) = at_and_ucmd!();