diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 568c24fc..9083c851 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,6 +1,9 @@ { "name": "parallel-disk-usage (Rust only)", "image": "mcr.microsoft.com/devcontainers/rust:1", + "remoteEnv": { + "TEST_SKIP": "cross_device_excludes_mount" + }, "customizations": { "vscode": { "extensions": [ diff --git a/.devcontainer/full/devcontainer.json b/.devcontainer/full/devcontainer.json index d95b5a76..f8130092 100644 --- a/.devcontainer/full/devcontainer.json +++ b/.devcontainer/full/devcontainer.json @@ -1,6 +1,9 @@ { "name": "parallel-disk-usage (full)", "image": "mcr.microsoft.com/devcontainers/rust:1", + "remoteEnv": { + "TEST_SKIP": "cross_device_excludes_mount" + }, "features": { "ghcr.io/devcontainers/features/node:1": { "version": "lts", diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index af2162a1..1ea0dd3b 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -29,6 +29,12 @@ jobs: curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > $installer bash $installer --default-toolchain $(cat rust-toolchain) -y + - name: Install external test dependencies + if: runner.os == 'Linux' + run: | + sudo apt update + sudo apt install -y squashfs-tools squashfuse fuse3 + - name: Test (dev) shell: bash env: diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 34be7cfb..c942c29d 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -42,6 +42,12 @@ jobs: curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > $installer bash $installer --default-toolchain $(cat rust-toolchain) -y + - name: Install external test dependencies + if: runner.os == 'Linux' + run: | + sudo apt update + sudo apt install -y squashfs-tools squashfuse fuse3 + - name: Test (dev) shell: bash env: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0cba2540..8e761518 100644 --- 
a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -351,6 +351,16 @@ rustup toolchain install "$(< rust-toolchain)" rustup component add --toolchain "$(< rust-toolchain)" rustfmt clippy ``` +## Optional External Dependencies + +Some integration tests require external (non-Cargo) tools that are **not** managed by `Cargo.toml`. These tests panic when the tools are absent; CI installs them to get full coverage. + +- `squashfs-tools` (provides `mksquashfs`) — cross-device (`--one-file-system`) FUSE test +- `squashfuse` (provides `squashfuse`) — cross-device (`--one-file-system`) FUSE test +- `fuse3` (provides `fusermount3`, `/dev/fuse`) — cross-device (`--one-file-system`) FUSE test + +Tests that need these tools will panic with a diagnostic message if they are missing. The panic message shows the exact `TEST_SKIP` value to pass to `./test.sh` in order to skip that test. + ## Automated Checks Before submitting, ensure: diff --git a/Cargo.lock b/Cargo.lock index fa28fb2e..dcf9b338 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -657,6 +657,7 @@ dependencies = [ "sysinfo", "terminal_size", "text-block-macros", + "which", "zero-copy-pads", ] @@ -1040,6 +1041,15 @@ dependencies = [ "semver", ] +[[package]] +name = "which" +version = "8.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81995fafaaaf6ae47a7d0cc83c67caf92aeb7e5331650ae6ff856f7c0c60c459" +dependencies = [ + "libc", +] + [[package]] name = "winapi" version = "0.3.9" diff --git a/Cargo.toml b/Cargo.toml index ced6e8a2..f408062c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -91,3 +91,4 @@ maplit = "1.0.2" normalize-path = "0.2.1" pretty_assertions = "1.4.1" rand = "0.10.0" +which = "8.0.2" diff --git a/README.md b/README.md index 12e52aa2..5f7b0b59 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,6 @@ The benchmark was generated by [a GitHub Workflow](https://github.com/KSXGitHub/ * Ignorant of reflinks (from COW filesystems such as BTRFS and ZFS). * Does not follow symbolic links. 
-* Does not differentiate filesystems: Mounted folders are counted as normal folders. * The runtime is optimized at the expense of binary size. ## Usage diff --git a/USAGE.md b/USAGE.md index 98d1f36c..683aad2a 100644 --- a/USAGE.md +++ b/USAGE.md @@ -39,6 +39,13 @@ How to display the numbers of bytes. Detect and subtract the sizes of hardlinks from their parent directory totals. + +### `--one-file-system` + +* _Aliases:_ `-x`. + +Skip directories on different filesystems. + ### `--top-down` diff --git a/exports/completion.bash b/exports/completion.bash index 8b06a03b..7b03e2c5 100644 --- a/exports/completion.bash +++ b/exports/completion.bash @@ -23,7 +23,7 @@ _pdu() { case "${cmd}" in pdu) - opts="-b -H -q -d -w -m -s -p -h -V --json-input --json-output --bytes-format --detect-links --dedupe-links --deduplicate-hardlinks --top-down --align-right --quantity --depth --max-depth --width --total-width --column-width --min-ratio --no-sort --no-errors --silent-errors --progress --threads --omit-json-shared-details --omit-json-shared-summary --help --version [FILES]..." + opts="-b -H -x -q -d -w -m -s -p -h -V --json-input --json-output --bytes-format --detect-links --dedupe-links --deduplicate-hardlinks --one-file-system --top-down --align-right --quantity --depth --max-depth --width --total-width --column-width --min-ratio --no-sort --no-errors --silent-errors --progress --threads --omit-json-shared-details --omit-json-shared-summary --help --version [FILES]..." 
if [[ ${cur} == -* || ${COMP_CWORD} -eq 1 ]] ; then COMPREPLY=( $(compgen -W "${opts}" -- "${cur}") ) return 0 diff --git a/exports/completion.elv b/exports/completion.elv index d3cda52c..3889da99 100644 --- a/exports/completion.elv +++ b/exports/completion.elv @@ -38,6 +38,8 @@ set edit:completion:arg-completer[pdu] = {|@words| cand --deduplicate-hardlinks 'Detect and subtract the sizes of hardlinks from their parent directory totals' cand --detect-links 'Detect and subtract the sizes of hardlinks from their parent directory totals' cand --dedupe-links 'Detect and subtract the sizes of hardlinks from their parent directory totals' + cand -x 'Skip directories on different filesystems' + cand --one-file-system 'Skip directories on different filesystems' cand --top-down 'Print the tree top-down instead of bottom-up' cand --align-right 'Set the root of the bars to the right' cand --no-sort 'Do not sort the branches in the tree' diff --git a/exports/completion.fish b/exports/completion.fish index 41cc6448..0431ae03 100644 --- a/exports/completion.fish +++ b/exports/completion.fish @@ -12,6 +12,7 @@ complete -c pdu -l threads -d 'Set the maximum number of threads to spawn. 
Could complete -c pdu -l json-input -d 'Read JSON data from stdin' complete -c pdu -l json-output -d 'Print JSON data instead of an ASCII chart' complete -c pdu -s H -l deduplicate-hardlinks -l detect-links -l dedupe-links -d 'Detect and subtract the sizes of hardlinks from their parent directory totals' +complete -c pdu -s x -l one-file-system -d 'Skip directories on different filesystems' complete -c pdu -l top-down -d 'Print the tree top-down instead of bottom-up' complete -c pdu -l align-right -d 'Set the root of the bars to the right' complete -c pdu -l no-sort -d 'Do not sort the branches in the tree' diff --git a/exports/completion.ps1 b/exports/completion.ps1 index 8814bf76..547404c0 100644 --- a/exports/completion.ps1 +++ b/exports/completion.ps1 @@ -41,6 +41,8 @@ Register-ArgumentCompleter -Native -CommandName 'pdu' -ScriptBlock { [CompletionResult]::new('--deduplicate-hardlinks', '--deduplicate-hardlinks', [CompletionResultType]::ParameterName, 'Detect and subtract the sizes of hardlinks from their parent directory totals') [CompletionResult]::new('--detect-links', '--detect-links', [CompletionResultType]::ParameterName, 'Detect and subtract the sizes of hardlinks from their parent directory totals') [CompletionResult]::new('--dedupe-links', '--dedupe-links', [CompletionResultType]::ParameterName, 'Detect and subtract the sizes of hardlinks from their parent directory totals') + [CompletionResult]::new('-x', '-x', [CompletionResultType]::ParameterName, 'Skip directories on different filesystems') + [CompletionResult]::new('--one-file-system', '--one-file-system', [CompletionResultType]::ParameterName, 'Skip directories on different filesystems') [CompletionResult]::new('--top-down', '--top-down', [CompletionResultType]::ParameterName, 'Print the tree top-down instead of bottom-up') [CompletionResult]::new('--align-right', '--align-right', [CompletionResultType]::ParameterName, 'Set the root of the bars to the right') [CompletionResult]::new('--no-sort', 
'--no-sort', [CompletionResultType]::ParameterName, 'Do not sort the branches in the tree') diff --git a/exports/completion.zsh b/exports/completion.zsh index dec1cef4..66cfe345 100644 --- a/exports/completion.zsh +++ b/exports/completion.zsh @@ -37,12 +37,14 @@ block-count\:"Count numbers of blocks"))' \ '-m+[Minimal size proportion required to appear]:MIN_RATIO:_default' \ '--min-ratio=[Minimal size proportion required to appear]:MIN_RATIO:_default' \ '--threads=[Set the maximum number of threads to spawn. Could be either "auto", "max", or a positive integer]:THREADS:_default' \ -'(-q --quantity -H --deduplicate-hardlinks)--json-input[Read JSON data from stdin]' \ +'(-q --quantity -H --deduplicate-hardlinks -x --one-file-system)--json-input[Read JSON data from stdin]' \ '--json-output[Print JSON data instead of an ASCII chart]' \ '-H[Detect and subtract the sizes of hardlinks from their parent directory totals]' \ '--deduplicate-hardlinks[Detect and subtract the sizes of hardlinks from their parent directory totals]' \ '--detect-links[Detect and subtract the sizes of hardlinks from their parent directory totals]' \ '--dedupe-links[Detect and subtract the sizes of hardlinks from their parent directory totals]' \ +'-x[Skip directories on different filesystems]' \ +'--one-file-system[Skip directories on different filesystems]' \ '--top-down[Print the tree top-down instead of bottom-up]' \ '--align-right[Set the root of the bars to the right]' \ '--no-sort[Do not sort the branches in the tree]' \ diff --git a/exports/long.help b/exports/long.help index efe31299..47624bb3 100644 --- a/exports/long.help +++ b/exports/long.help @@ -31,6 +31,9 @@ Options: [aliases: --detect-links, --dedupe-links] + -x, --one-file-system + Skip directories on different filesystems + --top-down Print the tree top-down instead of bottom-up diff --git a/exports/short.help b/exports/short.help index 1835edbc..51a1a8de 100644 --- a/exports/short.help +++ b/exports/short.help @@ -14,6 +14,8 @@ 
Options: How to display the numbers of bytes [default: metric] [possible values: plain, metric, binary] -H, --deduplicate-hardlinks Detect and subtract the sizes of hardlinks from their parent directory totals [aliases: --detect-links, --dedupe-links] + -x, --one-file-system + Skip directories on different filesystems --top-down Print the tree top-down instead of bottom-up --align-right diff --git a/src/app.rs b/src/app.rs index 4569b257..a685df54 100644 --- a/src/app.rs +++ b/src/app.rs @@ -5,6 +5,7 @@ pub use sub::Sub; use crate::{ args::{Args, Quantity, Threads}, bytes_format::BytesFormat, + device::DeviceBoundary, get_size::{GetApparentSize, GetSize}, hardlink, json_data::{JsonData, JsonDataBody, JsonShared, JsonTree}, @@ -133,6 +134,13 @@ impl App { .pipe(Err); } + #[cfg(not(unix))] + if self.args.one_file_system { + return crate::runtime_error::UnsupportedFeature::OneFileSystem + .pipe(RuntimeError::UnsupportedFeature) + .pipe(Err); + } + let threads = match self.args.threads { Threads::Auto => { let disks = Disks::new_with_refreshed_list(); @@ -283,6 +291,7 @@ impl App { progress: $progress, #[cfg(unix)] deduplicate_hardlinks: $hardlinks, #[cfg(not(unix))] deduplicate_hardlinks: _, + one_file_system, files, json_output, bytes_format, @@ -299,6 +308,7 @@ impl App { bar_alignment: BarAlignment::from_align_right(align_right), size_getter: <$size_getter as GetSizeUtils>::INSTANCE, hardlinks_handler: <$size_getter as CreateHardlinksHandler<{ cfg!(unix) && $hardlinks }, $progress>>::create_hardlinks_handler(), + device_boundary: DeviceBoundary::from_one_file_system(one_file_system), reporter: <$size_getter as CreateReporter<$progress>>::create_reporter(report_error), bytes_format: <$size_getter as GetSizeUtils>::formatter(bytes_format), files, diff --git a/src/app/sub.rs b/src/app/sub.rs index 3500a5f3..f1f72447 100644 --- a/src/app/sub.rs +++ b/src/app/sub.rs @@ -1,6 +1,7 @@ use crate::{ args::{Depth, Fraction}, data_tree::DataTree, + device::DeviceBoundary, 
fs_tree_builder::FsTreeBuilder, get_size::GetSize, hardlink::{DeduplicateSharedSize, HardlinkIgnorant, RecordHardlinks}, @@ -43,6 +44,8 @@ where pub size_getter: SizeGetter, /// Handle to detect, record, and deduplicate hardlinks. pub hardlinks_handler: HardlinksHandler, + /// Whether to cross device boundary into a different filesystem. + pub device_boundary: DeviceBoundary, /// Reports measurement progress. pub reporter: Report, /// Minimal size proportion required to appear. @@ -71,6 +74,7 @@ where max_depth, size_getter, hardlinks_handler, + device_boundary, reporter, min_ratio, no_sort, @@ -86,6 +90,7 @@ where root, size_getter, hardlinks_recorder: &hardlinks_handler, + device_boundary, max_depth, } .into() diff --git a/src/args.rs b/src/args.rs index db6698c8..03e14ea9 100644 --- a/src/args.rs +++ b/src/args.rs @@ -94,7 +94,7 @@ pub struct Args { /// Read JSON data from stdin. #[clap( long, - conflicts_with_all = ["quantity", "deduplicate_hardlinks"] + conflicts_with_all = ["quantity", "deduplicate_hardlinks", "one_file_system"] )] pub json_input: bool, @@ -112,6 +112,11 @@ pub struct Args { #[cfg_attr(not(unix), clap(hide = true))] pub deduplicate_hardlinks: bool, + /// Skip directories on different filesystems. + #[clap(long, short = 'x')] + #[cfg_attr(not(unix), clap(hide = true))] + pub one_file_system: bool, + /// Print the tree top-down instead of bottom-up. #[clap(long)] pub top_down: bool, diff --git a/src/device.rs b/src/device.rs new file mode 100644 index 00000000..1ef5a12b --- /dev/null +++ b/src/device.rs @@ -0,0 +1,17 @@ +/// Whether to cross device boundary into a different filesystem. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DeviceBoundary { + Cross, + Stay, +} + +impl DeviceBoundary { + /// Derive device boundary from `--one-file-system`. 
+ #[cfg(feature = "cli")] + pub(crate) fn from_one_file_system(one_file_system: bool) -> Self { + match one_file_system { + false => DeviceBoundary::Cross, + true => DeviceBoundary::Stay, + } + } +} diff --git a/src/fs_tree_builder.rs b/src/fs_tree_builder.rs index 37167b2c..41deba77 100644 --- a/src/fs_tree_builder.rs +++ b/src/fs_tree_builder.rs @@ -1,5 +1,6 @@ use super::{ data_tree::DataTree, + device::DeviceBoundary, get_size::GetSize, hardlink::{RecordHardlinks, RecordHardlinksArgument}, os_string_display::OsStringDisplay, @@ -7,6 +8,7 @@ use super::{ size, tree_builder::{Info, TreeBuilder}, }; +use device_id::get_device_id; use pipe_trait::Pipe; use std::{ fs::{read_dir, symlink_metadata}, @@ -21,6 +23,7 @@ use std::{ /// # use parallel_disk_usage::fs_tree_builder::FsTreeBuilder; /// use parallel_disk_usage::{ /// data_tree::DataTree, +/// device::DeviceBoundary, /// get_size::GetApparentSize, /// os_string_display::OsStringDisplay, /// reporter::{ErrorOnlyReporter, ErrorReport}, @@ -32,6 +35,7 @@ use std::{ /// hardlinks_recorder: &HardlinkIgnorant, /// size_getter: GetApparentSize, /// reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), +/// device_boundary: DeviceBoundary::Cross, /// max_depth: 10, /// }; /// let data_tree: DataTree = builder.into(); @@ -52,6 +56,8 @@ where pub hardlinks_recorder: &'a HardlinksRecorder, /// Reports progress to external system. pub reporter: &'a Report, + /// Whether to cross device boundary into a different filesystem. + pub device_boundary: DeviceBoundary, /// Deepest level of descendant display in the graph. The sizes beyond the max depth still count toward total. pub max_depth: u64, } @@ -72,16 +78,34 @@ where size_getter, hardlinks_recorder, reporter, + device_boundary, max_depth, } = builder; + // `root` is inspected more than once, but the extra cost is negligible compared to + // traversing the (usually) massive fs tree that `root` contains. 
+ let root_dev = match device_boundary { + DeviceBoundary::Cross => None, + DeviceBoundary::Stay => match symlink_metadata(&root) { + Err(error) => { + reporter.report(Event::EncounterError(ErrorReport { + operation: SymlinkMetadata, + path: &root, + error, + })); + return DataTree::file(OsStringDisplay::os_string_from(&root), Size::default()); + } + Ok(stats) => Some(get_device_id(&stats)), + }, + }; + TreeBuilder:: { name: OsStringDisplay::os_string_from(&root), path: root, get_info: |path| { - let (is_dir, size) = match symlink_metadata(path) { + let (is_dir, size, same_device) = match symlink_metadata(path) { Err(error) => { reporter.report(Event::EncounterError(ErrorReport { operation: SymlinkMetadata, @@ -96,6 +120,8 @@ where Ok(stats) => { // `stats` should be dropped ASAP to avoid piling up kernel memory usage let is_dir = stats.is_dir(); + let same_device = + root_dev.is_none_or(|root_dev| get_device_id(&stats) == root_dev); let size = size_getter.get_size(&stats); reporter.report(Event::ReceiveData(size)); hardlinks_recorder @@ -103,11 +129,11 @@ where path, &stats, size, reporter, )) .ok(); // ignore the error for now - (is_dir, size) + (is_dir, size, same_device) } }; - let children: Vec<_> = if is_dir { + let children: Vec<_> = if is_dir && same_device { match read_dir(path) { Err(error) => { reporter.report(Event::EncounterError(ErrorReport { @@ -115,7 +141,10 @@ where path, error, })); - return Info::default(); + return Info { + size, + children: Vec::new(), + }; } Ok(entries) => entries, } @@ -145,3 +174,5 @@ where .into() } } + +mod device_id; diff --git a/src/fs_tree_builder/device_id.rs b/src/fs_tree_builder/device_id.rs new file mode 100644 index 00000000..83e5a854 --- /dev/null +++ b/src/fs_tree_builder/device_id.rs @@ -0,0 +1,69 @@ +/// Unique identifier for a device or filesystem. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct DeviceId(Inner); + +#[cfg(unix)] +type Inner = u64; + +#[cfg(not(unix))] +type Inner = (); + +/// Retrieve the [`DeviceId`] from filesystem metadata. +#[cfg(unix)] +pub fn get_device_id(stats: &std::fs::Metadata) -> DeviceId { + use std::os::unix::fs::MetadataExt; + DeviceId(stats.dev()) +} + +/// Retrieve the [`DeviceId`] from filesystem metadata. +/// +/// On unsupported platforms, all entries share the same [`DeviceId`], +/// effectively disabling cross-device detection. +#[cfg(not(unix))] +pub fn get_device_id(_stats: &std::fs::Metadata) -> DeviceId { + DeviceId(()) +} + +#[cfg(test)] +#[cfg(unix)] +mod tests { + use super::get_device_id; + use std::fs::symlink_metadata; + + #[test] + fn same_filesystem_returns_equal_ids() { + let root_stats = symlink_metadata("/").expect("stat /"); + let root_stats2 = symlink_metadata("/").expect("stat / again"); + assert_eq!( + get_device_id(&root_stats), + get_device_id(&root_stats2), + "same path should yield the same DeviceId", + ); + } + + /// `/proc` is a virtual filesystem mounted separately from `/` on Linux. + #[test] + #[cfg(target_os = "linux")] + fn different_filesystem_returns_different_ids() { + let root_stats = symlink_metadata("/").expect("stat /"); + let proc_stats = symlink_metadata("/proc").expect("stat /proc"); + assert_ne!( + get_device_id(&root_stats), + get_device_id(&proc_stats), + "/ and /proc should be on different devices", + ); + } + + /// `/dev` is a separate filesystem (`devfs`) from `/` on macOS. 
+ #[test] + #[cfg(target_os = "macos")] + fn different_filesystem_returns_different_ids() { + let root_stats = symlink_metadata("/").expect("stat /"); + let dev_stats = symlink_metadata("/dev").expect("stat /dev"); + assert_ne!( + get_device_id(&root_stats), + get_device_id(&dev_stats), + "/ and /dev should be on different devices", + ); + } +} diff --git a/src/lib.rs b/src/lib.rs index 23765add..7aeb6e90 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -38,6 +38,7 @@ pub use clap_utilities; pub mod bytes_format; pub mod data_tree; +pub mod device; pub mod fs_tree_builder; pub mod get_size; pub mod hardlink; diff --git a/src/runtime_error.rs b/src/runtime_error.rs index dba94c00..815c4be7 100644 --- a/src/runtime_error.rs +++ b/src/runtime_error.rs @@ -32,6 +32,10 @@ pub enum UnsupportedFeature { #[cfg(not(unix))] #[display("Feature --deduplicate-hardlinks is not available on this platform")] DeduplicateHardlink, + /// Using `--one-file-system` on non-POSIX. + #[cfg(not(unix))] + #[display("Feature --one-file-system is not available on this platform")] + OneFileSystem, } impl From for RuntimeError { diff --git a/tests/_utils.rs b/tests/_utils.rs index 75b4a326..3a9c785a 100644 --- a/tests/_utils.rs +++ b/tests/_utils.rs @@ -4,6 +4,7 @@ use derive_more::{AsRef, Deref}; use into_sorted::IntoSorted; use parallel_disk_usage::{ data_tree::{DataTree, DataTreeReflection}, + device::DeviceBoundary, fs_tree_builder::FsTreeBuilder, get_size::{self, GetSize}, hardlink::HardlinkIgnorant, @@ -373,6 +374,7 @@ where panic!("Unexpected call to report_error: {error:?}") }), root: root.join(suffix), + device_boundary: DeviceBoundary::Cross, max_depth: 10, } .pipe(DataTree::::from) diff --git a/tests/cli_errors.rs b/tests/cli_errors.rs index 0225a314..d854295f 100644 --- a/tests/cli_errors.rs +++ b/tests/cli_errors.rs @@ -15,6 +15,7 @@ use maplit::btreeset; use parallel_disk_usage::{ bytes_format::BytesFormat, data_tree::DataTree, + device::DeviceBoundary, fs_tree_builder::FsTreeBuilder, 
get_size::GetApparentSize, hardlink::HardlinkIgnorant, @@ -143,6 +144,7 @@ fn fs_errors() { size_getter: GetApparentSize, hardlinks_recorder: &HardlinkIgnorant, reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + device_boundary: DeviceBoundary::Cross, max_depth: 10, }; let mut data_tree: DataTree = builder.into(); diff --git a/tests/json.rs b/tests/json.rs index 95aee0b9..74689526 100644 --- a/tests/json.rs +++ b/tests/json.rs @@ -8,6 +8,7 @@ use command_extra::CommandExtra; use parallel_disk_usage::{ bytes_format::BytesFormat, data_tree::DataTree, + device::DeviceBoundary, fs_tree_builder::FsTreeBuilder, get_size::GetApparentSize, hardlink::HardlinkIgnorant, @@ -85,6 +86,7 @@ fn json_output() { size_getter: GetApparentSize, hardlinks_recorder: &HardlinkIgnorant, reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + device_boundary: DeviceBoundary::Cross, max_depth: 10, }; let expected = builder diff --git a/tests/one_file_system.rs b/tests/one_file_system.rs new file mode 100644 index 00000000..e96a06d2 --- /dev/null +++ b/tests/one_file_system.rs @@ -0,0 +1,302 @@ +//! Tests for the `--one-file-system` flag. +//! +//! ## Unit-style test +//! +//! [`same_device_on_sample_workspace`] verifies that enabling `--one-file-system` on a +//! single-device workspace produces the same tree as without it. +//! +//! ## Integration test via FUSE +//! +//! [`cross_device_excludes_mount`] uses `squashfuse` to mount a squashfs image via FUSE +//! (no root or user namespaces required) and checks that `--one-file-system` correctly +//! excludes entries on the mounted filesystem. +//! +//! The FUSE test panics when `mksquashfs`, `squashfuse`, `/dev/fuse`, or `fusermount` are +//! unavailable. It can be excluded via `TEST_SKIP='cross_device_excludes_mount' ./test.sh`. 
+ +#![cfg(unix)] +#![cfg(feature = "cli")] + +pub mod _utils; +pub use _utils::*; + +use command_extra::CommandExtra; +use parallel_disk_usage::{ + bytes_format::BytesFormat, + data_tree::DataTree, + device::DeviceBoundary, + fs_tree_builder::FsTreeBuilder, + get_size::GetApparentSize, + hardlink::HardlinkIgnorant, + os_string_display::OsStringDisplay, + reporter::{ErrorOnlyReporter, ErrorReport}, + size::Bytes, + visualizer::{BarAlignment, ColumnWidthDistribution, Direction, Visualizer}, +}; +use pipe_trait::Pipe; +use pretty_assertions::assert_eq; +use std::{ + fs::{create_dir_all, write as write_file}, + path::Path, + process::{Command, Stdio}, + thread::sleep, + time::Duration, +}; +use which::which; + +/// When all files reside on a single filesystem, [`DeviceBoundary::Stay`] should produce +/// the same tree as [`DeviceBoundary::Cross`]. +#[test] +fn same_device_on_sample_workspace() { + let workspace = SampleWorkspace::default(); + + let build_tree = |device_boundary: DeviceBoundary| { + DataTree::from(FsTreeBuilder { + root: workspace.to_path_buf(), + size_getter: GetApparentSize, + hardlinks_recorder: &HardlinkIgnorant, + reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + device_boundary, + max_depth: 10, + }) + }; + + let crossing = DeviceBoundary::Cross + .pipe(build_tree) + .into_par_sorted(|left, right| left.name().cmp(right.name())) + .into_reflection() + .pipe(sanitize_tree_reflection); + + let staying = DeviceBoundary::Stay + .pipe(build_tree) + .into_par_sorted(|left, right| left.name().cmp(right.name())) + .into_reflection() + .pipe(sanitize_tree_reflection); + + assert_eq!(crossing, staying); +} + +/// Information about the available FUSE tools, discovered by [`fuse_probe`]. +struct FuseTools { + /// The fusermount command to use for unmounting (`"fusermount3"` or `"fusermount"`). + fusermount: &'static str, +} + +/// Probes for `squashfuse`, `mksquashfs`, and FUSE infrastructure. +/// +/// Verifies: +/// 1. 
`mksquashfs` binary exists +/// 2. `squashfuse` binary exists +/// 3. `/dev/fuse` exists +/// 4. `fusermount3` (or `fusermount`) binary exists +/// +/// Returns `Ok(FuseTools)` with the discovered tool names, or `Err` with a diagnostic message. +fn fuse_probe() -> Result<FuseTools, String> { + which("mksquashfs").map_err(|error| { + format!("`mksquashfs` not found: {error}. Install squashfs-tools for your platform.") + })?; + + which("squashfuse").map_err(|error| { + format!("`squashfuse` not found: {error}. Install squashfuse for your platform.") + })?; + + if !Path::new("/dev/fuse").exists() { + return Err( + "/dev/fuse does not exist. The FUSE kernel module may not be loaded (`modprobe fuse`)." + .to_string(), + ); + } + + let fusermount = if which("fusermount3").is_ok() { + "fusermount3" + } else if which("fusermount").is_ok() { + "fusermount" + } else { + return Err( + "Neither `fusermount3` nor `fusermount` found. Install FUSE for your platform." + .to_string(), + ); + }; + + Ok(FuseTools { fusermount }) +} + +/// RAII guard that unmounts a FUSE mount point when dropped. +/// +/// Its sole purpose is to ensure the FUSE filesystem is cleanly unmounted (via `fusermount -u`) +/// even if the test panics, preventing stale mounts from accumulating. +struct FuseMount<'a> { + mount_point: &'a Path, + fusermount: &'static str, +} + +impl Drop for FuseMount<'_> { + fn drop(&mut self) { + let status = self + .fusermount + .pipe(Command::new) + .with_arg("-u") + .with_arg(self.mount_point) + .status(); + match status { + Ok(status) if status.success() => {} + Ok(status) => eprintln!("warning: {} exited with {status}", self.fusermount), + Err(error) => eprintln!("warning: failed to run {}: {error}", self.fusermount), + } + } +} + +/// When a subdirectory is a mount point for a different filesystem, +/// `--one-file-system` should exclude it. +/// +/// Uses `squashfuse` to mount a squashfs image via FUSE — no root privileges or +/// user namespaces required. 
The image is pre-built with `mksquashfs` containing the +/// test file, so the mount is read-only (which is fine since `pdu` only reads). +/// Panics when FUSE infrastructure is unavailable; can be excluded via +/// `TEST_SKIP='cross_device_excludes_mount' ./test.sh`. +#[test] +#[cfg_attr(not(target_os = "linux"), ignore = "this test only works on Linux")] +fn cross_device_excludes_mount() { + let fuse_tools = fuse_probe().unwrap_or_else(|reason| { + panic!( + "error: This test requires FUSE (`mksquashfs`, `squashfuse`, `/dev/fuse`, \ + `fusermount`) but the probe failed.\n\ + reason: {reason}\n\ + hint: Install `squashfs-tools`, `squashfuse`, and FUSE for your platform, \ + or rerun via `TEST_SKIP='cross_device_excludes_mount' ./test.sh` to skip this test.", + ) + }); + + let temp = Temp::new_dir().expect("create temp dir for cross-device test"); + let workspace = temp.join("workspace"); + let mount_point = workspace.join("mounted"); + let image_path = temp.join("squash.img"); + let staging_dir = temp.join("staging"); + + create_dir_all(&mount_point).expect("create workspace and mount point"); + create_dir_all(&staging_dir).expect("create staging directory"); + + // Write a file on the root filesystem + let outside_content = "A".repeat(1000); + write_file(workspace.join("outside.txt"), &outside_content).expect("write outside.txt"); + + // Create a file in the staging directory to be packed into the squashfs image + let inside_content = "B".repeat(2000); + write_file(staging_dir.join("inside.txt"), &inside_content).expect("write staging/inside.txt"); + + // Build a squashfs image from the staging directory + let mksquashfs_output = Command::new("mksquashfs") + .with_arg(&staging_dir) + .with_arg(&image_path) + .with_arg("-noappend") + .with_arg("-quiet") + .with_stdout(Stdio::piped()) + .with_stderr(Stdio::piped()) + .output() + .expect("run mksquashfs"); + assert!( + mksquashfs_output.status.success(), + "mksquashfs failed: {}", + 
String::from_utf8_lossy(&mksquashfs_output.stderr), + ); + + // Mount the squashfs image via squashfuse (read-only). + // The _fuse_mount guard ensures we unmount even if assertions panic. + let mount_output = Command::new("squashfuse") + .with_arg(&image_path) + .with_arg(&mount_point) + .with_stdout(Stdio::piped()) + .with_stderr(Stdio::piped()) + .output() + .expect("run squashfuse"); + assert!( + mount_output.status.success(), + "squashfuse mount failed: {}", + String::from_utf8_lossy(&mount_output.stderr), + ); + let _fuse_mount = FuseMount { + mount_point: &mount_point, + fusermount: fuse_tools.fusermount, + }; + + // Wait for the FUSE mount to become readable (exponential backoff) + let wait_ms_base = 100; + let retries = 5; + let poll_result = (0..retries) + .map(|exponent| wait_ms_base << exponent) + .map(Duration::from_millis) + .map(sleep) + .filter_map(|()| mount_point.read_dir().ok()) + .find_map(|mut entry| entry.next()?.ok()); + assert!( + poll_result.is_some(), + "FUSE mount at {mount_point:?} not ready after {retries} retries" + ); + + let build_expected_tree = |device_boundary: DeviceBoundary| -> String { + let builder = FsTreeBuilder { + root: workspace.clone(), + size_getter: GetApparentSize, + hardlinks_recorder: &HardlinkIgnorant, + reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + device_boundary, + max_depth: 10, + }; + let mut data_tree: DataTree = builder.into(); + data_tree.par_cull_insignificant_data(0.01); + data_tree.par_sort_by(|left, right| left.size().cmp(&right.size()).reverse()); + let visualizer = Visualizer:: { + data_tree: &data_tree, + bytes_format: BytesFormat::PlainNumber, + direction: Direction::BottomUp, + bar_alignment: BarAlignment::Left, + column_width_distribution: ColumnWidthDistribution::total(100), + }; + let expected = format!("{visualizer}"); + expected.trim_end().to_string() + }; + + let run_pdu = |extra_arg: Option<&'static str>| -> String { + Command::new(PDU) + .with_arg("--quantity=apparent-size") + 
.with_arg("--total-width=100") + .with_arg("--bytes-format=plain") + .with_arg("--min-ratio=0.01") + .with_args(extra_arg) + .with_arg(&workspace) + .with_stdin(Stdio::null()) + .with_stdout(Stdio::piped()) + .with_stderr(Stdio::piped()) + .output() + .expect("run pdu") + .pipe(stdout_text) + }; + + // Run pdu WITHOUT --one-file-system — should see both files + let actual = run_pdu(None); + let expected = build_expected_tree(DeviceBoundary::Cross); + eprintln!("WITHOUT --one-file-system:\nACTUAL:\n{actual}\n\nEXPECTED:\n{expected}\n"); + assert_eq!(actual, expected); + assert!( + actual.contains("inside.txt"), + "without --one-file-system should show inside.txt:\n{actual}", + ); + assert!( + actual.contains("outside.txt"), + "without --one-file-system should show outside.txt:\n{actual}", + ); + + // Run pdu WITH --one-file-system — should only see outside.txt + let actual = run_pdu(Some("--one-file-system")); + let expected = build_expected_tree(DeviceBoundary::Stay); + eprintln!("WITH --one-file-system:\nACTUAL:\n{actual}\n\nEXPECTED:\n{expected}\n"); + assert_eq!(actual, expected); + assert!( + actual.contains("outside.txt"), + "with --one-file-system should show outside.txt:\n{actual}", + ); + assert!( + !actual.contains("inside.txt"), + "with --one-file-system should exclude inside.txt (on different filesystem):\n{actual}", + ); +} diff --git a/tests/usual_cli.rs b/tests/usual_cli.rs index cbacb9f2..b91b9841 100644 --- a/tests/usual_cli.rs +++ b/tests/usual_cli.rs @@ -7,6 +7,7 @@ use command_extra::CommandExtra; use parallel_disk_usage::{ bytes_format::BytesFormat, data_tree::DataTree, + device::DeviceBoundary, fs_tree_builder::FsTreeBuilder, get_size::GetApparentSize, hardlink::HardlinkIgnorant, @@ -45,6 +46,7 @@ fn total_width() { size_getter: DEFAULT_GET_SIZE, hardlinks_recorder: &HardlinkIgnorant, reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + device_boundary: DeviceBoundary::Cross, max_depth: 10, }; let mut data_tree: DataTree = builder.into(); 
@@ -84,6 +86,7 @@ fn column_width() { size_getter: DEFAULT_GET_SIZE, hardlinks_recorder: &HardlinkIgnorant, reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + device_boundary: DeviceBoundary::Cross, max_depth: 10, }; let mut data_tree: DataTree = builder.into(); @@ -123,6 +126,7 @@ fn min_ratio_0() { size_getter: GetApparentSize, hardlinks_recorder: &HardlinkIgnorant, reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + device_boundary: DeviceBoundary::Cross, max_depth: 10, }; let mut data_tree: DataTree = builder.into(); @@ -161,6 +165,7 @@ fn min_ratio() { size_getter: GetApparentSize, hardlinks_recorder: &HardlinkIgnorant, reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + device_boundary: DeviceBoundary::Cross, max_depth: 10, }; let mut data_tree: DataTree = builder.into(); @@ -200,6 +205,7 @@ fn max_depth_2() { size_getter: GetApparentSize, hardlinks_recorder: &HardlinkIgnorant, reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + device_boundary: DeviceBoundary::Cross, max_depth: 2, }; let mut data_tree: DataTree = builder.into(); @@ -239,6 +245,7 @@ fn max_depth_1() { size_getter: GetApparentSize, hardlinks_recorder: &HardlinkIgnorant, reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + device_boundary: DeviceBoundary::Cross, max_depth: 1, }; let mut data_tree: DataTree = builder.into(); @@ -277,6 +284,7 @@ fn top_down() { size_getter: DEFAULT_GET_SIZE, hardlinks_recorder: &HardlinkIgnorant, reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + device_boundary: DeviceBoundary::Cross, max_depth: 10, }; let mut data_tree: DataTree = builder.into(); @@ -315,6 +323,7 @@ fn align_right() { size_getter: DEFAULT_GET_SIZE, hardlinks_recorder: &HardlinkIgnorant, reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + device_boundary: DeviceBoundary::Cross, max_depth: 10, }; let mut data_tree: DataTree = builder.into(); @@ -353,6 +362,7 @@ fn quantity_apparent_size() { size_getter: GetApparentSize, hardlinks_recorder: 
&HardlinkIgnorant, reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + device_boundary: DeviceBoundary::Cross, max_depth: 10, }; let mut data_tree: DataTree = builder.into(); @@ -392,6 +402,7 @@ fn quantity_block_size() { size_getter: GetBlockSize, hardlinks_recorder: &HardlinkIgnorant, reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + device_boundary: DeviceBoundary::Cross, max_depth: 10, }; let mut data_tree: DataTree = builder.into(); @@ -431,6 +442,7 @@ fn quantity_block_count() { size_getter: GetBlockCount, hardlinks_recorder: &HardlinkIgnorant, reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + device_boundary: DeviceBoundary::Cross, max_depth: 10, }; let mut data_tree: DataTree = builder.into(); @@ -470,6 +482,7 @@ fn bytes_format_plain() { size_getter: GetApparentSize, hardlinks_recorder: &HardlinkIgnorant, reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + device_boundary: DeviceBoundary::Cross, max_depth: 10, }; let mut data_tree: DataTree = builder.into(); @@ -509,6 +522,7 @@ fn bytes_format_metric() { size_getter: GetApparentSize, hardlinks_recorder: &HardlinkIgnorant, reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + device_boundary: DeviceBoundary::Cross, max_depth: 10, }; let mut data_tree: DataTree = builder.into(); @@ -548,6 +562,7 @@ fn bytes_format_binary() { size_getter: GetApparentSize, hardlinks_recorder: &HardlinkIgnorant, reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + device_boundary: DeviceBoundary::Cross, max_depth: 10, }; let mut data_tree: DataTree = builder.into(); @@ -586,6 +601,7 @@ fn path_to_workspace() { size_getter: DEFAULT_GET_SIZE, hardlinks_recorder: &HardlinkIgnorant, reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + device_boundary: DeviceBoundary::Cross, max_depth: 10, }; let mut data_tree: DataTree = builder.into(); @@ -629,6 +645,7 @@ fn multiple_names() { size_getter: GetApparentSize, hardlinks_recorder: &HardlinkIgnorant, reporter: 
&ErrorOnlyReporter::new(ErrorReport::SILENT), + device_boundary: DeviceBoundary::Cross, max_depth: 10, }; let mut data_tree: DataTree = builder.into(); @@ -694,6 +711,7 @@ fn multiple_names_max_depth_2() { size_getter: GetApparentSize, hardlinks_recorder: &HardlinkIgnorant, reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + device_boundary: DeviceBoundary::Cross, max_depth: 1, }; let mut data_tree: DataTree = builder.into(); @@ -754,6 +772,7 @@ fn multiple_names_max_depth_1() { size_getter: GetApparentSize, hardlinks_recorder: &HardlinkIgnorant, reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT), + device_boundary: DeviceBoundary::Cross, max_depth: 10, }; let mut data_tree: DataTree = builder.into();