|
| 1 | +#![cfg(unix)] // This feature is not available in Windows |
| 2 | +#![cfg(feature = "cli")] |
| 3 | + |
| 4 | +pub mod _utils; |
| 5 | +pub use _utils::*; |
| 6 | + |
| 7 | +use command_extra::CommandExtra; |
| 8 | +use itertools::Itertools; |
| 9 | +use parallel_disk_usage::{ |
| 10 | + bytes_format::BytesFormat, |
| 11 | + data_tree::Reflection, |
| 12 | + hardlink::{ |
| 13 | + hardlink_list::{reflection::ReflectionEntry, Summary}, |
| 14 | + LinkPathListReflection, |
| 15 | + }, |
| 16 | + inode::InodeNumber, |
| 17 | + json_data::{JsonData, JsonTree}, |
| 18 | + size::Bytes, |
| 19 | +}; |
| 20 | +use pipe_trait::Pipe; |
| 21 | +use pretty_assertions::assert_eq; |
| 22 | +use std::{ |
| 23 | + collections::HashSet, |
| 24 | + path::PathBuf, |
| 25 | + process::{Command, Stdio}, |
| 26 | +}; |
| 27 | + |
/// Prepares `command` for output capture and hands it back by value.
///
/// Stdin is set to null, while stdout and stderr are both set to piped so the
/// caller can read them from the resulting process output.
fn stdio(mut command: Command) -> Command {
    // The inherent setters return `&mut Command`, so configure in place and
    // then return the owned value.
    command
        .stdin(Stdio::null())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped());
    command
}
| 34 | + |
/// Runs `pdu --deduplicate-hardlinks` against `main/sources` and
/// `main/internal-hardlinks` of a sample workspace and checks, in order:
/// the total size, the reported tree, the per-inode hardlink details, the
/// hardlink summary, and the textual summary printed after the visualization.
#[test]
fn simple_tree_with_some_hardlinks() {
    #![expect(clippy::identity_op)]

    let sizes = [200_000, 220_000, 310_000, 110_000, 210_000];
    let workspace = SampleWorkspace::simple_tree_with_some_hardlinks(sizes);

    // JSON report for the two directories, normalized with `sort_reflection_by`
    // (comparing names) before any comparison takes place.
    let tree = {
        let output = Command::new(PDU)
            .with_current_dir(&workspace)
            .with_arg("--quantity=apparent-size")
            .with_arg("--deduplicate-hardlinks")
            .with_arg("--json-output")
            .with_arg("main/sources")
            .with_arg("main/internal-hardlinks")
            .pipe(stdio)
            .output()
            .expect("spawn command");
        let json_text = stdout_text(output);
        let json_data =
            serde_json::from_str::<JsonData>(&json_text).expect("parse stdout as JsonData");
        let mut parsed = JsonTree::<Bytes>::try_from(json_data.body).expect("get tree of bytes");
        sort_reflection_by(&mut parsed, |left, right| left.name.cmp(&right.name));
        parsed
    };

    // Apparent size of an arbitrary path relative to the workspace root.
    let inode_size = |path: &str| Bytes::new(read_apparent_size(&workspace.join(path)));

    // Apparent size of a file inside `main/sources`.
    let file_size = |name: &str| {
        Bytes::new(read_apparent_size(
            &workspace.join("main/sources").join(name),
        ))
    };

    // Inode number of a file inside `main/sources`.
    let file_inode = |name: &str| {
        InodeNumber::from(read_inode_number(
            &workspace.join("main/sources").join(name),
        ))
    };

    // Set of link paths, each given relative to `main`.
    let shared_paths = |suffices: &[&str]| {
        let paths: HashSet<PathBuf> = suffices
            .iter()
            .map(|suffix| PathBuf::from("main").join(suffix))
            .collect();
        LinkPathListReflection(paths)
    };

    // Left-to-right sum of sizes, starting from zero bytes.
    let sum_sizes = |parts: &[Bytes]| {
        parts
            .iter()
            .fold(Bytes::new(0), |total, &part| total + part)
    };

    // Every file is expected to be counted exactly once, plus the two directories.
    let expected_size = sum_sizes(&[
        inode_size("main/sources"),
        inode_size("main/internal-hardlinks"),
        file_size("no-hardlinks.txt"),
        file_size("one-internal-hardlink.txt"),
        file_size("two-internal-hardlinks.txt"),
        file_size("one-external-hardlink.txt"),
        file_size("one-internal-one-external-hardlinks.txt"),
    ]);
    assert_eq!(tree.size, expected_size);

    // The expected tree is derived from a report of `main` as a whole, with the
    // root renamed to "(total)" and the children renamed to the argument paths.
    let actual_tree = &tree.tree;
    let expected_tree = {
        let output = Command::new(PDU)
            .with_current_dir(&workspace)
            .with_arg("--quantity=apparent-size")
            .with_arg("--deduplicate-hardlinks")
            .with_arg("--json-output")
            .with_arg("main")
            .pipe(stdio)
            .output()
            .expect("spawn command");
        let json_text = stdout_text(output);
        let json_data =
            serde_json::from_str::<JsonData>(&json_text).expect("parse stdout as JsonData");
        let mut root = JsonTree::<Bytes>::try_from(json_data.body)
            .expect("get tree of bytes")
            .tree;
        sort_reflection_by(&mut root, |left, right| left.name.cmp(&right.name));
        root.name = "(total)".to_string();
        root.size = expected_size;
        for child in root.children.iter_mut() {
            child.name = match child.name.as_str() {
                "sources" => "main/sources".to_string(),
                "internal-hardlinks" => "main/internal-hardlinks".to_string(),
                name => panic!("Unexpected name: {name:?}"),
            };
        }
        root
    };
    assert_eq!(actual_tree, &expected_tree);

    let reported_details = tree.shared.details.as_ref().expect("get details");
    let actual_shared_details = reported_details.iter().cloned().collect::<Vec<_>>();

    // One expected entry per hardlinked source file: `links` is the total link
    // count of the inode, `link_paths` are the links located inside the tree.
    let entry = |source: &str, links, link_paths: &[&str]| ReflectionEntry {
        ino: file_inode(source),
        size: file_size(source),
        links,
        paths: shared_paths(link_paths),
    };
    let expected_shared_details = [
        entry(
            "one-internal-hardlink.txt",
            1 + 1,
            &[
                "sources/one-internal-hardlink.txt",
                "internal-hardlinks/link-0.txt",
            ],
        ),
        entry(
            "two-internal-hardlinks.txt",
            1 + 2,
            &[
                "sources/two-internal-hardlinks.txt",
                "internal-hardlinks/link-1a.txt",
                "internal-hardlinks/link-1b.txt",
            ],
        ),
        entry(
            "one-external-hardlink.txt",
            1 + 1,
            &["sources/one-external-hardlink.txt"],
        ),
        entry(
            "one-internal-one-external-hardlinks.txt",
            1 + 1 + 1,
            &[
                "sources/one-internal-one-external-hardlinks.txt",
                "internal-hardlinks/link-3a.txt",
            ],
        ),
    ]
    .into_sorted_by_key(|item| u64::from(item.ino));
    assert_eq!(actual_shared_details, expected_shared_details);

    // Each sum lists one term per hardlinked file, in the same order as above.
    let expected_shared_summary = Summary::default()
        .with_inodes(0 + 1 + 1 + 1 + 1)
        .with_exclusive_inodes(0 + 1 + 1 + 0 + 0)
        .with_all_links(0 + 2 + 3 + 2 + 3)
        .with_detected_links(0 + 2 + 3 + 1 + 2)
        .with_exclusive_links(0 + 2 + 3 + 0 + 0)
        .with_shared_size(sum_sizes(&[
            file_size("one-internal-hardlink.txt"),
            file_size("two-internal-hardlinks.txt"),
            file_size("one-external-hardlink.txt"),
            file_size("one-internal-one-external-hardlinks.txt"),
        ]))
        .with_exclusive_shared_size(sum_sizes(&[
            file_size("one-internal-hardlink.txt"),
            file_size("two-internal-hardlinks.txt"),
        ]))
        .pipe(Some);
    assert_eq!(tree.shared.summary, expected_shared_summary);

    // Same invocation without `--json-output`: only the trailing hardlink
    // summary of the human-readable output is compared.
    let output = Command::new(PDU)
        .with_current_dir(&workspace)
        .with_arg("--quantity=apparent-size")
        .with_arg("--deduplicate-hardlinks")
        .with_arg("main/sources")
        .with_arg("main/internal-hardlinks")
        .pipe(stdio)
        .output()
        .expect("spawn command");
    let visualization = stdout_text(output);
    eprintln!("STDOUT:\n{visualization}");
    let actual_hardlinks_summary = visualization
        .lines()
        .skip_while(|line| !line.starts_with("Hardlinks detected!"))
        .join("\n");
    let expected_hardlinks_summary = {
        use parallel_disk_usage::size::Size;
        let expected = expected_shared_summary.unwrap();
        let header = "Hardlinks detected! Some files have links outside this tree";
        let inodes_line = format!(
            "* Number of shared inodes: {total} total, {exclusive} exclusive",
            total = expected.inodes,
            exclusive = expected.exclusive_inodes,
        );
        let links_line = format!(
            "* Total number of links: {total} total, {detected} detected, {exclusive} exclusive",
            total = expected.all_links,
            detected = expected.detected_links,
            exclusive = expected.exclusive_links,
        );
        let size_line = format!(
            "* Total shared size: {total} total, {exclusive} exclusive",
            total = expected.shared_size.display(BytesFormat::MetricUnits),
            exclusive = expected
                .exclusive_shared_size
                .display(BytesFormat::MetricUnits),
        );
        format!("{header}\n{inodes_line}\n{links_line}\n{size_line}\n")
    };
    assert_eq!(
        actual_hardlinks_summary.trim_end(),
        expected_hardlinks_summary.trim_end(),
    );
}
| 264 | + |
/// Runs `pdu --deduplicate-hardlinks` on three of the links to one file and
/// checks the total size, the listed children, the per-inode hardlink details,
/// the hardlink summary, and the textual summary printed after the
/// visualization.
#[test]
fn multiple_hardlinks_to_a_single_file() {
    let links = 10;
    let args = ["file.txt", "link.3", "link.5"];
    let workspace = SampleWorkspace::multiple_hardlinks_to_a_single_file(100_000, links);

    // JSON report for the selected links.
    let tree = {
        let output = Command::new(PDU)
            .with_current_dir(&workspace)
            .with_arg("--quantity=apparent-size")
            .with_arg("--deduplicate-hardlinks")
            .with_arg("--json-output")
            .with_args(args)
            .pipe(stdio)
            .output()
            .expect("spawn command");
        let json_text = stdout_text(output);
        let json_data =
            serde_json::from_str::<JsonData>(&json_text).expect("parse stdout as JsonData");
        JsonTree::<Bytes>::try_from(json_data.body).expect("get tree of bytes")
    };

    // Size and inode number are both read from `file.txt`.
    let file_path = workspace.join("file.txt");
    let file_size = Bytes::new(read_apparent_size(&file_path));
    let file_inode = InodeNumber::from(read_inode_number(&file_path));

    // The total is expected to equal the size of the single file.
    assert_eq!(tree.size, file_size);

    // Each argument is still expected as its own child carrying the file size.
    let actual_children = tree
        .children
        .clone()
        .into_sorted_by(|left, right| left.name.cmp(&right.name));
    let expected_children = args.map(|name| Reflection {
        name: String::from(name),
        size: file_size,
        children: vec![],
    });
    assert_eq!(actual_children, expected_children);

    let reported_details = tree.shared.details.as_ref().expect("get details");
    let actual_shared_details = reported_details.iter().cloned().collect::<Vec<_>>();
    let expected_shared_details = [ReflectionEntry {
        ino: file_inode,
        size: file_size,
        links: 1 + links,
        paths: LinkPathListReflection(HashSet::from(args.map(PathBuf::from))),
    }];
    assert_eq!(actual_shared_details, expected_shared_details);

    // Only `args.len()` links are expected to be detected out of `1 + links`,
    // so no inode, link, or size is expected to count as exclusive.
    let expected_shared_summary = Summary::default()
        .with_inodes(1)
        .with_exclusive_inodes(0)
        .with_all_links(1 + links)
        .with_detected_links(args.len())
        .with_exclusive_links(0)
        .with_shared_size(file_size)
        .with_exclusive_shared_size(Bytes::new(0))
        .pipe(Some);
    assert_eq!(tree.shared.summary, expected_shared_summary);

    // Same invocation without `--json-output`: only the trailing hardlink
    // summary of the human-readable output is compared.
    let output = Command::new(PDU)
        .with_current_dir(&workspace)
        .with_arg("--quantity=apparent-size")
        .with_arg("--deduplicate-hardlinks")
        .with_args(args)
        .pipe(stdio)
        .output()
        .expect("spawn command");
    let visualization = stdout_text(output);
    eprintln!("STDOUT:\n{visualization}");
    let actual_hardlinks_summary = visualization
        .lines()
        .skip_while(|line| !line.starts_with("Hardlinks detected!"))
        .join("\n");
    let expected_hardlinks_summary = {
        use parallel_disk_usage::size::Size;
        let expected = expected_shared_summary.unwrap();
        let header = "Hardlinks detected! All hardlinks within this tree have links without";
        let inodes_line = "* Number of shared inodes: 1";
        let links_line = format!(
            "* Total number of links: {total} total, {detected} detected",
            total = expected.all_links,
            detected = expected.detected_links,
        );
        let size_line = format!(
            "* Total shared size: {}",
            file_size.display(BytesFormat::MetricUnits),
        );
        format!("{header}\n{inodes_line}\n{links_line}\n{size_line}\n")
    };
    assert_eq!(
        actual_hardlinks_summary.trim_end(),
        expected_hardlinks_summary.trim_end(),
    );
}
0 commit comments