Skip to content

Commit e0611e8

Browse files
committed
test: multiple arguments hardlinks deduplication
1 parent a65af09 commit e0611e8

1 file changed

Lines changed: 386 additions & 0 deletions

File tree

Lines changed: 386 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,386 @@
1+
#![cfg(unix)] // This feature is not available in Windows
2+
#![cfg(feature = "cli")]
3+
4+
pub mod _utils;
5+
pub use _utils::*;
6+
7+
use command_extra::CommandExtra;
8+
use itertools::Itertools;
9+
use parallel_disk_usage::{
10+
bytes_format::BytesFormat,
11+
data_tree::Reflection,
12+
hardlink::{
13+
hardlink_list::{reflection::ReflectionEntry, Summary},
14+
LinkPathListReflection,
15+
},
16+
inode::InodeNumber,
17+
json_data::{JsonData, JsonTree},
18+
size::Bytes,
19+
};
20+
use pipe_trait::Pipe;
21+
use pretty_assertions::assert_eq;
22+
use std::{
23+
collections::HashSet,
24+
path::PathBuf,
25+
process::{Command, Stdio},
26+
};
27+
28+
fn stdio(command: Command) -> Command {
29+
command
30+
.with_stdin(Stdio::null())
31+
.with_stdout(Stdio::piped())
32+
.with_stderr(Stdio::piped())
33+
}
34+
35+
/// Runs `pdu --deduplicate-hardlinks` with `main/sources` and
/// `main/internal-hardlinks` passed as two separate arguments, then checks:
///
/// * the total size in the JSON output,
/// * the JSON tree, against the tree produced by running on `main` alone
///   (with the root renamed to `(total)` and the children re-prefixed),
/// * the per-inode hardlink details and the hardlink summary in the JSON output,
/// * the hardlink summary text at the end of the visualization output.
#[test]
fn simple_tree_with_some_hardlinks() {
    // The `0 + ...` terms in the expected summary are intentional.
    #![expect(clippy::identity_op)]

    let workspace = SampleWorkspace::simple_tree_with_some_hardlinks([
        200_000, 220_000, 310_000, 110_000, 210_000,
    ]);

    // JSON report of the two directories, with children sorted by name
    // so that the comparison below does not depend on traversal order.
    let tree = {
        let stdout = Command::new(PDU)
            .with_current_dir(&workspace)
            .with_args([
                "--quantity=apparent-size",
                "--deduplicate-hardlinks",
                "--json-output",
                "main/sources",
                "main/internal-hardlinks",
            ])
            .pipe(stdio)
            .output()
            .expect("spawn command")
            .pipe(stdout_text);
        let json = serde_json::from_str::<JsonData>(&stdout).expect("parse stdout as JsonData");
        let mut tree = JsonTree::<Bytes>::try_from(json.body).expect("get tree of bytes");
        sort_reflection_by(&mut tree, |left, right| left.name.cmp(&right.name));
        tree
    };

    let sources_dir = workspace.join("main/sources");

    // Apparent size of an arbitrary path relative to the workspace.
    let inode_size = |path: &str| Bytes::new(read_apparent_size(&workspace.join(path)));

    // Apparent size of a file inside `main/sources`.
    let file_size = |name: &str| Bytes::new(read_apparent_size(&sources_dir.join(name)));

    // Inode number of a file inside `main/sources`.
    let file_inode = |name: &str| InodeNumber::from(read_inode_number(&sources_dir.join(name)));

    // Set of link paths, each given relative to `main`.
    let shared_paths = |suffices: &[&str]| {
        let paths = suffices
            .iter()
            .map(|suffix| PathBuf::from("main").join(suffix))
            .collect::<HashSet<_>>();
        LinkPathListReflection(paths)
    };

    // The two directories themselves plus every file in `main/sources`, each counted once.
    let expected_size = [
        inode_size("main/sources"),
        inode_size("main/internal-hardlinks"),
        file_size("no-hardlinks.txt"),
        file_size("one-internal-hardlink.txt"),
        file_size("two-internal-hardlinks.txt"),
        file_size("one-external-hardlink.txt"),
        file_size("one-internal-one-external-hardlinks.txt"),
    ]
    .iter()
    .copied()
    .fold(Bytes::new(0), |total, part| total + part);
    assert_eq!(tree.size, expected_size);

    // The expected tree is derived from a run on `main` as a single argument.
    let expected_tree = {
        let stdout = Command::new(PDU)
            .with_current_dir(&workspace)
            .with_args([
                "--quantity=apparent-size",
                "--deduplicate-hardlinks",
                "--json-output",
                "main",
            ])
            .pipe(stdio)
            .output()
            .expect("spawn command")
            .pipe(stdout_text);
        let json = serde_json::from_str::<JsonData>(&stdout).expect("parse stdout as JsonData");
        let mut expected = JsonTree::<Bytes>::try_from(json.body)
            .expect("get tree of bytes")
            .tree;
        sort_reflection_by(&mut expected, |left, right| left.name.cmp(&right.name));
        expected.name = String::from("(total)");
        expected.size = expected_size;
        for child in expected.children.iter_mut() {
            child.name = match child.name.as_str() {
                "sources" => "main/sources",
                "internal-hardlinks" => "main/internal-hardlinks",
                name => panic!("Unexpected name: {name:?}"),
            }
            .to_string();
        }
        expected
    };
    assert_eq!(&tree.tree, &expected_tree);

    // Builds the expected entry of a file in `main/sources`
    // from its name, its total link count, and the links that were scanned.
    let shared_entry = |file_name: &str, links, paths: &[&str]| ReflectionEntry {
        ino: file_inode(file_name),
        size: file_size(file_name),
        links,
        paths: shared_paths(paths),
    };

    let reported_details = tree.shared.details.as_ref().expect("get details");
    let actual_shared_details: Vec<_> = reported_details.iter().cloned().collect();
    let expected_shared_details = [
        shared_entry(
            "one-internal-hardlink.txt",
            1 + 1,
            &[
                "sources/one-internal-hardlink.txt",
                "internal-hardlinks/link-0.txt",
            ],
        ),
        shared_entry(
            "two-internal-hardlinks.txt",
            1 + 2,
            &[
                "sources/two-internal-hardlinks.txt",
                "internal-hardlinks/link-1a.txt",
                "internal-hardlinks/link-1b.txt",
            ],
        ),
        shared_entry(
            "one-external-hardlink.txt",
            1 + 1,
            &["sources/one-external-hardlink.txt"],
        ),
        shared_entry(
            "one-internal-one-external-hardlinks.txt",
            1 + 1 + 1,
            &[
                "sources/one-internal-one-external-hardlinks.txt",
                "internal-hardlinks/link-3a.txt",
            ],
        ),
    ]
    .into_sorted_by_key(|item| u64::from(item.ino));
    assert_eq!(actual_shared_details, expected_shared_details);

    let expected_summary = Summary::default()
        .with_inodes(0 + 1 + 1 + 1 + 1)
        .with_exclusive_inodes(0 + 1 + 1 + 0 + 0)
        .with_all_links(0 + 2 + 3 + 2 + 3)
        .with_detected_links(0 + 2 + 3 + 1 + 2)
        .with_exclusive_links(0 + 2 + 3 + 0 + 0)
        .with_shared_size(
            Bytes::new(0)
                + file_size("one-internal-hardlink.txt")
                + file_size("two-internal-hardlinks.txt")
                + file_size("one-external-hardlink.txt")
                + file_size("one-internal-one-external-hardlinks.txt"),
        )
        .with_exclusive_shared_size(
            Bytes::new(0)
                + file_size("one-internal-hardlink.txt")
                + file_size("two-internal-hardlinks.txt"),
        );
    assert_eq!(tree.shared.summary, Some(expected_summary));

    // Same invocation without `--json-output`: only the trailing summary text is checked.
    let visualization = Command::new(PDU)
        .with_current_dir(&workspace)
        .with_args([
            "--quantity=apparent-size",
            "--deduplicate-hardlinks",
            "main/sources",
            "main/internal-hardlinks",
        ])
        .pipe(stdio)
        .output()
        .expect("spawn command")
        .pipe(stdout_text);
    eprintln!("STDOUT:\n{visualization}");

    // Everything from the "Hardlinks detected!" line to the end of the output.
    let actual_hardlinks_summary = visualization
        .lines()
        .skip_while(|line| !line.starts_with("Hardlinks detected!"))
        .join("\n");
    let expected_hardlinks_summary = {
        use parallel_disk_usage::size::Size;
        [
            "Hardlinks detected! Some files have links outside this tree".to_string(),
            format!(
                "* Number of shared inodes: {total} total, {exclusive} exclusive",
                total = expected_summary.inodes,
                exclusive = expected_summary.exclusive_inodes,
            ),
            format!(
                "* Total number of links: {total} total, {detected} detected, {exclusive} exclusive",
                total = expected_summary.all_links,
                detected = expected_summary.detected_links,
                exclusive = expected_summary.exclusive_links,
            ),
            format!(
                "* Total shared size: {total} total, {exclusive} exclusive",
                total = expected_summary
                    .shared_size
                    .display(BytesFormat::MetricUnits),
                exclusive = expected_summary
                    .exclusive_shared_size
                    .display(BytesFormat::MetricUnits),
            ),
        ]
        .iter()
        .join("\n")
    };
    assert_eq!(
        actual_hardlinks_summary.trim_end(),
        expected_hardlinks_summary.trim_end(),
    );
}
264+
265+
/// Runs `pdu --deduplicate-hardlinks` with `file.txt`, `link.3`, and `link.5`
/// passed as separate arguments, then checks:
///
/// * the total size equals the size of `file.txt` alone,
/// * every argument appears as a childless node with the size of `file.txt`,
/// * the hardlink details contain a single entry listing exactly the arguments,
/// * the hardlink summary in both the JSON output and the visualization output.
#[test]
fn multiple_hardlinks_to_a_single_file() {
    let links = 10;
    let args = ["file.txt", "link.3", "link.5"];
    let workspace = SampleWorkspace::multiple_hardlinks_to_a_single_file(100_000, links);

    let tree = {
        let stdout = Command::new(PDU)
            .with_current_dir(&workspace)
            .with_args([
                "--quantity=apparent-size",
                "--deduplicate-hardlinks",
                "--json-output",
            ])
            .with_args(args)
            .pipe(stdio)
            .output()
            .expect("spawn command")
            .pipe(stdout_text);
        let json = serde_json::from_str::<JsonData>(&stdout).expect("parse stdout as JsonData");
        JsonTree::<Bytes>::try_from(json.body).expect("get tree of bytes")
    };

    // Size and inode number are both read from `file.txt`.
    let original_file = workspace.join("file.txt");
    let file_size = Bytes::new(read_apparent_size(&original_file));
    let file_inode = InodeNumber::from(read_inode_number(&original_file));

    // The total is expected to equal the size of `file.txt`, not a multiple of it.
    assert_eq!(tree.size, file_size);

    let actual_children = tree
        .children
        .clone()
        .into_sorted_by(|left, right| left.name.cmp(&right.name));
    let expected_children = args.map(|arg| Reflection {
        name: String::from(arg),
        size: file_size,
        children: Vec::new(),
    });
    assert_eq!(actual_children, expected_children);

    let reported_details = tree.shared.details.as_ref().expect("get details");
    let actual_shared_details: Vec<_> = reported_details.iter().cloned().collect();
    let expected_paths = args
        .iter()
        .copied()
        .map(PathBuf::from)
        .collect::<HashSet<_>>();
    let expected_shared_details = [ReflectionEntry {
        ino: file_inode,
        size: file_size,
        links: 1 + links,
        paths: LinkPathListReflection(expected_paths),
    }];
    assert_eq!(actual_shared_details, expected_shared_details);

    let expected_summary = Summary::default()
        .with_inodes(1)
        .with_exclusive_inodes(0)
        .with_all_links(1 + links)
        .with_detected_links(args.len())
        .with_exclusive_links(0)
        .with_shared_size(file_size)
        .with_exclusive_shared_size(Bytes::new(0));
    assert_eq!(tree.shared.summary, Some(expected_summary));

    // Same invocation without `--json-output`: only the trailing summary text is checked.
    let visualization = Command::new(PDU)
        .with_current_dir(&workspace)
        .with_args(["--quantity=apparent-size", "--deduplicate-hardlinks"])
        .with_args(args)
        .pipe(stdio)
        .output()
        .expect("spawn command")
        .pipe(stdout_text);
    eprintln!("STDOUT:\n{visualization}");

    // Everything from the "Hardlinks detected!" line to the end of the output.
    let actual_hardlinks_summary = visualization
        .lines()
        .skip_while(|line| !line.starts_with("Hardlinks detected!"))
        .join("\n");
    let expected_hardlinks_summary = {
        use parallel_disk_usage::size::Size;
        [
            "Hardlinks detected! All hardlinks within this tree have links without".to_string(),
            "* Number of shared inodes: 1".to_string(),
            format!(
                "* Total number of links: {total} total, {detected} detected",
                total = expected_summary.all_links,
                detected = expected_summary.detected_links,
            ),
            format!(
                "* Total shared size: {}",
                file_size.display(BytesFormat::MetricUnits),
            ),
        ]
        .iter()
        .join("\n")
    };
    assert_eq!(
        actual_hardlinks_summary.trim_end(),
        expected_hardlinks_summary.trim_end(),
    );
}

0 commit comments

Comments
 (0)