Skip to content

Commit c88ade7

Browse files
committed
fix(ls): replace recursion with DFS
Changes `ls` to traverse directories with an iterative Depth-First Search (DFS) driven by an explicit stack instead of recursive function calls. Fixes #8725 and should help towards fixing #11215; this also opens the door to further optimizations that could fully fix the latter.
1 parent 3b3d5a7 commit c88ade7

1 file changed

Lines changed: 130 additions & 93 deletions

File tree

src/uu/ls/src/ls.rs

Lines changed: 130 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -2189,6 +2189,8 @@ fn push_basic_escape(buf: &mut String, byte: u8) {
21892189
}
21902190
}
21912191

2192+
/// A directory queued for listing: its path, paired with a flag saying
/// whether a blank separator line is needed before its heading.
type DirData = (PathBuf, bool);
2193+
21922194
// A struct to encapsulate state that is passed around from `list` functions.
21932195
struct ListState<'a> {
21942196
out: BufWriter<Stdout>,
@@ -2203,6 +2205,9 @@ struct ListState<'a> {
22032205
#[cfg(unix)]
22042206
gid_cache: FxHashMap<u32, String>,
22052207
recent_time_range: RangeInclusive<SystemTime>,
2208+
stack: Vec<DirData>,
2209+
listed_ancestors: FxHashSet<FileInformation>,
2210+
initial_locs_len: usize,
22062211
}
22072212

22082213
#[allow(clippy::cognitive_complexity)]
@@ -2224,6 +2229,9 @@ pub fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> {
22242229
// According to GNU a Gregorian year has 365.2425 * 24 * 60 * 60 == 31556952 seconds on the average.
22252230
recent_time_range: (SystemTime::now() - Duration::new(31_556_952 / 2, 0))
22262231
..=SystemTime::now(),
2232+
stack: Vec::new(),
2233+
listed_ancestors: FxHashSet::default(),
2234+
initial_locs_len,
22272235
};
22282236

22292237
for loc in locs {
@@ -2268,6 +2276,7 @@ pub fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> {
22682276
display_items(&files, config, &mut state, &mut dired)?;
22692277

22702278
for (pos, path_data) in dirs.iter().enumerate() {
2279+
let needs_blank_line = pos != 0 || !files.is_empty();
22712280
// Do read_dir call here to match GNU semantics by printing
22722281
// read_dir errors before directory headings, names and totals
22732282
let read_dir = match fs::read_dir(path_data.path()) {
@@ -2284,41 +2293,49 @@ pub fn list(locs: Vec<&Path>, config: &Config) -> UResult<()> {
22842293
Ok(rd) => rd,
22852294
};
22862295

2287-
// Print dir heading - name... 'total' comes after error display
2288-
if initial_locs_len > 1 || config.recursive {
2289-
let needs_blank_line = !(pos.eq(&0usize) && files.is_empty());
2290-
if needs_blank_line {
2291-
writeln!(state.out)?;
2292-
if config.dired {
2293-
dired.padding += 1;
2294-
}
2295-
}
2296-
if config.dired {
2297-
dired::indent(&mut state.out)?;
2298-
}
2299-
show_dir_name(path_data, &mut state.out, config)?;
2300-
writeln!(state.out)?;
2301-
if config.dired {
2302-
let dir_len = path_data.display_name().len();
2303-
// add the //SUBDIRED// coordinates
2304-
dired::calculate_subdired(&mut dired, dir_len);
2305-
// Add the padding for the dir name
2306-
dired::add_dir_name(&mut dired, dir_len);
2307-
}
2308-
}
2309-
let mut listed_ancestors = FxHashSet::default();
2310-
listed_ancestors.insert(FileInformation::from_path(
2296+
state.listed_ancestors.insert(FileInformation::from_path(
23112297
path_data.path(),
23122298
path_data.must_dereference,
23132299
)?);
2314-
enter_directory(
2315-
path_data,
2300+
2301+
// List each of the arguments to ls first.
2302+
depth_first_list(
2303+
(path_data.path().to_path_buf(), needs_blank_line),
23162304
read_dir,
23172305
config,
23182306
&mut state,
2319-
&mut listed_ancestors,
23202307
&mut dired,
2308+
true,
23212309
)?;
2310+
2311+
// Only runs if it must list recursively.
2312+
while let Some(dir_data) = state.stack.pop() {
2313+
let read_dir = match fs::read_dir(&dir_data.0) {
2314+
Err(err) => {
2315+
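// flush stdout buffer before the error to preserve formatting and order
// NOTE(review): the error below is built from `path_data` (the command-line
// argument being listed), not from `dir_data.0`, the directory whose
// `read_dir` call actually failed — confirm which path should be reported.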
2316+
state.out.flush()?;
2317+
show!(LsError::IOErrorContext(
2318+
path_data.path().to_path_buf(),
2319+
err,
2320+
path_data.command_line
2321+
));
2322+
continue;
2323+
}
2324+
Ok(rd) => rd,
2325+
};
2326+
2327+
depth_first_list(dir_data, read_dir, config, &mut state, &mut dired, false)?;
2328+
2329+
// Heuristic to ensure stack does not keep its capacity forever if there is
2330+
// combinatorial explosion; each pass shrinks it by about half of the unused capacity.
2331+
let (cap, len) = (state.stack.capacity(), state.stack.len());
2332+
if cap > (len + 4) * 2 {
2333+
state.stack.shrink_to(len + (cap - len) / 2);
2334+
}
2335+
}
2336+
2337+
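// NOTE(review): this note originally said there is no need to clear state.buf
// because `enter_directory` drains it; that function is replaced by
// [`depth_first_list`] in this change — confirm the claim still holds.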
2338+
state.listed_ancestors.clear();
23222339
}
23232340
if config.dired && !config.hyperlink {
23242341
dired::print_dired_output(config, &dired, &mut state.out)?;
@@ -2435,18 +2452,55 @@ fn should_display(entry: &DirEntry, config: &Config) -> bool {
24352452
.any(|p| p.matches_with(&file_name, options))
24362453
}
24372454

2438-
#[allow(clippy::cognitive_complexity)]
2439-
fn enter_directory(
2440-
path_data: &PathData,
2455+
fn depth_first_list(
2456+
(dir_path, needs_blank_line): DirData,
24412457
mut read_dir: ReadDir,
24422458
config: &Config,
24432459
state: &mut ListState,
2444-
listed_ancestors: &mut FxHashSet<FileInformation>,
24452460
dired: &mut DiredOutput,
2461+
is_top_level: bool,
24462462
) -> UResult<()> {
2447-
// Create vec of entries with initial dot files
2448-
let mut entries: Vec<PathData> = if config.files == Files::All {
2449-
vec![
2463+
let path_data = PathData::new(dir_path, None, None, config, false);
2464+
2465+
// Print dir heading - name... 'total' comes after error display
2466+
if state.initial_locs_len > 1 || config.recursive {
2467+
if is_top_level {
2468+
if needs_blank_line {
2469+
writeln!(state.out)?;
2470+
if config.dired {
2471+
dired.padding += 1;
2472+
}
2473+
}
2474+
if config.dired {
2475+
dired::indent(&mut state.out)?;
2476+
}
2477+
show_dir_name(&path_data, &mut state.out, config)?;
2478+
writeln!(state.out)?;
2479+
if config.dired {
2480+
let dir_len = path_data.path().as_os_str().len();
2481+
// add the //SUBDIRED// coordinates
2482+
dired::calculate_subdired(dired, dir_len);
2483+
// Add the padding for the dir name
2484+
dired::add_dir_name(dired, dir_len);
2485+
}
2486+
} else {
2487+
writeln!(state.out)?;
2488+
if config.dired {
2489+
dired.padding += 1;
2490+
dired::indent(&mut state.out)?;
2491+
let dir_name_size = path_data.path().as_os_str().len();
2492+
dired::calculate_subdired(dired, dir_name_size);
2493+
dired::add_dir_name(dired, dir_name_size);
2494+
}
2495+
show_dir_name(&path_data, &mut state.out, config)?;
2496+
writeln!(state.out)?;
2497+
}
2498+
}
2499+
2500+
// Append entries with initial dot files and record their existence
2501+
let (ref mut buf, trim) = if config.files == Files::All {
2502+
const DOT_DIRECTORIES: usize = 2;
2503+
let v = vec![
24502504
PathData::new(
24512505
path_data.path().to_path_buf(),
24522506
None,
@@ -2461,95 +2515,78 @@ fn enter_directory(
24612515
config,
24622516
false,
24632517
),
2464-
]
2518+
];
2519+
(v, DOT_DIRECTORIES)
24652520
} else {
2466-
vec![]
2521+
(Vec::new(), 0)
24672522
};
24682523

24692524
// Convert those entries to the PathData struct
24702525
for raw_entry in read_dir.by_ref() {
2471-
let dir_entry = match raw_entry {
2472-
Ok(path) => path,
2526+
match raw_entry {
2527+
Ok(dir_entry) => {
2528+
if should_display(&dir_entry, config) {
2529+
buf.push(PathData::new(
2530+
dir_entry.path(),
2531+
Some(dir_entry),
2532+
None,
2533+
config,
2534+
false,
2535+
));
2536+
}
2537+
}
24732538
Err(err) => {
24742539
state.out.flush()?;
24752540
show!(LsError::IOError(err));
2476-
continue;
24772541
}
2478-
};
2479-
2480-
if should_display(&dir_entry, config) {
2481-
let entry_path_data =
2482-
PathData::new(dir_entry.path(), Some(dir_entry), None, config, false);
2483-
entries.push(entry_path_data);
24842542
}
24852543
}
2544+
// Relinquish unused space since we won't need it anymore.
2545+
buf.shrink_to_fit();
24862546

2487-
sort_entries(&mut entries, config);
2547+
sort_entries(buf, config);
24882548

2489-
// Print total after any error display
24902549
if config.format == Format::Long || config.alloc_size {
2491-
let total = return_total(&entries, config, &mut state.out)?;
2550+
let total = return_total(buf, config, &mut state.out)?;
24922551
write!(state.out, "{}", total.as_str())?;
24932552
if config.dired {
24942553
dired::add_total(dired, total.len());
24952554
}
24962555
}
24972556

2498-
display_items(&entries, config, state, dired)?;
2557+
display_items(buf, config, state, dired)?;
24992558

25002559
if config.recursive {
2501-
// release the open fd before recursing to not run out of resources
2502-
for entry in &entries {
2503-
entry.de.take();
2504-
}
2505-
drop(read_dir);
2506-
for e in entries
2560+
for e in buf
25072561
.iter()
2508-
.skip(if config.files == Files::All { 2 } else { 0 })
2562+
.skip(trim)
25092563
.filter(|p| p.file_type().is_some_and(FileType::is_dir))
2564+
.rev()
25102565
{
2511-
match fs::read_dir(e.path()) {
2512-
Err(err) => {
2513-
state.out.flush()?;
2514-
show!(LsError::IOErrorContext(
2515-
e.path().to_path_buf(),
2516-
err,
2517-
e.command_line
2518-
));
2519-
}
2520-
Ok(rd) => {
2521-
if listed_ancestors
2522-
.insert(FileInformation::from_path(e.path(), e.must_dereference)?)
2523-
{
2524-
// when listing several directories in recursive mode, we show
2525-
// "dirname:" at the beginning of the file list
2526-
writeln!(state.out)?;
2527-
if config.dired {
2528-
// We already injected the first dir
2529-
// Continue with the others
2530-
// blank line between directory sections
2531-
dired.padding += 1;
2532-
dired::indent(&mut state.out)?;
2533-
let dir_name_size = e.path().as_os_str().len();
2534-
dired::calculate_subdired(dired, dir_name_size);
2535-
// inject dir name
2536-
dired::add_dir_name(dired, dir_name_size);
2537-
}
2538-
2539-
show_dir_name(e, &mut state.out, config)?;
2540-
writeln!(state.out)?;
2541-
enter_directory(e, rd, config, state, listed_ancestors, dired)?;
2542-
listed_ancestors
2543-
.remove(&FileInformation::from_path(e.path(), e.must_dereference)?);
2544-
} else {
2545-
state.out.flush()?;
2546-
show!(LsError::AlreadyListedError(e.path().to_path_buf()));
2566+
// Try to open only to report any errors in order to match GNU semantics.
2567+
if let Err(err) = fs::read_dir(e.path()) {
2568+
state.out.flush()?;
2569+
show!(LsError::IOErrorContext(
2570+
e.path().to_path_buf(),
2571+
err,
2572+
e.command_line
2573+
));
2574+
} else {
2575+
let fi = FileInformation::from_path(e.path(), e.must_dereference)?;
2576+
if state.listed_ancestors.insert(fi) {
2577+
// Push to stack, but with a less aggressive growth curve.
2578+
let (cap, len) = (state.stack.capacity(), state.stack.len());
2579+
if cap == len {
2580+
state.stack.reserve_exact(len / 4 + 4);
25472581
}
2582+
state.stack.push((e.path().to_path_buf(), true));
2583+
} else {
2584+
state.out.flush()?;
2585+
show!(LsError::AlreadyListedError(e.path().to_path_buf()));
25482586
}
25492587
}
25502588
}
25512589
}
2552-
25532590
Ok(())
25542591
}
25552592

0 commit comments

Comments
 (0)