Skip to content

Commit 1f815e4

Browse files
committed
refactor: extract manpage post-processing into a function with tests
- Move manpage post-processing logic into post_process_manpage() function - Add comprehensive unit tests for all post-processing functionality - Improve .br macro handling to correctly fix mandoc warnings - Tests cover TH header fixes, whitespace removal, and .br pattern fixes
1 parent 17c3147 commit 1f815e4

2 files changed

Lines changed: 199 additions & 57 deletions

File tree

Cargo.toml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,13 @@ expensive_tests = []
3131
# "test_risky_names" == enable tests that create problematic file names (would make a network share inaccessible to Windows, breaks SVN on Mac OS, etc.)
3232
test_risky_names = []
3333
# * only build `uudoc` when `--feature uudoc` is activated
34-
uudoc = ["dep:clap_complete", "dep:clap_mangen", "dep:fluent-syntax", "dep:zip"]
34+
uudoc = [
35+
"dep:clap_complete",
36+
"dep:clap_mangen",
37+
"dep:fluent-syntax",
38+
"dep:regex",
39+
"dep:zip",
40+
]
3541
## features
3642
## Optional feature for stdbuf
3743
# "feat_external_libstdbuf" == use an external libstdbuf.so for stdbuf instead of embedding it
@@ -477,6 +483,7 @@ clap_complete = { workspace = true, optional = true }
477483
clap_mangen = { workspace = true, optional = true }
478484
clap.workspace = true
479485
fluent-syntax = { workspace = true, optional = true }
486+
regex = { workspace = true, optional = true }
480487
itertools.workspace = true
481488
phf.workspace = true
482489
selinux = { workspace = true, optional = true }

src/bin/uudoc.rs

Lines changed: 191 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@
33
// For the full copyright and license information, please view the LICENSE
44
// file that was distributed with this source code.
55

6-
// spell-checker:ignore mangen tldr
6+
// spell-checker:ignore mangen tldr mandoc uppercasing uppercased manpages DESTDIR
77

88
use std::{
9-
collections::HashMap,
9+
collections::{HashMap, HashSet},
1010
ffi::OsString,
1111
fs::File,
1212
io::{self, Read, Seek, Write},
@@ -18,6 +18,7 @@ use clap_complete::Shell;
1818
use clap_mangen::Man;
1919
use fluent_syntax::ast::{Entry, Message, Pattern};
2020
use fluent_syntax::parser;
21+
use regex::Regex;
2122
use textwrap::{fill, indent, termwidth};
2223
use zip::ZipArchive;
2324

@@ -26,6 +27,72 @@ use uucore::Args;
2627

2728
include!(concat!(env!("OUT_DIR"), "/uutils_map.rs"));
2829

30+
/// Post-process a generated manpage to fix mandoc lint issues
31+
///
32+
/// This function:
33+
/// - Fixes the TH header by uppercasing command names and removing invalid date formats
34+
/// - Removes trailing whitespace from all lines
35+
/// - Fixes redundant .br paragraph macros that cause mandoc warnings
36+
fn post_process_manpage(manpage: String) -> String {
37+
// Only match TH headers that have at least a command name on the same line
38+
// Use [ \t] instead of \s to avoid matching newlines
39+
// Use a date format that satisfies mandoc (YYYY-MM-DD)
40+
let date = Zoned::now().strftime("%Y-%m-%d").to_string();
41+
42+
let th_regex = Regex::new(r"(?m)^\.TH[ \t]+([^ \t\n]+)(?:[ \t]+[^\n]*)?$").unwrap();
43+
let mut result = th_regex
44+
.replace_all(&manpage, |caps: &regex::Captures| {
45+
// Add date to satisfy mandoc - date must be quoted
46+
format!(".TH {} 1 \"{date}\"", caps[1].to_uppercase())
47+
})
48+
.to_string();
49+
50+
// Process lines: remove trailing whitespace and fix .br issues in a single pass
51+
let lines: Vec<&str> = result.lines().collect();
52+
let mut fixed_lines = Vec::with_capacity(lines.len());
53+
let mut skip_indices = HashSet::new();
54+
55+
// First pass: identify lines to skip (redundant .br macros)
56+
for i in 0..lines.len() {
57+
let line = lines[i].trim_end();
58+
59+
if line == ".br" && !skip_indices.contains(&i) {
60+
// Check for consecutive .br macros
61+
if i > 0 && lines[i - 1].trim_end() == ".br" {
62+
skip_indices.insert(i);
63+
}
64+
// Check for .br, empty line, .br pattern
65+
else if i + 2 < lines.len()
66+
&& lines[i + 1].trim().is_empty()
67+
&& lines[i + 2].trim_end() == ".br"
68+
{
69+
skip_indices.insert(i + 2);
70+
}
71+
}
72+
}
73+
74+
// Second pass: build the final output
75+
for (i, line) in lines.iter().enumerate() {
76+
if !skip_indices.contains(&i) {
77+
fixed_lines.push(line.trim_end());
78+
}
79+
}
80+
81+
result = fixed_lines.join("\n");
82+
83+
// Fix escape sequence issues
84+
// \\\\0 appears when trying to represent literal \0 string
85+
// In man pages, use \e for literal backslash
86+
result = result.replace("\\\\\\\\0", "\\e0");
87+
result = result.replace("\\\\0", "\\e0");
88+
89+
if !result.ends_with('\n') {
90+
result.push('\n');
91+
}
92+
93+
result
94+
}
95+
2996
/// Print usage information for uudoc
3097
fn usage<T: Args>(utils: &UtilityMap<T>) {
3198
println!("uudoc - Documentation generator for uutils coreutils");
@@ -100,63 +167,15 @@ fn gen_manpage<T: Args>(
100167
man.render(&mut buffer).expect("Man page generation failed");
101168

102169
// Convert to string for processing
103-
let mut manpage = String::from_utf8(buffer).expect("Invalid UTF-8 in manpage");
104-
105-
// Fix the TH line: remove version info from date field and uppercase the command name
106-
if let Some(th_pos) = manpage.find(".TH ") {
107-
if let Some(line_end) = manpage[th_pos..].find('\n') {
108-
let th_line = &manpage[th_pos..th_pos + line_end];
109-
// Parse the TH line parts
110-
let parts: Vec<&str> = th_line.split_whitespace().collect();
111-
if parts.len() >= 2 {
112-
let cmd_name = parts[1].to_uppercase();
113-
// Reconstruct TH line with uppercase command name and no date
114-
let new_th = format!(".TH {} 1", cmd_name);
115-
manpage.replace_range(th_pos..th_pos + line_end, &new_th);
116-
}
117-
}
118-
}
119-
120-
// Remove trailing whitespace from all lines and fix .br issues
121-
let lines: Vec<String> = manpage
122-
.lines()
123-
.map(|line| line.trim_end().to_string())
124-
.collect();
170+
let manpage = String::from_utf8(buffer).expect("Invalid UTF-8 in manpage");
125171

126-
// Fix .br paragraph macro issues
127-
let mut fixed_lines = Vec::new();
128-
let mut skip_next_br = false;
129-
130-
for i in 0..lines.len() {
131-
let line = &lines[i];
132-
133-
if line == ".br" {
134-
// Check for problematic patterns with .br
135-
let prev_is_br = i > 0 && lines[i - 1] == ".br";
136-
let next_is_empty_then_br =
137-
i + 2 < lines.len() && lines[i + 1].is_empty() && lines[i + 2] == ".br";
138-
let prev_is_empty_with_br = i >= 2 && lines[i - 1].is_empty() && lines[i - 2] == ".br";
139-
140-
// Skip redundant .br in these patterns
141-
if skip_next_br || prev_is_br || next_is_empty_then_br || prev_is_empty_with_br {
142-
skip_next_br = false;
143-
continue;
144-
}
145-
146-
// If this .br is followed by empty line and another .br, skip the second one
147-
if next_is_empty_then_br {
148-
skip_next_br = true;
149-
}
150-
}
151-
152-
fixed_lines.push(line.clone());
153-
}
154-
155-
manpage = fixed_lines.join("\n");
156-
manpage.push('\n');
172+
// Post-process the manpage to fix mandoc lint issues
173+
let processed_manpage = post_process_manpage(manpage);
157174

158175
// Write the processed manpage to stdout
159-
io::stdout().write_all(manpage.as_bytes()).unwrap();
176+
io::stdout()
177+
.write_all(processed_manpage.as_bytes())
178+
.unwrap();
160179
io::stdout().flush().unwrap();
161180
process::exit(0);
162181
}
@@ -691,3 +710,119 @@ fn format_examples(content: String, output_markdown: bool) -> Result<String, std
691710
)?;
692711
Ok(s)
693712
}
713+
714+
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `post_process_manpage` on `input` and compares the result with `expected`.
    ///
    /// The function stamps the `.TH` header with the current date, so a literal date
    /// in the expectations could only match on a single day. Instead, every
    /// `.TH <NAME> 1 "<YYYY-MM-DD>"` line in the output has its date replaced by the
    /// placeholder `<DATE>` before comparing; a date that is missing, unquoted or
    /// not in `YYYY-MM-DD` form is left as is and therefore fails the comparison.
    #[track_caller]
    fn assert_processed(input: &str, expected: &str) {
        let output = post_process_manpage(input.to_string());
        let date_regex = Regex::new(r#"(?m)^(\.TH [^ \t\n]+ 1) "\d{4}-\d{2}-\d{2}"$"#).unwrap();
        let normalized = date_regex.replace_all(&output, "${1} \"<DATE>\"");
        assert_eq!(normalized, expected);
    }

    #[test]
    fn test_post_process_manpage_fixes_th_header() {
        // Command names are uppercased and the version text is replaced by a date
        assert_processed(
            ".TH cat 1 \"cat (uutils coreutils) 0.7.0\"\n.SH NAME\ncat - concatenate files\n",
            ".TH CAT 1 \"<DATE>\"\n.SH NAME\ncat - concatenate files\n",
        );
    }

    #[test]
    fn test_post_process_manpage_removes_trailing_whitespace() {
        // Trailing whitespace is removed from lines
        assert_processed(
            ".TH TEST 1 \nSome text with trailing spaces \n.SH SECTION \n",
            ".TH TEST 1 \"<DATE>\"\nSome text with trailing spaces\n.SH SECTION\n",
        );
    }

    #[test]
    fn test_post_process_manpage_fixes_double_br() {
        // Of two consecutive .br macros only the first is kept
        assert_processed(
            ".TH TEST 1\n.br\n.br\nSome text\n",
            ".TH TEST 1 \"<DATE>\"\n.br\nSome text\n",
        );
    }

    #[test]
    fn test_post_process_manpage_fixes_br_with_empty_line() {
        // Both .br macros should be removed (first because followed by empty,
        // second because preceded by empty)
        assert_processed(
            ".TH TEST 1\n.br\n\n.br\nSome text\n",
            ".TH TEST 1 \"<DATE>\"\n\nSome text\n",
        );
    }

    #[test]
    fn test_post_process_manpage_preserves_single_br() {
        // A single .br macro between text lines is preserved
        assert_processed(
            ".TH TEST 1\nLine 1\n.br\nLine 2\n",
            ".TH TEST 1 \"<DATE>\"\nLine 1\n.br\nLine 2\n",
        );
    }

    #[test]
    fn test_post_process_manpage_handles_mixed_case_command() {
        // Mixed case command names are uppercased
        assert_processed(
            ".TH CaT 1 \"some version info\"\nContent\n",
            ".TH CAT 1 \"<DATE>\"\nContent\n",
        );
    }

    #[test]
    fn test_post_process_manpage_handles_no_th_header() {
        // Manpages without TH headers pass through unchanged
        assert_processed(
            ".SH NAME\ntest - a test utility\n",
            ".SH NAME\ntest - a test utility\n",
        );
    }

    #[test]
    fn test_post_process_manpage_complex_br_pattern() {
        // .br followed/preceded by empty lines should be removed,
        // consecutive .br should have one removed
        assert_processed(
            ".TH TEST 1\nSection 1\n.br\n\n.br\nMiddle\n.br\n.br\nSection 2\n.br\n\n.br\nEnd\n",
            ".TH TEST 1 \"<DATE>\"\nSection 1\n\nMiddle\n.br\nSection 2\n\nEnd\n",
        );
    }

    #[test]
    fn test_post_process_manpage_malformed_th_header() {
        // A TH header without a command name is left untouched
        assert_processed(".TH\nContent\n", ".TH\nContent\n");

        // TH header with special characters in the command name
        assert_processed(
            ".TH test-cmd 1 \"version 1.0\"\nContent\n",
            ".TH TEST-CMD 1 \"<DATE>\"\nContent\n",
        );

        // TH header at end of file without newline: a final newline is added
        assert_processed(
            "Content\n.TH test 1",
            "Content\n.TH TEST 1 \"<DATE>\"\n",
        );

        // Multiple TH headers: every one is rewritten, because the pattern is
        // matched per line and applied to all matches
        assert_processed(
            ".TH first 1\nMiddle\n.TH second 1\n",
            ".TH FIRST 1 \"<DATE>\"\nMiddle\n.TH SECOND 1 \"<DATE>\"\n",
        );
    }
}

0 commit comments

Comments
 (0)