Skip to content

Commit f848e3a

Browse files
committed
refactor: extract manpage post-processing into a function with tests
- Move manpage post-processing logic into post_process_manpage() function - Add comprehensive unit tests for all post-processing functionality - Improve .br macro handling to correctly fix mandoc warnings - Tests cover TH header fixes, whitespace removal, and .br pattern fixes
1 parent c854208 commit f848e3a

2 files changed

Lines changed: 203 additions & 57 deletions

File tree

Cargo.toml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,13 @@ expensive_tests = []
3131
# "test_risky_names" == enable tests that create problematic file names (would make a network share inaccessible to Windows, breaks SVN on Mac OS, etc.)
3232
test_risky_names = []
3333
# * only build `uudoc` when `--feature uudoc` is activated
34-
uudoc = ["dep:clap_complete", "dep:clap_mangen", "dep:fluent-syntax", "dep:zip"]
34+
uudoc = [
35+
"dep:clap_complete",
36+
"dep:clap_mangen",
37+
"dep:fluent-syntax",
38+
"dep:regex",
39+
"dep:zip",
40+
]
3541
## features
3642
## Optional feature for stdbuf
3743
# "feat_external_libstdbuf" == use an external libstdbuf.so for stdbuf instead of embedding it
@@ -475,6 +481,7 @@ clap_complete = { workspace = true, optional = true }
475481
clap_mangen = { workspace = true, optional = true }
476482
clap.workspace = true
477483
fluent-syntax = { workspace = true, optional = true }
484+
regex = { workspace = true, optional = true }
478485
itertools.workspace = true
479486
phf.workspace = true
480487
selinux = { workspace = true, optional = true }

src/bin/uudoc.rs

Lines changed: 195 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@
33
// For the full copyright and license information, please view the LICENSE
44
// file that was distributed with this source code.
55

6-
// spell-checker:ignore mangen tldr
6+
// spell-checker:ignore mangen tldr mandoc uppercasing uppercased manpages DESTDIR
77

88
use std::{
9-
collections::HashMap,
9+
collections::{HashMap, HashSet},
1010
ffi::OsString,
1111
fs::File,
1212
io::{self, Read, Seek, Write},
@@ -17,7 +17,9 @@ use clap::{Arg, Command};
1717
use clap_complete::Shell;
1818
use clap_mangen::Man;
1919
use fluent_syntax::ast::{Entry, Message, Pattern};
20+
use jiff::Zoned;
2021
use fluent_syntax::parser;
22+
use regex::Regex;
2123
use textwrap::{fill, indent, termwidth};
2224
use zip::ZipArchive;
2325

@@ -26,6 +28,75 @@ use uucore::Args;
2628

2729
include!(concat!(env!("OUT_DIR"), "/uutils_map.rs"));
2830

31+
/// Post-process a generated manpage to fix mandoc lint issues
32+
///
33+
/// This function:
34+
/// - Fixes the TH header by uppercasing command names and removing invalid date formats
35+
/// - Removes trailing whitespace from all lines
36+
/// - Fixes redundant .br paragraph macros that cause mandoc warnings
37+
fn post_process_manpage(manpage: String) -> String {
38+
// Only match TH headers that have at least a command name on the same line
39+
// Use [ \t] instead of \s to avoid matching newlines
40+
// Use a date format that satisfies mandoc (YYYY-MM-DD)
41+
let date = date.map_or_else(
42+
|| Zoned::now().strftime("%Y-%m-%d").to_string(),
43+
str::to_string,
44+
);
45+
46+
let th_regex = Regex::new(r"(?m)^\.TH[ \t]+([^ \t\n]+)(?:[ \t]+[^\n]*)?$").unwrap();
47+
let mut result = th_regex
48+
.replace_all(&manpage, |caps: &regex::Captures| {
49+
// Add date to satisfy mandoc - date must be quoted
50+
format!(".TH {} 1 \"{date}\"", caps[1].to_uppercase())
51+
})
52+
.to_string();
53+
54+
// Process lines: remove trailing whitespace and fix .br issues in a single pass
55+
let lines: Vec<&str> = result.lines().collect();
56+
let mut fixed_lines = Vec::with_capacity(lines.len());
57+
let mut skip_indices = HashSet::new();
58+
59+
// First pass: identify lines to skip (redundant .br macros)
60+
for i in 0..lines.len() {
61+
let line = lines[i].trim_end();
62+
63+
if line == ".br" && !skip_indices.contains(&i) {
64+
// Check for consecutive .br macros
65+
if i > 0 && lines[i - 1].trim_end() == ".br" {
66+
skip_indices.insert(i);
67+
}
68+
// Check for .br, empty line, .br pattern
69+
else if i + 2 < lines.len()
70+
&& lines[i + 1].trim().is_empty()
71+
&& lines[i + 2].trim_end() == ".br"
72+
{
73+
skip_indices.insert(i + 2);
74+
}
75+
}
76+
}
77+
78+
// Second pass: build the final output
79+
for (i, line) in lines.iter().enumerate() {
80+
if !skip_indices.contains(&i) {
81+
fixed_lines.push(line.trim_end());
82+
}
83+
}
84+
85+
result = fixed_lines.join("\n");
86+
87+
// Fix escape sequence issues
88+
// \\\\0 appears when trying to represent literal \0 string
89+
// In man pages, use \e for literal backslash
90+
result = result.replace("\\\\\\\\0", "\\e0");
91+
result = result.replace("\\\\0", "\\e0");
92+
93+
if !result.ends_with('\n') {
94+
result.push('\n');
95+
}
96+
97+
result
98+
}
99+
29100
/// Print usage information for uudoc
30101
fn usage<T: Args>(utils: &UtilityMap<T>) {
31102
println!("uudoc - Documentation generator for uutils coreutils");
@@ -100,63 +171,15 @@ fn gen_manpage<T: Args>(
100171
man.render(&mut buffer).expect("Man page generation failed");
101172

102173
// Convert to string for processing
103-
let mut manpage = String::from_utf8(buffer).expect("Invalid UTF-8 in manpage");
104-
105-
// Fix the TH line: remove version info from date field and uppercase the command name
106-
if let Some(th_pos) = manpage.find(".TH ") {
107-
if let Some(line_end) = manpage[th_pos..].find('\n') {
108-
let th_line = &manpage[th_pos..th_pos + line_end];
109-
// Parse the TH line parts
110-
let parts: Vec<&str> = th_line.split_whitespace().collect();
111-
if parts.len() >= 2 {
112-
let cmd_name = parts[1].to_uppercase();
113-
// Reconstruct TH line with uppercase command name and no date
114-
let new_th = format!(".TH {} 1", cmd_name);
115-
manpage.replace_range(th_pos..th_pos + line_end, &new_th);
116-
}
117-
}
118-
}
174+
let manpage = String::from_utf8(buffer).expect("Invalid UTF-8 in manpage");
119175

120-
// Remove trailing whitespace from all lines and fix .br issues
121-
let lines: Vec<String> = manpage
122-
.lines()
123-
.map(|line| line.trim_end().to_string())
124-
.collect();
125-
126-
// Fix .br paragraph macro issues
127-
let mut fixed_lines = Vec::new();
128-
let mut skip_next_br = false;
129-
130-
for i in 0..lines.len() {
131-
let line = &lines[i];
132-
133-
if line == ".br" {
134-
// Check for problematic patterns with .br
135-
let prev_is_br = i > 0 && lines[i - 1] == ".br";
136-
let next_is_empty_then_br =
137-
i + 2 < lines.len() && lines[i + 1].is_empty() && lines[i + 2] == ".br";
138-
let prev_is_empty_with_br = i >= 2 && lines[i - 1].is_empty() && lines[i - 2] == ".br";
139-
140-
// Skip redundant .br in these patterns
141-
if skip_next_br || prev_is_br || next_is_empty_then_br || prev_is_empty_with_br {
142-
skip_next_br = false;
143-
continue;
144-
}
145-
146-
// If this .br is followed by empty line and another .br, skip the second one
147-
if next_is_empty_then_br {
148-
skip_next_br = true;
149-
}
150-
}
151-
152-
fixed_lines.push(line.clone());
153-
}
154-
155-
manpage = fixed_lines.join("\n");
156-
manpage.push('\n');
176+
// Post-process the manpage to fix mandoc lint issues
177+
let processed_manpage = post_process_manpage(manpage, None);
157178

158179
// Write the processed manpage to stdout
159-
io::stdout().write_all(manpage.as_bytes()).unwrap();
180+
io::stdout()
181+
.write_all(processed_manpage.as_bytes())
182+
.unwrap();
160183
io::stdout().flush().unwrap();
161184
process::exit(0);
162185
}
@@ -691,3 +714,119 @@ fn format_examples(content: String, output_markdown: bool) -> Result<String, std
691714
)?;
692715
Ok(s)
693716
}
717+
718+
#[cfg(test)]
719+
mod tests {
720+
use super::*;
721+
722+
#[test]
723+
fn test_post_process_manpage_fixes_th_header() {
724+
// Test that command names are uppercased and the version string in the date field is replaced by the supplied date
725+
let input =
726+
".TH cat 1 \"cat (uutils coreutils) 0.7.0\"\n.SH NAME\ncat - concatenate files\n";
727+
let expected = ".TH CAT 1 \"2024-01-01\"\n.SH NAME\ncat - concatenate files\n";
728+
729+
let result = post_process_manpage(input.to_string(), Some("2024-01-01"));
730+
assert_eq!(result, expected);
731+
}
732+
733+
#[test]
734+
fn test_post_process_manpage_removes_trailing_whitespace() {
735+
// Test that trailing whitespace is removed from lines
736+
let input = ".TH TEST 1 \nSome text with trailing spaces \n.SH SECTION \n";
737+
let expected = ".TH TEST 1 \"2024-01-01\"\nSome text with trailing spaces\n.SH SECTION\n";
738+
739+
let result = post_process_manpage(input.to_string(), Some("2024-01-01"));
740+
assert_eq!(result, expected);
741+
}
742+
743+
#[test]
744+
fn test_post_process_manpage_fixes_double_br() {
745+
// Test that redundant .br macros are removed
746+
let input = ".TH TEST 1\n.br\n.br\nSome text\n";
747+
let expected = ".TH TEST 1 \"2024-01-01\"\n.br\nSome text\n";
748+
749+
let result = post_process_manpage(input.to_string(), Some("2024-01-01"));
750+
assert_eq!(result, expected);
751+
}
752+
753+
#[test]
754+
fn test_post_process_manpage_fixes_br_with_empty_line() {
755+
// Test that .br with empty line patterns are fixed
756+
// Both .br macros should be removed (first because followed by empty, second because preceded by empty)
757+
let input = ".TH TEST 1\n.br\n\n.br\nSome text\n";
758+
let expected = ".TH TEST 1 \"2024-01-01\"\n\nSome text\n";
759+
760+
let result = post_process_manpage(input.to_string(), Some("2024-01-01"));
761+
assert_eq!(result, expected);
762+
}
763+
764+
#[test]
765+
fn test_post_process_manpage_preserves_single_br() {
766+
// Test that single .br macros are preserved
767+
let input = ".TH TEST 1\nLine 1\n.br\nLine 2\n";
768+
let expected = ".TH TEST 1 \"2024-01-01\"\nLine 1\n.br\nLine 2\n";
769+
770+
let result = post_process_manpage(input.to_string(), Some("2024-01-01"));
771+
assert_eq!(result, expected);
772+
}
773+
774+
#[test]
775+
fn test_post_process_manpage_handles_mixed_case_command() {
776+
// Test that mixed case command names are uppercased
777+
let input = ".TH CaT 1 \"some version info\"\nContent\n";
778+
let expected = ".TH CAT 1 \"2024-01-01\"\nContent\n";
779+
780+
let result = post_process_manpage(input.to_string(), Some("2024-01-01"));
781+
assert_eq!(result, expected);
782+
}
783+
784+
#[test]
785+
fn test_post_process_manpage_handles_no_th_header() {
786+
// Test that manpages without TH headers are handled gracefully
787+
let input = ".SH NAME\ntest - a test utility\n";
788+
let expected = ".SH NAME\ntest - a test utility\n";
789+
790+
let result = post_process_manpage(input.to_string(), Some("2024-01-01"));
791+
assert_eq!(result, expected);
792+
}
793+
794+
#[test]
795+
fn test_post_process_manpage_complex_br_pattern() {
796+
// Test complex .br patterns with multiple occurrences
797+
let input =
798+
".TH TEST 1\nSection 1\n.br\n\n.br\nMiddle\n.br\n.br\nSection 2\n.br\n\n.br\nEnd\n";
799+
// .br followed/preceded by empty lines should be removed, consecutive .br should have one removed
800+
let expected = ".TH TEST 1 \"2024-01-01\"\nSection 1\n\nMiddle\n.br\nSection 2\n\nEnd\n";
801+
802+
let result = post_process_manpage(input.to_string(), Some("2024-01-01"));
803+
assert_eq!(result, expected);
804+
}
805+
806+
#[test]
807+
fn test_post_process_manpage_malformed_th_header() {
808+
// Test that malformed TH headers don't cause panics and are handled gracefully
809+
let input1 = ".TH\nContent\n"; // Missing command name
810+
let expected1 = ".TH\nContent\n";
811+
let result1 = post_process_manpage(input1.to_string(), Some("2024-01-01"));
812+
assert_eq!(result1, expected1);
813+
814+
// TH header with special characters
815+
let input2 = ".TH test-cmd 1 \"version 1.0\"\nContent\n";
816+
let expected2 = ".TH TEST-CMD 1 \"2024-01-01\"\nContent\n";
817+
let result2 = post_process_manpage(input2.to_string(), Some("2024-01-01"));
818+
assert_eq!(result2, expected2);
819+
820+
// TH header at end of file without newline
821+
let input3 = "Content\n.TH test 1";
822+
let expected3 = "Content\n.TH TEST 1 \"2024-01-01\"\n";
823+
let result3 = post_process_manpage(input3.to_string(), Some("2024-01-01"));
824+
assert_eq!(result3, expected3);
825+
826+
// Multiple TH headers (every one is rewritten, because (?m) lets ^ match at the start of each line)
827+
let input4 = ".TH first 1\nMiddle\n.TH second 1\n";
828+
let expected4 = ".TH FIRST 1 \"2024-01-01\"\nMiddle\n.TH SECOND 1 \"2024-01-01\"\n";
829+
let result4 = post_process_manpage(input4.to_string(), Some("2024-01-01"));
830+
assert_eq!(result4, expected4);
831+
}
832+
}

0 commit comments

Comments
 (0)