Skip to content

Commit 9fe36ff

Browse files
committed
refactor: extract manpage post-processing into a function with tests
- Move manpage post-processing logic into post_process_manpage() function - Add comprehensive unit tests for all post-processing functionality - Improve .br macro handling to correctly fix mandoc warnings - Tests cover TH header fixes, whitespace removal, and .br pattern fixes
1 parent aaedae3 commit 9fe36ff

2 files changed

Lines changed: 179 additions & 56 deletions

File tree

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ expensive_tests = []
3131
# "test_risky_names" == enable tests that create problematic file names (would make a network share inaccessible to Windows, breaks SVN on Mac OS, etc.)
3232
test_risky_names = []
3333
# * only build `uudoc` when `--feature uudoc` is activated
34-
uudoc = ["dep:clap_complete", "dep:clap_mangen", "dep:fluent-syntax", "dep:zip"]
34+
uudoc = ["dep:clap_complete", "dep:clap_mangen", "dep:fluent-syntax", "dep:regex", "dep:zip"]
3535
## features
3636
## Optional feature for stdbuf
3737
# "feat_external_libstdbuf" == use an external libstdbuf.so for stdbuf instead of embedding it
@@ -477,6 +477,7 @@ clap_complete = { workspace = true, optional = true }
477477
clap_mangen = { workspace = true, optional = true }
478478
clap.workspace = true
479479
fluent-syntax = { workspace = true, optional = true }
480+
regex = { workspace = true, optional = true }
480481
itertools.workspace = true
481482
phf.workspace = true
482483
selinux = { workspace = true, optional = true }

src/bin/uudoc.rs

Lines changed: 177 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
// spell-checker:ignore mangen tldr
77

88
use std::{
9-
collections::HashMap,
9+
collections::{HashMap, HashSet},
1010
ffi::OsString,
1111
fs::File,
1212
io::{self, Read, Seek, Write},
@@ -18,6 +18,7 @@ use clap_complete::Shell;
1818
use clap_mangen::Man;
1919
use fluent_syntax::ast::{Entry, Message, Pattern};
2020
use fluent_syntax::parser;
21+
use regex::Regex;
2122
use textwrap::{fill, indent, termwidth};
2223
use zip::ZipArchive;
2324

@@ -26,6 +27,61 @@ use uucore::Args;
2627

2728
include!(concat!(env!("OUT_DIR"), "/uutils_map.rs"));
2829

30+
/// Post-process a generated manpage to fix mandoc lint issues
31+
///
32+
/// This function:
33+
/// - Fixes the TH header by uppercasing command names and removing invalid date formats
34+
/// - Removes trailing whitespace from all lines
35+
/// - Fixes redundant .br paragraph macros that cause mandoc warnings
36+
fn post_process_manpage(manpage: String) -> String {
37+
// Only match TH headers that have at least a command name on the same line
38+
// Use [ \t] instead of \s to avoid matching newlines
39+
let th_regex = Regex::new(r"(?m)^\.TH[ \t]+([^ \t\n]+)(?:[ \t]+[^\n]*)?$").unwrap();
40+
let mut result = th_regex
41+
.replace_all(&manpage, |caps: &regex::Captures| {
42+
format!(".TH {} 1", caps[1].to_uppercase())
43+
})
44+
.to_string();
45+
46+
// Process lines: remove trailing whitespace and fix .br issues in a single pass
47+
let lines: Vec<&str> = result.lines().collect();
48+
let mut fixed_lines = Vec::with_capacity(lines.len());
49+
let mut skip_indices = HashSet::new();
50+
51+
// First pass: identify lines to skip (redundant .br macros)
52+
for i in 0..lines.len() {
53+
let line = lines[i].trim_end();
54+
55+
if line == ".br" && !skip_indices.contains(&i) {
56+
// Check for consecutive .br macros
57+
if i > 0 && lines[i - 1].trim_end() == ".br" {
58+
skip_indices.insert(i);
59+
}
60+
// Check for .br, empty line, .br pattern
61+
else if i + 2 < lines.len()
62+
&& lines[i + 1].trim().is_empty()
63+
&& lines[i + 2].trim_end() == ".br"
64+
{
65+
skip_indices.insert(i + 2);
66+
}
67+
}
68+
}
69+
70+
// Second pass: build the final output
71+
for (i, line) in lines.iter().enumerate() {
72+
if !skip_indices.contains(&i) {
73+
fixed_lines.push(line.trim_end());
74+
}
75+
}
76+
77+
result = fixed_lines.join("\n");
78+
if !result.ends_with('\n') {
79+
result.push('\n');
80+
}
81+
82+
result
83+
}
84+
2985
/// Print usage information for uudoc
3086
fn usage<T: Args>(utils: &UtilityMap<T>) {
3187
println!("uudoc - Documentation generator for uutils coreutils");
@@ -100,63 +156,15 @@ fn gen_manpage<T: Args>(
100156
man.render(&mut buffer).expect("Man page generation failed");
101157

102158
// Convert to string for processing
103-
let mut manpage = String::from_utf8(buffer).expect("Invalid UTF-8 in manpage");
104-
105-
// Fix the TH line: remove version info from date field and uppercase the command name
106-
if let Some(th_pos) = manpage.find(".TH ") {
107-
if let Some(line_end) = manpage[th_pos..].find('\n') {
108-
let th_line = &manpage[th_pos..th_pos + line_end];
109-
// Parse the TH line parts
110-
let parts: Vec<&str> = th_line.split_whitespace().collect();
111-
if parts.len() >= 2 {
112-
let cmd_name = parts[1].to_uppercase();
113-
// Reconstruct TH line with uppercase command name and no date
114-
let new_th = format!(".TH {} 1", cmd_name);
115-
manpage.replace_range(th_pos..th_pos + line_end, &new_th);
116-
}
117-
}
118-
}
159+
let manpage = String::from_utf8(buffer).expect("Invalid UTF-8 in manpage");
119160

120-
// Remove trailing whitespace from all lines and fix .br issues
121-
let lines: Vec<String> = manpage
122-
.lines()
123-
.map(|line| line.trim_end().to_string())
124-
.collect();
125-
126-
// Fix .br paragraph macro issues
127-
let mut fixed_lines = Vec::new();
128-
let mut skip_next_br = false;
129-
130-
for i in 0..lines.len() {
131-
let line = &lines[i];
132-
133-
if line == ".br" {
134-
// Check for problematic patterns with .br
135-
let prev_is_br = i > 0 && lines[i - 1] == ".br";
136-
let next_is_empty_then_br =
137-
i + 2 < lines.len() && lines[i + 1].is_empty() && lines[i + 2] == ".br";
138-
let prev_is_empty_with_br = i >= 2 && lines[i - 1].is_empty() && lines[i - 2] == ".br";
139-
140-
// Skip redundant .br in these patterns
141-
if skip_next_br || prev_is_br || next_is_empty_then_br || prev_is_empty_with_br {
142-
skip_next_br = false;
143-
continue;
144-
}
145-
146-
// If this .br is followed by empty line and another .br, skip the second one
147-
if next_is_empty_then_br {
148-
skip_next_br = true;
149-
}
150-
}
151-
152-
fixed_lines.push(line.clone());
153-
}
154-
155-
manpage = fixed_lines.join("\n");
156-
manpage.push('\n');
161+
// Post-process the manpage to fix mandoc lint issues
162+
let processed_manpage = post_process_manpage(manpage);
157163

158164
// Write the processed manpage to stdout
159-
io::stdout().write_all(manpage.as_bytes()).unwrap();
165+
io::stdout()
166+
.write_all(processed_manpage.as_bytes())
167+
.unwrap();
160168
io::stdout().flush().unwrap();
161169
process::exit(0);
162170
}
@@ -691,3 +699,117 @@ fn format_examples(content: String, output_markdown: bool) -> Result<String, std
691699
)?;
692700
Ok(s)
693701
}
702+
703+
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `input` through `post_process_manpage` and checks the exact output.
    #[track_caller]
    fn assert_processed(input: &str, expected: &str) {
        assert_eq!(post_process_manpage(input.to_string()), expected);
    }

    #[test]
    fn test_post_process_manpage_fixes_th_header() {
        // The command name is uppercased and the version/date field is dropped.
        assert_processed(
            ".TH cat 1 \"cat (uutils coreutils) 0.7.0\"\n.SH NAME\ncat - concatenate files\n",
            ".TH CAT 1\n.SH NAME\ncat - concatenate files\n",
        );
    }

    #[test]
    fn test_post_process_manpage_removes_trailing_whitespace() {
        // Whitespace at the end of any line is stripped.
        assert_processed(
            ".TH TEST 1 \nSome text with trailing spaces \n.SH SECTION \n",
            ".TH TEST 1\nSome text with trailing spaces\n.SH SECTION\n",
        );
    }

    #[test]
    fn test_post_process_manpage_fixes_double_br() {
        // Two consecutive .br macros collapse into one.
        assert_processed(
            ".TH TEST 1\n.br\n.br\nSome text\n",
            ".TH TEST 1\n.br\nSome text\n",
        );
    }

    #[test]
    fn test_post_process_manpage_fixes_br_with_empty_line() {
        // In a .br / blank line / .br sequence the second .br is removed.
        assert_processed(
            ".TH TEST 1\n.br\n\n.br\nSome text\n",
            ".TH TEST 1\n.br\n\nSome text\n",
        );
    }

    #[test]
    fn test_post_process_manpage_preserves_single_br() {
        // A lone .br between text lines is left alone.
        let page = ".TH TEST 1\nLine 1\n.br\nLine 2\n";
        assert_processed(page, page);
    }

    #[test]
    fn test_post_process_manpage_handles_mixed_case_command() {
        // Mixed-case command names come out fully uppercased.
        assert_processed(
            ".TH CaT 1 \"some version info\"\nContent\n",
            ".TH CAT 1\nContent\n",
        );
    }

    #[test]
    fn test_post_process_manpage_handles_no_th_header() {
        // Input without any TH header passes through unchanged.
        let page = ".SH NAME\ntest - a test utility\n";
        assert_processed(page, page);
    }

    #[test]
    fn test_post_process_manpage_complex_br_pattern() {
        // Several redundant .br patterns in one document are all fixed.
        assert_processed(
            ".TH TEST 1\nSection 1\n.br\n\n.br\nMiddle\n.br\n.br\nSection 2\n.br\n\n.br\nEnd\n",
            ".TH TEST 1\nSection 1\n.br\n\nMiddle\n.br\nSection 2\n.br\n\nEnd\n",
        );
    }

    #[test]
    fn test_post_process_manpage_malformed_th_header() {
        // Unusual TH headers must neither panic nor be mangled.
        let cases = [
            // Missing command name: the line is left as-is.
            (".TH\nContent\n", ".TH\nContent\n"),
            // Command name containing special characters.
            (
                ".TH test-cmd 1 \"version 1.0\"\nContent\n",
                ".TH TEST-CMD 1\nContent\n",
            ),
            // TH header at end of file without a newline: one is appended.
            ("Content\n.TH test 1", "Content\n.TH TEST 1\n"),
            // Multiple TH headers: every one of them is rewritten.
            (
                ".TH first 1\nMiddle\n.TH second 1\n",
                ".TH FIRST 1\nMiddle\n.TH SECOND 1\n",
            ),
        ];

        for (input, expected) in cases {
            assert_processed(input, expected);
        }
    }
}

0 commit comments

Comments
 (0)