66// spell-checker:ignore mangen tldr
77
88use std:: {
9- collections:: HashMap ,
9+ collections:: { HashMap , HashSet } ,
1010 ffi:: OsString ,
1111 fs:: File ,
1212 io:: { self , Read , Seek , Write } ,
@@ -18,6 +18,7 @@ use clap_complete::Shell;
1818use clap_mangen:: Man ;
1919use fluent_syntax:: ast:: { Entry , Message , Pattern } ;
2020use fluent_syntax:: parser;
21+ use regex:: Regex ;
2122use textwrap:: { fill, indent, termwidth} ;
2223use zip:: ZipArchive ;
2324
@@ -26,6 +27,61 @@ use uucore::Args;
2627
2728include ! ( concat!( env!( "OUT_DIR" ) , "/uutils_map.rs" ) ) ;
2829
30+ /// Post-process a generated manpage to fix mandoc lint issues
31+ ///
32+ /// This function:
33+ /// - Fixes the TH header by uppercasing command names and removing invalid date formats
34+ /// - Removes trailing whitespace from all lines
35+ /// - Fixes redundant .br paragraph macros that cause mandoc warnings
36+ fn post_process_manpage ( manpage : String ) -> String {
37+ // Only match TH headers that have at least a command name on the same line
38+ // Use [ \t] instead of \s to avoid matching newlines
39+ let th_regex = Regex :: new ( r"(?m)^\.TH[ \t]+([^ \t\n]+)(?:[ \t]+[^\n]*)?$" ) . unwrap ( ) ;
40+ let mut result = th_regex
41+ . replace_all ( & manpage, |caps : & regex:: Captures | {
42+ format ! ( ".TH {} 1" , caps[ 1 ] . to_uppercase( ) )
43+ } )
44+ . to_string ( ) ;
45+
46+ // Process lines: remove trailing whitespace and fix .br issues in a single pass
47+ let lines: Vec < & str > = result. lines ( ) . collect ( ) ;
48+ let mut fixed_lines = Vec :: with_capacity ( lines. len ( ) ) ;
49+ let mut skip_indices = HashSet :: new ( ) ;
50+
51+ // First pass: identify lines to skip (redundant .br macros)
52+ for i in 0 ..lines. len ( ) {
53+ let line = lines[ i] . trim_end ( ) ;
54+
55+ if line == ".br" && !skip_indices. contains ( & i) {
56+ // Check for consecutive .br macros
57+ if i > 0 && lines[ i - 1 ] . trim_end ( ) == ".br" {
58+ skip_indices. insert ( i) ;
59+ }
60+ // Check for .br, empty line, .br pattern
61+ else if i + 2 < lines. len ( )
62+ && lines[ i + 1 ] . trim ( ) . is_empty ( )
63+ && lines[ i + 2 ] . trim_end ( ) == ".br"
64+ {
65+ skip_indices. insert ( i + 2 ) ;
66+ }
67+ }
68+ }
69+
70+ // Second pass: build the final output
71+ for ( i, line) in lines. iter ( ) . enumerate ( ) {
72+ if !skip_indices. contains ( & i) {
73+ fixed_lines. push ( line. trim_end ( ) ) ;
74+ }
75+ }
76+
77+ result = fixed_lines. join ( "\n " ) ;
78+ if !result. ends_with ( '\n' ) {
79+ result. push ( '\n' ) ;
80+ }
81+
82+ result
83+ }
84+
2985/// Print usage information for uudoc
3086fn usage < T : Args > ( utils : & UtilityMap < T > ) {
3187 println ! ( "uudoc - Documentation generator for uutils coreutils" ) ;
@@ -100,63 +156,15 @@ fn gen_manpage<T: Args>(
100156 man. render ( & mut buffer) . expect ( "Man page generation failed" ) ;
101157
102158 // Convert to string for processing
103- let mut manpage = String :: from_utf8 ( buffer) . expect ( "Invalid UTF-8 in manpage" ) ;
104-
105- // Fix the TH line: remove version info from date field and uppercase the command name
106- if let Some ( th_pos) = manpage. find ( ".TH " ) {
107- if let Some ( line_end) = manpage[ th_pos..] . find ( '\n' ) {
108- let th_line = & manpage[ th_pos..th_pos + line_end] ;
109- // Parse the TH line parts
110- let parts: Vec < & str > = th_line. split_whitespace ( ) . collect ( ) ;
111- if parts. len ( ) >= 2 {
112- let cmd_name = parts[ 1 ] . to_uppercase ( ) ;
113- // Reconstruct TH line with uppercase command name and no date
114- let new_th = format ! ( ".TH {} 1" , cmd_name) ;
115- manpage. replace_range ( th_pos..th_pos + line_end, & new_th) ;
116- }
117- }
118- }
159+ let manpage = String :: from_utf8 ( buffer) . expect ( "Invalid UTF-8 in manpage" ) ;
119160
120- // Remove trailing whitespace from all lines and fix .br issues
121- let lines: Vec < String > = manpage
122- . lines ( )
123- . map ( |line| line. trim_end ( ) . to_string ( ) )
124- . collect ( ) ;
125-
126- // Fix .br paragraph macro issues
127- let mut fixed_lines = Vec :: new ( ) ;
128- let mut skip_next_br = false ;
129-
130- for i in 0 ..lines. len ( ) {
131- let line = & lines[ i] ;
132-
133- if line == ".br" {
134- // Check for problematic patterns with .br
135- let prev_is_br = i > 0 && lines[ i - 1 ] == ".br" ;
136- let next_is_empty_then_br =
137- i + 2 < lines. len ( ) && lines[ i + 1 ] . is_empty ( ) && lines[ i + 2 ] == ".br" ;
138- let prev_is_empty_with_br = i >= 2 && lines[ i - 1 ] . is_empty ( ) && lines[ i - 2 ] == ".br" ;
139-
140- // Skip redundant .br in these patterns
141- if skip_next_br || prev_is_br || next_is_empty_then_br || prev_is_empty_with_br {
142- skip_next_br = false ;
143- continue ;
144- }
145-
146- // If this .br is followed by empty line and another .br, skip the second one
147- if next_is_empty_then_br {
148- skip_next_br = true ;
149- }
150- }
151-
152- fixed_lines. push ( line. clone ( ) ) ;
153- }
154-
155- manpage = fixed_lines. join ( "\n " ) ;
156- manpage. push ( '\n' ) ;
161+ // Post-process the manpage to fix mandoc lint issues
162+ let processed_manpage = post_process_manpage ( manpage) ;
157163
158164 // Write the processed manpage to stdout
159- io:: stdout ( ) . write_all ( manpage. as_bytes ( ) ) . unwrap ( ) ;
165+ io:: stdout ( )
166+ . write_all ( processed_manpage. as_bytes ( ) )
167+ . unwrap ( ) ;
160168 io:: stdout ( ) . flush ( ) . unwrap ( ) ;
161169 process:: exit ( 0 ) ;
162170}
@@ -691,3 +699,117 @@ fn format_examples(content: String, output_markdown: bool) -> Result<String, std
691699 ) ?;
692700 Ok ( s)
693701}
702+
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `post_process_manpage` on `input` and asserts that the output is
    /// exactly `expected`. `#[track_caller]` keeps failure locations pointing
    /// at the calling test rather than at this helper.
    #[track_caller]
    fn assert_processed(input: &str, expected: &str) {
        assert_eq!(post_process_manpage(input.to_string()), expected);
    }

    #[test]
    fn test_post_process_manpage_fixes_th_header() {
        // Command name is uppercased and the date/version field is removed.
        assert_processed(
            ".TH cat 1 \"cat (uutils coreutils) 0.7.0\"\n.SH NAME\ncat - concatenate files\n",
            ".TH CAT 1\n.SH NAME\ncat - concatenate files\n",
        );
    }

    #[test]
    fn test_post_process_manpage_removes_trailing_whitespace() {
        // Trailing whitespace is stripped from every line.
        assert_processed(
            ".TH TEST 1 \nSome text with trailing spaces \n.SH SECTION \n",
            ".TH TEST 1\nSome text with trailing spaces\n.SH SECTION\n",
        );
    }

    #[test]
    fn test_post_process_manpage_fixes_double_br() {
        // The second of two consecutive .br macros is dropped.
        assert_processed(
            ".TH TEST 1\n.br\n.br\nSome text\n",
            ".TH TEST 1\n.br\nSome text\n",
        );
    }

    #[test]
    fn test_post_process_manpage_fixes_br_with_empty_line() {
        // In a ".br, blank line, .br" sequence the trailing .br is dropped.
        assert_processed(
            ".TH TEST 1\n.br\n\n.br\nSome text\n",
            ".TH TEST 1\n.br\n\nSome text\n",
        );
    }

    #[test]
    fn test_post_process_manpage_preserves_single_br() {
        // A lone .br between text lines is left alone.
        assert_processed(
            ".TH TEST 1\nLine 1\n.br\nLine 2\n",
            ".TH TEST 1\nLine 1\n.br\nLine 2\n",
        );
    }

    #[test]
    fn test_post_process_manpage_handles_mixed_case_command() {
        // Mixed-case command names are uppercased.
        assert_processed(
            ".TH CaT 1 \"some version info\"\nContent\n",
            ".TH CAT 1\nContent\n",
        );
    }

    #[test]
    fn test_post_process_manpage_handles_no_th_header() {
        // Input without any TH header passes through unchanged.
        assert_processed(
            ".SH NAME\ntest - a test utility\n",
            ".SH NAME\ntest - a test utility\n",
        );
    }

    #[test]
    fn test_post_process_manpage_complex_br_pattern() {
        // Several independent .br patterns in one document.
        assert_processed(
            ".TH TEST 1\nSection 1\n.br\n\n.br\nMiddle\n.br\n.br\nSection 2\n.br\n\n.br\nEnd\n",
            ".TH TEST 1\nSection 1\n.br\n\nMiddle\n.br\nSection 2\n.br\n\nEnd\n",
        );
    }

    #[test]
    fn test_post_process_manpage_malformed_th_header() {
        // A TH line with no command name is not rewritten (and must not panic).
        assert_processed(".TH\nContent\n", ".TH\nContent\n");

        // Command names containing special characters are uppercased as-is.
        assert_processed(
            ".TH test-cmd 1 \"version 1.0\"\nContent\n",
            ".TH TEST-CMD 1\nContent\n",
        );

        // A TH header on the last line without a newline still gets rewritten,
        // and the output gains a terminating newline.
        assert_processed("Content\n.TH test 1", "Content\n.TH TEST 1\n");

        // Multiple TH headers: the pattern is multi-line, so `^` matches at
        // the start of every line and every TH header is rewritten.
        assert_processed(
            ".TH first 1\nMiddle\n.TH second 1\n",
            ".TH FIRST 1\nMiddle\n.TH SECOND 1\n",
        );
    }
}
0 commit comments