33// For the full copyright and license information, please view the LICENSE
44// file that was distributed with this source code.
55
6- // spell-checker:ignore mangen tldr
6+ // spell-checker:ignore mangen tldr mandoc uppercasing uppercased manpages DESTDIR
77
88use std:: {
9- collections:: HashMap ,
9+ collections:: { HashMap , HashSet } ,
1010 ffi:: OsString ,
1111 fs:: File ,
1212 io:: { self , Read , Seek , Write } ,
@@ -18,6 +18,7 @@ use clap_complete::Shell;
1818use clap_mangen:: Man ;
1919use fluent_syntax:: ast:: { Entry , Message , Pattern } ;
2020use fluent_syntax:: parser;
21+ use regex:: Regex ;
2122use textwrap:: { fill, indent, termwidth} ;
2223use zip:: ZipArchive ;
2324
@@ -26,6 +27,72 @@ use uucore::Args;
2627
2728include ! ( concat!( env!( "OUT_DIR" ) , "/uutils_map.rs" ) ) ;
2829
30+ /// Post-process a generated manpage to fix mandoc lint issues
31+ ///
32+ /// This function:
33+ /// - Fixes the TH header by uppercasing command names and removing invalid date formats
34+ /// - Removes trailing whitespace from all lines
35+ /// - Fixes redundant .br paragraph macros that cause mandoc warnings
36+ fn post_process_manpage ( manpage : String ) -> String {
37+ // Only match TH headers that have at least a command name on the same line
38+ // Use [ \t] instead of \s to avoid matching newlines
39+ // Use a date format that satisfies mandoc (YYYY-MM-DD)
40+ let date = Zoned :: now ( ) . strftime ( "%Y-%m-%d" ) . to_string ( ) ;
41+
42+ let th_regex = Regex :: new ( r"(?m)^\.TH[ \t]+([^ \t\n]+)(?:[ \t]+[^\n]*)?$" ) . unwrap ( ) ;
43+ let mut result = th_regex
44+ . replace_all ( & manpage, |caps : & regex:: Captures | {
45+ // Add date to satisfy mandoc - date must be quoted
46+ format ! ( ".TH {} 1 \" {date}\" " , caps[ 1 ] . to_uppercase( ) )
47+ } )
48+ . to_string ( ) ;
49+
50+ // Process lines: remove trailing whitespace and fix .br issues in a single pass
51+ let lines: Vec < & str > = result. lines ( ) . collect ( ) ;
52+ let mut fixed_lines = Vec :: with_capacity ( lines. len ( ) ) ;
53+ let mut skip_indices = HashSet :: new ( ) ;
54+
55+ // First pass: identify lines to skip (redundant .br macros)
56+ for i in 0 ..lines. len ( ) {
57+ let line = lines[ i] . trim_end ( ) ;
58+
59+ if line == ".br" && !skip_indices. contains ( & i) {
60+ // Check for consecutive .br macros
61+ if i > 0 && lines[ i - 1 ] . trim_end ( ) == ".br" {
62+ skip_indices. insert ( i) ;
63+ }
64+ // Check for .br, empty line, .br pattern
65+ else if i + 2 < lines. len ( )
66+ && lines[ i + 1 ] . trim ( ) . is_empty ( )
67+ && lines[ i + 2 ] . trim_end ( ) == ".br"
68+ {
69+ skip_indices. insert ( i + 2 ) ;
70+ }
71+ }
72+ }
73+
74+ // Second pass: build the final output
75+ for ( i, line) in lines. iter ( ) . enumerate ( ) {
76+ if !skip_indices. contains ( & i) {
77+ fixed_lines. push ( line. trim_end ( ) ) ;
78+ }
79+ }
80+
81+ result = fixed_lines. join ( "\n " ) ;
82+
83+ // Fix escape sequence issues
84+ // \\\\0 appears when trying to represent literal \0 string
85+ // In man pages, use \e for literal backslash
86+ result = result. replace ( "\\ \\ \\ \\ 0" , "\\ e0" ) ;
87+ result = result. replace ( "\\ \\ 0" , "\\ e0" ) ;
88+
89+ if !result. ends_with ( '\n' ) {
90+ result. push ( '\n' ) ;
91+ }
92+
93+ result
94+ }
95+
2996/// Print usage information for uudoc
3097fn usage < T : Args > ( utils : & UtilityMap < T > ) {
3198 println ! ( "uudoc - Documentation generator for uutils coreutils" ) ;
@@ -100,63 +167,15 @@ fn gen_manpage<T: Args>(
100167 man. render ( & mut buffer) . expect ( "Man page generation failed" ) ;
101168
102169 // Convert to string for processing
103- let mut manpage = String :: from_utf8 ( buffer) . expect ( "Invalid UTF-8 in manpage" ) ;
104-
105- // Fix the TH line: remove version info from date field and uppercase the command name
106- if let Some ( th_pos) = manpage. find ( ".TH " ) {
107- if let Some ( line_end) = manpage[ th_pos..] . find ( '\n' ) {
108- let th_line = & manpage[ th_pos..th_pos + line_end] ;
109- // Parse the TH line parts
110- let parts: Vec < & str > = th_line. split_whitespace ( ) . collect ( ) ;
111- if parts. len ( ) >= 2 {
112- let cmd_name = parts[ 1 ] . to_uppercase ( ) ;
113- // Reconstruct TH line with uppercase command name and no date
114- let new_th = format ! ( ".TH {} 1" , cmd_name) ;
115- manpage. replace_range ( th_pos..th_pos + line_end, & new_th) ;
116- }
117- }
118- }
119-
120- // Remove trailing whitespace from all lines and fix .br issues
121- let lines: Vec < String > = manpage
122- . lines ( )
123- . map ( |line| line. trim_end ( ) . to_string ( ) )
124- . collect ( ) ;
170+ let manpage = String :: from_utf8 ( buffer) . expect ( "Invalid UTF-8 in manpage" ) ;
125171
126- // Fix .br paragraph macro issues
127- let mut fixed_lines = Vec :: new ( ) ;
128- let mut skip_next_br = false ;
129-
130- for i in 0 ..lines. len ( ) {
131- let line = & lines[ i] ;
132-
133- if line == ".br" {
134- // Check for problematic patterns with .br
135- let prev_is_br = i > 0 && lines[ i - 1 ] == ".br" ;
136- let next_is_empty_then_br =
137- i + 2 < lines. len ( ) && lines[ i + 1 ] . is_empty ( ) && lines[ i + 2 ] == ".br" ;
138- let prev_is_empty_with_br = i >= 2 && lines[ i - 1 ] . is_empty ( ) && lines[ i - 2 ] == ".br" ;
139-
140- // Skip redundant .br in these patterns
141- if skip_next_br || prev_is_br || next_is_empty_then_br || prev_is_empty_with_br {
142- skip_next_br = false ;
143- continue ;
144- }
145-
146- // If this .br is followed by empty line and another .br, skip the second one
147- if next_is_empty_then_br {
148- skip_next_br = true ;
149- }
150- }
151-
152- fixed_lines. push ( line. clone ( ) ) ;
153- }
154-
155- manpage = fixed_lines. join ( "\n " ) ;
156- manpage. push ( '\n' ) ;
172+ // Post-process the manpage to fix mandoc lint issues
173+ let processed_manpage = post_process_manpage ( manpage) ;
157174
158175 // Write the processed manpage to stdout
159- io:: stdout ( ) . write_all ( manpage. as_bytes ( ) ) . unwrap ( ) ;
176+ io:: stdout ( )
177+ . write_all ( processed_manpage. as_bytes ( ) )
178+ . unwrap ( ) ;
160179 io:: stdout ( ) . flush ( ) . unwrap ( ) ;
161180 process:: exit ( 0 ) ;
162181}
@@ -691,3 +710,119 @@ fn format_examples(content: String, output_markdown: bool) -> Result<String, std
691710 ) ?;
692711 Ok ( s)
693712}
713+
#[cfg(test)]
mod tests {
    use super::*;

    /// Date that `post_process_manpage` is expected to write into `.TH` headers.
    ///
    /// It is computed with the same expression the function uses, so the tests do not
    /// depend on the day they run. A run that straddles midnight could still observe
    /// two different dates.
    fn today() -> String {
        Zoned::now().strftime("%Y-%m-%d").to_string()
    }

    #[test]
    fn test_post_process_manpage_fixes_th_header() {
        // The command name is uppercased and the version string is replaced by the date
        let date = today();
        let input =
            ".TH cat 1 \"cat (uutils coreutils) 0.7.0\"\n.SH NAME\ncat - concatenate files\n";
        let expected = format!(".TH CAT 1 \"{date}\"\n.SH NAME\ncat - concatenate files\n");

        let result = post_process_manpage(input.to_string());
        assert_eq!(result, expected);
    }

    #[test]
    fn test_post_process_manpage_removes_trailing_whitespace() {
        // Trailing whitespace is removed from every line
        let date = today();
        let input = ".TH TEST 1 \nSome text with trailing spaces \n.SH SECTION \n";
        let expected =
            format!(".TH TEST 1 \"{date}\"\nSome text with trailing spaces\n.SH SECTION\n");

        let result = post_process_manpage(input.to_string());
        assert_eq!(result, expected);
    }

    #[test]
    fn test_post_process_manpage_fixes_double_br() {
        // Of two consecutive .br macros only the first one is kept
        let date = today();
        let input = ".TH TEST 1\n.br\n.br\nSome text\n";
        let expected = format!(".TH TEST 1 \"{date}\"\n.br\nSome text\n");

        let result = post_process_manpage(input.to_string());
        assert_eq!(result, expected);
    }

    #[test]
    fn test_post_process_manpage_fixes_br_with_empty_line() {
        // Both .br macros should be removed
        // (first because followed by empty, second because preceded by empty)
        let date = today();
        let input = ".TH TEST 1\n.br\n\n.br\nSome text\n";
        let expected = format!(".TH TEST 1 \"{date}\"\n\nSome text\n");

        let result = post_process_manpage(input.to_string());
        assert_eq!(result, expected);
    }

    #[test]
    fn test_post_process_manpage_preserves_single_br() {
        // A single .br between text lines is preserved
        let date = today();
        let input = ".TH TEST 1\nLine 1\n.br\nLine 2\n";
        let expected = format!(".TH TEST 1 \"{date}\"\nLine 1\n.br\nLine 2\n");

        let result = post_process_manpage(input.to_string());
        assert_eq!(result, expected);
    }

    #[test]
    fn test_post_process_manpage_handles_mixed_case_command() {
        // Mixed case command names are uppercased
        let date = today();
        let input = ".TH CaT 1 \"some version info\"\nContent\n";
        let expected = format!(".TH CAT 1 \"{date}\"\nContent\n");

        let result = post_process_manpage(input.to_string());
        assert_eq!(result, expected);
    }

    #[test]
    fn test_post_process_manpage_handles_no_th_header() {
        // Manpages without a TH header pass through unchanged
        let input = ".SH NAME\ntest - a test utility\n";
        let expected = ".SH NAME\ntest - a test utility\n";

        let result = post_process_manpage(input.to_string());
        assert_eq!(result, expected);
    }

    #[test]
    fn test_post_process_manpage_complex_br_pattern() {
        // Several .br patterns in one page
        let date = today();
        let input =
            ".TH TEST 1\nSection 1\n.br\n\n.br\nMiddle\n.br\n.br\nSection 2\n.br\n\n.br\nEnd\n";
        // .br followed/preceded by empty lines should be removed,
        // consecutive .br should have one removed
        let expected =
            format!(".TH TEST 1 \"{date}\"\nSection 1\n\nMiddle\n.br\nSection 2\n\nEnd\n");

        let result = post_process_manpage(input.to_string());
        assert_eq!(result, expected);
    }

    #[test]
    fn test_post_process_manpage_malformed_th_header() {
        let date = today();

        // Missing command name: the header is left as is and nothing panics
        let input1 = ".TH\nContent\n";
        let expected1 = ".TH\nContent\n";
        let result1 = post_process_manpage(input1.to_string());
        assert_eq!(result1, expected1);

        // TH header with special characters
        let input2 = ".TH test-cmd 1 \"version 1.0\"\nContent\n";
        let expected2 = format!(".TH TEST-CMD 1 \"{date}\"\nContent\n");
        let result2 = post_process_manpage(input2.to_string());
        assert_eq!(result2, expected2);

        // TH header at end of file without newline
        let input3 = "Content\n.TH test 1";
        let expected3 = format!("Content\n.TH TEST 1 \"{date}\"\n");
        let result3 = post_process_manpage(input3.to_string());
        assert_eq!(result3, expected3);

        // Multiple TH headers: the pattern is multi-line and applied with
        // `replace_all`, so every header is rewritten
        let input4 = ".TH first 1\nMiddle\n.TH second 1\n";
        let expected4 =
            format!(".TH FIRST 1 \"{date}\"\nMiddle\n.TH SECOND 1 \"{date}\"\n");
        let result4 = post_process_manpage(input4.to_string());
        assert_eq!(result4, expected4);
    }
}
0 commit comments