33// For the full copyright and license information, please view the LICENSE
44// file that was distributed with this source code.
55
6- // spell-checker:ignore mangen tldr
6+ // spell-checker:ignore mangen tldr mandoc uppercasing uppercased manpages DESTDIR
77
88use std:: {
9- collections:: HashMap ,
9+ collections:: { HashMap , HashSet } ,
1010 ffi:: OsString ,
1111 fs:: File ,
1212 io:: { self , Read , Seek , Write } ,
@@ -17,7 +17,9 @@ use clap::{Arg, Command};
1717use clap_complete:: Shell ;
1818use clap_mangen:: Man ;
1919use fluent_syntax:: ast:: { Entry , Message , Pattern } ;
20+ use jiff:: Zoned ;
2021use fluent_syntax:: parser;
22+ use regex:: Regex ;
2123use textwrap:: { fill, indent, termwidth} ;
2224use zip:: ZipArchive ;
2325
@@ -26,6 +28,75 @@ use uucore::Args;
2628
2729include ! ( concat!( env!( "OUT_DIR" ) , "/uutils_map.rs" ) ) ;
2830
31+ /// Post-process a generated manpage to fix mandoc lint issues
32+ ///
33+ /// This function:
34+ /// - Fixes the TH header by uppercasing command names and removing invalid date formats
35+ /// - Removes trailing whitespace from all lines
36+ /// - Fixes redundant .br paragraph macros that cause mandoc warnings
37+ fn post_process_manpage ( manpage : String ) -> String {
38+ // Only match TH headers that have at least a command name on the same line
39+ // Use [ \t] instead of \s to avoid matching newlines
40+ // Use a date format that satisfies mandoc (YYYY-MM-DD)
41+ let date = date. map_or_else (
42+ || Zoned :: now ( ) . strftime ( "%Y-%m-%d" ) . to_string ( ) ,
43+ str:: to_string,
44+ ) ;
45+
46+ let th_regex = Regex :: new ( r"(?m)^\.TH[ \t]+([^ \t\n]+)(?:[ \t]+[^\n]*)?$" ) . unwrap ( ) ;
47+ let mut result = th_regex
48+ . replace_all ( & manpage, |caps : & regex:: Captures | {
49+ // Add date to satisfy mandoc - date must be quoted
50+ format ! ( ".TH {} 1 \" {date}\" " , caps[ 1 ] . to_uppercase( ) )
51+ } )
52+ . to_string ( ) ;
53+
54+ // Process lines: remove trailing whitespace and fix .br issues in a single pass
55+ let lines: Vec < & str > = result. lines ( ) . collect ( ) ;
56+ let mut fixed_lines = Vec :: with_capacity ( lines. len ( ) ) ;
57+ let mut skip_indices = HashSet :: new ( ) ;
58+
59+ // First pass: identify lines to skip (redundant .br macros)
60+ for i in 0 ..lines. len ( ) {
61+ let line = lines[ i] . trim_end ( ) ;
62+
63+ if line == ".br" && !skip_indices. contains ( & i) {
64+ // Check for consecutive .br macros
65+ if i > 0 && lines[ i - 1 ] . trim_end ( ) == ".br" {
66+ skip_indices. insert ( i) ;
67+ }
68+ // Check for .br, empty line, .br pattern
69+ else if i + 2 < lines. len ( )
70+ && lines[ i + 1 ] . trim ( ) . is_empty ( )
71+ && lines[ i + 2 ] . trim_end ( ) == ".br"
72+ {
73+ skip_indices. insert ( i + 2 ) ;
74+ }
75+ }
76+ }
77+
78+ // Second pass: build the final output
79+ for ( i, line) in lines. iter ( ) . enumerate ( ) {
80+ if !skip_indices. contains ( & i) {
81+ fixed_lines. push ( line. trim_end ( ) ) ;
82+ }
83+ }
84+
85+ result = fixed_lines. join ( "\n " ) ;
86+
87+ // Fix escape sequence issues
88+ // \\\\0 appears when trying to represent literal \0 string
89+ // In man pages, use \e for literal backslash
90+ result = result. replace ( "\\ \\ \\ \\ 0" , "\\ e0" ) ;
91+ result = result. replace ( "\\ \\ 0" , "\\ e0" ) ;
92+
93+ if !result. ends_with ( '\n' ) {
94+ result. push ( '\n' ) ;
95+ }
96+
97+ result
98+ }
99+
29100/// Print usage information for uudoc
30101fn usage < T : Args > ( utils : & UtilityMap < T > ) {
31102 println ! ( "uudoc - Documentation generator for uutils coreutils" ) ;
@@ -100,63 +171,15 @@ fn gen_manpage<T: Args>(
100171 man. render ( & mut buffer) . expect ( "Man page generation failed" ) ;
101172
102173 // Convert to string for processing
103- let mut manpage = String :: from_utf8 ( buffer) . expect ( "Invalid UTF-8 in manpage" ) ;
104-
105- // Fix the TH line: remove version info from date field and uppercase the command name
106- if let Some ( th_pos) = manpage. find ( ".TH " ) {
107- if let Some ( line_end) = manpage[ th_pos..] . find ( '\n' ) {
108- let th_line = & manpage[ th_pos..th_pos + line_end] ;
109- // Parse the TH line parts
110- let parts: Vec < & str > = th_line. split_whitespace ( ) . collect ( ) ;
111- if parts. len ( ) >= 2 {
112- let cmd_name = parts[ 1 ] . to_uppercase ( ) ;
113- // Reconstruct TH line with uppercase command name and no date
114- let new_th = format ! ( ".TH {} 1" , cmd_name) ;
115- manpage. replace_range ( th_pos..th_pos + line_end, & new_th) ;
116- }
117- }
118- }
174+ let manpage = String :: from_utf8 ( buffer) . expect ( "Invalid UTF-8 in manpage" ) ;
119175
120- // Remove trailing whitespace from all lines and fix .br issues
121- let lines: Vec < String > = manpage
122- . lines ( )
123- . map ( |line| line. trim_end ( ) . to_string ( ) )
124- . collect ( ) ;
125-
126- // Fix .br paragraph macro issues
127- let mut fixed_lines = Vec :: new ( ) ;
128- let mut skip_next_br = false ;
129-
130- for i in 0 ..lines. len ( ) {
131- let line = & lines[ i] ;
132-
133- if line == ".br" {
134- // Check for problematic patterns with .br
135- let prev_is_br = i > 0 && lines[ i - 1 ] == ".br" ;
136- let next_is_empty_then_br =
137- i + 2 < lines. len ( ) && lines[ i + 1 ] . is_empty ( ) && lines[ i + 2 ] == ".br" ;
138- let prev_is_empty_with_br = i >= 2 && lines[ i - 1 ] . is_empty ( ) && lines[ i - 2 ] == ".br" ;
139-
140- // Skip redundant .br in these patterns
141- if skip_next_br || prev_is_br || next_is_empty_then_br || prev_is_empty_with_br {
142- skip_next_br = false ;
143- continue ;
144- }
145-
146- // If this .br is followed by empty line and another .br, skip the second one
147- if next_is_empty_then_br {
148- skip_next_br = true ;
149- }
150- }
151-
152- fixed_lines. push ( line. clone ( ) ) ;
153- }
154-
155- manpage = fixed_lines. join ( "\n " ) ;
156- manpage. push ( '\n' ) ;
176+ // Post-process the manpage to fix mandoc lint issues
177+ let processed_manpage = post_process_manpage ( manpage, None ) ;
157178
158179 // Write the processed manpage to stdout
159- io:: stdout ( ) . write_all ( manpage. as_bytes ( ) ) . unwrap ( ) ;
180+ io:: stdout ( )
181+ . write_all ( processed_manpage. as_bytes ( ) )
182+ . unwrap ( ) ;
160183 io:: stdout ( ) . flush ( ) . unwrap ( ) ;
161184 process:: exit ( 0 ) ;
162185}
@@ -691,3 +714,119 @@ fn format_examples(content: String, output_markdown: bool) -> Result<String, std
691714 ) ?;
692715 Ok ( s)
693716}
717+
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixed date injected into every test so the `.TH` header is deterministic.
    const TEST_DATE: &str = "2024-01-01";

    /// Runs `post_process_manpage` on `input` with the fixed test date.
    fn process(input: &str) -> String {
        post_process_manpage(input.to_string(), Some(TEST_DATE))
    }

    #[test]
    fn test_post_process_manpage_fixes_th_header() {
        // Command names are uppercased and the version/date field is replaced
        // by the quoted date.
        let input =
            ".TH cat 1 \"cat (uutils coreutils) 0.7.0\"\n.SH NAME\ncat - concatenate files\n";
        let expected = ".TH CAT 1 \"2024-01-01\"\n.SH NAME\ncat - concatenate files\n";

        assert_eq!(process(input), expected);
    }

    #[test]
    fn test_post_process_manpage_removes_trailing_whitespace() {
        // Trailing whitespace is removed from every line.
        let input = ".TH TEST 1 \nSome text with trailing spaces \n.SH SECTION \n";
        let expected = ".TH TEST 1 \"2024-01-01\"\nSome text with trailing spaces\n.SH SECTION\n";

        assert_eq!(process(input), expected);
    }

    #[test]
    fn test_post_process_manpage_fixes_double_br() {
        // Of two consecutive .br macros, only the first is kept.
        let input = ".TH TEST 1\n.br\n.br\nSome text\n";
        let expected = ".TH TEST 1 \"2024-01-01\"\n.br\nSome text\n";

        assert_eq!(process(input), expected);
    }

    #[test]
    fn test_post_process_manpage_fixes_br_with_empty_line() {
        // Both .br macros are removed: the first because it is followed by an
        // empty line, the second because it is preceded by one.
        let input = ".TH TEST 1\n.br\n\n.br\nSome text\n";
        let expected = ".TH TEST 1 \"2024-01-01\"\n\nSome text\n";

        assert_eq!(process(input), expected);
    }

    #[test]
    fn test_post_process_manpage_preserves_single_br() {
        // A lone .br macro between text lines is preserved.
        let input = ".TH TEST 1\nLine 1\n.br\nLine 2\n";
        let expected = ".TH TEST 1 \"2024-01-01\"\nLine 1\n.br\nLine 2\n";

        assert_eq!(process(input), expected);
    }

    #[test]
    fn test_post_process_manpage_handles_mixed_case_command() {
        // Mixed-case command names are uppercased.
        let input = ".TH CaT 1 \"some version info\"\nContent\n";
        let expected = ".TH CAT 1 \"2024-01-01\"\nContent\n";

        assert_eq!(process(input), expected);
    }

    #[test]
    fn test_post_process_manpage_handles_no_th_header() {
        // Manpages without a TH header pass through unchanged.
        let input = ".SH NAME\ntest - a test utility\n";
        let expected = ".SH NAME\ntest - a test utility\n";

        assert_eq!(process(input), expected);
    }

    #[test]
    fn test_post_process_manpage_complex_br_pattern() {
        // Several .br patterns in one document: .br macros around an empty
        // line are both removed, and consecutive .br macros collapse to one.
        let input =
            ".TH TEST 1\nSection 1\n.br\n\n.br\nMiddle\n.br\n.br\nSection 2\n.br\n\n.br\nEnd\n";
        let expected = ".TH TEST 1 \"2024-01-01\"\nSection 1\n\nMiddle\n.br\nSection 2\n\nEnd\n";

        assert_eq!(process(input), expected);
    }

    #[test]
    fn test_post_process_manpage_malformed_th_header() {
        // A TH header without a command name is left untouched (and must not panic).
        let input1 = ".TH\nContent\n";
        let expected1 = ".TH\nContent\n";
        assert_eq!(process(input1), expected1);

        // Command names containing special characters are uppercased as-is.
        let input2 = ".TH test-cmd 1 \"version 1.0\"\nContent\n";
        let expected2 = ".TH TEST-CMD 1 \"2024-01-01\"\nContent\n";
        assert_eq!(process(input2), expected2);

        // A TH header at the end of the file without a newline is still
        // rewritten, and a trailing newline is appended.
        let input3 = "Content\n.TH test 1";
        let expected3 = "Content\n.TH TEST 1 \"2024-01-01\"\n";
        assert_eq!(process(input3), expected3);

        // Multiple TH headers: every header that starts a line is rewritten.
        let input4 = ".TH first 1\nMiddle\n.TH second 1\n";
        let expected4 = ".TH FIRST 1 \"2024-01-01\"\nMiddle\n.TH SECOND 1 \"2024-01-01\"\n";
        assert_eq!(process(input4), expected4);
    }
}
0 commit comments