33// For the full copyright and license information, please view the LICENSE
44// file that was distributed with this source code.
55
6- // spell-checker:ignore mangen tldr
6+ // spell-checker:ignore mangen tldr mandoc uppercasing uppercased manpages DESTDIR
77
88use std:: {
9- collections:: HashMap ,
9+ collections:: { HashMap , HashSet } ,
1010 ffi:: OsString ,
1111 fs:: File ,
1212 io:: { self , Read , Seek , Write } ,
@@ -18,6 +18,7 @@ use clap_complete::Shell;
1818use clap_mangen:: Man ;
1919use fluent_syntax:: ast:: { Entry , Message , Pattern } ;
2020use fluent_syntax:: parser;
21+ use regex:: Regex ;
2122use textwrap:: { fill, indent, termwidth} ;
2223use zip:: ZipArchive ;
2324
@@ -26,6 +27,73 @@ use uucore::Args;
2627
2728include ! ( concat!( env!( "OUT_DIR" ) , "/uutils_map.rs" ) ) ;
2829
30+ /// Post-process a generated manpage to fix mandoc lint issues
31+ ///
32+ /// This function:
33+ /// - Fixes the TH header by uppercasing command names and removing invalid date formats
34+ /// - Removes trailing whitespace from all lines
35+ /// - Fixes redundant .br paragraph macros that cause mandoc warnings
36+ fn post_process_manpage ( manpage : String ) -> String {
37+ // Only match TH headers that have at least a command name on the same line
38+ // Use [ \t] instead of \s to avoid matching newlines
39+ // Use a date format that satisfies mandoc (YYYY-MM-DD)
40+ // Using a static date to ensure reproducible builds
41+ let date = "2024-01-01" ;
42+
43+ let th_regex = Regex :: new ( r"(?m)^\.TH[ \t]+([^ \t\n]+)(?:[ \t]+[^\n]*)?$" ) . unwrap ( ) ;
44+ let mut result = th_regex
45+ . replace_all ( & manpage, |caps : & regex:: Captures | {
46+ // Add date to satisfy mandoc - date must be quoted
47+ format ! ( ".TH {} 1 \" {}\" " , caps[ 1 ] . to_uppercase( ) , date)
48+ } )
49+ . to_string ( ) ;
50+
51+ // Process lines: remove trailing whitespace and fix .br issues in a single pass
52+ let lines: Vec < & str > = result. lines ( ) . collect ( ) ;
53+ let mut fixed_lines = Vec :: with_capacity ( lines. len ( ) ) ;
54+ let mut skip_indices = HashSet :: new ( ) ;
55+
56+ // First pass: identify lines to skip (redundant .br macros)
57+ for i in 0 ..lines. len ( ) {
58+ let line = lines[ i] . trim_end ( ) ;
59+
60+ if line == ".br" && !skip_indices. contains ( & i) {
61+ // Check for consecutive .br macros
62+ if i > 0 && lines[ i - 1 ] . trim_end ( ) == ".br" {
63+ skip_indices. insert ( i) ;
64+ }
65+ // Check for .br, empty line, .br pattern
66+ else if i + 2 < lines. len ( )
67+ && lines[ i + 1 ] . trim ( ) . is_empty ( )
68+ && lines[ i + 2 ] . trim_end ( ) == ".br"
69+ {
70+ skip_indices. insert ( i + 2 ) ;
71+ }
72+ }
73+ }
74+
75+ // Second pass: build the final output
76+ for ( i, line) in lines. iter ( ) . enumerate ( ) {
77+ if !skip_indices. contains ( & i) {
78+ fixed_lines. push ( line. trim_end ( ) ) ;
79+ }
80+ }
81+
82+ result = fixed_lines. join ( "\n " ) ;
83+
84+ // Fix escape sequence issues
85+ // \\\\0 appears when trying to represent literal \0 string
86+ // In man pages, use \e for literal backslash
87+ result = result. replace ( "\\ \\ \\ \\ 0" , "\\ e0" ) ;
88+ result = result. replace ( "\\ \\ 0" , "\\ e0" ) ;
89+
90+ if !result. ends_with ( '\n' ) {
91+ result. push ( '\n' ) ;
92+ }
93+
94+ result
95+ }
96+
2997/// Print usage information for uudoc
3098fn usage < T : Args > ( utils : & UtilityMap < T > ) {
3199 println ! ( "uudoc - Documentation generator for uutils coreutils" ) ;
@@ -100,63 +168,15 @@ fn gen_manpage<T: Args>(
100168 man. render ( & mut buffer) . expect ( "Man page generation failed" ) ;
101169
102170 // Convert to string for processing
103- let mut manpage = String :: from_utf8 ( buffer) . expect ( "Invalid UTF-8 in manpage" ) ;
104-
105- // Fix the TH line: remove version info from date field and uppercase the command name
106- if let Some ( th_pos) = manpage. find ( ".TH " ) {
107- if let Some ( line_end) = manpage[ th_pos..] . find ( '\n' ) {
108- let th_line = & manpage[ th_pos..th_pos + line_end] ;
109- // Parse the TH line parts
110- let parts: Vec < & str > = th_line. split_whitespace ( ) . collect ( ) ;
111- if parts. len ( ) >= 2 {
112- let cmd_name = parts[ 1 ] . to_uppercase ( ) ;
113- // Reconstruct TH line with uppercase command name and no date
114- let new_th = format ! ( ".TH {} 1" , cmd_name) ;
115- manpage. replace_range ( th_pos..th_pos + line_end, & new_th) ;
116- }
117- }
118- }
119-
120- // Remove trailing whitespace from all lines and fix .br issues
121- let lines: Vec < String > = manpage
122- . lines ( )
123- . map ( |line| line. trim_end ( ) . to_string ( ) )
124- . collect ( ) ;
171+ let manpage = String :: from_utf8 ( buffer) . expect ( "Invalid UTF-8 in manpage" ) ;
125172
126- // Fix .br paragraph macro issues
127- let mut fixed_lines = Vec :: new ( ) ;
128- let mut skip_next_br = false ;
129-
130- for i in 0 ..lines. len ( ) {
131- let line = & lines[ i] ;
132-
133- if line == ".br" {
134- // Check for problematic patterns with .br
135- let prev_is_br = i > 0 && lines[ i - 1 ] == ".br" ;
136- let next_is_empty_then_br =
137- i + 2 < lines. len ( ) && lines[ i + 1 ] . is_empty ( ) && lines[ i + 2 ] == ".br" ;
138- let prev_is_empty_with_br = i >= 2 && lines[ i - 1 ] . is_empty ( ) && lines[ i - 2 ] == ".br" ;
139-
140- // Skip redundant .br in these patterns
141- if skip_next_br || prev_is_br || next_is_empty_then_br || prev_is_empty_with_br {
142- skip_next_br = false ;
143- continue ;
144- }
145-
146- // If this .br is followed by empty line and another .br, skip the second one
147- if next_is_empty_then_br {
148- skip_next_br = true ;
149- }
150- }
151-
152- fixed_lines. push ( line. clone ( ) ) ;
153- }
154-
155- manpage = fixed_lines. join ( "\n " ) ;
156- manpage. push ( '\n' ) ;
173+ // Post-process the manpage to fix mandoc lint issues
174+ let processed_manpage = post_process_manpage ( manpage) ;
157175
158176 // Write the processed manpage to stdout
159- io:: stdout ( ) . write_all ( manpage. as_bytes ( ) ) . unwrap ( ) ;
177+ io:: stdout ( )
178+ . write_all ( processed_manpage. as_bytes ( ) )
179+ . unwrap ( ) ;
160180 io:: stdout ( ) . flush ( ) . unwrap ( ) ;
161181 process:: exit ( 0 ) ;
162182}
@@ -691,3 +711,119 @@ fn format_examples(content: String, output_markdown: bool) -> Result<String, std
691711 ) ?;
692712 Ok ( s)
693713}
714+
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_post_process_manpage_fixes_th_header() {
        // The command name is uppercased and the version string in the date
        // field is replaced by the fixed, quoted date
        let input =
            ".TH cat 1 \"cat (uutils coreutils) 0.7.0\"\n.SH NAME\ncat - concatenate files\n";
        let expected = ".TH CAT 1 \"2024-01-01\"\n.SH NAME\ncat - concatenate files\n";

        let result = post_process_manpage(input.to_string());
        assert_eq!(result, expected);
    }

    #[test]
    fn test_post_process_manpage_removes_trailing_whitespace() {
        // Trailing whitespace is removed from every line, header included
        let input = ".TH TEST 1  \nSome text with trailing spaces   \n.SH SECTION  \n";
        let expected = ".TH TEST 1 \"2024-01-01\"\nSome text with trailing spaces\n.SH SECTION\n";

        let result = post_process_manpage(input.to_string());
        assert_eq!(result, expected);
    }

    #[test]
    fn test_post_process_manpage_fixes_double_br() {
        // Of two consecutive .br macros only the first one is kept
        let input = ".TH TEST 1\n.br\n.br\nSome text\n";
        let expected = ".TH TEST 1 \"2024-01-01\"\n.br\nSome text\n";

        let result = post_process_manpage(input.to_string());
        assert_eq!(result, expected);
    }

    #[test]
    fn test_post_process_manpage_collapses_br_run() {
        // A longer run of consecutive .br macros collapses to a single one
        let input = ".TH TEST 1\n.br\n.br\n.br\nSome text\n";
        let expected = ".TH TEST 1 \"2024-01-01\"\n.br\nSome text\n";

        let result = post_process_manpage(input.to_string());
        assert_eq!(result, expected);
    }

    #[test]
    fn test_post_process_manpage_fixes_br_with_empty_line() {
        // In a .br / empty line / .br sequence both .br macros are removed
        // (the first because it is followed by the empty line, the second
        // because it is preceded by it); the empty line itself is kept
        let input = ".TH TEST 1\n.br\n\n.br\nSome text\n";
        let expected = ".TH TEST 1 \"2024-01-01\"\n\nSome text\n";

        let result = post_process_manpage(input.to_string());
        assert_eq!(result, expected);
    }

    #[test]
    fn test_post_process_manpage_preserves_single_br() {
        // A lone .br between two text lines is preserved
        let input = ".TH TEST 1\nLine 1\n.br\nLine 2\n";
        let expected = ".TH TEST 1 \"2024-01-01\"\nLine 1\n.br\nLine 2\n";

        let result = post_process_manpage(input.to_string());
        assert_eq!(result, expected);
    }

    #[test]
    fn test_post_process_manpage_handles_mixed_case_command() {
        // Mixed case command names are uppercased
        let input = ".TH CaT 1 \"some version info\"\nContent\n";
        let expected = ".TH CAT 1 \"2024-01-01\"\nContent\n";

        let result = post_process_manpage(input.to_string());
        assert_eq!(result, expected);
    }

    #[test]
    fn test_post_process_manpage_handles_no_th_header() {
        // A manpage without any TH header passes through unchanged
        let input = ".SH NAME\ntest - a test utility\n";
        let expected = ".SH NAME\ntest - a test utility\n";

        let result = post_process_manpage(input.to_string());
        assert_eq!(result, expected);
    }

    #[test]
    fn test_post_process_manpage_complex_br_pattern() {
        // Several .br patterns in one document
        let input =
            ".TH TEST 1\nSection 1\n.br\n\n.br\nMiddle\n.br\n.br\nSection 2\n.br\n\n.br\nEnd\n";
        // .br around an empty line are both removed; of two consecutive .br
        // the first one is kept
        let expected = ".TH TEST 1 \"2024-01-01\"\nSection 1\n\nMiddle\n.br\nSection 2\n\nEnd\n";

        let result = post_process_manpage(input.to_string());
        assert_eq!(result, expected);
    }

    #[test]
    fn test_post_process_manpage_fixes_escaped_zero() {
        // Two backslashes followed by 0 become \e0
        let input1 = ".TH TEST 1\nuse \\\\0 as separator\n";
        let expected1 = ".TH TEST 1 \"2024-01-01\"\nuse \\e0 as separator\n";
        let result1 = post_process_manpage(input1.to_string());
        assert_eq!(result1, expected1);

        // Four backslashes followed by 0 also become a single \e0
        let input2 = ".TH TEST 1\nuse \\\\\\\\0 as separator\n";
        let expected2 = ".TH TEST 1 \"2024-01-01\"\nuse \\e0 as separator\n";
        let result2 = post_process_manpage(input2.to_string());
        assert_eq!(result2, expected2);
    }

    #[test]
    fn test_post_process_manpage_empty_input() {
        // Empty input yields a single terminating newline
        let result = post_process_manpage(String::new());
        assert_eq!(result, "\n");
    }

    #[test]
    fn test_post_process_manpage_malformed_th_header() {
        // A TH header without a command name is left untouched
        let input1 = ".TH\nContent\n";
        let expected1 = ".TH\nContent\n";
        let result1 = post_process_manpage(input1.to_string());
        assert_eq!(result1, expected1);

        // TH header with special characters in the command name
        let input2 = ".TH test-cmd 1 \"version 1.0\"\nContent\n";
        let expected2 = ".TH TEST-CMD 1 \"2024-01-01\"\nContent\n";
        let result2 = post_process_manpage(input2.to_string());
        assert_eq!(result2, expected2);

        // TH header at end of file without newline: the header is rewritten
        // and a final newline is appended
        let input3 = "Content\n.TH test 1";
        let expected3 = "Content\n.TH TEST 1 \"2024-01-01\"\n";
        let result3 = post_process_manpage(input3.to_string());
        assert_eq!(result3, expected3);

        // Multiple TH headers: the multi-line `^` anchor matches at the start
        // of every line, so each header is rewritten
        let input4 = ".TH first 1\nMiddle\n.TH second 1\n";
        let expected4 = ".TH FIRST 1 \"2024-01-01\"\nMiddle\n.TH SECOND 1 \"2024-01-01\"\n";
        let result4 = post_process_manpage(input4.to_string());
        assert_eq!(result4, expected4);
    }
}
0 commit comments