33// For the full copyright and license information, please view the LICENSE
44// file that was distributed with this source code.
55
6- // spell-checker:ignore mangen tldr
6+ // spell-checker:ignore mangen tldr mandoc uppercasing uppercased manpages DESTDIR
77
88use std:: {
9- collections:: HashMap ,
9+ collections:: { HashMap , HashSet } ,
1010 ffi:: OsString ,
1111 fs:: File ,
1212 io:: { self , Read , Seek , Write } ,
@@ -18,6 +18,7 @@ use clap_complete::Shell;
1818use clap_mangen:: Man ;
1919use fluent_syntax:: ast:: { Entry , Message , Pattern } ;
2020use fluent_syntax:: parser;
21+ use regex:: Regex ;
2122use textwrap:: { fill, indent, termwidth} ;
2223use zip:: ZipArchive ;
2324
@@ -26,6 +27,66 @@ use uucore::Args;
2627
2728include ! ( concat!( env!( "OUT_DIR" ) , "/uutils_map.rs" ) ) ;
2829
30+ /// Post-process a generated manpage to fix mandoc lint issues
31+ ///
32+ /// This function:
33+ /// - Fixes the TH header by uppercasing command names and removing invalid date formats
34+ /// - Removes trailing whitespace from all lines
35+ /// - Fixes redundant .br paragraph macros that cause mandoc warnings
36+ fn post_process_manpage ( manpage : String ) -> String {
37+ // Only match TH headers that have at least a command name on the same line
38+ // Use [ \t] instead of \s to avoid matching newlines
39+ // Use a date format that satisfies mandoc (YYYY-MM-DD)
40+ // Using a static date to ensure reproducible builds
41+ let date = "2024-01-01" ;
42+
43+ let th_regex = Regex :: new ( r"(?m)^\.TH[ \t]+([^ \t\n]+)(?:[ \t]+[^\n]*)?$" ) . unwrap ( ) ;
44+ let mut result = th_regex
45+ . replace_all ( & manpage, |caps : & regex:: Captures | {
46+ // Add date to satisfy mandoc - date must be quoted
47+ format ! ( ".TH {} 1 \" {}\" " , caps[ 1 ] . to_uppercase( ) , date)
48+ } )
49+ . to_string ( ) ;
50+
51+ // Process lines: remove trailing whitespace and fix .br issues in a single pass
52+ let lines: Vec < & str > = result. lines ( ) . collect ( ) ;
53+ let mut fixed_lines = Vec :: with_capacity ( lines. len ( ) ) ;
54+ let mut skip_indices = HashSet :: new ( ) ;
55+
56+ // First pass: identify lines to skip (redundant .br macros)
57+ for i in 0 ..lines. len ( ) {
58+ let line = lines[ i] . trim_end ( ) ;
59+
60+ if line == ".br" && !skip_indices. contains ( & i) {
61+ // Check for consecutive .br macros
62+ if i > 0 && lines[ i - 1 ] . trim_end ( ) == ".br" {
63+ skip_indices. insert ( i) ;
64+ }
65+ // Check for .br, empty line, .br pattern
66+ else if i + 2 < lines. len ( )
67+ && lines[ i + 1 ] . trim ( ) . is_empty ( )
68+ && lines[ i + 2 ] . trim_end ( ) == ".br"
69+ {
70+ skip_indices. insert ( i + 2 ) ;
71+ }
72+ }
73+ }
74+
75+ // Second pass: build the final output
76+ for ( i, line) in lines. iter ( ) . enumerate ( ) {
77+ if !skip_indices. contains ( & i) {
78+ fixed_lines. push ( line. trim_end ( ) ) ;
79+ }
80+ }
81+
82+ result = fixed_lines. join ( "\n " ) ;
83+ if !result. ends_with ( '\n' ) {
84+ result. push ( '\n' ) ;
85+ }
86+
87+ result
88+ }
89+
2990/// Print usage information for uudoc
3091fn usage < T : Args > ( utils : & UtilityMap < T > ) {
3192 println ! ( "uudoc - Documentation generator for uutils coreutils" ) ;
@@ -100,63 +161,15 @@ fn gen_manpage<T: Args>(
100161 man. render ( & mut buffer) . expect ( "Man page generation failed" ) ;
101162
102163 // Convert to string for processing
103- let mut manpage = String :: from_utf8 ( buffer) . expect ( "Invalid UTF-8 in manpage" ) ;
104-
105- // Fix the TH line: remove version info from date field and uppercase the command name
106- if let Some ( th_pos) = manpage. find ( ".TH " ) {
107- if let Some ( line_end) = manpage[ th_pos..] . find ( '\n' ) {
108- let th_line = & manpage[ th_pos..th_pos + line_end] ;
109- // Parse the TH line parts
110- let parts: Vec < & str > = th_line. split_whitespace ( ) . collect ( ) ;
111- if parts. len ( ) >= 2 {
112- let cmd_name = parts[ 1 ] . to_uppercase ( ) ;
113- // Reconstruct TH line with uppercase command name and no date
114- let new_th = format ! ( ".TH {} 1" , cmd_name) ;
115- manpage. replace_range ( th_pos..th_pos + line_end, & new_th) ;
116- }
117- }
118- }
164+ let manpage = String :: from_utf8 ( buffer) . expect ( "Invalid UTF-8 in manpage" ) ;
119165
120- // Remove trailing whitespace from all lines and fix .br issues
121- let lines: Vec < String > = manpage
122- . lines ( )
123- . map ( |line| line. trim_end ( ) . to_string ( ) )
124- . collect ( ) ;
125-
126- // Fix .br paragraph macro issues
127- let mut fixed_lines = Vec :: new ( ) ;
128- let mut skip_next_br = false ;
129-
130- for i in 0 ..lines. len ( ) {
131- let line = & lines[ i] ;
132-
133- if line == ".br" {
134- // Check for problematic patterns with .br
135- let prev_is_br = i > 0 && lines[ i - 1 ] == ".br" ;
136- let next_is_empty_then_br =
137- i + 2 < lines. len ( ) && lines[ i + 1 ] . is_empty ( ) && lines[ i + 2 ] == ".br" ;
138- let prev_is_empty_with_br = i >= 2 && lines[ i - 1 ] . is_empty ( ) && lines[ i - 2 ] == ".br" ;
139-
140- // Skip redundant .br in these patterns
141- if skip_next_br || prev_is_br || next_is_empty_then_br || prev_is_empty_with_br {
142- skip_next_br = false ;
143- continue ;
144- }
145-
146- // If this .br is followed by empty line and another .br, skip the second one
147- if next_is_empty_then_br {
148- skip_next_br = true ;
149- }
150- }
151-
152- fixed_lines. push ( line. clone ( ) ) ;
153- }
154-
155- manpage = fixed_lines. join ( "\n " ) ;
156- manpage. push ( '\n' ) ;
166+ // Post-process the manpage to fix mandoc lint issues
167+ let processed_manpage = post_process_manpage ( manpage) ;
157168
158169 // Write the processed manpage to stdout
159- io:: stdout ( ) . write_all ( manpage. as_bytes ( ) ) . unwrap ( ) ;
170+ io:: stdout ( )
171+ . write_all ( processed_manpage. as_bytes ( ) )
172+ . unwrap ( ) ;
160173 io:: stdout ( ) . flush ( ) . unwrap ( ) ;
161174 process:: exit ( 0 ) ;
162175}
@@ -691,3 +704,118 @@ fn format_examples(content: String, output_markdown: bool) -> Result<String, std
691704 ) ?;
692705 Ok ( s)
693706}
707+
/// Unit tests for `post_process_manpage`.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_post_process_manpage_fixes_th_header() {
        // The command name is uppercased and the version string in the TH
        // header is replaced by the fixed, quoted date.
        let input =
            ".TH cat 1 \"cat (uutils coreutils) 0.7.0\"\n.SH NAME\ncat - concatenate files\n";
        let expected = ".TH CAT 1 \"2024-01-01\"\n.SH NAME\ncat - concatenate files\n";

        let result = post_process_manpage(input.to_string());
        assert_eq!(result, expected);
    }

    #[test]
    fn test_post_process_manpage_removes_trailing_whitespace() {
        // Trailing whitespace is removed from every line.
        let input = ".TH TEST 1 \nSome text with trailing spaces \n.SH SECTION \n";
        let expected = ".TH TEST 1 \"2024-01-01\"\nSome text with trailing spaces\n.SH SECTION\n";

        let result = post_process_manpage(input.to_string());
        assert_eq!(result, expected);
    }

    #[test]
    fn test_post_process_manpage_fixes_double_br() {
        // The second of two consecutive .br macros is removed.
        let input = ".TH TEST 1\n.br\n.br\nSome text\n";
        let expected = ".TH TEST 1 \"2024-01-01\"\n.br\nSome text\n";

        let result = post_process_manpage(input.to_string());
        assert_eq!(result, expected);
    }

    #[test]
    fn test_post_process_manpage_fixes_br_with_empty_line() {
        // In a .br / blank line / .br sequence the second .br is removed and
        // the blank line is kept.
        let input = ".TH TEST 1\n.br\n\n.br\nSome text\n";
        let expected = ".TH TEST 1 \"2024-01-01\"\n.br\n\nSome text\n";

        let result = post_process_manpage(input.to_string());
        assert_eq!(result, expected);
    }

    #[test]
    fn test_post_process_manpage_preserves_single_br() {
        // A single .br macro between text lines is preserved.
        let input = ".TH TEST 1\nLine 1\n.br\nLine 2\n";
        let expected = ".TH TEST 1 \"2024-01-01\"\nLine 1\n.br\nLine 2\n";

        let result = post_process_manpage(input.to_string());
        assert_eq!(result, expected);
    }

    #[test]
    fn test_post_process_manpage_handles_mixed_case_command() {
        // Mixed-case command names are uppercased.
        let input = ".TH CaT 1 \"some version info\"\nContent\n";
        let expected = ".TH CAT 1 \"2024-01-01\"\nContent\n";

        let result = post_process_manpage(input.to_string());
        assert_eq!(result, expected);
    }

    #[test]
    fn test_post_process_manpage_handles_no_th_header() {
        // A manpage without a TH header passes through unchanged.
        let input = ".SH NAME\ntest - a test utility\n";
        let expected = ".SH NAME\ntest - a test utility\n";

        let result = post_process_manpage(input.to_string());
        assert_eq!(result, expected);
    }

    #[test]
    fn test_post_process_manpage_complex_br_pattern() {
        // Several redundant .br patterns in one document are all fixed.
        let input =
            ".TH TEST 1\nSection 1\n.br\n\n.br\nMiddle\n.br\n.br\nSection 2\n.br\n\n.br\nEnd\n";
        let expected =
            ".TH TEST 1 \"2024-01-01\"\nSection 1\n.br\n\nMiddle\n.br\nSection 2\n.br\n\nEnd\n";

        let result = post_process_manpage(input.to_string());
        assert_eq!(result, expected);
    }

    #[test]
    fn test_post_process_manpage_malformed_th_header() {
        // Malformed TH headers must not cause panics.

        // A TH header without a command name is left untouched.
        let input1 = ".TH\nContent\n";
        let expected1 = ".TH\nContent\n";
        let result1 = post_process_manpage(input1.to_string());
        assert_eq!(result1, expected1);

        // Special characters in the command name are kept; letters are uppercased.
        let input2 = ".TH test-cmd 1 \"version 1.0\"\nContent\n";
        let expected2 = ".TH TEST-CMD 1 \"2024-01-01\"\nContent\n";
        let result2 = post_process_manpage(input2.to_string());
        assert_eq!(result2, expected2);

        // A TH header at the end of the input without a final newline is
        // rewritten and the output gains a terminating newline.
        let input3 = "Content\n.TH test 1";
        let expected3 = "Content\n.TH TEST 1 \"2024-01-01\"\n";
        let result3 = post_process_manpage(input3.to_string());
        assert_eq!(result3, expected3);

        // Multiple TH headers: every header line is rewritten, not just the
        // first, because the pattern is matched per line and all matches are
        // replaced.
        let input4 = ".TH first 1\nMiddle\n.TH second 1\n";
        let expected4 = ".TH FIRST 1 \"2024-01-01\"\nMiddle\n.TH SECOND 1 \"2024-01-01\"\n";
        let result4 = post_process_manpage(input4.to_string());
        assert_eq!(result4, expected4);
    }
}
0 commit comments