|
2 | 2 | // |
3 | 3 | // For the full copyright and license information, please view the LICENSE |
4 | 4 | // file that was distributed with this source code. |
| 5 | + |
| 6 | +// spell-checker:ignore plass samp |
| 7 | + |
5 | 8 | use uutests::new_ucmd; |
6 | 9 | use uutests::util::TestScenario; |
7 | 10 | use uutests::util_name; |
@@ -303,3 +306,73 @@ fn prefix_equal_skip_prefix_equal_two() { |
303 | 306 | .stdout_is_fixture("prefixed-one-word-per-line_p=_P=2.txt"); |
304 | 307 | } |
305 | 308 | } |
| 309 | + |
#[test]
fn test_fmt_unicode_whitespace_handling() {
    // Regression guard for the character classification fix: Unicode whitespace characters
    // such as the non-breaking space must NOT be treated as whitespace by fmt. This keeps
    // fmt compatible with GNU fmt, which only recognizes ASCII whitespace (space, tab,
    // newline, etc.) so that formatting stays consistent.
    //
    // Each case pairs a label (used only in the failure message) with the character under test.
    let cases = [
        ("non-breaking space", "\u{00A0}"),    // U+00A0 NO-BREAK SPACE
        ("figure space", "\u{2007}"),          // U+2007 FIGURE SPACE
        ("narrow no-break space", "\u{202F}"), // U+202F NARROW NO-BREAK SPACE
    ];

    for (name, char) in cases {
        // The character sits between two `=` signs and fmt is asked to split at width 1.
        // Since the character should not count as whitespace, it must not cause a line break.
        let input = format!("={char}=");
        let result = new_ucmd!()
            .args(&["-s", "-w1"])
            .pipe_in(input.as_bytes())
            .succeeds();

        // Exactly one output line means the Unicode character was not used as a break point.
        let line_count = result.stdout_str().lines().count();
        assert_eq!(
            line_count, 1,
            "Failed for {name}: Unicode character should not be treated as whitespace"
        );
    }
}
| 342 | + |
#[test]
fn test_fmt_knuth_plass_line_breaking() {
    // Regression guard for the line breaking algorithm improvements: the Knuth-Plass optimal
    // line breaking should honor sentence boundaries and word positioning constraints, and
    // produce natural line breaks for complex text.
    //
    // The paragraph below is piped to fmt and the output is compared verbatim against
    // `expected`. Each `\n\` ends a source line: the `\n` is part of the string, while the
    // trailing backslash makes Rust skip the line break and the indentation that follows.
    let input = "@command{fmt} prefers breaking lines at the end of a sentence, and tries to\n\
                 avoid line breaks after the first word of a sentence or before the last word\n\
                 of a sentence. A @dfn{sentence break} is defined as either the end of a\n\
                 paragraph or a word ending in any of @samp{.?!}, followed by two spaces or end\n\
                 of line, ignoring any intervening parentheses or quotes. Like @TeX{},\n\
                 @command{fmt} reads entire ''paragraphs'' before choosing line breaks; the\n\
                 algorithm is a variant of that given by\n\
                 Donald E. Knuth and Michael F. Plass\n\
                 in ''Breaking Paragraphs Into Lines'',\n\
                 @cite{Software---Practice & Experience}\n\
                 @b{11}, 11 (November 1981), 1119--1184.";

    // The exact reflowed text fmt must produce, including the final newline.
    let expected = "@command{fmt} prefers breaking lines at the end of a sentence,\n\
                    and tries to avoid line breaks after the first word of a sentence\n\
                    or before the last word of a sentence. A @dfn{sentence break}\n\
                    is defined as either the end of a paragraph or a word ending\n\
                    in any of @samp{.?!}, followed by two spaces or end of line,\n\
                    ignoring any intervening parentheses or quotes. Like @TeX{},\n\
                    @command{fmt} reads entire ''paragraphs'' before choosing line\n\
                    breaks; the algorithm is a variant of that given by Donald\n\
                    E. Knuth and Michael F. Plass in ''Breaking Paragraphs Into\n\
                    Lines'', @cite{Software---Practice & Experience} @b{11}, 11\n\
                    (November 1981), 1119--1184.\n";

    // Run fmt with `-g 60 -w 72`; the command must exit successfully.
    let outcome = new_ucmd!()
        .args(&["-g", "60", "-w", "72"])
        .pipe_in(input)
        .succeeds();

    // The output must match the expected layout byte-for-byte.
    outcome.stdout_is(expected);
}
0 commit comments