Skip to content

Commit 1e95554

Browse files
Xylphy and cakebaker authored
ptx: match GNU default behavior by skipping non-alphabetic index tokens (#10919)
* ptx: skip non-alphabetic tokens in default word matching

* Simplify no output operation

Co-authored-by: Daniel Hofstetter <daniel.hofstetter@42dh.com>

* fix: Keyword must start at first alphabetic char

* Simplify find first alphabet

---------

Co-authored-by: Daniel Hofstetter <daniel.hofstetter@42dh.com>
1 parent b439534 commit 1e95554

2 files changed

Lines changed: 27 additions & 1 deletion

File tree

src/uu/ptx/src/ptx.rs

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -356,7 +356,19 @@ fn create_word_set(config: &Config, filter: &WordFilter, file_map: &FileMap) ->
356356
};
357357
// match words with given regex
358358
for mat in reg.find_iter(line) {
359-
let (beg, end) = (mat.start(), mat.end());
359+
let (mut beg, end) = (mat.start(), mat.end());
360+
361+
// GNU-compatible default behavior:
362+
// with default regexp, keyword must start at first alphabetic char.
363+
if filter.word_regex == Config::default().context_regex {
364+
let matched = &line[beg..end];
365+
if let Some(pos) = matched.find(|c: char| c.is_alphabetic()) {
366+
beg += pos;
367+
} else {
368+
continue;
369+
}
370+
}
371+
360372
if config.input_ref && ((beg, end) == (ref_beg, ref_end)) {
361373
continue;
362374
}

tests/by-util/test_ptx.rs

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -300,6 +300,11 @@ fn test_gnu_mode_dumb_format() {
300300
new_ucmd!().pipe_in("a b").succeeds().stdout_only(
301301
" a b\n a b\n",
302302
);
303+
304+
new_ucmd!()
305+
.pipe_in("2a")
306+
.succeeds()
307+
.stdout_only(format!("{}2 a\n", " ".repeat(35)));
303308
}
304309

305310
#[test]
@@ -330,6 +335,15 @@ fn test_unicode_padding_alignment() {
330335
.stdout_only(" a\n é\n");
331336
}
332337

338+
#[test]
339+
fn test_gnu_compat_numeric_token_with_emoji_produces_no_index() {
340+
// GNU ptx produces no output for this input in default mode.
341+
new_ucmd!()
342+
.pipe_in("012345678901234567890123456789🛠\n")
343+
.succeeds()
344+
.no_output();
345+
}
346+
333347
#[test]
334348
fn test_unicode_truncation_alignment() {
335349
new_ucmd!()

0 commit comments

Comments
 (0)