Skip to content

Commit 8295933

Browse files
sylvestre authored and pixelb committed
tests: split: verify non-UTF-8 bytes are preserved in filenames
* tests/split/non-utf8.sh: New test to ensure that non-UTF-8 bytes in the prefix and --additional-suffix are preserved as-is in output filenames, rather than being replaced by UTF-8 replacement characters. * tests/local.mk: Register new test. uutils/coreutils#11397 coreutils/coreutils#239
1 parent 2625209 commit 8295933

2 files changed

Lines changed: 39 additions & 0 deletions

File tree

tests/local.mk

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -460,6 +460,7 @@ all_tests = \
460460
tests/split/record-sep.sh \
461461
tests/split/numeric.sh \
462462
tests/split/guard-input.sh \
463+
tests/split/non-utf8.sh \
463464
tests/split/split-io-err.sh \
464465
tests/stat/stat-birthtime.sh \
465466
tests/stat/stat-fmt.sh \

tests/split/non-utf8.sh

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#!/bin/sh
2+
# Verify that split preserves non-UTF-8 bytes in prefix and suffix.
3+
4+
# Copyright (C) 2026 Free Software Foundation, Inc.
5+
6+
# This program is free software: you can redistribute it and/or modify
7+
# it under the terms of the GNU General Public License as published by
8+
# the Free Software Foundation, either version 3 of the License, or
9+
# (at your option) any later version.
10+
11+
# This program is distributed in the hope that it will be useful,
12+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
# GNU General Public License for more details.
15+
16+
# You should have received a copy of the GNU General Public License
17+
# along with this program. If not, see <https://www.gnu.org/licenses/>.
18+
19+
. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
20+
print_ver_ split
21+
22+
echo a > "$(bad_unicode)" \
23+
|| skip_ 'bad unicode not supported in shell or file system'
24+
25+
# Non-UTF-8 bytes in prefix should be preserved, not replaced
26+
# by UTF-8 replacement characters (0xEF 0xBF 0xBD).
27+
prefix="$(bad_unicode)"
28+
printf 'AB' | split -b1 - "$prefix" || fail=1
29+
# Quote "$prefix": an unquoted expansion undergoes field splitting and
# pathname expansion, which could alter the expected output file name.
test -f "$(printf '%saa' "$prefix")" || fail=1
30+
# Quote "$prefix": an unquoted expansion undergoes field splitting and
# pathname expansion, which could alter the expected output file name.
test -f "$(printf '%sab' "$prefix")" || fail=1
31+
32+
# Non-UTF-8 bytes in --additional-suffix should also be preserved.
33+
suffix="$(bad_unicode)"
34+
printf 'AB' | split -b1 --additional-suffix="$suffix" - q || fail=1
35+
test -f "$(printf 'qaa%s' "$suffix")" || fail=1
36+
test -f "$(printf 'qab%s' "$suffix")" || fail=1
37+
38+
Exit $fail

0 commit comments

Comments
 (0)