Skip to content

Commit 4a72647

Browse files
committed
diff: fix out-of-bounds reads and NULL deref in diffstat UTF-8 truncation
f85b49f (diff: improve scaling of filenames in diffstat to handle UTF-8 chars, 2026-01-16) introduced a loop in show_stats() that calls utf8_width() repeatedly to skip leading characters until the displayed width fits. However, utf8_width() can return problematic values: - For invalid UTF-8 sequences, pick_one_utf8_char() sets the name pointer to NULL and utf8_width() returns 0. Since name_len does not change, the loop iterates once more and pick_one_utf8_char() dereferences the NULL pointer, crashing. - For control characters, utf8_width() returns -1, so name_len grows when it is expected to shrink. This can cause the loop to consume more characters than the string contains, reading past the trailing NUL. By default, fill_print_name() will C-quotes filenames which escapes control characters and invalid bytes to printable text. That avoids this bug from being triggered; however, with core.quotePath=false, raw bytes can reach this code. Add tests exercising both failure modes with core.quotePath=false and a narrow --stat-name-width to force truncation: one with a bare 0xC0 byte (invalid UTF-8 lead byte, triggers NULL deref) and one with a 0x01 byte (control character, causes the loop to read past the end of the string). Fix both issues by introducing utf8_ish_width(), a thin wrapper around utf8_width() that guarantees the pointer always advances and the returned width is never negative: - On invalid UTF-8 it restores the pointer, advances by one byte, and returns width 1 (matching the strlen()-based fallback used by utf8_strwidth()). - On a control character it returns 0 (matching utf8_strnwidth() which skips them). Also add a "&& *name" guard to the while-loop condition so it terminates at end-of-string even when utf8_strwidth()'s strlen() fallback causes name_len to exceed the sum of per-character widths. Signed-off-by: Elijah Newren <newren@gmail.com>
1 parent 9f223ef commit 4a72647

File tree

2 files changed

+49
-2
lines changed

2 files changed

+49
-2
lines changed

diff.c

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2927,6 +2927,28 @@ void print_stat_summary(FILE *fp, int files,
29272927
print_stat_summary_inserts_deletes(&o, files, insertions, deletions);
29282928
}
29292929

2930+
/*
2931+
* Like utf8_width(), but guaranteed safe for use in loops that subtract
2932+
* per-character widths:
2933+
*
2934+
* - utf8_width() sets *start to NULL on invalid UTF-8 and returns 0;
2935+
* we restore the pointer and advance by one byte, returning width 1
2936+
* (matching the strlen()-based fallback in utf8_strwidth()).
2937+
*
2938+
* - utf8_width() returns -1 for control characters; we return 0
2939+
* (matching utf8_strnwidth() which skips them).
2940+
*/
2941+
static int utf8_ish_width(const char **start)
2942+
{
2943+
const char *old = *start;
2944+
int w = utf8_width(start, NULL);
2945+
if (!*start) {
2946+
*start = old + 1;
2947+
return 1;
2948+
}
2949+
return (w < 0) ? 0 : w;
2950+
}
2951+
29302952
static void show_stats(struct diffstat_t *data, struct diff_options *options)
29312953
{
29322954
int i, len, add, del, adds = 0, dels = 0;
@@ -3093,8 +3115,8 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
30933115
if (len < 0)
30943116
len = 0;
30953117

3096-
while (name_len > len)
3097-
name_len -= utf8_width((const char**)&name, NULL);
3118+
while (name_len > len && *name)
3119+
name_len -= utf8_ish_width((const char**)&name);
30983120

30993121
slash = strchr(name, '/');
31003122
if (slash)

t/t4052-stat-output.sh

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -445,4 +445,29 @@ test_expect_success 'diffstat where line_prefix contains ANSI escape codes is co
445445
test_grep "<RED>|<RESET> ${FILENAME_TRIMMED} | 0" out
446446
'
447447

448+
test_expect_success 'diffstat truncation with invalid UTF-8 does not crash' '
449+
empty_blob=$(git hash-object -w --stdin </dev/null) &&
450+
printf "100644 blob $empty_blob\taaa-\300-aaa\n" |
451+
git mktree >tree_file &&
452+
tree=$(cat tree_file) &&
453+
empty_tree=$(git mktree </dev/null) &&
454+
c1=$(git commit-tree -m before $empty_tree) &&
455+
c2=$(git commit-tree -m after -p $c1 $tree) &&
456+
git -c core.quotepath=false diff --stat --stat-name-width=5 $c1..$c2 >output &&
457+
test_grep "| 0" output
458+
'
459+
460+
test_expect_success FUNNYNAMES 'diffstat truncation with control chars does not crash' '
461+
FNAME=$(printf "aaa-\x01-aaa") &&
462+
git commit --allow-empty -m setup &&
463+
>$FNAME &&
464+
git add -- $FNAME &&
465+
git commit -m "add file with control char name" &&
466+
git -c core.quotepath=false diff --stat --stat-name-width=5 HEAD~1..HEAD >output &&
467+
test_grep "| 0" output &&
468+
rm -- $FNAME &&
469+
git rm -- $FNAME &&
470+
git commit -m "remove test file"
471+
'
472+
448473
test_done

0 commit comments

Comments
 (0)