Skip to content

Commit 81f10d6

Browse files
committed
src: fix Boyer-Moore array bounds safety
Refs https://chromium-review.googlesource.com/c/chromium/src/+/6703757 The above CL enabled array bounds sanitization for Electron and therefore Node.js, which triggered a crash in parallel/test-buffer-indexof.js. Upon a bit more investigation, some Boyer-Moore related functions function were using biased pointer arithmetic to create array pointers that point outside the actual array bounds. When start_ is large this creates pointers far outside the valid memory range. The sanitizer detects that we're creating and dereferencing pointers outside array boundaries, even though the final memory access happens to land in valid memory. This changes the functions to use bounds-safe array indexing.
1 parent aa1b3fe commit 81f10d6

File tree

1 file changed

+30
-22
lines changed

1 file changed

+30
-22
lines changed

include/nbytes.h

Lines changed: 30 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -548,7 +548,11 @@ size_t StringSearch<Char>::BoyerMooreSearch(Vector subject,
548548
size_t start = start_;
549549

550550
int *bad_char_occurrence = bad_char_shift_table_;
551-
int *good_suffix_shift = good_suffix_shift_table_ - start_;
551+
552+
auto good_suffix_get = [&](size_t idx) -> int {
553+
if (idx < start || idx - start > kBMMaxShift) return 0;
554+
return good_suffix_shift_table_[idx - start];
555+
};
552556

553557
Char last_char = pattern_[pattern_length - 1];
554558
size_t index = start_index;
@@ -575,7 +579,7 @@ size_t StringSearch<Char>::BoyerMooreSearch(Vector subject,
575579
index +=
576580
pattern_length - 1 - CharOccurrence(bad_char_occurrence, last_char);
577581
} else {
578-
int gs_shift = good_suffix_shift[j + 1];
582+
int gs_shift = good_suffix_get(j + 1);
579583
int bc_occ = CharOccurrence(bad_char_occurrence, c);
580584
int shift = j - bc_occ;
581585
if (gs_shift > shift) {
@@ -591,22 +595,25 @@ size_t StringSearch<Char>::BoyerMooreSearch(Vector subject,
591595
template <typename Char>
592596
void StringSearch<Char>::PopulateBoyerMooreTable() {
593597
const size_t pattern_length = pattern_.length();
594-
// Only look at the last kBMMaxShift characters of pattern (from start_
595-
// to pattern_length).
596598
const size_t start = start_;
597599
const size_t length = pattern_length - start;
598600

599-
// Biased tables so that we can use pattern indices as table indices,
600-
// even if we only cover the part of the pattern from offset start.
601-
int *shift_table = good_suffix_shift_table_ - start_;
602-
int *suffix_table = suffix_table_ - start_;
601+
auto shift_get = [&](size_t idx) -> int & {
602+
if (idx < start) abort();
603+
return good_suffix_shift_table_[idx - start];
604+
};
605+
606+
auto suffix_get = [&](size_t idx) -> int & {
607+
if (idx < start) abort();
608+
return suffix_table_[idx - start];
609+
};
603610

604611
// Initialize table.
605612
for (size_t i = start; i < pattern_length; i++) {
606-
shift_table[i] = length;
613+
shift_get(i) = length;
607614
}
608-
shift_table[pattern_length] = 1;
609-
suffix_table[pattern_length] = pattern_length + 1;
615+
shift_get(pattern_length) = 1;
616+
suffix_get(pattern_length) = pattern_length + 1;
610617

611618
if (pattern_length <= start) {
612619
return;
@@ -620,34 +627,35 @@ void StringSearch<Char>::PopulateBoyerMooreTable() {
620627
while (i > start) {
621628
Char c = pattern_[i - 1];
622629
while (suffix <= pattern_length && c != pattern_[suffix - 1]) {
623-
if (static_cast<size_t>(shift_table[suffix]) == length) {
624-
shift_table[suffix] = suffix - i;
630+
if (static_cast<size_t>(shift_get(suffix)) == length) {
631+
shift_get(suffix) = suffix - i;
625632
}
626-
suffix = suffix_table[suffix];
633+
suffix = suffix_get(suffix);
627634
}
628-
suffix_table[--i] = --suffix;
635+
suffix_get(--i) = --suffix;
629636
if (suffix == pattern_length) {
630637
// No suffix to extend, so we check against last_char only.
631638
while ((i > start) && (pattern_[i - 1] != last_char)) {
632-
if (static_cast<size_t>(shift_table[pattern_length]) == length) {
633-
shift_table[pattern_length] = pattern_length - i;
639+
if (static_cast<size_t>(shift_get(pattern_length)) == length) {
640+
shift_get(pattern_length) = pattern_length - i;
634641
}
635-
suffix_table[--i] = pattern_length;
642+
suffix_get(--i) = pattern_length;
636643
}
637644
if (i > start) {
638-
suffix_table[--i] = --suffix;
645+
suffix_get(--i) = --suffix;
639646
}
640647
}
641648
}
642649
}
650+
643651
// Build shift table using suffixes.
644652
if (suffix < pattern_length) {
645653
for (size_t i = start; i <= pattern_length; i++) {
646-
if (static_cast<size_t>(shift_table[i]) == length) {
647-
shift_table[i] = suffix - start;
654+
if (static_cast<size_t>(shift_get(i)) == length) {
655+
shift_get(i) = suffix - start;
648656
}
649657
if (i == suffix) {
650-
suffix = suffix_table[suffix];
658+
suffix = suffix_get(suffix);
651659
}
652660
}
653661
}

0 commit comments

Comments
 (0)