Skip to content

Commit a851c7c

Browse files
pavel-petrof (Pavel Petrov) and sayanshaw24
authored
Fixing an out-of-bounds bug in BPE Tokenizer (#1058)
Co-authored-by: Pavel Petrov <pavelpetrov@microsoft.com>
Co-authored-by: Sayan Shaw <52221015+sayanshaw24@users.noreply.github.com>
1 parent c19e50b commit a851c7c

1 file changed

Lines changed: 3 additions & 3 deletions

File tree

operators/tokenizer/bpe_tokenizer_model.hpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -184,7 +184,7 @@ class BpeModel {
184184
if (i > static_cast<uint32_t>((std::numeric_limits<int32_t>::max)())) {
185185
continue; // safe purpose.
186186
}
187-
if (i > id2token_map_.size()) {
187+
if (i >= id2token_map_.size()) {
188188
id2token_map_.resize(static_cast<size_t>(i) + 1);
189189
}
190190
id2token_map_[i] = t;
@@ -260,7 +260,7 @@ class BpeModel {
260260
if (i > static_cast<uint32_t>((std::numeric_limits<int32_t>::max)())) {
261261
continue; // safe purpose.
262262
}
263-
if (i > id2token_map_.size()) {
263+
if (i >= id2token_map_.size()) {
264264
id2token_map_.resize(static_cast<size_t>(i) + 1);
265265
}
266266
id2token_map_[i] = t;
@@ -298,7 +298,7 @@ class BpeModel {
298298
if (i > static_cast<uint32_t>((std::numeric_limits<int32_t>::max)())) {
299299
continue; // safe purpose.
300300
}
301-
if (i > id2token_map_.size()) {
301+
if (i >= id2token_map_.size()) {
302302
id2token_map_.resize(static_cast<size_t>(i) + 1);
303303
}
304304
id2token_map_[i] = t;

0 commit comments

Comments
 (0)