Skip to content

Commit 749f7a0

Browse files
vraspar and Copilot committed
Add clarifying comment for CUDA bounds guard per review
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 3547f0e commit 749f7a0

1 file changed

Lines changed: 2 additions & 0 deletions

File tree

onnxruntime/contrib_ops/cuda/bert/ngram_repeat_block_impl.cu

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,8 @@ __global__ void banRepeatedTokens(const int64_t* __restrict__ tokens,
4949
if (is_banned == true) {
5050
auto token_to_be_banned = tokens_shm[col + no_repeat_ngram_size - 1];
5151
CUDA_KERNEL_ASSERT(token_to_be_banned >= 0 && token_to_be_banned < vocab_size);
52+
// In release builds, silently skip OOB tokens rather than writing out of bounds.
53+
// CUDA kernels cannot propagate Status errors to the host.
5254
if (token_to_be_banned >= 0 && token_to_be_banned < vocab_size) {
5355
lprobs[lprob_start + token_to_be_banned] = -std::numeric_limits<float>::infinity();
5456
}

0 commit comments

Comments
 (0)