We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 91ca3d1 · commit ef47a10 · Copy full SHA for ef47a10
1 file changed
custom_ops/gpu_ops/append_attn/mla_cache_kernel.cuh
@@ -212,13 +212,15 @@ __global__ void prefill_absorb_cache_kernel(
212
const uint32_t block_idx = block_table_now[ori_seq_id / block_size];
213
const uint32_t block_offset = ori_seq_id % block_size;
214
215
+#ifndef PADDLE_WITH_CUSTOM_DEVICE_METAX_GPU
216
const int32_t block_idx1 = slot_mapping[token_idx] / block_size;
217
if (block_idx1 != block_idx) {
218
printf("block_idx1 %d != block_idx %d\n", block_idx1, block_idx);
219
printf("token_idx %d\n", token_idx);
220
printf("slot_mapping %d\n", slot_mapping[token_idx]);
221
asm volatile("trap;");
222
}
223
+#endif
224
225
if (bias < nope_hidden_size) { // pe
226
const uint32_t inner_bias = bias;
0 commit comments