@@ -296,7 +296,7 @@ void GetBlockShapeAndSplitKVBlock(
296296 if (!phi::backends::gpu::IsCUDAGraphCapturing ())
297297#endif
298298 max_len_tensor_cpu.copy_ (
299- max_len_tensor_gpu, max_len_tensor_cpu.place (), false );
299+ max_len_tensor_gpu, max_len_tensor_cpu.place (), true );
300300
301301 auto max_len_cpu_ptr = max_len_tensor_cpu.data <int >();
302302 int max_len_this_time = max_len_cpu_ptr[0 ];
@@ -379,7 +379,7 @@ void GetBlockShapeAndSplitKVBlock(
379379 if (!phi::backends::gpu::IsCUDAGraphCapturing ())
380380#endif
381381 decoder_num_blocks_cpu.copy_ (
382- decoder_num_blocks_device, decoder_num_blocks_cpu.place (), false );
382+ decoder_num_blocks_device, decoder_num_blocks_cpu.place (), true );
383383 }
384384 }
385385 // mla_backend not need run the following code.
@@ -410,7 +410,7 @@ void GetBlockShapeAndSplitKVBlock(
410410 block_size);
411411
412412 kv_num_blocks_x_cpu.copy_ (
413- kv_num_blocks_x, kv_num_blocks_x_cpu.place (), false );
413+ kv_num_blocks_x, kv_num_blocks_x_cpu.place (), true );
414414 // Clear buffer
415415 const uint32_t encoder_max_tile_size_per_bs_q =
416416 div_up ((max_enc_dec_len_this_time * group_size), encoder_block_shape_q);
@@ -434,7 +434,7 @@ void GetBlockShapeAndSplitKVBlock(
434434 encoder_block_shape_q,
435435 group_size);
436436 encoder_num_blocks_x_cpu.copy_ (
437- encoder_num_blocks_x, encoder_num_blocks_x_cpu.place (), false );
437+ encoder_num_blocks_x, encoder_num_blocks_x_cpu.place (), true );
438438 }
439439}
440440
0 commit comments