[https://nvbugs/6331421][fix] Fix TRTLLM-GEN backend multiCtasKv counter clear (NVIDIA#15761)

pengbowang-nv · MrGeva · commit 5178e1308b3b · 2026-07-01T03:50:10.000-07:00
Signed-off-by: Pengbo Wang <221450789+pengbowang-nv@users.noreply.github.com>
diff --git a/tensorrt_llm/_torch/attention_backend/fmha/flashinfer_trtllm_gen.py b/tensorrt_llm/_torch/attention_backend/fmha/flashinfer_trtllm_gen.py
@@ -78,15 +78,15 @@ def _clear_multi_ctas_kv_counter_workspace(
         max_num_requests,
         multi_processor_count,
     )
-    fmha_workspace.narrow(0, 0, counter_size).zero_()
+    fmha_workspace.flatten().narrow(0, 0, counter_size).zero_()
 
 
 def _get_multi_ctas_kv_counter_size(
     num_heads: int,
     max_num_requests: int,
     multi_processor_count: Optional[int],
 ) -> int:
-    return max(num_heads * max_num_requests, multi_processor_count or 0)
+    return max(num_heads * max_num_requests, multi_processor_count or 0) * torch.int32.itemsize
 
 
 def _get_bmm1_scale_log2(bmm1_scale: torch.Tensor) -> torch.Tensor:
diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
@@ -159,7 +159,6 @@ full:A100/accuracy/test_disaggregated_serving.py::TestNemotron3Super120B::test_c
 full:A100/disaggregated/test_workers.py::test_workers_conditional_disaggregation_deepseek_v3_lite_bf16[DeepSeek-V3-Lite-bf16] SKIP (https://nvbugs/6329052)
 full:A100X/llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_mtp SKIP (https://nvbugs/6287561)
 full:B200/accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_pp4_mtp] SKIP (https://nvbugs/5970614)
-full:B200/accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_eagle3_tp8[eagle3_one_model=True-torch_compile=True] SKIP (https://nvbugs/6331421)
 full:B200/accuracy/test_llm_api_pytorch.py::TestMiniMaxM3::test_auto_dtype[tp_size=8-ep_size=8] SKIP (https://nvbugs/6384747)
 full:B200/accuracy/test_llm_api_pytorch.py::TestNemotronV3Ultra::test_nvfp4_4gpus_block_reuse[TEP4] SKIP (https://nvbugs/6317074)
 full:B200/accuracy/test_llm_api_pytorch.py::TestQwen3_5_4B::test_dflash SKIP (https://nvbugs/6344883)