File tree Expand file tree Collapse file tree
tensorrt_llm/_torch/attention_backend/fmha
tests/integration/test_lists Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -78,15 +78,15 @@ def _clear_multi_ctas_kv_counter_workspace(
7878 max_num_requests ,
7979 multi_processor_count ,
8080 )
81- fmha_workspace .narrow (0 , 0 , counter_size ).zero_ ()
81+ fmha_workspace .flatten (). narrow (0 , 0 , counter_size ).zero_ ()
8282
8383
8484def _get_multi_ctas_kv_counter_size (
8585 num_heads : int ,
8686 max_num_requests : int ,
8787 multi_processor_count : Optional [int ],
8888) -> int :
89- return max (num_heads * max_num_requests , multi_processor_count or 0 )
89+ return max (num_heads * max_num_requests , multi_processor_count or 0 ) * torch . int32 . itemsize
9090
9191
9292def _get_bmm1_scale_log2 (bmm1_scale : torch .Tensor ) -> torch .Tensor :
Original file line number Diff line number Diff line change @@ -159,7 +159,6 @@ full:A100/accuracy/test_disaggregated_serving.py::TestNemotron3Super120B::test_c
159159full:A100/disaggregated/test_workers.py::test_workers_conditional_disaggregation_deepseek_v3_lite_bf16[DeepSeek-V3-Lite-bf16] SKIP (https://nvbugs/6329052)
160160full:A100X/llmapi/test_llm_examples.py::test_llmapi_speculative_decoding_mtp SKIP (https://nvbugs/6287561)
161161full:B200/accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_pp4_mtp] SKIP (https://nvbugs/5970614)
162- full:B200/accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_eagle3_tp8[eagle3_one_model=True-torch_compile=True] SKIP (https://nvbugs/6331421)
163162full:B200/accuracy/test_llm_api_pytorch.py::TestMiniMaxM3::test_auto_dtype[tp_size=8-ep_size=8] SKIP (https://nvbugs/6384747)
164163full:B200/accuracy/test_llm_api_pytorch.py::TestNemotronV3Ultra::test_nvfp4_4gpus_block_reuse[TEP4] SKIP (https://nvbugs/6317074)
165164full:B200/accuracy/test_llm_api_pytorch.py::TestQwen3_5_4B::test_dflash SKIP (https://nvbugs/6344883)
You can’t perform that action at this time.
0 commit comments