Skip to content

Commit c59346f

Browse files
bartekxk authored and shumway committed
Disable ActiveWorkgroupsPerCU for different arch in wmma kernels (#3566)
1 parent 33d4ae8 commit c59346f

3 files changed

Lines changed: 12 additions & 0 deletions

File tree

include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_wmma_cshuffle_v3.hpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -314,6 +314,10 @@ struct DeviceBatchedGemmMultiD_Wmma_CShuffleV3
314314
{
315315
ActiveWorkgroupsPerCU()
316316
{
317+
if(!ck::is_gfx11_supported() && !ck::is_gfx12_supported())
318+
{
319+
return;
320+
}
317321
constexpr int dynamic_smem_size = 0;
318322
int max_occupancy = 0;
319323

include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_two_stage_wmma_cshuffle_v3.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,10 @@ struct DeviceGroupedConvBwdWeightTwoStage_Wmma_CShuffleV3
466466
{
467467
ActiveWorkgroupsPerCU()
468468
{
469+
if(!ck::is_gfx11_supported() && !ck::is_gfx12_supported())
470+
{
471+
return;
472+
}
469473
constexpr int dynamic_smem_size = 0;
470474
constexpr index_t minimum_occupancy =
471475
BlkGemmPipeSched == BlockGemmPipelineScheduler::Intrawave ? 1 : 2;

include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_wmma_cshuffle_v3.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,10 @@ struct DeviceGroupedConvBwdWeight_Wmma_CShuffleV3
415415
{
416416
ActiveWorkgroupsPerCU()
417417
{
418+
if(!ck::is_gfx11_supported() && !ck::is_gfx12_supported())
419+
{
420+
return;
421+
}
418422
constexpr int dynamic_smem_size = 0;
419423
constexpr index_t minimum_occupancy =
420424
BlkGemmPipeSched == BlockGemmPipelineScheduler::Intrawave ? 1 : 2;

0 commit comments

Comments
 (0)