From baedee9ba363590bbab25cbb69c1a65957ffb2f5 Mon Sep 17 00:00:00 2001
From: Dino Music
Date: Thu, 11 Dec 2025 13:19:47 +0000
Subject: [PATCH 1/2] Add rocm to prefix path for codegen

---
 codegen/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/codegen/CMakeLists.txt b/codegen/CMakeLists.txt
index 22d8e58d109..69a6a71de21 100644
--- a/codegen/CMakeLists.txt
+++ b/codegen/CMakeLists.txt
@@ -15,6 +15,7 @@ configure_file(${CK_ROOT}/include/ck/config.h.in ${CK_ROOT}/include/ck/config.h)
 find_package(ROCM)
 include(ROCMInstallTargets)
 include(ROCMTest)
+list(APPEND CMAKE_PREFIX_PATH /opt/rocm $ENV{ROCM_PATH})
 find_package(hiprtc REQUIRED)

 rocm_setup_version(VERSION 1.0)

From cbe19d793986177499622d224482d74edd3d76a8 Mon Sep 17 00:00:00 2001
From: Dino Music
Date: Wed, 14 Jan 2026 12:17:27 +0000
Subject: [PATCH 2/2] Fix issue with c0_matrix_mask construction

---
 .../impl/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp
index d6a4f49be8c..90317bf38eb 100644
--- a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp
@@ -1059,7 +1059,7 @@ struct DeviceBatchedGemmSoftmaxGemm_Xdl_CShuffle
 c_grid_desc_m_n)},
 has_main_k_block_loop{GridwiseGemm64::CalculateHasMainKBlockLoop(
 a_grid_desc_ak0_m_ak1.GetLength(I0) * a_grid_desc_ak0_m_ak1.GetLength(I2))},
- c0_matrix_mask{c.GetLength(I1)},
+ c0_matrix_mask{b.GetLength(I0)},
 a_element_op{a_element_op_},
 b_element_op{b_element_op_},
 b1_element_op{b1_element_op_},