diff --git a/include/cutlass/gemm/kernel/sm70_gemm.hpp b/include/cutlass/gemm/kernel/sm70_gemm.hpp
index faa9b1cd7d..d752ba4139 100644
--- a/include/cutlass/gemm/kernel/sm70_gemm.hpp
+++ b/include/cutlass/gemm/kernel/sm70_gemm.hpp
@@ -210,7 +210,7 @@ static_assert(is_valid_tile_scheduler, "SM70 kernel does not support specializin
     int thread_idx = int(threadIdx.x);
     auto blk_shape = TileShape{}; // (BLK_M,BLK_N,BLK_K)
     auto [m_coord, n_coord, l_coord] = static_cast<uint3>(blockIdx);
-    auto blk_coord_mnkl = make_coord(int(m_coord), int(n_coord), _, int(l_coord)); // (m,n,k,l)
+    auto blk_coord_mnkl = make_coord(int(m_coord), int(n_coord), _, int(l_coord)); // (m,n,k,l) NOTE: int() cast needed; unsigned coords cause underflow in residue computation for small problem shapes.
 
     // Represent the full tensors
     Tensor mA_mkl = make_tensor(make_gmem_ptr(params.mainloop.ptr_A), make_shape(M,K,L), params.mainloop.dA); //(m,k,l)
diff --git a/include/cutlass/gemm/kernel/sm70_gemm_array.hpp b/include/cutlass/gemm/kernel/sm70_gemm_array.hpp
index 409ecda15b..e296e09baf 100644
--- a/include/cutlass/gemm/kernel/sm70_gemm_array.hpp
+++ b/include/cutlass/gemm/kernel/sm70_gemm_array.hpp
@@ -217,7 +217,7 @@ static_assert(is_valid_tile_scheduler, "SM70 kernel does not support specializin
     int thread_idx = int(threadIdx.x);
     auto blk_shape = TileShape{}; // (BLK_M,BLK_N,BLK_K)
     auto [m_coord, n_coord, l_coord] = static_cast<uint3>(blockIdx);
-    auto blk_coord_mnkl = make_coord(int(m_coord), int(n_coord), _, int(l_coord)); // (m,n,k,l)
+    auto blk_coord_mnkl = make_coord(int(m_coord), int(n_coord), _, int(l_coord)); // (m,n,k,l) NOTE: int() cast needed; unsigned coords cause underflow in residue computation for small problem shapes.
 
     // Represent the full tensors
     Tensor mA_mkl = make_tensor(make_gmem_ptr(params.mainloop.ptr_A[l_coord]), make_shape(M,K,1), params.mainloop.dA); //(m,k,l)