Skip to content

Commit 66dc81d

Browse files
jakpiase and assistant-librarian[bot]
authored and committed
[rocm-libraries] ROCm/rocm-libraries#5729 (commit 516c974)
[CK_TILE] Changed cshuffle LDS descriptor to naive layout (#5729) ## Motivation This PR changes the gemm/convolution cshuffle LDS layout into a plain (naive) one to improve the performance of the cshuffle operation. ## Technical Details Before this change, the cshuffle layout used descriptor transformations that were probably aimed at reducing LDS bank conflicts, but the transformations themselves were very slow, which negatively impacted performance. ## Test Plan No additional tests are needed, since the current tests already cover this functionality.
1 parent e6b8094 commit 66dc81d

1 file changed

Lines changed: 12 additions & 3 deletions

File tree

include/ck_tile/ops/epilogue/cshuffle_epilogue.hpp

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -336,9 +336,6 @@ struct CShuffleEpilogue
336336
constexpr index_t BaseWords = ToWords(BaseStrideElems);
337337
constexpr index_t PadWords = ((BaseWords % 2) == 0) ? 1 : 0;
338338
constexpr auto PaddingAmount = PadWords * ElemsPer4B;
339-
#else
340-
constexpr auto PaddingAmount = 0;
341-
#endif
342339

343340
constexpr auto lds_block_desc_0 = make_naive_tensor_descriptor(
344341
make_tuple(number<MPerIterationShuffle / MLdsLayer>{},
@@ -369,6 +366,18 @@ struct CShuffleEpilogue
369366
make_tuple(sequence<0>{}, sequence<1>{}));
370367

371368
return lds_block_desc;
369+
370+
#else
371+
constexpr auto PaddingAmount = 0;
372+
373+
constexpr auto lds_block_desc = make_naive_tensor_descriptor(
374+
make_tuple(number<MPerIterationShuffle>{}, number<NPerIterationShuffle>{}),
375+
make_tuple(number<NPerIterationShuffle + PaddingAmount>{}, number<1>{}),
376+
number<VectorLen>{},
377+
number<1>{});
378+
379+
return lds_block_desc;
380+
#endif
372381
}
373382
// M is contiguous dimension
374383
else if constexpr(std::is_same_v<ELayout, tensor_layout::gemm::ColumnMajor>)

0 commit comments

Comments
 (0)