Skip to content

Commit 772d2ec

Browse files
committed
save code
1 parent 43704d9 commit 772d2ec

1 file changed

Lines changed: 4 additions & 4 deletions

File tree

csrc/xpu_cutlass_fusion.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -509,8 +509,8 @@ printf("src_compress_size = %d, dst_compress_size = %d, src_vec_size = %d, dst_v
509509
constexpr int src_loop_num = K / src_vec_size / src_compress_size;
510510
constexpr int dst_loop_num = K / dst_vec_size / dst_compress_size;
511511
src_compress_type src[src_vec_size];
512-
//ElementMMA dst[dst_loop_num * dst_compress_size * dst_vec_size];
513-
alignas(64) ElementMMA* dst = reinterpret_cast<ElementMMA*>(smem_buf + 16 * sizeof(float) * LUT_NUM + thread_idx * decltype(cute::size(mma_B))::value * sizeof(ElementMMA));
512+
ElementMMA dst[dst_loop_num * dst_compress_size * dst_vec_size];
513+
//alignas(64) ElementMMA* dst = reinterpret_cast<ElementMMA*>(smem_buf + 16 * sizeof(float) * LUT_NUM + thread_idx * decltype(cute::size(mma_B))::value * sizeof(ElementMMA));
514514

515515
int lut_id = start_lut_id;
516516

@@ -634,8 +634,8 @@ void gemm_4bit_cutlass(int m, int n, int k, int l, T *A, unsigned char *B,
634634
//std::cout<<"group_size = "<<blocksize<<std::endl;
635635

636636
#if 1
637-
//static constexpr int smem_size= (16) * sizeof(float) * LUT_NUM;
638-
static constexpr int smem_size= (16) * sizeof(float) * LUT_NUM + BLK_N * BLK_K * sizeof(ElementMMA)*2;
637+
static constexpr int smem_size= (16) * sizeof(float) * LUT_NUM;
638+
//static constexpr int smem_size= (16) * sizeof(float) * LUT_NUM + BLK_N * BLK_K * sizeof(ElementMMA)*2;
639639
#else
640640
static constexpr int smem_size = BLK_N * BLK_K * sizeof(ElementMMA) * 2 * 2; //aligned with 128B and will be reused for dequant src and dst.
641641
#endif

0 commit comments

Comments
 (0)