Skip to content

Commit 8d62919

Browse files
committed
save code
1 parent 87b9650 commit 8d62919

1 file changed

Lines changed: 3 additions & 6 deletions

File tree

csrc/xpu_cutlass_fusion.cpp

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -240,12 +240,8 @@ inline float dDequantizeNF4(unsigned char val) {
240240
// Load the dequantize LUT and save it to SLM; 16 entries for 4 bits
241241
alignas(128) float* quant_map_ = reinterpret_cast<float*>(smem_buf);
242242
//alignas(16) float* quant_map_2 = reinterpret_cast<float*>(smem_buf + 32*4);
243-
if (thread_idx < 16) {
244-
float value = params.datatype[thread_idx];
245-
quant_map_[thread_idx] = value;
246-
quant_map_[thread_idx + 16] = value;
247-
quant_map_[thread_idx + 32] = value;
248-
quant_map_[thread_idx + 48] = value;
243+
if (thread_idx < 64) {
244+
quant_map_[thread_idx] = params.datatype[thread_idx % 16];
249245
//quant_map_[thread_idx + 64] = value;
250246
//quant_map_[thread_idx + 80] = value;
251247
//quant_map_[thread_idx + 96] = value;
@@ -563,6 +559,7 @@ printf("src_compress_size = %d, dst_compress_size = %d, src_vec_size = %d, dst_v
563559
}
564560

565561
int map_offset = 16 * (sg_idx % 4);
562+
//int map_offset = 16 * ((sg_idx ^ (sg_idx >> 2)) % 4);
566563

567564
for (int k_tile = k_start_idx, k_s = 0; k_tile < k_tile_count; k_tile++, k_s++, prefetch_k++) {
568565
#if 1 //SLM: 0, register: 1

0 commit comments

Comments
 (0)