Skip to content

Commit a79cc1c

Browse files
densamoilov azhai219
authored and committed
cpu: x64: matmul fix wei_k_blk query
1 parent b1c5cb9 commit a79cc1c

3 files changed

Lines changed: 26 additions & 31 deletions

File tree

src/cpu/x64/matmul/brgemm_matmul.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1755,7 +1755,7 @@ struct brgemm_matmul_t<isa>::brg_matmul_exec_ctx_t {
17551755

17561756
dim_t get_data_B_kn_off(int k, int n) const {
17571757
const int wei_k_blk
1758-
= bgmmc_.is_bf32 ? bgmmc_.orig_wei_k_blk : bgmmc_.wei_k_blk;
1758+
= bgmmc_.is_bf32 ? get_wei_k_blk(f32) : bgmmc_.wei_k_blk;
17591759
const int k_idx = bgmmc_.blocked_B ? k / wei_k_blk : k;
17601760
const int n_idx = bgmmc_.blocked_B ? n / bgmmc_.wei_n_blk : n;
17611761
const int int4_fac = bgmmc_.is_int4_weights ? 2 : 1;

src/cpu/x64/matmul/brgemm_matmul_utils.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,16 @@ int get_n_block_from_tag(format_tag_t matrix_b_tag) {
8181
}
8282
}
8383

84+
int get_wei_k_blk(data_type_t wei_dt) {
85+
// Fixed outer block size.
86+
const int k_outer_block = 16;
87+
88+
// VNNI granularity determines the inner block size along K.
89+
const int k_inner_block = data_type_vnni_granularity(wei_dt);
90+
91+
return k_outer_block * k_inner_block;
92+
}
93+
8494
void mem_advice_init(brgemm_matmul_conf_t &bgmmc) {
8595

8696
dim_t parallel_work_amount = bgmmc.batch * bgmmc.M_chunks * bgmmc.N_chunks;
@@ -1528,9 +1538,7 @@ status_t init_brgemm_matmul_conf(cpu_isa_t isa, brgemm_matmul_conf_t &bgmmc,
15281538

15291539
VCONDCHECK_BG(bgmmc.required_k_granularity > 0, VERBOSE_BLOCKING_FAIL, "");
15301540

1531-
bgmmc.wei_k_blk = bm_conf_utils.get_wei_k_blk();
1532-
bgmmc.orig_wei_k_blk
1533-
= bm_conf_utils.get_wei_k_blk(/*use_orig_wei_dt=*/true);
1541+
bgmmc.wei_k_blk = get_wei_k_blk(bgmmc.wei_dt);
15341542

15351543
VCHECK_BG(bm_conf_utils.set_or_check_B_tag(weights_md, helper),
15361544
VERBOSE_UNSUPPORTED_TAG);

src/cpu/x64/matmul/brgemm_matmul_utils.hpp

Lines changed: 14 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ struct brgemm_matmul_conf_t {
9494
dim_t LDA, LDB, LDC, LDD;
9595
dim_t LDB2;
9696
int brgemm_batch_size, brgemm_batch_tail_size;
97-
int wei_n_blk, wei_k_blk, orig_wei_k_blk;
97+
int wei_n_blk, wei_k_blk;
9898
brgemm_batch_kind_t brg_type;
9999
bool is_macro_heuristics;
100100

@@ -282,32 +282,6 @@ struct brgemm_matmul_conf_utils_t {
282282
&& check_b_layout_blocked_by_n(bgmmc.wei_tag);
283283
}
284284

285-
/**
286-
* Returns the total block size along the K dimension, as the product of
287-
* the fixed outer block size and the VNNI granularity.
288-
*
289-
* Example: For format tag BA16a16b4a, the block size is
290-
* 16 (outer) * 4 (VNNI granularity) = 64.
291-
*
292-
* @param use_orig_wei_dt If true, use the original weight data type to
293-
* determine block size. If false, use the compute data type.
294-
* @return The total K dimension block size.
295-
*/
296-
int get_wei_k_blk(bool use_orig_wei_dt = false) const {
297-
// No blocking is used in GEMV mode.
298-
if (bgmmc.is_gemv) return 1;
299-
300-
// Fixed outer block size.
301-
const int k_outer_block = 16;
302-
303-
// VNNI granularity determines the inner block size along K.
304-
const data_type_t wei_dt
305-
= use_orig_wei_dt ? bgmmc.orig_wei_dt : bgmmc.wei_dt;
306-
const int k_inner_block = data_type_vnni_granularity(wei_dt);
307-
308-
return k_outer_block * k_inner_block;
309-
}
310-
311285
inline bool use_buffer_b(bool use_heuristic = true) const {
312286
if (bgmmc.is_runtime_N) return true;
313287
if (bgmmc.is_bf16_with_int_wei) return true;
@@ -483,6 +457,19 @@ void mem_advice_init(brgemm_matmul_conf_t &bgmmc);
483457

484458
bool is_batch_layout_trivial(const memory_desc_wrapper &mdw, const dim_t batch);
485459

460+
/**
461+
* Returns the total block size along the K dimension, as the product of
462+
* the fixed outer block size and the VNNI granularity.
463+
*
464+
* Example: For format tag BA16a16b4a, the block size is
465+
* 16 (outer) * 4 (VNNI granularity) = 64.
466+
*
467+
* @param wei_dt Weights data type.
468+
*
469+
* @return The total K dimension block size.
470+
*/
471+
int get_wei_k_blk(data_type_t wei_dt);
472+
486473
} // namespace matmul
487474
} // namespace x64
488475
} // namespace cpu

0 commit comments

Comments
 (0)