Skip to content

Commit 649e233

Browse files
bbhattar and Copilot committed
Replaced / and % with Divide and Remainder utils from hwy::Divisor
Co-authored-by: Copilot <copilot@github.com>
1 parent cc0e056 commit 649e233

2 files changed

Lines changed: 15 additions & 8 deletions

File tree

ops/brgemm-inl.h

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -85,17 +85,20 @@ static HWY_NOINLINE void DoMatMul_BRGeMM(
8585
ke.K_blk = cfg.K_blk;
8686
ke.N_blk = cfg.N_blk;
8787
ke.M_blk = std::min(cfg.M_blk, M);
88+
ke.div_M_blk = hwy::Divisor(ke.M_blk);
89+
ke.div_N_blk = hwy::Divisor(ke.N_blk);
90+
ke.div_K_blk = hwy::Divisor(ke.K_blk);
8891

89-
ke.M_tail = M % ke.M_blk;
90-
ke.N_tail = N % ke.N_blk;
91-
ke.K_tail = K % ke.K_blk;
92+
ke.M_tail = ke.div_M_blk.Remainder(M);
93+
ke.N_tail = ke.div_N_blk.Remainder(N);
94+
ke.K_tail = ke.div_K_blk.Remainder(K);
9295

9396
// Floor division: K_tail remainder is handled by a dedicated brg_ktail
9497
// kernel rather than padding K, avoiding extra memory writes to zero-pad
9598
// A and B along the K dimension.
96-
ke.K_chunks = K / ke.K_blk;
97-
ke.N_full_tiles = N / ke.N_blk;
98-
ke.M_full_tiles = M / ke.M_blk;
99+
ke.K_chunks = ke.div_K_blk.Divide(K);
100+
ke.N_full_tiles = ke.div_N_blk.Divide(N);
101+
ke.M_full_tiles = ke.div_M_blk.Divide(M);
99102
ke.N_total_tiles = ke.N_full_tiles + (ke.N_tail ? 1 : 0);
100103
ke.M_total_tiles = ke.M_full_tiles + (ke.M_tail ? 1 : 0);
101104
ke.N_padded = ke.N_total_tiles * ke.N_blk;
@@ -367,8 +370,8 @@ static HWY_NOINLINE void DoMatMul_BRGeMM(
367370
const auto execute_tile = [&](size_t m_start, size_t n_start,
368371
size_t k_super, float* temp_C,
369372
uint8_t* scratch) HWY_ATTR {
370-
const size_t m_tile_idx = m_start / ke.M_blk;
371-
const size_t n_tile_idx = n_start / ke.N_blk;
373+
const size_t m_tile_idx = ke.div_M_blk.Divide(m_start);
374+
const size_t n_tile_idx = ke.div_N_blk.Divide(n_start);
372375
const int mi = (m_tile_idx < ke.M_full_tiles) ? 0 : 1;
373376
const int ni = (n_tile_idx < ke.N_full_tiles) ? 0 : 1;
374377
const size_t cur_m = ke.m_sizes[mi];

ops/brgemm.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,10 @@ struct BRGeMMKernelKeyHash {
165165
// Cached JIT-compiled kernels with precomputed tile parameters and offsets.
166166
struct BRGeMMKernelEntry {
167167
size_t M_blk, N_blk, K_blk;
168+
// Precomputed divisors for fast modulo/division by block sizes.
169+
hwy::Divisor div_M_blk{1};
170+
hwy::Divisor div_N_blk{1};
171+
hwy::Divisor div_K_blk{1};
168172
size_t M_tail, N_tail, K_tail;
169173
size_t K_chunks;
170174
size_t M_full_tiles, N_full_tiles;

0 commit comments

Comments
 (0)