Skip to content

Commit b0ee407

Browse files
committed
Preserve K.
1 parent eb9bbcc commit b0ee407

1 file changed

Lines changed: 11 additions & 0 deletions

File tree

kernel/riscv64/sgemm_kernel_16x8_zvl256b.c

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -671,6 +671,7 @@ static void FORCEINLINE M_TAIL_ONE(BLASLONG K, const BLASLONG M, const BLASLONG
671671
c18 = __riscv_vfmacc_vf_f32m8(c18, alpha, c28, N);
672672
__riscv_vse32_v_f32m8(C, c18, N * 8);
673673
} else {
674+
// Can swap A and B and remove transpose when compilers get better
674675
FLOAT temp[8 * 8];
675676
vfloat32m1x8_t c28 = __riscv_vcreate_v_f32m1x8(result0, result1, result2, result3, result4, result5, result6, result7);
676677
__riscv_vsseg8e32_v_f32m1x8(temp, c28, N);
@@ -1149,6 +1150,7 @@ static void FORCEINLINE M_TAIL_ONE(BLASLONG K, const BLASLONG M, const BLASLONG
11491150
c14 = __riscv_vfmacc_vf_f32m4(c14, alpha, c24, 4 * 8);
11501151
__riscv_vse32_v_f32m4(C, c14, 4 * 8);
11511152
} else {
1153+
// Can swap A and B and remove transpose when compilers get better
11521154
FLOAT temp[8 * 4];
11531155
vfloat32mf2x8_t c18 = __riscv_vcreate_v_f32mf2x8(result0, result1, result2, result3, result4, result5, result6, result7);
11541156
__riscv_vsseg8e32_v_f32mf2x8(temp, c18, 4);
@@ -1503,6 +1505,12 @@ static void FORCEINLINE N_TAIL_ONE(BLASLONG K, BLASLONG M, const BLASLONG N, FLO
15031505
if (N & 1) {
15041506
B04 = B + ((N & 6) * K);
15051507
}
1508+
#endif
1509+
#ifdef GEMM_BOTTOM_CHUNK
1510+
FLOAT K2;
1511+
if (N <= 4) {
1512+
K2 = K;
1513+
}
15061514
#endif
15071515
do {
15081516
FLOAT B0, B1, B2, B3, B4, B5, B6;
@@ -1530,6 +1538,9 @@ static void FORCEINLINE N_TAIL_ONE(BLASLONG K, BLASLONG M, const BLASLONG N, FLO
15301538
vfloat32m1_t A2, A3, A4, A5, A6, A7;
15311539
vfloat32m1_t resultE, resultF;
15321540
FLOAT B7;
1541+
if (N <= 4) {
1542+
K = K2;
1543+
}
15331544

15341545
if (N == 1) {
15351546
if (K >= 8) {

0 commit comments

Comments
 (0)