Skip to content

Commit 79d9fe3

Browse files
committed
Use mf2 instead of m1.
1 parent f927b94 commit 79d9fe3

1 file changed

Lines changed: 10 additions & 10 deletions

File tree

kernel/riscv64/sgemm_kernel_16x8_zvl256b.c

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -818,7 +818,7 @@ static void FORCEINLINE M_TAIL_ONE(BLASLONG K, const BLASLONG M, const BLASLONG
818818
vfloat32mf2_t result0, result1, result2, result3, result4, result5, result6, result7;
819819
vfloat32mf2_t result8, result9, resultA, resultB, resultC, resultD, resultE, resultF;
820820
vfloat32m1_t result00, result01, result02, result03, result04, result05;
821-
vfloat32m1_t result08, result09, result0A, result0B, result0C, result0D;
821+
vfloat32m1_t result08, result09, result0A, result0B;
822822
FLOAT r0, r1, r2, r8, r9, rA, rC, rD, rE, a0, a1, a2;
823823
FLOAT B0, B1, B2;
824824
#ifndef GEMM_NEW_PACKING
@@ -1320,17 +1320,17 @@ static void FORCEINLINE M_TAIL_ONE(BLASLONG K, const BLASLONG M, const BLASLONG
13201320
}
13211321
if (M & 4) {
13221322
if (N & 2) {
1323-
result0C = __riscv_vle32_v_f32m1(C, 4);
1324-
result0D = __riscv_vle32_v_f32m1(C2, 4);
1325-
result0C = __riscv_vfmacc_vf_f32m1(result0C, alpha, result09, 4);
1326-
result0D = __riscv_vfmacc_vf_f32m1(result0D, alpha, result0A, 4);
1327-
__riscv_vse32_v_f32m1(C, result0C, 4);
1328-
__riscv_vse32_v_f32m1(C2, result0D, 4);
1323+
resultC = __riscv_vle32_v_f32mf2(C, 4);
1324+
resultD = __riscv_vle32_v_f32mf2(C2, 4);
1325+
resultC = __riscv_vfmacc_vf_f32mf2(resultC, alpha, __riscv_vlmul_trunc_v_f32m1_f32mf2(result09), 4);
1326+
resultD = __riscv_vfmacc_vf_f32mf2(resultD, alpha, __riscv_vlmul_trunc_v_f32m1_f32mf2(result0A), 4);
1327+
__riscv_vse32_v_f32mf2(C, resultC, 4);
1328+
__riscv_vse32_v_f32mf2(C2, resultD, 4);
13291329
}
13301330
if (N & 1) {
1331-
result0B = __riscv_vle32_v_f32m1(C1, 4);
1332-
result0B = __riscv_vfmacc_vf_f32m1(result0B, alpha, result08, 4);
1333-
__riscv_vse32_v_f32m1(C1, result0B, 4);
1331+
resultB = __riscv_vle32_v_f32mf2(C1, 4);
1332+
resultB = __riscv_vfmacc_vf_f32mf2(resultB, alpha, __riscv_vlmul_trunc_v_f32m1_f32mf2(result08), 4);
1333+
__riscv_vse32_v_f32mf2(C1, resultB, 4);
13341334
}
13351335
if (M & 3) {
13361336
if (N & 2) {

0 commit comments

Comments
 (0)