Skip to content

Commit 7ad237f

Browse files
committed
riscv64: wire TRSM, complex SYMV, and complex GEMM copy RVV kernels
Wire existing RVV-optimized kernels, which were already implemented but not referenced, into KERNEL.RISCV64_ZVL128B and KERNEL.RISCV64_ZVL256B:

- TRSM (S/D/C/Z, all 4 directions; both files): replace ../generic/trsm_kernel_*.c with trsm_kernel_*_rvv_v1.c
- TRSM copy (both files): add TRSMCOPY*_M and ZTRSMCOPY*_M variables using trsm_*copy_rvv_v1.c and ztrsm_*copy_rvv_v1.c
- Complex GEMM copy (C/Z; both files): replace ../generic/zgemm_ncopy_*.c / zgemm_tcopy_*.c with zgemm_ncopy_rvv_v1.c / zgemm_tcopy_rvv_v1.c
- Complex SYMV (C/Z; ZVL256B only in this diff): replace ../generic/zsymv_k.c with zsymv_*_rvv.c
- HEMV (C/Z; ZVL256B only in this diff): replace zhemv_*_vector.c with zhemv_*_rvv.c
- Real SYMV (S/D; ZVL256B only in this diff, not listed in the original message): replace symv_*_vector.c with symv_*_rvv.c

Generic fallback counts: ZVL256B 56->28, ZVL128B 63->47
1 parent 3da0ff7 commit 7ad237f

2 files changed

Lines changed: 93 additions & 73 deletions

File tree

kernel/riscv64/KERNEL.RISCV64_ZVL128B

Lines changed: 37 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -140,27 +140,27 @@ DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
140140
endif
141141

142142
CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N)_zvl128b.c
143-
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c
144-
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c
143+
CGEMMONCOPY = zgemm_ncopy_rvv_v1.c
144+
CGEMMOTCOPY = zgemm_tcopy_rvv_v1.c
145145
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
146146
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
147147

148148
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
149-
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c
150-
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c
149+
CGEMMINCOPY = zgemm_ncopy_rvv_v1.c
150+
CGEMMITCOPY = zgemm_tcopy_rvv_v1.c
151151
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
152152
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
153153
endif
154154

155155
ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N)_zvl128b.c
156-
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
157-
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
156+
ZGEMMONCOPY = zgemm_ncopy_rvv_v1.c
157+
ZGEMMOTCOPY = zgemm_tcopy_rvv_v1.c
158158
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
159159
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
160160

161161
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
162-
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c
163-
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c
162+
ZGEMMINCOPY = zgemm_ncopy_rvv_v1.c
163+
ZGEMMITCOPY = zgemm_tcopy_rvv_v1.c
164164
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
165165
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
166166
endif
@@ -189,25 +189,35 @@ ZTRMMLNCOPY_M = ../generic/ztrmm_lncopy_$(ZGEMM_UNROLL_M).c
189189
ZTRMMUTCOPY_M = ../generic/ztrmm_utcopy_$(ZGEMM_UNROLL_M).c
190190
ZTRMMLTCOPY_M = ../generic/ztrmm_ltcopy_$(ZGEMM_UNROLL_M).c
191191

192-
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
193-
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
194-
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
195-
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
196-
197-
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
198-
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
199-
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
200-
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
201-
202-
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
203-
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
204-
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
205-
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
206-
207-
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
208-
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
209-
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
210-
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
192+
STRSMKERNEL_LN = trsm_kernel_LN_rvv_v1.c
193+
STRSMKERNEL_LT = trsm_kernel_LT_rvv_v1.c
194+
STRSMKERNEL_RN = trsm_kernel_RN_rvv_v1.c
195+
STRSMKERNEL_RT = trsm_kernel_RT_rvv_v1.c
196+
197+
DTRSMKERNEL_LN = trsm_kernel_LN_rvv_v1.c
198+
DTRSMKERNEL_LT = trsm_kernel_LT_rvv_v1.c
199+
DTRSMKERNEL_RN = trsm_kernel_RN_rvv_v1.c
200+
DTRSMKERNEL_RT = trsm_kernel_RT_rvv_v1.c
201+
202+
CTRSMKERNEL_LN = trsm_kernel_LN_rvv_v1.c
203+
CTRSMKERNEL_LT = trsm_kernel_LT_rvv_v1.c
204+
CTRSMKERNEL_RN = trsm_kernel_RN_rvv_v1.c
205+
CTRSMKERNEL_RT = trsm_kernel_RT_rvv_v1.c
206+
207+
ZTRSMKERNEL_LN = trsm_kernel_LN_rvv_v1.c
208+
ZTRSMKERNEL_LT = trsm_kernel_LT_rvv_v1.c
209+
ZTRSMKERNEL_RN = trsm_kernel_RN_rvv_v1.c
210+
ZTRSMKERNEL_RT = trsm_kernel_RT_rvv_v1.c
211+
212+
TRSMCOPYLN_M = trsm_lncopy_rvv_v1.c
213+
TRSMCOPYLT_M = trsm_ltcopy_rvv_v1.c
214+
TRSMCOPYUN_M = trsm_uncopy_rvv_v1.c
215+
TRSMCOPYUT_M = trsm_utcopy_rvv_v1.c
216+
217+
ZTRSMCOPYLN_M = ztrsm_lncopy_rvv_v1.c
218+
ZTRSMCOPYLT_M = ztrsm_ltcopy_rvv_v1.c
219+
ZTRSMCOPYUN_M = ztrsm_uncopy_rvv_v1.c
220+
ZTRSMCOPYUT_M = ztrsm_utcopy_rvv_v1.c
211221

212222
SSYMV_U_KERNEL = symv_U_rvv.c
213223
SSYMV_L_KERNEL = symv_L_rvv.c

kernel/riscv64/KERNEL.RISCV64_ZVL256B

Lines changed: 56 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -138,69 +138,79 @@ DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
138138
endif
139139

140140
CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N)_zvl256b.c
141-
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c
142-
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c
141+
CGEMMONCOPY = zgemm_ncopy_rvv_v1.c
142+
CGEMMOTCOPY = zgemm_tcopy_rvv_v1.c
143143
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
144144
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
145145

146146
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
147-
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c
148-
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c
147+
CGEMMINCOPY = zgemm_ncopy_rvv_v1.c
148+
CGEMMITCOPY = zgemm_tcopy_rvv_v1.c
149149
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
150150
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
151151
endif
152152

153153
ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N)_zvl256b.c
154-
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
155-
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
154+
ZGEMMONCOPY = zgemm_ncopy_rvv_v1.c
155+
ZGEMMOTCOPY = zgemm_tcopy_rvv_v1.c
156156
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
157157
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
158158

159159
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
160-
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c
161-
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c
160+
ZGEMMINCOPY = zgemm_ncopy_rvv_v1.c
161+
ZGEMMITCOPY = zgemm_tcopy_rvv_v1.c
162162
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
163163
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
164164
endif
165165

166-
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
167-
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
168-
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
169-
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
170-
171-
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
172-
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
173-
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
174-
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
175-
176-
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
177-
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
178-
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
179-
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
180-
181-
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
182-
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
183-
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
184-
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
185-
186-
SSYMV_U_KERNEL = symv_U_vector.c
187-
SSYMV_L_KERNEL = symv_L_vector.c
188-
DSYMV_U_KERNEL = symv_U_vector.c
189-
DSYMV_L_KERNEL = symv_L_vector.c
190-
191-
CSYMV_U_KERNEL = ../generic/zsymv_k.c
192-
CSYMV_L_KERNEL = ../generic/zsymv_k.c
193-
ZSYMV_U_KERNEL = ../generic/zsymv_k.c
194-
ZSYMV_L_KERNEL = ../generic/zsymv_k.c
195-
196-
CHEMV_L_KERNEL = zhemv_LM_vector.c
197-
CHEMV_M_KERNEL = zhemv_LM_vector.c
198-
CHEMV_U_KERNEL = zhemv_UV_vector.c
199-
CHEMV_V_KERNEL = zhemv_UV_vector.c
200-
ZHEMV_L_KERNEL = zhemv_LM_vector.c
201-
ZHEMV_M_KERNEL = zhemv_LM_vector.c
202-
ZHEMV_U_KERNEL = zhemv_UV_vector.c
203-
ZHEMV_V_KERNEL = zhemv_UV_vector.c
166+
STRSMKERNEL_LN = trsm_kernel_LN_rvv_v1.c
167+
STRSMKERNEL_LT = trsm_kernel_LT_rvv_v1.c
168+
STRSMKERNEL_RN = trsm_kernel_RN_rvv_v1.c
169+
STRSMKERNEL_RT = trsm_kernel_RT_rvv_v1.c
170+
171+
DTRSMKERNEL_LN = trsm_kernel_LN_rvv_v1.c
172+
DTRSMKERNEL_LT = trsm_kernel_LT_rvv_v1.c
173+
DTRSMKERNEL_RN = trsm_kernel_RN_rvv_v1.c
174+
DTRSMKERNEL_RT = trsm_kernel_RT_rvv_v1.c
175+
176+
CTRSMKERNEL_LN = trsm_kernel_LN_rvv_v1.c
177+
CTRSMKERNEL_LT = trsm_kernel_LT_rvv_v1.c
178+
CTRSMKERNEL_RN = trsm_kernel_RN_rvv_v1.c
179+
CTRSMKERNEL_RT = trsm_kernel_RT_rvv_v1.c
180+
181+
ZTRSMKERNEL_LN = trsm_kernel_LN_rvv_v1.c
182+
ZTRSMKERNEL_LT = trsm_kernel_LT_rvv_v1.c
183+
ZTRSMKERNEL_RN = trsm_kernel_RN_rvv_v1.c
184+
ZTRSMKERNEL_RT = trsm_kernel_RT_rvv_v1.c
185+
186+
TRSMCOPYLN_M = trsm_lncopy_rvv_v1.c
187+
TRSMCOPYLT_M = trsm_ltcopy_rvv_v1.c
188+
TRSMCOPYUN_M = trsm_uncopy_rvv_v1.c
189+
TRSMCOPYUT_M = trsm_utcopy_rvv_v1.c
190+
191+
ZTRSMCOPYLN_M = ztrsm_lncopy_rvv_v1.c
192+
ZTRSMCOPYLT_M = ztrsm_ltcopy_rvv_v1.c
193+
ZTRSMCOPYUN_M = ztrsm_uncopy_rvv_v1.c
194+
ZTRSMCOPYUT_M = ztrsm_utcopy_rvv_v1.c
195+
196+
SSYMV_U_KERNEL = symv_U_rvv.c
197+
SSYMV_L_KERNEL = symv_L_rvv.c
198+
DSYMV_U_KERNEL = symv_U_rvv.c
199+
DSYMV_L_KERNEL = symv_L_rvv.c
200+
201+
CSYMV_U_KERNEL = zsymv_U_rvv.c
202+
CSYMV_L_KERNEL = zsymv_L_rvv.c
203+
ZSYMV_U_KERNEL = zsymv_U_rvv.c
204+
ZSYMV_L_KERNEL = zsymv_L_rvv.c
205+
206+
CHEMV_L_KERNEL = zhemv_LM_rvv.c
207+
CHEMV_M_KERNEL = zhemv_LM_rvv.c
208+
CHEMV_U_KERNEL = zhemv_UV_rvv.c
209+
CHEMV_V_KERNEL = zhemv_UV_rvv.c
210+
ZHEMV_L_KERNEL = zhemv_LM_rvv.c
211+
ZHEMV_M_KERNEL = zhemv_LM_rvv.c
212+
ZHEMV_U_KERNEL = zhemv_UV_rvv.c
213+
ZHEMV_V_KERNEL = zhemv_UV_rvv.c
204214

205215
LSAME_KERNEL = ../generic/lsame.c
206216

0 commit comments

Comments
 (0)