Skip to content

Commit ae22645

Browse files
ken-ungerxnnpack-bot
authored andcommitted
Copybara import of the project:
-- 0cd97f2 by Ken Unger <ken.j.unger@gmail.com>: add rvv support for f16-vcmul -- 1f8d093 by Ken Unger <ken.j.unger@gmail.com>: add rvv support for f16-vcmul -- 64d21ff by Ken Unger <ken.j.unger@gmail.com>: handle unconfigured f16-vcmul kernel -- 8b3bda4 by Ken Unger <ken.j.unger@gmail.com>: update-microkernels FUTURE_COPYBARA_INTEGRATE_REVIEW=#9971 from ken-unger:f16-vcmul-rvv 8b3bda4 PiperOrigin-RevId: 907284990
1 parent b2f46c0 commit ae22645

11 files changed

Lines changed: 253 additions & 40 deletions

cmake/gen/rvv_microkernels.cmake

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,8 @@ SET(PROD_RVV_MICROKERNEL_SRCS
6969
src/f32-vcopysign/gen/f32-vcopysignc-rvv-u8v.c
7070
src/f32-vcopysign/gen/f32-vrcopysignc-rvv-u8v.c
7171
src/f32-vcos/gen/f32-vcos-rvv-rational-5-4-div-u8v.c
72-
src/f32-vexp/gen/f32-vexp-rvv-rational-3-2-div-u8v.c
7372
src/f32-velu/gen/f32-velu-rvv-rr2-p6-u4v.c
73+
src/f32-vexp/gen/f32-vexp-rvv-rational-3-2-div-u8v.c
7474
src/f32-vgelu/gen/f32-vgelu-rvv-rational-12-10-div-u4v.c
7575
src/f32-vhswish/gen/f32-vhswish-rvv-u4v.c
7676
src/f32-vlog/gen/f32-vlog-rvv-rational-3-3-div-u8v.c
@@ -238,12 +238,12 @@ SET(NON_PROD_RVV_MICROKERNEL_SRCS
238238
src/f32-vcos/gen/f32-vcos-rvv-rational-5-4-div-u1v.c
239239
src/f32-vcos/gen/f32-vcos-rvv-rational-5-4-div-u2v.c
240240
src/f32-vcos/gen/f32-vcos-rvv-rational-5-4-div-u4v.c
241-
src/f32-vexp/gen/f32-vexp-rvv-rational-3-2-div-u1v.c
242-
src/f32-vexp/gen/f32-vexp-rvv-rational-3-2-div-u2v.c
243-
src/f32-vexp/gen/f32-vexp-rvv-rational-3-2-div-u4v.c
244241
src/f32-velu/gen/f32-velu-rvv-rr2-p6-u1v.c
245242
src/f32-velu/gen/f32-velu-rvv-rr2-p6-u2v.c
246243
src/f32-velu/gen/f32-velu-rvv-rr2-p6-u8v.c
244+
src/f32-vexp/gen/f32-vexp-rvv-rational-3-2-div-u1v.c
245+
src/f32-vexp/gen/f32-vexp-rvv-rational-3-2-div-u2v.c
246+
src/f32-vexp/gen/f32-vexp-rvv-rational-3-2-div-u4v.c
247247
src/f32-vgelu/gen/f32-vgelu-rvv-rational-12-10-div-u1v.c
248248
src/f32-vgelu/gen/f32-vgelu-rvv-rational-12-10-div-u2v.c
249249
src/f32-vgelu/gen/f32-vgelu-rvv-rational-12-10-div-u8v.c

cmake/gen/rvvfp16arith_microkernels.cmake

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,9 @@ SET(PROD_RVVFP16ARITH_MICROKERNEL_SRCS
5353
src/f16-vbinary/gen/f16-vsub-rvvfp16arith-u8v.c
5454
src/f16-vbinary/gen/f16-vsubc-rvvfp16arith-u8v.c
5555
src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u8v.c
56-
src/f16-vgelu/gen/f16-vgelu-rvvfp16arith-rational-6-4-div-u2v.c
56+
src/f16-vcmul/gen/f16-vcmul-rvvfp16arith-u2v.c
5757
src/f16-velu/gen/f16-velu-rvvfp16arith-rr1-p3-u2v.c
58+
src/f16-vgelu/gen/f16-vgelu-rvvfp16arith-rational-6-4-div-u2v.c
5859
src/f16-vhswish/gen/f16-vhswish-rvvfp16arith-u8v.c
5960
src/f16-vlrelu/gen/f16-vlrelu-rvvfp16arith-u8v.c
6061
src/f16-vmulcaddc/gen/f16-vmulcaddc-c4v-minmax-rvvfp16arith-2x.c
@@ -63,9 +64,9 @@ SET(PROD_RVVFP16ARITH_MICROKERNEL_SRCS
6364
src/f16-vrnd/gen/f16-vrndu-rvvfp16arith-u8v.c
6465
src/f16-vrnd/gen/f16-vrndz-rvvfp16arith-u8v.c
6566
src/f16-vrsqrt/gen/f16-vrsqrt-rvvfp16arith-rsqrt-u4v.c
67+
src/f16-vsigmoid/gen/f16-vsigmoid-rvvfp16arith-rr2-p2-u2v.c
6668
src/f16-vsqrt/gen/f16-vsqrt-rvvfp16arith-sqrt-u8v.c
6769
src/f16-vtanh/gen/f16-vtanh-rvvfp16arith-expm1minus-rr1-p3h2ts-div-u2v.c
68-
src/f16-vsigmoid/gen/f16-vsigmoid-rvvfp16arith-rr2-p2-u2v.c
6970
src/f16-vunary/gen/f16-vabs-rvvfp16arith-u8v.c
7071
src/f16-vunary/gen/f16-vneg-rvvfp16arith-u8v.c
7172
src/f16-vunary/gen/f16-vsqr-rvvfp16arith-u8v.c
@@ -160,10 +161,12 @@ SET(NON_PROD_RVVFP16ARITH_MICROKERNEL_SRCS
160161
src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u1v.c
161162
src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u2v.c
162163
src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u4v.c
163-
src/f16-vgelu/gen/f16-vgelu-rvvfp16arith-rational-6-4-div-u1v.c
164-
src/f16-vgelu/gen/f16-vgelu-rvvfp16arith-rational-6-4-div-u4v.c
164+
src/f16-vcmul/gen/f16-vcmul-rvvfp16arith-u1v.c
165+
src/f16-vcmul/gen/f16-vcmul-rvvfp16arith-u4v.c
165166
src/f16-velu/gen/f16-velu-rvvfp16arith-rr1-p3-u1v.c
166167
src/f16-velu/gen/f16-velu-rvvfp16arith-rr1-p3-u4v.c
168+
src/f16-vgelu/gen/f16-vgelu-rvvfp16arith-rational-6-4-div-u1v.c
169+
src/f16-vgelu/gen/f16-vgelu-rvvfp16arith-rational-6-4-div-u4v.c
167170
src/f16-vhswish/gen/f16-vhswish-rvvfp16arith-u4v.c
168171
src/f16-vlrelu/gen/f16-vlrelu-rvvfp16arith-u4v.c
169172
src/f16-vmulcaddc/gen/f16-vmulcaddc-c8v-minmax-rvvfp16arith-2x.c
@@ -172,11 +175,11 @@ SET(NON_PROD_RVVFP16ARITH_MICROKERNEL_SRCS
172175
src/f16-vrnd/gen/f16-vrndu-rvvfp16arith-u4v.c
173176
src/f16-vrnd/gen/f16-vrndz-rvvfp16arith-u4v.c
174177
src/f16-vrsqrt/gen/f16-vrsqrt-rvvfp16arith-rsqrt-u2v.c
178+
src/f16-vsigmoid/gen/f16-vsigmoid-rvvfp16arith-rr2-p2-u1v.c
179+
src/f16-vsigmoid/gen/f16-vsigmoid-rvvfp16arith-rr2-p2-u4v.c
175180
src/f16-vsqrt/gen/f16-vsqrt-rvvfp16arith-sqrt-u4v.c
176181
src/f16-vtanh/gen/f16-vtanh-rvvfp16arith-expm1minus-rr1-p3h2ts-div-u1v.c
177182
src/f16-vtanh/gen/f16-vtanh-rvvfp16arith-expm1minus-rr1-p3h2ts-div-u4v.c
178-
src/f16-vsigmoid/gen/f16-vsigmoid-rvvfp16arith-rr2-p2-u1v.c
179-
src/f16-vsigmoid/gen/f16-vsigmoid-rvvfp16arith-rr2-p2-u4v.c
180183
src/f16-vunary/gen/f16-vabs-rvvfp16arith-u1v.c
181184
src/f16-vunary/gen/f16-vabs-rvvfp16arith-u2v.c
182185
src/f16-vunary/gen/f16-vabs-rvvfp16arith-u4v.c

gen/rvv_microkernels.bzl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,8 @@ PROD_RVV_MICROKERNEL_SRCS = [
6565
"src/f32-vcopysign/gen/f32-vcopysignc-rvv-u8v.c",
6666
"src/f32-vcopysign/gen/f32-vrcopysignc-rvv-u8v.c",
6767
"src/f32-vcos/gen/f32-vcos-rvv-rational-5-4-div-u8v.c",
68-
"src/f32-vexp/gen/f32-vexp-rvv-rational-3-2-div-u8v.c",
6968
"src/f32-velu/gen/f32-velu-rvv-rr2-p6-u4v.c",
69+
"src/f32-vexp/gen/f32-vexp-rvv-rational-3-2-div-u8v.c",
7070
"src/f32-vgelu/gen/f32-vgelu-rvv-rational-12-10-div-u4v.c",
7171
"src/f32-vhswish/gen/f32-vhswish-rvv-u4v.c",
7272
"src/f32-vlog/gen/f32-vlog-rvv-rational-3-3-div-u8v.c",
@@ -235,12 +235,12 @@ NON_PROD_RVV_MICROKERNEL_SRCS = [
235235
"src/f32-vcos/gen/f32-vcos-rvv-rational-5-4-div-u1v.c",
236236
"src/f32-vcos/gen/f32-vcos-rvv-rational-5-4-div-u2v.c",
237237
"src/f32-vcos/gen/f32-vcos-rvv-rational-5-4-div-u4v.c",
238-
"src/f32-vexp/gen/f32-vexp-rvv-rational-3-2-div-u1v.c",
239-
"src/f32-vexp/gen/f32-vexp-rvv-rational-3-2-div-u2v.c",
240-
"src/f32-vexp/gen/f32-vexp-rvv-rational-3-2-div-u4v.c",
241238
"src/f32-velu/gen/f32-velu-rvv-rr2-p6-u1v.c",
242239
"src/f32-velu/gen/f32-velu-rvv-rr2-p6-u2v.c",
243240
"src/f32-velu/gen/f32-velu-rvv-rr2-p6-u8v.c",
241+
"src/f32-vexp/gen/f32-vexp-rvv-rational-3-2-div-u1v.c",
242+
"src/f32-vexp/gen/f32-vexp-rvv-rational-3-2-div-u2v.c",
243+
"src/f32-vexp/gen/f32-vexp-rvv-rational-3-2-div-u4v.c",
244244
"src/f32-vgelu/gen/f32-vgelu-rvv-rational-12-10-div-u1v.c",
245245
"src/f32-vgelu/gen/f32-vgelu-rvv-rational-12-10-div-u2v.c",
246246
"src/f32-vgelu/gen/f32-vgelu-rvv-rational-12-10-div-u8v.c",

gen/rvvfp16arith_microkernels.bzl

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,9 @@ PROD_RVVFP16ARITH_MICROKERNEL_SRCS = [
4949
"src/f16-vbinary/gen/f16-vsub-rvvfp16arith-u8v.c",
5050
"src/f16-vbinary/gen/f16-vsubc-rvvfp16arith-u8v.c",
5151
"src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u8v.c",
52-
"src/f16-vgelu/gen/f16-vgelu-rvvfp16arith-rational-6-4-div-u2v.c",
52+
"src/f16-vcmul/gen/f16-vcmul-rvvfp16arith-u2v.c",
5353
"src/f16-velu/gen/f16-velu-rvvfp16arith-rr1-p3-u2v.c",
54+
"src/f16-vgelu/gen/f16-vgelu-rvvfp16arith-rational-6-4-div-u2v.c",
5455
"src/f16-vhswish/gen/f16-vhswish-rvvfp16arith-u8v.c",
5556
"src/f16-vlrelu/gen/f16-vlrelu-rvvfp16arith-u8v.c",
5657
"src/f16-vmulcaddc/gen/f16-vmulcaddc-c4v-minmax-rvvfp16arith-2x.c",
@@ -59,9 +60,9 @@ PROD_RVVFP16ARITH_MICROKERNEL_SRCS = [
5960
"src/f16-vrnd/gen/f16-vrndu-rvvfp16arith-u8v.c",
6061
"src/f16-vrnd/gen/f16-vrndz-rvvfp16arith-u8v.c",
6162
"src/f16-vrsqrt/gen/f16-vrsqrt-rvvfp16arith-rsqrt-u4v.c",
63+
"src/f16-vsigmoid/gen/f16-vsigmoid-rvvfp16arith-rr2-p2-u2v.c",
6264
"src/f16-vsqrt/gen/f16-vsqrt-rvvfp16arith-sqrt-u8v.c",
6365
"src/f16-vtanh/gen/f16-vtanh-rvvfp16arith-expm1minus-rr1-p3h2ts-div-u2v.c",
64-
"src/f16-vsigmoid/gen/f16-vsigmoid-rvvfp16arith-rr2-p2-u2v.c",
6566
"src/f16-vunary/gen/f16-vabs-rvvfp16arith-u8v.c",
6667
"src/f16-vunary/gen/f16-vneg-rvvfp16arith-u8v.c",
6768
"src/f16-vunary/gen/f16-vsqr-rvvfp16arith-u8v.c",
@@ -157,10 +158,12 @@ NON_PROD_RVVFP16ARITH_MICROKERNEL_SRCS = [
157158
"src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u1v.c",
158159
"src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u2v.c",
159160
"src/f16-vclamp/gen/f16-vclamp-rvvfp16arith-u4v.c",
160-
"src/f16-vgelu/gen/f16-vgelu-rvvfp16arith-rational-6-4-div-u1v.c",
161-
"src/f16-vgelu/gen/f16-vgelu-rvvfp16arith-rational-6-4-div-u4v.c",
161+
"src/f16-vcmul/gen/f16-vcmul-rvvfp16arith-u1v.c",
162+
"src/f16-vcmul/gen/f16-vcmul-rvvfp16arith-u4v.c",
162163
"src/f16-velu/gen/f16-velu-rvvfp16arith-rr1-p3-u1v.c",
163164
"src/f16-velu/gen/f16-velu-rvvfp16arith-rr1-p3-u4v.c",
165+
"src/f16-vgelu/gen/f16-vgelu-rvvfp16arith-rational-6-4-div-u1v.c",
166+
"src/f16-vgelu/gen/f16-vgelu-rvvfp16arith-rational-6-4-div-u4v.c",
164167
"src/f16-vhswish/gen/f16-vhswish-rvvfp16arith-u4v.c",
165168
"src/f16-vlrelu/gen/f16-vlrelu-rvvfp16arith-u4v.c",
166169
"src/f16-vmulcaddc/gen/f16-vmulcaddc-c8v-minmax-rvvfp16arith-2x.c",
@@ -169,11 +172,11 @@ NON_PROD_RVVFP16ARITH_MICROKERNEL_SRCS = [
169172
"src/f16-vrnd/gen/f16-vrndu-rvvfp16arith-u4v.c",
170173
"src/f16-vrnd/gen/f16-vrndz-rvvfp16arith-u4v.c",
171174
"src/f16-vrsqrt/gen/f16-vrsqrt-rvvfp16arith-rsqrt-u2v.c",
175+
"src/f16-vsigmoid/gen/f16-vsigmoid-rvvfp16arith-rr2-p2-u1v.c",
176+
"src/f16-vsigmoid/gen/f16-vsigmoid-rvvfp16arith-rr2-p2-u4v.c",
172177
"src/f16-vsqrt/gen/f16-vsqrt-rvvfp16arith-sqrt-u4v.c",
173178
"src/f16-vtanh/gen/f16-vtanh-rvvfp16arith-expm1minus-rr1-p3h2ts-div-u1v.c",
174179
"src/f16-vtanh/gen/f16-vtanh-rvvfp16arith-expm1minus-rr1-p3h2ts-div-u4v.c",
175-
"src/f16-vsigmoid/gen/f16-vsigmoid-rvvfp16arith-rr2-p2-u1v.c",
176-
"src/f16-vsigmoid/gen/f16-vsigmoid-rvvfp16arith-rr2-p2-u4v.c",
177180
"src/f16-vunary/gen/f16-vabs-rvvfp16arith-u1v.c",
178181
"src/f16-vunary/gen/f16-vabs-rvvfp16arith-u2v.c",
179182
"src/f16-vunary/gen/f16-vabs-rvvfp16arith-u4v.c",

scripts/generate-f32-vcmul.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ tools/xngen src/f32-vcmul/rvv.c.in -D LMUL=1 -o src/f32-vcmul/gen/f32-vcmul-rvv-
4343
tools/xngen src/f32-vcmul/rvv.c.in -D LMUL=2 -o src/f32-vcmul/gen/f32-vcmul-rvv-u2v.c &
4444
tools/xngen src/f32-vcmul/rvv.c.in -D LMUL=4 -o src/f32-vcmul/gen/f32-vcmul-rvv-u4v.c &
4545

46+
tools/xngen src/f16-vcmul/rvv.c.in -D LMUL=1 -o src/f16-vcmul/gen/f16-vcmul-rvvfp16arith-u1v.c &
47+
tools/xngen src/f16-vcmul/rvv.c.in -D LMUL=2 -o src/f16-vcmul/gen/f16-vcmul-rvvfp16arith-u2v.c &
48+
tools/xngen src/f16-vcmul/rvv.c.in -D LMUL=4 -o src/f16-vcmul/gen/f16-vcmul-rvvfp16arith-u4v.c &
49+
4650
#################################### Scalar ###################################
4751
tools/xngen src/f32-vcmul/scalar.c.in -D BATCH_TILE=1 -o src/f32-vcmul/gen/f32-vcmul-scalar-u1.c &
4852
tools/xngen src/f32-vcmul/scalar.c.in -D BATCH_TILE=2 -o src/f32-vcmul/gen/f32-vcmul-scalar-u2.c &

src/configs/cmul-config.c

Lines changed: 14 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -15,26 +15,24 @@
1515
#include "src/xnnpack/microfnptr.h"
1616
#include "src/xnnpack/vbinary.h"
1717

18-
#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
19-
static struct xnn_cmul_config f16_cmul_config = {0};
20-
#endif
18+
static struct xnn_cmul_config f16_cmul_config = {0};
2119
static struct xnn_cmul_config f32_cmul_config = {0};
2220

23-
#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
24-
XNN_INIT_ONCE_GUARD(f16_cmul);
25-
#endif
21+
XNN_INIT_ONCE_GUARD(f16_cmul);
2622
XNN_INIT_ONCE_GUARD(f32_cmul);
2723

2824
// Macros to log the microkernel names if and when they are registered.
2925
#define XNN_INIT_CMUL_UKERNEL(ukernel) \
3026
(xnn_vbinary_ukernel_fn) ukernel; \
3127
xnn_log_info("Using cmul microkernel '%s'.", #ukernel);
3228

33-
#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
34-
static void init_f16_cmul_config(void) {
35-
f16_cmul_config.ukernel = XNN_INIT_CMUL_UKERNEL(xnn_f16_vcmul_ukernel__neonfp16arith_u16);
36-
}
37-
#endif
29+
static void init_f16_cmul_config(void) {
30+
#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
31+
f16_cmul_config.ukernel = XNN_INIT_CMUL_UKERNEL(xnn_f16_vcmul_ukernel__neonfp16arith_u16);
32+
#elif XNN_ARCH_RISCV && XNN_ENABLE_RISCV_FP16_VECTOR
33+
f16_cmul_config.ukernel = XNN_INIT_CMUL_UKERNEL(xnn_f16_vcmul_ukernel__rvvfp16arith_u2v);
34+
#endif
35+
}
3836

3937
static void init_f32_cmul_config(void) {
4038
#if XNN_ARCH_ARM
@@ -81,16 +79,12 @@ static void init_f32_cmul_config(void) {
8179
}
8280

8381
const struct xnn_cmul_config* xnn_init_f16_cmul_config() {
84-
#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
85-
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
86-
if (hardware_config == NULL || !xnn_is_f16_compatible_config(hardware_config)) {
87-
return NULL;
88-
}
89-
XNN_INIT_ONCE(f16_cmul);
90-
return &f16_cmul_config;
91-
#else
82+
const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config();
83+
if (hardware_config == NULL || !xnn_is_f16_compatible_config(hardware_config)) {
9284
return NULL;
93-
#endif
85+
}
86+
XNN_INIT_ONCE(f16_cmul);
87+
return f16_cmul_config.ukernel ? &f16_cmul_config : NULL;
9488
}
9589

9690
const struct xnn_cmul_config* xnn_init_f32_cmul_config() {

src/f16-vbinary/f16-vcmul.inc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,8 @@ XNN_UKERNEL(xnn_arch_arm_neon_fp16_arith, xnn_f16_vcmul_ukernel__neonfp16arith_u
1111
XNN_UKERNEL(xnn_arch_arm_neon_fp16_arith, xnn_f16_vcmul_ukernel__neonfp16arith_u32, 32, false, xnn_float16, struct xnn_f16_default_params, ((xnn_init_f16_default_params_fn) NULL))
1212
#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
1313

14+
#if XNN_ARCH_RISCV && XNN_ENABLE_RISCV_FP16_VECTOR
15+
XNN_UKERNEL(xnn_arch_riscv_vector_fp16_arith, xnn_f16_vcmul_ukernel__rvvfp16arith_u1v, 1, true, float, struct xnn_f16_default_params, ((xnn_init_f16_default_params_fn) NULL))
16+
XNN_UKERNEL(xnn_arch_riscv_vector_fp16_arith, xnn_f16_vcmul_ukernel__rvvfp16arith_u2v, 2, true, float, struct xnn_f16_default_params, ((xnn_init_f16_default_params_fn) NULL))
17+
XNN_UKERNEL(xnn_arch_riscv_vector_fp16_arith, xnn_f16_vcmul_ukernel__rvvfp16arith_u4v, 4, true, float, struct xnn_f16_default_params, ((xnn_init_f16_default_params_fn) NULL))
18+
#endif // XNN_ARCH_RISCV && XNN_ENABLE_RISCV_FP16_VECTOR
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// clang-format off
2+
// Auto-generated file. Do not edit!
3+
// Template: src/f16-vcmul/rvv.c.in
4+
// Generator: tools/xngen
5+
//
6+
// Copyright 2026 Google LLC
7+
//
8+
// This source code is licensed under the BSD-style license found in the
9+
// LICENSE file in the root directory of this source tree.
10+
11+
#include <assert.h>
12+
13+
#include <riscv_vector.h>
14+
15+
#include "src/xnnpack/vbinary.h"
16+
17+
void xnn_f16_vcmul_ukernel__rvvfp16arith_u1v(
18+
size_t batch,
19+
const xnn_float16* input_a,
20+
const xnn_float16* input_b,
21+
xnn_float16* output,
22+
const struct xnn_f16_default_params* restrict params)
23+
{
24+
assert(batch != 0);
25+
assert(batch % sizeof(xnn_float16) == 0);
26+
assert(input_a != NULL);
27+
assert(input_b != NULL);
28+
assert(output != NULL);
29+
30+
batch >>= XNN_LOG2_SIZEOF_FLOAT16;
31+
32+
const xnn_float16* ar = input_a;
33+
const xnn_float16* ai = input_a + batch;
34+
const xnn_float16* br = input_b;
35+
const xnn_float16* bi = input_b + batch;
36+
xnn_float16* or = output;
37+
xnn_float16* oi = output + batch;
38+
39+
do {
40+
size_t n = __riscv_vsetvl_e16m1(batch); batch -= n;
41+
vfloat16m1_t var = __riscv_vle16_v_f16m1(ar, n); ar += n;
42+
vfloat16m1_t vai = __riscv_vle16_v_f16m1(ai, n); ai += n;
43+
vfloat16m1_t vbr = __riscv_vle16_v_f16m1(br, n); br += n;
44+
vfloat16m1_t vbi = __riscv_vle16_v_f16m1(bi, n); bi += n;
45+
vfloat16m1_t vaccr = __riscv_vfmul(var, vbr, n);
46+
vfloat16m1_t vacci = __riscv_vfmul(var, vbi, n);
47+
vaccr = __riscv_vfnmsac(vaccr, vai, vbi, n);
48+
vacci = __riscv_vfmacc(vacci, vai, vbr, n);
49+
__riscv_vse16(or, vaccr, n); or += n;
50+
__riscv_vse16(oi, vacci, n); oi += n;
51+
} while (batch > 0);
52+
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// clang-format off
2+
// Auto-generated file. Do not edit!
3+
// Template: src/f16-vcmul/rvv.c.in
4+
// Generator: tools/xngen
5+
//
6+
// Copyright 2026 Google LLC
7+
//
8+
// This source code is licensed under the BSD-style license found in the
9+
// LICENSE file in the root directory of this source tree.
10+
11+
#include <assert.h>
12+
13+
#include <riscv_vector.h>
14+
15+
#include "src/xnnpack/vbinary.h"
16+
17+
void xnn_f16_vcmul_ukernel__rvvfp16arith_u2v(
18+
size_t batch,
19+
const xnn_float16* input_a,
20+
const xnn_float16* input_b,
21+
xnn_float16* output,
22+
const struct xnn_f16_default_params* restrict params)
23+
{
24+
assert(batch != 0);
25+
assert(batch % sizeof(xnn_float16) == 0);
26+
assert(input_a != NULL);
27+
assert(input_b != NULL);
28+
assert(output != NULL);
29+
30+
batch >>= XNN_LOG2_SIZEOF_FLOAT16;
31+
32+
const xnn_float16* ar = input_a;
33+
const xnn_float16* ai = input_a + batch;
34+
const xnn_float16* br = input_b;
35+
const xnn_float16* bi = input_b + batch;
36+
xnn_float16* or = output;
37+
xnn_float16* oi = output + batch;
38+
39+
do {
40+
size_t n = __riscv_vsetvl_e16m2(batch); batch -= n;
41+
vfloat16m2_t var = __riscv_vle16_v_f16m2(ar, n); ar += n;
42+
vfloat16m2_t vai = __riscv_vle16_v_f16m2(ai, n); ai += n;
43+
vfloat16m2_t vbr = __riscv_vle16_v_f16m2(br, n); br += n;
44+
vfloat16m2_t vbi = __riscv_vle16_v_f16m2(bi, n); bi += n;
45+
vfloat16m2_t vaccr = __riscv_vfmul(var, vbr, n);
46+
vfloat16m2_t vacci = __riscv_vfmul(var, vbi, n);
47+
vaccr = __riscv_vfnmsac(vaccr, vai, vbi, n);
48+
vacci = __riscv_vfmacc(vacci, vai, vbr, n);
49+
__riscv_vse16(or, vaccr, n); or += n;
50+
__riscv_vse16(oi, vacci, n); oi += n;
51+
} while (batch > 0);
52+
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// clang-format off
2+
// Auto-generated file. Do not edit!
3+
// Template: src/f16-vcmul/rvv.c.in
4+
// Generator: tools/xngen
5+
//
6+
// Copyright 2026 Google LLC
7+
//
8+
// This source code is licensed under the BSD-style license found in the
9+
// LICENSE file in the root directory of this source tree.
10+
11+
#include <assert.h>
12+
13+
#include <riscv_vector.h>
14+
15+
#include "src/xnnpack/vbinary.h"
16+
17+
void xnn_f16_vcmul_ukernel__rvvfp16arith_u4v(
18+
size_t batch,
19+
const xnn_float16* input_a,
20+
const xnn_float16* input_b,
21+
xnn_float16* output,
22+
const struct xnn_f16_default_params* restrict params)
23+
{
24+
assert(batch != 0);
25+
assert(batch % sizeof(xnn_float16) == 0);
26+
assert(input_a != NULL);
27+
assert(input_b != NULL);
28+
assert(output != NULL);
29+
30+
batch >>= XNN_LOG2_SIZEOF_FLOAT16;
31+
32+
const xnn_float16* ar = input_a;
33+
const xnn_float16* ai = input_a + batch;
34+
const xnn_float16* br = input_b;
35+
const xnn_float16* bi = input_b + batch;
36+
xnn_float16* or = output;
37+
xnn_float16* oi = output + batch;
38+
39+
do {
40+
size_t n = __riscv_vsetvl_e16m4(batch); batch -= n;
41+
vfloat16m4_t var = __riscv_vle16_v_f16m4(ar, n); ar += n;
42+
vfloat16m4_t vai = __riscv_vle16_v_f16m4(ai, n); ai += n;
43+
vfloat16m4_t vbr = __riscv_vle16_v_f16m4(br, n); br += n;
44+
vfloat16m4_t vbi = __riscv_vle16_v_f16m4(bi, n); bi += n;
45+
vfloat16m4_t vaccr = __riscv_vfmul(var, vbr, n);
46+
vfloat16m4_t vacci = __riscv_vfmul(var, vbi, n);
47+
vaccr = __riscv_vfnmsac(vaccr, vai, vbi, n);
48+
vacci = __riscv_vfmacc(vacci, vai, vbr, n);
49+
__riscv_vse16(or, vaccr, n); or += n;
50+
__riscv_vse16(oi, vacci, n); oi += n;
51+
} while (batch > 0);
52+
}

0 commit comments

Comments
 (0)