Skip to content

Commit d67b723

Browse files
committed
rvv: add exp
Signed-off-by: ihb2032 <hebome@foxmail.com>
1 parent 3318da2 commit d67b723

3 files changed

Lines changed: 301 additions & 0 deletions

File tree

src/layer/riscv/exp_riscv.cpp

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
// Copyright 2026 ihb2032 <hebome@foxmail.com>
2+
// SPDX-License-Identifier: BSD-3-Clause
3+
4+
#include "exp_riscv.h"
5+
6+
#if __riscv_vector
7+
#include <riscv_vector.h>
8+
#include "rvv_mathfun.h"
9+
#endif // __riscv_vector
10+
11+
#include "cpu.h"
12+
13+
namespace ncnn {
14+
15+
Exp_riscv::Exp_riscv()
16+
{
17+
#if __riscv_vector
18+
support_packing = true;
19+
#endif // __riscv_vector
20+
#if NCNN_ZFH
21+
#if __riscv_vector
22+
support_fp16_storage = cpu_support_riscv_zvfh();
23+
#else
24+
support_fp16_storage = cpu_support_riscv_zfh();
25+
#endif
26+
#endif
27+
}
28+
29+
int Exp_riscv::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
30+
{
31+
#if NCNN_ZFH
32+
int elembits = bottom_top_blob.elembits();
33+
34+
if (opt.use_fp16_storage && elembits == 16)
35+
{
36+
if (opt.use_fp16_arithmetic)
37+
return forward_inplace_fp16sa(bottom_top_blob, opt);
38+
else
39+
return forward_inplace_fp16s(bottom_top_blob, opt);
40+
}
41+
#endif
42+
43+
int w = bottom_top_blob.w;
44+
int h = bottom_top_blob.h;
45+
int d = bottom_top_blob.d;
46+
int channels = bottom_top_blob.c;
47+
int elempack = bottom_top_blob.elempack;
48+
int size = w * h * d * elempack;
49+
50+
if (base == -1.f)
51+
{
52+
#pragma omp parallel for num_threads(opt.num_threads)
53+
for (int q = 0; q < channels; q++)
54+
{
55+
float* ptr = bottom_top_blob.channel(q);
56+
57+
#if __riscv_vector
58+
int n = size;
59+
while (n > 0)
60+
{
61+
size_t vl = __riscv_vsetvl_e32m8(n);
62+
vfloat32m8_t _p = __riscv_vle32_v_f32m8(ptr, vl);
63+
_p = __riscv_vfmul_vf_f32m8(_p, scale, vl);
64+
_p = __riscv_vfadd_vf_f32m8(_p, shift, vl);
65+
_p = exp_ps(_p, vl);
66+
__riscv_vse32_v_f32m8(ptr, _p, vl);
67+
68+
ptr += vl;
69+
n -= vl;
70+
}
71+
#else // __riscv_vector
72+
for (int i = 0; i < size; i++)
73+
{
74+
ptr[i] = expf(shift + ptr[i] * scale);
75+
}
76+
#endif // __riscv_vector
77+
}
78+
79+
return 0;
80+
}
81+
82+
#pragma omp parallel for num_threads(opt.num_threads)
83+
for (int q = 0; q < channels; q++)
84+
{
85+
float* ptr = bottom_top_blob.channel(q);
86+
87+
#if __riscv_vector
88+
int n = size;
89+
while (n > 0)
90+
{
91+
size_t vl = __riscv_vsetvl_e32m8(n);
92+
vfloat32m8_t _p = __riscv_vle32_v_f32m8(ptr, vl);
93+
_p = __riscv_vfmul_vf_f32m8(_p, scale, vl);
94+
_p = __riscv_vfadd_vf_f32m8(_p, shift, vl);
95+
_p = pow_ps(__riscv_vfmv_v_f_f32m8(base, vl), _p, vl);
96+
__riscv_vse32_v_f32m8(ptr, _p, vl);
97+
98+
ptr += vl;
99+
n -= vl;
100+
}
101+
#else // __riscv_vector
102+
for (int i = 0; i < size; i++)
103+
{
104+
ptr[i] = powf(base, shift + ptr[i] * scale);
105+
}
106+
#endif // __riscv_vector
107+
}
108+
109+
return 0;
110+
}
111+
112+
} // namespace ncnn

src/layer/riscv/exp_riscv.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// Copyright 2026 ihb2032 <hebome@foxmail.com>
2+
// SPDX-License-Identifier: BSD-3-Clause
3+
4+
#ifndef LAYER_EXP_RISCV_H
5+
#define LAYER_EXP_RISCV_H
6+
7+
#include "exp.h"
8+
9+
namespace ncnn {
10+
11+
class Exp_riscv : public Exp
12+
{
13+
public:
14+
Exp_riscv();
15+
16+
virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const;
17+
18+
protected:
19+
#if NCNN_ZFH
20+
int forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const;
21+
int forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) const;
22+
#endif
23+
};
24+
25+
} // namespace ncnn
26+
27+
#endif // LAYER_EXP_RISCV_H

src/layer/riscv/exp_riscv_zfh.cpp

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
// Copyright 2026 ihb2032 <hebome@foxmail.com>
2+
// SPDX-License-Identifier: BSD-3-Clause
3+
4+
#include "exp_riscv.h"
5+
6+
#if __riscv_vector
7+
#include <riscv_vector.h>
8+
#include "rvv_mathfun.h"
9+
#if __riscv_zvfh
10+
#include "rvv_mathfun_fp16s.h"
11+
#endif
12+
#endif // __riscv_vector
13+
14+
namespace ncnn {
15+
16+
#if NCNN_ZFH
17+
int Exp_riscv::forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const
18+
{
19+
int w = bottom_top_blob.w;
20+
int h = bottom_top_blob.h;
21+
int d = bottom_top_blob.d;
22+
int channels = bottom_top_blob.c;
23+
int elempack = bottom_top_blob.elempack;
24+
int size = w * h * d * elempack;
25+
26+
if (base == -1.f)
27+
{
28+
#pragma omp parallel for num_threads(opt.num_threads)
29+
for (int q = 0; q < channels; q++)
30+
{
31+
__fp16* ptr = bottom_top_blob.channel(q);
32+
33+
#if __riscv_zvfh
34+
int n = size;
35+
while (n > 0)
36+
{
37+
size_t vl = __riscv_vsetvl_e16m4(n);
38+
vfloat32m8_t _p = __riscv_vfwcvt_f_f_v_f32m8(__riscv_vle16_v_f16m4(ptr, vl), vl);
39+
_p = __riscv_vfmul_vf_f32m8(_p, scale, vl);
40+
_p = __riscv_vfadd_vf_f32m8(_p, shift, vl);
41+
_p = exp_ps(_p, vl);
42+
__riscv_vse16_v_f16m4(ptr, __riscv_vfncvt_f_f_w_f16m4(_p, vl), vl);
43+
44+
ptr += vl;
45+
n -= vl;
46+
}
47+
#else // __riscv_zvfh
48+
for (int i = 0; i < size; i++)
49+
{
50+
ptr[i] = (__fp16)expf(shift + (float)ptr[i] * scale);
51+
}
52+
#endif // __riscv_zvfh
53+
}
54+
55+
return 0;
56+
}
57+
58+
#pragma omp parallel for num_threads(opt.num_threads)
59+
for (int q = 0; q < channels; q++)
60+
{
61+
__fp16* ptr = bottom_top_blob.channel(q);
62+
63+
#if __riscv_zvfh
64+
int n = size;
65+
while (n > 0)
66+
{
67+
size_t vl = __riscv_vsetvl_e16m4(n);
68+
vfloat32m8_t _p = __riscv_vfwcvt_f_f_v_f32m8(__riscv_vle16_v_f16m4(ptr, vl), vl);
69+
_p = __riscv_vfmul_vf_f32m8(_p, scale, vl);
70+
_p = __riscv_vfadd_vf_f32m8(_p, shift, vl);
71+
_p = pow_ps(__riscv_vfmv_v_f_f32m8(base, vl), _p, vl);
72+
__riscv_vse16_v_f16m4(ptr, __riscv_vfncvt_f_f_w_f16m4(_p, vl), vl);
73+
74+
ptr += vl;
75+
n -= vl;
76+
}
77+
#else // __riscv_zvfh
78+
for (int i = 0; i < size; i++)
79+
{
80+
ptr[i] = (__fp16)powf(base, shift + (float)ptr[i] * scale);
81+
}
82+
#endif // __riscv_zvfh
83+
}
84+
85+
return 0;
86+
}
87+
88+
int Exp_riscv::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) const
89+
{
90+
int w = bottom_top_blob.w;
91+
int h = bottom_top_blob.h;
92+
int d = bottom_top_blob.d;
93+
int channels = bottom_top_blob.c;
94+
int elempack = bottom_top_blob.elempack;
95+
int size = w * h * d * elempack;
96+
__fp16 _scale = (__fp16)scale;
97+
__fp16 _shift = (__fp16)shift;
98+
99+
if (base == -1.f)
100+
{
101+
#pragma omp parallel for num_threads(opt.num_threads)
102+
for (int q = 0; q < channels; q++)
103+
{
104+
__fp16* ptr = bottom_top_blob.channel(q);
105+
106+
#if __riscv_zvfh
107+
int n = size;
108+
while (n > 0)
109+
{
110+
size_t vl = __riscv_vsetvl_e16m8(n);
111+
vfloat16m8_t _p = __riscv_vle16_v_f16m8(ptr, vl);
112+
_p = __riscv_vfmul_vf_f16m8(_p, _scale, vl);
113+
_p = __riscv_vfadd_vf_f16m8(_p, _shift, vl);
114+
_p = exp_ps(_p, vl);
115+
__riscv_vse16_v_f16m8(ptr, _p, vl);
116+
117+
ptr += vl;
118+
n -= vl;
119+
}
120+
#else // __riscv_zvfh
121+
for (int i = 0; i < size; i++)
122+
{
123+
ptr[i] = (__fp16)expf(shift + (float)ptr[i] * scale);
124+
}
125+
#endif // __riscv_zvfh
126+
}
127+
128+
return 0;
129+
}
130+
131+
#pragma omp parallel for num_threads(opt.num_threads)
132+
for (int q = 0; q < channels; q++)
133+
{
134+
__fp16* ptr = bottom_top_blob.channel(q);
135+
136+
#if __riscv_zvfh
137+
int n = size;
138+
while (n > 0)
139+
{
140+
size_t vl = __riscv_vsetvl_e16m8(n);
141+
vfloat16m8_t _p = __riscv_vle16_v_f16m8(ptr, vl);
142+
_p = __riscv_vfmul_vf_f16m8(_p, _scale, vl);
143+
_p = __riscv_vfadd_vf_f16m8(_p, _shift, vl);
144+
_p = pow_ps(__riscv_vfmv_v_f_f16m8((__fp16)base, vl), _p, vl);
145+
__riscv_vse16_v_f16m8(ptr, _p, vl);
146+
147+
ptr += vl;
148+
n -= vl;
149+
}
150+
#else // __riscv_zvfh
151+
for (int i = 0; i < size; i++)
152+
{
153+
ptr[i] = (__fp16)powf(base, shift + (float)ptr[i] * scale);
154+
}
155+
#endif // __riscv_zvfh
156+
}
157+
158+
return 0;
159+
}
160+
#endif // NCNN_ZFH
161+
162+
} // namespace ncnn

0 commit comments

Comments
 (0)