Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 112 additions & 0 deletions src/layer/riscv/exp_riscv.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// Copyright 2026 ihb2032 <hebome@foxmail.com>
// SPDX-License-Identifier: BSD-3-Clause

#include "exp_riscv.h"

#if __riscv_vector
#include <riscv_vector.h>
#include "rvv_mathfun.h"
#endif // __riscv_vector

#include "cpu.h"

namespace ncnn {

Exp_riscv::Exp_riscv()
{
#if __riscv_vector
support_packing = true;
#endif // __riscv_vector
#if NCNN_ZFH
#if __riscv_vector
support_fp16_storage = cpu_support_riscv_zvfh();
#else
support_fp16_storage = cpu_support_riscv_zfh();
#endif
#endif
}

int Exp_riscv::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
{
#if NCNN_ZFH
int elembits = bottom_top_blob.elembits();

if (opt.use_fp16_storage && elembits == 16)
{
if (opt.use_fp16_arithmetic)
return forward_inplace_fp16sa(bottom_top_blob, opt);
else
return forward_inplace_fp16s(bottom_top_blob, opt);
}
#endif

int w = bottom_top_blob.w;
int h = bottom_top_blob.h;
int d = bottom_top_blob.d;
int channels = bottom_top_blob.c;
int elempack = bottom_top_blob.elempack;
int size = w * h * d * elempack;

if (base == -1.f)
{
#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < channels; q++)
{
float* ptr = bottom_top_blob.channel(q);

#if __riscv_vector
int n = size;
while (n > 0)
{
size_t vl = __riscv_vsetvl_e32m8(n);
vfloat32m8_t _p = __riscv_vle32_v_f32m8(ptr, vl);
_p = __riscv_vfmul_vf_f32m8(_p, scale, vl);
_p = __riscv_vfadd_vf_f32m8(_p, shift, vl);
_p = exp_ps(_p, vl);
__riscv_vse32_v_f32m8(ptr, _p, vl);

ptr += vl;
n -= vl;
}
#else // __riscv_vector
for (int i = 0; i < size; i++)
{
ptr[i] = expf(shift + ptr[i] * scale);
}
#endif // __riscv_vector
}

return 0;
}

#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < channels; q++)
{
float* ptr = bottom_top_blob.channel(q);

#if __riscv_vector
int n = size;
while (n > 0)
{
size_t vl = __riscv_vsetvl_e32m8(n);
vfloat32m8_t _p = __riscv_vle32_v_f32m8(ptr, vl);
_p = __riscv_vfmul_vf_f32m8(_p, scale, vl);
_p = __riscv_vfadd_vf_f32m8(_p, shift, vl);
_p = pow_ps(__riscv_vfmv_v_f_f32m8(base, vl), _p, vl);
__riscv_vse32_v_f32m8(ptr, _p, vl);

ptr += vl;
n -= vl;
}
#else // __riscv_vector
for (int i = 0; i < size; i++)
{
ptr[i] = powf(base, shift + ptr[i] * scale);
Comment thread
ihb2032 marked this conversation as resolved.
}
#endif // __riscv_vector
}

return 0;
}

} // namespace ncnn
27 changes: 27 additions & 0 deletions src/layer/riscv/exp_riscv.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright 2026 ihb2032 <hebome@foxmail.com>
// SPDX-License-Identifier: BSD-3-Clause

#ifndef LAYER_EXP_RISCV_H
#define LAYER_EXP_RISCV_H

#include "exp.h"

namespace ncnn {

class Exp_riscv : public Exp
{
public:
Exp_riscv();

virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const;

protected:
#if NCNN_ZFH
int forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const;
int forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) const;
#endif
};

} // namespace ncnn

#endif // LAYER_EXP_RISCV_H
162 changes: 162 additions & 0 deletions src/layer/riscv/exp_riscv_zfh.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
// Copyright 2026 ihb2032 <hebome@foxmail.com>
// SPDX-License-Identifier: BSD-3-Clause

#include "exp_riscv.h"

#if __riscv_vector
#include <riscv_vector.h>
#include "rvv_mathfun.h"
#if __riscv_zvfh
#include "rvv_mathfun_fp16s.h"
#endif
#endif // __riscv_vector

namespace ncnn {

#if NCNN_ZFH
int Exp_riscv::forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const
{
int w = bottom_top_blob.w;
int h = bottom_top_blob.h;
int d = bottom_top_blob.d;
int channels = bottom_top_blob.c;
int elempack = bottom_top_blob.elempack;
int size = w * h * d * elempack;

if (base == -1.f)
{
#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < channels; q++)
{
__fp16* ptr = bottom_top_blob.channel(q);

#if __riscv_zvfh
int n = size;
while (n > 0)
{
size_t vl = __riscv_vsetvl_e16m4(n);
vfloat32m8_t _p = __riscv_vfwcvt_f_f_v_f32m8(__riscv_vle16_v_f16m4(ptr, vl), vl);
_p = __riscv_vfmul_vf_f32m8(_p, scale, vl);
_p = __riscv_vfadd_vf_f32m8(_p, shift, vl);
_p = exp_ps(_p, vl);
__riscv_vse16_v_f16m4(ptr, __riscv_vfncvt_f_f_w_f16m4(_p, vl), vl);

ptr += vl;
n -= vl;
}
#else // __riscv_zvfh
for (int i = 0; i < size; i++)
{
ptr[i] = (__fp16)expf(shift + (float)ptr[i] * scale);
}
#endif // __riscv_zvfh
}

return 0;
}

#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < channels; q++)
{
__fp16* ptr = bottom_top_blob.channel(q);

#if __riscv_zvfh
int n = size;
while (n > 0)
{
size_t vl = __riscv_vsetvl_e16m4(n);
vfloat32m8_t _p = __riscv_vfwcvt_f_f_v_f32m8(__riscv_vle16_v_f16m4(ptr, vl), vl);
_p = __riscv_vfmul_vf_f32m8(_p, scale, vl);
_p = __riscv_vfadd_vf_f32m8(_p, shift, vl);
_p = pow_ps(__riscv_vfmv_v_f_f32m8(base, vl), _p, vl);
__riscv_vse16_v_f16m4(ptr, __riscv_vfncvt_f_f_w_f16m4(_p, vl), vl);

ptr += vl;
n -= vl;
}
#else // __riscv_zvfh
for (int i = 0; i < size; i++)
{
ptr[i] = (__fp16)powf(base, shift + (float)ptr[i] * scale);
}
#endif // __riscv_zvfh
}

return 0;
}

int Exp_riscv::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) const
{
int w = bottom_top_blob.w;
int h = bottom_top_blob.h;
int d = bottom_top_blob.d;
int channels = bottom_top_blob.c;
int elempack = bottom_top_blob.elempack;
int size = w * h * d * elempack;
__fp16 _scale = (__fp16)scale;
__fp16 _shift = (__fp16)shift;

if (base == -1.f)
{
#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < channels; q++)
{
__fp16* ptr = bottom_top_blob.channel(q);

#if __riscv_zvfh
int n = size;
while (n > 0)
{
size_t vl = __riscv_vsetvl_e16m8(n);
vfloat16m8_t _p = __riscv_vle16_v_f16m8(ptr, vl);
_p = __riscv_vfmul_vf_f16m8(_p, _scale, vl);
_p = __riscv_vfadd_vf_f16m8(_p, _shift, vl);
_p = exp_ps(_p, vl);
__riscv_vse16_v_f16m8(ptr, _p, vl);

ptr += vl;
n -= vl;
}
#else // __riscv_zvfh
for (int i = 0; i < size; i++)
{
ptr[i] = (__fp16)expf(shift + (float)ptr[i] * scale);
}
#endif // __riscv_zvfh
}

return 0;
}

#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < channels; q++)
{
__fp16* ptr = bottom_top_blob.channel(q);

#if __riscv_zvfh
int n = size;
while (n > 0)
{
size_t vl = __riscv_vsetvl_e16m8(n);
vfloat16m8_t _p = __riscv_vle16_v_f16m8(ptr, vl);
_p = __riscv_vfmul_vf_f16m8(_p, _scale, vl);
_p = __riscv_vfadd_vf_f16m8(_p, _shift, vl);
_p = pow_ps(__riscv_vfmv_v_f_f16m8((__fp16)base, vl), _p, vl);
__riscv_vse16_v_f16m8(ptr, _p, vl);

ptr += vl;
n -= vl;
}
#else // __riscv_zvfh
for (int i = 0; i < size; i++)
{
ptr[i] = (__fp16)powf(base, shift + (float)ptr[i] * scale);
}
#endif // __riscv_zvfh
}

return 0;
}
#endif // NCNN_ZFH

} // namespace ncnn
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ ncnn_add_layer_test(Eltwise)
ncnn_add_layer_test(ELU)
ncnn_add_layer_test(Embed)
ncnn_add_layer_test(Erf)
ncnn_add_layer_test(Exp)
ncnn_add_layer_test(ExpandDims)
ncnn_add_layer_test(Flatten)
ncnn_add_layer_test(Flip)
Expand Down
56 changes: 56 additions & 0 deletions tests/test_exp.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Copyright 2026 ihb2032 <hebome@foxmail.com>
// SPDX-License-Identifier: BSD-3-Clause

#include "testutil.h"

static int test_exp(const ncnn::Mat& a, float base, float scale, float shift)
{
ncnn::ParamDict pd;
pd.set(0, base);
pd.set(1, scale);
pd.set(2, shift);

std::vector<ncnn::Mat> weights(0);

int ret = test_layer("Exp", pd, weights, a);
if (ret != 0)
{
fprintf(stderr, "test_exp failed a.dims=%d a=(%d %d %d %d) base=%f scale=%f shift=%f\n", a.dims, a.w, a.h, a.d, a.c, base, scale, shift);
}

return ret;
}

static int test_exp_0()
{
return 0
|| test_exp(RandomMat(5, 7, 24, -1.f, 1.f), -1.f, 1.f, 0.f)
|| test_exp(RandomMat(7, 9, 12, -1.f, 1.f), -1.f, 0.75f, -0.25f)
|| test_exp(RandomMat(3, 5, 13, -1.f, 1.f), 2.f, 0.5f, 0.125f);
}

static int test_exp_1()
{
return 0
|| test_exp(RandomMat(15, 24, -1.f, 1.f), -1.f, 1.f, 0.f)
|| test_exp(RandomMat(17, 12, -1.f, 1.f), -1.f, 1.25f, 0.5f)
|| test_exp(RandomMat(19, 15, -1.f, 1.f), 2.f, 0.75f, -0.5f);
}

static int test_exp_2()
{
return 0
|| test_exp(RandomMat(128, -1.f, 1.f), -1.f, 1.f, 0.f)
|| test_exp(RandomMat(124, -1.f, 1.f), -1.f, 0.5f, 0.25f)
|| test_exp(RandomMat(127, -1.f, 1.f), 2.f, 1.5f, -0.75f);
}

int main()
{
SRAND(7767517);

return 0
|| test_exp_0()
|| test_exp_1()
|| test_exp_2();
}
Loading