diff --git a/src/layer/riscv/log_riscv.cpp b/src/layer/riscv/log_riscv.cpp new file mode 100644 index 000000000000..30ff34103382 --- /dev/null +++ b/src/layer/riscv/log_riscv.cpp @@ -0,0 +1,119 @@ +// Copyright 2026 ihb2032 +// SPDX-License-Identifier: BSD-3-Clause + +#include "log_riscv.h" + +#if __riscv_vector +#include +#include "rvv_mathfun.h" +#endif // __riscv_vector + +#include + +#include "cpu.h" + +namespace ncnn { + +Log_riscv::Log_riscv() +{ +#if __riscv_vector + support_packing = true; +#endif // __riscv_vector +#if NCNN_ZFH +#if __riscv_vector + support_fp16_storage = cpu_support_riscv_zvfh(); +#else + support_fp16_storage = cpu_support_riscv_zfh(); +#endif +#endif +} + +int Log_riscv::forward_inplace(Mat& bottom_top_blob, const Option& opt) const +{ +#if NCNN_ZFH + const int elembits = bottom_top_blob.elembits(); + + if (opt.use_fp16_storage && elembits == 16) + { + if (opt.use_fp16_arithmetic) + return forward_inplace_fp16sa(bottom_top_blob, opt); + else + return forward_inplace_fp16s(bottom_top_blob, opt); + } +#endif + + const int w = bottom_top_blob.w; + const int h = bottom_top_blob.h; + const int d = bottom_top_blob.d; + const int channels = bottom_top_blob.c; + const int elempack = bottom_top_blob.elempack; + const int size = w * h * d * elempack; + + if (base == -1.f) + { + #pragma omp parallel for num_threads(opt.num_threads) + for (int q = 0; q < channels; q++) + { + float* ptr = bottom_top_blob.channel(q); + +#if __riscv_vector + int n = size; + while (n > 0) + { + size_t vl = __riscv_vsetvl_e32m8(n); + + vfloat32m8_t _p = __riscv_vle32_v_f32m8(ptr, vl); + _p = __riscv_vfmul_vf_f32m8(_p, scale, vl); + _p = __riscv_vfadd_vf_f32m8(_p, shift, vl); + _p = log_ps(_p, vl); + __riscv_vse32_v_f32m8(ptr, _p, vl); + + ptr += vl; + n -= vl; + } +#else // __riscv_vector + for (int i = 0; i < size; i++) + { + ptr[i] = logf(shift + ptr[i] * scale); + } +#endif // __riscv_vector + } + } + else + { + const float log_base_inv = 1.f / logf(base); + + #pragma omp parallel for num_threads(opt.num_threads) + for (int q = 0; q < channels; q++) + { + float* ptr = bottom_top_blob.channel(q); + +#if __riscv_vector + int n = size; + while (n > 0) + { + size_t vl = __riscv_vsetvl_e32m8(n); + + vfloat32m8_t _p = __riscv_vle32_v_f32m8(ptr, vl); + _p = __riscv_vfmul_vf_f32m8(_p, scale, vl); + _p = __riscv_vfadd_vf_f32m8(_p, shift, vl); + _p = log_ps(_p, vl); + _p = __riscv_vfmul_vf_f32m8(_p, log_base_inv, vl); + __riscv_vse32_v_f32m8(ptr, _p, vl); + + ptr += vl; + n -= vl; + } +#else // __riscv_vector + for (int i = 0; i < size; i++) + { + ptr[i] = logf(shift + ptr[i] * scale) * log_base_inv; + } +#endif // __riscv_vector + } + } + + return 0; +} + +} // namespace ncnn diff --git a/src/layer/riscv/log_riscv.h b/src/layer/riscv/log_riscv.h new file mode 100644 index 000000000000..8fe8f8131bdc --- /dev/null +++ b/src/layer/riscv/log_riscv.h @@ -0,0 +1,27 @@ +// Copyright 2026 ihb2032 +// SPDX-License-Identifier: BSD-3-Clause + +#ifndef LAYER_LOG_RISCV_H +#define LAYER_LOG_RISCV_H + +#include "log.h" + +namespace ncnn { + +class Log_riscv : public Log +{ +public: + Log_riscv(); + + virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const; + +protected: +#if NCNN_ZFH + int forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const; + int forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) const; +#endif +}; + +} // namespace ncnn + +#endif // LAYER_LOG_RISCV_H diff --git a/src/layer/riscv/log_riscv_zfh.cpp b/src/layer/riscv/log_riscv_zfh.cpp new file mode 100644 index 000000000000..08bac8c05f64 --- /dev/null +++ b/src/layer/riscv/log_riscv_zfh.cpp @@ -0,0 +1,175 @@ +// Copyright 2026 ihb2032 +// SPDX-License-Identifier: BSD-3-Clause + +#include "log_riscv.h" + +#if __riscv_vector +#include +#include "rvv_mathfun.h" +#if __riscv_zvfh +#include "rvv_mathfun_fp16s.h" +#endif +#endif // __riscv_vector + +#include + +namespace ncnn { + +#if NCNN_ZFH +int Log_riscv::forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const +{ + const int w = bottom_top_blob.w; + const int h = bottom_top_blob.h; + const int d = bottom_top_blob.d; + const int channels = bottom_top_blob.c; + const int elempack = bottom_top_blob.elempack; + const int size = w * h * d * elempack; + + if (base == -1.f) + { + #pragma omp parallel for num_threads(opt.num_threads) + for (int q = 0; q < channels; q++) + { + __fp16* ptr = bottom_top_blob.channel(q); + +#if __riscv_zvfh + int n = size; + while (n > 0) + { + size_t vl = __riscv_vsetvl_e16m4(n); + + vfloat32m8_t _p = __riscv_vfwcvt_f_f_v_f32m8(__riscv_vle16_v_f16m4(ptr, vl), vl); + _p = __riscv_vfmul_vf_f32m8(_p, scale, vl); + _p = __riscv_vfadd_vf_f32m8(_p, shift, vl); + _p = log_ps(_p, vl); + __riscv_vse16_v_f16m4(ptr, __riscv_vfncvt_f_f_w_f16m4(_p, vl), vl); + + ptr += vl; + n -= vl; + } +#else // __riscv_zvfh + for (int i = 0; i < size; i++) + { + ptr[i] = (__fp16)logf(shift + (float)ptr[i] * scale); + } +#endif // __riscv_zvfh + } + } + else + { + const float log_base_inv = 1.f / logf(base); + + #pragma omp parallel for num_threads(opt.num_threads) + for (int q = 0; q < channels; q++) + { + __fp16* ptr = bottom_top_blob.channel(q); + +#if __riscv_zvfh + int n = size; + while (n > 0) + { + size_t vl = __riscv_vsetvl_e16m4(n); + + vfloat32m8_t _p = __riscv_vfwcvt_f_f_v_f32m8(__riscv_vle16_v_f16m4(ptr, vl), vl); + _p = __riscv_vfmul_vf_f32m8(_p, scale, vl); + _p = __riscv_vfadd_vf_f32m8(_p, shift, vl); + _p = log_ps(_p, vl); + _p = __riscv_vfmul_vf_f32m8(_p, log_base_inv, vl); + __riscv_vse16_v_f16m4(ptr, __riscv_vfncvt_f_f_w_f16m4(_p, vl), vl); + + ptr += vl; + n -= vl; + } +#else // __riscv_zvfh + for (int i = 0; i < size; i++) + { + ptr[i] = (__fp16)(logf(shift + (float)ptr[i] * scale) * log_base_inv); + } +#endif // __riscv_zvfh + } + } + + return 0; +} + +int Log_riscv::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) const +{ + const int w = bottom_top_blob.w; + const int h = bottom_top_blob.h; + const int d = bottom_top_blob.d; + const int channels = bottom_top_blob.c; + const int elempack = bottom_top_blob.elempack; + const int size = w * h * d * elempack; + + const __fp16 _scale = (__fp16)scale; + const __fp16 _shift = (__fp16)shift; + + if (base == -1.f) + { + #pragma omp parallel for num_threads(opt.num_threads) + for (int q = 0; q < channels; q++) + { + __fp16* ptr = bottom_top_blob.channel(q); + +#if __riscv_zvfh + int n = size; + while (n > 0) + { + size_t vl = __riscv_vsetvl_e16m8(n); + + vfloat16m8_t _p = __riscv_vle16_v_f16m8(ptr, vl); + _p = __riscv_vfmul_vf_f16m8(_p, _scale, vl); + _p = __riscv_vfadd_vf_f16m8(_p, _shift, vl); + _p = log_ps(_p, vl); + __riscv_vse16_v_f16m8(ptr, _p, vl); + + ptr += vl; + n -= vl; + } +#else // __riscv_zvfh + for (int i = 0; i < size; i++) + { + ptr[i] = (__fp16)logf(shift + (float)ptr[i] * scale); + } +#endif // __riscv_zvfh + } + } + else + { + const __fp16 _log_base_inv = (__fp16)(1.f / logf(base)); + + #pragma omp parallel for num_threads(opt.num_threads) + for (int q = 0; q < channels; q++) + { + __fp16* ptr = bottom_top_blob.channel(q); + +#if __riscv_zvfh + int n = size; + while (n > 0) + { + size_t vl = __riscv_vsetvl_e16m8(n); + + vfloat16m8_t _p = __riscv_vle16_v_f16m8(ptr, vl); + _p = __riscv_vfmul_vf_f16m8(_p, _scale, vl); + _p = __riscv_vfadd_vf_f16m8(_p, _shift, vl); + _p = log_ps(_p, vl); + _p = __riscv_vfmul_vf_f16m8(_p, _log_base_inv, vl); + __riscv_vse16_v_f16m8(ptr, _p, vl); + + ptr += vl; + n -= vl; + } +#else // __riscv_zvfh + for (int i = 0; i < size; i++) + { + ptr[i] = (__fp16)(logf(shift + (float)ptr[i] * scale) * (float)_log_base_inv); + } +#endif // __riscv_zvfh + } + } + + return 0; +} +#endif // NCNN_ZFH + +} // namespace ncnn diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 730cf919a192..d1adc3cbe3e3 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -122,6 +122,7 @@ ncnn_add_layer_test(InstanceNorm) ncnn_add_layer_test(Interp) ncnn_add_layer_test(InverseSpectrogram) ncnn_add_layer_test(LayerNorm) +ncnn_add_layer_test(Log) ncnn_add_layer_test(LRN) ncnn_add_layer_test(LSTM) ncnn_add_layer_test(MatMul) diff --git a/tests/test_log.cpp b/tests/test_log.cpp new file mode 100644 index 000000000000..69eaf5c68705 --- /dev/null +++ b/tests/test_log.cpp @@ -0,0 +1,56 @@ +// Copyright 2026 ihb2032 +// SPDX-License-Identifier: BSD-3-Clause + +#include "testutil.h" + +static int test_log(const ncnn::Mat& a, float base, float scale, float shift) +{ + ncnn::ParamDict pd; + pd.set(0, base); + pd.set(1, scale); + pd.set(2, shift); + + std::vector weights(0); + + int ret = test_layer("Log", pd, weights, a); + if (ret != 0) + { + fprintf(stderr, "test_log failed a.dims=%d a=(%d %d %d %d) base=%f scale=%f shift=%f\n", a.dims, a.w, a.h, a.d, a.c, base, scale, shift); + } + + return ret; +} + +static int test_log_0() +{ + return 0 + || test_log(RandomMat(5, 7, 24, 0.001f, 2.f), -1.f, 1.f, 0.f) + || test_log(RandomMat(7, 9, 12, 0.001f, 2.f), -1.f, 0.75f, 0.25f) + || test_log(RandomMat(3, 5, 13, 0.001f, 2.f), 2.f, 0.5f, 0.125f); +} + +static int test_log_1() +{ + return 0 + || test_log(RandomMat(15, 24, 0.001f, 2.f), -1.f, 1.f, 0.f) + || test_log(RandomMat(17, 12, 0.001f, 2.f), -1.f, 1.25f, 0.5f) + || test_log(RandomMat(19, 15, 0.001f, 2.f), 2.f, 0.75f, 0.25f); +} + +static int test_log_2() +{ + return 0 + || test_log(RandomMat(128, 0.001f, 2.f), -1.f, 1.f, 0.f) + || test_log(RandomMat(124, 0.001f, 2.f), -1.f, 0.5f, 0.25f) + || test_log(RandomMat(127, 0.001f, 2.f), 2.f, 1.5f, 0.125f); +} + +int main() +{ + SRAND(7767517); + + return 0 + || test_log_0() + || test_log_1() + || test_log_2(); +}