Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 119 additions & 0 deletions src/layer/riscv/log_riscv.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
// Copyright 2026 ihb2032 <hebome@foxmail.com>
// SPDX-License-Identifier: BSD-3-Clause

#include "log_riscv.h"

#if __riscv_vector
#include <riscv_vector.h>
#include "rvv_mathfun.h"
#endif // __riscv_vector

#include <math.h>

#include "cpu.h"

namespace ncnn {

Log_riscv::Log_riscv()
{
#if __riscv_vector
support_packing = true;
#endif // __riscv_vector
#if NCNN_ZFH
#if __riscv_vector
support_fp16_storage = cpu_support_riscv_zvfh();
#else
support_fp16_storage = cpu_support_riscv_zfh();
#endif
#endif
}

int Log_riscv::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
{
#if NCNN_ZFH
const int elembits = bottom_top_blob.elembits();

if (opt.use_fp16_storage && elembits == 16)
{
if (opt.use_fp16_arithmetic)
return forward_inplace_fp16sa(bottom_top_blob, opt);
else
return forward_inplace_fp16s(bottom_top_blob, opt);
}
#endif

const int w = bottom_top_blob.w;
const int h = bottom_top_blob.h;
const int d = bottom_top_blob.d;
const int channels = bottom_top_blob.c;
const int elempack = bottom_top_blob.elempack;
const int size = w * h * d * elempack;

if (base == -1.f)
{
#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < channels; q++)
{
float* ptr = bottom_top_blob.channel(q);

#if __riscv_vector
int n = size;
while (n > 0)
{
size_t vl = __riscv_vsetvl_e32m8(n);

vfloat32m8_t _p = __riscv_vle32_v_f32m8(ptr, vl);
_p = __riscv_vfmul_vf_f32m8(_p, scale, vl);
_p = __riscv_vfadd_vf_f32m8(_p, shift, vl);
_p = log_ps(_p, vl);
__riscv_vse32_v_f32m8(ptr, _p, vl);

ptr += vl;
n -= vl;
}
#else // __riscv_vector
for (int i = 0; i < size; i++)
{
ptr[i] = logf(shift + ptr[i] * scale);
}
#endif // __riscv_vector
}
}
else
{
const float log_base_inv = 1.f / logf(base);

#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < channels; q++)
{
float* ptr = bottom_top_blob.channel(q);

#if __riscv_vector
int n = size;
while (n > 0)
{
size_t vl = __riscv_vsetvl_e32m8(n);

vfloat32m8_t _p = __riscv_vle32_v_f32m8(ptr, vl);
_p = __riscv_vfmul_vf_f32m8(_p, scale, vl);
_p = __riscv_vfadd_vf_f32m8(_p, shift, vl);
_p = log_ps(_p, vl);
_p = __riscv_vfmul_vf_f32m8(_p, log_base_inv, vl);
__riscv_vse32_v_f32m8(ptr, _p, vl);

ptr += vl;
n -= vl;
}
#else // __riscv_vector
for (int i = 0; i < size; i++)
{
ptr[i] = logf(shift + ptr[i] * scale) * log_base_inv;
}
#endif // __riscv_vector
}
}

return 0;
}

} // namespace ncnn
27 changes: 27 additions & 0 deletions src/layer/riscv/log_riscv.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright 2026 ihb2032 <hebome@foxmail.com>
// SPDX-License-Identifier: BSD-3-Clause

#ifndef LAYER_LOG_RISCV_H
#define LAYER_LOG_RISCV_H

#include "log.h"

namespace ncnn {

class Log_riscv : public Log
{
public:
Log_riscv();

virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const;

protected:
#if NCNN_ZFH
int forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const;
int forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) const;
#endif
};

} // namespace ncnn

#endif // LAYER_LOG_RISCV_H
175 changes: 175 additions & 0 deletions src/layer/riscv/log_riscv_zfh.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
// Copyright 2026 ihb2032 <hebome@foxmail.com>
// SPDX-License-Identifier: BSD-3-Clause

#include "log_riscv.h"

#if __riscv_vector
#include <riscv_vector.h>
#include "rvv_mathfun.h"
#if __riscv_zvfh
#include "rvv_mathfun_fp16s.h"
#endif
#endif // __riscv_vector

#include <math.h>

namespace ncnn {

#if NCNN_ZFH
int Log_riscv::forward_inplace_fp16s(Mat& bottom_top_blob, const Option& opt) const
{
const int w = bottom_top_blob.w;
const int h = bottom_top_blob.h;
const int d = bottom_top_blob.d;
const int channels = bottom_top_blob.c;
const int elempack = bottom_top_blob.elempack;
const int size = w * h * d * elempack;

if (base == -1.f)
{
#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < channels; q++)
{
__fp16* ptr = bottom_top_blob.channel(q);

#if __riscv_zvfh
int n = size;
while (n > 0)
{
size_t vl = __riscv_vsetvl_e16m4(n);

vfloat32m8_t _p = __riscv_vfwcvt_f_f_v_f32m8(__riscv_vle16_v_f16m4(ptr, vl), vl);
_p = __riscv_vfmul_vf_f32m8(_p, scale, vl);
_p = __riscv_vfadd_vf_f32m8(_p, shift, vl);
_p = log_ps(_p, vl);
__riscv_vse16_v_f16m4(ptr, __riscv_vfncvt_f_f_w_f16m4(_p, vl), vl);

ptr += vl;
n -= vl;
}
#else // __riscv_zvfh
for (int i = 0; i < size; i++)
{
ptr[i] = (__fp16)logf(shift + (float)ptr[i] * scale);
}
#endif // __riscv_zvfh
}
}
else
{
const float log_base_inv = 1.f / logf(base);

#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < channels; q++)
{
__fp16* ptr = bottom_top_blob.channel(q);

#if __riscv_zvfh
int n = size;
while (n > 0)
{
size_t vl = __riscv_vsetvl_e16m4(n);

vfloat32m8_t _p = __riscv_vfwcvt_f_f_v_f32m8(__riscv_vle16_v_f16m4(ptr, vl), vl);
_p = __riscv_vfmul_vf_f32m8(_p, scale, vl);
_p = __riscv_vfadd_vf_f32m8(_p, shift, vl);
_p = log_ps(_p, vl);
_p = __riscv_vfmul_vf_f32m8(_p, log_base_inv, vl);
__riscv_vse16_v_f16m4(ptr, __riscv_vfncvt_f_f_w_f16m4(_p, vl), vl);

ptr += vl;
n -= vl;
}
#else // __riscv_zvfh
for (int i = 0; i < size; i++)
{
ptr[i] = (__fp16)(logf(shift + (float)ptr[i] * scale) * log_base_inv);
}
#endif // __riscv_zvfh
}
}

return 0;
}

int Log_riscv::forward_inplace_fp16sa(Mat& bottom_top_blob, const Option& opt) const
{
const int w = bottom_top_blob.w;
const int h = bottom_top_blob.h;
const int d = bottom_top_blob.d;
const int channels = bottom_top_blob.c;
const int elempack = bottom_top_blob.elempack;
const int size = w * h * d * elempack;

const __fp16 _scale = (__fp16)scale;
const __fp16 _shift = (__fp16)shift;

if (base == -1.f)
{
#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < channels; q++)
{
__fp16* ptr = bottom_top_blob.channel(q);

#if __riscv_zvfh
int n = size;
while (n > 0)
{
size_t vl = __riscv_vsetvl_e16m8(n);

vfloat16m8_t _p = __riscv_vle16_v_f16m8(ptr, vl);
_p = __riscv_vfmul_vf_f16m8(_p, _scale, vl);
_p = __riscv_vfadd_vf_f16m8(_p, _shift, vl);
_p = log_ps(_p, vl);
__riscv_vse16_v_f16m8(ptr, _p, vl);

ptr += vl;
n -= vl;
}
#else // __riscv_zvfh
for (int i = 0; i < size; i++)
{
ptr[i] = (__fp16)logf(shift + (float)ptr[i] * scale);
}
#endif // __riscv_zvfh
}
}
else
{
const __fp16 _log_base_inv = (__fp16)(1.f / logf(base));

#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < channels; q++)
{
__fp16* ptr = bottom_top_blob.channel(q);

#if __riscv_zvfh
int n = size;
while (n > 0)
{
size_t vl = __riscv_vsetvl_e16m8(n);

vfloat16m8_t _p = __riscv_vle16_v_f16m8(ptr, vl);
_p = __riscv_vfmul_vf_f16m8(_p, _scale, vl);
_p = __riscv_vfadd_vf_f16m8(_p, _shift, vl);
_p = log_ps(_p, vl);
_p = __riscv_vfmul_vf_f16m8(_p, _log_base_inv, vl);
__riscv_vse16_v_f16m8(ptr, _p, vl);

ptr += vl;
n -= vl;
}
#else // __riscv_zvfh
for (int i = 0; i < size; i++)
{
ptr[i] = (__fp16)(logf(shift + (float)ptr[i] * scale) * (float)_log_base_inv);
}
#endif // __riscv_zvfh
}
}

return 0;
}
#endif // NCNN_ZFH

} // namespace ncnn
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ ncnn_add_layer_test(InstanceNorm)
ncnn_add_layer_test(Interp)
ncnn_add_layer_test(InverseSpectrogram)
ncnn_add_layer_test(LayerNorm)
ncnn_add_layer_test(Log)
ncnn_add_layer_test(LRN)
ncnn_add_layer_test(LSTM)
ncnn_add_layer_test(MatMul)
Expand Down
56 changes: 56 additions & 0 deletions tests/test_log.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Copyright 2026 ihb2032 <hebome@foxmail.com>
// SPDX-License-Identifier: BSD-3-Clause

#include "testutil.h"

static int test_log(const ncnn::Mat& a, float base, float scale, float shift)
{
ncnn::ParamDict pd;
pd.set(0, base);
pd.set(1, scale);
pd.set(2, shift);

std::vector<ncnn::Mat> weights(0);

int ret = test_layer("Log", pd, weights, a);
if (ret != 0)
{
fprintf(stderr, "test_log failed a.dims=%d a=(%d %d %d %d) base=%f scale=%f shift=%f\n", a.dims, a.w, a.h, a.d, a.c, base, scale, shift);
}

return ret;
}

static int test_log_0()
{
return 0
|| test_log(RandomMat(5, 7, 24, 0.001f, 2.f), -1.f, 1.f, 0.f)
|| test_log(RandomMat(7, 9, 12, 0.001f, 2.f), -1.f, 0.75f, 0.25f)
|| test_log(RandomMat(3, 5, 13, 0.001f, 2.f), 2.f, 0.5f, 0.125f);
}

static int test_log_1()
{
return 0
|| test_log(RandomMat(15, 24, 0.001f, 2.f), -1.f, 1.f, 0.f)
|| test_log(RandomMat(17, 12, 0.001f, 2.f), -1.f, 1.25f, 0.5f)
|| test_log(RandomMat(19, 15, 0.001f, 2.f), 2.f, 0.75f, 0.25f);
}

static int test_log_2()
{
return 0
|| test_log(RandomMat(128, 0.001f, 2.f), -1.f, 1.f, 0.f)
|| test_log(RandomMat(124, 0.001f, 2.f), -1.f, 0.5f, 0.25f)
|| test_log(RandomMat(127, 0.001f, 2.f), 2.f, 1.5f, 0.125f);
}

int main()
{
SRAND(7767517);

return 0
|| test_log_0()
|| test_log_1()
|| test_log_2();
}
Loading