From a6e57f23f805f5505d123d261047000e8669ce5f Mon Sep 17 00:00:00 2001 From: JustinFung <835776509@qq.com> Date: Mon, 27 Apr 2026 00:01:37 +0800 Subject: [PATCH 1/5] RVV1.0 Requantize Layer --- src/layer/riscv/requantize_riscv.cpp | 390 +++++++++++++++++++++++++++ src/layer/riscv/requantize_riscv.h | 21 ++ src/layer/riscv/riscv_usability.h | 32 +++ tests/test_requantize.cpp | 71 +++++ 4 files changed, 514 insertions(+) create mode 100644 src/layer/riscv/requantize_riscv.cpp create mode 100644 src/layer/riscv/requantize_riscv.h diff --git a/src/layer/riscv/requantize_riscv.cpp b/src/layer/riscv/requantize_riscv.cpp new file mode 100644 index 000000000000..85f5b5da04be --- /dev/null +++ b/src/layer/riscv/requantize_riscv.cpp @@ -0,0 +1,390 @@ +// Copyright 2026 Tencent +// SPDX-License-Identifier: BSD-3-Clause + +#include "requantize_riscv.h" + +#if __riscv_vector +#include <riscv_vector.h> +#endif // __riscv_vector + +#include "riscv_activation.h" +#include "riscv_usability.h" + +namespace ncnn { + +Requantize_riscv::Requantize_riscv() +{ +#if __riscv_vector + support_packing = true; +#endif // __riscv_vector +} + +static void requantize_leakyrelu(const int* intptr, signed char* ptr, const Mat& scale_in_data, const Mat& bias_data, const Mat& scale_out_data, float slope, int elemcount, int elempack) +{ + const int scale_in_data_size = scale_in_data.w; + const int bias_data_size = bias_data.w; + const int scale_out_data_size = scale_out_data.w; + const int size = elemcount * elempack; + + // int8(relu(v * scale_in) * scale_out) + // int8_relu(v * (scale_in * scale_out)) + // int8(relu(v * scale_in + bias) * scale_out) + // int8_relu(v * (scale_in * scale_out) + (bias * scale_out)) + + // int8(leakyrelu(v * scale_in, slope) * scale_out) + // int8_leakyrelu(v * (scale_in * scale_out), slope) + // int8(leakyrelu(v * scale_in + bias, slope) * scale_out) + // int8_leakyrelu(v * (scale_in * scale_out) + (bias * scale_out), slope) + +#if __riscv_vector + const size_t vlm1 = 
__riscv_vsetvlmax_e32m1(); + const size_t vlmax = __riscv_vsetvlmax_e32m8(); + + vfloat32m8_t _scale = __riscv_vfmv_v_f_f32m8(scale_in_data[0], vlmax); + if (scale_in_data_size > 1) + { + // if (elempack == vlm1) + { + vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_in_data, vlm1); + _scale = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s); + } + } + + vfloat32m8_t _bias = __riscv_vfmv_v_f_f32m8(0.f, vlmax); + if (bias_data_size == 1) + { + _bias = __riscv_vfmv_v_f_f32m8(bias_data[0], vlmax); + } + else if (bias_data_size > 1) + { + // if (elempack == vlm1) + { + vfloat32m1_t _b = __riscv_vle32_v_f32m1((const float*)bias_data, vlm1); + _bias = __riscv_vcreate_v_f32m1_f32m8(_b, _b, _b, _b, _b, _b, _b, _b); + } + } + + if (scale_out_data_size > 1) + { + // if (elempack == vlm1) + { + vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_out_data, vlm1); + vfloat32m8_t _s2 = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s); + _scale = __riscv_vfmul_vv_f32m8(_scale, _s2, vlmax); + _bias = __riscv_vfmul_vv_f32m8(_bias, _s2, vlmax); + } + } + else + { + _scale = __riscv_vfmul_vf_f32m8(_scale, scale_out_data[0], vlmax); + _bias = __riscv_vfmul_vf_f32m8(_bias, scale_out_data[0], vlmax); + } + + int n = size; + if (slope > 0.f) // Leaky ReLU + { + if (bias_data_size == 0) + { + while (n > 0) + { + size_t vl = __riscv_vsetvl_e32m8(n); + vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl); + vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl); + _v = __riscv_vfmul_vv_f32m8(_v, _scale, vl); + __riscv_vse8_v_i8m2(ptr, float2int8leakyrelu(_v, slope, vl), vl); + + intptr += vl; + ptr += vl; + n -= vl; + } + } + else + { + while (n > 0) + { + size_t vl = __riscv_vsetvl_e32m8(n); + vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl); + vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl); + _v = __riscv_vfmadd_vv_f32m8(_v, _scale, _bias, vl); + __riscv_vse8_v_i8m2(ptr, float2int8leakyrelu(_v, slope, vl), vl); + + intptr += vl; + ptr += 
vl; + n -= vl; + } + } + } + else + { + if (bias_data_size == 0) + { + while (n > 0) + { + size_t vl = __riscv_vsetvl_e32m8(n); + vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl); + vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl); + _v = __riscv_vfmul_vv_f32m8(_v, _scale, vl); + __riscv_vse8_v_i8m2(ptr, float2int8relu(_v, vl), vl); + + intptr += vl; + ptr += vl; + n -= vl; + } + } + else + { + while (n > 0) + { + size_t vl = __riscv_vsetvl_e32m8(n); + vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl); + vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl); + _v = __riscv_vfmadd_vv_f32m8(_v, _scale, _bias, vl); + __riscv_vse8_v_i8m2(ptr, float2int8relu(_v, vl), vl); + + intptr += vl; + ptr += vl; + n -= vl; + } + } + } +#else // __riscv_vector + float scale = scale_in_data[0] * scale_out_data[0]; + if (slope > 0.f) + { + if (bias_data_size == 0) + { + for (int i = 0; i < size; i++) + { + float v = *intptr * scale; + *ptr = (v < 0) ? float2int8(v * slope) : float2int8(v); + intptr++; + ptr++; + } + } + else + { + float bias = bias_data[0] * scale_out_data[0]; + for (int i = 0; i < size; i++) + { + float v = *intptr * scale + bias; + *ptr = (v < 0) ? float2int8(v * slope) : float2int8(v); + intptr++; + ptr++; + } + } + } + else + { + if (bias_data_size == 0) + { + for (int i = 0; i < size; i++) + { + float v = *intptr * scale; + *ptr = (v < 0) ? 0 : float2int8(v); + intptr++; + ptr++; + } + } + else + { + float bias = bias_data[0] * scale_out_data[0]; + for (int i = 0; i < size; i++) + { + float v = *intptr * scale + bias; + *ptr = (v < 0) ? 
0 : float2int8(v); + intptr++; + ptr++; + } + } + } +#endif // __riscv_vector +} + +static void requantize(const int* intptr, signed char* ptr, const Mat& scale_in_data, const Mat& bias_data, const Mat& scale_out_data, int activation_type, const Mat& activation_params, int elemcount, int elempack) +{ + if ((activation_type == 1) || (activation_type == 2)) + { + const float slope = activation_params[0]; + requantize_leakyrelu(intptr, ptr, scale_in_data, bias_data, scale_out_data, slope, elemcount, elempack); + return; + } + + const int scale_in_data_size = scale_in_data.w; + const int bias_data_size = bias_data.w; + const int scale_out_data_size = scale_out_data.w; + const int size = elemcount * elempack; + + const float scale_in = scale_in_data[0]; + const float scale_out = scale_out_data[0]; + const float bias = bias_data_size == 0 ? 0.f : bias_data[0]; + +#if __riscv_vector + const size_t vlm1 = __riscv_vsetvlmax_e32m1(); + const size_t vlmax = __riscv_vsetvlmax_e32m8(); + + vfloat32m8_t _scale_in = __riscv_vfmv_v_f_f32m8(scale_in, vlmax); + if (scale_in_data_size > 1) + { + // if (elempack == vlm1) + { + vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_in_data, vlm1); + _scale_in = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s); + } + } + + vfloat32m8_t _scale_out = __riscv_vfmv_v_f_f32m8(scale_out, vlmax); + if (scale_out_data_size > 1) + { + // if (elempack == vlm1) + { + vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_out_data, vlm1); + _scale_out = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s); + } + } + + int n = size; + if (bias_data_size == 0) + { + while (n > 0) + { + size_t vl = __riscv_vsetvl_e32m8(n); + vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl); + vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl); + _v = __riscv_vfmul_vv_f32m8(_v, _scale_in, vl); + _v = activation_ps(_v, activation_type, activation_params, vl); + _v = __riscv_vfmul_vv_f32m8(_v, _scale_out, vl); + 
__riscv_vse8_v_i8m2(ptr, float2int8(_v, vl), vl); + + intptr += vl; + ptr += vl; + n -= vl; + } + } + else // if (bias_data_size >= 1) + { + vfloat32m8_t _bias = __riscv_vfmv_v_f_f32m8(bias, vlmax); + if (bias_data_size > 1) + { + // if (elempack == vlm1) + { + vfloat32m1_t _b = __riscv_vle32_v_f32m1((const float*)bias_data, vlm1); + _bias = __riscv_vcreate_v_f32m1_f32m8(_b, _b, _b, _b, _b, _b, _b, _b); + } + } + + while (n > 0) + { + size_t vl = __riscv_vsetvl_e32m8(n); + vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl); + vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl); + _v = __riscv_vfmadd_vv_f32m8(_v, _scale_in, _bias, vl); // add bias + _v = activation_ps(_v, activation_type, activation_params, vl); + _v = __riscv_vfmul_vv_f32m8(_v, _scale_out, vl); + __riscv_vse8_v_i8m2(ptr, float2int8(_v, vl), vl); + + intptr += vl; + ptr += vl; + n -= vl; + } + } +#else // __riscv_vector + if (bias_data_size == 0) + { + for (int i = 0; i < size; i++) + { + float v = (float)(*intptr) * scale_in; + v = activation_ss(v, activation_type, activation_params); + *ptr = float2int8(v * scale_out); + intptr++; + ptr++; + } + } + else + { + for (int i = 0; i < size; i++) + { + float v = (float)(*intptr) * scale_in + bias; + v = activation_ss(v, activation_type, activation_params); + *ptr = float2int8(v * scale_out); + intptr++; + ptr++; + } + } +#endif // __riscv_vector +} + +int Requantize_riscv::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const +{ + const int dims = bottom_blob.dims; + const int w = bottom_blob.w; + const int h = bottom_blob.h; + const int channels = bottom_blob.c; + const int elempack = bottom_blob.elempack; + const size_t out_elemsize = elempack * 1u; + + if (dims == 1) + { + top_blob.create(w, out_elemsize, elempack, opt.blob_allocator); + if (top_blob.empty()) + return -100; + + const int wp = std::max(1, w / opt.num_threads); + const int nn_w = (w + wp - 1) / wp; + + #pragma omp parallel for num_threads(opt.num_threads) + for (int 
ii = 0; ii < nn_w; ii++) + { + const int i = ii * wp; + + const int* intptr = (const int*)bottom_blob + i * elempack; + signed char* ptr = (signed char*)top_blob + i * elempack; + const int size = std::min(w - i, wp) * elempack; + + requantize(intptr, ptr, scale_in_data, bias_data, scale_out_data, activation_type, activation_params, size, 1); + } + } + + if (dims == 2) + { + top_blob.create(w, h, out_elemsize, elempack, opt.blob_allocator); + if (top_blob.empty()) + return -100; + + #pragma omp parallel for num_threads(opt.num_threads) + for (int i = 0; i < h; i++) + { + const int* intptr = bottom_blob.row<const int>(i); + signed char* ptr = top_blob.row<signed char>(i); + + const Mat scale_in_data_i = scale_in_data_size > 1 ? scale_in_data.range(i * elempack, elempack) : scale_in_data; + const Mat bias_data_i = bias_data_size > 1 ? bias_data.range(i * elempack, elempack) : bias_data; + const Mat scale_out_data_i = scale_out_data_size > 1 ? scale_out_data.range(i * elempack, elempack) : scale_out_data; + + requantize(intptr, ptr, scale_in_data_i, bias_data_i, scale_out_data_i, activation_type, activation_params, w, elempack); + } + } + + if (dims == 3) + { + top_blob.create(w, h, channels, out_elemsize, elempack, opt.blob_allocator); + if (top_blob.empty()) + return -100; + + #pragma omp parallel for num_threads(opt.num_threads) + for (int q = 0; q < channels; q++) + { + const int* intptr = bottom_blob.channel(q); + signed char* ptr = top_blob.channel(q); + + const Mat scale_in_data_q = scale_in_data_size > 1 ? scale_in_data.range(q * elempack, elempack) : scale_in_data; + const Mat bias_data_q = bias_data_size > 1 ? bias_data.range(q * elempack, elempack) : bias_data; + const Mat scale_out_data_q = scale_out_data_size > 1 ? 
scale_out_data.range(q * elempack, elempack) : scale_out_data; + + requantize(intptr, ptr, scale_in_data_q, bias_data_q, scale_out_data_q, activation_type, activation_params, w * h, elempack); + } + } + + return 0; +} + +} // namespace ncnn diff --git a/src/layer/riscv/requantize_riscv.h b/src/layer/riscv/requantize_riscv.h new file mode 100644 index 000000000000..a058746350c5 --- /dev/null +++ b/src/layer/riscv/requantize_riscv.h @@ -0,0 +1,21 @@ +// Copyright 2026 Tencent +// SPDX-License-Identifier: BSD-3-Clause + +#ifndef LAYER_REQUANTIZE_RISCV_H +#define LAYER_REQUANTIZE_RISCV_H + +#include "requantize.h" + +namespace ncnn { + +class Requantize_riscv : public Requantize +{ +public: + Requantize_riscv(); + + virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; +}; + +} // namespace ncnn + +#endif // LAYER_REQUANTIZE_RISCV_H diff --git a/src/layer/riscv/riscv_usability.h b/src/layer/riscv/riscv_usability.h index 7ff06d789979..451b0029502d 100644 --- a/src/layer/riscv/riscv_usability.h +++ b/src/layer/riscv/riscv_usability.h @@ -35,6 +35,38 @@ static inline vint8m1_t float2int8(vfloat32m4_t v, size_t vl) return __riscv_vnclip_wx_i8m1(v16, 0, __RISCV_VXRM_RNU, vl); } +static inline vint8m2_t float2int8relu(vfloat32m8_t v, size_t vl) +{ + vint32m8_t v32 = __riscv_vfcvt_x_f_v_i32m8_rm(v, __RISCV_FRM_RMM, vl); + v32 = __riscv_vmax_vx_i32m8(v32, 0, vl); + v32 = __riscv_vmin_vx_i32m8(v32, 127, vl); + vint16m4_t v16 = __riscv_vnclip_wx_i16m4(v32, 0, __RISCV_VXRM_RNU, vl); + return __riscv_vnclip_wx_i8m2(v16, 0, __RISCV_VXRM_RNU, vl); +} + +static inline vint8m1_t float2int8relu(vfloat32m4_t v, size_t vl) +{ + vint32m4_t v32 = __riscv_vfcvt_x_f_v_i32m4_rm(v, __RISCV_FRM_RMM, vl); + v32 = __riscv_vmax_vx_i32m4(v32, 0, vl); + v32 = __riscv_vmin_vx_i32m4(v32, 127, vl); + vint16m2_t v16 = __riscv_vnclip_wx_i16m2(v32, 0, __RISCV_VXRM_RNU, vl); + return __riscv_vnclip_wx_i8m1(v16, 0, __RISCV_VXRM_RNU, vl); +} + +static inline vint8m2_t 
float2int8leakyrelu(vfloat32m8_t v, float slope, size_t vl) +{ + vint8m2_t v8 = float2int8(v, vl); + vint8m2_t v8_leaky = float2int8(__riscv_vfmul_vf_f32m8(v, slope, vl), vl); + return __riscv_vmax_vv_i8m2(v8, v8_leaky, vl); +} + +static inline vint8m1_t float2int8leakyrelu(vfloat32m4_t v, float slope, size_t vl) +{ + vint8m1_t v8 = float2int8(v, vl); + vint8m1_t v8_leaky = float2int8(__riscv_vfmul_vf_f32m4(v, slope, vl), vl); + return __riscv_vmax_vv_i8m1(v8, v8_leaky, vl); +} + #if __riscv_zvfh static inline vint8m4_t float2int8(vfloat16m8_t v, size_t vl) { diff --git a/tests/test_requantize.cpp b/tests/test_requantize.cpp index 3271119c8572..70ad93057923 100644 --- a/tests/test_requantize.cpp +++ b/tests/test_requantize.cpp @@ -3,6 +3,48 @@ #include "testutil.h" +static int test_requantize(const ncnn::Mat& a, int scale_in_data_size, int scale_out_data_size, int bias_data_size, int activation_type, float alpha, float beta) +{ + ncnn::ParamDict pd; + pd.set(0, scale_in_data_size); + pd.set(1, scale_out_data_size); + pd.set(2, bias_data_size); + + ncnn::Mat activation_params(2); + activation_params[0] = alpha; + activation_params[1] = beta; + pd.set(3, activation_type); + pd.set(4, activation_params); + + std::vector<ncnn::Mat> weights(bias_data_size ? 
3 : 2); + weights[0] = RandomMat(scale_in_data_size); + weights[1] = RandomMat(scale_out_data_size); + if (bias_data_size) + weights[2] = RandomMat(bias_data_size); + + Randomize(weights[0], 0.0001, 0.001); + Randomize(weights[1], 10, 100); + + int ret = test_layer("Requantize", pd, weights, a, 1); + if (ret != 0) + { + fprintf(stderr, "test_requantize failed a.dims=%d a=(%d %d %d) scale_in_data_size=%d scale_out_data_size=%d bias_data_size=%d act=%d actparams=[%f,%f]\n", a.dims, a.w, a.h, a.c, scale_in_data_size, scale_out_data_size, bias_data_size, activation_type, activation_params[0], activation_params[1]); + } + + return ret; +} + +static int test_requantize(const ncnn::Mat& a, int scale_in_data_size, int scale_out_data_size, int bias_data_size) +{ + return 0 + || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 0, 0.f, 0.f) + || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 1, 0.f, 0.f) + || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 2, RandomFloat(0, 1), 0.f) + || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 3, RandomFloat(-1, 0), RandomFloat(0, 1)) + || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 4, 0.f, 0.f) + || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 5, 0.f, 0.f); +} + static int test_requantize_pack1(const ncnn::Mat& a, int scale_in_data_size, int scale_out_data_size, int bias_data_size, int activation_type, float alpha, float beta) { ncnn::ParamDict pd; @@ -159,6 +201,7 @@ static int test_requantize_2() static int test_requantize_3() { return 0 +#ifndef __riscv || test_requantize_pack8(RandomIntMat(5, 7, 24), 1, 1, 24) || test_requantize_pack8(RandomIntMat(5, 7, 24), 1, 1, 1) || test_requantize_pack8(RandomIntMat(5, 7, 24), 1, 1, 0) @@ -185,6 +228,34 @@ static int test_requantize_3() || test_requantize_pack8(RandomIntMat(15, 24), 24, 1, 0) || 
test_requantize_pack8(RandomIntMat(128), 1, 1, 1) || test_requantize_pack8(RandomIntMat(128), 1, 1, 0); +#else + || test_requantize(RandomIntMat(5, 7, 24), 1, 1, 24) + || test_requantize(RandomIntMat(5, 7, 24), 1, 1, 1) + || test_requantize(RandomIntMat(5, 7, 24), 1, 1, 0) + || test_requantize(RandomIntMat(5, 7, 24), 24, 24, 24) + || test_requantize(RandomIntMat(5, 7, 24), 24, 24, 1) + || test_requantize(RandomIntMat(5, 7, 24), 24, 24, 0) + || test_requantize(RandomIntMat(5, 7, 24), 1, 24, 24) + || test_requantize(RandomIntMat(5, 7, 24), 1, 24, 1) + || test_requantize(RandomIntMat(5, 7, 24), 1, 24, 0) + || test_requantize(RandomIntMat(5, 7, 24), 24, 1, 24) + || test_requantize(RandomIntMat(5, 7, 24), 24, 1, 1) + || test_requantize(RandomIntMat(5, 7, 24), 24, 1, 0) + || test_requantize(RandomIntMat(15, 24), 1, 1, 24) + || test_requantize(RandomIntMat(15, 24), 1, 1, 1) + || test_requantize(RandomIntMat(15, 24), 1, 1, 0) + || test_requantize(RandomIntMat(15, 24), 24, 24, 24) + || test_requantize(RandomIntMat(15, 24), 24, 24, 1) + || test_requantize(RandomIntMat(15, 24), 24, 24, 0) + || test_requantize(RandomIntMat(15, 24), 1, 24, 24) + || test_requantize(RandomIntMat(15, 24), 1, 24, 1) + || test_requantize(RandomIntMat(15, 24), 1, 24, 0) + || test_requantize(RandomIntMat(15, 24), 24, 1, 24) + || test_requantize(RandomIntMat(15, 24), 24, 1, 1) + || test_requantize(RandomIntMat(15, 24), 24, 1, 0) + || test_requantize(RandomIntMat(128), 1, 1, 1) + || test_requantize(RandomIntMat(128), 1, 1, 0); +#endif // __riscv } int main() From 0bc6eb3a02347051c5b47f0611dfb40f93c325e0 Mon Sep 17 00:00:00 2001 From: JustinFung <835776509@qq.com> Date: Sat, 4 Apr 2026 16:09:49 +0800 Subject: [PATCH 2/5] Requested change: Slope --- src/layer/riscv/riscv_usability.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/layer/riscv/riscv_usability.h b/src/layer/riscv/riscv_usability.h index 451b0029502d..bdd846a80e8a 100644 --- 
a/src/layer/riscv/riscv_usability.h +++ b/src/layer/riscv/riscv_usability.h @@ -55,16 +55,18 @@ static inline vint8m1_t float2int8relu(vfloat32m4_t v, size_t vl) static inline vint8m2_t float2int8leakyrelu(vfloat32m8_t v, float slope, size_t vl) { - vint8m2_t v8 = float2int8(v, vl); - vint8m2_t v8_leaky = float2int8(__riscv_vfmul_vf_f32m8(v, slope, vl), vl); - return __riscv_vmax_vv_i8m2(v8, v8_leaky, vl); + vfloat32m8_t v_pos = __riscv_vfmax_vf_f32m8(v, 0.f, vl); + vfloat32m8_t v_neg = __riscv_vfmin_vf_f32m8(v, 0.f, vl); + vfloat32m8_t v_leakyrelu = __riscv_vfadd_vv_f32m8(v_pos, __riscv_vfmul_vf_f32m8(v_neg, slope, vl), vl); + return float2int8(v_leakyrelu, vl); } static inline vint8m1_t float2int8leakyrelu(vfloat32m4_t v, float slope, size_t vl) { - vint8m1_t v8 = float2int8(v, vl); - vint8m1_t v8_leaky = float2int8(__riscv_vfmul_vf_f32m4(v, slope, vl), vl); - return __riscv_vmax_vv_i8m1(v8, v8_leaky, vl); + vfloat32m4_t v_pos = __riscv_vfmax_vf_f32m4(v, 0.f, vl); + vfloat32m4_t v_neg = __riscv_vfmin_vf_f32m4(v, 0.f, vl); + vfloat32m4_t v_leakyrelu = __riscv_vfadd_vv_f32m4(v_pos, __riscv_vfmul_vf_f32m4(v_neg, slope, vl), vl); + return float2int8(v_leakyrelu, vl); } #if __riscv_zvfh From 5ec204cf9be00b25327508e62ad562ffd2dfbca2 Mon Sep 17 00:00:00 2001 From: JustinFung <835776509@qq.com> Date: Sun, 17 May 2026 10:37:00 +0800 Subject: [PATCH 3/5] Requested change: naming --- src/layer/riscv/requantize_riscv.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/layer/riscv/requantize_riscv.cpp b/src/layer/riscv/requantize_riscv.cpp index 85f5b5da04be..9e98307ee947 100644 --- a/src/layer/riscv/requantize_riscv.cpp +++ b/src/layer/riscv/requantize_riscv.cpp @@ -19,7 +19,7 @@ Requantize_riscv::Requantize_riscv() #endif // __riscv_vector } -static void requantize_leakyrelu(const int* intptr, signed char* ptr, const Mat& scale_in_data, const Mat& bias_data, const Mat& scale_out_data, float slope, int elemcount, int elempack) +static void 
requantize_relu(const int* intptr, signed char* ptr, const Mat& scale_in_data, const Mat& bias_data, const Mat& scale_out_data, float slope, int elemcount, int elempack) { const int scale_in_data_size = scale_in_data.w; const int bias_data_size = bias_data.w; @@ -205,7 +205,7 @@ static void requantize(const int* intptr, signed char* ptr, const Mat& scale_in_ if ((activation_type == 1) || (activation_type == 2)) { const float slope = activation_params[0]; - requantize_leakyrelu(intptr, ptr, scale_in_data, bias_data, scale_out_data, slope, elemcount, elempack); + requantize_relu(intptr, ptr, scale_in_data, bias_data, scale_out_data, slope, elemcount, elempack); return; } From 225785cf3cbf68e831a1ddcb60b325542962c7df Mon Sep 17 00:00:00 2001 From: Deepdive543443 <83911295+Deepdive543443@users.noreply.github.com> Date: Sun, 17 May 2026 02:39:27 +0000 Subject: [PATCH 4/5] apply code-format changes --- src/layer/riscv/riscv_usability.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/layer/riscv/riscv_usability.h b/src/layer/riscv/riscv_usability.h index bdd846a80e8a..9e90eb77ca89 100644 --- a/src/layer/riscv/riscv_usability.h +++ b/src/layer/riscv/riscv_usability.h @@ -55,18 +55,18 @@ static inline vint8m1_t float2int8relu(vfloat32m4_t v, size_t vl) static inline vint8m2_t float2int8leakyrelu(vfloat32m8_t v, float slope, size_t vl) { - vfloat32m8_t v_pos = __riscv_vfmax_vf_f32m8(v, 0.f, vl); - vfloat32m8_t v_neg = __riscv_vfmin_vf_f32m8(v, 0.f, vl); - vfloat32m8_t v_leakyrelu = __riscv_vfadd_vv_f32m8(v_pos, __riscv_vfmul_vf_f32m8(v_neg, slope, vl), vl); - return float2int8(v_leakyrelu, vl); + vfloat32m8_t v_pos = __riscv_vfmax_vf_f32m8(v, 0.f, vl); + vfloat32m8_t v_neg = __riscv_vfmin_vf_f32m8(v, 0.f, vl); + vfloat32m8_t v_leakyrelu = __riscv_vfadd_vv_f32m8(v_pos, __riscv_vfmul_vf_f32m8(v_neg, slope, vl), vl); + return float2int8(v_leakyrelu, vl); } static inline vint8m1_t float2int8leakyrelu(vfloat32m4_t v, float slope, size_t vl) 
{ - vfloat32m4_t v_pos = __riscv_vfmax_vf_f32m4(v, 0.f, vl); - vfloat32m4_t v_neg = __riscv_vfmin_vf_f32m4(v, 0.f, vl); - vfloat32m4_t v_leakyrelu = __riscv_vfadd_vv_f32m4(v_pos, __riscv_vfmul_vf_f32m4(v_neg, slope, vl), vl); - return float2int8(v_leakyrelu, vl); + vfloat32m4_t v_pos = __riscv_vfmax_vf_f32m4(v, 0.f, vl); + vfloat32m4_t v_neg = __riscv_vfmin_vf_f32m4(v, 0.f, vl); + vfloat32m4_t v_leakyrelu = __riscv_vfadd_vv_f32m4(v_pos, __riscv_vfmul_vf_f32m4(v_neg, slope, vl), vl); + return float2int8(v_leakyrelu, vl); } #if __riscv_zvfh From 3614864a7529154d86cb5f948a234d44caf90175 Mon Sep 17 00:00:00 2001 From: nihui Date: Mon, 18 May 2026 15:24:21 +0800 Subject: [PATCH 5/5] cc --- src/layer/riscv/requantize_riscv.cpp | 311 +++++++++++++++------------ tests/test_requantize.cpp | 36 +++- 2 files changed, 206 insertions(+), 141 deletions(-) diff --git a/src/layer/riscv/requantize_riscv.cpp b/src/layer/riscv/requantize_riscv.cpp index 9e98307ee947..677f5b570747 100644 --- a/src/layer/riscv/requantize_riscv.cpp +++ b/src/layer/riscv/requantize_riscv.cpp @@ -19,11 +19,9 @@ Requantize_riscv::Requantize_riscv() #endif // __riscv_vector } -static void requantize_relu(const int* intptr, signed char* ptr, const Mat& scale_in_data, const Mat& bias_data, const Mat& scale_out_data, float slope, int elemcount, int elempack) +static void requantize_relu(const int* intptr, signed char* ptr, const Mat& scale_in_data, const Mat& bias_data, const Mat& scale_out_data, int elemcount, int elempack) { - const int scale_in_data_size = scale_in_data.w; const int bias_data_size = bias_data.w; - const int scale_out_data_size = scale_out_data.w; const int size = elemcount * elempack; // int8(relu(v * scale_in) * scale_out) @@ -31,170 +29,209 @@ static void requantize_relu(const int* intptr, signed char* ptr, const Mat& scal // int8(relu(v * scale_in + bias) * scale_out) // int8_relu(v * (scale_in * scale_out) + (bias * scale_out)) - // int8(leakyrelu(v * scale_in, slope) * scale_out) - // 
int8_leakyrelu(v * (scale_in * scale_out), slope) - // int8(leakyrelu(v * scale_in + bias, slope) * scale_out) - // int8_leakyrelu(v * (scale_in * scale_out) + (bias * scale_out), slope) - #if __riscv_vector + const int scale_in_data_size = scale_in_data.w; + const int scale_out_data_size = scale_out_data.w; + const size_t vlm1 = __riscv_vsetvlmax_e32m1(); const size_t vlmax = __riscv_vsetvlmax_e32m8(); vfloat32m8_t _scale = __riscv_vfmv_v_f_f32m8(scale_in_data[0], vlmax); if (scale_in_data_size > 1) { - // if (elempack == vlm1) - { - vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_in_data, vlm1); - _scale = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s); - } + vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_in_data, vlm1); + _scale = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s); + } + + vfloat32m8_t _scale_out; + if (scale_out_data_size > 1) + { + vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_out_data, vlm1); + _scale_out = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s); + _scale = __riscv_vfmul_vv_f32m8(_scale, _scale_out, vlmax); + } + else + { + _scale = __riscv_vfmul_vf_f32m8(_scale, scale_out_data[0], vlmax); } - vfloat32m8_t _bias = __riscv_vfmv_v_f_f32m8(0.f, vlmax); - if (bias_data_size == 1) + int n = size; + if (bias_data_size == 0) { - _bias = __riscv_vfmv_v_f_f32m8(bias_data[0], vlmax); + while (n > 0) + { + size_t vl = __riscv_vsetvl_e32m8(n); + vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl); + vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl); + _v = __riscv_vfmul_vv_f32m8(_v, _scale, vl); + __riscv_vse8_v_i8m2(ptr, float2int8relu(_v, vl), vl); + + intptr += vl; + ptr += vl; + n -= vl; + } } - else if (bias_data_size > 1) + else { - // if (elempack == vlm1) + vfloat32m8_t _bias = __riscv_vfmv_v_f_f32m8(bias_data[0], vlmax); + if (bias_data_size > 1) { vfloat32m1_t _b = __riscv_vle32_v_f32m1((const float*)bias_data, vlm1); _bias = 
__riscv_vcreate_v_f32m1_f32m8(_b, _b, _b, _b, _b, _b, _b, _b); } - } - if (scale_out_data_size > 1) + if (scale_out_data_size > 1) + { + _bias = __riscv_vfmul_vv_f32m8(_bias, _scale_out, vlmax); + } + else + { + _bias = __riscv_vfmul_vf_f32m8(_bias, scale_out_data[0], vlmax); + } + + while (n > 0) + { + size_t vl = __riscv_vsetvl_e32m8(n); + vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl); + vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl); + _v = __riscv_vfmadd_vv_f32m8(_v, _scale, _bias, vl); + __riscv_vse8_v_i8m2(ptr, float2int8relu(_v, vl), vl); + + intptr += vl; + ptr += vl; + n -= vl; + } + } +#else // __riscv_vector + float scale = scale_in_data[0] * scale_out_data[0]; + if (bias_data_size == 0) { - // if (elempack == vlm1) + for (int i = 0; i < size; i++) { - vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_out_data, vlm1); - vfloat32m8_t _s2 = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s); - _scale = __riscv_vfmul_vv_f32m8(_scale, _s2, vlmax); - _bias = __riscv_vfmul_vv_f32m8(_bias, _s2, vlmax); + float v = *intptr * scale; + *ptr = (v < 0) ? 0 : float2int8(v); + intptr++; + ptr++; } } else + { + float bias = bias_data[0] * scale_out_data[0]; + for (int i = 0; i < size; i++) + { + float v = *intptr * scale + bias; + *ptr = (v < 0) ? 
0 : float2int8(v); + intptr++; + ptr++; + } + } +#endif // __riscv_vector +} + +static void requantize_leakyrelu(const int* intptr, signed char* ptr, const Mat& scale_in_data, const Mat& bias_data, const Mat& scale_out_data, float slope, int elemcount, int elempack) +{ + const int bias_data_size = bias_data.w; + const int size = elemcount * elempack; + + // int8(leakyrelu(v * scale_in, slope) * scale_out) + // int8_leakyrelu(v * (scale_in * scale_out), slope) + // int8(leakyrelu(v * scale_in + bias, slope) * scale_out) + // int8_leakyrelu(v * (scale_in * scale_out) + (bias * scale_out), slope) + +#if __riscv_vector + const int scale_in_data_size = scale_in_data.w; + const int scale_out_data_size = scale_out_data.w; + + const size_t vlm1 = __riscv_vsetvlmax_e32m1(); + const size_t vlmax = __riscv_vsetvlmax_e32m8(); + + vfloat32m8_t _scale = __riscv_vfmv_v_f_f32m8(scale_in_data[0], vlmax); + if (scale_in_data_size > 1) + { + vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_in_data, vlm1); + _scale = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s); + } + + vfloat32m8_t _scale_out; + if (scale_out_data_size > 1) + { + vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_out_data, vlm1); + _scale_out = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s); + _scale = __riscv_vfmul_vv_f32m8(_scale, _scale_out, vlmax); + } + else { _scale = __riscv_vfmul_vf_f32m8(_scale, scale_out_data[0], vlmax); - _bias = __riscv_vfmul_vf_f32m8(_bias, scale_out_data[0], vlmax); } int n = size; - if (slope > 0.f) // Leaky ReLU + if (bias_data_size == 0) { - if (bias_data_size == 0) - { - while (n > 0) - { - size_t vl = __riscv_vsetvl_e32m8(n); - vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl); - vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl); - _v = __riscv_vfmul_vv_f32m8(_v, _scale, vl); - __riscv_vse8_v_i8m2(ptr, float2int8leakyrelu(_v, slope, vl), vl); - - intptr += vl; - ptr += vl; - n -= vl; - } - } - else + while (n > 0) { - while (n 
> 0) - { - size_t vl = __riscv_vsetvl_e32m8(n); - vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl); - vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl); - _v = __riscv_vfmadd_vv_f32m8(_v, _scale, _bias, vl); - __riscv_vse8_v_i8m2(ptr, float2int8leakyrelu(_v, slope, vl), vl); - - intptr += vl; - ptr += vl; - n -= vl; - } + size_t vl = __riscv_vsetvl_e32m8(n); + vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl); + vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl); + _v = __riscv_vfmul_vv_f32m8(_v, _scale, vl); + __riscv_vse8_v_i8m2(ptr, float2int8leakyrelu(_v, slope, vl), vl); + + intptr += vl; + ptr += vl; + n -= vl; } } else { - if (bias_data_size == 0) + vfloat32m8_t _bias = __riscv_vfmv_v_f_f32m8(bias_data[0], vlmax); + if (bias_data_size > 1) { - while (n > 0) - { - size_t vl = __riscv_vsetvl_e32m8(n); - vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl); - vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl); - _v = __riscv_vfmul_vv_f32m8(_v, _scale, vl); - __riscv_vse8_v_i8m2(ptr, float2int8relu(_v, vl), vl); - - intptr += vl; - ptr += vl; - n -= vl; - } + vfloat32m1_t _b = __riscv_vle32_v_f32m1((const float*)bias_data, vlm1); + _bias = __riscv_vcreate_v_f32m1_f32m8(_b, _b, _b, _b, _b, _b, _b, _b); + } + + if (scale_out_data_size > 1) + { + _bias = __riscv_vfmul_vv_f32m8(_bias, _scale_out, vlmax); } else { - while (n > 0) - { - size_t vl = __riscv_vsetvl_e32m8(n); - vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl); - vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl); - _v = __riscv_vfmadd_vv_f32m8(_v, _scale, _bias, vl); - __riscv_vse8_v_i8m2(ptr, float2int8relu(_v, vl), vl); - - intptr += vl; - ptr += vl; - n -= vl; - } + _bias = __riscv_vfmul_vf_f32m8(_bias, scale_out_data[0], vlmax); + } + + while (n > 0) + { + size_t vl = __riscv_vsetvl_e32m8(n); + vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl); + vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl); + _v = __riscv_vfmadd_vv_f32m8(_v, _scale, _bias, vl); + __riscv_vse8_v_i8m2(ptr, 
float2int8leakyrelu(_v, slope, vl), vl); + + intptr += vl; + ptr += vl; + n -= vl; } } #else // __riscv_vector float scale = scale_in_data[0] * scale_out_data[0]; - if (slope > 0.f) + if (bias_data_size == 0) { - if (bias_data_size == 0) - { - for (int i = 0; i < size; i++) - { - float v = *intptr * scale; - *ptr = (v < 0) ? float2int8(v * slope) : float2int8(v); - intptr++; - ptr++; - } - } - else + for (int i = 0; i < size; i++) { - float bias = bias_data[0] * scale_out_data[0]; - for (int i = 0; i < size; i++) - { - float v = *intptr * scale + bias; - *ptr = (v < 0) ? float2int8(v * slope) : float2int8(v); - intptr++; - ptr++; - } + float v = *intptr * scale; + *ptr = (v < 0) ? float2int8(v * slope) : float2int8(v); + intptr++; + ptr++; } } else { - if (bias_data_size == 0) - { - for (int i = 0; i < size; i++) - { - float v = *intptr * scale; - *ptr = (v < 0) ? 0 : float2int8(v); - intptr++; - ptr++; - } - } - else + float bias = bias_data[0] * scale_out_data[0]; + for (int i = 0; i < size; i++) { - float bias = bias_data[0] * scale_out_data[0]; - for (int i = 0; i < size; i++) - { - float v = *intptr * scale + bias; - *ptr = (v < 0) ? 0 : float2int8(v); - intptr++; - ptr++; - } + float v = *intptr * scale + bias; + *ptr = (v < 0) ? 
float2int8(v * slope) : float2int8(v); + intptr++; + ptr++; } } #endif // __riscv_vector @@ -202,16 +239,20 @@ static void requantize_relu(const int* intptr, signed char* ptr, const Mat& scal static void requantize(const int* intptr, signed char* ptr, const Mat& scale_in_data, const Mat& bias_data, const Mat& scale_out_data, int activation_type, const Mat& activation_params, int elemcount, int elempack) { - if ((activation_type == 1) || (activation_type == 2)) + if (activation_type == 1) + { + requantize_relu(intptr, ptr, scale_in_data, bias_data, scale_out_data, elemcount, elempack); + return; + } + + if (activation_type == 2 && activation_params[0] > 0.f) { const float slope = activation_params[0]; - requantize_relu(intptr, ptr, scale_in_data, bias_data, scale_out_data, slope, elemcount, elempack); + requantize_leakyrelu(intptr, ptr, scale_in_data, bias_data, scale_out_data, slope, elemcount, elempack); return; } - const int scale_in_data_size = scale_in_data.w; const int bias_data_size = bias_data.w; - const int scale_out_data_size = scale_out_data.w; const int size = elemcount * elempack; const float scale_in = scale_in_data[0]; @@ -219,27 +260,24 @@ static void requantize(const int* intptr, signed char* ptr, const Mat& scale_in_ const float bias = bias_data_size == 0 ? 
0.f : bias_data[0]; #if __riscv_vector + const int scale_in_data_size = scale_in_data.w; + const int scale_out_data_size = scale_out_data.w; + const size_t vlm1 = __riscv_vsetvlmax_e32m1(); const size_t vlmax = __riscv_vsetvlmax_e32m8(); vfloat32m8_t _scale_in = __riscv_vfmv_v_f_f32m8(scale_in, vlmax); if (scale_in_data_size > 1) { - // if (elempack == vlm1) - { - vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_in_data, vlm1); - _scale_in = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s); - } + vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_in_data, vlm1); + _scale_in = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s); } vfloat32m8_t _scale_out = __riscv_vfmv_v_f_f32m8(scale_out, vlmax); if (scale_out_data_size > 1) { - // if (elempack == vlm1) - { - vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_out_data, vlm1); - _scale_out = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s); - } + vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_out_data, vlm1); + _scale_out = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s); } int n = size; @@ -265,11 +303,8 @@ static void requantize(const int* intptr, signed char* ptr, const Mat& scale_in_ vfloat32m8_t _bias = __riscv_vfmv_v_f_f32m8(bias, vlmax); if (bias_data_size > 1) { - // if (elempack == vlm1) - { - vfloat32m1_t _b = __riscv_vle32_v_f32m1((const float*)bias_data, vlm1); - _bias = __riscv_vcreate_v_f32m1_f32m8(_b, _b, _b, _b, _b, _b, _b, _b); - } + vfloat32m1_t _b = __riscv_vle32_v_f32m1((const float*)bias_data, vlm1); + _bias = __riscv_vcreate_v_f32m1_f32m8(_b, _b, _b, _b, _b, _b, _b, _b); } while (n > 0) diff --git a/tests/test_requantize.cpp b/tests/test_requantize.cpp index 70ad93057923..6f2183195d1e 100644 --- a/tests/test_requantize.cpp +++ b/tests/test_requantize.cpp @@ -25,7 +25,8 @@ static int test_requantize(const ncnn::Mat& a, int scale_in_data_size, int scale Randomize(weights[0], 0.0001, 0.001); 
Randomize(weights[1], 10, 100); - int ret = test_layer("Requantize", pd, weights, a, 1); + int flag = TEST_LAYER_DISABLE_AUTO_INPUT_CASTING; + int ret = test_layer("Requantize", pd, weights, a, 1, flag); if (ret != 0) { fprintf(stderr, "test_requantize failed a.dims=%d a=(%d %d %d) scale_in_data_size=%d scale_out_data_size=%d bias_data_size=%d act=%d actparams=[%f,%f]\n", a.dims, a.w, a.h, a.c, scale_in_data_size, scale_out_data_size, bias_data_size, activation_type, activation_params[0], activation_params[1]); @@ -45,6 +46,31 @@ static int test_requantize(const ncnn::Mat& a, int scale_in_data_size, int scale || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 5, 0.f, 0.f); } +static int test_requantize_relu_empty_activation_params(const ncnn::Mat& a) +{ + ncnn::ParamDict pd; + pd.set(0, 1); + pd.set(1, 1); + pd.set(2, 0); + pd.set(3, 1); + + std::vector weights(2); + weights[0] = RandomMat(1); + weights[1] = RandomMat(1); + + Randomize(weights[0], 0.0001, 0.001); + Randomize(weights[1], 10, 100); + + int flag = TEST_LAYER_DISABLE_AUTO_INPUT_CASTING | TEST_LAYER_DISABLE_AUTO_INPUT_PACKING; + int ret = test_layer("Requantize", pd, weights, a, 1, flag); + if (ret != 0) + { + fprintf(stderr, "test_requantize_relu_empty_activation_params failed a.dims=%d a=(%d %d %d)\n", a.dims, a.w, a.h, a.c); + } + + return ret; +} + static int test_requantize_pack1(const ncnn::Mat& a, int scale_in_data_size, int scale_out_data_size, int bias_data_size, int activation_type, float alpha, float beta) { ncnn::ParamDict pd; @@ -195,7 +221,10 @@ static int test_requantize_2() || test_requantize_pack1(RandomIntMat(124), 1, 1, 1) || test_requantize_pack1(RandomIntMat(124), 1, 1, 0) || test_requantize_pack1(RandomIntMat(127), 1, 1, 1) - || test_requantize_pack1(RandomIntMat(127), 1, 1, 0); + || test_requantize_pack1(RandomIntMat(127), 1, 1, 0) + || test_requantize_pack1(RandomIntMat(127), 1, 1, 0, 2, 0.f, 0.f) + || test_requantize_pack1(RandomIntMat(127), 1, 1, 0, 
2, RandomFloat(-1, 0), 0.f) + || test_requantize_relu_empty_activation_params(RandomIntMat(127)); } static int test_requantize_3() @@ -254,7 +283,8 @@ static int test_requantize_3() || test_requantize(RandomIntMat(15, 24), 24, 1, 1) || test_requantize(RandomIntMat(15, 24), 24, 1, 0) || test_requantize(RandomIntMat(128), 1, 1, 1) - || test_requantize(RandomIntMat(128), 1, 1, 0); + || test_requantize(RandomIntMat(128), 1, 1, 0) + || test_requantize(RandomIntMat(127), 1, 1, 0, 2, RandomFloat(1, 2), 0.f); #endif // __riscv }