diff --git a/src/layer/riscv/requantize_riscv.cpp b/src/layer/riscv/requantize_riscv.cpp
new file mode 100644
index 000000000000..677f5b570747
--- /dev/null
+++ b/src/layer/riscv/requantize_riscv.cpp
@@ -0,0 +1,436 @@
+// Copyright 2026 Tencent
+// SPDX-License-Identifier: BSD-3-Clause
+
+#include "requantize_riscv.h"
+
+#if __riscv_vector
+#include <riscv_vector.h>
+#endif // __riscv_vector
+
+#include "riscv_activation.h"
+#include "riscv_usability.h"
+
+namespace ncnn {
+
+Requantize_riscv::Requantize_riscv()
+{
+#if __riscv_vector
+    support_packing = true;
+#endif // __riscv_vector
+}
+
+// Requantize int32 values to int8 with a fused ReLU.
+// scale_out is folded into the input scale (and bias) up front, so each element
+// needs a single multiply(-add) before the clamped conversion.
+static void requantize_relu(const int* intptr, signed char* ptr, const Mat& scale_in_data, const Mat& bias_data, const Mat& scale_out_data, int elemcount, int elempack)
+{
+    const int bias_data_size = bias_data.w;
+    const int size = elemcount * elempack;
+
+    // int8(relu(v * scale_in) * scale_out)
+    // int8_relu(v * (scale_in * scale_out))
+    // int8(relu(v * scale_in + bias) * scale_out)
+    // int8_relu(v * (scale_in * scale_out) + (bias * scale_out))
+
+#if __riscv_vector
+    const int scale_in_data_size = scale_in_data.w;
+    const int scale_out_data_size = scale_out_data.w;
+
+    const size_t vlm1 = __riscv_vsetvlmax_e32m1();
+    const size_t vlmax = __riscv_vsetvlmax_e32m8();
+
+    vfloat32m8_t _scale = __riscv_vfmv_v_f_f32m8(scale_in_data[0], vlmax);
+    if (scale_in_data_size > 1)
+    {
+        vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_in_data, vlm1);
+        _scale = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s);
+    }
+
+    vfloat32m8_t _scale_out;
+    if (scale_out_data_size > 1)
+    {
+        vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_out_data, vlm1);
+        _scale_out = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s);
+        _scale = __riscv_vfmul_vv_f32m8(_scale, _scale_out, vlmax);
+    }
+    else
+    {
+        _scale = __riscv_vfmul_vf_f32m8(_scale, scale_out_data[0], vlmax);
+    }
+
+    int n = size;
+    if (bias_data_size == 0)
+    {
+        while (n > 0)
+        {
+            size_t vl = __riscv_vsetvl_e32m8(n);
+            vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl);
+            vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl);
+            _v = __riscv_vfmul_vv_f32m8(_v, _scale, vl);
+            __riscv_vse8_v_i8m2(ptr, float2int8relu(_v, vl), vl);
+
+            intptr += vl;
+            ptr += vl;
+            n -= vl;
+        }
+    }
+    else
+    {
+        vfloat32m8_t _bias = __riscv_vfmv_v_f_f32m8(bias_data[0], vlmax);
+        if (bias_data_size > 1)
+        {
+            vfloat32m1_t _b = __riscv_vle32_v_f32m1((const float*)bias_data, vlm1);
+            _bias = __riscv_vcreate_v_f32m1_f32m8(_b, _b, _b, _b, _b, _b, _b, _b);
+        }
+
+        if (scale_out_data_size > 1)
+        {
+            _bias = __riscv_vfmul_vv_f32m8(_bias, _scale_out, vlmax);
+        }
+        else
+        {
+            _bias = __riscv_vfmul_vf_f32m8(_bias, scale_out_data[0], vlmax);
+        }
+
+        while (n > 0)
+        {
+            size_t vl = __riscv_vsetvl_e32m8(n);
+            vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl);
+            vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl);
+            _v = __riscv_vfmadd_vv_f32m8(_v, _scale, _bias, vl);
+            __riscv_vse8_v_i8m2(ptr, float2int8relu(_v, vl), vl);
+
+            intptr += vl;
+            ptr += vl;
+            n -= vl;
+        }
+    }
+#else // __riscv_vector
+    float scale = scale_in_data[0] * scale_out_data[0];
+    if (bias_data_size == 0)
+    {
+        for (int i = 0; i < size; i++)
+        {
+            float v = *intptr * scale;
+            *ptr = (v < 0) ? 0 : float2int8(v);
+            intptr++;
+            ptr++;
+        }
+    }
+    else
+    {
+        float bias = bias_data[0] * scale_out_data[0];
+        for (int i = 0; i < size; i++)
+        {
+            float v = *intptr * scale + bias;
+            *ptr = (v < 0) ? 0 : float2int8(v);
+            intptr++;
+            ptr++;
+        }
+    }
+#endif // __riscv_vector
+}
+
+// Requantize int32 values to int8 with a fused leaky ReLU of the given slope.
+// scale_out is folded into the input scale (and bias) just like requantize_relu;
+// the caller only takes this path for a positive slope.
+static void requantize_leakyrelu(const int* intptr, signed char* ptr, const Mat& scale_in_data, const Mat& bias_data, const Mat& scale_out_data, float slope, int elemcount, int elempack)
+{
+    const int bias_data_size = bias_data.w;
+    const int size = elemcount * elempack;
+
+    // int8(leakyrelu(v * scale_in, slope) * scale_out)
+    // int8_leakyrelu(v * (scale_in * scale_out), slope)
+    // int8(leakyrelu(v * scale_in + bias, slope) * scale_out)
+    // int8_leakyrelu(v * (scale_in * scale_out) + (bias * scale_out), slope)
+
+#if __riscv_vector
+    const int scale_in_data_size = scale_in_data.w;
+    const int scale_out_data_size = scale_out_data.w;
+
+    const size_t vlm1 = __riscv_vsetvlmax_e32m1();
+    const size_t vlmax = __riscv_vsetvlmax_e32m8();
+
+    vfloat32m8_t _scale = __riscv_vfmv_v_f_f32m8(scale_in_data[0], vlmax);
+    if (scale_in_data_size > 1)
+    {
+        vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_in_data, vlm1);
+        _scale = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s);
+    }
+
+    vfloat32m8_t _scale_out;
+    if (scale_out_data_size > 1)
+    {
+        vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_out_data, vlm1);
+        _scale_out = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s);
+        _scale = __riscv_vfmul_vv_f32m8(_scale, _scale_out, vlmax);
+    }
+    else
+    {
+        _scale = __riscv_vfmul_vf_f32m8(_scale, scale_out_data[0], vlmax);
+    }
+
+    int n = size;
+    if (bias_data_size == 0)
+    {
+        while (n > 0)
+        {
+            size_t vl = __riscv_vsetvl_e32m8(n);
+            vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl);
+            vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl);
+            _v = __riscv_vfmul_vv_f32m8(_v, _scale, vl);
+            __riscv_vse8_v_i8m2(ptr, float2int8leakyrelu(_v, slope, vl), vl);
+
+            intptr += vl;
+            ptr += vl;
+            n -= vl;
+        }
+    }
+    else
+    {
+        vfloat32m8_t _bias = __riscv_vfmv_v_f_f32m8(bias_data[0], vlmax);
+        if (bias_data_size > 1)
+        {
+            vfloat32m1_t _b = __riscv_vle32_v_f32m1((const float*)bias_data, vlm1);
+            _bias = __riscv_vcreate_v_f32m1_f32m8(_b, _b, _b, _b, _b, _b, _b, _b);
+        }
+
+        if (scale_out_data_size > 1)
+        {
+            _bias = __riscv_vfmul_vv_f32m8(_bias, _scale_out, vlmax);
+        }
+        else
+        {
+            _bias = __riscv_vfmul_vf_f32m8(_bias, scale_out_data[0], vlmax);
+        }
+
+        while (n > 0)
+        {
+            size_t vl = __riscv_vsetvl_e32m8(n);
+            vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl);
+            vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl);
+            _v = __riscv_vfmadd_vv_f32m8(_v, _scale, _bias, vl);
+            __riscv_vse8_v_i8m2(ptr, float2int8leakyrelu(_v, slope, vl), vl);
+
+            intptr += vl;
+            ptr += vl;
+            n -= vl;
+        }
+    }
+#else // __riscv_vector
+    float scale = scale_in_data[0] * scale_out_data[0];
+    if (bias_data_size == 0)
+    {
+        for (int i = 0; i < size; i++)
+        {
+            float v = *intptr * scale;
+            *ptr = (v < 0) ? float2int8(v * slope) : float2int8(v);
+            intptr++;
+            ptr++;
+        }
+    }
+    else
+    {
+        float bias = bias_data[0] * scale_out_data[0];
+        for (int i = 0; i < size; i++)
+        {
+            float v = *intptr * scale + bias;
+            *ptr = (v < 0) ? float2int8(v * slope) : float2int8(v);
+            intptr++;
+            ptr++;
+        }
+    }
+#endif // __riscv_vector
+}
+
+// Requantize elemcount * elempack int32 values to int8.
+// ReLU and positive-slope leaky ReLU are dispatched to the fused kernels above;
+// every other activation runs between the scale_in(+bias) and scale_out steps.
+static void requantize(const int* intptr, signed char* ptr, const Mat& scale_in_data, const Mat& bias_data, const Mat& scale_out_data, int activation_type, const Mat& activation_params, int elemcount, int elempack)
+{
+    if (activation_type == 1)
+    {
+        requantize_relu(intptr, ptr, scale_in_data, bias_data, scale_out_data, elemcount, elempack);
+        return;
+    }
+
+    if (activation_type == 2 && activation_params[0] > 0.f)
+    {
+        const float slope = activation_params[0];
+        requantize_leakyrelu(intptr, ptr, scale_in_data, bias_data, scale_out_data, slope, elemcount, elempack);
+        return;
+    }
+
+    const int bias_data_size = bias_data.w;
+    const int size = elemcount * elempack;
+
+    const float scale_in = scale_in_data[0];
+    const float scale_out = scale_out_data[0];
+    const float bias = bias_data_size == 0 ? 0.f : bias_data[0];
+
+#if __riscv_vector
+    const int scale_in_data_size = scale_in_data.w;
+    const int scale_out_data_size = scale_out_data.w;
+
+    const size_t vlm1 = __riscv_vsetvlmax_e32m1();
+    const size_t vlmax = __riscv_vsetvlmax_e32m8();
+
+    vfloat32m8_t _scale_in = __riscv_vfmv_v_f_f32m8(scale_in, vlmax);
+    if (scale_in_data_size > 1)
+    {
+        vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_in_data, vlm1);
+        _scale_in = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s);
+    }
+
+    vfloat32m8_t _scale_out = __riscv_vfmv_v_f_f32m8(scale_out, vlmax);
+    if (scale_out_data_size > 1)
+    {
+        vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_out_data, vlm1);
+        _scale_out = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s);
+    }
+
+    int n = size;
+    if (bias_data_size == 0)
+    {
+        while (n > 0)
+        {
+            size_t vl = __riscv_vsetvl_e32m8(n);
+            vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl);
+            vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl);
+            _v = __riscv_vfmul_vv_f32m8(_v, _scale_in, vl);
+            _v = activation_ps(_v, activation_type, activation_params, vl);
+            _v = __riscv_vfmul_vv_f32m8(_v, _scale_out, vl);
+            __riscv_vse8_v_i8m2(ptr, float2int8(_v, vl), vl);
+
+            intptr += vl;
+            ptr += vl;
+            n -= vl;
+        }
+    }
+    else // if (bias_data_size >= 1)
+    {
+        vfloat32m8_t _bias = __riscv_vfmv_v_f_f32m8(bias, vlmax);
+        if (bias_data_size > 1)
+        {
+            vfloat32m1_t _b = __riscv_vle32_v_f32m1((const float*)bias_data, vlm1);
+            _bias = __riscv_vcreate_v_f32m1_f32m8(_b, _b, _b, _b, _b, _b, _b, _b);
+        }
+
+        while (n > 0)
+        {
+            size_t vl = __riscv_vsetvl_e32m8(n);
+            vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl);
+            vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl);
+            _v = __riscv_vfmadd_vv_f32m8(_v, _scale_in, _bias, vl); // add bias
+            _v = activation_ps(_v, activation_type, activation_params, vl);
+            _v = __riscv_vfmul_vv_f32m8(_v, _scale_out, vl);
+            __riscv_vse8_v_i8m2(ptr, float2int8(_v, vl), vl);
+
+            intptr += vl;
+            ptr += vl;
+            n -= vl;
+        }
+    }
+#else // __riscv_vector
+    if (bias_data_size == 0)
+    {
+        for (int i = 0; i < size; i++)
+        {
+            float v = (float)(*intptr) * scale_in;
+            v = activation_ss(v, activation_type, activation_params);
+            *ptr = float2int8(v * scale_out);
+            intptr++;
+            ptr++;
+        }
+    }
+    else
+    {
+        for (int i = 0; i < size; i++)
+        {
+            float v = (float)(*intptr) * scale_in + bias;
+            v = activation_ss(v, activation_type, activation_params);
+            *ptr = float2int8(v * scale_out);
+            intptr++;
+            ptr++;
+        }
+    }
+#endif // __riscv_vector
+}
+
+// Requantize bottom_blob (int32) into top_blob (int8), keeping the input elempack.
+// Returns -100 when the output blob cannot be allocated, 0 otherwise.
+int Requantize_riscv::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
+{
+    const int dims = bottom_blob.dims;
+    const int w = bottom_blob.w;
+    const int h = bottom_blob.h;
+    const int channels = bottom_blob.c;
+    const int elempack = bottom_blob.elempack;
+    const size_t out_elemsize = elempack * 1u;
+
+    if (dims == 1)
+    {
+        top_blob.create(w, out_elemsize, elempack, opt.blob_allocator);
+        if (top_blob.empty())
+            return -100;
+
+        const int wp = std::max(1, w / opt.num_threads);
+        const int nn_w = (w + wp - 1) / wp;
+
+        #pragma omp parallel for num_threads(opt.num_threads)
+        for (int ii = 0; ii < nn_w; ii++)
+        {
+            const int i = ii * wp;
+
+            const int* intptr = (const int*)bottom_blob + i * elempack;
+            signed char* ptr = (signed char*)top_blob + i * elempack;
+            const int size = std::min(w - i, wp) * elempack;
+
+            requantize(intptr, ptr, scale_in_data, bias_data, scale_out_data, activation_type, activation_params, size, 1);
+        }
+    }
+
+    if (dims == 2)
+    {
+        top_blob.create(w, h, out_elemsize, elempack, opt.blob_allocator);
+        if (top_blob.empty())
+            return -100;
+
+        #pragma omp parallel for num_threads(opt.num_threads)
+        for (int i = 0; i < h; i++)
+        {
+            const int* intptr = bottom_blob.row<const int>(i);
+            signed char* ptr = top_blob.row<signed char>(i);
+
+            const Mat scale_in_data_i = scale_in_data_size > 1 ? scale_in_data.range(i * elempack, elempack) : scale_in_data;
+            const Mat bias_data_i = bias_data_size > 1 ? bias_data.range(i * elempack, elempack) : bias_data;
+            const Mat scale_out_data_i = scale_out_data_size > 1 ? scale_out_data.range(i * elempack, elempack) : scale_out_data;
+
+            requantize(intptr, ptr, scale_in_data_i, bias_data_i, scale_out_data_i, activation_type, activation_params, w, elempack);
+        }
+    }
+
+    if (dims == 3)
+    {
+        top_blob.create(w, h, channels, out_elemsize, elempack, opt.blob_allocator);
+        if (top_blob.empty())
+            return -100;
+
+        #pragma omp parallel for num_threads(opt.num_threads)
+        for (int q = 0; q < channels; q++)
+        {
+            const int* intptr = bottom_blob.channel(q);
+            signed char* ptr = top_blob.channel(q);
+
+            const Mat scale_in_data_q = scale_in_data_size > 1 ? scale_in_data.range(q * elempack, elempack) : scale_in_data;
+            const Mat bias_data_q = bias_data_size > 1 ? bias_data.range(q * elempack, elempack) : bias_data;
+            const Mat scale_out_data_q = scale_out_data_size > 1 ? scale_out_data.range(q * elempack, elempack) : scale_out_data;
+
+            requantize(intptr, ptr, scale_in_data_q, bias_data_q, scale_out_data_q, activation_type, activation_params, w * h, elempack);
+        }
+    }
+
+    return 0;
+}
+
+} // namespace ncnn
diff --git a/src/layer/riscv/requantize_riscv.h b/src/layer/riscv/requantize_riscv.h
new file mode 100644
index 000000000000..a058746350c5
--- /dev/null
+++ b/src/layer/riscv/requantize_riscv.h
@@ -0,0 +1,21 @@
+// Copyright 2026 Tencent
+// SPDX-License-Identifier: BSD-3-Clause
+
+#ifndef LAYER_REQUANTIZE_RISCV_H
+#define LAYER_REQUANTIZE_RISCV_H
+
+#include "requantize.h"
+
+namespace ncnn {
+
+class Requantize_riscv : public Requantize
+{
+public:
+    Requantize_riscv();
+
+    virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const;
+};
+
+} // namespace ncnn
+
+#endif // LAYER_REQUANTIZE_RISCV_H
diff --git a/src/layer/riscv/riscv_usability.h b/src/layer/riscv/riscv_usability.h
index 7ff06d789979..9e90eb77ca89 100644
--- a/src/layer/riscv/riscv_usability.h
+++ b/src/layer/riscv/riscv_usability.h
@@ -35,6 +35,42 @@ static inline vint8m1_t float2int8(vfloat32m4_t v, size_t vl)
     return __riscv_vnclip_wx_i8m1(v16, 0, __RISCV_VXRM_RNU, vl);
 }
 
+// round to nearest (ties away from zero), then clamp to [0, 127]
+static inline vint8m2_t float2int8relu(vfloat32m8_t v, size_t vl)
+{
+    vint32m8_t v32 = __riscv_vfcvt_x_f_v_i32m8_rm(v, __RISCV_FRM_RMM, vl);
+    v32 = __riscv_vmax_vx_i32m8(v32, 0, vl);
+    v32 = __riscv_vmin_vx_i32m8(v32, 127, vl);
+    vint16m4_t v16 = __riscv_vnclip_wx_i16m4(v32, 0, __RISCV_VXRM_RNU, vl);
+    return __riscv_vnclip_wx_i8m2(v16, 0, __RISCV_VXRM_RNU, vl);
+}
+
+static inline vint8m1_t float2int8relu(vfloat32m4_t v, size_t vl)
+{
+    vint32m4_t v32 = __riscv_vfcvt_x_f_v_i32m4_rm(v, __RISCV_FRM_RMM, vl);
+    v32 = __riscv_vmax_vx_i32m4(v32, 0, vl);
+    v32 = __riscv_vmin_vx_i32m4(v32, 127, vl);
+    vint16m2_t v16 = __riscv_vnclip_wx_i16m2(v32, 0, __RISCV_VXRM_RNU, vl);
+    return __riscv_vnclip_wx_i8m1(v16, 0, __RISCV_VXRM_RNU, vl);
+}
+
+// leaky relu as max(v, 0) + min(v, 0) * slope, then the regular float2int8 conversion
+static inline vint8m2_t float2int8leakyrelu(vfloat32m8_t v, float slope, size_t vl)
+{
+    vfloat32m8_t v_pos = __riscv_vfmax_vf_f32m8(v, 0.f, vl);
+    vfloat32m8_t v_neg = __riscv_vfmin_vf_f32m8(v, 0.f, vl);
+    vfloat32m8_t v_leakyrelu = __riscv_vfadd_vv_f32m8(v_pos, __riscv_vfmul_vf_f32m8(v_neg, slope, vl), vl);
+    return float2int8(v_leakyrelu, vl);
+}
+
+static inline vint8m1_t float2int8leakyrelu(vfloat32m4_t v, float slope, size_t vl)
+{
+    vfloat32m4_t v_pos = __riscv_vfmax_vf_f32m4(v, 0.f, vl);
+    vfloat32m4_t v_neg = __riscv_vfmin_vf_f32m4(v, 0.f, vl);
+    vfloat32m4_t v_leakyrelu = __riscv_vfadd_vv_f32m4(v_pos, __riscv_vfmul_vf_f32m4(v_neg, slope, vl), vl);
+    return float2int8(v_leakyrelu, vl);
+}
+
 #if __riscv_zvfh
 static inline vint8m4_t float2int8(vfloat16m8_t v, size_t vl)
 {
diff --git a/tests/test_requantize.cpp b/tests/test_requantize.cpp
index 3271119c8572..6f2183195d1e 100644
--- a/tests/test_requantize.cpp
+++ b/tests/test_requantize.cpp
@@ -3,6 +3,74 @@
 
 #include "testutil.h"
 
+static int test_requantize(const ncnn::Mat& a, int scale_in_data_size, int scale_out_data_size, int bias_data_size, int activation_type, float alpha, float beta)
+{
+    ncnn::ParamDict pd;
+    pd.set(0, scale_in_data_size);
+    pd.set(1, scale_out_data_size);
+    pd.set(2, bias_data_size);
+
+    ncnn::Mat activation_params(2);
+    activation_params[0] = alpha;
+    activation_params[1] = beta;
+    pd.set(3, activation_type);
+    pd.set(4, activation_params);
+
+    std::vector<ncnn::Mat> weights(bias_data_size ? 3 : 2);
+    weights[0] = RandomMat(scale_in_data_size);
+    weights[1] = RandomMat(scale_out_data_size);
+    if (bias_data_size)
+        weights[2] = RandomMat(bias_data_size);
+
+    Randomize(weights[0], 0.0001, 0.001);
+    Randomize(weights[1], 10, 100);
+
+    int flag = TEST_LAYER_DISABLE_AUTO_INPUT_CASTING;
+    int ret = test_layer("Requantize", pd, weights, a, 1, flag);
+    if (ret != 0)
+    {
+        fprintf(stderr, "test_requantize failed a.dims=%d a=(%d %d %d) scale_in_data_size=%d scale_out_data_size=%d bias_data_size=%d act=%d actparams=[%f,%f]\n", a.dims, a.w, a.h, a.c, scale_in_data_size, scale_out_data_size, bias_data_size, activation_type, activation_params[0], activation_params[1]);
+    }
+
+    return ret;
+}
+
+static int test_requantize(const ncnn::Mat& a, int scale_in_data_size, int scale_out_data_size, int bias_data_size)
+{
+    return 0
+           || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 0, 0.f, 0.f)
+           || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 1, 0.f, 0.f)
+           || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 2, RandomFloat(0, 1), 0.f)
+           || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 3, RandomFloat(-1, 0), RandomFloat(0, 1))
+           || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 4, 0.f, 0.f)
+           || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 5, 0.f, 0.f);
+}
+
+static int test_requantize_relu_empty_activation_params(const ncnn::Mat& a)
+{
+    ncnn::ParamDict pd;
+    pd.set(0, 1);
+    pd.set(1, 1);
+    pd.set(2, 0);
+    pd.set(3, 1);
+
+    std::vector<ncnn::Mat> weights(2);
+    weights[0] = RandomMat(1);
+    weights[1] = RandomMat(1);
+
+    Randomize(weights[0], 0.0001, 0.001);
+    Randomize(weights[1], 10, 100);
+
+    int flag = TEST_LAYER_DISABLE_AUTO_INPUT_CASTING | TEST_LAYER_DISABLE_AUTO_INPUT_PACKING;
+    int ret = test_layer("Requantize", pd, weights, a, 1, flag);
+    if (ret != 0)
+    {
+        fprintf(stderr, "test_requantize_relu_empty_activation_params failed a.dims=%d a=(%d %d %d)\n", a.dims, a.w, a.h, a.c);
+    }
+
+    return ret;
+}
+
 static int test_requantize_pack1(const ncnn::Mat& a, int scale_in_data_size, int scale_out_data_size, int bias_data_size, int activation_type, float alpha, float beta)
 {
     ncnn::ParamDict pd;
@@ -153,12 +221,16 @@ static int test_requantize_2()
            || test_requantize_pack1(RandomIntMat(124), 1, 1, 1)
            || test_requantize_pack1(RandomIntMat(124), 1, 1, 0)
            || test_requantize_pack1(RandomIntMat(127), 1, 1, 1)
-           || test_requantize_pack1(RandomIntMat(127), 1, 1, 0);
+           || test_requantize_pack1(RandomIntMat(127), 1, 1, 0)
+           || test_requantize_pack1(RandomIntMat(127), 1, 1, 0, 2, 0.f, 0.f)
+           || test_requantize_pack1(RandomIntMat(127), 1, 1, 0, 2, RandomFloat(-1, 0), 0.f)
+           || test_requantize_relu_empty_activation_params(RandomIntMat(127));
 }
 
 static int test_requantize_3()
 {
     return 0
+#ifndef __riscv
            || test_requantize_pack8(RandomIntMat(5, 7, 24), 1, 1, 24)
            || test_requantize_pack8(RandomIntMat(5, 7, 24), 1, 1, 1)
            || test_requantize_pack8(RandomIntMat(5, 7, 24), 1, 1, 0)
@@ -185,6 +257,35 @@ static int test_requantize_3()
            || test_requantize_pack8(RandomIntMat(15, 24), 24, 1, 0)
            || test_requantize_pack8(RandomIntMat(128), 1, 1, 1)
            || test_requantize_pack8(RandomIntMat(128), 1, 1, 0);
+#else
+           || test_requantize(RandomIntMat(5, 7, 24), 1, 1, 24)
+           || test_requantize(RandomIntMat(5, 7, 24), 1, 1, 1)
+           || test_requantize(RandomIntMat(5, 7, 24), 1, 1, 0)
+           || test_requantize(RandomIntMat(5, 7, 24), 24, 24, 24)
+           || test_requantize(RandomIntMat(5, 7, 24), 24, 24, 1)
+           || test_requantize(RandomIntMat(5, 7, 24), 24, 24, 0)
+           || test_requantize(RandomIntMat(5, 7, 24), 1, 24, 24)
+           || test_requantize(RandomIntMat(5, 7, 24), 1, 24, 1)
+           || test_requantize(RandomIntMat(5, 7, 24), 1, 24, 0)
+           || test_requantize(RandomIntMat(5, 7, 24), 24, 1, 24)
+           || test_requantize(RandomIntMat(5, 7, 24), 24, 1, 1)
+           || test_requantize(RandomIntMat(5, 7, 24), 24, 1, 0)
+           || test_requantize(RandomIntMat(15, 24), 1, 1, 24)
+           || test_requantize(RandomIntMat(15, 24), 1, 1, 1)
+           || test_requantize(RandomIntMat(15, 24), 1, 1, 0)
+           || test_requantize(RandomIntMat(15, 24), 24, 24, 24)
+           || test_requantize(RandomIntMat(15, 24), 24, 24, 1)
+           || test_requantize(RandomIntMat(15, 24), 24, 24, 0)
+           || test_requantize(RandomIntMat(15, 24), 1, 24, 24)
+           || test_requantize(RandomIntMat(15, 24), 1, 24, 1)
+           || test_requantize(RandomIntMat(15, 24), 1, 24, 0)
+           || test_requantize(RandomIntMat(15, 24), 24, 1, 24)
+           || test_requantize(RandomIntMat(15, 24), 24, 1, 1)
+           || test_requantize(RandomIntMat(15, 24), 24, 1, 0)
+           || test_requantize(RandomIntMat(128), 1, 1, 1)
+           || test_requantize(RandomIntMat(128), 1, 1, 0)
+           || test_requantize(RandomIntMat(127), 1, 1, 0, 2, RandomFloat(1, 2), 0.f);
+#endif // __riscv
 }
 
 int main()