From a6e57f23f805f5505d123d261047000e8669ce5f Mon Sep 17 00:00:00 2001
From: JustinFung <835776509@qq.com>
Date: Mon, 27 Apr 2026 00:01:37 +0800
Subject: [PATCH 1/5] RVV1.0 Requantize Layer

---
 src/layer/riscv/requantize_riscv.cpp | 390 +++++++++++++++++++++++++++
 src/layer/riscv/requantize_riscv.h   |  21 ++
 src/layer/riscv/riscv_usability.h    |  32 +++
 tests/test_requantize.cpp            |  71 +++++
 4 files changed, 514 insertions(+)
 create mode 100644 src/layer/riscv/requantize_riscv.cpp
 create mode 100644 src/layer/riscv/requantize_riscv.h

diff --git a/src/layer/riscv/requantize_riscv.cpp b/src/layer/riscv/requantize_riscv.cpp
new file mode 100644
index 000000000000..85f5b5da04be
--- /dev/null
+++ b/src/layer/riscv/requantize_riscv.cpp
@@ -0,0 +1,390 @@
+// Copyright 2026 Tencent
+// SPDX-License-Identifier: BSD-3-Clause
+
+#include "requantize_riscv.h"
+
+#if __riscv_vector
+#include <riscv_vector.h>
+#endif // __riscv_vector
+
+#include "riscv_activation.h"
+#include "riscv_usability.h"
+
+namespace ncnn {
+
+Requantize_riscv::Requantize_riscv()
+{
+#if __riscv_vector
+    support_packing = true;
+#endif // __riscv_vector
+}
+
+static void requantize_leakyrelu(const int* intptr, signed char* ptr, const Mat& scale_in_data, const Mat& bias_data, const Mat& scale_out_data, float slope, int elemcount, int elempack)
+{
+    const int scale_in_data_size = scale_in_data.w;
+    const int bias_data_size = bias_data.w;
+    const int scale_out_data_size = scale_out_data.w;
+    const int size = elemcount * elempack;
+
+    // int8(relu(v * scale_in) * scale_out)
+    // int8_relu(v * (scale_in * scale_out))
+    // int8(relu(v * scale_in + bias) * scale_out)
+    // int8_relu(v * (scale_in * scale_out) + (bias * scale_out))
+
+    // int8(leakyrelu(v * scale_in, slope) * scale_out)
+    // int8_leakyrelu(v * (scale_in * scale_out), slope)
+    // int8(leakyrelu(v * scale_in + bias, slope) * scale_out)
+    // int8_leakyrelu(v * (scale_in * scale_out) + (bias * scale_out), slope)
+
+#if __riscv_vector
+    const size_t vlm1 = __riscv_vsetvlmax_e32m1();
+    const size_t vlmax = __riscv_vsetvlmax_e32m8();
+
+    vfloat32m8_t _scale = __riscv_vfmv_v_f_f32m8(scale_in_data[0], vlmax);
+    if (scale_in_data_size > 1)
+    {
+        // if (elempack == vlm1)
+        {
+            vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_in_data, vlm1);
+            _scale = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s);
+        }
+    }
+
+    vfloat32m8_t _bias = __riscv_vfmv_v_f_f32m8(0.f, vlmax);
+    if (bias_data_size == 1)
+    {
+        _bias = __riscv_vfmv_v_f_f32m8(bias_data[0], vlmax);
+    }
+    else if (bias_data_size > 1)
+    {
+        // if (elempack == vlm1)
+        {
+            vfloat32m1_t _b = __riscv_vle32_v_f32m1((const float*)bias_data, vlm1);
+            _bias = __riscv_vcreate_v_f32m1_f32m8(_b, _b, _b, _b, _b, _b, _b, _b);
+        }
+    }
+
+    if (scale_out_data_size > 1)
+    {
+        // if (elempack == vlm1)
+        {
+            vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_out_data, vlm1);
+            vfloat32m8_t _s2 = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s);
+            _scale = __riscv_vfmul_vv_f32m8(_scale, _s2, vlmax);
+            _bias = __riscv_vfmul_vv_f32m8(_bias, _s2, vlmax);
+        }
+    }
+    else
+    {
+        _scale = __riscv_vfmul_vf_f32m8(_scale, scale_out_data[0], vlmax);
+        _bias = __riscv_vfmul_vf_f32m8(_bias, scale_out_data[0], vlmax);
+    }
+
+    int n = size;
+    if (slope > 0.f) // Leaky ReLU
+    {
+        if (bias_data_size == 0)
+        {
+            while (n > 0)
+            {
+                size_t vl = __riscv_vsetvl_e32m8(n);
+                vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl);
+                vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl);
+                _v = __riscv_vfmul_vv_f32m8(_v, _scale, vl);
+                __riscv_vse8_v_i8m2(ptr, float2int8leakyrelu(_v, slope, vl), vl);
+
+                intptr += vl;
+                ptr += vl;
+                n -= vl;
+            }
+        }
+        else
+        {
+            while (n > 0)
+            {
+                size_t vl = __riscv_vsetvl_e32m8(n);
+                vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl);
+                vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl);
+                _v = __riscv_vfmadd_vv_f32m8(_v, _scale, _bias, vl);
+                __riscv_vse8_v_i8m2(ptr, float2int8leakyrelu(_v, slope, vl), vl);
+
+                intptr += vl;
+                ptr += vl;
+                n -= vl;
+            }
+        }
+    }
+    else
+    {
+        if (bias_data_size == 0)
+        {
+            while (n > 0)
+            {
+                size_t vl = __riscv_vsetvl_e32m8(n);
+                vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl);
+                vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl);
+                _v = __riscv_vfmul_vv_f32m8(_v, _scale, vl);
+                __riscv_vse8_v_i8m2(ptr, float2int8relu(_v, vl), vl);
+
+                intptr += vl;
+                ptr += vl;
+                n -= vl;
+            }
+        }
+        else
+        {
+            while (n > 0)
+            {
+                size_t vl = __riscv_vsetvl_e32m8(n);
+                vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl);
+                vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl);
+                _v = __riscv_vfmadd_vv_f32m8(_v, _scale, _bias, vl);
+                __riscv_vse8_v_i8m2(ptr, float2int8relu(_v, vl), vl);
+
+                intptr += vl;
+                ptr += vl;
+                n -= vl;
+            }
+        }
+    }
+#else  // __riscv_vector
+    float scale = scale_in_data[0] * scale_out_data[0];
+    if (slope > 0.f)
+    {
+        if (bias_data_size == 0)
+        {
+            for (int i = 0; i < size; i++)
+            {
+                float v = *intptr * scale;
+                *ptr = (v < 0) ? float2int8(v * slope) : float2int8(v);
+                intptr++;
+                ptr++;
+            }
+        }
+        else
+        {
+            float bias = bias_data[0] * scale_out_data[0];
+            for (int i = 0; i < size; i++)
+            {
+                float v = *intptr * scale + bias;
+                *ptr = (v < 0) ? float2int8(v * slope) : float2int8(v);
+                intptr++;
+                ptr++;
+            }
+        }
+    }
+    else
+    {
+        if (bias_data_size == 0)
+        {
+            for (int i = 0; i < size; i++)
+            {
+                float v = *intptr * scale;
+                *ptr = (v < 0) ? 0 : float2int8(v);
+                intptr++;
+                ptr++;
+            }
+        }
+        else
+        {
+            float bias = bias_data[0] * scale_out_data[0];
+            for (int i = 0; i < size; i++)
+            {
+                float v = *intptr * scale + bias;
+                *ptr = (v < 0) ? 0 : float2int8(v);
+                intptr++;
+                ptr++;
+            }
+        }
+    }
+#endif // __riscv_vector
+}
+
+static void requantize(const int* intptr, signed char* ptr, const Mat& scale_in_data, const Mat& bias_data, const Mat& scale_out_data, int activation_type, const Mat& activation_params, int elemcount, int elempack)
+{
+    if ((activation_type == 1) || (activation_type == 2))
+    {
+        const float slope = activation_params[0];
+        requantize_leakyrelu(intptr, ptr, scale_in_data, bias_data, scale_out_data, slope, elemcount, elempack);
+        return;
+    }
+
+    const int scale_in_data_size = scale_in_data.w;
+    const int bias_data_size = bias_data.w;
+    const int scale_out_data_size = scale_out_data.w;
+    const int size = elemcount * elempack;
+
+    const float scale_in = scale_in_data[0];
+    const float scale_out = scale_out_data[0];
+    const float bias = bias_data_size == 0 ? 0.f : bias_data[0];
+
+#if __riscv_vector
+    const size_t vlm1 = __riscv_vsetvlmax_e32m1();
+    const size_t vlmax = __riscv_vsetvlmax_e32m8();
+
+    vfloat32m8_t _scale_in = __riscv_vfmv_v_f_f32m8(scale_in, vlmax);
+    if (scale_in_data_size > 1)
+    {
+        // if (elempack == vlm1)
+        {
+            vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_in_data, vlm1);
+            _scale_in = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s);
+        }
+    }
+
+    vfloat32m8_t _scale_out = __riscv_vfmv_v_f_f32m8(scale_out, vlmax);
+    if (scale_out_data_size > 1)
+    {
+        // if (elempack == vlm1)
+        {
+            vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_out_data, vlm1);
+            _scale_out = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s);
+        }
+    }
+
+    int n = size;
+    if (bias_data_size == 0)
+    {
+        while (n > 0)
+        {
+            size_t vl = __riscv_vsetvl_e32m8(n);
+            vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl);
+            vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl);
+            _v = __riscv_vfmul_vv_f32m8(_v, _scale_in, vl);
+            _v = activation_ps(_v, activation_type, activation_params, vl);
+            _v = __riscv_vfmul_vv_f32m8(_v, _scale_out, vl);
+            __riscv_vse8_v_i8m2(ptr, float2int8(_v, vl), vl);
+
+            intptr += vl;
+            ptr += vl;
+            n -= vl;
+        }
+    }
+    else // if (bias_data_size >= 1)
+    {
+        vfloat32m8_t _bias = __riscv_vfmv_v_f_f32m8(bias, vlmax);
+        if (bias_data_size > 1)
+        {
+            // if (elempack == vlm1)
+            {
+                vfloat32m1_t _b = __riscv_vle32_v_f32m1((const float*)bias_data, vlm1);
+                _bias = __riscv_vcreate_v_f32m1_f32m8(_b, _b, _b, _b, _b, _b, _b, _b);
+            }
+        }
+
+        while (n > 0)
+        {
+            size_t vl = __riscv_vsetvl_e32m8(n);
+            vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl);
+            vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl);
+            _v = __riscv_vfmadd_vv_f32m8(_v, _scale_in, _bias, vl); // add bias
+            _v = activation_ps(_v, activation_type, activation_params, vl);
+            _v = __riscv_vfmul_vv_f32m8(_v, _scale_out, vl);
+            __riscv_vse8_v_i8m2(ptr, float2int8(_v, vl), vl);
+
+            intptr += vl;
+            ptr += vl;
+            n -= vl;
+        }
+    }
+#else  // __riscv_vector
+    if (bias_data_size == 0)
+    {
+        for (int i = 0; i < size; i++)
+        {
+            float v = (float)(*intptr) * scale_in;
+            v = activation_ss(v, activation_type, activation_params);
+            *ptr = float2int8(v * scale_out);
+            intptr++;
+            ptr++;
+        }
+    }
+    else
+    {
+        for (int i = 0; i < size; i++)
+        {
+            float v = (float)(*intptr) * scale_in + bias;
+            v = activation_ss(v, activation_type, activation_params);
+            *ptr = float2int8(v * scale_out);
+            intptr++;
+            ptr++;
+        }
+    }
+#endif // __riscv_vector
+}
+
+int Requantize_riscv::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
+{
+    const int dims = bottom_blob.dims;
+    const int w = bottom_blob.w;
+    const int h = bottom_blob.h;
+    const int channels = bottom_blob.c;
+    const int elempack = bottom_blob.elempack;
+    const size_t out_elemsize = elempack * 1u;
+
+    if (dims == 1)
+    {
+        top_blob.create(w, out_elemsize, elempack, opt.blob_allocator);
+        if (top_blob.empty())
+            return -100;
+
+        const int wp = std::max(1, w / opt.num_threads);
+        const int nn_w = (w + wp - 1) / wp;
+
+        #pragma omp parallel for num_threads(opt.num_threads)
+        for (int ii = 0; ii < nn_w; ii++)
+        {
+            const int i = ii * wp;
+
+            const int* intptr = (const int*)bottom_blob + i * elempack;
+            signed char* ptr = (signed char*)top_blob + i * elempack;
+            const int size = std::min(w - i, wp) * elempack;
+
+            requantize(intptr, ptr, scale_in_data, bias_data, scale_out_data, activation_type, activation_params, size, 1);
+        }
+    }
+
+    if (dims == 2)
+    {
+        top_blob.create(w, h, out_elemsize, elempack, opt.blob_allocator);
+        if (top_blob.empty())
+            return -100;
+
+        #pragma omp parallel for num_threads(opt.num_threads)
+        for (int i = 0; i < h; i++)
+        {
+            const int* intptr = bottom_blob.row<const int>(i);
+            signed char* ptr = top_blob.row<signed char>(i);
+
+            const Mat scale_in_data_i = scale_in_data_size > 1 ? scale_in_data.range(i * elempack, elempack) : scale_in_data;
+            const Mat bias_data_i = bias_data_size > 1 ? bias_data.range(i * elempack, elempack) : bias_data;
+            const Mat scale_out_data_i = scale_out_data_size > 1 ? scale_out_data.range(i * elempack, elempack) : scale_out_data;
+
+            requantize(intptr, ptr, scale_in_data_i, bias_data_i, scale_out_data_i, activation_type, activation_params, w, elempack);
+        }
+    }
+
+    if (dims == 3)
+    {
+        top_blob.create(w, h, channels, out_elemsize, elempack, opt.blob_allocator);
+        if (top_blob.empty())
+            return -100;
+
+        #pragma omp parallel for num_threads(opt.num_threads)
+        for (int q = 0; q < channels; q++)
+        {
+            const int* intptr = bottom_blob.channel(q);
+            signed char* ptr = top_blob.channel(q);
+
+            const Mat scale_in_data_q = scale_in_data_size > 1 ? scale_in_data.range(q * elempack, elempack) : scale_in_data;
+            const Mat bias_data_q = bias_data_size > 1 ? bias_data.range(q * elempack, elempack) : bias_data;
+            const Mat scale_out_data_q = scale_out_data_size > 1 ? scale_out_data.range(q * elempack, elempack) : scale_out_data;
+
+            requantize(intptr, ptr, scale_in_data_q, bias_data_q, scale_out_data_q, activation_type, activation_params, w * h, elempack);
+        }
+    }
+
+    return 0;
+}
+
+} // namespace ncnn
diff --git a/src/layer/riscv/requantize_riscv.h b/src/layer/riscv/requantize_riscv.h
new file mode 100644
index 000000000000..a058746350c5
--- /dev/null
+++ b/src/layer/riscv/requantize_riscv.h
@@ -0,0 +1,21 @@
+// Copyright 2026 Tencent
+// SPDX-License-Identifier: BSD-3-Clause
+
+#ifndef LAYER_REQUANTIZE_RISCV_H
+#define LAYER_REQUANTIZE_RISCV_H
+
+#include "requantize.h"
+
+namespace ncnn {
+
+class Requantize_riscv : public Requantize // RISC-V implementation of the Requantize layer
+{
+public:
+    Requantize_riscv(); // turns on support_packing when built with __riscv_vector
+
+    virtual int forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const; // reads bottom_blob as int32, writes top_blob as signed char
+};
+
+} // namespace ncnn
+
+#endif // LAYER_REQUANTIZE_RISCV_H
diff --git a/src/layer/riscv/riscv_usability.h b/src/layer/riscv/riscv_usability.h
index 7ff06d789979..451b0029502d 100644
--- a/src/layer/riscv/riscv_usability.h
+++ b/src/layer/riscv/riscv_usability.h
@@ -35,6 +35,38 @@ static inline vint8m1_t float2int8(vfloat32m4_t v, size_t vl)
     return __riscv_vnclip_wx_i8m1(v16, 0, __RISCV_VXRM_RNU, vl);
 }
 
+static inline vint8m2_t float2int8relu(vfloat32m8_t v, size_t vl) // fused relu + int8 quantize for an LMUL=8 float32 group
+{
+    vint32m8_t v32 = __riscv_vfcvt_x_f_v_i32m8_rm(v, __RISCV_FRM_RMM, vl); // float -> int32 with the RMM rounding mode (nearest, ties to max magnitude)
+    v32 = __riscv_vmax_vx_i32m8(v32, 0, vl); // relu: negative lanes become 0
+    v32 = __riscv_vmin_vx_i32m8(v32, 127, vl); // cap at 127 so the narrowing below cannot overflow int8
+    vint16m4_t v16 = __riscv_vnclip_wx_i16m4(v32, 0, __RISCV_VXRM_RNU, vl); // narrow int32 -> int16, shift amount 0
+    return __riscv_vnclip_wx_i8m2(v16, 0, __RISCV_VXRM_RNU, vl); // narrow int16 -> int8, shift amount 0
+}
+
+static inline vint8m1_t float2int8relu(vfloat32m4_t v, size_t vl) // fused relu + int8 quantize for an LMUL=4 float32 group
+{
+    vint32m4_t v32 = __riscv_vfcvt_x_f_v_i32m4_rm(v, __RISCV_FRM_RMM, vl); // float -> int32 with the RMM rounding mode (nearest, ties to max magnitude)
+    v32 = __riscv_vmax_vx_i32m4(v32, 0, vl); // relu: negative lanes become 0
+    v32 = __riscv_vmin_vx_i32m4(v32, 127, vl); // cap at 127 so the narrowing below cannot overflow int8
+    vint16m2_t v16 = __riscv_vnclip_wx_i16m2(v32, 0, __RISCV_VXRM_RNU, vl); // narrow int32 -> int16, shift amount 0
+    return __riscv_vnclip_wx_i8m1(v16, 0, __RISCV_VXRM_RNU, vl); // narrow int16 -> int8, shift amount 0
+}
+
+static inline vint8m2_t float2int8leakyrelu(vfloat32m8_t v, float slope, size_t vl)
+{
+    vint8m2_t v8 = float2int8(v, vl);
+    vint8m2_t v8_leaky = float2int8(__riscv_vfmul_vf_f32m8(v, slope, vl), vl);
+    return __riscv_vmax_vv_i8m2(v8, v8_leaky, vl);
+}
+
+static inline vint8m1_t float2int8leakyrelu(vfloat32m4_t v, float slope, size_t vl)
+{
+    vint8m1_t v8 = float2int8(v, vl);
+    vint8m1_t v8_leaky = float2int8(__riscv_vfmul_vf_f32m4(v, slope, vl), vl);
+    return __riscv_vmax_vv_i8m1(v8, v8_leaky, vl);
+}
+
 #if __riscv_zvfh
 static inline vint8m4_t float2int8(vfloat16m8_t v, size_t vl)
 {
diff --git a/tests/test_requantize.cpp b/tests/test_requantize.cpp
index 3271119c8572..70ad93057923 100644
--- a/tests/test_requantize.cpp
+++ b/tests/test_requantize.cpp
@@ -3,6 +3,48 @@
 
 #include "testutil.h"
 
+static int test_requantize(const ncnn::Mat& a, int scale_in_data_size, int scale_out_data_size, int bias_data_size, int activation_type, float alpha, float beta)
+{
+    ncnn::ParamDict pd;
+    pd.set(0, scale_in_data_size);
+    pd.set(1, scale_out_data_size);
+    pd.set(2, bias_data_size);
+
+    ncnn::Mat activation_params(2);
+    activation_params[0] = alpha;
+    activation_params[1] = beta;
+    pd.set(3, activation_type);
+    pd.set(4, activation_params);
+
+    std::vector<ncnn::Mat> weights(bias_data_size ? 3 : 2);
+    weights[0] = RandomMat(scale_in_data_size);
+    weights[1] = RandomMat(scale_out_data_size);
+    if (bias_data_size)
+        weights[2] = RandomMat(bias_data_size);
+
+    Randomize(weights[0], 0.0001, 0.001);
+    Randomize(weights[1], 10, 100);
+
+    int ret = test_layer("Requantize", pd, weights, a, 1);
+    if (ret != 0)
+    {
+        fprintf(stderr, "test_requantize failed a.dims=%d a=(%d %d %d) scale_in_data_size=%d scale_out_data_size=%d bias_data_size=%d act=%d actparams=[%f,%f]\n", a.dims, a.w, a.h, a.c, scale_in_data_size, scale_out_data_size, bias_data_size, activation_type, activation_params[0], activation_params[1]);
+    }
+
+    return ret;
+}
+
+static int test_requantize(const ncnn::Mat& a, int scale_in_data_size, int scale_out_data_size, int bias_data_size)
+{
+    return 0
+           || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 0, 0.f, 0.f)
+           || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 1, 0.f, 0.f)
+           || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 2, RandomFloat(0, 1), 0.f)
+           || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 3, RandomFloat(-1, 0), RandomFloat(0, 1))
+           || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 4, 0.f, 0.f)
+           || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 5, 0.f, 0.f);
+}
+
 static int test_requantize_pack1(const ncnn::Mat& a, int scale_in_data_size, int scale_out_data_size, int bias_data_size, int activation_type, float alpha, float beta)
 {
     ncnn::ParamDict pd;
@@ -159,6 +201,7 @@ static int test_requantize_2()
 static int test_requantize_3()
 {
     return 0
+#ifndef __riscv
            || test_requantize_pack8(RandomIntMat(5, 7, 24), 1, 1, 24)
            || test_requantize_pack8(RandomIntMat(5, 7, 24), 1, 1, 1)
            || test_requantize_pack8(RandomIntMat(5, 7, 24), 1, 1, 0)
@@ -185,6 +228,34 @@ static int test_requantize_3()
            || test_requantize_pack8(RandomIntMat(15, 24), 24, 1, 0)
            || test_requantize_pack8(RandomIntMat(128), 1, 1, 1)
            || test_requantize_pack8(RandomIntMat(128), 1, 1, 0);
+#else
+           || test_requantize(RandomIntMat(5, 7, 24), 1, 1, 24)
+           || test_requantize(RandomIntMat(5, 7, 24), 1, 1, 1)
+           || test_requantize(RandomIntMat(5, 7, 24), 1, 1, 0)
+           || test_requantize(RandomIntMat(5, 7, 24), 24, 24, 24)
+           || test_requantize(RandomIntMat(5, 7, 24), 24, 24, 1)
+           || test_requantize(RandomIntMat(5, 7, 24), 24, 24, 0)
+           || test_requantize(RandomIntMat(5, 7, 24), 1, 24, 24)
+           || test_requantize(RandomIntMat(5, 7, 24), 1, 24, 1)
+           || test_requantize(RandomIntMat(5, 7, 24), 1, 24, 0)
+           || test_requantize(RandomIntMat(5, 7, 24), 24, 1, 24)
+           || test_requantize(RandomIntMat(5, 7, 24), 24, 1, 1)
+           || test_requantize(RandomIntMat(5, 7, 24), 24, 1, 0)
+           || test_requantize(RandomIntMat(15, 24), 1, 1, 24)
+           || test_requantize(RandomIntMat(15, 24), 1, 1, 1)
+           || test_requantize(RandomIntMat(15, 24), 1, 1, 0)
+           || test_requantize(RandomIntMat(15, 24), 24, 24, 24)
+           || test_requantize(RandomIntMat(15, 24), 24, 24, 1)
+           || test_requantize(RandomIntMat(15, 24), 24, 24, 0)
+           || test_requantize(RandomIntMat(15, 24), 1, 24, 24)
+           || test_requantize(RandomIntMat(15, 24), 1, 24, 1)
+           || test_requantize(RandomIntMat(15, 24), 1, 24, 0)
+           || test_requantize(RandomIntMat(15, 24), 24, 1, 24)
+           || test_requantize(RandomIntMat(15, 24), 24, 1, 1)
+           || test_requantize(RandomIntMat(15, 24), 24, 1, 0)
+           || test_requantize(RandomIntMat(128), 1, 1, 1)
+           || test_requantize(RandomIntMat(128), 1, 1, 0);
+#endif // __riscv
 }
 
 int main()

From 0bc6eb3a02347051c5b47f0611dfb40f93c325e0 Mon Sep 17 00:00:00 2001
From: JustinFung <835776509@qq.com>
Date: Sat, 4 Apr 2026 16:09:49 +0800
Subject: [PATCH 2/5] Requested change: Slope

---
 src/layer/riscv/riscv_usability.h | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/layer/riscv/riscv_usability.h b/src/layer/riscv/riscv_usability.h
index 451b0029502d..bdd846a80e8a 100644
--- a/src/layer/riscv/riscv_usability.h
+++ b/src/layer/riscv/riscv_usability.h
@@ -55,16 +55,18 @@ static inline vint8m1_t float2int8relu(vfloat32m4_t v, size_t vl)
 
 static inline vint8m2_t float2int8leakyrelu(vfloat32m8_t v, float slope, size_t vl)
 {
-    vint8m2_t v8 = float2int8(v, vl);
-    vint8m2_t v8_leaky = float2int8(__riscv_vfmul_vf_f32m8(v, slope, vl), vl);
-    return __riscv_vmax_vv_i8m2(v8, v8_leaky, vl);
+     vfloat32m8_t v_pos = __riscv_vfmax_vf_f32m8(v, 0.f, vl);
+     vfloat32m8_t v_neg = __riscv_vfmin_vf_f32m8(v, 0.f, vl);
+     vfloat32m8_t v_leakyrelu = __riscv_vfadd_vv_f32m8(v_pos, __riscv_vfmul_vf_f32m8(v_neg, slope, vl), vl);
+     return float2int8(v_leakyrelu, vl);
 }
 
 static inline vint8m1_t float2int8leakyrelu(vfloat32m4_t v, float slope, size_t vl)
 {
-    vint8m1_t v8 = float2int8(v, vl);
-    vint8m1_t v8_leaky = float2int8(__riscv_vfmul_vf_f32m4(v, slope, vl), vl);
-    return __riscv_vmax_vv_i8m1(v8, v8_leaky, vl);
+     vfloat32m4_t v_pos = __riscv_vfmax_vf_f32m4(v, 0.f, vl);
+     vfloat32m4_t v_neg = __riscv_vfmin_vf_f32m4(v, 0.f, vl);
+     vfloat32m4_t v_leakyrelu = __riscv_vfadd_vv_f32m4(v_pos, __riscv_vfmul_vf_f32m4(v_neg, slope, vl), vl);
+     return float2int8(v_leakyrelu, vl);
 }
 
 #if __riscv_zvfh

From 5ec204cf9be00b25327508e62ad562ffd2dfbca2 Mon Sep 17 00:00:00 2001
From: JustinFung <835776509@qq.com>
Date: Sun, 17 May 2026 10:37:00 +0800
Subject: [PATCH 3/5] Requested change: naming

---
 src/layer/riscv/requantize_riscv.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/layer/riscv/requantize_riscv.cpp b/src/layer/riscv/requantize_riscv.cpp
index 85f5b5da04be..9e98307ee947 100644
--- a/src/layer/riscv/requantize_riscv.cpp
+++ b/src/layer/riscv/requantize_riscv.cpp
@@ -19,7 +19,7 @@ Requantize_riscv::Requantize_riscv()
 #endif // __riscv_vector
 }
 
-static void requantize_leakyrelu(const int* intptr, signed char* ptr, const Mat& scale_in_data, const Mat& bias_data, const Mat& scale_out_data, float slope, int elemcount, int elempack)
+static void requantize_relu(const int* intptr, signed char* ptr, const Mat& scale_in_data, const Mat& bias_data, const Mat& scale_out_data, float slope, int elemcount, int elempack)
 {
     const int scale_in_data_size = scale_in_data.w;
     const int bias_data_size = bias_data.w;
@@ -205,7 +205,7 @@ static void requantize(const int* intptr, signed char* ptr, const Mat& scale_in_
     if ((activation_type == 1) || (activation_type == 2))
     {
         const float slope = activation_params[0];
-        requantize_leakyrelu(intptr, ptr, scale_in_data, bias_data, scale_out_data, slope, elemcount, elempack);
+        requantize_relu(intptr, ptr, scale_in_data, bias_data, scale_out_data, slope, elemcount, elempack);
         return;
     }
 

From 225785cf3cbf68e831a1ddcb60b325542962c7df Mon Sep 17 00:00:00 2001
From: Deepdive543443 <83911295+Deepdive543443@users.noreply.github.com>
Date: Sun, 17 May 2026 02:39:27 +0000
Subject: [PATCH 4/5] apply code-format changes

---
 src/layer/riscv/riscv_usability.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/layer/riscv/riscv_usability.h b/src/layer/riscv/riscv_usability.h
index bdd846a80e8a..9e90eb77ca89 100644
--- a/src/layer/riscv/riscv_usability.h
+++ b/src/layer/riscv/riscv_usability.h
@@ -55,18 +55,18 @@ static inline vint8m1_t float2int8relu(vfloat32m4_t v, size_t vl)
 
 static inline vint8m2_t float2int8leakyrelu(vfloat32m8_t v, float slope, size_t vl)
 {
-     vfloat32m8_t v_pos = __riscv_vfmax_vf_f32m8(v, 0.f, vl);
-     vfloat32m8_t v_neg = __riscv_vfmin_vf_f32m8(v, 0.f, vl);
-     vfloat32m8_t v_leakyrelu = __riscv_vfadd_vv_f32m8(v_pos, __riscv_vfmul_vf_f32m8(v_neg, slope, vl), vl);
-     return float2int8(v_leakyrelu, vl);
+    vfloat32m8_t v_pos = __riscv_vfmax_vf_f32m8(v, 0.f, vl); // max(v, 0): the non-negative part of each lane
+    vfloat32m8_t v_neg = __riscv_vfmin_vf_f32m8(v, 0.f, vl); // min(v, 0): the non-positive part of each lane
+    vfloat32m8_t v_leakyrelu = __riscv_vfadd_vv_f32m8(v_pos, __riscv_vfmul_vf_f32m8(v_neg, slope, vl), vl); // v_pos + slope * v_neg
+    return float2int8(v_leakyrelu, vl); // quantize the activated float value to int8
 }
 
 static inline vint8m1_t float2int8leakyrelu(vfloat32m4_t v, float slope, size_t vl)
 {
-     vfloat32m4_t v_pos = __riscv_vfmax_vf_f32m4(v, 0.f, vl);
-     vfloat32m4_t v_neg = __riscv_vfmin_vf_f32m4(v, 0.f, vl);
-     vfloat32m4_t v_leakyrelu = __riscv_vfadd_vv_f32m4(v_pos, __riscv_vfmul_vf_f32m4(v_neg, slope, vl), vl);
-     return float2int8(v_leakyrelu, vl);
+    vfloat32m4_t v_pos = __riscv_vfmax_vf_f32m4(v, 0.f, vl); // max(v, 0): the non-negative part of each lane
+    vfloat32m4_t v_neg = __riscv_vfmin_vf_f32m4(v, 0.f, vl); // min(v, 0): the non-positive part of each lane
+    vfloat32m4_t v_leakyrelu = __riscv_vfadd_vv_f32m4(v_pos, __riscv_vfmul_vf_f32m4(v_neg, slope, vl), vl); // v_pos + slope * v_neg
+    return float2int8(v_leakyrelu, vl); // quantize the activated float value to int8
 }
 
 #if __riscv_zvfh

From 3614864a7529154d86cb5f948a234d44caf90175 Mon Sep 17 00:00:00 2001
From: nihui <nihuini@tencent.com>
Date: Mon, 18 May 2026 15:24:21 +0800
Subject: [PATCH 5/5] cc

---
 src/layer/riscv/requantize_riscv.cpp | 311 +++++++++++++++------------
 tests/test_requantize.cpp            |  36 +++-
 2 files changed, 206 insertions(+), 141 deletions(-)

diff --git a/src/layer/riscv/requantize_riscv.cpp b/src/layer/riscv/requantize_riscv.cpp
index 9e98307ee947..677f5b570747 100644
--- a/src/layer/riscv/requantize_riscv.cpp
+++ b/src/layer/riscv/requantize_riscv.cpp
@@ -19,11 +19,9 @@ Requantize_riscv::Requantize_riscv()
 #endif // __riscv_vector
 }
 
-static void requantize_relu(const int* intptr, signed char* ptr, const Mat& scale_in_data, const Mat& bias_data, const Mat& scale_out_data, float slope, int elemcount, int elempack)
+static void requantize_relu(const int* intptr, signed char* ptr, const Mat& scale_in_data, const Mat& bias_data, const Mat& scale_out_data, int elemcount, int elempack)
 {
-    const int scale_in_data_size = scale_in_data.w;
     const int bias_data_size = bias_data.w;
-    const int scale_out_data_size = scale_out_data.w;
     const int size = elemcount * elempack;
 
     // int8(relu(v * scale_in) * scale_out)
@@ -31,170 +29,209 @@ static void requantize_relu(const int* intptr, signed char* ptr, const Mat& scal
     // int8(relu(v * scale_in + bias) * scale_out)
     // int8_relu(v * (scale_in * scale_out) + (bias * scale_out))
 
-    // int8(leakyrelu(v * scale_in, slope) * scale_out)
-    // int8_leakyrelu(v * (scale_in * scale_out), slope)
-    // int8(leakyrelu(v * scale_in + bias, slope) * scale_out)
-    // int8_leakyrelu(v * (scale_in * scale_out) + (bias * scale_out), slope)
-
 #if __riscv_vector
+    const int scale_in_data_size = scale_in_data.w;
+    const int scale_out_data_size = scale_out_data.w;
+
     const size_t vlm1 = __riscv_vsetvlmax_e32m1();
     const size_t vlmax = __riscv_vsetvlmax_e32m8();
 
     vfloat32m8_t _scale = __riscv_vfmv_v_f_f32m8(scale_in_data[0], vlmax);
     if (scale_in_data_size > 1)
     {
-        // if (elempack == vlm1)
-        {
-            vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_in_data, vlm1);
-            _scale = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s);
-        }
+        vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_in_data, vlm1);
+        _scale = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s);
+    }
+
+    vfloat32m8_t _scale_out;
+    if (scale_out_data_size > 1)
+    {
+        vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_out_data, vlm1);
+        _scale_out = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s);
+        _scale = __riscv_vfmul_vv_f32m8(_scale, _scale_out, vlmax);
+    }
+    else
+    {
+        _scale = __riscv_vfmul_vf_f32m8(_scale, scale_out_data[0], vlmax);
     }
 
-    vfloat32m8_t _bias = __riscv_vfmv_v_f_f32m8(0.f, vlmax);
-    if (bias_data_size == 1)
+    int n = size;
+    if (bias_data_size == 0)
     {
-        _bias = __riscv_vfmv_v_f_f32m8(bias_data[0], vlmax);
+        while (n > 0)
+        {
+            size_t vl = __riscv_vsetvl_e32m8(n);
+            vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl);
+            vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl);
+            _v = __riscv_vfmul_vv_f32m8(_v, _scale, vl);
+            __riscv_vse8_v_i8m2(ptr, float2int8relu(_v, vl), vl);
+
+            intptr += vl;
+            ptr += vl;
+            n -= vl;
+        }
     }
-    else if (bias_data_size > 1)
+    else
     {
-        // if (elempack == vlm1)
+        vfloat32m8_t _bias = __riscv_vfmv_v_f_f32m8(bias_data[0], vlmax);
+        if (bias_data_size > 1)
         {
             vfloat32m1_t _b = __riscv_vle32_v_f32m1((const float*)bias_data, vlm1);
             _bias = __riscv_vcreate_v_f32m1_f32m8(_b, _b, _b, _b, _b, _b, _b, _b);
         }
-    }
 
-    if (scale_out_data_size > 1)
+        if (scale_out_data_size > 1)
+        {
+            _bias = __riscv_vfmul_vv_f32m8(_bias, _scale_out, vlmax);
+        }
+        else
+        {
+            _bias = __riscv_vfmul_vf_f32m8(_bias, scale_out_data[0], vlmax);
+        }
+
+        while (n > 0)
+        {
+            size_t vl = __riscv_vsetvl_e32m8(n);
+            vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl);
+            vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl);
+            _v = __riscv_vfmadd_vv_f32m8(_v, _scale, _bias, vl);
+            __riscv_vse8_v_i8m2(ptr, float2int8relu(_v, vl), vl);
+
+            intptr += vl;
+            ptr += vl;
+            n -= vl;
+        }
+    }
+#else  // __riscv_vector
+    float scale = scale_in_data[0] * scale_out_data[0];
+    if (bias_data_size == 0)
     {
-        // if (elempack == vlm1)
+        for (int i = 0; i < size; i++)
         {
-            vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_out_data, vlm1);
-            vfloat32m8_t _s2 = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s);
-            _scale = __riscv_vfmul_vv_f32m8(_scale, _s2, vlmax);
-            _bias = __riscv_vfmul_vv_f32m8(_bias, _s2, vlmax);
+            float v = *intptr * scale;
+            *ptr = (v < 0) ? 0 : float2int8(v);
+            intptr++;
+            ptr++;
         }
     }
     else
+    {
+        float bias = bias_data[0] * scale_out_data[0];
+        for (int i = 0; i < size; i++)
+        {
+            float v = *intptr * scale + bias;
+            *ptr = (v < 0) ? 0 : float2int8(v);
+            intptr++;
+            ptr++;
+        }
+    }
+#endif // __riscv_vector
+}
+
+static void requantize_leakyrelu(const int* intptr, signed char* ptr, const Mat& scale_in_data, const Mat& bias_data, const Mat& scale_out_data, float slope, int elemcount, int elempack)
+{
+    const int bias_data_size = bias_data.w;
+    const int size = elemcount * elempack;
+
+    // int8(leakyrelu(v * scale_in, slope) * scale_out)
+    // int8_leakyrelu(v * (scale_in * scale_out), slope)
+    // int8(leakyrelu(v * scale_in + bias, slope) * scale_out)
+    // int8_leakyrelu(v * (scale_in * scale_out) + (bias * scale_out), slope)
+
+#if __riscv_vector
+    const int scale_in_data_size = scale_in_data.w;
+    const int scale_out_data_size = scale_out_data.w;
+
+    const size_t vlm1 = __riscv_vsetvlmax_e32m1();
+    const size_t vlmax = __riscv_vsetvlmax_e32m8();
+
+    vfloat32m8_t _scale = __riscv_vfmv_v_f_f32m8(scale_in_data[0], vlmax);
+    if (scale_in_data_size > 1)
+    {
+        vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_in_data, vlm1);
+        _scale = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s);
+    }
+
+    vfloat32m8_t _scale_out;
+    if (scale_out_data_size > 1)
+    {
+        vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_out_data, vlm1);
+        _scale_out = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s);
+        _scale = __riscv_vfmul_vv_f32m8(_scale, _scale_out, vlmax);
+    }
+    else
     {
         _scale = __riscv_vfmul_vf_f32m8(_scale, scale_out_data[0], vlmax);
-        _bias = __riscv_vfmul_vf_f32m8(_bias, scale_out_data[0], vlmax);
     }
 
     int n = size;
-    if (slope > 0.f) // Leaky ReLU
+    if (bias_data_size == 0)
     {
-        if (bias_data_size == 0)
-        {
-            while (n > 0)
-            {
-                size_t vl = __riscv_vsetvl_e32m8(n);
-                vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl);
-                vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl);
-                _v = __riscv_vfmul_vv_f32m8(_v, _scale, vl);
-                __riscv_vse8_v_i8m2(ptr, float2int8leakyrelu(_v, slope, vl), vl);
-
-                intptr += vl;
-                ptr += vl;
-                n -= vl;
-            }
-        }
-        else
+        while (n > 0)
         {
-            while (n > 0)
-            {
-                size_t vl = __riscv_vsetvl_e32m8(n);
-                vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl);
-                vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl);
-                _v = __riscv_vfmadd_vv_f32m8(_v, _scale, _bias, vl);
-                __riscv_vse8_v_i8m2(ptr, float2int8leakyrelu(_v, slope, vl), vl);
-
-                intptr += vl;
-                ptr += vl;
-                n -= vl;
-            }
+            size_t vl = __riscv_vsetvl_e32m8(n);
+            vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl);
+            vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl);
+            _v = __riscv_vfmul_vv_f32m8(_v, _scale, vl);
+            __riscv_vse8_v_i8m2(ptr, float2int8leakyrelu(_v, slope, vl), vl);
+
+            intptr += vl;
+            ptr += vl;
+            n -= vl;
         }
     }
     else
     {
-        if (bias_data_size == 0)
+        vfloat32m8_t _bias = __riscv_vfmv_v_f_f32m8(bias_data[0], vlmax);
+        if (bias_data_size > 1)
         {
-            while (n > 0)
-            {
-                size_t vl = __riscv_vsetvl_e32m8(n);
-                vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl);
-                vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl);
-                _v = __riscv_vfmul_vv_f32m8(_v, _scale, vl);
-                __riscv_vse8_v_i8m2(ptr, float2int8relu(_v, vl), vl);
-
-                intptr += vl;
-                ptr += vl;
-                n -= vl;
-            }
+            vfloat32m1_t _b = __riscv_vle32_v_f32m1((const float*)bias_data, vlm1);
+            _bias = __riscv_vcreate_v_f32m1_f32m8(_b, _b, _b, _b, _b, _b, _b, _b);
+        }
+
+        if (scale_out_data_size > 1)
+        {
+            _bias = __riscv_vfmul_vv_f32m8(_bias, _scale_out, vlmax);
         }
         else
         {
-            while (n > 0)
-            {
-                size_t vl = __riscv_vsetvl_e32m8(n);
-                vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl);
-                vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl);
-                _v = __riscv_vfmadd_vv_f32m8(_v, _scale, _bias, vl);
-                __riscv_vse8_v_i8m2(ptr, float2int8relu(_v, vl), vl);
-
-                intptr += vl;
-                ptr += vl;
-                n -= vl;
-            }
+            _bias = __riscv_vfmul_vf_f32m8(_bias, scale_out_data[0], vlmax);
+        }
+
+        while (n > 0)
+        {
+            size_t vl = __riscv_vsetvl_e32m8(n);
+            vint32m8_t _vi = __riscv_vle32_v_i32m8(intptr, vl);
+            vfloat32m8_t _v = __riscv_vfcvt_f_x_v_f32m8(_vi, vl);
+            _v = __riscv_vfmadd_vv_f32m8(_v, _scale, _bias, vl);
+            __riscv_vse8_v_i8m2(ptr, float2int8leakyrelu(_v, slope, vl), vl);
+
+            intptr += vl;
+            ptr += vl;
+            n -= vl;
         }
     }
 #else  // __riscv_vector
     float scale = scale_in_data[0] * scale_out_data[0];
-    if (slope > 0.f)
+    if (bias_data_size == 0)
     {
-        if (bias_data_size == 0)
-        {
-            for (int i = 0; i < size; i++)
-            {
-                float v = *intptr * scale;
-                *ptr = (v < 0) ? float2int8(v * slope) : float2int8(v);
-                intptr++;
-                ptr++;
-            }
-        }
-        else
+        for (int i = 0; i < size; i++)
         {
-            float bias = bias_data[0] * scale_out_data[0];
-            for (int i = 0; i < size; i++)
-            {
-                float v = *intptr * scale + bias;
-                *ptr = (v < 0) ? float2int8(v * slope) : float2int8(v);
-                intptr++;
-                ptr++;
-            }
+            float v = *intptr * scale;
+            *ptr = (v < 0) ? float2int8(v * slope) : float2int8(v);
+            intptr++;
+            ptr++;
         }
     }
     else
     {
-        if (bias_data_size == 0)
-        {
-            for (int i = 0; i < size; i++)
-            {
-                float v = *intptr * scale;
-                *ptr = (v < 0) ? 0 : float2int8(v);
-                intptr++;
-                ptr++;
-            }
-        }
-        else
+        float bias = bias_data[0] * scale_out_data[0];
+        for (int i = 0; i < size; i++)
         {
-            float bias = bias_data[0] * scale_out_data[0];
-            for (int i = 0; i < size; i++)
-            {
-                float v = *intptr * scale + bias;
-                *ptr = (v < 0) ? 0 : float2int8(v);
-                intptr++;
-                ptr++;
-            }
+            float v = *intptr * scale + bias;
+            *ptr = (v < 0) ? float2int8(v * slope) : float2int8(v);
+            intptr++;
+            ptr++;
         }
     }
 #endif // __riscv_vector
@@ -202,16 +239,20 @@ static void requantize_relu(const int* intptr, signed char* ptr, const Mat& scal
 
 static void requantize(const int* intptr, signed char* ptr, const Mat& scale_in_data, const Mat& bias_data, const Mat& scale_out_data, int activation_type, const Mat& activation_params, int elemcount, int elempack)
 {
-    if ((activation_type == 1) || (activation_type == 2))
+    if (activation_type == 1)
+    {
+        requantize_relu(intptr, ptr, scale_in_data, bias_data, scale_out_data, elemcount, elempack);
+        return;
+    }
+
+    if (activation_type == 2 && activation_params[0] > 0.f)
     {
         const float slope = activation_params[0];
-        requantize_relu(intptr, ptr, scale_in_data, bias_data, scale_out_data, slope, elemcount, elempack);
+        requantize_leakyrelu(intptr, ptr, scale_in_data, bias_data, scale_out_data, slope, elemcount, elempack);
         return;
     }
 
-    const int scale_in_data_size = scale_in_data.w;
     const int bias_data_size = bias_data.w;
-    const int scale_out_data_size = scale_out_data.w;
     const int size = elemcount * elempack;
 
     const float scale_in = scale_in_data[0];
@@ -219,27 +260,24 @@ static void requantize(const int* intptr, signed char* ptr, const Mat& scale_in_
     const float bias = bias_data_size == 0 ? 0.f : bias_data[0];
 
 #if __riscv_vector
+    const int scale_in_data_size = scale_in_data.w;
+    const int scale_out_data_size = scale_out_data.w;
+
     const size_t vlm1 = __riscv_vsetvlmax_e32m1();
     const size_t vlmax = __riscv_vsetvlmax_e32m8();
 
     vfloat32m8_t _scale_in = __riscv_vfmv_v_f_f32m8(scale_in, vlmax);
     if (scale_in_data_size > 1)
     {
-        // if (elempack == vlm1)
-        {
-            vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_in_data, vlm1);
-            _scale_in = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s);
-        }
+        vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_in_data, vlm1);
+        _scale_in = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s);
     }
 
     vfloat32m8_t _scale_out = __riscv_vfmv_v_f_f32m8(scale_out, vlmax);
     if (scale_out_data_size > 1)
     {
-        // if (elempack == vlm1)
-        {
-            vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_out_data, vlm1);
-            _scale_out = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s);
-        }
+        vfloat32m1_t _s = __riscv_vle32_v_f32m1((const float*)scale_out_data, vlm1);
+        _scale_out = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s);
     }
 
     int n = size;
@@ -265,11 +303,8 @@ static void requantize(const int* intptr, signed char* ptr, const Mat& scale_in_
         vfloat32m8_t _bias = __riscv_vfmv_v_f_f32m8(bias, vlmax);
         if (bias_data_size > 1)
         {
-            // if (elempack == vlm1)
-            {
-                vfloat32m1_t _b = __riscv_vle32_v_f32m1((const float*)bias_data, vlm1);
-                _bias = __riscv_vcreate_v_f32m1_f32m8(_b, _b, _b, _b, _b, _b, _b, _b);
-            }
+            vfloat32m1_t _b = __riscv_vle32_v_f32m1((const float*)bias_data, vlm1);
+            _bias = __riscv_vcreate_v_f32m1_f32m8(_b, _b, _b, _b, _b, _b, _b, _b);
         }
 
         while (n > 0)
diff --git a/tests/test_requantize.cpp b/tests/test_requantize.cpp
index 70ad93057923..6f2183195d1e 100644
--- a/tests/test_requantize.cpp
+++ b/tests/test_requantize.cpp
@@ -25,7 +25,8 @@ static int test_requantize(const ncnn::Mat& a, int scale_in_data_size, int scale
     Randomize(weights[0], 0.0001, 0.001);
     Randomize(weights[1], 10, 100);
 
-    int ret = test_layer("Requantize", pd, weights, a, 1);
+    int flag = TEST_LAYER_DISABLE_AUTO_INPUT_CASTING;
+    int ret = test_layer("Requantize", pd, weights, a, 1, flag);
     if (ret != 0)
     {
         fprintf(stderr, "test_requantize failed a.dims=%d a=(%d %d %d) scale_in_data_size=%d scale_out_data_size=%d bias_data_size=%d act=%d actparams=[%f,%f]\n", a.dims, a.w, a.h, a.c, scale_in_data_size, scale_out_data_size, bias_data_size, activation_type, activation_params[0], activation_params[1]);
@@ -45,6 +46,31 @@ static int test_requantize(const ncnn::Mat& a, int scale_in_data_size, int scale
            || test_requantize(a, scale_in_data_size, scale_out_data_size, bias_data_size, 5, 0.f, 0.f);
 }
 
+static int test_requantize_relu_empty_activation_params(const ncnn::Mat& a) // Runs the "Requantize" layer test on `a` with only params 0..3 set; returns test_layer's result.
+{
+    ncnn::ParamDict pd;
+    pd.set(0, 1); // presumably scale_in_data_size = 1 -- TODO confirm against the Requantize param ids
+    pd.set(1, 1); // presumably scale_out_data_size = 1
+    pd.set(2, 0); // presumably bias_data_size = 0 (no bias blob is supplied below)
+    pd.set(3, 1); // presumably activation_type = 1 (relu); no activation params entry is set after this
+
+    std::vector<ncnn::Mat> weights(2); // two weight blobs, each created with a single element
+    weights[0] = RandomMat(1);
+    weights[1] = RandomMat(1);
+
+    Randomize(weights[0], 0.0001, 0.001); // same ranges that test_requantize uses for its two weights
+    Randomize(weights[1], 10, 100);
+
+    int flag = TEST_LAYER_DISABLE_AUTO_INPUT_CASTING | TEST_LAYER_DISABLE_AUTO_INPUT_PACKING; // per the flag names: harness must not cast or repack the input
+    int ret = test_layer("Requantize", pd, weights, a, 1, flag);
+    if (ret != 0) // a non-zero result from test_layer is reported as a failure
+    {
+        fprintf(stderr, "test_requantize_relu_empty_activation_params failed a.dims=%d a=(%d %d %d)\n", a.dims, a.w, a.h, a.c);
+    }
+
+    return ret;
+}
+
 static int test_requantize_pack1(const ncnn::Mat& a, int scale_in_data_size, int scale_out_data_size, int bias_data_size, int activation_type, float alpha, float beta)
 {
     ncnn::ParamDict pd;
@@ -195,7 +221,10 @@ static int test_requantize_2()
            || test_requantize_pack1(RandomIntMat(124), 1, 1, 1)
            || test_requantize_pack1(RandomIntMat(124), 1, 1, 0)
            || test_requantize_pack1(RandomIntMat(127), 1, 1, 1)
-           || test_requantize_pack1(RandomIntMat(127), 1, 1, 0);
+           || test_requantize_pack1(RandomIntMat(127), 1, 1, 0)
+           || test_requantize_pack1(RandomIntMat(127), 1, 1, 0, 2, 0.f, 0.f)
+           || test_requantize_pack1(RandomIntMat(127), 1, 1, 0, 2, RandomFloat(-1, 0), 0.f)
+           || test_requantize_relu_empty_activation_params(RandomIntMat(127));
 }
 
 static int test_requantize_3()
@@ -254,7 +283,8 @@ static int test_requantize_3()
            || test_requantize(RandomIntMat(15, 24), 24, 1, 1)
            || test_requantize(RandomIntMat(15, 24), 24, 1, 0)
            || test_requantize(RandomIntMat(128), 1, 1, 1)
-           || test_requantize(RandomIntMat(128), 1, 1, 0);
+           || test_requantize(RandomIntMat(128), 1, 1, 0)
+           || test_requantize(RandomIntMat(127), 1, 1, 0, 2, RandomFloat(1, 2), 0.f);
 #endif // __riscv
 }