Skip to content

Commit 4a9e1cf

Browse files
Andrew Grebenisan authored and facebook-github-bot committed
Fused quant relu kernel (#19486)
Summary: Fused quant ReLU kernel with optional dequantize/quantize. Unary op that applies max(0, x) in float space. Supports per-tensor and per-channel quantization. Reviewed By: mvartani-meta Differential Revision: D103754745
1 parent 9b5e742 commit 4a9e1cf

5 files changed

Lines changed: 505 additions & 0 deletions

File tree

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include <algorithm>
10+
11+
#include <executorch/backends/cadence/fused_quant/op_relu.h>
12+
#include <executorch/backends/cadence/fused_quant/quant_utils.h>
13+
#include <executorch/runtime/kernel/kernel_includes.h>
14+
15+
namespace cadence {
16+
namespace fused_quant {
17+
namespace native {
18+
19+
using executorch::aten::optional;
20+
using executorch::aten::ScalarType;
21+
using executorch::aten::Tensor;
22+
using executorch::runtime::KernelRuntimeContext;
23+
24+
namespace {

// Elementwise ReLU over a raw float buffer: out[i] = max(0, inp[i]).
// Uses std::max(0.0f, v) so NaN inputs map to 0.0f, matching the
// semantics of std::max's (a < b ? b : a) definition. A numel of 0
// is a no-op. `inp` and `out` must not partially overlap.
void relu_kernel(const float* inp, float* out, int64_t numel) {
  std::transform(
      inp, inp + numel, out, [](float v) { return std::max(0.0f, v); });
}

} // namespace
33+
34+
// Fused quantized ReLU. Optionally dequantizes the input, applies
// max(0, x) in float space, and optionally re-quantizes the result.
// Presence of `inp_scale` / `out_scale` decides whether the input /
// output side is quantized; absent scales mean the tensor is already
// float. Returns `out`.
Tensor& relu_out(
    KernelRuntimeContext& ctx,
    const Tensor& inp,
    const optional<Tensor>& inp_scale,
    const optional<Tensor>& inp_zero_point,
    ScalarType inp_dtype,
    int64_t inp_quant_min,
    int64_t inp_quant_max,
    optional<int64_t> inp_axis,
    const optional<Tensor>& out_scale,
    const optional<Tensor>& out_zero_point,
    ScalarType out_dtype,
    int64_t out_quant_min,
    int64_t out_quant_max,
    optional<int64_t> out_axis,
    Tensor& out) {
  const int64_t n = inp.numel();

  const bool dequant_input = inp_scale.has_value();
  const bool quant_output = out_scale.has_value();

  // Staging buffer; only populated when the input must be dequantized.
  std::vector<float> dequant_buf;
  const float* src = nullptr;
  if (dequant_input) {
    dequant_buf.resize(n);
    QParams in_qp = extract_qparams(
        inp_scale, inp_zero_point, inp_quant_min, inp_quant_max, inp_axis, inp);
    // Dispatch on the input's runtime dtype to pick the element type.
    FUSED_QUANT_DTYPE_SWITCH(
        inp.scalar_type(),
        scalar_t,
        dequantize_buffer(
            inp.const_data_ptr<scalar_t>(), dequant_buf.data(), n, in_qp);)
    src = dequant_buf.data();
  } else {
    // Input is already float; read it directly.
    src = inp.const_data_ptr<float>();
  }

  if (quant_output) {
    // ReLU into a temporary float buffer, then quantize into `out`.
    std::vector<float> relu_buf(n);
    relu_kernel(src, relu_buf.data(), n);

    QParams out_qp = extract_qparams(
        out_scale, out_zero_point, out_quant_min, out_quant_max, out_axis, out);
    FUSED_QUANT_DTYPE_SWITCH(
        out.scalar_type(),
        scalar_t,
        quantize_buffer(
            relu_buf.data(), out.mutable_data_ptr<scalar_t>(), n, out_qp);)
  } else {
    // Float output: write ReLU results straight into `out`.
    relu_kernel(src, out.mutable_data_ptr<float>(), n);
  }

  return out;
}
88+
89+
} // namespace native
90+
} // namespace fused_quant
91+
} // namespace cadence
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#pragma once
10+
11+
#include <executorch/runtime/core/exec_aten/exec_aten.h>
12+
#include <executorch/runtime/kernel/kernel_includes.h>
13+
14+
namespace cadence {
namespace fused_quant {
namespace native {

/// Fused quantized ReLU: computes out = max(0, inp) elementwise in
/// float space, with optional dequantize of the input and optional
/// quantize of the result.
///
/// When `inp_scale` has a value, the input is dequantized using the
/// inp_* parameters before the ReLU; when `out_scale` has a value,
/// the float result is re-quantized into `out` using the out_*
/// parameters. An absent scale means that side is already float.
/// Per-channel quantization is presumably selected via `inp_axis` /
/// `out_axis` — TODO confirm against quant_utils' extract_qparams.
///
/// @param ctx Kernel runtime context (not read by the current
///   implementation).
/// @param inp Input tensor (float, or quantized per the inp_* params).
/// @param inp_scale Optional quantization scale(s) for `inp`.
/// @param inp_zero_point Optional zero point(s) for `inp`.
/// @param inp_dtype Declared quantized dtype of `inp`.
///   NOTE(review): the implementation dispatches on
///   inp.scalar_type(), not this parameter — confirm intent.
/// @param inp_quant_min Minimum quantized value for `inp`.
/// @param inp_quant_max Maximum quantized value for `inp`.
/// @param inp_axis Optional per-channel axis for `inp`.
/// @param out_scale Optional quantization scale(s) for `out`.
/// @param out_zero_point Optional zero point(s) for `out`.
/// @param out_dtype Declared quantized dtype of `out`.
///   NOTE(review): likewise unused; out.scalar_type() is used instead.
/// @param out_quant_min Minimum quantized value for `out`.
/// @param out_quant_max Maximum quantized value for `out`.
/// @param out_axis Optional per-channel axis for `out`.
/// @param out Output tensor, written in place.
/// @returns `out`.
executorch::aten::Tensor& relu_out(
    executorch::runtime::KernelRuntimeContext& ctx,
    const executorch::aten::Tensor& inp,
    const executorch::aten::optional<executorch::aten::Tensor>& inp_scale,
    const executorch::aten::optional<executorch::aten::Tensor>& inp_zero_point,
    executorch::aten::ScalarType inp_dtype,
    int64_t inp_quant_min,
    int64_t inp_quant_max,
    executorch::aten::optional<int64_t> inp_axis,
    const executorch::aten::optional<executorch::aten::Tensor>& out_scale,
    const executorch::aten::optional<executorch::aten::Tensor>& out_zero_point,
    executorch::aten::ScalarType out_dtype,
    int64_t out_quant_min,
    int64_t out_quant_max,
    executorch::aten::optional<int64_t> out_axis,
    executorch::aten::Tensor& out);

} // namespace native
} // namespace fused_quant
} // namespace cadence

backends/cadence/fused_quant/targets.bzl

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,15 @@ def define_common_targets():
3434
],
3535
visibility = ["PUBLIC"],
3636
)
37+
38+
# Fused-quant ReLU kernel: op_relu.cpp/.h, built on the shared
# quantization helpers in :quant_utils.
runtime.cxx_library(
    name = "op_relu",
    srcs = ["op_relu.cpp"],
    exported_headers = ["op_relu.h"],
    platforms = CXX,
    deps = [
        ":quant_utils",
        "//executorch/runtime/kernel:kernel_includes",
    ],
    visibility = ["PUBLIC"],
)

backends/cadence/fused_quant/tests/BUCK

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,14 @@ runtime.cxx_test(
2424
"//executorch/runtime/core/exec_aten/testing_util:tensor_util",
2525
],
2626
)
27+
28+
# Unit tests for the fused-quant ReLU kernel (:op_relu).
runtime.cxx_test(
    name = "test_op_relu",
    srcs = ["test_op_relu.cpp"],
    platforms = CXX,
    deps = [
        "//executorch/backends/cadence/fused_quant:op_relu",
        "//executorch/kernels/test:gtest_utils",
        "//executorch/runtime/core/exec_aten/testing_util:tensor_util",
    ],
)

0 commit comments

Comments
 (0)