Skip to content

Commit 89600b3

Browse files
authored
Precompute T1 offset for quantized conv2d NHWC in TIE kernel (#18960)
Differential Revision: D100690813; Pull Request resolved: #18960
1 parent 54b0148 commit 89600b3

9 files changed

Lines changed: 15 additions & 4 deletions

File tree

backends/cadence/aot/functions.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -384,7 +384,7 @@
384384
- arg_meta: null
385385
kernel_name: impl::generic::quantized_conv2d_nchw_per_tensor_out
386386

387-
- func: cadence::quantized_conv2d_nhwc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
387+
- func: cadence::quantized_conv2d_nhwc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, Tensor? offset=None, *, Tensor(a!) out) -> Tensor(a!)
388388
kernels:
389389
- arg_meta: null
390390
kernel_name: impl::generic::quantized_conv2d_nhwc_per_tensor_out

backends/cadence/aot/functions_hifi.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -395,7 +395,7 @@
395395
- arg_meta: null
396396
kernel_name: impl::HiFi::quantized_conv2d_nchw_per_tensor_out
397397

398-
- func: cadence::quantized_conv2d_nhwc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)
398+
- func: cadence::quantized_conv2d_nhwc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, Tensor? offset=None, *, Tensor(a!) out) -> Tensor(a!)
399399
kernels:
400400
- arg_meta: null
401401
kernel_name: impl::HiFi::quantized_conv2d_nhwc_per_tensor_out

backends/cadence/aot/ops_registrations.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -233,10 +233,10 @@ def register_fake(
233233
"quantized_conv2d_nhwc.out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift, *, Tensor(a!) out) -> Tensor(a!)"
234234
)
235235
lib.define(
236-
"quantized_conv2d_nhwc.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift) -> (Tensor Z)"
236+
"quantized_conv2d_nhwc.per_tensor(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, Tensor? offset=None) -> (Tensor Z)"
237237
)
238238
lib.define(
239-
"quantized_conv2d_nhwc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, *, Tensor(a!) out) -> Tensor(a!)"
239+
"quantized_conv2d_nhwc.per_tensor_out(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, int weight_zero_point, float bias_scale, float out_scale, int out_zero_point, int out_multiplier, int out_shift, Tensor? offset=None, *, Tensor(a!) out) -> Tensor(a!)"
240240
)
241241
lib.define(
242242
"quantized_conv1d_ncl(Tensor input, Tensor weight, Tensor bias, int[] stride, SymInt[] padding, int[] dilation, int groups, int input_zero_point, Tensor weight_zero_point, Tensor bias_scale, float out_scale, int out_zero_point, Tensor out_multiplier, Tensor out_shift) -> (Tensor Z)"
@@ -1444,6 +1444,7 @@ def quantized_conv2d_nhwc_per_tensor_meta(
14441444
output_zero_point: int,
14451445
out_multiplier: int,
14461446
out_shift: int,
1447+
offset: Optional[torch.Tensor] = None,
14471448
) -> torch.Tensor:
14481449
in_size = input.shape
14491450
# Assert that the input tensor has at least 3 dimensions, and at most 6

backends/cadence/aot/ref_implementations.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1449,6 +1449,7 @@ def quantized_conv2d_nhwc_per_tensor(
14491449
output_zero_point: int,
14501450
out_multiplier: int,
14511451
out_shift: int,
1452+
offset: torch.Tensor | None = None,
14521453
) -> torch.Tensor:
14531454
"""
14541455
Quantized convolution operation.

backends/cadence/generic/operators/op_quantized_conv2d.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ namespace impl {
1616
namespace generic {
1717
namespace native {
1818

19+
using ::executorch::aten::optional;
1920
using ::executorch::aten::ScalarType;
2021
using ::executorch::aten::Tensor;
2122
using ::executorch::runtime::KernelRuntimeContext;
@@ -935,6 +936,7 @@ Tensor& quantized_conv2d_nhwc_per_tensor_out(
935936
int64_t output_zero_point,
936937
ET_UNUSED int64_t out_multiplier,
937938
ET_UNUSED int64_t out_shift,
939+
ET_UNUSED const ::executorch::aten::optional<Tensor>& offset,
938940
Tensor& out) {
939941
quantized_conv2d_nhwc(
940942
input,

backends/cadence/generic/operators/op_quantized_conv2d.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -205,6 +205,7 @@ ::executorch::aten::Tensor& quantized_conv2d_nhwc_per_tensor_out(
205205
int64_t output_zero_point,
206206
int64_t out_multiplier,
207207
int64_t out_shift,
208+
const ::executorch::aten::optional<Tensor>& offset,
208209
Tensor& out);
209210

210211
::executorch::aten::Tensor&

backends/cadence/hifi/operators/op_quantized_conv2d_nhwc_out.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@ using Tensor = executorch::aten::Tensor;
1717
using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
1818
using ScalarType = executorch::aten::ScalarType;
1919
using ::executorch::aten::IntArrayRef;
20+
using ::executorch::aten::optional;
2021

2122
namespace impl {
2223
namespace HiFi {
@@ -378,6 +379,7 @@ void xa_opt_quantized_conv2d_nhwc(
378379
output_zero_point,
379380
0, // out_multiplier (unused)
380381
0, // out_shift (unused)
382+
optional<Tensor>(), // offset (unused)
381383
out);
382384
}
383385

@@ -568,6 +570,7 @@ void quantized_conv2d_nhwc_per_tensor_out(
568570
int64_t output_zero_point,
569571
__ET_UNUSED int64_t out_multiplier,
570572
__ET_UNUSED int64_t out_shift,
573+
const optional<Tensor>& offset,
571574
Tensor& out) {
572575
// Handle W8A16 heterogeneous type (int16_t activations, int8_t weights)
573576
if (out.scalar_type() == ::executorch::aten::ScalarType::Short &&
@@ -589,6 +592,7 @@ void quantized_conv2d_nhwc_per_tensor_out(
589592
output_zero_point,
590593
out_multiplier,
591594
out_shift,
595+
offset,
592596
out);
593597
return;
594598
}

backends/cadence/hifi/operators/operators.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -158,6 +158,7 @@ void quantized_conv2d_nhwc_per_tensor_out(
158158
int64_t output_zero_point,
159159
int64_t out_multiplier,
160160
int64_t out_shift,
161+
const ::executorch::aten::optional<::executorch::aten::Tensor>& offset,
161162
::executorch::aten::Tensor& out);
162163

163164
::executorch::aten::Tensor& cat_out(

backends/cadence/vision/operators/op_quantized_conv_out.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -582,6 +582,7 @@ void quantized_conv2d_nhwc_per_tensor_out(
582582
int64_t output_zero_point,
583583
int64_t out_multiplier,
584584
int64_t out_shift,
585+
ET_UNUSED const ::executorch::aten::optional<Tensor>& offset,
585586
Tensor& out) {
586587
quantized_conv_per_tensor_out(
587588
ctx,

0 commit comments

Comments
 (0)