Skip to content

Commit fa8c00c

Browse files
hsharma35 facebook-github-bot
authored and committed
Fix overflow and stride>1 fallback in cadence::quantized_conv1d HiFi kernels
Summary: PR pytorch#19193 fixes two correctness bugs in the HiFi kernels for cadence::quantized_conv1d_ncl.out and cadence::quantized_conv1d_nlc.out. The int8 path (xa_nn_conv2d_per_chan_sym8sxasym8s) produces incorrect results with stride > 1 on some backends (e.g., Artemis HiFi4) and is now redirected to the generic fallback for that case. The uint8 path overflowed WORD32 when computing out_multiplier32 if eff_scale >= 1.0 (i.e., bias_scale >= output_scale, since eff_scale = bias_scale / output_scale); out_multiplier32 is now clamped to INT32_MAX in that case. Reviewed By: zonglinpeng Differential Revision: D102821209
1 parent 321c029 commit fa8c00c

2 files changed

Lines changed: 14 additions & 8 deletions

File tree

backends/cadence/hifi/operators/op_quantized_conv1d_ncl.cpp

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -240,7 +240,10 @@ void xa_opt_quantized_conv1d_ncl_asym8uxsym8u_asym8u(
240240
WORD32 x_stride = stride[0];
241241
WORD32 x_padding = padding[0];
242242
WORD32 input_zero_bias = -in_zero_point;
243-
WORD32 out_multiplier32 = bias_scale * (1. / output_scale) * 2147483648;
243+
const float eff_scale = bias_scale * (1.0f / output_scale);
244+
WORD32 out_multiplier32 = (eff_scale >= 1.0f)
245+
? static_cast<WORD32>(2147483647)
246+
: static_cast<WORD32>(eff_scale * 2147483648.0f);
244247
WORD32 out_shift32 = 0;
245248
WORD32 kernel_zero_bias = -weight_zero_point;
246249

@@ -419,9 +422,9 @@ void quantized_conv1d_ncl_per_tensor_out(
419422
out);
420423
}
421424
} else if (dtype == ScalarType::Byte) {
422-
// HiFi nnlib conv1d_std kernel does not support depthwise (groups > 1).
423-
// Fall back to generic implementation.
424-
if (groups > 1) {
425+
// HiFi nnlib conv1d_std kernel does not support depthwise (groups > 1)
426+
// or stride > 1. Fall back to generic implementation.
427+
if (groups > 1 || stride[0] > 1) {
425428
impl::generic::native::quantized_conv1d_ncl_per_tensor_out(
426429
ctx,
427430
input,

backends/cadence/hifi/operators/op_quantized_conv1d_nlc.cpp

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -176,7 +176,10 @@ void xa_opt_quantized_conv1d_nlc_asym8uxsym8u_asym8u(
176176
WORD32 x_stride = stride[stride.size() - 1];
177177
WORD32 x_padding = padding[padding.size() - 1];
178178
WORD32 input_zero_bias = -in_zero_point;
179-
WORD32 out_multiplier32 = bias_scale * (1. / output_scale) * 2147483648;
179+
const float eff_scale = bias_scale * (1.0f / output_scale);
180+
WORD32 out_multiplier32 = (eff_scale >= 1.0f)
181+
? static_cast<WORD32>(2147483647)
182+
: static_cast<WORD32>(eff_scale * 2147483648.0f);
180183
WORD32 out_shift32 = 0;
181184
WORD32 kernel_zero_bias = -weight_zero_point;
182185

@@ -298,9 +301,9 @@ void quantized_conv1d_nlc_per_tensor_out(
298301
out);
299302
}
300303
} else if (dtype == ScalarType::Byte) {
301-
// HiFi nnlib conv1d_std kernel does not support depthwise (groups > 1).
302-
// Fall back to generic implementation.
303-
if (groups > 1) {
304+
// HiFi nnlib conv1d_std kernel does not support depthwise (groups > 1)
305+
// or stride > 1. Fall back to generic implementation.
306+
if (groups > 1 || stride[0] > 1) {
304307
impl::generic::native::quantized_conv1d_nlc_per_tensor_out(
305308
ctx,
306309
input,

0 commit comments

Comments
 (0)