Skip to content

Commit 3c7f276

Browse files
rascani and claude authored
Use 16-byte alignment for CMSIS-NN scratch buffers (pytorch#17765)
### Summary

MVE vector operations require 16-byte aligned data. Replace `alignof(int16_t)` (2 bytes) for scratch buffer allocations with a shared `kCortexMMveAlignment` constant.

Co-authored-by: Claude <noreply@anthropic.com>
1 parent ae41854 commit 3c7f276

4 files changed

Lines changed: 7 additions & 4 deletions

File tree

backends/cortex_m/ops/cortex_m_ops_common.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,9 @@ using KernelRuntimeContext = torch::executor::KernelRuntimeContext;
3636
#define ARM_NN_Q31_MAX ((int32_t)(0x7FFFFFFFL))
3737
#define ARM_NN_Q31_MIN ((int32_t)(0x80000000L))
3838

39+
// 16-byte alignment for MVE vector operations.
40+
constexpr size_t kCortexMMveAlignment = 16;
41+
3942
// Basic tensor type / layout validation and dimension order checking
4043
inline void validate_cmsis_nn_tensor_requirements(
4144
const Tensor& input1,

backends/cortex_m/ops/op_quantized_conv2d.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -192,7 +192,7 @@ Tensor& quantized_conv2d_out(
192192
arm_convolve_s8_get_buffer_size(&input_dims, &filter_dims));
193193
if (buffer_bytes > 0) {
194194
auto buffer_or_error =
195-
context.allocate_temp(buffer_bytes, alignof(int16_t));
195+
context.allocate_temp(buffer_bytes, kCortexMMveAlignment);
196196
if (!buffer_or_error.ok()) {
197197
if (buffer_or_error.error() != Error::NotFound) {
198198
ET_LOG(

backends/cortex_m/ops/op_quantized_depthwise_conv2d.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -237,7 +237,7 @@ Tensor& quantized_depthwise_conv2d_out(
237237
}
238238

239239
auto buffer_or_error = context.allocate_temp(
240-
static_cast<size_t>(buffer_bytes), alignof(int16_t));
240+
static_cast<size_t>(buffer_bytes), kCortexMMveAlignment);
241241
if (!buffer_or_error.ok()) {
242242
ET_LOG(
243243
Error,

backends/cortex_m/ops/op_quantized_transpose_conv2d.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -192,7 +192,7 @@ Tensor& quantized_transpose_conv2d_out(
192192
const int32_t buffer_bytes = arm_transpose_conv_s8_get_buffer_size(
193193
&transpose_conv_params, &input_dims, &filter_dims, &output_dims);
194194
auto buffer_or_error = context.allocate_temp(
195-
static_cast<size_t>(buffer_bytes), alignof(int16_t));
195+
static_cast<size_t>(buffer_bytes), kCortexMMveAlignment);
196196
if (!buffer_or_error.ok()) {
197197
ET_LOG(
198198
Error,
@@ -209,7 +209,7 @@ Tensor& quantized_transpose_conv2d_out(
209209
arm_transpose_conv_s8_get_reverse_conv_buffer_size(
210210
&transpose_conv_params, &input_dims, &filter_dims);
211211
auto output_buffer_or_error = context.allocate_temp(
212-
static_cast<size_t>(output_buffer_bytes), alignof(int16_t));
212+
static_cast<size_t>(output_buffer_bytes), kCortexMMveAlignment);
213213
if (!output_buffer_or_error.ok()) {
214214
ET_LOG(
215215
Error,

0 commit comments

Comments
 (0)