Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions onnxruntime/core/graph/contrib_ops/contrib_defs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,22 @@ void convTransposeWithDynamicPadsShapeInference(InferenceContext& ctx) {
}

int64_t group = getAttribute(ctx, "group", 1);
if (group <= 0) {
fail_shape_inference("group attribute must be positive. Got: ", group);
}

auto input_shape = ctx.getInputType(0)->tensor_type().shape();
if (input_shape.dim_size() < 2) {
return; // Input tensor should have at least two dimensions.
// ConvTranspose requires X=(N x C x D1...Dn) and W=(C x M/group x k1...kn), both rank >= 3.
Comment thread
yuslepukhin marked this conversation as resolved.
// The upstream ONNX ConvTranspose shape inference only checks rank >= 2, which allows rank-2
// inputs to pass shape inference but crash at kernel execution time. We tighten the check here
// to fail early at model load with a clear error. Fixing ONNX upstream is tracked separately.
if (input_shape.dim_size() < 3) {
fail_shape_inference("Input tensor must have at least 3 dimensions. Got: ", input_shape.dim_size());
}

auto weight_shape = ctx.getInputType(1)->tensor_type().shape();
if (weight_shape.dim_size() < 3) {
fail_shape_inference("Weight tensor must have at least 3 dimensions. Got: ", weight_shape.dim_size());
}

Comment thread
yuslepukhin marked this conversation as resolved.
// first dim is the batch axis and the next is the number of channels.
Expand Down Expand Up @@ -147,7 +159,7 @@ void convTransposeWithDynamicPadsShapeInference(InferenceContext& ctx) {

*final_output_shape->add_dim() = input_shape.dim(0);
*final_output_shape->add_dim() =
ctx.getInputType(1)->tensor_type().shape().dim(1) *
weight_shape.dim(1) *
group; // channels should be the second dim of second input multiply
// group.

Expand Down
7 changes: 4 additions & 3 deletions onnxruntime/core/providers/cpu/nn/conv_transpose.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,16 +76,17 @@ Status ConvTranspose<float>::PrePack(const Tensor& tensor, int input_idx, Alloca
size_t packed_filter_data_size = SafeInt<size_t>(packed_elements_per_group) * sizeof(float) * conv_transpose_attrs_.group;
auto* packed_filter_data = alloc->Alloc(packed_filter_data_size);

// Wrap in BufferUniquePtr immediately to prevent leaks.
transposed_filter_ = BufferUniquePtr(packed_filter_data, BufferDeleter(std::move(alloc)));

// Initialize memory to 0 as there could be some padding associated with pre-packed
// buffer memory, and we do not want it left uninitialized, which could generate different
// hashes if and when we try to cache this pre-packed buffer for sharing between sessions.
memset(packed_filter_data, 0, packed_filter_data_size);

transposed_filter_ = BufferUniquePtr(packed_filter_data, BufferDeleter(std::move(alloc)));

for (int64_t group_id = 0; group_id < conv_transpose_attrs_.group; ++group_id) {
MlasTranspose(tensor.Data<float>() + (group_id * N * K),
((float*)packed_filter_data) + (group_id * packed_elements_per_group),
static_cast<float*>(packed_filter_data) + (group_id * packed_elements_per_group),
K, N, nullptr);
}

Expand Down
144 changes: 123 additions & 21 deletions onnxruntime/core/providers/cpu/nn/conv_transpose_attributes.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@

#pragma once

#include <algorithm>

#include "core/providers/cpu/nn/conv_attributes.h"
#include "core/common/safeint.h"

namespace onnxruntime {

Expand Down Expand Up @@ -61,6 +64,21 @@ struct ConvTransposeAttributes : public ConvAttributes {
const Tensor* B = has_bias ? (dynamic_padding ? context->Input<Tensor>(3) : context->Input<Tensor>(2)) : nullptr;

const int rank = static_cast<int>(X->Shape().NumDimensions());

// ConvTranspose requires X shape (N x C x D1...Dn) and W shape (C x M/group x k1...kn),
// both must have at least 3 dimensions.
if (rank < 3) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"Input X must have at least 3 dimensions (N x C x D1...Dn).",
" X: ", X->Shape().ToString().c_str());
}

if (static_cast<int>(F_Shape.NumDimensions()) < 3) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"Filter W must have at least 3 dimensions (C x M/group x k1...kn).",
" W: ", F_Shape.ToString().c_str());
}

TensorShape input_shape = X->Shape().Slice(is_nhwc ? 1 : 2, is_nhwc ? rank - 1 : rank);
const int64_t num_input_channels = is_nhwc ? X->Shape()[rank - 1] : X->Shape()[1];
const int64_t N = X->Shape()[0];
Expand Down Expand Up @@ -119,11 +137,32 @@ struct ConvTransposeAttributes : public ConvAttributes {
if (local_output_padding.empty()) {
local_output_padding.resize(kernel_shape.size(), 0);
}
if (local_output_padding.size() != kernel_shape.size()) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"output_padding size (", local_output_padding.size(),
") does not match the number of spatial dimensions (", kernel_shape.size(), ").");
}
ConvPadVector local_pads;
local_pads.reserve(2 * (input_shape.NumDimensions()));
if (dynamic_padding) {
for (int64_t i = 0; i < Pads->Shape().SizeFromDimension(0); ++i) {
local_pads.push_back(Pads->Data<int64_t>()[i]);
if (Pads->Shape().NumDimensions() != 1) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"Dynamic pads tensor must be 1-D. Got rank: ", Pads->Shape().NumDimensions());
}
const int64_t expected_pads_size = SafeInt<int64_t>(kernel_shape.size()) * 2;
const int64_t actual_pads_size = Pads->Shape().SizeFromDimension(0);
if (actual_pads_size != expected_pads_size) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"Dynamic pads tensor size (", actual_pads_size,
") does not match expected size (2 * spatial_dims = ", expected_pads_size, ").");
}
Comment thread
yuslepukhin marked this conversation as resolved.
const auto* pads_data = Pads->Data<int64_t>();
for (int64_t i = 0; i < actual_pads_size; ++i) {
Comment thread
yuslepukhin marked this conversation as resolved.
if (pads_data[i] < 0) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"Dynamic pads must be non-negative. Got pads[", i, "] = ", pads_data[i]);
}
local_pads.push_back(pads_data[i]);
}
} else {
local_pads.assign(pads.begin(), pads.end());
Expand All @@ -140,10 +179,34 @@ struct ConvTransposeAttributes : public ConvAttributes {
local_strides.resize(kernel_shape.size(), 1);
}

if (local_strides.size() != kernel_shape.size()) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"strides size (", local_strides.size(),
") does not match the number of spatial dimensions (", kernel_shape.size(), ").");
}
if (local_dilations.size() != kernel_shape.size()) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"dilations size (", local_dilations.size(),
") does not match the number of spatial dimensions (", kernel_shape.size(), ").");
}

// ONNX spec: "output_padding[i] should be less than max(stride[i], dilation[i])".
// This constraint ensures the output_padding is unambiguous — larger values would shift
// the output by more than one stride/dilation step, making the inverse of Conv ill-defined.
for (size_t i = 0; i < local_output_padding.size(); ++i) {
int64_t limit = std::max(local_strides[i], local_dilations[i]);
if (local_output_padding[i] >= limit) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"output_padding[", i, "] (", local_output_padding[i],
") must be less than max(stride, dilation) (", limit,
") for spatial dimension ", i, ".");
}
}

TensorShapeVector Y_dims;

ComputePadsAndOutputShape(input_shape, num_output_channels, kernel_shape,
local_strides, local_dilations, local_output_padding, N, &local_pads, &Y_dims, is_nhwc);
ORT_RETURN_IF_ERROR(ComputePadsAndOutputShape(input_shape, num_output_channels, kernel_shape,
local_strides, local_dilations, local_output_padding, N, &local_pads, &Y_dims, is_nhwc));
TensorShape Yshape(Y_dims);
Tensor* Y = context->Output(0, Yshape);

Expand All @@ -162,50 +225,68 @@ struct ConvTransposeAttributes : public ConvAttributes {
return Status::OK();
}

void ComputePadsAndOutputShape(TensorShape input_shape, int64_t output_channel,
const TensorShapeVector& kernel_shape, const TensorShapeVector& p_strides,
const TensorShapeVector& p_dilations, const TensorShapeVector& p_output_padding, const int64_t N,
ConvPadVector* p_pads, TensorShapeVector* output_shape_p,
bool is_nhwc = false) const {
Status ComputePadsAndOutputShape(TensorShape input_shape, int64_t output_channel,
const TensorShapeVector& kernel_shape, const TensorShapeVector& p_strides,
const TensorShapeVector& p_dilations, const TensorShapeVector& p_output_padding, const int64_t N,
ConvPadVector* p_pads, TensorShapeVector* output_shape_p,
bool is_nhwc = false) const {
Comment thread
yuslepukhin marked this conversation as resolved.
size_t output_shape_size = output_shape.size();
size_t rank = input_shape.NumDimensions();

if (p_pads->size() != 2 * rank) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"pads size (", p_pads->size(), ") does not match expected size (2 * ", rank, ").");
}

// output_shape attribute, if specified, must have either 'rank' or 'rank + 2' elements
if (output_shape_size != 0 && output_shape_size != rank && output_shape_size != rank + 2) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"output_shape attribute has ", output_shape_size,
" elements, expected ", rank, " or ", rank + 2, ".");
}

if (is_nhwc) {
output_shape_p->insert(output_shape_p->begin(), {N});
} else {
output_shape_p->insert(output_shape_p->begin(), {N, output_channel});
}

size_t rank = input_shape.NumDimensions();
for (size_t dim = 0; dim < rank; ++dim) {
int64_t dim_size = -1;

if (output_shape_size != 0) {
dim_size = output_shape_size == rank ? output_shape[dim] : output_shape[dim + 2];
}

ComputeTransposePadAndOutputShape(
ORT_RETURN_IF_ERROR(ComputeTransposePadAndOutputShape(
input_shape[dim],
p_strides[dim],
kernel_shape[dim],
p_dilations[dim],
p_output_padding[dim],
auto_pad,
&p_pads->at(dim),
&p_pads->at(input_shape.NumDimensions() + dim),
&dim_size);
&(*p_pads)[dim],
&(*p_pads)[input_shape.NumDimensions() + dim],
&dim_size));
Comment thread
yuslepukhin marked this conversation as resolved.

ORT_ENFORCE(dim_size > 0, "Invalid input shape: ", input_shape.ToString());
if (dim_size <= 0) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"Computed output dimension is <= 0 for dim ", dim,
". Input shape: ", input_shape.ToString());
}
output_shape_p->push_back(dim_size);
}
if (is_nhwc) {
output_shape_p->push_back(output_channel);
}
return Status::OK();
}

TensorShapeVector output_padding;
TensorShapeVector output_shape;

private:
void ComputeTransposePadAndOutputShape(
Status ComputeTransposePadAndOutputShape(
const int64_t in_size,
const int64_t stride,
const int64_t kernel,
Expand All @@ -217,27 +298,48 @@ struct ConvTransposeAttributes : public ConvAttributes {
int64_t* out_size) const {
// Output shape is explicitly provided - pad values will have to be computed
if (*out_size != -1) {
ORT_ENFORCE(*out_size >= 0);
if (*out_size < 0) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"Explicit output size is negative: ", *out_size);
}
// total pad
auto total_pad = ComputeTotalPad(in_size, stride, adj,
kernel, dilation, *out_size);
DistributePadding(pad_type, total_pad, *pad_head, *pad_tail);
return;
return Status::OK();
}

// Output shape is not provided - it needs to be computed along with pad values (if applicable)

// Validate that stride, kernel, and dilation are positive
if (stride <= 0) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Stride must be positive. Got: ", stride);
}
if (kernel <= 0) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Kernel size must be positive. Got: ", kernel);
}
if (dilation <= 0) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Dilation must be positive. Got: ", dilation);
}
if (adj < 0) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Output padding must be non-negative. Got: ", adj);
}

// Compute padding if the auto_pad attribute is SAME_UPPER/SAME_LOWER
if (pad_type == AutoPadType::SAME_UPPER || pad_type == AutoPadType::SAME_LOWER) {
// The ONNX spec says if `auto_pad` attribute is set, pad until the `out_size`
// is `in_size * stride`
int64_t auto_out_size = SafeInt<int64_t>(in_size) * stride;
auto total_pad = ComputeTotalPad(in_size, stride, adj,
kernel, dilation, /*out_size = */ in_size * stride);
kernel, dilation, auto_out_size);
DistributePadding(pad_type, total_pad, *pad_head, *pad_tail);
}

*out_size =
(in_size - 1) * stride + adj + (kernel - 1) * dilation + 1 - *pad_head - *pad_tail;
// *out_size = (in_size - 1) * stride + adj + (kernel - 1) * dilation + 1 - *pad_head - *pad_tail
*out_size = SafeInt<int64_t>(in_size - 1) * stride + adj +
SafeInt<int64_t>(kernel - 1) * dilation + 1 -
*pad_head - *pad_tail;
return Status::OK();
Comment thread
yuslepukhin marked this conversation as resolved.
Comment thread
yuslepukhin marked this conversation as resolved.
Comment thread
yuslepukhin marked this conversation as resolved.
}
};

Expand Down
Loading
Loading