diff --git a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc index a5537c7d58b05..50e8c017284e2 100644 --- a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc +++ b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc @@ -56,10 +56,22 @@ void convTransposeWithDynamicPadsShapeInference(InferenceContext& ctx) { } int64_t group = getAttribute(ctx, "group", 1); + if (group <= 0) { + fail_shape_inference("group attribute must be positive. Got: ", group); + } auto input_shape = ctx.getInputType(0)->tensor_type().shape(); - if (input_shape.dim_size() < 2) { - return; // Input tensor should have at least two dimensions. + // ConvTranspose requires X=(N x C x D1...Dn) and W=(C x M/group x k1...kn), both rank >= 3. + // The upstream ONNX ConvTranspose shape inference only checks rank >= 2, which allows rank-2 + // inputs to pass shape inference but crash at kernel execution time. We tighten the check here + // to fail early at model load with a clear error. Fixing ONNX upstream is tracked separately. + if (input_shape.dim_size() < 3) { + fail_shape_inference("Input tensor must have at least 3 dimensions. Got: ", input_shape.dim_size()); + } + + auto weight_shape = ctx.getInputType(1)->tensor_type().shape(); + if (weight_shape.dim_size() < 3) { + fail_shape_inference("Weight tensor must have at least 3 dimensions. Got: ", weight_shape.dim_size()); } // first dim is the batch axis and the next is the number of channels. @@ -147,7 +159,7 @@ void convTransposeWithDynamicPadsShapeInference(InferenceContext& ctx) { *final_output_shape->add_dim() = input_shape.dim(0); *final_output_shape->add_dim() = - ctx.getInputType(1)->tensor_type().shape().dim(1) * + weight_shape.dim(1) * group; // channels should be the second dim of second input multiply // group. 
diff --git a/onnxruntime/core/providers/cpu/nn/conv_transpose.cc b/onnxruntime/core/providers/cpu/nn/conv_transpose.cc index bbb530d037cec..4cb1b91fc28c8 100644 --- a/onnxruntime/core/providers/cpu/nn/conv_transpose.cc +++ b/onnxruntime/core/providers/cpu/nn/conv_transpose.cc @@ -76,16 +76,17 @@ Status ConvTranspose::PrePack(const Tensor& tensor, int input_idx, Alloca size_t packed_filter_data_size = SafeInt(packed_elements_per_group) * sizeof(float) * conv_transpose_attrs_.group; auto* packed_filter_data = alloc->Alloc(packed_filter_data_size); + // Wrap in BufferUniquePtr immediately to prevent leaks. + transposed_filter_ = BufferUniquePtr(packed_filter_data, BufferDeleter(std::move(alloc))); + // Initialize memory to 0 as there could be some padding associated with pre-packed // buffer memory and we don not want it uninitialized and generate different hashes // if and when we try to cache this pre-packed buffer for sharing between sessions. memset(packed_filter_data, 0, packed_filter_data_size); - transposed_filter_ = BufferUniquePtr(packed_filter_data, BufferDeleter(std::move(alloc))); - for (int64_t group_id = 0; group_id < conv_transpose_attrs_.group; ++group_id) { MlasTranspose(tensor.Data() + (group_id * N * K), - ((float*)packed_filter_data) + (group_id * packed_elements_per_group), + static_cast(packed_filter_data) + (group_id * packed_elements_per_group), K, N, nullptr); } diff --git a/onnxruntime/core/providers/cpu/nn/conv_transpose_attributes.h b/onnxruntime/core/providers/cpu/nn/conv_transpose_attributes.h index 4ca90a885ea96..03d9c4e28f6eb 100644 --- a/onnxruntime/core/providers/cpu/nn/conv_transpose_attributes.h +++ b/onnxruntime/core/providers/cpu/nn/conv_transpose_attributes.h @@ -18,7 +18,10 @@ #pragma once +#include + #include "core/providers/cpu/nn/conv_attributes.h" +#include "core/common/safeint.h" namespace onnxruntime { @@ -61,6 +64,21 @@ struct ConvTransposeAttributes : public ConvAttributes { const Tensor* B = has_bias ? 
(dynamic_padding ? context->Input(3) : context->Input(2)) : nullptr; const int rank = static_cast(X->Shape().NumDimensions()); + + // ConvTranspose requires X shape (N x C x D1...Dn) and W shape (C x M/group x k1...kn), + // both must have at least 3 dimensions. + if (rank < 3) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "Input X must have at least 3 dimensions (N x C x D1...Dn).", + " X: ", X->Shape().ToString().c_str()); + } + + if (static_cast(F_Shape.NumDimensions()) < 3) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "Filter W must have at least 3 dimensions (C x M/group x k1...kn).", + " W: ", F_Shape.ToString().c_str()); + } + TensorShape input_shape = X->Shape().Slice(is_nhwc ? 1 : 2, is_nhwc ? rank - 1 : rank); const int64_t num_input_channels = is_nhwc ? X->Shape()[rank - 1] : X->Shape()[1]; const int64_t N = X->Shape()[0]; @@ -119,11 +137,32 @@ struct ConvTransposeAttributes : public ConvAttributes { if (local_output_padding.empty()) { local_output_padding.resize(kernel_shape.size(), 0); } + if (local_output_padding.size() != kernel_shape.size()) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "output_padding size (", local_output_padding.size(), + ") does not match the number of spatial dimensions (", kernel_shape.size(), ")."); + } ConvPadVector local_pads; local_pads.reserve(2 * (input_shape.NumDimensions())); if (dynamic_padding) { - for (int64_t i = 0; i < Pads->Shape().SizeFromDimension(0); ++i) { - local_pads.push_back(Pads->Data()[i]); + if (Pads->Shape().NumDimensions() != 1) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "Dynamic pads tensor must be 1-D. 
Got rank: ", Pads->Shape().NumDimensions()); + } + const int64_t expected_pads_size = SafeInt(kernel_shape.size()) * 2; + const int64_t actual_pads_size = Pads->Shape().SizeFromDimension(0); + if (actual_pads_size != expected_pads_size) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "Dynamic pads tensor size (", actual_pads_size, + ") does not match expected size (2 * spatial_dims = ", expected_pads_size, ")."); + } + const auto* pads_data = Pads->Data(); + for (int64_t i = 0; i < actual_pads_size; ++i) { + if (pads_data[i] < 0) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "Dynamic pads must be non-negative. Got pads[", i, "] = ", pads_data[i]); + } + local_pads.push_back(pads_data[i]); } } else { local_pads.assign(pads.begin(), pads.end()); @@ -140,10 +179,34 @@ struct ConvTransposeAttributes : public ConvAttributes { local_strides.resize(kernel_shape.size(), 1); } + if (local_strides.size() != kernel_shape.size()) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "strides size (", local_strides.size(), + ") does not match the number of spatial dimensions (", kernel_shape.size(), ")."); + } + if (local_dilations.size() != kernel_shape.size()) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "dilations size (", local_dilations.size(), + ") does not match the number of spatial dimensions (", kernel_shape.size(), ")."); + } + + // ONNX spec: "output_padding[i] should be less than max(stride[i], dilation[i])". + // This constraint ensures the output_padding is unambiguous — larger values would shift + // the output by more than one stride/dilation step, making the inverse of Conv ill-defined. 
+ for (size_t i = 0; i < local_output_padding.size(); ++i) { + int64_t limit = std::max(local_strides[i], local_dilations[i]); + if (local_output_padding[i] >= limit) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "output_padding[", i, "] (", local_output_padding[i], + ") must be less than max(stride, dilation) (", limit, + ") for spatial dimension ", i, "."); + } + } + TensorShapeVector Y_dims; - ComputePadsAndOutputShape(input_shape, num_output_channels, kernel_shape, - local_strides, local_dilations, local_output_padding, N, &local_pads, &Y_dims, is_nhwc); + ORT_RETURN_IF_ERROR(ComputePadsAndOutputShape(input_shape, num_output_channels, kernel_shape, + local_strides, local_dilations, local_output_padding, N, &local_pads, &Y_dims, is_nhwc)); TensorShape Yshape(Y_dims); Tensor* Y = context->Output(0, Yshape); @@ -162,19 +225,32 @@ struct ConvTransposeAttributes : public ConvAttributes { return Status::OK(); } - void ComputePadsAndOutputShape(TensorShape input_shape, int64_t output_channel, - const TensorShapeVector& kernel_shape, const TensorShapeVector& p_strides, - const TensorShapeVector& p_dilations, const TensorShapeVector& p_output_padding, const int64_t N, - ConvPadVector* p_pads, TensorShapeVector* output_shape_p, - bool is_nhwc = false) const { + Status ComputePadsAndOutputShape(TensorShape input_shape, int64_t output_channel, + const TensorShapeVector& kernel_shape, const TensorShapeVector& p_strides, + const TensorShapeVector& p_dilations, const TensorShapeVector& p_output_padding, const int64_t N, + ConvPadVector* p_pads, TensorShapeVector* output_shape_p, + bool is_nhwc = false) const { size_t output_shape_size = output_shape.size(); + size_t rank = input_shape.NumDimensions(); + + if (p_pads->size() != 2 * rank) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "pads size (", p_pads->size(), ") does not match expected size (2 * ", rank, ")."); + } + + // output_shape attribute, if specified, must have either 'rank' or 'rank + 
2' elements + if (output_shape_size != 0 && output_shape_size != rank && output_shape_size != rank + 2) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "output_shape attribute has ", output_shape_size, + " elements, expected ", rank, " or ", rank + 2, "."); + } + if (is_nhwc) { output_shape_p->insert(output_shape_p->begin(), {N}); } else { output_shape_p->insert(output_shape_p->begin(), {N, output_channel}); } - size_t rank = input_shape.NumDimensions(); for (size_t dim = 0; dim < rank; ++dim) { int64_t dim_size = -1; @@ -182,30 +258,35 @@ struct ConvTransposeAttributes : public ConvAttributes { dim_size = output_shape_size == rank ? output_shape[dim] : output_shape[dim + 2]; } - ComputeTransposePadAndOutputShape( + ORT_RETURN_IF_ERROR(ComputeTransposePadAndOutputShape( input_shape[dim], p_strides[dim], kernel_shape[dim], p_dilations[dim], p_output_padding[dim], auto_pad, - &p_pads->at(dim), - &p_pads->at(input_shape.NumDimensions() + dim), - &dim_size); + &(*p_pads)[dim], + &(*p_pads)[input_shape.NumDimensions() + dim], + &dim_size)); - ORT_ENFORCE(dim_size > 0, "Invalid input shape: ", input_shape.ToString()); + if (dim_size <= 0) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "Computed output dimension is <= 0 for dim ", dim, + ". 
Input shape: ", input_shape.ToString()); + } output_shape_p->push_back(dim_size); } if (is_nhwc) { output_shape_p->push_back(output_channel); } + return Status::OK(); } TensorShapeVector output_padding; TensorShapeVector output_shape; private: - void ComputeTransposePadAndOutputShape( + Status ComputeTransposePadAndOutputShape( const int64_t in_size, const int64_t stride, const int64_t kernel, @@ -217,27 +298,48 @@ struct ConvTransposeAttributes : public ConvAttributes { int64_t* out_size) const { // Output shape is explicitly provided - pad values will have to be computed if (*out_size != -1) { - ORT_ENFORCE(*out_size >= 0); + if (*out_size < 0) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "Explicit output size is negative: ", *out_size); + } // total pad auto total_pad = ComputeTotalPad(in_size, stride, adj, kernel, dilation, *out_size); DistributePadding(pad_type, total_pad, *pad_head, *pad_tail); - return; + return Status::OK(); } // Output shape is not provided - it needs to be computed along with pad values (if applicable) + // Validate that stride, kernel, and dilation are positive + if (stride <= 0) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Stride must be positive. Got: ", stride); + } + if (kernel <= 0) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Kernel size must be positive. Got: ", kernel); + } + if (dilation <= 0) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Dilation must be positive. Got: ", dilation); + } + if (adj < 0) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Output padding must be non-negative. 
Got: ", adj); + } + // Compute padding if the auto_pad attribute is SAME_UPPER/SAME_LOWER if (pad_type == AutoPadType::SAME_UPPER || pad_type == AutoPadType::SAME_LOWER) { // The ONNX spec says if `auto_pad` attribute is set, pad until the `out_size` // is `in_size * stride` + int64_t auto_out_size = SafeInt(in_size) * stride; auto total_pad = ComputeTotalPad(in_size, stride, adj, - kernel, dilation, /*out_size = */ in_size * stride); + kernel, dilation, auto_out_size); DistributePadding(pad_type, total_pad, *pad_head, *pad_tail); } - *out_size = - (in_size - 1) * stride + adj + (kernel - 1) * dilation + 1 - *pad_head - *pad_tail; + // *out_size = (in_size - 1) * stride + adj + (kernel - 1) * dilation + 1 - *pad_head - *pad_tail + *out_size = SafeInt(in_size - 1) * stride + adj + + SafeInt(kernel - 1) * dilation + 1 - + *pad_head - *pad_tail; + return Status::OK(); } }; diff --git a/onnxruntime/core/providers/cuda/nn/conv_transpose.cc b/onnxruntime/core/providers/cuda/nn/conv_transpose.cc index 808c0352e69c9..f1e7e5c41055a 100644 --- a/onnxruntime/core/providers/cuda/nn/conv_transpose.cc +++ b/onnxruntime/core/providers/cuda/nn/conv_transpose.cc @@ -2,6 +2,7 @@ // Copyright (c) 2023 NVIDIA Corporation. // Licensed under the MIT License. +#include #include #include @@ -270,9 +271,28 @@ Status ConvTranspose::UpdateState(OpKernelContext* context, bool dyna const Tensor* Pads = dynamic_padding ? context->Input(2) : nullptr; + // ConvTranspose requires X shape (N x C x D1...Dn) and W shape (C x M/group x k1...kn), + // both must have at least 3 dimensions. Check before dims-changed comparison because + // a scalar (rank 0) has empty dims which matches the default-initialized last_x_dims, + // causing the validation block to be skipped entirely. 
+ const size_t rank = x_shape.NumDimensions(); + if (rank < 3) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "Input X must have at least 3 dimensions (N x C x D1...Dn).", + " X: ", x_shape.ToString().c_str()); + } + + if (w_shape.NumDimensions() < 3) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "Filter W must have at least 3 dimensions (C x M/group x k1...kn).", + " W: ", w_shape.ToString().c_str()); + } + bool input_dims_changed = (s_.last_x_dims != x_dims); bool w_dims_changed = (s_.last_w_dims != w_dims); - if (input_dims_changed || w_dims_changed) { + // When dynamic_padding is used, Pads can change between calls even when X/W shapes + // stay the same, so we must always recompute the output shape and re-validate. + if (input_dims_changed || w_dims_changed || dynamic_padding) { if (input_dims_changed) s_.last_x_dims = gsl::make_span(x_dims); @@ -282,7 +302,6 @@ Status ConvTranspose::UpdateState(OpKernelContext* context, bool dyna // The following code is from ConvTransposeAttributes::PrepareForCompute - const int rank = static_cast(X->Shape().NumDimensions()); TensorShape input_shape = X->Shape().Slice(channels_last ? 1 : 2, channels_last ? rank - 1 : rank); const int64_t num_input_channels = channels_last ? 
X->Shape()[rank - 1] : X->Shape()[1]; const int64_t N = X->Shape()[0]; @@ -336,11 +355,35 @@ Status ConvTranspose::UpdateState(OpKernelContext* context, bool dyna if (local_output_padding.empty()) { local_output_padding.resize(kernel_shape.size(), 0); } + if (local_output_padding.size() != kernel_shape.size()) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "output_padding size (", local_output_padding.size(), + ") does not match the number of spatial dimensions (", kernel_shape.size(), ")."); + } ConvPadVector pads; pads.reserve(2 * (input_shape.NumDimensions())); if (dynamic_padding) { - for (int64_t i = 0; i < Pads->Shape().SizeFromDimension(0); ++i) { - pads.push_back(Pads->Data()[i]); + if (Pads == nullptr) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "Pads input is required in dynamic padding mode."); + } + if (Pads->Shape().NumDimensions() != 1) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "Dynamic pads tensor must be 1-D. Got rank: ", Pads->Shape().NumDimensions()); + } + const int64_t expected_pads_size = static_cast(kernel_shape.size()) * 2; + if (Pads->Shape()[0] != expected_pads_size) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "Dynamic pads tensor size (", Pads->Shape()[0], + ") does not match expected size (2 * spatial_dims = ", expected_pads_size, ")."); + } + const auto* pads_data = Pads->Data(); + for (int64_t i = 0; i < Pads->Shape()[0]; ++i) { + if (pads_data[i] < 0) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "Dynamic pads must be non-negative. 
Got pads[", i, "] = ", pads_data[i]); + } + pads.push_back(pads_data[i]); } } else { pads.assign(conv_transpose_attrs_.pads.begin(), conv_transpose_attrs_.pads.end()); @@ -357,10 +400,32 @@ Status ConvTranspose::UpdateState(OpKernelContext* context, bool dyna strides.resize(kernel_shape.size(), 1); } + if (strides.size() != kernel_shape.size()) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "strides size (", strides.size(), + ") does not match the number of spatial dimensions (", kernel_shape.size(), ")."); + } + if (dilations.size() != kernel_shape.size()) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "dilations size (", dilations.size(), + ") does not match the number of spatial dimensions (", kernel_shape.size(), ")."); + } + + // ONNX spec: "output_padding[i] should be less than max(stride[i], dilation[i])". + for (size_t i = 0; i < local_output_padding.size(); ++i) { + int64_t limit = std::max(strides[i], dilations[i]); + if (local_output_padding[i] >= limit) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "output_padding[", i, "] (", local_output_padding[i], + ") must be less than max(stride, dilation) (", limit, + ") for spatial dimension ", i, "."); + } + } + TensorShapeVector y_dims; - conv_transpose_attrs_.ComputePadsAndOutputShape(input_shape, num_output_channels, kernel_shape, - strides, dilations, local_output_padding, N, &pads, &y_dims, channels_last); + ORT_RETURN_IF_ERROR(conv_transpose_attrs_.ComputePadsAndOutputShape(input_shape, num_output_channels, kernel_shape, + strides, dilations, local_output_padding, N, &pads, &y_dims, channels_last)); s_.y_dims = gsl::make_span(y_dims); s_.Y = context->Output(0, s_.y_dims); diff --git a/onnxruntime/core/providers/webgpu/nn/conv_transpose.cc b/onnxruntime/core/providers/webgpu/nn/conv_transpose.cc index 488fc75382040..5132cc51d0a0a 100644 --- a/onnxruntime/core/providers/webgpu/nn/conv_transpose.cc +++ b/onnxruntime/core/providers/webgpu/nn/conv_transpose.cc @@ -19,19 
+19,36 @@ Status ConvTranspose::ComputeInternal(ComputeContext& context) const auto* filter = context.Input(1); TensorShape input_shape = input->Shape(); TensorShape filter_shape = filter->Shape(); + + const auto rank = input_shape.NumDimensions(); + if (rank < 3) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "Input X must have at least 3 dimensions (N x C x D1...Dn).", + " X: ", input_shape.ToString().c_str()); + } + if (filter_shape.NumDimensions() < 3) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "Filter W must have at least 3 dimensions (C x M/group x k1...kn).", + " W: ", filter_shape.ToString().c_str()); + } + const InlinedVector perm = {2, 3, 0, 1}; TensorShapeVector local_output_padding(conv_transpose_attrs_.output_padding.begin(), conv_transpose_attrs_.output_padding.end()); ConvAttributes::ConvPadVector local_pads(conv_transpose_attrs_.pads.begin(), conv_transpose_attrs_.pads.end()); TensorShapeVector local_dilations(conv_transpose_attrs_.dilations.begin(), conv_transpose_attrs_.dilations.end()); TensorShapeVector local_strides(conv_transpose_attrs_.strides.begin(), conv_transpose_attrs_.strides.end()); TensorShapeVector kernel_shape_vector; - auto rank = input_shape.NumDimensions(); TensorShape input_spacial_shape = input_shape.Slice(is_channels_last ? 1 : 2, is_channels_last ? 
rank - 1 : rank); local_pads.reserve(2 * (input_spacial_shape.NumDimensions())); ORT_RETURN_IF_ERROR(conv_transpose_attrs_.ComputeKernelShape(filter_shape, kernel_shape_vector, false)); if (local_output_padding.empty()) { local_output_padding.resize(kernel_shape_vector.size(), 0); } + if (local_output_padding.size() != kernel_shape_vector.size()) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "output_padding size (", local_output_padding.size(), + ") does not match the number of spatial dimensions (", kernel_shape_vector.size(), ")."); + } if (local_pads.empty()) { local_pads.resize(kernel_shape_vector.size() * 2, 0); } @@ -41,11 +58,31 @@ Status ConvTranspose::ComputeInternal(ComputeContext& context) if (local_strides.empty()) { local_strides.resize(kernel_shape_vector.size(), 1); } + if (local_strides.size() != kernel_shape_vector.size()) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "strides size (", local_strides.size(), + ") does not match the number of spatial dimensions (", kernel_shape_vector.size(), ")."); + } + if (local_dilations.size() != kernel_shape_vector.size()) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "dilations size (", local_dilations.size(), + ") does not match the number of spatial dimensions (", kernel_shape_vector.size(), ")."); + } + // ONNX spec: "output_padding[i] should be less than max(stride[i], dilation[i])". 
+ for (size_t i = 0; i < local_output_padding.size(); ++i) { + int64_t limit = std::max(local_strides[i], local_dilations[i]); + if (local_output_padding[i] >= limit) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "output_padding[", i, "] (", local_output_padding[i], + ") must be less than max(stride, dilation) (", limit, + ") for spatial dimension ", i, "."); + } + } auto group = conv_transpose_attrs_.group; auto num_output_channels = group * filter_shape[1]; auto batch_size = input_shape[0]; TensorShapeVector output_shape_vector; - conv_transpose_attrs_.ComputePadsAndOutputShape(input_spacial_shape, num_output_channels, kernel_shape_vector, local_strides, local_dilations, local_output_padding, batch_size, &local_pads, &output_shape_vector, is_channels_last); + ORT_RETURN_IF_ERROR(conv_transpose_attrs_.ComputePadsAndOutputShape(input_spacial_shape, num_output_channels, kernel_shape_vector, local_strides, local_dilations, local_output_padding, batch_size, &local_pads, &output_shape_vector, is_channels_last)); TensorShape computed_output_shape(output_shape_vector); std::vector strides; std::vector pads; diff --git a/onnxruntime/core/providers/xnnpack/nn/conv_base.cc b/onnxruntime/core/providers/xnnpack/nn/conv_base.cc index 9742f397315a7..df43dfe1384ef 100644 --- a/onnxruntime/core/providers/xnnpack/nn/conv_base.cc +++ b/onnxruntime/core/providers/xnnpack/nn/conv_base.cc @@ -490,11 +490,14 @@ ConvBase::ConvBase(const OpKernelInfo& info, bool is_transpose) if (conv_transpose_attrs_.output_padding.empty()) { conv_transpose_attrs_.output_padding.resize(kernel_shape_.size(), 0); } + ORT_ENFORCE(conv_transpose_attrs_.output_padding.size() == kernel_shape_.size(), + "output_padding size (", conv_transpose_attrs_.output_padding.size(), + ") does not match the number of spatial dimensions (", kernel_shape_.size(), ")."); - conv_transpose_attrs_.ComputePadsAndOutputShape( + ORT_THROW_IF_ERROR(conv_transpose_attrs_.ComputePadsAndOutputShape( input_shape, M_, 
kernel_shape_, conv_transpose_attrs_.strides, conv_transpose_attrs_.dilations, - conv_transpose_attrs_.output_padding, 1, &conv_transpose_attrs_.pads, &output_shape_); + conv_transpose_attrs_.output_padding, 1, &conv_transpose_attrs_.pads, &output_shape_)); output_shape_[1] = output_shape_[2]; if (rank == 4) { diff --git a/onnxruntime/test/contrib_ops/conv_transpose_with_dynamic_pads_test.cc b/onnxruntime/test/contrib_ops/conv_transpose_with_dynamic_pads_test.cc index 092d07cc0e9a6..345a143d5b87c 100644 --- a/onnxruntime/test/contrib_ops/conv_transpose_with_dynamic_pads_test.cc +++ b/onnxruntime/test/contrib_ops/conv_transpose_with_dynamic_pads_test.cc @@ -3,6 +3,7 @@ #include "gtest/gtest.h" #include "test/providers/provider_test_utils.h" +#include "default_providers.h" namespace onnxruntime { namespace test { @@ -19,5 +20,132 @@ TEST(ContribOpTest, ConvTransposeWithDynamicPads) { test.AddOutput("Y", {1, 1, 6, 6}, std::vector{0.07368518f, -0.08925839f, -0.06627201f, 0.06301362f, 0.03732984f, -0.01919658f, -0.00628807f, -0.02817563f, -0.01472169f, 0.04392925f, -0.00689478f, -0.01549204f, 0.07957941f, -0.11459791f, -0.09505399f, 0.07681622f, 0.03604182f, -0.01853423f, -0.0270785f, -0.00680824f, -0.06650258f, 0.08004665f, 0.07918708f, -0.0724144f, 0.06256775f, -0.17838378f, -0.18863615f, 0.20064656f, 0.133717f, -0.06876295f, -0.06398046f, -0.00864975f, 0.19289537f, -0.01490572f, -0.13673618f, 0.01949645f}); test.Run(); } + +// Test that a rank-0 W input is gracefully rejected rather than causing undefined behavior. +// These tests exercise shape inference which uses fail_shape_inference (throws InferenceError). +// In no-exception builds, fail_shape_inference calls abort(), so these tests must be skipped. 
+#ifndef ORT_NO_EXCEPTIONS +TEST(ContribOpTest, ConvTransposeWithDynamicPads_InvalidWeightRank0) { + OpTester test("ConvTransposeWithDynamicPads", 1, onnxruntime::kMSDomain); + test.AddAttribute("kernel_shape", std::vector{3, 3}); + test.AddAttribute("strides", std::vector{2, 2}); + test.AddAttribute("dilations", std::vector{1, 1}); + + test.AddInput("X", {1, 1, 3, 3}, std::vector(9, 1.0f)); + test.AddInput("W", {}, std::vector{1.0f}); // scalar (rank 0) + test.AddInput("Pads", {4}, std::vector{1, 1, 1, 1}); + test.AddOutput("Y", {}, std::vector{0.0f}); + test.Run(OpTester::ExpectResult::kExpectFailure, "Weight tensor must have at least 3 dimensions", + {kTensorrtExecutionProvider}); +} + +// Test that a rank-1 W input is gracefully rejected. +TEST(ContribOpTest, ConvTransposeWithDynamicPads_InvalidWeightRank1) { + OpTester test("ConvTransposeWithDynamicPads", 1, onnxruntime::kMSDomain); + test.AddAttribute("kernel_shape", std::vector{3, 3}); + test.AddAttribute("strides", std::vector{2, 2}); + test.AddAttribute("dilations", std::vector{1, 1}); + + test.AddInput("X", {1, 1, 3, 3}, std::vector(9, 1.0f)); + test.AddInput("W", {9}, std::vector(9, 1.0f)); // rank 1 + test.AddInput("Pads", {4}, std::vector{1, 1, 1, 1}); + test.AddOutput("Y", {}, std::vector{0.0f}); + test.Run(OpTester::ExpectResult::kExpectFailure, "Weight tensor must have at least 3 dimensions", + {kTensorrtExecutionProvider}); +} + +// Test that a rank-2 input is rejected (requires at least 3 dims for ConvTranspose). 
+TEST(ContribOpTest, ConvTransposeWithDynamicPads_InvalidInputRank2) { + OpTester test("ConvTransposeWithDynamicPads", 1, onnxruntime::kMSDomain); + test.AddAttribute("kernel_shape", std::vector{3, 3}); + test.AddAttribute("strides", std::vector{2, 2}); + test.AddAttribute("dilations", std::vector{1, 1}); + + test.AddInput("X", {1, 1}, std::vector{1.0f}); // rank 2 + test.AddInput("W", {1, 1, 3, 3}, std::vector(9, 1.0f)); + test.AddInput("Pads", {4}, std::vector{1, 1, 1, 1}); + test.AddOutput("Y", {}, std::vector{0.0f}); + test.Run(OpTester::ExpectResult::kExpectFailure, "Input tensor must have at least 3 dimensions", + {kTensorrtExecutionProvider}); +} +#endif // !ORT_NO_EXCEPTIONS + +// Test that incorrectly sized dynamic pads are rejected. +// This runs through kernel validation (not shape inference) so it works in no-exception builds. +TEST(ContribOpTest, ConvTransposeWithDynamicPads_InvalidPadsSize) { + OpTester test("ConvTransposeWithDynamicPads", 1, onnxruntime::kMSDomain); + test.AddShapeToTensorData(false); + test.AddAttribute("kernel_shape", std::vector{3, 3}); + test.AddAttribute("strides", std::vector{2, 2}); + test.AddAttribute("dilations", std::vector{1, 1}); + + test.AddInput("X", {1, 1, 3, 3}, std::vector(9, 1.0f)); + test.AddInput("W", {1, 1, 3, 3}, std::vector(9, 1.0f)); + test.AddInput("Pads", {3}, std::vector{0, 0, 0}); // Wrong size: should be 4 + test.AddOutput("Y", {1, 1, 5, 5}, std::vector(25, 0.0f)); + + test.Run(OpTester::ExpectResult::kExpectFailure, "does not match expected size", + {kTensorrtExecutionProvider, kQnnExecutionProvider, kDmlExecutionProvider}); +} + +// Test that negative pad values are rejected. 
+TEST(ContribOpTest, ConvTransposeWithDynamicPads_NegativePads) { + OpTester test("ConvTransposeWithDynamicPads", 1, onnxruntime::kMSDomain); + test.AddShapeToTensorData(false); + test.AddAttribute("kernel_shape", std::vector{3, 3}); + test.AddAttribute("strides", std::vector{2, 2}); + test.AddAttribute("dilations", std::vector{1, 1}); + + test.AddInput("X", {1, 1, 3, 3}, std::vector(9, 1.0f)); + test.AddInput("W", {1, 1, 3, 3}, std::vector(9, 1.0f)); + test.AddInput("Pads", {4}, std::vector{-1, 0, 0, 0}); // Negative pad + test.AddOutput("Y", {1, 1, 5, 5}, std::vector(25, 0.0f)); + + test.Run(OpTester::ExpectResult::kExpectFailure, "non-negative", + {kTensorrtExecutionProvider, kQnnExecutionProvider, kDmlExecutionProvider}); +} + +// DML-specific tests for invalid dynamic pads. +// DML validates operator parameters internally before ORT kernel code runs. When inputs are +// invalid, DML's COM/HRESULT boundary strips the descriptive message and re-throws with just +// E_INVALIDARG (0x80070057), surfacing as "The parameter is incorrect." on Windows. +// We still want to verify DML rejects these inputs rather than crashing, so we test separately +// with the DML-specific error text. 
+#ifdef USE_DML +TEST(ContribOpTest, ConvTransposeWithDynamicPads_InvalidPadsSize_Dml) { + OpTester test("ConvTransposeWithDynamicPads", 1, onnxruntime::kMSDomain); + test.AddShapeToTensorData(false); + test.AddAttribute("kernel_shape", std::vector{3, 3}); + test.AddAttribute("strides", std::vector{2, 2}); + test.AddAttribute("dilations", std::vector{1, 1}); + + test.AddInput("X", {1, 1, 3, 3}, std::vector(9, 1.0f)); + test.AddInput("W", {1, 1, 3, 3}, std::vector(9, 1.0f)); + test.AddInput("Pads", {3}, std::vector{0, 0, 0}); // Wrong size: should be 4 + test.AddOutput("Y", {1, 1, 5, 5}, std::vector(25, 0.0f)); + + test.ConfigEp(DefaultDmlExecutionProvider()) + .Config(OpTester::ExpectResult::kExpectFailure, "The parameter is incorrect") + .RunWithConfig(); +} + +TEST(ContribOpTest, ConvTransposeWithDynamicPads_NegativePads_Dml) { + OpTester test("ConvTransposeWithDynamicPads", 1, onnxruntime::kMSDomain); + test.AddShapeToTensorData(false); + test.AddAttribute("kernel_shape", std::vector{3, 3}); + test.AddAttribute("strides", std::vector{2, 2}); + test.AddAttribute("dilations", std::vector{1, 1}); + + test.AddInput("X", {1, 1, 3, 3}, std::vector(9, 1.0f)); + test.AddInput("W", {1, 1, 3, 3}, std::vector(9, 1.0f)); + test.AddInput("Pads", {4}, std::vector{-1, 0, 0, 0}); // Negative pad + test.AddOutput("Y", {1, 1, 5, 5}, std::vector(25, 0.0f)); + + test.ConfigEp(DefaultDmlExecutionProvider()) + .Config(OpTester::ExpectResult::kExpectFailure, "The parameter is incorrect") + .RunWithConfig(); +} +#endif // USE_DML + } // namespace test } // namespace onnxruntime diff --git a/onnxruntime/test/providers/cpu/nn/conv_transpose_op_test.cc b/onnxruntime/test/providers/cpu/nn/conv_transpose_op_test.cc index a3b039a9694fe..4553537219409 100644 --- a/onnxruntime/test/providers/cpu/nn/conv_transpose_op_test.cc +++ b/onnxruntime/test/providers/cpu/nn/conv_transpose_op_test.cc @@ -1573,5 +1573,77 @@ TEST(ConvTransposeTest, ConvTranspose_ZeroDilation_Dml) { .RunWithConfig(); } 
+TEST(ConvTransposeTest, ConvTranspose_InvalidInputRank0) {
+  OpTester test("ConvTranspose", 11);
+  // Skip ONNX shape inference which may crash on invalid-rank inputs (not our code to fix).
+  // Must be set before AddInput/AddOutput so type protos are built without shape info.
+  test.AddShapeToTensorData(false);
+  test.AddInput<float>("X", {}, {1.0f});
+  test.AddInput<float>("W", {}, {1.0f});
+  test.AddOutput<float>("Y", {0}, {});
+
+  test.Run(OpTester::ExpectResult::kExpectFailure, "at least 3 dimensions",
+           {kTensorrtExecutionProvider, kQnnExecutionProvider, kDmlExecutionProvider});
+}
+
+TEST(ConvTransposeTest, ConvTranspose_InvalidInputRank1) {
+  OpTester test("ConvTranspose", 11);
+  test.AddShapeToTensorData(false);
+  test.AddInput<float>("X", {2}, {1.0f, 2.0f});
+  test.AddInput<float>("W", {2}, {1.0f, 2.0f});
+  test.AddOutput<float>("Y", {0}, {});
+
+  test.Run(OpTester::ExpectResult::kExpectFailure, "at least 3 dimensions",
+           {kTensorrtExecutionProvider, kQnnExecutionProvider, kDmlExecutionProvider});
+}
+
+TEST(ConvTransposeTest, ConvTranspose_InvalidInputRank2) {
+  OpTester test("ConvTranspose", 11);
+  test.AddShapeToTensorData(false);
+  test.AddInput<float>("X", {1, 1}, {1.0f});
+  test.AddInput<float>("W", {1, 1}, {1.0f});
+  test.AddOutput<float>("Y", {0}, {});
+
+  test.Run(OpTester::ExpectResult::kExpectFailure, "at least 3 dimensions",
+           {kTensorrtExecutionProvider, kQnnExecutionProvider, kDmlExecutionProvider});
+}
+
+TEST(ConvTransposeTest, ConvTranspose_InvalidWeightRank0) {
+  OpTester test("ConvTranspose", 11);
+  test.AddShapeToTensorData(false);
+  test.AddInput<float>("X", {1, 1, 3}, {1.0f, 2.0f, 3.0f});
+  test.AddInput<float>("W", {}, {1.0f});
+  test.AddOutput<float>("Y", {0}, {});
+
+  test.Run(OpTester::ExpectResult::kExpectFailure, "at least 3 dimensions",
+           {kTensorrtExecutionProvider, kQnnExecutionProvider, kDmlExecutionProvider});
+}
+
+TEST(ConvTransposeTest, ConvTranspose_InvalidOutputPaddingSize) {
+  OpTester test("ConvTranspose", 11);
+  test.AddShapeToTensorData(false);
+  test.AddAttribute("output_padding", std::vector<int64_t>{0, 0, 0});  // 3 values for 2D spatial
+  test.AddInput<float>("X", {1, 1, 3, 3}, std::vector<float>(9, 1.0f));
+  test.AddInput<float>("W", {1, 1, 3, 3}, std::vector<float>(9, 1.0f));
+  test.AddOutput<float>("Y", {0}, {});
+
+  test.Run(OpTester::ExpectResult::kExpectFailure, "output_padding size",
+           {kTensorrtExecutionProvider, kQnnExecutionProvider, kDmlExecutionProvider, kWebGpuExecutionProvider});
+}
+
+TEST(ConvTransposeTest, ConvTranspose_OutputPaddingExceedsStride) {
+  OpTester test("ConvTranspose", 11);
+  test.AddShapeToTensorData(false);
+  // output_padding[i] must be < max(stride[i], dilation[i]). stride=2, so output_padding must be < 2.
+  test.AddAttribute("strides", std::vector<int64_t>{2, 2});
+  test.AddAttribute("output_padding", std::vector<int64_t>{2, 2});
+  test.AddInput<float>("X", {1, 1, 3, 3}, std::vector<float>(9, 1.0f));
+  test.AddInput<float>("W", {1, 1, 3, 3}, std::vector<float>(9, 1.0f));
+  test.AddOutput<float>("Y", {0}, {});
+
+  test.Run(OpTester::ExpectResult::kExpectFailure, "output_padding",
+           {kTensorrtExecutionProvider, kQnnExecutionProvider, kDmlExecutionProvider, kWebGpuExecutionProvider});
+}
+
 }  // namespace test
 }  // namespace onnxruntime
diff --git a/onnxruntime/test/providers/cuda/nhwc/conv_transpose_test.cc b/onnxruntime/test/providers/cuda/nhwc/conv_transpose_test.cc
index 13a8f42e1fd4c..dfa63ae35bd21 100644
--- a/onnxruntime/test/providers/cuda/nhwc/conv_transpose_test.cc
+++ b/onnxruntime/test/providers/cuda/nhwc/conv_transpose_test.cc
@@ -16,7 +16,7 @@ struct ConvTransposeOp {
   bool bias = false;
   std::vector<int64_t> strides = {1, 1};
   std::vector<int64_t> padding = {0, 0, 0, 0};
-  std::vector<int64_t> output_padding = {0, 0, 0, 0};
+  std::vector<int64_t> output_padding;
   std::vector<int64_t> dilations = {1, 1};
 
   std::unique_ptr<CompareOpTester> get_test() {
@@ -48,8 +48,6 @@ struct ConvTransposeOp {
     test->AddAttribute("pads", padding);
     if (!output_padding.empty()) {
       test->AddAttribute("output_padding", output_padding);
-    } else {
-      output_padding = {0, 0, 0, 0};
     }
 
     // the test input is NCHW so calculate output based on that. conversion to/from NHWC is internal to execution.
@@ -57,9 +55,11 @@ struct ConvTransposeOp {
 
     for (size_t i = 0, end = is_1D ? 1 : 2; i < end; ++i) {
       // formula from https://github.com/onnx/onnx/blob/main/docs/Operators.md#ConvTranspose
+      assert(output_padding.empty() || output_padding.size() >= end);
       const size_t start_pad = i * 2;
+      int64_t out_pad = i < output_padding.size() ? output_padding[i] : 0;
       output_dims.push_back(
-          strides[i] * (input_dims[i + 2] - 1) + output_padding[i] +
+          strides[i] * (input_dims[i + 2] - 1) + out_pad +
           ((kernel_shape[i] - 1) * dilations[i] + 1) - padding[start_pad] - padding[start_pad + 1]);
     }
 
@@ -132,7 +132,7 @@ TYPED_TEST(CudaNhwcTypedTest, ConvTransposeNhwcOutPad) {
   op.kernel_shape = {3, 3};
   op.channels = 32;
   op.strides = {2, 2};
-  op.output_padding = {1, 1, 1, 1};
+  op.output_padding = {1, 1};
 
   MAKE_PROVIDERS_EPS_TYPE(TypeParam)
 }