microsoft · yuslepukhin · May 26, 2026 · May 15, 2026 · May 15, 2026 · May 15, 2026
diff --git a/onnxruntime/core/graph/contrib_ops/contrib_defs.cc b/onnxruntime/core/graph/contrib_ops/contrib_defs.cc
@@ -56,10 +56,22 @@ void convTransposeWithDynamicPadsShapeInference(InferenceContext& ctx) {
   }
 
   int64_t group = getAttribute(ctx, "group", 1);
+  if (group <= 0) {
+    fail_shape_inference("group attribute must be positive. Got: ", group);
+  }
 
   auto input_shape = ctx.getInputType(0)->tensor_type().shape();
-  if (input_shape.dim_size() < 2) {
-    return;  // Input tensor should have at least two dimensions.
+  // ConvTranspose requires X=(N x C x D1...Dn) and W=(C x M/group x k1...kn), both rank >= 3.
+  // The upstream ONNX ConvTranspose shape inference only checks rank >= 2, which allows rank-2
+  // inputs to pass shape inference but crash at kernel execution time. We tighten the check here
+  // to fail early at model load with a clear error. Fixing ONNX upstream is tracked separately.
+  if (input_shape.dim_size() < 3) {
+    fail_shape_inference("Input tensor must have at least 3 dimensions. Got: ", input_shape.dim_size());
+  }
+
+  auto weight_shape = ctx.getInputType(1)->tensor_type().shape();
+  if (weight_shape.dim_size() < 3) {
+    fail_shape_inference("Weight tensor must have at least 3 dimensions. Got: ", weight_shape.dim_size());
   }
 
   // first dim is the batch axis and the next is the number of channels.
@@ -147,7 +159,7 @@ void convTransposeWithDynamicPadsShapeInference(InferenceContext& ctx) {
 
   *final_output_shape->add_dim() = input_shape.dim(0);
   *final_output_shape->add_dim() =
-      ctx.getInputType(1)->tensor_type().shape().dim(1) *
+      weight_shape.dim(1) *
       group;  // channels should be the second dim of second input multiply
   // group.
 

diff --git a/onnxruntime/core/providers/cpu/nn/conv_transpose.cc b/onnxruntime/core/providers/cpu/nn/conv_transpose.cc
@@ -76,16 +76,17 @@ Status ConvTranspose<float>::PrePack(const Tensor& tensor, int input_idx, Alloca
     size_t packed_filter_data_size = SafeInt<size_t>(packed_elements_per_group) * sizeof(float) * conv_transpose_attrs_.group;
     auto* packed_filter_data = alloc->Alloc(packed_filter_data_size);
 
+    // Wrap in BufferUniquePtr immediately to prevent leaks.
+    transposed_filter_ = BufferUniquePtr(packed_filter_data, BufferDeleter(std::move(alloc)));
+
     // Initialize memory to 0 as there could be some padding associated with pre-packed
     // buffer memory and we don not want it uninitialized and generate different hashes
     // if and when we try to cache this pre-packed buffer for sharing between sessions.
     memset(packed_filter_data, 0, packed_filter_data_size);
 
-    transposed_filter_ = BufferUniquePtr(packed_filter_data, BufferDeleter(std::move(alloc)));
-
     for (int64_t group_id = 0; group_id < conv_transpose_attrs_.group; ++group_id) {
       MlasTranspose(tensor.Data<float>() + (group_id * N * K),
-                    ((float*)packed_filter_data) + (group_id * packed_elements_per_group),
+                    static_cast<float*>(packed_filter_data) + (group_id * packed_elements_per_group),
                     K, N, nullptr);
     }
 

diff --git a/onnxruntime/core/providers/cpu/nn/conv_transpose_attributes.h b/onnxruntime/core/providers/cpu/nn/conv_transpose_attributes.h
@@ -18,7 +18,10 @@
 
 #pragma once
 
+#include <algorithm>
+
 #include "core/providers/cpu/nn/conv_attributes.h"
+#include "core/common/safeint.h"
 
 namespace onnxruntime {
 
@@ -61,6 +64,21 @@ struct ConvTransposeAttributes : public ConvAttributes {
     const Tensor* B = has_bias ? (dynamic_padding ? context->Input<Tensor>(3) : context->Input<Tensor>(2)) : nullptr;
 
     const int rank = static_cast<int>(X->Shape().NumDimensions());
+
+    // ConvTranspose requires X shape (N x C x D1...Dn) and W shape (C x M/group x k1...kn),
+    // both must have at least 3 dimensions.
+    if (rank < 3) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "Input X must have at least 3 dimensions (N x C x D1...Dn).",
+                             " X: ", X->Shape().ToString().c_str());
+    }
+
+    if (static_cast<int>(F_Shape.NumDimensions()) < 3) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "Filter W must have at least 3 dimensions (C x M/group x k1...kn).",
+                             " W: ", F_Shape.ToString().c_str());
+    }
+
     TensorShape input_shape = X->Shape().Slice(is_nhwc ? 1 : 2, is_nhwc ? rank - 1 : rank);
     const int64_t num_input_channels = is_nhwc ? X->Shape()[rank - 1] : X->Shape()[1];
     const int64_t N = X->Shape()[0];
@@ -119,11 +137,32 @@ struct ConvTransposeAttributes : public ConvAttributes {
     if (local_output_padding.empty()) {
       local_output_padding.resize(kernel_shape.size(), 0);
     }
+    if (local_output_padding.size() != kernel_shape.size()) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "output_padding size (", local_output_padding.size(),
+                             ") does not match the number of spatial dimensions (", kernel_shape.size(), ").");
+    }
     ConvPadVector local_pads;
     local_pads.reserve(2 * (input_shape.NumDimensions()));
     if (dynamic_padding) {
-      for (int64_t i = 0; i < Pads->Shape().SizeFromDimension(0); ++i) {
-        local_pads.push_back(Pads->Data<int64_t>()[i]);
+      if (Pads->Shape().NumDimensions() != 1) {
+        return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                               "Dynamic pads tensor must be 1-D. Got rank: ", Pads->Shape().NumDimensions());
+      }
+      const int64_t expected_pads_size = SafeInt<int64_t>(kernel_shape.size()) * 2;
+      const int64_t actual_pads_size = Pads->Shape().SizeFromDimension(0);
+      if (actual_pads_size != expected_pads_size) {
+        return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                               "Dynamic pads tensor size (", actual_pads_size,
+                               ") does not match expected size (2 * spatial_dims = ", expected_pads_size, ").");
+      }
+      const auto* pads_data = Pads->Data<int64_t>();
+      for (int64_t i = 0; i < actual_pads_size; ++i) {
+        if (pads_data[i] < 0) {
+          return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                                 "Dynamic pads must be non-negative. Got pads[", i, "] = ", pads_data[i]);
+        }
+        local_pads.push_back(pads_data[i]);
       }
     } else {
       local_pads.assign(pads.begin(), pads.end());
@@ -140,10 +179,34 @@ struct ConvTransposeAttributes : public ConvAttributes {
       local_strides.resize(kernel_shape.size(), 1);
     }
 
+    if (local_strides.size() != kernel_shape.size()) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "strides size (", local_strides.size(),
+                             ") does not match the number of spatial dimensions (", kernel_shape.size(), ").");
+    }
+    if (local_dilations.size() != kernel_shape.size()) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "dilations size (", local_dilations.size(),
+                             ") does not match the number of spatial dimensions (", kernel_shape.size(), ").");
+    }
+
+    // ONNX spec: "output_padding[i] should be less than max(stride[i], dilation[i])".
+    // This constraint ensures the output_padding is unambiguous — larger values would shift
+    // the output by more than one stride/dilation step, making the inverse of Conv ill-defined.
+    for (size_t i = 0; i < local_output_padding.size(); ++i) {
+      int64_t limit = std::max(local_strides[i], local_dilations[i]);
+      if (local_output_padding[i] >= limit) {
+        return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                               "output_padding[", i, "] (", local_output_padding[i],
+                               ") must be less than max(stride, dilation) (", limit,
+                               ") for spatial dimension ", i, ".");
+      }
+    }
+
     TensorShapeVector Y_dims;
 
-    ComputePadsAndOutputShape(input_shape, num_output_channels, kernel_shape,
-                              local_strides, local_dilations, local_output_padding, N, &local_pads, &Y_dims, is_nhwc);
+    ORT_RETURN_IF_ERROR(ComputePadsAndOutputShape(input_shape, num_output_channels, kernel_shape,
+                                                  local_strides, local_dilations, local_output_padding, N, &local_pads, &Y_dims, is_nhwc));
     TensorShape Yshape(Y_dims);
     Tensor* Y = context->Output(0, Yshape);
 
@@ -162,50 +225,68 @@ struct ConvTransposeAttributes : public ConvAttributes {
     return Status::OK();
   }
 
-  void ComputePadsAndOutputShape(TensorShape input_shape, int64_t output_channel,
-                                 const TensorShapeVector& kernel_shape, const TensorShapeVector& p_strides,
-                                 const TensorShapeVector& p_dilations, const TensorShapeVector& p_output_padding, const int64_t N,
-                                 ConvPadVector* p_pads, TensorShapeVector* output_shape_p,
-                                 bool is_nhwc = false) const {
+  Status ComputePadsAndOutputShape(TensorShape input_shape, int64_t output_channel,
+                                   const TensorShapeVector& kernel_shape, const TensorShapeVector& p_strides,
+                                   const TensorShapeVector& p_dilations, const TensorShapeVector& p_output_padding, const int64_t N,
+                                   ConvPadVector* p_pads, TensorShapeVector* output_shape_p,
+                                   bool is_nhwc = false) const {
     size_t output_shape_size = output_shape.size();
+    size_t rank = input_shape.NumDimensions();
+
+    if (p_pads->size() != 2 * rank) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "pads size (", p_pads->size(), ") does not match expected size (2 * ", rank, ").");
+    }
+
+    // output_shape attribute, if specified, has 'rank' entries or 'rank + 2' (first two skipped when indexing)
+    if (output_shape_size != 0 && output_shape_size != rank && output_shape_size != rank + 2) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "output_shape attribute has ", output_shape_size,
+                             " elements, expected ", rank, " or ", rank + 2, ".");
+    }
+
     if (is_nhwc) {
       output_shape_p->insert(output_shape_p->begin(), {N});
     } else {
       output_shape_p->insert(output_shape_p->begin(), {N, output_channel});
     }
 
-    size_t rank = input_shape.NumDimensions();
     for (size_t dim = 0; dim < rank; ++dim) {
       int64_t dim_size = -1;
 
       if (output_shape_size != 0) {
         dim_size = output_shape_size == rank ? output_shape[dim] : output_shape[dim + 2];
       }
 
-      ComputeTransposePadAndOutputShape(
+      ORT_RETURN_IF_ERROR(ComputeTransposePadAndOutputShape(
           input_shape[dim],
           p_strides[dim],
           kernel_shape[dim],
           p_dilations[dim],
           p_output_padding[dim],
           auto_pad,
-          &p_pads->at(dim),
-          &p_pads->at(input_shape.NumDimensions() + dim),
-          &dim_size);
+          &(*p_pads)[dim],
+          &(*p_pads)[rank + dim],
+          &dim_size));
 
-      ORT_ENFORCE(dim_size > 0, "Invalid input shape: ", input_shape.ToString());
+      if (dim_size <= 0) {
+        return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                               "Computed output dimension is <= 0 for dim ", dim,
+                               ". Input shape: ", input_shape.ToString());
+      }
       output_shape_p->push_back(dim_size);
     }
     if (is_nhwc) {
       output_shape_p->push_back(output_channel);
     }
+    return Status::OK();
   }
 
   TensorShapeVector output_padding;
   TensorShapeVector output_shape;
 
  private:
-  void ComputeTransposePadAndOutputShape(
+  Status ComputeTransposePadAndOutputShape(
       const int64_t in_size,
       const int64_t stride,
       const int64_t kernel,
@@ -217,27 +298,48 @@ struct ConvTransposeAttributes : public ConvAttributes {
       int64_t* out_size) const {
+    // Validate up front so the explicit output_shape path below is guarded as well as the computed path.
+    if (stride <= 0) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Stride must be positive. Got: ", stride);
+    }
+    if (kernel <= 0) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Kernel size must be positive. Got: ", kernel);
+    }
+    if (dilation <= 0) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Dilation must be positive. Got: ", dilation);
+    }
+    if (adj < 0) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Output padding must be non-negative. Got: ", adj);
+    }
+
     // Output shape is explicitly provided - pad values will have to be computed
     if (*out_size != -1) {
-      ORT_ENFORCE(*out_size >= 0);
+      if (*out_size < 0) {
+        return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                               "Explicit output size is negative: ", *out_size);
+      }
       // total pad
       auto total_pad = ComputeTotalPad(in_size, stride, adj,
                                        kernel, dilation, *out_size);
       DistributePadding(pad_type, total_pad, *pad_head, *pad_tail);
-      return;
+      return Status::OK();
     }
 
     // Output shape is not provided - it needs to be computed along with pad values (if applicable)
 
     // Compute padding if the auto_pad attribute is SAME_UPPER/SAME_LOWER
     if (pad_type == AutoPadType::SAME_UPPER || pad_type == AutoPadType::SAME_LOWER) {
       // The ONNX spec says if `auto_pad` attribute is set, pad until the `out_size`
       // is `in_size * stride`
+      int64_t auto_out_size = SafeInt<int64_t>(in_size) * stride;
       auto total_pad = ComputeTotalPad(in_size, stride, adj,
-                                       kernel, dilation, /*out_size = */ in_size * stride);
+                                       kernel, dilation, auto_out_size);
       DistributePadding(pad_type, total_pad, *pad_head, *pad_tail);
     }
 
-    *out_size =
-        (in_size - 1) * stride + adj + (kernel - 1) * dilation + 1 - *pad_head - *pad_tail;
+    // *out_size = (in_size - 1) * stride + adj + (kernel - 1) * dilation + 1 - *pad_head - *pad_tail
+    *out_size = SafeInt<int64_t>(in_size - 1) * stride + adj +
+                SafeInt<int64_t>(kernel - 1) * dilation + 1 -
+                *pad_head - *pad_tail;
+    return Status::OK();
   }
 };