Address MaxUnpool shortcomings msrc116345 (#28550)

yuslepukhin · web-flow · commit a894893f3cfc · 2026-05-21T09:47:58.000-07:00
This pull request enhances the robustness of the `MaxUnpool` operator in ONNX Runtime by adding input validation and expanding test coverage for invalid input scenarios. The changes improve error handling for mismatched shapes, invalid dimensions, and other edge cases, ensuring the operator fails gracefully and predictably when given incorrect inputs.

**Operator input validation improvements:**

* Added runtime checks in `MaxUnpool::Compute` to ensure the `kernel_shape` rank matches the expected pooling dimensions, and that the indices tensor is present and correctly shaped.
* Added validation to ensure that computed output dimensions are positive, with descriptive error messages if not.
* Enforced that the `output_shape` tensor, if provided, must have the same number of elements as the rank of the input tensor.

**Test coverage enhancements:**

* Introduced multiple new tests in `unpool_op_test.cc` to cover invalid input cases, including mismatched indices shapes, rank-0 and rank-2 input tensors, negative indices, and incorrect `output_shape` element counts. These tests confirm that the operator fails with appropriate error messages in these scenarios.

**References**

* onnx/onnx#7997
* #28524
diff --git a/onnxruntime/core/providers/cpu/nn/Unpool.cc b/onnxruntime/core/providers/cpu/nn/Unpool.cc
@@ -46,32 +46,43 @@ Status MaxUnpool::Compute(OpKernelContext* context) const {
   const TensorShape& X_shape = X->Shape();
   const auto* X_data = X->Data<float>();
 
+  // Spec: "Dimensions ... are in the form of (N x C x D1 x D2 ... Dn)" — minimum rank is 3.
   ORT_RETURN_IF_NOT(X_shape.NumDimensions() >= 3, "Input dimension cannot be less than 3.");
 
-  // Supported sizes check
+  // Implementation limitation: only 1D/2D/3D spatial pooling supported.
   size_t pooling_dims = X_shape.NumDimensions() - 2;
   if (pooling_dims > 3) {
     return Status(ONNXRUNTIME, INVALID_ARGUMENT, "Unsupported pooling size.");
   }
 
+  // Spec: "The size of the kernel along each axis" — must match number of spatial dims.
+  ORT_RETURN_IF_NOT(kernel_shape_.size() == pooling_dims,
+                    "kernel_shape rank mismatch: expected ", pooling_dims, " got ", kernel_shape_.size());
+
   // Get pooled index tensor
   const auto* I = context->Input<Tensor>(1);
   const TensorShape& I_shape = I->Shape();
   const auto* I_data = I->Data<int64_t>();
 
+  // Spec: Input I "Dimensions must be the same as input tensor X."
   ORT_RETURN_IF_NOT(I_shape == X_shape, "Index tensor shape should be same as that of the input data tensor to unpool.");
 
   // Calculate output tensor shape from attributes
-  std::vector<int64_t> inferred_output_dims(X_shape.NumDimensions());
+  TensorShapeVector inferred_output_dims(X_shape.NumDimensions());
 
   // Copy batch and channel dims
   inferred_output_dims[0] = X_shape[0];
   inferred_output_dims[1] = X_shape[1];
 
   // For feature dims calculate reversing the formula used for MaxPool
   for (size_t dim = 0; dim < kernel_shape_.size(); ++dim) {
-    inferred_output_dims[dim + 2] =
-        (X_shape[dim + 2] - 1) * strides_[dim] - (pads_[dim] + pads_[kernel_shape_.size() + dim]) + kernel_shape_[dim];
+    int64_t dim_value = (X_shape[dim + 2] - 1) * strides_[dim] -
+                        (pads_[dim] + pads_[kernel_shape_.size() + dim]) + kernel_shape_[dim];
+    // Each inferred spatial dim must be positive for a valid unpooling configuration.
+    ORT_RETURN_IF_NOT(dim_value > 0,
+                      "Computed output dimension is not positive for axis ", dim + 2,
+                      ". Check kernel_shape, strides, and pads attributes.");
+    inferred_output_dims[dim + 2] = dim_value;
   }
 
   TensorShape shape(inferred_output_dims);
@@ -80,14 +91,29 @@ Status MaxUnpool::Compute(OpKernelContext* context) const {
     auto tensor_shape = context->Input<Tensor>(2);
     if (tensor_shape == nullptr)
       return Status(common::ONNXRUNTIME, common::FAIL, "input count mismatch");
+    // Spec: output_shape is a 1-D tensor of int64.
     ORT_RETURN_IF_NOT(tensor_shape->Shape().GetDims().size() == 1,
                       "Shape must be 1 dimensional as it's tensor data of a shape");
 
+    // Spec: output_shape specifies the full output shape (N x C x D1 x ... x Dn) — same rank as X.
+    ORT_RETURN_IF_NOT(
+        static_cast<size_t>(tensor_shape->Shape().Size()) == X_shape.NumDimensions(),
+        "output_shape must have the same number of elements as the rank of input tensor X."
+        " Got ",
+        tensor_shape->Shape().Size(), ", expected ", X_shape.NumDimensions());
+
     // Turn the shape tensor data into an actual shape
-    const auto* p_shape = tensor_shape->Data<int64_t>();
-    std::vector<int64_t> given_output_dims(p_shape, p_shape + tensor_shape->Shape().Size());
-    TensorShape given_shape(given_output_dims);
+    auto output_shape_span = tensor_shape->DataAsSpan<int64_t>();
+    TensorShape given_shape(output_shape_span);
+
+    // Spec: output shape is (N x C x D1 x ... x Dn) — batch and channel must match input.
+    ORT_RETURN_IF_NOT(given_shape[0] == X_shape[0] && given_shape[1] == X_shape[1],
+                      "output_shape batch and channel dimensions must match input. "
+                      "Expected [",
+                      X_shape[0], ", ", X_shape[1], "], got [",
+                      given_shape[0], ", ", given_shape[1], "].");
 
+    // Spec: output_shape disambiguates size — must be at least as large as the inferred minimum.
     ORT_RETURN_IF_NOT(given_shape.Size() >= shape.Size(),
                       "output_shape is smaller than minimum required. output_shape:", given_shape,
                       " inferred output shape:", shape);
@@ -97,18 +123,17 @@ Status MaxUnpool::Compute(OpKernelContext* context) const {
 
   // unpool
   size_t total_elements = narrow<size_t>(X_shape.Size());
-  size_t output_size = narrow<size_t>(shape.Size());
 
   Tensor* Y = context->Output(0, shape);
-  auto* Y_data = Y->MutableData<float>();
-  auto out = gsl::make_span(Y_data, output_size);
+  auto out = Y->MutableDataAsSpan<float>();
   std::fill_n(out.data(), out.size(), 0.f);
 
   for (size_t cur_elem = 0; cur_elem < total_elements; ++cur_elem) {
     const int64_t idx = I_data[cur_elem];
-    if (idx < 0 || idx >= static_cast<int64_t>(output_size)) {
+    // Spec: "the values in indices are in the range [0, N x C x D1 x ... x Dn)."
+    if (idx < 0 || idx >= static_cast<int64_t>(out.size())) {
       return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
-                             "Index value out of bounds. Got: ", idx, ". Valid range is [0, ", output_size, ").");
+                             "Index value out of bounds. Got: ", idx, ". Valid range is [0, ", out.size(), ").");
     }
 
     out[static_cast<size_t>(idx)] = X_data[cur_elem];
diff --git a/onnxruntime/core/providers/cpu/nn/unpool.h b/onnxruntime/core/providers/cpu/nn/unpool.h
@@ -30,6 +30,10 @@ class MaxUnpool : public OpKernel {
       strides_.resize(kernel_shape_.size(), 1);
     }
 
+    ORT_ENFORCE(pads_.size() == kernel_shape_.size() * 2,
+                "Pads attribute size must be twice the kernel_shape size. Got: ", pads_.size(),
+                ", expected: ", kernel_shape_.size() * 2);
+
     for (size_t dim = 0; dim < kernel_shape_.size(); ++dim) {
       ORT_ENFORCE(kernel_shape_[dim] > 0);
       ORT_ENFORCE(pads_[dim] < kernel_shape_[dim] && pads_[dim + kernel_shape_.size()] < kernel_shape_[dim],
diff --git a/onnxruntime/test/providers/cpu/nn/unpool_op_test.cc b/onnxruntime/test/providers/cpu/nn/unpool_op_test.cc