microsoft
diff --git a/onnxruntime/contrib_ops/cpu/sparse/sparse_attention_helper.h b/onnxruntime/contrib_ops/cpu/sparse/sparse_attention_helper.h
Lines changed: 16 additions & 3 deletions
diff --git a/onnxruntime/core/graph/graph_flatbuffers_utils.cc b/onnxruntime/core/graph/graph_flatbuffers_utils.cc
Lines changed: 73 additions & 11 deletions
diff --git a/onnxruntime/core/providers/cpu/rnn/deep_cpu_gru.cc b/onnxruntime/core/providers/cpu/rnn/deep_cpu_gru.cc
Lines changed: 16 additions & 18 deletions
diff --git a/onnxruntime/core/providers/cpu/rnn/rnn_helpers.h b/onnxruntime/core/providers/cpu/rnn/rnn_helpers.h
Lines changed: 23 additions & 0 deletions
@@ -97,7 +97,7 @@ Status CheckInputs(void* params,
 
     if (key->Shape() != value->Shape()) {
       return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
-                             "Input 'query' and 'value' shall have same shape");
+                             "Input 'key' and 'value' shall have same shape");
     }
   } else {
     // packed qkv
@@ -197,13 +197,26 @@ Status CheckInputs(void* params,
                            past_key_dims[3]);
   }
 
-  // Check the shape of total_key_sequence_lengths. We do not check the values here.
+  // Check the shape and values of total_key_sequence_lengths.
   const auto& k_len_dim = total_key_lengths->Shape().GetDims();
-  if (k_len_dim.size() != 1 && k_len_dim[0] != batch_size) {
+  if (k_len_dim.size() != 1 || k_len_dim[0] != batch_size) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                            "key_total_sequence_lengths must have shape (batch_size).");
   }
 
+  const auto* key_len_data = total_key_lengths->Data<int32_t>();
+  const bool is_prompt = (sequence_length == total_sequence_length);
+  const int min_key_length = is_prompt ? 1 : sequence_length;
+  for (int i = 0; i < batch_size; ++i) {
+    const int key_length = key_len_data[i];
+    if (key_length < min_key_length || key_length > total_sequence_length) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "key_total_sequence_lengths value ", key_length,
+                             " at batch index ", i,
+                             " is out of range [", min_key_length, ", ", total_sequence_length, "].");
+    }
+  }
+
   int rotary_dim = 0;
   int max_rotary_sequence_length = 0;
   if (do_rotary) {
 
@@ -3,11 +3,14 @@
 
 #include "graph_flatbuffers_utils.h"
 
+#include <limits>
+
 #include "core/common/flatbuffers.h"
 
 #include "core/common/narrow.h"
 #include "core/flatbuffers/flatbuffers_utils.h"
 #include "core/flatbuffers/schema/ort.fbs.h"
+#include "core/framework/allocator.h"
 #include "core/framework/tensorprotoutils.h"
 #include "core/framework/tensor_external_data_info.h"
 #include "core/graph/graph.h"
@@ -215,13 +218,50 @@ Status SaveAttributeOrtFormat(flatbuffers::FlatBufferBuilder& builder,
  * to accommodate fbs::Tensors with external data.
  *
  * @param tensor flatbuffer representation of a tensor.
- * @return size_t size in bytes of the tensor's data.
+ * @param size_in_bytes Output size in bytes of the tensor's data.
+ * @return Status indicating success or providing error information.
  */
-size_t GetSizeInBytesFromFbsTensor(const fbs::Tensor& tensor) {
-  auto fbs_dims = tensor.dims();
+Status GetSizeInBytesFromFbsTensor(const fbs::Tensor& tensor, size_t& size_in_bytes) {
+  const auto* tensor_name = tensor.name();
+  const auto* tensor_name_str = tensor_name ? tensor_name->c_str() : "<unnamed>";
+  const auto* tensor_data_type_str = fbs::EnumNameTensorDataType(tensor.data_type());
+  if (tensor_data_type_str[0] == '\0') {
+    tensor_data_type_str = "<unknown>";
+  }
+
+  const auto* fbs_dims = tensor.dims();
+  if (nullptr == fbs_dims) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                           "Missing dimensions for tensor '", tensor_name_str,
+                           "' with data type '", tensor_data_type_str,
+                           "'. Invalid ORT format model.");
+  }
+
+  size_t num_elements = 1;
+  for (int64_t dim : *fbs_dims) {
+    if (dim < 0) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "Invalid negative dimension ", dim,
+                             " for tensor '", tensor_name_str,
+                             "' with data type '", tensor_data_type_str,
+                             "'. Invalid ORT format model.");
+    }
 
-  auto num_elements = std::accumulate(fbs_dims->cbegin(), fbs_dims->cend(), SafeInt<size_t>(1),
-                                      std::multiplies<>());
+    if (static_cast<uint64_t>(dim) > static_cast<uint64_t>(std::numeric_limits<size_t>::max())) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "Dimension ", dim,
+                             " does not fit in size_t for tensor '", tensor_name_str,
+                             "' with data type '", tensor_data_type_str,
+                             "'. Invalid ORT format model.");
+    }
+
+    if (!IAllocator::CalcMemSizeForArray(num_elements, static_cast<size_t>(dim), &num_elements)) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "Tensor element count overflows size_t for tensor '", tensor_name_str,
+                             "' with data type '", tensor_data_type_str,
+                             "'. Invalid ORT format model.");
+    }
+  }
 
   size_t byte_size_of_one_element;
 
@@ -280,11 +320,24 @@ size_t GetSizeInBytesFromFbsTensor(const fbs::Tensor& tensor) {
       break;
 #endif
     case fbs::TensorDataType::STRING:
-      ORT_THROW("String data type is not supported for on-device training", tensor.name());
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "String data type is not supported for tensor '", tensor_name_str,
+                             "' in on-device training.");
     default:
-      ORT_THROW("Unsupported tensor data type for tensor ", tensor.name());
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                             "Unsupported tensor data type '", tensor_data_type_str,
+                             "' for tensor '", tensor_name_str,
+                             "'. Invalid ORT format model.");
   }
-  return num_elements * byte_size_of_one_element;
+
+  if (!IAllocator::CalcMemSizeForArray(num_elements, byte_size_of_one_element, &size_in_bytes)) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                           "Tensor byte size overflows size_t for tensor '", tensor_name_str,
+                           "' with data type '", tensor_data_type_str,
+                           "'. Invalid ORT format model.");
+  }
+
+  return Status::OK();
 }
 
 Status LoadInitializerOrtFormat(const fbs::Tensor& fbs_tensor, TensorProto& initializer,
@@ -306,7 +359,14 @@ Status LoadInitializerOrtFormat(const fbs::Tensor& fbs_tensor, TensorProto& init
     ORT_RETURN_IF(nullptr == fbs_str_data, "Missing string data for initializer. Invalid ORT format model.");
     auto mutable_str_data = initializer.mutable_string_data();
     mutable_str_data->Reserve(fbs_str_data->size());
-    for (const auto* fbs_str : *fbs_str_data) {
+    const auto* raw_string_offsets = reinterpret_cast<const uint8_t*>(fbs_str_data->Data());
+    for (flatbuffers::uoffset_t i = 0; i < fbs_str_data->size(); ++i) {
+      const auto entry_offset =
+          flatbuffers::ReadScalar<flatbuffers::uoffset_t>(raw_string_offsets + i * sizeof(flatbuffers::uoffset_t));
+      ORT_RETURN_IF(entry_offset == 0, "Null string data entry for initializer. Invalid ORT format model.");
+
+      const auto* fbs_str = fbs_str_data->Get(i);
+      ORT_RETURN_IF(nullptr == fbs_str, "Null string data entry for initializer. Invalid ORT format model.");
       mutable_str_data->Add(fbs_str->str());
     }
   } else {
@@ -338,7 +398,8 @@ Status LoadInitializerOrtFormat(const fbs::Tensor& fbs_tensor, TensorProto& init
 
       // FUTURE: This could be setup similarly to can_use_flatbuffer_for_initializers above if the external data file
       // is memory mapped and guaranteed to remain valid. This would avoid the copy.
-      auto num_bytes = GetSizeInBytesFromFbsTensor(fbs_tensor);
+      size_t num_bytes = 0;
+      ORT_RETURN_IF_ERROR(GetSizeInBytesFromFbsTensor(fbs_tensor, num_bytes));
 
       // pre-allocate so we can write directly to the string buffer
       std::string& raw_data = *initializer.mutable_raw_data();
@@ -542,7 +603,8 @@ struct UnpackTensorWithType {
       // no external data. should have had raw data.
       ORT_RETURN_IF(fbs_tensor_external_data_offset < 0, "Missing raw data for initializer. Invalid ORT format model.");
 
-      const size_t raw_data_len = fbs::utils::GetSizeInBytesFromFbsTensor(fbs_tensor);
+      size_t raw_data_len = 0;
+      ORT_RETURN_IF_ERROR(fbs::utils::GetSizeInBytesFromFbsTensor(fbs_tensor, raw_data_len));
 
       auto raw_buf = std::make_unique<uint8_t[]>(raw_data_len);
       gsl::span<uint8_t> raw_buf_span(raw_buf.get(), raw_data_len);
 
@@ -11,6 +11,7 @@
 
 #include "core/providers/cpu/rnn/deep_cpu_gru.h"
 #include "core/common/narrow.h"
+#include "core/common/safeint.h"
 
 #ifdef _MSC_VER
 #pragma warning(pop)
@@ -739,9 +740,8 @@ void UniDirectionalGru<T>::ComputeImpl(gsl::span<const T> inputs_arg,
   // we do not need to do that if there are two directions and we're doing the backwards pass as we
   // are writing to a temporary buffer (as outputs == outputs_reverse_) which is later copied
   // to the real output by ReverseSequence. this later copy includes num_directions in the step length.
-  int output_step_length = batch_size_ * hidden_size_;
-  if (direction_ == kForward && num_directions == 2)
-    output_step_length = 2 * batch_size_ * hidden_size_;
+  const int single_direction_output_step_length = rnn::detail::CalculateOutputStepLength(batch_size_, hidden_size_, 1, direction_);
+  const int output_step_length = rnn::detail::CalculateOutputStepLength(batch_size_, hidden_size_, num_directions, direction_);
 
   // convenience end iterators we use in the loops below to detect any bounds issues
   span_T_const_iter batched_bias_WRz_local_end = batched_bias_WRz_.end();
@@ -1030,13 +1030,13 @@ void UniDirectionalGru<T>::ComputeImpl(gsl::span<const T> inputs_arg,
   // zero any values beyond the evaluated steps if the maximum explicit sequence length we saw (max_sequence_length)
   // was shorter than the maximum possible sequence length (seq_length_)
   if (output_sequence && max_sequence_length < seq_length_) {
-    if (output_step_length == batch_size_ * hidden_size_) {  // contiguous
+    if (output_step_length == single_direction_output_step_length) {  // contiguous
       const auto span_to_zero = outputs.subspan(
           max_sequence_length * output_step_length, (seq_length_ - max_sequence_length) * output_step_length);
       std::fill_n(&*span_to_zero.begin(), span_to_zero.size(), T{});
     } else {
       for (int i = max_sequence_length; i < seq_length_; ++i) {  // non-contiguous
-        const auto span_to_zero = outputs.subspan(i * output_step_length, batch_size_ * hidden_size_);
+        const auto span_to_zero = outputs.subspan(i * output_step_length, single_direction_output_step_length);
         std::fill_n(&*span_to_zero.begin(), span_to_zero.size(), T{});
       }
     }
@@ -1051,34 +1051,43 @@ void UniDirectionalGru<T>::ComputeImpl(gsl::span<const T> inputs_arg,
 
+// Allocates the working buffers for this UniDirectionalGru instance. Every element count goes through
+// rnn::detail::CalculateBufferElementCount (SafeInt-based) instead of a raw int multiplication.
 template <typename T>
 void UniDirectionalGru<T>::AllocateBuffers() {
-  cur_h_ = Allocate(allocator_, hidden_size_ * batch_size_, cur_h_ptr_);
-  batched_hidden0_ = Allocate(allocator_, batch_size_ * hidden_size_, batched_hidden0_ptr_, true);
+  // Most buffers hold batch_size_ * hidden_size_ elements, so compute that count once and reuse it.
+  const size_t batch_hidden_count = rnn::detail::CalculateBufferElementCount({batch_size_, hidden_size_});
+
+  cur_h_ = Allocate(allocator_, batch_hidden_count, cur_h_ptr_);
+  batched_hidden0_ = Allocate(allocator_, batch_hidden_count, batched_hidden0_ptr_, true);
 
   if (use_bias_) {
-    batched_bias_WRz_ = Allocate(allocator_, batch_size_ * hidden_size_, batched_bias_WRz_ptr_);
-    batched_bias_WRr_ = Allocate(allocator_, batch_size_ * hidden_size_, batched_bias_WRr_ptr_);
+    batched_bias_WRz_ = Allocate(allocator_, batch_hidden_count, batched_bias_WRz_ptr_);
+    batched_bias_WRr_ = Allocate(allocator_, batch_hidden_count, batched_bias_WRr_ptr_);
 
     if (linear_before_reset_) {
-      batched_bias_Wh_ = Allocate(allocator_, batch_size_ * hidden_size_, batched_bias_Wh_ptr_);
-      batched_bias_Rh_ = Allocate(allocator_, batch_size_ * hidden_size_, batched_bias_Rh_ptr_);
+      batched_bias_Wh_ = Allocate(allocator_, batch_hidden_count, batched_bias_Wh_ptr_);
+      batched_bias_Rh_ = Allocate(allocator_, batch_hidden_count, batched_bias_Rh_ptr_);
     } else {
-      batched_bias_WRh_ = Allocate(allocator_, batch_size_ * hidden_size_, batched_bias_WRh_ptr_);
+      batched_bias_WRh_ = Allocate(allocator_, batch_hidden_count, batched_bias_WRh_ptr_);
     }
   }
 
   if (linear_before_reset_) {
-    linear_output_ = Allocate(allocator_, batch_size_ * hidden_size_, linear_output_ptr_);
+    linear_output_ = Allocate(allocator_, batch_hidden_count, linear_output_ptr_);
   }
 
-  auto batch_times_seq_length = batch_size_ * seq_length_;
-
   if (!training_mode_) {
-    outputZRH_ = Allocate(allocator_, hidden_size_ * 3 * batch_times_seq_length, outputZRH_ptr_, true);
+    outputZRH_ = Allocate(allocator_,
+                          rnn::detail::CalculateBufferElementCount({hidden_size_, 3, batch_size_, seq_length_}),
+                          outputZRH_ptr_, true);
   }
 
   if (direction_ == kReverse) {
-    inputs_reverse_ = Allocate(allocator_, batch_times_seq_length * input_size_, inputs_reverse_ptr_);
-    outputs_reverse_ = Allocate(allocator_, batch_times_seq_length * hidden_size_, outputs_reverse_ptr_);
+    inputs_reverse_ = Allocate(allocator_,
+                               rnn::detail::CalculateBufferElementCount({batch_size_, seq_length_, input_size_}),
+                               inputs_reverse_ptr_);
+    outputs_reverse_ = Allocate(allocator_,
+                                rnn::detail::CalculateBufferElementCount({batch_size_, seq_length_, hidden_size_}),
+                                outputs_reverse_ptr_);
   }
 }
 
 
@@ -3,6 +3,8 @@
 
 #pragma once
 
+#include <initializer_list>
+
 #ifdef _WIN32
 #pragma warning(disable : 4267)
 #endif
@@ -44,6 +46,27 @@ inline Direction MakeDirection(const std::string& direction) {
             "'. Must be one of 'forward', 'reverse', or 'bidirectional'.");
 }
 
+inline size_t CalculateBufferElementCount(std::initializer_list<int> dimensions) {  // returns the product of all entries in `dimensions` as a size_t
+  SafeInt<size_t> count{1};  // running product starts at 1, so an empty list yields 1
+
+  for (int dimension : dimensions) {
+    count *= dimension;  // multiply through SafeInt<size_t>, not raw int arithmetic; presumably overflow-checked (SafeInt is defined outside this diff)
+  }
+
+  return count;  // SafeInt<size_t> converts to the size_t return type
+}
+
+inline int CalculateOutputStepLength(int batch_size, int hidden_size, int num_directions, Direction direction) {  // output step length: batch_size * hidden_size, doubled for the forward pass when num_directions == 2
+  SafeInt<int> output_step_length{batch_size};
+  output_step_length *= hidden_size;  // done through SafeInt<int> rather than a plain int multiply
+
+  if (direction == kForward && num_directions == 2) {  // the doubled step applies only to kForward with two directions; every other case keeps batch_size * hidden_size
+    output_step_length *= 2;
+  }
+
+  return output_step_length;  // SafeInt<int> converts to the int return type
+}
+
 /** Allocate a unique_ptr using allocator_, and return a span to the allocated memory so usage is safe
 @param allocator IAllocator to use for the allocation.
 @param size Allocation size. Number of elements of type TAlloc, or total size if TAlloc is 'void'.