Skip to content

Commit daa74b4

Browse files
hariharans29github-actions[bot]Copilot
authored
ICM fixes (2/n) (#27922)
### Description Fixes 3 ICM issues: https://portal.microsofticm.com/imp/v5/incidents/details/31000000575344/summary https://portal.microsofticm.com/imp/v5/incidents/details/31000000575473/summary https://portal.microsofticm.com/imp/v5/incidents/details/31000000574999/summary ### Motivation and Context Fix ICMs --------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 0bae67d commit daa74b4

8 files changed

Lines changed: 527 additions & 50 deletions

File tree

onnxruntime/contrib_ops/cpu/sparse/sparse_attention_helper.h

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ Status CheckInputs(void* params,
9797

9898
if (key->Shape() != value->Shape()) {
9999
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
100-
"Input 'query' and 'value' shall have same shape");
100+
"Input 'key' and 'value' shall have same shape");
101101
}
102102
} else {
103103
// packed qkv
@@ -197,13 +197,26 @@ Status CheckInputs(void* params,
197197
past_key_dims[3]);
198198
}
199199

200-
// Check the shape of total_key_sequence_lengths. We do not check the values here.
200+
// Check the shape and values of total_key_sequence_lengths.
201201
const auto& k_len_dim = total_key_lengths->Shape().GetDims();
202-
if (k_len_dim.size() != 1 && k_len_dim[0] != batch_size) {
202+
if (k_len_dim.size() != 1 || k_len_dim[0] != batch_size) {
203203
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
204204
"key_total_sequence_lengths must have shape (batch_size).");
205205
}
206206

207+
const auto* key_len_data = total_key_lengths->Data<int32_t>();
208+
const bool is_prompt = (sequence_length == total_sequence_length);
209+
const int min_key_length = is_prompt ? 1 : sequence_length;
210+
for (int i = 0; i < batch_size; ++i) {
211+
const int key_length = key_len_data[i];
212+
if (key_length < min_key_length || key_length > total_sequence_length) {
213+
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
214+
"key_total_sequence_lengths value ", key_length,
215+
" at batch index ", i,
216+
" is out of range [", min_key_length, ", ", total_sequence_length, "].");
217+
}
218+
}
219+
207220
int rotary_dim = 0;
208221
int max_rotary_sequence_length = 0;
209222
if (do_rotary) {

onnxruntime/core/graph/graph_flatbuffers_utils.cc

Lines changed: 73 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,14 @@
33

44
#include "graph_flatbuffers_utils.h"
55

6+
#include <limits>
7+
68
#include "core/common/flatbuffers.h"
79

810
#include "core/common/narrow.h"
911
#include "core/flatbuffers/flatbuffers_utils.h"
1012
#include "core/flatbuffers/schema/ort.fbs.h"
13+
#include "core/framework/allocator.h"
1114
#include "core/framework/tensorprotoutils.h"
1215
#include "core/framework/tensor_external_data_info.h"
1316
#include "core/graph/graph.h"
@@ -215,13 +218,50 @@ Status SaveAttributeOrtFormat(flatbuffers::FlatBufferBuilder& builder,
215218
* to accommodate fbs::Tensors with external data.
216219
*
217220
* @param tensor flatbuffer representation of a tensor.
218-
* @return size_t size in bytes of the tensor's data.
221+
* @param size_in_bytes Output size in bytes of the tensor's data.
222+
* @return Status indicating success or providing error information.
219223
*/
220-
size_t GetSizeInBytesFromFbsTensor(const fbs::Tensor& tensor) {
221-
auto fbs_dims = tensor.dims();
224+
Status GetSizeInBytesFromFbsTensor(const fbs::Tensor& tensor, size_t& size_in_bytes) {
225+
const auto* tensor_name = tensor.name();
226+
const auto* tensor_name_str = tensor_name ? tensor_name->c_str() : "<unnamed>";
227+
const auto* tensor_data_type_str = fbs::EnumNameTensorDataType(tensor.data_type());
228+
if (tensor_data_type_str[0] == '\0') {
229+
tensor_data_type_str = "<unknown>";
230+
}
231+
232+
const auto* fbs_dims = tensor.dims();
233+
if (nullptr == fbs_dims) {
234+
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
235+
"Missing dimensions for tensor '", tensor_name_str,
236+
"' with data type '", tensor_data_type_str,
237+
"'. Invalid ORT format model.");
238+
}
239+
240+
size_t num_elements = 1;
241+
for (int64_t dim : *fbs_dims) {
242+
if (dim < 0) {
243+
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
244+
"Invalid negative dimension ", dim,
245+
" for tensor '", tensor_name_str,
246+
"' with data type '", tensor_data_type_str,
247+
"'. Invalid ORT format model.");
248+
}
222249

223-
auto num_elements = std::accumulate(fbs_dims->cbegin(), fbs_dims->cend(), SafeInt<size_t>(1),
224-
std::multiplies<>());
250+
if (static_cast<uint64_t>(dim) > static_cast<uint64_t>(std::numeric_limits<size_t>::max())) {
251+
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
252+
"Dimension ", dim,
253+
" does not fit in size_t for tensor '", tensor_name_str,
254+
"' with data type '", tensor_data_type_str,
255+
"'. Invalid ORT format model.");
256+
}
257+
258+
if (!IAllocator::CalcMemSizeForArray(num_elements, static_cast<size_t>(dim), &num_elements)) {
259+
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
260+
"Tensor element count overflows size_t for tensor '", tensor_name_str,
261+
"' with data type '", tensor_data_type_str,
262+
"'. Invalid ORT format model.");
263+
}
264+
}
225265

226266
size_t byte_size_of_one_element;
227267

@@ -280,11 +320,24 @@ size_t GetSizeInBytesFromFbsTensor(const fbs::Tensor& tensor) {
280320
break;
281321
#endif
282322
case fbs::TensorDataType::STRING:
283-
ORT_THROW("String data type is not supported for on-device training", tensor.name());
323+
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
324+
"String data type is not supported for tensor '", tensor_name_str,
325+
"' in on-device training.");
284326
default:
285-
ORT_THROW("Unsupported tensor data type for tensor ", tensor.name());
327+
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
328+
"Unsupported tensor data type '", tensor_data_type_str,
329+
"' for tensor '", tensor_name_str,
330+
"'. Invalid ORT format model.");
286331
}
287-
return num_elements * byte_size_of_one_element;
332+
333+
if (!IAllocator::CalcMemSizeForArray(num_elements, byte_size_of_one_element, &size_in_bytes)) {
334+
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
335+
"Tensor byte size overflows size_t for tensor '", tensor_name_str,
336+
"' with data type '", tensor_data_type_str,
337+
"'. Invalid ORT format model.");
338+
}
339+
340+
return Status::OK();
288341
}
289342

290343
Status LoadInitializerOrtFormat(const fbs::Tensor& fbs_tensor, TensorProto& initializer,
@@ -306,7 +359,14 @@ Status LoadInitializerOrtFormat(const fbs::Tensor& fbs_tensor, TensorProto& init
306359
ORT_RETURN_IF(nullptr == fbs_str_data, "Missing string data for initializer. Invalid ORT format model.");
307360
auto mutable_str_data = initializer.mutable_string_data();
308361
mutable_str_data->Reserve(fbs_str_data->size());
309-
for (const auto* fbs_str : *fbs_str_data) {
362+
const auto* raw_string_offsets = reinterpret_cast<const uint8_t*>(fbs_str_data->Data());
363+
for (flatbuffers::uoffset_t i = 0; i < fbs_str_data->size(); ++i) {
364+
const auto entry_offset =
365+
flatbuffers::ReadScalar<flatbuffers::uoffset_t>(raw_string_offsets + i * sizeof(flatbuffers::uoffset_t));
366+
ORT_RETURN_IF(entry_offset == 0, "Null string data entry for initializer. Invalid ORT format model.");
367+
368+
const auto* fbs_str = fbs_str_data->Get(i);
369+
ORT_RETURN_IF(nullptr == fbs_str, "Null string data entry for initializer. Invalid ORT format model.");
310370
mutable_str_data->Add(fbs_str->str());
311371
}
312372
} else {
@@ -338,7 +398,8 @@ Status LoadInitializerOrtFormat(const fbs::Tensor& fbs_tensor, TensorProto& init
338398

339399
// FUTURE: This could be setup similarly to can_use_flatbuffer_for_initializers above if the external data file
340400
// is memory mapped and guaranteed to remain valid. This would avoid the copy.
341-
auto num_bytes = GetSizeInBytesFromFbsTensor(fbs_tensor);
401+
size_t num_bytes = 0;
402+
ORT_RETURN_IF_ERROR(GetSizeInBytesFromFbsTensor(fbs_tensor, num_bytes));
342403

343404
// pre-allocate so we can write directly to the string buffer
344405
std::string& raw_data = *initializer.mutable_raw_data();
@@ -542,7 +603,8 @@ struct UnpackTensorWithType {
542603
// no external data. should have had raw data.
543604
ORT_RETURN_IF(fbs_tensor_external_data_offset < 0, "Missing raw data for initializer. Invalid ORT format model.");
544605

545-
const size_t raw_data_len = fbs::utils::GetSizeInBytesFromFbsTensor(fbs_tensor);
606+
size_t raw_data_len = 0;
607+
ORT_RETURN_IF_ERROR(fbs::utils::GetSizeInBytesFromFbsTensor(fbs_tensor, raw_data_len));
546608

547609
auto raw_buf = std::make_unique<uint8_t[]>(raw_data_len);
548610
gsl::span<uint8_t> raw_buf_span(raw_buf.get(), raw_data_len);

onnxruntime/core/providers/cpu/rnn/deep_cpu_gru.cc

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include "core/providers/cpu/rnn/deep_cpu_gru.h"
1313
#include "core/common/narrow.h"
14+
#include "core/common/safeint.h"
1415

1516
#ifdef _MSC_VER
1617
#pragma warning(pop)
@@ -739,9 +740,8 @@ void UniDirectionalGru<T>::ComputeImpl(gsl::span<const T> inputs_arg,
739740
// we do not need to do that if there are two directions and we're doing the backwards pass as we
740741
// are writing to a temporary buffer (as outputs == outputs_reverse_) which is later copied
741742
// to the real output by ReverseSequence. this later copy includes num_directions in the step length.
742-
int output_step_length = batch_size_ * hidden_size_;
743-
if (direction_ == kForward && num_directions == 2)
744-
output_step_length = 2 * batch_size_ * hidden_size_;
743+
const int single_direction_output_step_length = rnn::detail::CalculateOutputStepLength(batch_size_, hidden_size_, 1, direction_);
744+
const int output_step_length = rnn::detail::CalculateOutputStepLength(batch_size_, hidden_size_, num_directions, direction_);
745745

746746
// convenience end iterators we use in the loops below to detect any bounds issues
747747
span_T_const_iter batched_bias_WRz_local_end = batched_bias_WRz_.end();
@@ -1030,13 +1030,13 @@ void UniDirectionalGru<T>::ComputeImpl(gsl::span<const T> inputs_arg,
10301030
// zero any values beyond the evaluated steps if the maximum explicit sequence length we saw (max_sequence_length)
10311031
// was shorter than the maximum possible sequence length (seq_length_)
10321032
if (output_sequence && max_sequence_length < seq_length_) {
1033-
if (output_step_length == batch_size_ * hidden_size_) { // contiguous
1033+
if (output_step_length == single_direction_output_step_length) { // contiguous
10341034
const auto span_to_zero = outputs.subspan(
10351035
max_sequence_length * output_step_length, (seq_length_ - max_sequence_length) * output_step_length);
10361036
std::fill_n(&*span_to_zero.begin(), span_to_zero.size(), T{});
10371037
} else {
10381038
for (int i = max_sequence_length; i < seq_length_; ++i) { // non-contiguous
1039-
const auto span_to_zero = outputs.subspan(i * output_step_length, batch_size_ * hidden_size_);
1039+
const auto span_to_zero = outputs.subspan(i * output_step_length, single_direction_output_step_length);
10401040
std::fill_n(&*span_to_zero.begin(), span_to_zero.size(), T{});
10411041
}
10421042
}
@@ -1051,34 +1051,32 @@ void UniDirectionalGru<T>::ComputeImpl(gsl::span<const T> inputs_arg,
10511051

10521052
template <typename T>
10531053
void UniDirectionalGru<T>::AllocateBuffers() {
1054-
cur_h_ = Allocate(allocator_, hidden_size_ * batch_size_, cur_h_ptr_);
1055-
batched_hidden0_ = Allocate(allocator_, batch_size_ * hidden_size_, batched_hidden0_ptr_, true);
1054+
cur_h_ = Allocate(allocator_, rnn::detail::CalculateBufferElementCount({hidden_size_, batch_size_}), cur_h_ptr_);
1055+
batched_hidden0_ = Allocate(allocator_, rnn::detail::CalculateBufferElementCount({batch_size_, hidden_size_}), batched_hidden0_ptr_, true);
10561056

10571057
if (use_bias_) {
1058-
batched_bias_WRz_ = Allocate(allocator_, batch_size_ * hidden_size_, batched_bias_WRz_ptr_);
1059-
batched_bias_WRr_ = Allocate(allocator_, batch_size_ * hidden_size_, batched_bias_WRr_ptr_);
1058+
batched_bias_WRz_ = Allocate(allocator_, rnn::detail::CalculateBufferElementCount({batch_size_, hidden_size_}), batched_bias_WRz_ptr_);
1059+
batched_bias_WRr_ = Allocate(allocator_, rnn::detail::CalculateBufferElementCount({batch_size_, hidden_size_}), batched_bias_WRr_ptr_);
10601060

10611061
if (linear_before_reset_) {
1062-
batched_bias_Wh_ = Allocate(allocator_, batch_size_ * hidden_size_, batched_bias_Wh_ptr_);
1063-
batched_bias_Rh_ = Allocate(allocator_, batch_size_ * hidden_size_, batched_bias_Rh_ptr_);
1062+
batched_bias_Wh_ = Allocate(allocator_, rnn::detail::CalculateBufferElementCount({batch_size_, hidden_size_}), batched_bias_Wh_ptr_);
1063+
batched_bias_Rh_ = Allocate(allocator_, rnn::detail::CalculateBufferElementCount({batch_size_, hidden_size_}), batched_bias_Rh_ptr_);
10641064
} else {
1065-
batched_bias_WRh_ = Allocate(allocator_, batch_size_ * hidden_size_, batched_bias_WRh_ptr_);
1065+
batched_bias_WRh_ = Allocate(allocator_, rnn::detail::CalculateBufferElementCount({batch_size_, hidden_size_}), batched_bias_WRh_ptr_);
10661066
}
10671067
}
10681068

10691069
if (linear_before_reset_) {
1070-
linear_output_ = Allocate(allocator_, batch_size_ * hidden_size_, linear_output_ptr_);
1070+
linear_output_ = Allocate(allocator_, rnn::detail::CalculateBufferElementCount({batch_size_, hidden_size_}), linear_output_ptr_);
10711071
}
10721072

1073-
auto batch_times_seq_length = batch_size_ * seq_length_;
1074-
10751073
if (!training_mode_) {
1076-
outputZRH_ = Allocate(allocator_, hidden_size_ * 3 * batch_times_seq_length, outputZRH_ptr_, true);
1074+
outputZRH_ = Allocate(allocator_, rnn::detail::CalculateBufferElementCount({hidden_size_, 3, batch_size_, seq_length_}), outputZRH_ptr_, true);
10771075
}
10781076

10791077
if (direction_ == kReverse) {
1080-
inputs_reverse_ = Allocate(allocator_, batch_times_seq_length * input_size_, inputs_reverse_ptr_);
1081-
outputs_reverse_ = Allocate(allocator_, batch_times_seq_length * hidden_size_, outputs_reverse_ptr_);
1078+
inputs_reverse_ = Allocate(allocator_, rnn::detail::CalculateBufferElementCount({batch_size_, seq_length_, input_size_}), inputs_reverse_ptr_);
1079+
outputs_reverse_ = Allocate(allocator_, rnn::detail::CalculateBufferElementCount({batch_size_, seq_length_, hidden_size_}), outputs_reverse_ptr_);
10821080
}
10831081
}
10841082

onnxruntime/core/providers/cpu/rnn/rnn_helpers.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33

44
#pragma once
55

6+
#include <initializer_list>
7+
68
#ifdef _WIN32
79
#pragma warning(disable : 4267)
810
#endif
@@ -44,6 +46,27 @@ inline Direction MakeDirection(const std::string& direction) {
4446
"'. Must be one of 'forward', 'reverse', or 'bidirectional'.");
4547
}
4648

49+
inline size_t CalculateBufferElementCount(std::initializer_list<int> dimensions) {
50+
SafeInt<size_t> count{1};
51+
52+
for (int dimension : dimensions) {
53+
count *= dimension;
54+
}
55+
56+
return count;
57+
}
58+
59+
inline int CalculateOutputStepLength(int batch_size, int hidden_size, int num_directions, Direction direction) {
60+
SafeInt<int> output_step_length{batch_size};
61+
output_step_length *= hidden_size;
62+
63+
if (direction == kForward && num_directions == 2) {
64+
output_step_length *= 2;
65+
}
66+
67+
return output_step_length;
68+
}
69+
4770
/** Allocate a unique_ptr using allocator_, and return a span to the allocated memory so usage is safe
4871
@param allocator IAllocator to use for the allocation.
4972
@param size Allocation size. Number of elements of type TAlloc, or total size if TAlloc is 'void'.

0 commit comments

Comments
 (0)