Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions onnxruntime/core/common/safeint.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,48 @@ class SafeIntExceptionHandler<onnxruntime::OnnxRuntimeException> {
#if defined(__GNUC__)
#pragma GCC diagnostic pop
#endif

#include <type_traits>

namespace onnxruntime {

// Strip reference and cv-qualifiers from T (stand-in for C++20 std::remove_cvref_t).
template <typename T>
using remove_cvref_t = std::remove_cv_t<std::remove_reference_t<T>>;

// True for integral types other than bool. bool is excluded because
// multiplying bools is almost always a bug rather than intended arithmetic.
template <typename T>
inline constexpr bool is_supported_integer_v =
    std::is_integral_v<remove_cvref_t<T>> && !std::is_same_v<remove_cvref_t<T>, bool>;

//------------------------------------------------------------------------------
// Safe multiplication of two or more integer values into an explicit result type R.
// Throws OnnxRuntimeException on overflow.
//------------------------------------------------------------------------------
template <typename R, typename T, typename U, typename... Rest>
[[nodiscard]] R SafeMul(T a, U b, Rest... rest) {
  static_assert(is_supported_integer_v<R>,
                "SafeMul requires an integral result type (excluding bool)");
  static_assert(is_supported_integer_v<T> && is_supported_integer_v<U>,
                "SafeMul requires integral operand types (excluding bool)");
  static_assert((is_supported_integer_v<Rest> && ...),
                "SafeMul requires integral operand types (excluding bool)");

  // Raise the standard SafeInt overflow error whenever a checked step fails.
  const auto require = [](bool ok) {
    if (!ok) {
      SafeIntDefaultExceptionHandler::SafeIntOnOverflow();
    }
  };

  // SafeMultiply(T, U, T&) requires the first operand and the result to share
  // a type, so range-check-convert the first operand into R up front.
  R lhs{};
  require(SafeCast(a, lhs));

  R product{};
  require(SafeMultiply(lhs, b, product));

  // Fold any remaining operands in left-to-right, checking every step.
  if constexpr (sizeof...(rest) > 0) {
    return SafeMul<R>(product, rest...);
  } else {
    return product;
  }
}

}  // namespace onnxruntime
58 changes: 38 additions & 20 deletions onnxruntime/core/providers/cpu/rnn/rnn.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include "core/providers/cpu/rnn/rnn.h"

#include "core/common/narrow.h"
#include "core/common/safeint.h"
#include "core/framework/op_kernel_context_internal.h"
#include "core/providers/cpu/rnn/rnn_activation_functors.h"
Expand Down Expand Up @@ -84,15 +85,32 @@ void ApplyActivationToBatches(const Tensor* sequence_lens, const T* h_prev, T* Y
template <typename T>
void Assign_Y_h(const T* Y_buffer_data, Tensor* Y_h, const Tensor* sequence_lens,
int64_t num_directions, int direction, bool isReverse, int64_t batch_size, int64_t seq_length, int64_t hidden_size) {
if (seq_length == 0) {
// No sequence data was processed; zero out Y_h for this direction.
const size_t y_h_direction_size = SafeMul<size_t>(batch_size, hidden_size);
const size_t Y_h_direction_offset = SafeMul<size_t>(direction, y_h_direction_size);
math::Set<T, CPUMathUtil>(y_h_direction_size, T{0},
Y_h->MutableData<T>() + Y_h_direction_offset, &CPUMathUtil::Instance());
return;
}

for (int batch = 0; batch < batch_size; batch++) {
int64_t last_time_step = isReverse ? 0 : seq_length - 1;
if (nullptr != sequence_lens && !isReverse)
if (nullptr != sequence_lens && !isReverse) {
last_time_step = sequence_lens->Data<int>()[batch] - 1;
if (last_time_step < 0) {
// sequence_lens[batch] == 0: no data was processed for this batch; zero out Y_h.
int64_t Y_h_offset = direction * batch_size * hidden_size + batch * hidden_size;
math::Set<T, CPUMathUtil>(narrow<size_t>(hidden_size), T{0},
Y_h->MutableData<T>() + Y_h_offset, &CPUMathUtil::Instance());
continue;
}
Comment thread
tianleiwu marked this conversation as resolved.
}
int64_t y_offset = last_time_step * num_directions * batch_size * hidden_size +
direction * batch_size * hidden_size +
batch * hidden_size;
int64_t Y_h_offset = direction * batch_size * hidden_size + batch * hidden_size;
math::CopyVector<T, CPUMathUtil>(static_cast<int>(hidden_size), Y_buffer_data + y_offset,
math::CopyVector<T, CPUMathUtil>(narrow<int>(hidden_size), Y_buffer_data + y_offset,
Y_h->MutableData<T>() + Y_h_offset,
&CPUMathUtil::Instance());
}
Expand All @@ -109,7 +127,7 @@ void ClearMissingFrames(T* Y_buffer_data, const Tensor* sequence_lens,
seq * num_directions * batch_size * hidden_size +
direction * batch_size * hidden_size +
batch * hidden_size;
math::Set<T, CPUMathUtil>(onnxruntime::narrow<size_t>(hidden_size), 0, Y_buffer_data + offset, &CPUMathUtil::Instance());
math::Set<T, CPUMathUtil>(narrow<size_t>(hidden_size), 0, Y_buffer_data + offset, &CPUMathUtil::Instance());
}
}
}
Expand Down Expand Up @@ -155,7 +173,7 @@ Status RNN<float>::Compute(OpKernelContext* ctx) const {
ORT_RETURN_IF_ERROR(ctx->GetTempSpaceAllocator(&alloc));

// X * W^t, each direction has shape of [seq_length, batch_size, hidden_size]
auto x_matmul_data = alloc->Alloc(SafeInt<size_t>(sizeof(float)) * seq_length * batch_size * hidden_size_);
auto x_matmul_data = alloc->Alloc(SafeMul<size_t>(sizeof(float), seq_length, batch_size, hidden_size_));
BufferUniquePtr x_matmul_buffer(x_matmul_data, BufferDeleter(alloc));
auto* x_matmul_w_buffer_data = static_cast<float*>(x_matmul_buffer.get());

Expand All @@ -165,7 +183,7 @@ Status RNN<float>::Compute(OpKernelContext* ctx) const {
if (Y != nullptr)
Y_buffer_data = Y->MutableData<float>();
else {
Y_data = alloc->Alloc(SafeInt<size_t>(sizeof(float)) * seq_length * num_directions * batch_size * hidden_size_);
Y_data = alloc->Alloc(SafeMul<size_t>(sizeof(float), seq_length, num_directions, batch_size, hidden_size_));
Y_matmul_buffer = BufferUniquePtr(Y_data, BufferDeleter(alloc));
Y_buffer_data = static_cast<float*>(Y_matmul_buffer.get());
}
Expand All @@ -177,20 +195,20 @@ Status RNN<float>::Compute(OpKernelContext* ctx) const {
bool isReverse = direction_ == "reverse" || direction == 1;

if (B != nullptr) {
EigenMatrixMapRowMajor<float>(x_matmul_w_buffer_data, seq_length * SafeInt<size_t>(batch_size), onnxruntime::narrow<size_t>(hidden_size_)).rowwise() =
ConstEigenVectorMap<float>(B->Data<float>() + direction * 2 * hidden_size_, onnxruntime::narrow<size_t>(hidden_size_)).transpose() +
ConstEigenVectorMap<float>(B->Data<float>() + direction * 2 * hidden_size_ + hidden_size_, onnxruntime::narrow<size_t>(hidden_size_)).transpose();
EigenMatrixMapRowMajor<float>(x_matmul_w_buffer_data, SafeMul<size_t>(seq_length, batch_size), narrow<size_t>(hidden_size_)).rowwise() =
ConstEigenVectorMap<float>(B->Data<float>() + direction * 2 * hidden_size_, narrow<size_t>(hidden_size_)).transpose() +
ConstEigenVectorMap<float>(B->Data<float>() + direction * 2 * hidden_size_ + hidden_size_, narrow<size_t>(hidden_size_)).transpose();
} else {
math::Set<float, CPUMathUtil>(seq_length * batch_size * SafeInt<size_t>(hidden_size_), 0, x_matmul_w_buffer_data, &CPUMathUtil::Instance());
math::Set<float, CPUMathUtil>(SafeMul<size_t>(seq_length, batch_size, hidden_size_), 0, x_matmul_w_buffer_data, &CPUMathUtil::Instance());
}

// X * W[direction]^t + B
math::Gemm<float>(
CblasNoTrans,
CblasTrans,
static_cast<int>(seq_length * batch_size),
static_cast<int>(hidden_size_),
static_cast<int>(input_size),
SafeMul<int>(seq_length, batch_size),
narrow<int>(hidden_size_),
narrow<int>(input_size),
1,
X.Data<float>(),
W.Data<float>() + direction * hidden_size_ * input_size,
Expand All @@ -202,7 +220,7 @@ Status RNN<float>::Compute(OpKernelContext* ctx) const {
int64_t time_step = isReverse ? (seq_length - t - 1) : t;
int64_t Y_frame_offset = (time_step * num_directions + direction) * Y_frame_size;
float* Y_buffer_data_current_frame = Y_buffer_data + Y_frame_offset;
auto y_frame_mat = EigenMatrixMapRowMajor<float>(Y_buffer_data_current_frame, onnxruntime::narrow<size_t>(batch_size), onnxruntime::narrow<size_t>(hidden_size_));
auto y_frame_mat = EigenMatrixMapRowMajor<float>(Y_buffer_data_current_frame, narrow<size_t>(batch_size), narrow<size_t>(hidden_size_));

const float* h_prev = nullptr;
if (t == 0) {
Expand All @@ -224,21 +242,21 @@ Status RNN<float>::Compute(OpKernelContext* ctx) const {
math::Gemm<float>(
CblasNoTrans,
CblasTrans,
static_cast<int>(batch_size),
static_cast<int>(hidden_size_),
static_cast<int>(hidden_size_),
narrow<int>(batch_size),
narrow<int>(hidden_size_),
narrow<int>(hidden_size_),
1,
h_prev,
R.Data<float>() + direction * hidden_size_ * hidden_size_,
0,
Y_buffer_data_current_frame,
tp, &mlas_backend_kernel_selector_config_);
} else {
math::Set<float, CPUMathUtil>(batch_size * SafeInt<size_t>(hidden_size_), 0, Y_buffer_data_current_frame, &CPUMathUtil::Instance());
math::Set<float, CPUMathUtil>(SafeMul<size_t>(batch_size, hidden_size_), 0, Y_buffer_data_current_frame, &CPUMathUtil::Instance());
}

// X[time_step] * W^t + H_t_1 * R^t
y_frame_mat += EigenMatrixMapRowMajor<float>(&x_matmul_w_buffer_data[time_step * Y_frame_size], onnxruntime::narrow<size_t>(batch_size), onnxruntime::narrow<size_t>(hidden_size_));
y_frame_mat += EigenMatrixMapRowMajor<float>(&x_matmul_w_buffer_data[time_step * Y_frame_size], narrow<size_t>(batch_size), narrow<size_t>(hidden_size_));

// apply activation
ApplyActivationToBatches<float>(sequence_lens, h_prev, Y_buffer_data_current_frame,
Expand All @@ -258,10 +276,10 @@ Status RNN<float>::Compute(OpKernelContext* ctx) const {
}

if (Y != nullptr)
DumpMatrix("Y", Y_buffer_data, (int)(seq_length * num_directions * batch_size), (int)hidden_size_);
DumpMatrix("Y", Y_buffer_data, SafeMul<int>(seq_length, num_directions, batch_size), narrow<int>(hidden_size_));

if (Y_h != nullptr)
DumpMatrix("Y_h", Y_h->Data<float>(), (int)(num_directions * batch_size), (int)hidden_size_);
DumpMatrix("Y_h", Y_h->Data<float>(), SafeMul<int>(num_directions, batch_size), narrow<int>(hidden_size_));

return Status::OK();
}
Expand Down
38 changes: 38 additions & 0 deletions onnxruntime/test/common/safeint_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include "core/common/safeint.h"

#include <cstddef>
#include <cstdint>
#include <limits>

#include "gtest/gtest.h"

namespace onnxruntime::test {

// Compile-time sanity checks for the type constraint used by SafeMul:
// ordinary integral types qualify, bool is deliberately rejected.
static_assert(is_supported_integer_v<int>);
static_assert(is_supported_integer_v<uint8_t>);
static_assert(!is_supported_integer_v<bool>);

// SafeMul multiplies all operands and returns the product in the requested
// result type, for both the two-operand and variadic forms.
TEST(SafeIntTest, SafeMulMultipliesOperands) {
  const size_t unsigned_product = SafeMul<size_t>(size_t{2}, 3U);
  EXPECT_EQ(unsigned_product, size_t{6});

  const int signed_product = SafeMul<int>(-2, 3, 4);
  EXPECT_EQ(signed_product, -24);
}

// Passing the same lvalue for both operands must square it correctly
// (guards against aliasing problems between operand and result).
TEST(SafeIntTest, SafeMulHandlesSameVariableOperands) {
  const int operand = 7;
  const int squared = SafeMul<int>(operand, operand);
  EXPECT_EQ(squared, 49);
}

#ifndef ORT_NO_EXCEPTIONS
// Converting the first operand into the result type is itself range-checked:
// a negative value is not representable as uint32_t, so SafeMul must throw.
TEST(SafeIntTest, SafeMulThrowsOnInitialCastOverflow) {
  EXPECT_THROW((void)SafeMul<uint32_t>(-1, 2), OnnxRuntimeException);
}

// The multiplication step must detect overflow of the result type.
TEST(SafeIntTest, SafeMulThrowsOnMultiplyOverflow) {
  constexpr int largest = std::numeric_limits<int>::max();
  EXPECT_THROW((void)SafeMul<int>(largest, 2), OnnxRuntimeException);
}
#endif

} // namespace onnxruntime::test
103 changes: 103 additions & 0 deletions onnxruntime/test/providers/cpu/rnn/rnn_op_test.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include <cmath>

#include "core/providers/cpu/rnn/rnn.h"
#include "gtest/gtest.h"
#include "test/providers/provider_test_utils.h"
Expand Down Expand Up @@ -883,5 +885,106 @@ TEST(RNNTest, RNN_with_invalid_activation_load_failure) {
{kCudaExecutionProvider, kTensorrtExecutionProvider});
}

// Test that seq_length == 0 produces zero-filled Y and Y_h without crashing.
TEST(RNNTest, RNN_seq_length_zero) {
  auto cpu_ep = DefaultCpuExecutionProvider();
  if (!cpu_ep) GTEST_SKIP() << "CPU EP not available in this build.";

  constexpr int64_t num_directions = 1;
  constexpr int64_t input_size = 2;
  constexpr int64_t hidden_size = 3;
  constexpr int64_t batch_size = 2;
  constexpr int64_t seq_length = 0;

  OpTester test("RNN");
  test.AddAttribute("activations", vector<string>(num_directions, "Tanh"));
  test.AddAttribute("direction", "forward");
  test.AddAttribute("hidden_size", hidden_size);

  // X has zero time steps: shape [0, batch_size, input_size] with no data.
  const std::vector<int64_t> x_dims{seq_length, batch_size, input_size};
  test.AddInput<float>("X", x_dims, std::vector<float>{});

  const std::vector<int64_t> w_dims{num_directions, hidden_size, input_size};
  const std::vector<float> w_data{-0.1f, 0.2f, 1.f, -2.f, -1.f, 3.f};
  test.AddInput<float>("W", w_dims, w_data);

  const std::vector<int64_t> r_dims{num_directions, hidden_size, hidden_size};
  const std::vector<float> r_data(hidden_size * hidden_size, 0.f);
  test.AddInput<float>("R", r_dims, r_data);

  // Y: shape [0, 1, 2, 3] -> empty tensor.
  const std::vector<int64_t> y_dims{seq_length, num_directions, batch_size, hidden_size};
  test.AddOutput<float>("Y", y_dims, std::vector<float>{});

  // Y_h: shape [1, 2, 3] -> expected to be all zeros.
  const std::vector<int64_t> y_h_dims{num_directions, batch_size, hidden_size};
  const std::vector<float> y_h_data(num_directions * batch_size * hidden_size, 0.f);
  test.AddOutput<float>("Y_h", y_h_dims, y_h_data);

  test.ConfigEp(std::move(cpu_ep)).RunWithConfig();
}

// Test that per-batch sequence_lens containing 0 produces zero-filled Y_h for those batches.
// NOTE: the pasted-in version of this test contained stray review-UI text
// ("Comment thread ... marked this conversation as resolved.") inside the
// function body, which is not C++ and would not compile; it is removed here.
TEST(RNNTest, RNN_forward_sequence_lens_with_zero) {
  auto cpu = DefaultCpuExecutionProvider();
  if (!cpu) GTEST_SKIP() << "CPU EP not available in this build.";

  OpTester test("RNN");
  int64_t num_directions = 1, input_size = 2, hidden_size = 3, batch_size = 2, seq_length = 2;

  test.AddAttribute("activations", vector<string>(num_directions, "Tanh"));
  test.AddAttribute("direction", "forward");
  test.AddAttribute("hidden_size", hidden_size);

  // X shape: [seq_length=2, batch_size=2, input_size=2]
  std::vector<int64_t> X_dims = {seq_length, batch_size, input_size};
  std::vector<float> X_data({0.1f, 0.2f,
                             0.3f, 0.4f,
                             0.5f, 0.6f,
                             0.7f, 0.8f});
  test.AddInput<float>("X", X_dims, X_data);

  std::vector<int64_t> W_dims = {num_directions, hidden_size, input_size};
  std::vector<float> W_data({-0.1f, 0.2f, 1.f, -2.f, -1.f, 3.f});
  test.AddInput<float>("W", W_dims, W_data);

  // R is all zeros so the recurrence contributes nothing; the expected
  // output below depends only on X * W^T.
  std::vector<int64_t> R_dims = {num_directions, hidden_size, hidden_size};
  std::vector<float> R_data(hidden_size * hidden_size, 0.f);
  test.AddInput<float>("R", R_dims, R_data);

  std::vector<int64_t> B_dims = {num_directions, 2 * hidden_size};
  std::vector<float> B_data(2 * hidden_size, 0.f);
  test.AddInput<float>("B", B_dims, B_data);

  // batch 0 has sequence_lens=2, batch 1 has sequence_lens=0
  std::vector<int64_t> sequence_lens_dims{batch_size};
  std::vector<int> sequence_lens_data{2, 0};
  test.AddInput<int>("sequence_lens", sequence_lens_dims, sequence_lens_data);

  std::vector<int64_t> initial_h_dims = {num_directions, batch_size, hidden_size};
  std::vector<float> initial_h_data(num_directions * batch_size * hidden_size, 0.f);
  test.AddInput<float>("initial_h", initial_h_dims, initial_h_data);

  // Y output is optional; skip it to keep test simple.
  test.AddOptionalOutputEdge<float>();

  // Y_h: shape [1, 2, 3]
  // batch 0 gets the result of forward pass at last time step (seq_length-1=1).
  // batch 1 has sequence_lens=0 so Y_h should be zero.
  //
  // For batch 0:
  //   time_step 0: X=[0.1, 0.2], Y = tanh(X * W^T) = tanh([-0.1*0.1+0.2*0.2, 1*0.1-2*0.2, -1*0.1+3*0.2])
  //                = tanh([0.03, -0.3, 0.5])
  //   time_step 1: X=[0.5, 0.6], Y = tanh(X * W^T + H_prev * R^T)
  //                R is zero, so Y = tanh([-0.1*0.5+0.2*0.6, 1*0.5-2*0.6, -1*0.5+3*0.6])
  //                = tanh([0.07, -0.7, 1.3])
  float y_h_batch0_f0 = std::tanh(0.07f);
  float y_h_batch0_f1 = std::tanh(-0.7f);
  float y_h_batch0_f2 = std::tanh(1.3f);

  std::vector<int64_t> Y_h_dims{num_directions, batch_size, hidden_size};
  std::vector<float> Y_h_data{y_h_batch0_f0, y_h_batch0_f1, y_h_batch0_f2,
                              0.f, 0.f, 0.f};
  test.AddOutput<float>("Y_h", Y_h_dims, Y_h_data);
  test.ConfigEp(std::move(cpu)).RunWithConfig();
}

} // namespace test
} // namespace onnxruntime
Loading