|
1 | 1 | // Copyright (c) Microsoft Corporation. All rights reserved. |
2 | 2 | // Licensed under the MIT License. |
3 | 3 |
|
| 4 | +#include <cmath> |
| 5 | + |
4 | 6 | #include "core/providers/cpu/rnn/rnn.h" |
5 | 7 | #include "gtest/gtest.h" |
6 | 8 | #include "test/providers/provider_test_utils.h" |
@@ -883,5 +885,106 @@ TEST(RNNTest, RNN_with_invalid_activation_load_failure) { |
883 | 885 | {kCudaExecutionProvider, kTensorrtExecutionProvider}); |
884 | 886 | } |
885 | 887 |
|
| 888 | +// Test that seq_length == 0 produces zero-filled Y and Y_h without crashing. |
| 889 | +TEST(RNNTest, RNN_seq_length_zero) { |
| 890 | + OpTester test("RNN"); |
| 891 | + int64_t num_directions = 1, input_size = 2, hidden_size = 3, batch_size = 2, seq_length = 0; |
| 892 | + |
| 893 | + test.AddAttribute("activations", vector<string>(num_directions, "Tanh")); |
| 894 | + test.AddAttribute("direction", "forward"); |
| 895 | + test.AddAttribute("hidden_size", hidden_size); |
| 896 | + |
| 897 | + std::vector<int64_t> X_dims = {seq_length, batch_size, input_size}; |
| 898 | + std::vector<float> X_data{}; |
| 899 | + test.AddInput<float>("X", X_dims, X_data); |
| 900 | + |
| 901 | + std::vector<int64_t> W_dims = {num_directions, hidden_size, input_size}; |
| 902 | + std::vector<float> W_data({-0.1f, 0.2f, 1.f, -2.f, -1.f, 3.f}); |
| 903 | + test.AddInput<float>("W", W_dims, W_data); |
| 904 | + |
| 905 | + std::vector<int64_t> R_dims = {num_directions, hidden_size, hidden_size}; |
| 906 | + std::vector<float> R_data(hidden_size * hidden_size, 0.f); |
| 907 | + test.AddInput<float>("R", R_dims, R_data); |
| 908 | + |
| 909 | + // Y: shape [0, 1, 2, 3] -> empty |
| 910 | + std::vector<int64_t> Y_dims = {seq_length, num_directions, batch_size, hidden_size}; |
| 911 | + std::vector<float> Y_data{}; |
| 912 | + test.AddOutput<float>("Y", Y_dims, Y_data); |
| 913 | + |
| 914 | + // Y_h: shape [1, 2, 3] -> all zeros |
| 915 | + std::vector<int64_t> Y_h_dims{num_directions, batch_size, hidden_size}; |
| 916 | + std::vector<float> Y_h_data(num_directions * batch_size * hidden_size, 0.f); |
| 917 | + test.AddOutput<float>("Y_h", Y_h_dims, Y_h_data); |
| 918 | + |
| 919 | + test.Run(OpTester::ExpectResult::kExpectSuccess, "", |
| 920 | + {kCudaExecutionProvider, kCudaNHWCExecutionProvider, |
| 921 | + kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); |
| 922 | +} |
| 923 | + |
| 924 | +// Test that per-batch sequence_lens containing 0 produces zero-filled Y_h for those batches. |
| 925 | +TEST(RNNTest, RNN_forward_sequence_lens_with_zero) { |
| 926 | + OpTester test("RNN"); |
| 927 | + int64_t num_directions = 1, input_size = 2, hidden_size = 3, batch_size = 2, seq_length = 2; |
| 928 | + |
| 929 | + test.AddAttribute("activations", vector<string>(num_directions, "Tanh")); |
| 930 | + test.AddAttribute("direction", "forward"); |
| 931 | + test.AddAttribute("hidden_size", hidden_size); |
| 932 | + |
| 933 | + // X shape: [seq_length=2, batch_size=2, input_size=2] |
| 934 | + std::vector<int64_t> X_dims = {seq_length, batch_size, input_size}; |
| 935 | + std::vector<float> X_data({0.1f, 0.2f, |
| 936 | + 0.3f, 0.4f, |
| 937 | + 0.5f, 0.6f, |
| 938 | + 0.7f, 0.8f}); |
| 939 | + test.AddInput<float>("X", X_dims, X_data); |
| 940 | + |
| 941 | + std::vector<int64_t> W_dims = {num_directions, hidden_size, input_size}; |
| 942 | + std::vector<float> W_data({-0.1f, 0.2f, 1.f, -2.f, -1.f, 3.f}); |
| 943 | + test.AddInput<float>("W", W_dims, W_data); |
| 944 | + |
| 945 | + std::vector<int64_t> R_dims = {num_directions, hidden_size, hidden_size}; |
| 946 | + std::vector<float> R_data(hidden_size * hidden_size, 0.f); |
| 947 | + test.AddInput<float>("R", R_dims, R_data); |
| 948 | + |
| 949 | + std::vector<int64_t> B_dims = {num_directions, 2 * hidden_size}; |
| 950 | + std::vector<float> B_data(2 * hidden_size, 0.f); |
| 951 | + test.AddInput<float>("B", B_dims, B_data); |
| 952 | + |
| 953 | + // batch 0 has sequence_lens=2, batch 1 has sequence_lens=0 |
| 954 | + std::vector<int64_t> sequence_lens_dims{batch_size}; |
| 955 | + std::vector<int> sequence_lens_data{2, 0}; |
| 956 | + test.AddInput<int>("sequence_lens", sequence_lens_dims, sequence_lens_data); |
| 957 | + |
| 958 | + std::vector<int64_t> initial_h_dims = {num_directions, batch_size, hidden_size}; |
| 959 | + std::vector<float> initial_h_data(num_directions * batch_size * hidden_size, 0.f); |
| 960 | + test.AddInput<float>("initial_h", initial_h_dims, initial_h_data); |
| 961 | + |
| 962 | + // Y output is optional; skip it to keep test simple. |
| 963 | + test.AddOptionalOutputEdge<float>(); |
| 964 | + |
| 965 | + // Y_h: shape [1, 2, 3] |
| 966 | + // batch 0 gets the result of forward pass at last time step (seq_length-1=1). |
| 967 | + // batch 1 has sequence_lens=0 so Y_h should be zero. |
| 968 | + // |
| 969 | + // For batch 0: |
| 970 | + // time_step 0: X=[0.1, 0.2], Y = tanh(X * W^T) = tanh([-0.1*0.1+0.2*0.2, 1*0.1-2*0.2, -1*0.1+3*0.2]) |
| 971 | + // = tanh([0.03, -0.3, 0.5]) |
| 972 | + // time_step 1: X=[0.5, 0.6], Y = tanh(X * W^T + H_prev * R^T) |
| 973 | + // R is zero, so Y = tanh([-0.1*0.5+0.2*0.6, 1*0.5-2*0.6, -1*0.5+3*0.6]) |
| 974 | + // = tanh([0.07, -0.7, 1.3]) |
| 975 | + float y_h_batch0_f0 = std::tanh(0.07f); |
| 976 | + float y_h_batch0_f1 = std::tanh(-0.7f); |
| 977 | + float y_h_batch0_f2 = std::tanh(1.3f); |
| 978 | + |
| 979 | + std::vector<int64_t> Y_h_dims{num_directions, batch_size, hidden_size}; |
| 980 | + std::vector<float> Y_h_data{y_h_batch0_f0, y_h_batch0_f1, y_h_batch0_f2, |
| 981 | + 0.f, 0.f, 0.f}; |
| 982 | + test.AddOutput<float>("Y_h", Y_h_dims, Y_h_data); |
| 983 | + |
| 984 | + test.Run(OpTester::ExpectResult::kExpectSuccess, "", |
| 985 | + {kCudaExecutionProvider, kCudaNHWCExecutionProvider, |
| 986 | + kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); |
| 987 | +} |
| 988 | + |
886 | 989 | } // namespace test |
887 | 990 | } // namespace onnxruntime |
0 commit comments