@@ -880,6 +880,95 @@ TEST(MatMulNBits, Basic_M10_N128_K512) {
880880}
881881#endif
882882
883+ // Test that out-of-range g_idx values are rejected with INVALID_ARGUMENT.
884+ // CUDA EP is excluded from these tests, so no risk of hitting CUDA_KERNEL_ASSERT.
885+ TEST (MatMulNBits, InvalidGIdx_OutOfRange) {
886+ constexpr int64_t M = 2 , N = 4 , K = 32 , block_size = 16 ;
887+ constexpr int64_t k_blocks = (K + block_size - 1 ) / block_size; // 2
888+ constexpr int64_t blob_size = block_size * QBits / 8 ; // 8
889+
890+ OpTester test (" MatMulNBits" , 1 , kMSDomain );
891+ test.AddAttribute <int64_t >(" K" , K);
892+ test.AddAttribute <int64_t >(" N" , N);
893+ test.AddAttribute <int64_t >(" block_size" , block_size);
894+ test.AddAttribute <int64_t >(" bits" , QBits);
895+ test.AddAttribute <int64_t >(" accuracy_level" , int64_t {0 });
896+
897+ // A: [M, K]
898+ std::vector<float > a_data (M * K, 1 .0f );
899+ test.AddInput <float >(" A" , {M, K}, a_data, false );
900+
901+ // B: [N, k_blocks, blob_size]
902+ std::vector<uint8_t > b_data (N * k_blocks * blob_size, 0 );
903+ test.AddInput <uint8_t >(" B" , {N, k_blocks, blob_size}, b_data, true );
904+
905+ // scales: [N, k_blocks]
906+ std::vector<float > scales (N * k_blocks, 1 .0f );
907+ test.AddInput <float >(" scales" , {N, k_blocks}, scales, true );
908+
909+ // zero_points: optional (skip)
910+ test.AddOptionalInputEdge <uint8_t >();
911+
912+ // g_idx with out-of-range values (valid range is [0, k_blocks) = [0, 2))
913+ std::vector<int32_t > g_idx (K);
914+ for (int64_t i = 0 ; i < K; i++) {
915+ g_idx[i] = 99999 ; // way out of range
916+ }
917+ test.AddInput <int32_t >(" g_idx" , {K}, g_idx, true );
918+
919+ // bias: optional (skip)
920+ test.AddOptionalInputEdge <float >();
921+
922+ // Output placeholder (won't actually be compared since we expect failure)
923+ std::vector<float > y_data (M * N, 0 .0f );
924+ test.AddOutput <float >(" Y" , {M, N}, y_data);
925+
926+ test.Run (OpTester::ExpectResult::kExpectFailure , " group_index value" ,
927+ {kCudaExecutionProvider , kCudaNHWCExecutionProvider , kDmlExecutionProvider , kWebGpuExecutionProvider ,
928+ kOpenVINOExecutionProvider });
929+ }
930+
931+ // Test that negative g_idx values are rejected.
932+ TEST (MatMulNBits, InvalidGIdx_Negative) {
933+ constexpr int64_t M = 2 , N = 4 , K = 32 , block_size = 16 ;
934+ constexpr int64_t k_blocks = (K + block_size - 1 ) / block_size;
935+ constexpr int64_t blob_size = block_size * QBits / 8 ;
936+
937+ OpTester test (" MatMulNBits" , 1 , kMSDomain );
938+ test.AddAttribute <int64_t >(" K" , K);
939+ test.AddAttribute <int64_t >(" N" , N);
940+ test.AddAttribute <int64_t >(" block_size" , block_size);
941+ test.AddAttribute <int64_t >(" bits" , QBits);
942+ test.AddAttribute <int64_t >(" accuracy_level" , int64_t {0 });
943+
944+ std::vector<float > a_data (M * K, 1 .0f );
945+ test.AddInput <float >(" A" , {M, K}, a_data, false );
946+
947+ std::vector<uint8_t > b_data (N * k_blocks * blob_size, 0 );
948+ test.AddInput <uint8_t >(" B" , {N, k_blocks, blob_size}, b_data, true );
949+
950+ std::vector<float > scales (N * k_blocks, 1 .0f );
951+ test.AddInput <float >(" scales" , {N, k_blocks}, scales, true );
952+
953+ test.AddOptionalInputEdge <uint8_t >();
954+
955+ // g_idx with negative values
956+ std::vector<int32_t > g_idx (K);
957+ for (int64_t i = 0 ; i < K; i++) {
958+ g_idx[i] = -1 ;
959+ }
960+ test.AddInput <int32_t >(" g_idx" , {K}, g_idx, true );
961+
962+ test.AddOptionalInputEdge <float >();
963+
964+ std::vector<float > y_data (M * N, 0 .0f );
965+ test.AddOutput <float >(" Y" , {M, N}, y_data);
966+
967+ test.Run (OpTester::ExpectResult::kExpectFailure , " group_index value" ,
968+ {kCudaExecutionProvider , kCudaNHWCExecutionProvider , kDmlExecutionProvider , kWebGpuExecutionProvider ,
969+ kOpenVINOExecutionProvider });
970+ }
971+
883972} // namespace test
884973} // namespace onnxruntime
885974
0 commit comments