Revert "[Test] Add dtype-mismatch rejection test for GQA pre-norm fusion"

hariharans29 · hariharans29 · commit 199d5f7e9782 · 2026-05-15T14:28:57.000-07:00
This reverts commit cef037d.
diff --git a/onnxruntime/test/optimizer/group_query_attention_pre_norm_fusion_test.cc b/onnxruntime/test/optimizer/group_query_attention_pre_norm_fusion_test.cc
@@ -56,11 +56,6 @@ struct BuildOptions {
   // If true, pre-populate the GQA node's slot 14 with a q_norm_weight initializer so the
   // optimizer treats the node as already fused and skips it.
   bool pre_fused = false;
-  // If true, build the q_norm_weight initializer as float32 while the SLN input/output
-  // remain MLFloat16. SimplifiedLayerNormalization's schema allows scale (V) to differ
-  // from input/output (T), but the fused GQA op reuses T for the norm-weight slots, so
-  // the optimizer must reject the rewrite.
-  bool mismatched_norm_weight_dtype = false;
 };
 
 void BuildQwenQkPostNormPattern(ModelTestBuilder& builder, const BuildOptions& opts) {
@@ -82,13 +77,10 @@ void BuildQwenQkPostNormPattern(ModelTestBuilder& builder, const BuildOptions& o
   NodeArg* seqlens_k = builder.MakeInput<int32_t>(std::vector<int64_t>{kBatch}, std::vector<int32_t>{0});
   NodeArg* total_seq_len = builder.MakeInput<int32_t>(std::vector<int64_t>{1}, std::vector<int32_t>{1});
 
-  // Norm weight initializers: [head_size]. (Or non-1D when forcing a shape mismatch, or
-  // a different element type to exercise the dtype-mismatch gate.)
+  // Norm weight initializers: [head_size]. (Or non-1D when forcing a shape mismatch.)
   std::vector<int64_t> q_norm_weight_shape =
       opts.break_q_norm_weight_shape ? std::vector<int64_t>{1, kHeadSize} : std::vector<int64_t>{kHeadSize};
-  NodeArg* q_norm_weight = opts.mismatched_norm_weight_dtype
-                               ? builder.MakeInitializer<float>(q_norm_weight_shape, 1.0f, 1.0f)
-                               : builder.MakeInitializer<MLFloat16>(q_norm_weight_shape, MLFloat16(1.0f), MLFloat16(1.0f));
+  NodeArg* q_norm_weight = builder.MakeInitializer<MLFloat16>(q_norm_weight_shape, MLFloat16(1.0f), MLFloat16(1.0f));
   NodeArg* k_norm_weight = builder.MakeInitializer<MLFloat16>({kHeadSize}, MLFloat16(1.0f), MLFloat16(1.0f));
 
   // Reshape "shape" initializers.
@@ -262,19 +254,6 @@ TEST_F(GraphTransformationTests, GroupQueryAttentionPreNormFusionRejectsNon1DNor
       TransformerLevel::Level2, /*steps=*/1, nullptr, CheckUnfusedGraph));
 }
 
-TEST_F(GraphTransformationTests, GroupQueryAttentionPreNormFusionRejectsMismatchedNormWeightDtype) {
-  // SimplifiedLayerNormalization permits its scale (V) to differ from input/output (T),
-  // but the fused GroupQueryAttention slot reuses T for the norm-weight inputs. Wiring a
-  // float32 scale into a float16 chain must skip the rewrite to avoid changing type
-  // constraints on the fused node.
-  BuildOptions opts;
-  opts.mismatched_norm_weight_dtype = true;
-  auto build = [opts](ModelTestBuilder& builder) { BuildQwenQkPostNormPattern(builder, opts); };
-  ASSERT_STATUS_OK(TestGraphTransformer(
-      build, /*opset_version=*/21, *logger_, MakeWebGpuTransformer(),
-      TransformerLevel::Level2, /*steps=*/1, nullptr, CheckUnfusedGraph));
-}
-
 TEST_F(GraphTransformationTests, GroupQueryAttentionPreNormFusionSkipsCpuEp) {
   // Build the pattern but assign all nodes to CPU EP. The fusion is gated to WebGPU only,
   // so the graph must remain unfused.