transforms: Fix bug in SDPA decomposition (#17238)

oscarandersson8218 · web-flow · commit 62d6dc1fb23e · 2026-02-05T15:39:47.000+01:00
When SDPA with is_causal=True was lowered, a bug in decompose_sdpa was discovered: the pass assumed that every entry in node.args is a node, which caused a crash when an argument is not a node. The pass has been updated to read node.all_input_nodes instead of node.args. New test cases are added to backends/arm/test/ops/test_sdpa.py. cc @freddan80 @per @zingo @digantdesai Signed-off-by: Oscar Andersson <oscar.andersson@arm.com>
diff --git a/backends/arm/test/ops/test_sdpa.py b/backends/arm/test/ops/test_sdpa.py
@@ -1,10 +1,10 @@
-# Copyright 2025 Arm Limited and/or its affiliates.
+# Copyright 2025-2026 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
 
-from typing import Tuple
+from typing import Callable, Tuple
 
 import torch
 
@@ -17,28 +17,57 @@
 
 
 class SDPA(torch.nn.Module):
-    def __init__(self):
+    def __init__(self, attn_mask=None, is_causal=False):
         super().__init__()
+        self.attn_mask = attn_mask
+        self.is_causal = is_causal
 
     def forward(self, query, key, value):
         return torch.nn.functional.scaled_dot_product_attention(
-            query, key, value, attn_mask=None, dropout_p=0.0, is_causal=False
+            query, key, value, attn_mask=self.attn_mask, is_causal=self.is_causal
         )
 
 
 input_t = Tuple[torch.Tensor, torch.Tensor, torch.Tensor]
-
-
-def test_sdpa_tosa_FP():
-    test_input = tuple(torch.randn(1, 3, 197, 64) for x in range(3))
-    pipeline = TosaPipelineFP[input_t](SDPA(), test_input, [], [])
+test_case_t = Callable[[], Tuple[SDPA, input_t]]
+
+test_suite = {
+    # test_name: generator(model, inputs)
+    "randn_no_mask_non_causal": lambda: (
+        SDPA(attn_mask=None, is_causal=False),
+        tuple(torch.randn(1, 3, 197, 64) for _ in range(3)),
+    ),
+    "randn_no_mask_causal": lambda: (
+        SDPA(attn_mask=None, is_causal=True),
+        tuple(torch.randn(1, 3, 197, 64) for _ in range(3)),
+    ),
+    "randn_with_bool_mask_non_causal": lambda: (
+        SDPA(attn_mask=(torch.rand(1, 3, 197, 1) > 0.5), is_causal=False),
+        tuple(torch.randn(1, 3, 197, 64) for _ in range(3)),
+    ),
+    "randn_with_additive_mask_non_causal": lambda: (
+        SDPA(
+            attn_mask=torch.where(torch.rand(1, 3, 197, 1) > 0.5, 0.0, -float("inf")),
+            is_causal=False,
+        ),
+        tuple(torch.randn(1, 3, 197, 64) for _ in range(3)),
+    ),
+    # causal with mask is not supported in PyTorch (https://docs.pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html)
+}
+
+
+@common.parametrize("test_case", test_suite)
+def test_sdpa_tosa_FP(test_case: test_case_t):
+    model, test_input = test_case()
+    pipeline = TosaPipelineFP[input_t](model, test_input, [], [])
     pipeline.pop_stage("check_count.exir")
     pipeline.run()
 
 
-def test_sdpa_tosa_INT():
-    test_input = tuple(torch.randn(1, 3, 197, 64) for x in range(3))
-    pipeline = TosaPipelineINT[input_t](SDPA(), test_input, [], [])
+@common.parametrize("test_case", test_suite)
+def test_sdpa_tosa_INT(test_case: test_case_t):
+    model, test_input = test_case()
+    pipeline = TosaPipelineINT[input_t](model, test_input, [], [])
     pipeline.pop_stage("check.quant_nodes")
     pipeline.pop_stage("check_count.exir")
     pipeline.pop_stage(
@@ -48,10 +77,11 @@ def test_sdpa_tosa_INT():
 
 
 @common.SkipIfNoModelConverter
-def test_sdpa_vgf_no_quant():
-    test_input = tuple(torch.randn(1, 3, 197, 64) for _ in range(3))
+@common.parametrize("test_case", test_suite)
+def test_sdpa_vgf_no_quant(test_case: test_case_t):
+    model, test_input = test_case()
     pipeline = VgfPipeline[input_t](
-        SDPA(),
+        model,
         test_input,
         [],
         [],
@@ -61,13 +91,10 @@ def test_sdpa_vgf_no_quant():
 
 
 @common.SkipIfNoModelConverter
-def test_sdpa_vgf_quant():
-    test_input = tuple(torch.randn(1, 3, 197, 64) for _ in range(3))
+@common.parametrize("test_case", test_suite)
+def test_sdpa_vgf_quant(test_case: test_case_t):
+    model, test_input = test_case()
     pipeline = VgfPipeline[input_t](
-        SDPA(),
-        test_input,
-        [],
-        [],
-        quantize=True,
+        model, test_input, [], [], quantize=True, run_on_vulkan_runtime=False
     )
     pipeline.run()
diff --git a/backends/transforms/decompose_sdpa.py b/backends/transforms/decompose_sdpa.py
@@ -1,6 +1,6 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
-# Copyright 2025 Arm Limited and/or its affiliates.
+# Copyright 2025-2026 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -49,7 +49,7 @@ def _decompose_sdpa_node(
         allow_non_fake_inputs: bool,
     ) -> None:
         graph = graph_module.graph
-        input_tensors = (arg.meta["val"] for arg in node.args)
+        input_tensors = (input_node.meta["val"] for input_node in node.all_input_nodes)
         scale = node.kwargs.get("scale", None)
 
         # refer to pytorch/test/test_decomp.py