Qualcomm AI Engine Direct - Adding QNN backend support for trunc core ATen Op (pytorch#18543)

qti-horodnic · web-flow · commit 15e8bf70536b · 2026-03-30T13:10:09.000-05:00
diff --git a/backends/qualcomm/_passes/__init__.py b/backends/qualcomm/_passes/__init__.py
@@ -30,6 +30,7 @@
 from .decompose_silu import DecomposeSilu
 from .decompose_threshold import DecomposeThreshold
 from .decompose_triu import DecomposeTriu
+from .decompose_trunc import DecomposeTrunc
 from .decompose_wrap_with_autocast import DecomposeWrapWithAutocast
 from .expand_broadcast_tensor_shape import ExpandBroadcastTensorShape
 from .fixed_linear_keep_dim import FixedLinearKeepDim
@@ -81,6 +82,7 @@
     DecomposeSilu,
     DecomposeThreshold,
     DecomposeTriu,
+    DecomposeTrunc,
     DecomposeWrapWithAutocast,
     ExpandBroadcastTensorShape,
     FixedLinearKeepDim,
diff --git a/backends/qualcomm/_passes/decompose_trunc.py b/backends/qualcomm/_passes/decompose_trunc.py
@@ -0,0 +1,93 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.dialects.edge._ops import EdgeOpOverload
+from executorch.exir.pass_base import ExportPass, PassResult
+
+from .utils import copy_meta
+
+
+class DecomposeTrunc(ExportPass):
+    """
+    Rewrite trunc nodes as mul(sign(x), floor(abs(x))).
+
+    This relies on the identity trunc(x) = sign(x) * floor(abs(x)).
+    Both the ATen and the edge-dialect trunc overloads are matched; the
+    replacement nodes use ops from the same namespace as the matched node.
+    """
+
+    def __init__(self):
+        super().__init__()
+        # Overloads of trunc that this pass replaces.
+        self.trunc_targets = {
+            torch.ops.aten.trunc.default,
+            exir_ops.edge.aten.trunc.default,
+        }
+
+    def call(self, graph_module: torch.fx.GraphModule):
+        """
+        Replace every trunc node in graph_module, then clean up the graph.
+
+        Args:
+            graph_module: module whose graph is rewritten in place.
+
+        Returns:
+            A PassResult wrapping graph_module; its second argument is
+            always True, whether or not a trunc node was found.
+        """
+        graph = graph_module.graph
+        for node in graph.nodes:
+            # Only trunc call_function nodes are rewritten.
+            if node.op != "call_function":
+                continue
+            if node.target not in self.trunc_targets:
+                continue
+
+            # The first argument of trunc feeds both sign and abs.
+            source = node.args[0]
+            # Stay in the namespace (edge vs. ATen) of the node being replaced.
+            if isinstance(node.target, EdgeOpOverload):
+                aten = exir_ops.edge.aten
+            else:
+                aten = torch.ops.aten
+
+            # Each node is inserted right after the previous one, so the
+            # resulting order is: source, sign, abs, floor, mul.
+            # Every new node takes its meta from copy_meta(node.meta).
+            with graph.inserting_after(source):
+                sign_node = graph.create_node(
+                    "call_function", aten.sign.default, (source,)
+                )
+            sign_node.meta = copy_meta(node.meta)
+
+            with graph.inserting_after(sign_node):
+                abs_node = graph.create_node(
+                    "call_function", aten.abs.default, (source,)
+                )
+            abs_node.meta = copy_meta(node.meta)
+
+            with graph.inserting_after(abs_node):
+                floor_node = graph.create_node(
+                    "call_function", aten.floor.default, (abs_node,)
+                )
+            floor_node.meta = copy_meta(node.meta)
+
+            with graph.inserting_after(floor_node):
+                product = graph.create_node(
+                    "call_function", aten.mul.Tensor, (sign_node, floor_node)
+                )
+            product.meta = copy_meta(node.meta)
+
+            # Iterate over a snapshot of the users while rewiring them.
+            for consumer in tuple(node.users):
+                consumer.replace_input_with(node, product)
+
+        # Rewired trunc nodes are left without users and are removed here.
+        graph.eliminate_dead_code()
+        graph_module.recompile()
+        return PassResult(graph_module, True)
diff --git a/backends/qualcomm/_passes/qnn_pass_manager.py b/backends/qualcomm/_passes/qnn_pass_manager.py
@@ -35,6 +35,7 @@
     DecomposeSilu,
     DecomposeThreshold,
     DecomposeTriu,
+    DecomposeTrunc,
     DecomposeWrapWithAutocast,
     ExpandBroadcastTensorShape,
     FixedLinearKeepDim,
@@ -100,6 +101,7 @@ def get_capture_program_passes():
         (DecomposeLogVariants, True),
         (DecomposeMaxPool3d, True),
         (DecomposeMinMaxDim, True),
+        (DecomposeTrunc, True),
         (ExpandBroadcastTensorShape, True),
         (FixedLinearKeepDim, True),
         (FoldQDQ, True),
@@ -219,6 +221,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
         self.add_pass(DecomposeSilu())
         self.add_pass(DecomposeThreshold())
         self.add_pass(DecomposeTriu())
+        self.add_pass(DecomposeTrunc())
         self.add_pass(DecomposeWrapWithAutocast())
         self.add_pass(DecomposeEinsum())
         self.add_pass(DecomposeExpM1())
diff --git a/backends/qualcomm/_passes/utils.py b/backends/qualcomm/_passes/utils.py
@@ -71,6 +71,7 @@ def get_passes_dependency_for_capture_program():
         DecomposeLinalgVectorNorm,
         DecomposeLogVariants,
         DecomposeMaxPool3d,
+        DecomposeTrunc,
         ExpandBroadcastTensorShape,
         FixedLinearKeepDim,
         FoldQDQ,
@@ -99,6 +100,7 @@ def get_passes_dependency_for_capture_program():
         DecomposeLinalgVectorNorm: [RemoveRedundancy],
         DecomposeLogVariants: [RemoveRedundancy],
         DecomposeMaxPool3d: [RemoveRedundancy],
+        DecomposeTrunc: [RemoveRedundancy],
         ExpandBroadcastTensorShape: [FoldQDQ],
         FixedLinearKeepDim: [FoldQDQ],
         FoldQDQ: [AnnotateQuantAttrs, AnnotateStack, AnnotateUnbind],
diff --git a/backends/qualcomm/partition/common_defs.py b/backends/qualcomm/partition/common_defs.py
@@ -26,7 +26,6 @@
     exir_ops.edge.aten.median.dim,
     exir_ops.edge.aten.round.decimals,
     exir_ops.edge.aten.le.Scalar,
-    exir_ops.edge.aten.trunc.default,
 ]
 
 constant_operator = [
diff --git a/backends/qualcomm/tests/models.py b/backends/qualcomm/tests/models.py
@@ -2315,6 +2315,14 @@ def forward(self, x):
         return mask + x
 
 
+class Trunc(torch.nn.Module):
+    """Test module whose forward applies torch.trunc to its input."""
+
+    def forward(self, x):
+        # Delegates directly to torch.trunc.
+        return torch.trunc(x)
+
+
 class Unbind(torch.nn.Module):
     def __init__(self):
         super().__init__()
diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py
@@ -1918,6 +1918,11 @@ def test_qnn_backend_triu(self):
                         index += 1
                         self.lower_module_and_test_output(module, sample_input)
 
+    def test_qnn_backend_trunc(self):
+        """Run Trunc on a random 3x4 input via lower_module_and_test_output."""
+        inputs = (torch.randn(3, 4),)
+        self.lower_module_and_test_output(Trunc(), inputs)  # noqa: F405
+
     def test_qnn_backend_unflatten(self):
         module = Unflatten(dim=1, sizes=(2, 3, 4))  # noqa: F405
         sample_input = (torch.randn([1, 24]),)
@@ -4319,6 +4324,12 @@ def test_qnn_backend_triu(self):
                         qdq_module = self.get_qdq_module(module, sample_input)
                         self.lower_module_and_test_output(qdq_module, sample_input)
 
+    def test_qnn_backend_trunc(self):
+        """Pass Trunc through get_qdq_module, then lower and test the result."""
+        inputs = (torch.randn(3, 4),)
+        qdq_module = self.get_qdq_module(Trunc(), inputs)  # noqa: F405
+        self.lower_module_and_test_output(qdq_module, inputs)
+
     def test_qnn_backend_unflatten(self):
         module = Unflatten(dim=1, sizes=(2, 3, 4))  # noqa: F405
         sample_input = (torch.randn([1, 24]),)

Original file line number	Diff line number	Diff line change
`@@ -26,7 +26,6 @@`
`26`	`26`	`exir_ops.edge.aten.median.dim,`
`27`	`27`	`exir_ops.edge.aten.round.decimals,`
`28`	`28`	`exir_ops.edge.aten.le.Scalar,`
`29`		`- exir_ops.edge.aten.trunc.default,`
`30`	`29`	`]`
`31`	`30`
`32`	`31`	`constant_operator = [`