
Commit 6e540da

mcremon-metameta-codesync[bot] authored and committed
Move permute optimization passes to shared transforms location (#19002)
Summary:
Pull Request resolved: #19002

Move 6 permute optimization passes and their shared infrastructure from executorch/backends/cadence/aot/ to executorch/backends/transforms/ so they can be shared between the Cadence and Arm backends without a cross-backend dependency.

New files:
- permute_pass_utils.py: base classes (HierarchicalInplacePassInterface, RemoveOrReplacePassInterface, FuseOpPairsAcrossBranchesPass) and utilities (get_arg, set_arg, get_transposed_dims, get_permuted_dims, get_shape, get_edge_overload_packet)
- fuse_cascaded_transpose_or_permute_ops.py
- fuse_cascaded_view_ops.py
- fuse_transpose_or_permute_op_pairs_pass.py
- remove_permutes_around_elementwise_ops.py
- postpone_permute_below_squeeze_view.py
- replace_nop_transpose_or_permute_with_view.py

The shared versions omit the register_cadence_pass decorators and the Cadence-specific ops from the default op sets. The Cadence files subclass the shared versions and re-add the decorators and ops.

Added OSS tests (test_permute_optimization_passes.py) for the 4 passes that can be imported without quantized op registration: FuseCascadedTransposeOrPermuteOps, FuseCascadedViewOps, PostponePermuteOpBelowSqueezeOrUnsqueezeLikeView, and ReplaceNopTransposeOrPermuteWithViewPass. These run in GitHub CI via pytest and are discovered automatically through pytest.ini testpaths.

Differential Revision: D101459577

Reviewed By: ethansfng
1 parent 8e5ec80 commit 6e540da

15 files changed

Lines changed: 1630 additions & 942 deletions
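
The Cadence files keep their public class names by subclassing the shared implementations and re-applying the Cadence-only decorator, as the summary above describes. A minimal sketch of that pattern, assuming register_cadence_pass and CadencePassAttribute live in executorch/backends/cadence/aot/pass_utils (as the BUCK deps below suggest):

```python
# Sketch of the subclass-and-redecorate pattern from the commit summary.
# Assumption: register_cadence_pass / CadencePassAttribute come from
# executorch/backends/cadence/aot/pass_utils, per the BUCK deps below.
from executorch.backends.cadence.aot.pass_utils import (
    CadencePassAttribute,
    register_cadence_pass,
)
from executorch.backends.transforms.fuse_cascaded_view_ops import (
    FuseCascadedViewOps as _SharedFuseCascadedViewOps,
)


# The shared pass carries the graph rewrite; the Cadence subclass only
# re-adds the registration metadata stripped from the shared copy.
@register_cadence_pass(CadencePassAttribute(opt_level=1))
class FuseCascadedViewOps(_SharedFuseCascadedViewOps):
    pass
```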

backends/cadence/aot/BUCK

Lines changed: 7 additions & 0 deletions
@@ -267,6 +267,10 @@ fbcode_target(_kind = runtime.python_library,
         "//caffe2:torch",
         "//executorch/backends/cadence/aot:pass_utils",
         "//executorch/backends/cadence/aot:utils",
+        "//executorch/backends/transforms:fuse_cascaded_transpose_or_permute_ops",
+        "//executorch/backends/transforms:fuse_cascaded_view_ops",
+        "//executorch/backends/transforms:fuse_transpose_or_permute_op_pairs_pass",
+        "//executorch/backends/transforms:permute_pass_utils",
         "//executorch/exir:pass_base",
         "//executorch/exir/dialects:lib",
         "//executorch/exir/dialects/edge:lib",
@@ -304,6 +308,7 @@ fbcode_target(_kind = runtime.python_library,
         "//executorch/backends/cadence/aot:pass_utils",
         "//executorch/backends/cadence/aot:simplify_ops",
         "//executorch/backends/transforms:remove_clone_ops",
+        "//executorch/backends/transforms:remove_permutes_around_elementwise_ops",
         "//executorch/exir:pass_base",
         "//executorch/exir/dialects:lib",
         "//executorch/exir/dialects/edge:lib",
@@ -322,6 +327,7 @@ fbcode_target(_kind = runtime.python_library,
         "//executorch/backends/cadence/aot:compiler_utils",
         "//executorch/backends/cadence/aot:pass_utils",
         "//executorch/backends/cadence/aot:utils",
+        "//executorch/backends/transforms:postpone_permute_below_squeeze_view",
         "//executorch/exir:pass_base",
         "//executorch/exir:tensor",
         "//executorch/exir/dialects:lib",
@@ -343,6 +349,7 @@ fbcode_target(_kind = runtime.python_library,
         "//executorch/backends/cadence/aot:pass_utils",
         "//executorch/backends/cadence/aot:remove_ops",
         "//executorch/backends/cadence/aot:utils",
+        "//executorch/backends/transforms:replace_nop_transpose_or_permute_with_view",
         "//executorch/backends/transforms:replace_scalar_with_tensor",
         "//executorch/exir:pass_base",
         "//executorch/exir/dialects:lib",

backends/cadence/aot/fuse_ops.py

Lines changed: 18 additions & 276 deletions
@@ -41,6 +41,18 @@
 from executorch.exir.pass_base import ExportPass, PassResult
 from executorch.exir.passes.cse_pass import CSEPass
 from torch.nn.utils.fusion import fuse_conv_bn_weights
+from executorch.backends.transforms.fuse_cascaded_transpose_or_permute_ops import (
+    FuseCascadedTransposeOrPermuteOps as _SharedFuseCascadedTransposeOrPermuteOps,
+)
+from executorch.backends.transforms.fuse_cascaded_view_ops import (
+    FuseCascadedViewOps as _SharedFuseCascadedViewOps,
+)
+from executorch.backends.transforms.fuse_transpose_or_permute_op_pairs_pass import (
+    FuseTransposeOrPermuteOpPairsPass as _SharedFuseTransposeOrPermuteOpPairsPass,
+)
+from executorch.backends.transforms.permute_pass_utils import (
+    FuseOpPairsAcrossBranchesPass,
+)
 
 
 def get_tensor_arg(node: torch.fx.Node, arg_name: str) -> torch.Tensor:
@@ -578,207 +590,14 @@ def maybe_remove_or_replace(self, node: torch.fx.Node) -> bool:
 
 
 @register_cadence_pass(CadencePassAttribute(opt_level=1))
-class FuseCascadedTransposeOrPermuteOps(RemoveOrReplacePassInterface):
-    """
-    Fuse a chain of transpose and permute ops into a single permute or a no-op.
-    Handles branches and chains permutes.
-    """
-
-    transpose_or_permute_target = {
-        exir_ops.edge.aten.transpose_copy.int,
-        exir_ops.edge.aten.permute_copy.default,
-    }
-
-    @property
-    def targets(self) -> list[EdgeOpOverload]:
-        return list(self.transpose_or_permute_target)
-
-    def maybe_remove_or_replace(self, node: torch.fx.Node) -> bool:
-        # Fuse with the parent node if it's also a permute or a transpose. Since the
-        # pass interface traverses all ops in order the pass will properly fuse a chain
-        # of permutes.
-        parent_node = get_arg(node, "input", torch.fx.Node)
-        if parent_node.target not in self.transpose_or_permute_target:
-            return False
-        input_of_parent = get_arg(parent_node, "input", torch.fx.Node)
-
-        # Compute combined effect of permutes.
-        dims = list(range(node.meta["val"].ndim))
-
-        if parent_node.target == exir_ops.edge.aten.transpose_copy.int:
-            dims = get_transposed_dims(parent_node, dims)
-        else:
-            dims = get_permuted_dims(parent_node, dims)
-
-        if node.target == exir_ops.edge.aten.transpose_copy.int:
-            dims = get_transposed_dims(node, dims)
-        else:
-            dims = get_permuted_dims(node, dims)
-
-        # If combined effect is identity replace the node with input.
-        if dims == sorted(dims):
-            node.replace_all_uses_with(input_of_parent)
-        else:
-            with node.graph.inserting_before(node):
-                new_permute = node.graph.call_function(
-                    exir_ops.edge.aten.permute_copy.default,
-                    args=(input_of_parent, dims),
-                )
-                new_permute.meta = node.meta
-                node.replace_all_uses_with(new_permute)
-
-        return True
+class FuseCascadedTransposeOrPermuteOps(_SharedFuseCascadedTransposeOrPermuteOps):
+    pass
 
 
 @register_cadence_pass(CadencePassAttribute(opt_level=1))
-class FuseCascadedViewOps(RemoveOrReplacePassInterface):
-    """
-    Fuse a cascaded chain of view ops
-    """
+class FuseCascadedViewOps(_SharedFuseCascadedViewOps):
+    pass
 
-    @property
-    def targets(self) -> list[EdgeOpOverload]:
-        return [exir_ops.edge.aten.view_copy.default]
-
-    def maybe_remove_or_replace(self, node: torch.fx.Node) -> bool:
-        # Check if the input to this view node is also a view node
-        input_view = node.args[0]
-        if not isinstance(input_view, torch.fx.Node):
-            return False
-
-        if (
-            input_view.op != "call_function"
-            or input_view.target != exir_ops.edge.aten.view_copy.default
-        ):
-            return False
-
-        # Replace the input of this view node with the input of the cascaded view
-        # This effectively "skips" the intermediate view node
-        node.replace_input_with(input_view, cast(torch.fx.Node, input_view.args[0]))
-        return True
-
-
-class FuseOpPairsAcrossBranchesPass(ExportPass):
-    """
-    Base class for passes that fuse op pairs across branches.
-    Provides common functionality for finding and fusing producer-consumer chains.
-    """
-
-    def check_ok_to_fuse(
-        self,
-        producer: torch.fx.Node,
-        consumers: list[torch.fx.Node],
-    ) -> bool:
-        # Always ok to replace / remove.
-        return True
-
-    def can_fuse_for_chain(
-        self,
-        producer: torch.fx.Node,
-        consumer: torch.fx.Node,
-        consumer_op_packets: set[EdgeOpOverloadPacket],
-    ) -> bool:
-        """
-        Returns true if producer and consumer can be fused for a single chain
-        (-> producer -> ops -> consumer ->) to (-> ops -> fused_op)
-        """
-        if (
-            isinstance(consumer.target, EdgeOpOverload)
-            and get_edge_overload_packet(consumer.target) in consumer_op_packets
-        ):
-            return True
-        return False
-
-    def get_fuse_candidates(
-        self,
-        producer: torch.fx.Node,
-        consumer_op_packets: set[EdgeOpOverloadPacket],
-        bypass_ops: set[EdgeOpOverload],
-    ) -> list[torch.fx.Node]:
-        # Start by iterating over all the users of this node, and check
-        # if they are have their target in consumer_op_packets.
-        users = deque(producer.users.keys())
-        # This holds the list of the user ops that directly (or transitively
-        # via view/slice) consume this producer_op_packets, and hence can be removed.
-        removal_candidates = []
-        while users:
-            user = users.popleft()
-
-            # If the user is a bypass op, we bypass it, and examine
-            # its users instead for consumer_op_packets.
-            if user.target in bypass_ops:
-                users.extend(list(user.users.keys()))
-            elif self.can_fuse_for_chain(producer, user, consumer_op_packets):
-                removal_candidates.append(user)
-            else:
-                removal_candidates.clear()
-                break
-        return removal_candidates
-
-    def find_and_fuse(
-        self,
-        graph_module: torch.fx.GraphModule,
-        producer_op_packets: set[EdgeOpOverloadPacket],
-        consumer_op_packets: set[EdgeOpOverloadPacket],
-        bypass_ops: set[EdgeOpOverload],
-    ) -> bool:
-        """
-        Find and fuse producer-consumer op pairs.
-
-        Returns True if any fusion was performed, False otherwise.
-        """
-        modified = False
-        for node in graph_module.graph.nodes:
-            # We are only interested in ops that have overload target in
-            # producer_op.
-            if not (
-                isinstance(node.target, EdgeOpOverload)
-                and get_edge_overload_packet(node.target) in producer_op_packets
-            ):
-                continue
-
-            removal_candidates = self.get_fuse_candidates(
-                node, consumer_op_packets, bypass_ops
-            )
-
-            if len(removal_candidates) == 0:
-                # No candidates found.
-                continue
-
-            if not self.check_ok_to_fuse(node, removal_candidates):
-                # Not ok to remove quant-dequant pairs or replace with requantize.
-                continue
-
-            self.fuse(node, removal_candidates, graph_module)
-            modified = True
-
-        if modified:
-            graph_module.recompile()
-
-        return modified
-
-    def get_fused_node(
-        self,
-        producer: torch.fx.Node,
-        consumer: torch.fx.Node,
-        graph_module: torch.fx.GraphModule,
-    ) -> torch.fx.Node:
-        return consumer
-
-    def fuse(
-        self,
-        node: torch.fx.Node,
-        removal_candidates: list[torch.fx.Node],
-        graph_module: torch.fx.GraphModule,
-    ) -> None:
-        # Replace all the uses of the producer op with it's input.
-        node.replace_all_uses_with(cast(torch.fx.Node, node.args[0]))
-        graph_module.graph.erase_node(node)
-
-        # Iterate over all the removal candidates (quantize op users) and generate replacements.
-        for rnode in removal_candidates:
-            rnode.replace_all_uses_with(self.get_fused_node(node, rnode, graph_module))
-            graph_module.graph.erase_node(rnode)
 
 
 @register_cadence_pass(CadencePassAttribute(opt_level=1))
@@ -1123,90 +942,13 @@ def maybe_remove_or_replace(self, node: torch.fx.Node) -> bool:
 
 
 @register_cadence_pass(CadencePassAttribute(opt_level=1))
-class FuseTransposeOrPermuteOpPairsPass(FuseOpPairsAcrossBranchesPass):
-    """
-    Fuse transpose or permute op pairs to a single view op.
-    (transpose or permutation) -> (quant or dequant) -> (transpose or permutation)
-    This happens when op2(op1) == identity, modulo unitary dimensions.
-    'unitary dimensions' example: a tensor of shape [1, 5, 30] is equivalent (in memory) to [5, 1, 30]
-    so transpose(1, 2) then transpose(0, 2) is a pseudo identity and should be fused.
-    """
-
-    # A list of ops that can be bypassed when looking for a
-    # dequantize->quantize chain
-    bypass_ops: set[EdgeOpOverload] = {
+class FuseTransposeOrPermuteOpPairsPass(_SharedFuseTransposeOrPermuteOpPairsPass):
+    bypass_ops: set[EdgeOpOverload] = _SharedFuseTransposeOrPermuteOpPairsPass.bypass_ops | {
         exir_ops.edge.cadence.quantize_per_tensor.default,
-        exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
-        exir_ops.edge.quantized_decomposed.quantize_per_channel.default,
         exir_ops.edge.cadence.dequantize_per_tensor.default,
-        exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
-        exir_ops.edge.quantized_decomposed.dequantize_per_channel.default,
         exir_ops.edge.cadence.quantized_relu.per_tensor,
     }
 
-    def can_fuse_for_chain(
-        self,
-        producer: torch.fx.Node,
-        consumer: torch.fx.Node,
-        consumer_op_packets: set[EdgeOpOverloadPacket],
-    ) -> bool:
-        if not super().can_fuse_for_chain(producer, consumer, consumer_op_packets):
-            return False
-
-        # checking that permut2(permut1(identity)) == identity, modulo unitary dimensions
-        producer_input = cast(torch.fx.Node, producer.args[0])
-        if "val" not in producer_input.meta:
-            return False
-        input_shape = producer_input.meta["val"].shape
-        ident_dims = list(range(len(input_shape)))
-        # this mapping helps to handle both transpose and permutations
-        f: dict[Any, Callable] = {
-            exir_ops.edge.aten.transpose_copy.int: get_transposed_dims,
-            exir_ops.edge.aten.permute_copy.default: get_permuted_dims,
-        }
-        in_dims = f[producer.target](producer, ident_dims)
-        out_dims = f[consumer.target](consumer, in_dims)
-        # Filtering out unitary dimensions
-        non_unit_ident_dims = [dim for dim in ident_dims if input_shape[dim] != 1]
-        non_unit_out_dims = [dim for dim in out_dims if input_shape[dim] != 1]
-        return non_unit_out_dims == non_unit_ident_dims
-
-    def get_fused_node(
-        self,
-        producer: torch.fx.Node,
-        consumer: torch.fx.Node,
-        graph_module: torch.fx.GraphModule,
-    ) -> torch.fx.Node:
-        # This step is important because of how we can fuse transpositions that are not perfectly
-        # reverse one of another but will be fused if there are unitary dimensions.
-        # The fused operation must have the same output shape as the consumer.
-        output_shape = consumer.meta["val"].shape
-        with graph_module.graph.inserting_after(consumer):
-            view = graph_module.graph.call_function(
-                exir_ops.edge.aten.view_copy.default,
-                (consumer.args[0], output_shape),
-                {},
-            )
-        return view
-
-    def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
-        # Remove any transpose/permutation op pair that cancel each other.
-        modified = self.find_and_fuse(
-            graph_module,
-            producer_op_packets={
-                exir_ops.edge.aten.transpose_copy,
-                exir_ops.edge.aten.permute_copy,
-            },
-            consumer_op_packets={
-                exir_ops.edge.aten.transpose_copy,
-                exir_ops.edge.aten.permute_copy,
-            },
-            bypass_ops=self.bypass_ops,
-        )
-        if modified:
-            return super().call(graph_module)
-        return PassResult(graph_module, False)
-
 
 @register_cadence_pass(CadencePassAttribute(opt_level=1))
 class FuseFullThenReshapePass(RemoveOrReplacePassInterface):
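
The "identity modulo unitary dimensions" condition enforced by the removed can_fuse_for_chain can be checked with plain lists. A self-contained sketch of the docstring's [1, 5, 30] example, using a hypothetical transpose helper standing in for get_transposed_dims (which operates on fx nodes):

```python
# Worked example: on shape [1, 5, 30], transpose(1, 2) followed by
# transpose(0, 2) composes to a pseudo identity, so the pair can be
# replaced by a single view with the consumer's output shape.
def transpose(dims: list[int], i: int, j: int) -> list[int]:
    # Hypothetical stand-in for get_transposed_dims: swap two axes.
    out = list(dims)
    out[i], out[j] = out[j], out[i]
    return out

input_shape = (1, 5, 30)
ident = list(range(len(input_shape)))  # [0, 1, 2]
dims = transpose(ident, 1, 2)          # [0, 2, 1]
dims = transpose(dims, 0, 2)           # [1, 2, 0] -- not a strict identity

def non_unit(order: list[int]) -> list[int]:
    # Drop size-1 axes; they do not affect the contiguous memory layout.
    return [d for d in order if input_shape[d] != 1]

assert non_unit(dims) == non_unit(ident)  # both [1, 2] -> fusible to a view
```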
