Skip to content

Commit 7412efe

Browse files
authored
Fix issue where the dequant parent node of the mul node was not checked
Differential Revision: D91337631 Pull Request resolved: #16832
1 parent 2d6c2ef commit 7412efe

2 files changed

Lines changed: 156 additions & 92 deletions

File tree

backends/cadence/aot/fuse_ops.py

Lines changed: 84 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -819,68 +819,86 @@ def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
819819

820820

821821
@register_cadence_pass(CadencePassAttribute(opt_level=1))
822-
class FuseMulScalarIntoDequantPass(ExportPass):
822+
class FuseMulScalarIntoDequantPass(RemoveOrReplacePassInterface):
823823
"""
824824
Looks for the pattern where aten.mul.Scalar is multiplying the
825825
outputs of dequantize. If found, updates the dequant scale
826826
to reflect the multiplication and removes the mul node.
827827
"""
828828

829-
def attempt_fusion(
830-
self, graph_module: torch.fx.GraphModule, node: torch.fx.Node
831-
) -> None:
832-
if node.target not in {
833-
exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
834-
exir_ops.edge.cadence.dequantize_per_tensor.default,
835-
}:
836-
return
829+
@property
830+
def targets(self) -> list[EdgeOpOverload]:
831+
return [exir_ops.edge.aten.mul.Scalar]
837832

838-
# ensure that the single user of dequant is aten.mul.Scalar
839-
user = list(node.users.keys())[0]
840-
if len(node.users) != 1 or user.target != exir_ops.edge.aten.mul.Scalar:
841-
return
833+
def maybe_remove_or_replace(self, node: torch.fx.Node) -> bool:
834+
# Ensure that the single user of dequant is aten.mul.Scalar
835+
mul_node = node
836+
input_nodes = mul_node.all_input_nodes
837+
if len(input_nodes) != 1 or len(input_nodes[0].users) != 1:
838+
return False
842839

843-
# ensure that the other arg to mul is a node (i.e. not a constant)
844-
if len(user.args) > 1 and isinstance(user.args[1], torch.fx.Node):
845-
return
840+
dequant_node = input_nodes[0]
846841

847-
new_deq_args = list(node.args)
848-
assert isinstance(node.args[1], Number)
849-
assert isinstance(user.args[1], Number)
850-
# pyre-ignore[58]: Unsupported operand *
851-
new_deq_args[1] = node.args[1] * user.args[1]
842+
if dequant_node.target not in [
843+
exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
844+
exir_ops.edge.cadence.dequantize_per_tensor.default,
845+
]:
846+
return False
852847

853-
logging.debug(
854-
f"Fused {node} and {user} into {node}. Updated scale from {node.args[1]} to {new_deq_args[1]}"
855-
)
848+
if len(mul_node.args) <= 1 or isinstance(mul_node.args[1], torch.fx.Node):
849+
return False
856850

857-
user.replace_all_uses_with(node)
858-
node.args = tuple(new_deq_args)
851+
new_deq_args = list(dequant_node.args)
852+
assert isinstance(dequant_node.args[1], Number)
853+
assert isinstance(mul_node.args[1], Number)
854+
# pyre-ignore[58]: Unsupported operand *
855+
new_deq_args[1] = dequant_node.args[1] * mul_node.args[1]
859856

860-
graph_module.graph.erase_node(user)
857+
# Replace all uses of mul with the dequant node
858+
mul_node.replace_all_uses_with(dequant_node)
859+
# Update the dequant node's args with the new scale
860+
dequant_node.args = tuple(new_deq_args)
861861

862-
graph_module.recompile()
862+
# Erase the mul node
863+
mul_node.graph.erase_node(mul_node)
863864

864-
def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
865-
for node in graph_module.graph.nodes:
866-
self.attempt_fusion(graph_module, node)
867-
result = super().call(graph_module)
868-
return result
865+
logging.debug(
866+
f"Fused {dequant_node} and {mul_node} into {dequant_node}. Updated scale from {dequant_node.args[1]} to {new_deq_args[1]}"
867+
)
868+
return True
869869

870870

871871
@register_cadence_pass(CadencePassAttribute(opt_level=1))
872-
class FuseMulTensorIntoQuantPass(ExportPass):
872+
class FuseMulTensorIntoQuantPass(RemoveOrReplacePassInterface):
873873
"""
874874
Looks for the pattern where aten.mul.Tensor is followed by quant node.
875875
If found, updates the quant scale to reflect the multiplication and
876876
removes the mul node.
877877
"""
878878

879-
def attempt_fusion(
880-
self, graph_module: torch.fx.GraphModule, mul_node: torch.fx.Node
881-
) -> None:
882-
if len(mul_node.args) != 2 or len(mul_node.users) != 1:
883-
return
879+
@property
880+
def targets(self) -> list[EdgeOpOverload]:
881+
return [exir_ops.edge.aten.mul.Tensor]
882+
883+
def maybe_remove_or_replace(self, node: torch.fx.Node) -> bool:
884+
885+
mul_node = node
886+
if len(mul_node.users) != 1:
887+
return False
888+
889+
user = next(iter(mul_node.users))
890+
user_input_nodes = user.all_input_nodes
891+
if len(user_input_nodes) != 1:
892+
return False
893+
894+
if user.target not in [
895+
exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
896+
exir_ops.edge.cadence.quantize_per_tensor.default,
897+
]:
898+
return False
899+
900+
# Alias for readability.
901+
quant_node = user
884902

885903
first_arg = cast(torch.fx.Node, mul_node.args[0])
886904
second_arg = cast(torch.fx.Node, mul_node.args[1])
@@ -896,22 +914,11 @@ def attempt_fusion(
896914
input_node = second_arg
897915
else:
898916
# Full node is not found, skip.
899-
return
917+
return False
900918

901919
# Ensure that the mul op does not do any broadcasting.
902-
if input_node.meta["val"].shape != mul_node.meta["val"].shape:
903-
return
904-
905-
mul_user = list(mul_node.users.keys())[0]
906-
907-
# Ensure only the expected quant ops are using the current mul op.
908-
if mul_user.target not in {
909-
exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
910-
exir_ops.edge.cadence.quantize_per_tensor.default,
911-
}:
912-
return
913-
914-
quant_node = mul_user
920+
if input_node.meta["val"].shape != node.meta["val"].shape:
921+
return False
915922

916923
# Calculate the new scale value.
917924
old_scale = quant_node.args[1]
@@ -925,42 +932,41 @@ def attempt_fusion(
925932
new_scale = old_scale / mul_scalar
926933
q = zp + x / new_scale
927934
"""
935+
936+
# Cannot fuse if either value is zero:
937+
# - mul_scalar == 0 would cause division by zero computing new_scale
938+
# - old_scale == 0 would result in new_scale = 0, causing division by zero during quantization
939+
if mul_scalar == 0 or old_scale == 0:
940+
return False
928941
new_scale = float(old_scale) / float(mul_scalar)
929942

930943
logging.debug(
931-
f"Fused {mul_node} and {full_node} into {quant_node}. Updated scale from {quant_node.args[1]} to {new_scale}"
944+
f"Fused {node} and {full_node} into {quant_node}. Updated scale from {quant_node.args[1]} to {new_scale}"
932945
)
933946

934947
# Update quant node input and scale.
935948
old_quant_input = cast(torch.fx.Node, quant_node.args[0])
936-
new_quant_input = cast(torch.fx.Node, mul_node.args[0])
949+
new_quant_input = input_node
937950
quant_node.replace_input_with(old_quant_input, new_quant_input)
938951
quant_node.update_arg(1, new_scale)
939952

940-
def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
941-
for node in graph_module.graph.find_nodes(
942-
op="call_function", target=exir_ops.edge.aten.mul.Tensor
943-
):
944-
self.attempt_fusion(graph_module, node)
945-
graph_module.graph.eliminate_dead_code()
946-
return super().call(graph_module)
953+
return True
947954

948955

949956
@register_cadence_pass(CadencePassAttribute(opt_level=1))
950-
class FuseMulTensorIntoDequantPass(ExportPass):
957+
class FuseMulTensorIntoDequantPass(RemoveOrReplacePassInterface):
951958
"""
952959
Looks for the pattern where aten.mul is multiplying the outputs of dequantize
953960
and aten.full, or vice versa. If found, updates the dequant scale to reflect
954961
the multiplication and removes the full and mul nodes.
955962
"""
956963

957-
def attempt_fusion(
958-
self, graph_module: torch.fx.GraphModule, node: torch.fx.Node
959-
) -> None:
960-
if node.target != exir_ops.edge.aten.mul.Tensor:
961-
return
964+
@property
965+
def targets(self) -> list[EdgeOpOverload]:
966+
return [exir_ops.edge.aten.mul.Tensor]
962967

963-
# ensure that one of the args to mul is dequantize and the other is aten.full
968+
def maybe_remove_or_replace(self, node: torch.fx.Node) -> bool:
969+
# Ensure that one of the args to mul is dequantize and the other is aten.full
964970
dequant_nodes = [
965971
arg
966972
for arg in node.args
@@ -980,14 +986,14 @@ def attempt_fusion(
980986
]
981987

982988
if len(dequant_nodes) != 1 or len(multiplier_nodes) != 1:
983-
return
989+
return False
984990

985991
deq_node = dequant_nodes[0]
986992
mplier_node = multiplier_nodes[0]
987993

988-
# ensure that dequant and full don't have any other users
994+
# Ensure that dequant and full don't have any other users
989995
if len(deq_node.users) > 1 or len(mplier_node.users) > 1:
990-
return
996+
return False
991997

992998
new_deq_args = list(deq_node.args)
993999
assert isinstance(deq_node.args[1], Number)
@@ -999,18 +1005,16 @@ def attempt_fusion(
9991005
f"Fused {node} and {mplier_node} into {deq_node}. Updated scale from {deq_node.args[1]} to {new_deq_args[1]}"
10001006
)
10011007

1008+
# Replace all uses of the mul node with the dequant node
10021009
node.replace_all_uses_with(deq_node)
1010+
# Update the dequant node's args with the new scale
10031011
deq_node.args = tuple(new_deq_args)
10041012

1005-
graph_module.graph.erase_node(node)
1006-
graph_module.graph.erase_node(mplier_node)
1007-
graph_module.recompile()
1013+
# Erase the mul and full nodes
1014+
node.graph.erase_node(node)
1015+
node.graph.erase_node(mplier_node)
10081016

1009-
def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
1010-
for node in graph_module.graph.nodes:
1011-
self.attempt_fusion(graph_module, node)
1012-
result = super().call(graph_module)
1013-
return result
1017+
return True
10141018

10151019

10161020
@register_cadence_pass(CadencePassAttribute(opt_level=1))

backends/cadence/aot/tests/test_fusion_ops_passes.py

Lines changed: 72 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -602,7 +602,8 @@ def test_fuse_mul_into_dequant(self) -> None:
602602
FULL_VALUE: Final[float] = 3
603603

604604
builder = GraphBuilder()
605-
x = builder.placeholder("x", torch.randn(*INPUT_SHAPE, dtype=torch.float32))
605+
x_input = torch.randint(low=0, high=255, size=INPUT_SHAPE, dtype=torch.uint8)
606+
x = builder.placeholder("x", x_input)
606607
dequant = builder.call_operator(
607608
op=exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
608609
args=(x, DEQUANT_SCALE, 0, 0, 255, torch.uint8),
@@ -617,8 +618,17 @@ def test_fuse_mul_into_dequant(self) -> None:
617618
)
618619
builder.output([mul])
619620
original_graph = builder.get_graph_module()
621+
gm_before = copy.deepcopy(original_graph)
622+
620623
p = FuseMulTensorIntoDequantPass()
621-
converted_graph = cast(PassResult, p(original_graph)).graph_module
624+
result = cast(PassResult, p(original_graph))
625+
self.assertTrue(result.modified)
626+
converted_graph = result.graph_module
627+
628+
# Validate numerical accuracy
629+
validate_numerics(
630+
gm_before, converted_graph, (x_input,), "FuseMulTensorIntoDequantPass"
631+
)
622632

623633
# verify that the mul and full ops were removed
624634
self.check_op_counts(
@@ -640,12 +650,49 @@ def test_fuse_mul_into_dequant(self) -> None:
640650
deq_scale = node.args[1]
641651
self.assertEqual(deq_scale, DEQUANT_SCALE * FULL_VALUE)
642652

653+
def test_fuse_mul_into_dequant_no_match(self) -> None:
654+
"""
655+
Test that FuseMulTensorIntoDequantPass does NOT modify the graph
656+
when the mul node's inputs are not dequant + full.
657+
"""
658+
INPUT_SHAPE: Final[List[int]] = [4, 32]
659+
660+
builder = GraphBuilder()
661+
# Create two regular placeholder inputs (not dequant outputs)
662+
x_input = torch.randn(*INPUT_SHAPE, dtype=torch.float32)
663+
y_input = torch.randn(*INPUT_SHAPE, dtype=torch.float32)
664+
x = builder.placeholder("x", x_input)
665+
y = builder.placeholder("y", y_input)
666+
667+
# Mul of two placeholders - no dequant node involved
668+
mul = builder.call_operator(
669+
op=exir_ops.edge.aten.mul.Tensor,
670+
args=(x, y),
671+
)
672+
builder.output([mul])
673+
original_graph = builder.get_graph_module()
674+
675+
p = FuseMulTensorIntoDequantPass()
676+
result = cast(PassResult, p(original_graph))
677+
678+
# The pass should NOT modify the graph since there's no dequant node
679+
self.assertFalse(result.modified)
680+
681+
# Verify that the mul op is still present
682+
self.check_op_counts(
683+
result.graph_module,
684+
expected_op_counts={
685+
exir_ops.edge.aten.mul.Tensor: 1,
686+
},
687+
)
688+
643689
def test_fuse_mul_scalar_into_dequant(self) -> None:
644690
dequant_scale = 0.006
645691
mul_value = 0.3
646692

647693
builder = GraphBuilder()
648-
x = builder.placeholder("x", torch.randn(2, 3, 4, dtype=torch.float32))
694+
x_input = torch.randn(2, 3, 4, dtype=torch.float32)
695+
x = builder.placeholder("x", x_input)
649696
quant = builder.call_operator(
650697
op=exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
651698
args=(x, 1, 0, -128, 127, torch.int8),
@@ -660,8 +707,17 @@ def test_fuse_mul_scalar_into_dequant(self) -> None:
660707
)
661708
builder.output([mul_scalar])
662709
original_graph = builder.get_graph_module()
710+
gm_before = copy.deepcopy(original_graph)
711+
663712
p = FuseMulScalarIntoDequantPass()
664-
converted_graph = cast(PassResult, p(original_graph)).graph_module
713+
result = cast(PassResult, p(original_graph))
714+
self.assertTrue(result.modified)
715+
converted_graph = result.graph_module
716+
717+
# Validate numerical accuracy
718+
validate_numerics(
719+
gm_before, converted_graph, (x_input,), "FuseMulScalarIntoDequantPass"
720+
)
665721

666722
# verify that the mul and full ops were removed
667723
self.check_op_counts(
@@ -687,7 +743,8 @@ def test_fuse_mul_into_quant(self) -> None:
687743
mul_value = 10
688744

689745
builder = GraphBuilder()
690-
x = builder.placeholder("x", torch.randn(4, 32, dtype=torch.float32))
746+
x_input = torch.randn(4, 32, dtype=torch.float32)
747+
x = builder.placeholder("x", x_input)
691748
full = builder.call_operator(
692749
op=exir_ops.edge.aten.full.default,
693750
args=([1], mul_value),
@@ -702,8 +759,17 @@ def test_fuse_mul_into_quant(self) -> None:
702759
)
703760
builder.output([quant])
704761
original_graph = builder.get_graph_module()
762+
gm_before = copy.deepcopy(original_graph)
763+
705764
p = FuseMulTensorIntoQuantPass()
706-
converted_graph = cast(PassResult, p(original_graph)).graph_module
765+
result = cast(PassResult, p(original_graph))
766+
self.assertTrue(result.modified)
767+
converted_graph = result.graph_module
768+
769+
# Validate numerical accuracy
770+
validate_numerics(
771+
gm_before, converted_graph, (x_input,), "FuseMulTensorIntoQuantPass"
772+
)
707773

708774
# verify that the mul and full ops were removed
709775
self.check_op_counts(
@@ -723,12 +789,6 @@ def test_fuse_mul_into_quant(self) -> None:
723789
new_quant_scale = node.args[1]
724790
self.assertEqual(new_quant_scale, quant_scale / mul_value)
725791

726-
# verify the math is correct
727-
inp = torch.randn(4, 32, dtype=torch.float32)
728-
original_out = original_graph(inp)[0]
729-
new_out = converted_graph(inp)[0]
730-
assert torch.equal(original_out, new_out)
731-
732792
def test_fuse_then_transpose_pass(self) -> None:
733793
# Create a graph with full -> transpose -> permute -> view.
734794
builder = GraphBuilder()

0 commit comments

Comments
 (0)