Enable QuantFusionPass in compiler pipeline (pytorch#19728)

ethansfng · web-flow · commit ec317357dce5 · 2026-05-30T01:32:23.000-04:00
Summary:

Cadence and another backend (not named in this message) now both use the
shared `QuantFusionPass` from `compiler_funcs.py`.

- `QuantFusionPass` in `compiler_funcs.py` iterates over the patterns, matches
on `anchor_ops()`, and calls `fuse()` on each match, with debug logging and
dead-code elimination
- Cadence: `compiler.py` now uses `QuantFusionPass` instead of the old
`QuantFusion` isinstance switch
- Removed Cadence `compiler` target's dep on `:fusion_pass` (no longer
imported)

Reviewed By: DrJessop

Differential Revision: D105728219
diff --git a/backends/cadence/aot/BUCK b/backends/cadence/aot/BUCK
@@ -44,7 +44,6 @@ fbcode_target(_kind = runtime.python_library,
         ":compiler_funcs",
         ":utils",
         "//caffe2:torch",
-        "//executorch/backends/cadence/aot/quantizer:fusion_pass",
         "//executorch/backends/cadence/aot/quantizer/passes:fuse_ops",
         "//executorch/backends/cadence/aot/quantizer:quantizer",
         "//executorch/backends/transforms:decompose_sdpa",
@@ -65,7 +64,6 @@ fbcode_target(_kind = runtime.python_library,
         ":replace_ops",
         ":utils",
         "//caffe2:torch",
-        "//executorch/backends/cadence/aot/quantizer:fusion_pass",
         "//executorch/backends/cadence/aot/quantizer:quantizer",
         "//executorch/backends/cadence/runtime:runtime",
         "//executorch/backends/transforms:decompose_sdpa",
diff --git a/backends/cadence/aot/compiler.py b/backends/cadence/aot/compiler.py
@@ -14,14 +14,14 @@
 import torch
 from executorch.backends.cadence.aot.compiler_funcs import (
     prepare as prepare_fn,
+    QuantFusionPass,
     QuantizedInputWrapper,
     trace as trace_fn,
 )
 from executorch.backends.cadence.aot.memory_planning import (
     CadenceMemoryPlanning,
     print_memory_planning_info,
 )
-from executorch.backends.cadence.aot.quantizer.fusion_pass import QuantFusion
 from executorch.backends.cadence.aot.quantizer.passes.fuse_ops import FuseQATConvBN
 from executorch.backends.cadence.aot.quantizer.quantizer import (
     CadenceDefaultQuantizer,
@@ -154,9 +154,9 @@ def apply_pre_edge_transform_passes(
     quantizer: CadenceQuantizer,
 ) -> ExportedProgram:
     """
-    Apply pre-edge transform passes including QuantFusion and torch ops passes.
+    Apply pre-edge transform passes including QuantFusionPass and torch ops passes.
     This mirrors the Cadence AOT compiler flow:
-    1. QuantFusion - fuses dq->op->q patterns
+    1. QuantFusionPass - fuses dq->op->q patterns
     2. apply_torch_ops_passes - applied just before to_edge()
 
     The quantizer must be the same as the one used to convert the model.
@@ -169,7 +169,7 @@ def apply_pre_edge_transform_passes(
     PassManager(
         [
             FuseQATConvBN(converted_program),
-            QuantFusion(patterns),
+            QuantFusionPass(patterns),
         ]
     )(converted_program.graph_module)