Remove deprecated XNNPACK capture utilities and migrate tests (#18134)

JacobSzwejbka · web-flow · commit 1e17e28abd58 · 2026-03-16T13:09:32.000-07:00
Summary:

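Delete capture_graph_for_xnnpack() and get_xnnpack_capture_config(), which
were only used in test files and relied on the deprecated exir.capture API.
Migrate test_xnnpack_utils.py and size_analysis_tool_test.py to use
to_edge(export(...)) calls.
Remove the deprecated exports from xnnpack/__init__.py.
Update the size analysis tool to also collect parameter and buffer tensors
from placeholder nodes via the exported program's graph signature.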

Differential Revision: D95605468
diff --git a/backends/xnnpack/__init__.py b/backends/xnnpack/__init__.py
@@ -12,23 +12,17 @@
 
 # Exposed Configs in XNNPACK Package
 from .utils.configs import (
-    get_xnnpack_capture_config,
     get_xnnpack_edge_compile_config,
     get_xnnpack_executorch_backend_config,
 )
 
-# Easy util functions
-from .utils.utils import capture_graph_for_xnnpack
-
 # XNNPACK Backend
 from .xnnpack_preprocess import XnnpackBackend
 
 __all__ = [
     "XnnpackDynamicallyQuantizedPartitioner",
     "XnnpackPartitioner",
     "XnnpackBackend",
-    "capture_graph_for_xnnpack",
-    "get_xnnpack_capture_config",
     "get_xnnpack_edge_compile_config",
     "get_xnnpack_executorch_backend_config",
 ]
diff --git a/backends/xnnpack/test/test_xnnpack_utils.py b/backends/xnnpack/test/test_xnnpack_utils.py
@@ -10,8 +10,6 @@
 
 import torch
 import torch.nn.functional as F
-from executorch import exir
-
 from executorch.backends.xnnpack.partition.xnnpack_partitioner import (
     XnnpackDynamicallyQuantizedPartitioner,
     XnnpackPartitioner,
@@ -25,7 +23,6 @@
     get_xnnpack_edge_compile_config,
     get_xnnpack_executorch_backend_config,
 )
-from executorch.backends.xnnpack.utils.utils import capture_graph_for_xnnpack
 
 # import the xnnpack backend implementation
 from executorch.backends.xnnpack.xnnpack_preprocess import XnnpackBackend
@@ -35,7 +32,7 @@
 from executorch.devtools.bundled_program.serialize import (
     serialize_from_bundled_program_to_flatbuffer,
 )
-from executorch.exir import ExecutorchProgram, ExirExportedProgram
+from executorch.exir import EdgeProgramManager, to_edge
 from executorch.exir.backend.backend_api import to_backend, validation_disabled
 
 from executorch.exir.passes.spec_prop_pass import SpecPropPass
@@ -157,6 +154,14 @@ def assert_outputs_equal(self, model_output, ref_output):
                 torch.allclose(model_output[0], ref_output, atol=1e-03, rtol=1e-03)
             )
 
+    def _capture_graph_for_xnnpack(
+        self, module: torch.nn.Module, sample_inputs: Tuple[torch.Tensor]
+    ) -> EdgeProgramManager:
+        return to_edge(
+            export(module, sample_inputs, strict=True),
+            compile_config=get_xnnpack_edge_compile_config(),
+        ).transform(*get_transform_passes())
+
     def lower_module_and_test_output(
         self,
         module: Any,
@@ -167,15 +172,15 @@ def lower_module_and_test_output(
         # TODO: remove this after we migrate to use long term flow
         quantizer_api_test: bool = False,
         dump_bundled_program: bool = False,  # for debugging, dump the generated bundled program file
-    ) -> ExirExportedProgram:
+    ) -> EdgeProgramManager:
         """
         Helper testing function that takes a torch.nn.Module and lowers it to XNNPACK with
         the given sample inputs. It then runs the lowered module and compares its
         outputs with the outputs of the eager module.
         """
 
         if quantizer_api_test:
-            assert isinstance(module, ExirExportedProgram)
+            assert isinstance(module, EdgeProgramManager)
             edge_program = module
         else:
 
@@ -187,7 +192,9 @@ def __init__(self):
                 def forward(self, *args):
                     return self.one_module(*args)
 
-            edge_program = capture_graph_for_xnnpack(WrappedModule(), sample_inputs)
+            edge_program = self._capture_graph_for_xnnpack(
+                WrappedModule(), sample_inputs
+            )
 
         partitioner = None
         if quantized:
@@ -201,35 +208,32 @@ def forward(self, *args):
         if use_partitioner:
             with validation_disabled():
                 delegated_program = edge_program
-                delegated_program.exported_program = to_backend(
-                    edge_program.exported_program, partitioner
+                delegated_program._edge_programs["forward"] = to_backend(
+                    edge_program.exported_program(), partitioner
                 )
 
-            executorch_program: ExecutorchProgram = delegated_program.to_executorch(
+            executorch_program = delegated_program.to_executorch(
                 get_xnnpack_executorch_backend_config([SpecPropPass()]),
             )
         else:
-            delegated_program = to_backend(
-                "XnnpackBackend", edge_program.exported_program, []
+            delegated_module = to_backend(
+                "XnnpackBackend", edge_program.exported_program(), []
             )
 
-            exported_program: ExirExportedProgram = capture_graph_for_xnnpack(
-                delegated_program, sample_inputs
+            exported_program = self._capture_graph_for_xnnpack(
+                delegated_module, sample_inputs
             )
-            executorch_program: ExecutorchProgram = exported_program.to_executorch(
+            executorch_program = exported_program.to_executorch(
                 get_xnnpack_executorch_backend_config(),
             )
 
-        # print("Graph Module with delegate:")
-        # delegated_module.print_readable()
-
         # Assert the backend name is xnnpack
         self.assertEqual(
-            executorch_program.program.execution_plan[0].delegates[0].id,
+            executorch_program.executorch_program.execution_plan[0].delegates[0].id,
             XnnpackBackend.__name__,
         )
 
-        ref_output = delegated_program(*sample_inputs)
+        ref_output = delegated_program.exported_program().module()(*sample_inputs)
         if dump_bundled_program:
             save_bundled_program(
                 representative_inputs=sample_inputs,
@@ -325,14 +329,9 @@ def quantize_and_test_model_with_quantizer(
         prepared = prepare_pt2e(m, quantizer)
         converted = convert_pt2e(prepared)
 
-        captured_program = exir.capture(
-            converted,
-            example_inputs,
-            config=exir.CaptureConfig(enable_aot=True, _unlift=True),
-        )
-
-        edge_program = captured_program.to_edge(
-            get_xnnpack_edge_compile_config()
+        edge_program = to_edge(
+            export(converted, example_inputs, strict=True),
+            compile_config=get_xnnpack_edge_compile_config(),
         ).transform(*get_transform_passes())
         delegated_module = self.lower_module_and_test_output(
             module=edge_program,
@@ -350,7 +349,7 @@ def quantize_and_test_model_with_quantizer(
         }
         for op in supported_ops:
             FileCheck().check_count(op, 0, exactly=True).run(
-                delegated_module.exported_program.graph_module.code
+                delegated_module.exported_program().graph_module.code
             )
 
     def _test_xnnpack_dqlinear(
@@ -398,12 +397,14 @@ def _test_xnnpack_dqlinear(
             prepared_linear,
         )
 
-        captured_dqlinear = capture_graph_for_xnnpack(converted_linear, example_inputs)
+        captured_dqlinear = self._capture_graph_for_xnnpack(
+            converted_linear, example_inputs
+        )
 
-        captured_dqlinear.exported_program.graph_module.graph.print_tabular()
+        captured_dqlinear.exported_program().graph_module.graph.print_tabular()
 
         lowered_module = to_backend(
-            "XnnpackBackend", captured_dqlinear.exported_program, []
+            "XnnpackBackend", captured_dqlinear.exported_program(), []
         )
 
         class CompositeModule(torch.nn.Module):
@@ -417,19 +418,19 @@ def forward(self, x):
         composite_model = CompositeModule()
         composite_model(*example_inputs)
 
-        exported_program: ExirExportedProgram = capture_graph_for_xnnpack(
+        exported_program = self._capture_graph_for_xnnpack(
             composite_model, example_inputs
         )
-        executorch_program: ExecutorchProgram = exported_program.to_executorch(
+        executorch_program = exported_program.to_executorch(
             get_xnnpack_executorch_backend_config(),
         )
 
         self.assertEqual(
-            executorch_program.program.execution_plan[0].delegates[0].id,
+            executorch_program.executorch_program.execution_plan[0].delegates[0].id,
             XnnpackBackend.__name__,
         )
 
-        ref_output = captured_dqlinear(*example_inputs)
+        ref_output = captured_dqlinear.exported_program().module()(*example_inputs)
         ref_output = composite_model(*example_inputs)
         print("ref_output:", ref_output)
 
diff --git a/backends/xnnpack/utils/configs.py b/backends/xnnpack/utils/configs.py
@@ -4,10 +4,9 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-from typing import List, Optional
+from typing import List
 
 import executorch.exir as exir
-from executorch.exir import CaptureConfig
 from executorch.exir.pass_manager import PassType
 
 
@@ -33,17 +32,3 @@ def get_xnnpack_executorch_backend_config(
         passes=additional_passes,
         extract_delegate_segments=True,
     )
-
-
-def get_xnnpack_capture_config(
-    dynamic_shape=False,
-    enable_aot: Optional[bool] = None,
-    unlift: Optional[bool] = None,
-):
-    if enable_aot is None:
-        return CaptureConfig(enable_dynamic_shape=dynamic_shape)
-    else:
-        unlift = unlift if unlift is not None else enable_aot
-        return CaptureConfig(
-            enable_dynamic_shape=dynamic_shape, enable_aot=enable_aot, _unlift=unlift
-        )
diff --git a/backends/xnnpack/utils/utils.py b/backends/xnnpack/utils/utils.py
@@ -6,14 +6,8 @@
 
 from typing import Any, cast, Optional, Tuple
 
-import executorch.exir as exir
 import torch
 
-from executorch.backends.xnnpack.utils.configs import (
-    get_transform_passes,
-    get_xnnpack_capture_config,
-    get_xnnpack_edge_compile_config,
-)
 from executorch.exir import ExportedProgram
 from executorch.exir.dialects._ops import ops as exir_ops
 
@@ -28,24 +22,6 @@
 from torchao.quantization.pt2e.utils import _is_conv_node, _is_conv_transpose_node
 
 
-### XNNPACK Capture ###
-def capture_graph_for_xnnpack(
-    module: torch.nn.Module,
-    inputs: Tuple[torch.Tensor],
-    enable_aot: Optional[bool] = None,
-    unlift: Optional[bool] = None,
-) -> exir.ExirExportedProgram:
-    return (
-        exir.capture(
-            module,
-            inputs,
-            get_xnnpack_capture_config(enable_aot=enable_aot, unlift=unlift),
-        )
-        .to_edge(get_xnnpack_edge_compile_config())
-        .transform(*get_transform_passes())
-    )
-
-
 ### XNNPACK Utils ###
 PERM_NCHW_TO_NHWC = [0, 2, 3, 1]
 PERM_NHWC_TO_NCHW = [0, 3, 1, 2]
diff --git a/devtools/size_analysis_tool/size_analysis_tool.py b/devtools/size_analysis_tool/size_analysis_tool.py
@@ -56,13 +56,14 @@ def _get_delegate_blob_data(
     return delegate_blob_data
 
 
-def _get_nested_model_data(
+def _get_nested_model_data(  # noqa: C901
     graph_module: torch.fx.GraphModule,
     delegate_deserializers: Optional[
         Dict[str, Callable[[bytes], Dict[str, Any]]]
     ] = None,
     tensor_data: Optional[List[Dict[str, Any]]] = None,
     delegate_blob_data: Optional[List[Dict[str, Any]]] = None,
+    exported_program: Optional["ExportedProgram"] = None,
 ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
     if tensor_data is None:
         tensor_data = []
@@ -71,7 +72,20 @@ def _get_nested_model_data(
         delegate_blob_data = []
 
     for node in graph_module.graph.nodes:
-        if node.op == "get_attr":
+        if node.op == "placeholder" and exported_program is not None:
+            sig = exported_program.graph_signature
+            fqn = None
+            if node.name in getattr(sig, "inputs_to_parameters", {}):
+                fqn = sig.inputs_to_parameters[node.name]
+            elif node.name in getattr(sig, "inputs_to_buffers", {}):
+                fqn = sig.inputs_to_buffers[node.name]
+
+            if fqn is not None:
+                tensor = exported_program.state_dict.get(fqn)
+                if isinstance(tensor, torch.Tensor):
+                    tensor_data.append(_get_tensor_data(node, tensor))
+
+        elif node.op == "get_attr":
             node_attr = getattr(node.graph.owning_module, node.target)
             if isinstance(node_attr, torch.Tensor):
                 tensor_data.append(_get_tensor_data(node, node_attr))
@@ -105,7 +119,7 @@ def generate_model_size_information(
     """
 
     tensor_and_delegate_blob_data = _get_nested_model_data(
-        model.graph_module, delegate_deserializers
+        model.graph_module, delegate_deserializers, exported_program=model
     )
 
     for data_list in tensor_and_delegate_blob_data:
diff --git a/devtools/size_analysis_tool/size_analysis_tool_test.py b/devtools/size_analysis_tool/size_analysis_tool_test.py
@@ -11,15 +11,16 @@
     XnnpackFloatingPointPartitioner,
 )
 from executorch.backends.xnnpack.utils.configs import (
+    get_xnnpack_edge_compile_config,
     get_xnnpack_executorch_backend_config,
 )
-from executorch.backends.xnnpack.utils.utils import capture_graph_for_xnnpack
 
 from executorch.devtools.size_analysis_tool.size_analysis_tool import (
     generate_model_size_information,
 )
-from executorch.exir.backend.backend_api import to_backend, validation_disabled
+from executorch.exir import to_edge
 from executorch.exir.passes.spec_prop_pass import SpecPropPass
+from torch.export import export
 
 
 class SizeAnalysisToolTest(unittest.TestCase):
@@ -52,21 +53,20 @@ def forward(self, x):
 
         test_input = torch.ones(size=(4, 7, 5, 6), dtype=torch.float)
 
-        edge_program = capture_graph_for_xnnpack(mm, (test_input,))
+        edge_program = to_edge(
+            export(mm, (test_input,), strict=True),
+            compile_config=get_xnnpack_edge_compile_config(),
+        )
         partitioner = XnnpackFloatingPointPartitioner()
 
-        with validation_disabled():
-            delegated_program = edge_program
-            delegated_program.exported_program = to_backend(
-                edge_program.exported_program, partitioner
-            )
+        delegated_program = edge_program.to_backend(partitioner)
 
         program = delegated_program.to_executorch(
             get_xnnpack_executorch_backend_config([SpecPropPass()]),
         )
 
         size_information = generate_model_size_information(
-            model=program,
+            model=program.exported_program(),
             delegate_deserializers=None,
             flatbuffer=program.buffer,
         )