NXP backend: Add option to run test reference quantized in Python (#17733)

MartinPavella · web-flow · commit 28fc8b390dc1 · 2026-03-02T11:29:22.000+01:00
### Summary
NXP tests run models delegated to Neutron using the NSYS simulator. To
determine the correct output, a reference model is run on the CPU.
Previously, there were two choices for the reference: a non-delegated
.pte file running in C++, or the original non-quantized float32 PyTorch
model running in Python. This PR adds a third option (running the
quantized edge dialect program in Python) and makes it easy to add two
more options (quantized ATen and float ATen in Python) in the future.

### Test plan
Unit-tests provided.
diff --git a/backends/nxp/requirements-eiq.txt b/backends/nxp/requirements-eiq.txt
@@ -1,3 +1,3 @@
 --index-url https://eiq.nxp.com/repository
-eiq_neutron_sdk==2.2.2
+eiq-neutron-sdk==2.2.2
 eiq_nsys
diff --git a/backends/nxp/tests_models/executors.py b/backends/nxp/tests_models/executors.py
@@ -9,16 +9,15 @@
 import os.path
 import shutil
 import subprocess
+from enum import Enum
 from os import mkdir
 
 import numpy as np
 import torch
 from executorch.backends.nxp.backend.edge_helper import is_channels_last_dim_order
 from executorch.backends.nxp.backend.ir.converter.conversion import translator
 from executorch.backends.nxp.neutron_partitioner import NeutronPartitioner
-
 from executorch.backends.nxp.tests_models.config_importer import test_config
-
 from executorch.backends.nxp.tests_models.dataset_creator import RandomDatasetCreator
 from executorch.backends.nxp.tests_models.graph_verifier import GraphVerifier
 from executorch.backends.nxp.tests_models.model_input_spec import ModelInputSpec
@@ -28,13 +27,15 @@
 from executorch.backends.nxp.tests_models.outputs_dir_importer import outputs_dir
 from executorch.backends.nxp.tests_models.utils import (
     save_pte_program,
+    to_quantized_edge_program,
     to_quantized_executorch_program,
 )
 from executorch.devtools.visualization.visualization_utils import (
     visualize_with_clusters,
 )
 from pytest_mock import MockerFixture
 from torch.export import ExportedProgram
+from torch.fx import GraphModule
 
 logger = logging.getLogger(__name__)
 
@@ -45,6 +46,14 @@
 NEUTRON_TEST_PATH = test_config.NEUTRON_TEST_PATH
 
 
+class ReferenceModel(Enum):
+    QUANTIZED_EXECUTORCH_CPP = 0
+    QUANTIZED_EDGE_PYTHON = 1
+    # QUANTIZED_ATEN_PYTHON = 2  # Not implemented.
+    # FLOAT_ATEN_PYTHON = 3  # Not implemented.
+    FLOAT_PYTORCH_PYTHON = 4
+
+
 def _run_delegated_executorch_program(
     model,
     test_dir,
@@ -266,14 +275,27 @@ def store_results(
             output_array.tofile(bin_file_path)
 
 
-def _run_pytorch_program(
-    model,
+def _run_python_program(
+    model: torch.nn.Module | GraphModule,
     testing_dataset_dir,
     input_spec: list[ModelInputSpec],
     output_spec: list[torch.Tensor],
     cpu_results_dir,
     npu_results_dir,
 ):
+    """Run a model in Python with channels first (contiguous) inputs.
+
+    :param model: Any PyTorch/ExecuTorch model runnable directly in python with channels first (contiguous) inputs.
+    :param testing_dataset_dir: Directory containing testing data. The samples can be channels last (NHWC) or channels
+                                 first (NCHW). The format must match the input_spec.dim_order. The data will be
+                                 converted to channels first if needed.
+    :param input_spec: List of ModelInputSpec defining the shape, type, and dimension order of each input.
+    :param output_spec: List of output tensor specifications.
+    :param cpu_results_dir: Directory where CPU results will be stored. The structure will match the existing structure
+                             of `npu_results_dir`.
+    :param npu_results_dir: Directory where NPU results are already stored, to serve as reference directory structure
+                             for `cpu_results_dir`.
+    """
     all_outputs = []
 
     for input_samples in read_prepared_samples(testing_dataset_dir, input_spec):
@@ -333,7 +355,7 @@ def convert_run_compare(
     dataset_creator=None,
     output_comparator=None,
     mocker: MockerFixture = None,
-    run_cpu_version_in_pytorch: bool = False,
+    reference_model: ReferenceModel = ReferenceModel.QUANTIZED_EXECUTORCH_CPP,
     use_qat: bool = False,
 ):
     """
@@ -347,7 +369,7 @@ def convert_run_compare(
     :param dataset_creator: Creator that should fill provided `dataset_dir` with model input samples.
     :param output_comparator: Comparator of results produced by NPU and CPU runs of the program.
     :param dlg_model_verifier: Graph verifier instance.
-    :param run_cpu_version_in_pytorch: If True, runs CPU version in float32 PyTorch instead of quantized ExecuTorch.
+    :param reference_model: Version of the model which will be run to obtain reference output data.
     :param mocker: Mocker instance used by visualizer.
     :param use_qat: If True, applies quantization-aware training before conversion (without the QAT training).
     """
@@ -393,25 +415,55 @@ def convert_run_compare(
 
     output_spec = _get_program_output_spec(delegated_program)
 
-    if run_cpu_version_in_pytorch:
-        _run_pytorch_program(
-            model,
-            testing_dataset_dir,
-            input_spec,
-            output_spec,
-            cpu_results_dir,
-            npu_results_dir,
-        )
-    else:
-        _run_non_delegated_executorch_program(
-            model,
-            test_dir,
-            test_name,
-            calibration_dataset_dir,
-            testing_dataset_dir,
-            input_spec,
-            cpu_results_dir,
-        )
+    match reference_model:
+        case ReferenceModel.QUANTIZED_EXECUTORCH_CPP:
+            # Lower to quantized executorch program, export to `.pte` file and run in c++ using
+            #  examples/nxp/executor_runner/nxp_executor_runner.cpp
+            _run_non_delegated_executorch_program(
+                model,
+                test_dir,
+                test_name,
+                calibration_dataset_dir,
+                testing_dataset_dir,
+                input_spec,
+                cpu_results_dir,
+            )
+
+        case ReferenceModel.QUANTIZED_EDGE_PYTHON:
+            # Lower to quantized edge program and run in Python.
+            non_delegated_edge_program = (
+                to_quantized_edge_program(
+                    model,
+                    input_spec,
+                    calibration_dataset_dir,
+                    delegate_to_npu=False,
+                    use_qat=use_qat,
+                )
+                .exported_program()
+                .module()
+            )
+            _run_python_program(
+                non_delegated_edge_program,
+                testing_dataset_dir,
+                input_spec,
+                output_spec,
+                cpu_results_dir,
+                npu_results_dir,
+            )
+
+        case ReferenceModel.FLOAT_PYTORCH_PYTHON:
+            # Run the PyTorch nn.Module directly in Python.
+            _run_python_program(
+                model,
+                testing_dataset_dir,
+                input_spec,
+                output_spec,
+                cpu_results_dir,
+                npu_results_dir,
+            )
+
+        case _:
+            raise ValueError(f"Unsupported reference model: `{reference_model}`.")
 
     output_tensor_spec = _get_program_output_spec(delegated_program)
 
diff --git a/backends/nxp/tests_models/test_cifarnet.py b/backends/nxp/tests_models/test_cifarnet.py
@@ -10,7 +10,11 @@
 
 from executorch.backends.nxp.tests_models.config_importer import test_config
 from executorch.backends.nxp.tests_models.dataset_creator import CopyDatasetCreator
-from executorch.backends.nxp.tests_models.executors import convert_run_compare
+
+from executorch.backends.nxp.tests_models.executors import (
+    convert_run_compare,
+    ReferenceModel,
+)
 from executorch.backends.nxp.tests_models.graph_verifier import (
     BaseGraphVerifier,
     NonDelegatedNode,
@@ -57,9 +61,8 @@ def test_cifarnet(mocker, cifar_test_files, channels_last):
 
     non_dlg_nodes = [NonDelegatedNode("aten__softmax_default", 1)]
 
-    mse = 2.4e-3 if channels_last else 1e-3
     comparator = NumericalStatsOutputComparator(
-        max_mse_error=mse, is_classification_task=True
+        max_mse_error=1.0e-3, is_classification_task=True
     )
     convert_run_compare(
         model,
@@ -71,7 +74,11 @@ def test_cifarnet(mocker, cifar_test_files, channels_last):
         # Run the channels last reference in PyTorch as the ExecuTorch CPU model contains incorrectly
         #  lowered channels last convolution weights, which cause incorrect inference results. The issue
         #  is caused by ExecuTorch (not NXP). https://github.com/pytorch/executorch/issues/16464
-        run_cpu_version_in_pytorch=channels_last,
+        reference_model=(
+            ReferenceModel.QUANTIZED_EDGE_PYTHON
+            if channels_last
+            else ReferenceModel.QUANTIZED_EXECUTORCH_CPP
+        ),
     )
 
 
diff --git a/examples/nxp/setup.sh b/examples/nxp/setup.sh
@@ -8,7 +8,7 @@ set -u
 EIQ_PYPI_URL="${EIQ_PYPI_URL:-https://eiq.nxp.com/repository}"
 
 # Install eIQ Neutron dependencies - SDK and simulator
-pip install --index-url ${EIQ_PYPI_URL} eiq_neutron_sdk==2.2.2 eiq_nsys
+pip install --index-url ${EIQ_PYPI_URL} eiq-neutron-sdk==2.2.2 eiq_nsys
 
 # Get the directory of the current script
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"