Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions backends/nxp/backend/neutron_converter_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def convert(
target: str,
delegation_tag: str,
fetch_constants_to_sram: bool = False,
use_new_flow_neutron_c: bool = False,
) -> bytes:
"""
Call Neutron Converter.
Expand All @@ -75,6 +76,7 @@ def convert(
:param target: The target platform.
:param delegation_tag: The delegation tag of model partition.
:param fetch_constants_to_sram: Add microcode that fetches weights from external memory.
This allows running models which do not fit into SRAM. Applies to Neutron-C only (microcontrollers).
:param use_new_flow_neutron_c: Enable experimental MLIR-based flow for Neutron-C with improved INT8 operator support.

:return: TFLite model with Neutron microcode as bytes.
Expand All @@ -90,6 +92,7 @@ def convert(
)
cctx.compilationOpts.fetchConstantsToSRAM = fetch_constants_to_sram
cctx.compilationOpts.dumpKernelSelectionCode = self.dump_kernel_selection_code
cctx.compilationOpts.useNewFlowNeutronC = use_new_flow_neutron_c

# Try to use multiprocessing for isolation, but fall back to direct execution
# if the environment doesn't support it (e.g., in sandcastle/build environments)
Expand Down
40 changes: 28 additions & 12 deletions backends/nxp/nxp_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def __init__(self):
self.use_neutron_for_format_conversion = True
self.fetch_constants_to_sram = False
self.dump_kernel_selection_code = False
self.use_new_flow_neutron_c = False

def _replace_colons(self, operator: str) -> str:
"""
Expand All @@ -65,20 +66,21 @@ def neutron_compile_spec(
use_neutron_for_format_conversion: bool = True,
fetch_constants_to_sram: bool = False,
dump_kernel_selection_code: bool = False,
):
"""
Generate compile spec for Neutron NPU

Args:
config: Neutron accelerator configuration, e.g. "imxrt700"
extra_flags: Extra flags for the Neutron compiler
operators_not_to_delegate: List of operators that should not be delegated
use_neutron_for_format_conversion: If True, the EdgeProgramToIRConverter will insert `Transpose` ops to
use_new_flow_neutron_c: bool = False,
) -> "NeutronCompileSpecBuilder":
"""Generate compile spec for Neutron NPU

:param config: Neutron accelerator configuration, e.g. "imxrt700"
:param extra_flags: Extra flags for the Neutron compiler
:param operators_not_to_delegate: List of operators that should not be delegated
:param use_neutron_for_format_conversion: If True, the EdgeProgramToIRConverter will insert `Transpose` ops to
ensure that the IO matches the executorch partition, which will be
delegated to Neutron.
fetch_constants_to_sram: If True, the Neutron Converter will insert microinstructions to prefetch weights
:param fetch_constants_to_sram: If True, the Neutron Converter will insert microinstructions to prefetch weights
from FLASH to SRAM. This should be used when the whole model does not fit into SRAM.
dump_kernel_selection_code: Whether Neutron converter dumps kernel selection code.
:param dump_kernel_selection_code: Whether Neutron converter dumps kernel selection code.
:param use_new_flow_neutron_c: Enable experimental MLIR-based flow for Neutron-C with improved INT8 operator support.
:return: self for method chaining
"""

self.config = NeutronTargetSpec(config)
Expand All @@ -100,6 +102,7 @@ def neutron_compile_spec(
self.use_neutron_for_format_conversion = use_neutron_for_format_conversion
self.fetch_constants_to_sram = fetch_constants_to_sram
self.dump_kernel_selection_code = dump_kernel_selection_code
self.use_new_flow_neutron_c = use_new_flow_neutron_c

return self

Expand Down Expand Up @@ -128,6 +131,10 @@ def build(self):
"dump_kernel_selection_code",
f"{self.dump_kernel_selection_code}".encode(),
),
CompileSpec(
"use_new_flow_neutron_c",
f"{self.use_new_flow_neutron_c}".encode(),
),
]

return self.compile_spec
Expand All @@ -141,6 +148,7 @@ def generate_neutron_compile_spec(
use_neutron_for_format_conversion: bool = True,
fetch_constants_to_sram: bool = False,
dump_kernel_selection_code: bool = False,
use_new_flow_neutron_c: bool = False,
) -> List[CompileSpec]:
return (
NeutronCompileSpecBuilder()
Expand All @@ -151,6 +159,7 @@ def generate_neutron_compile_spec(
use_neutron_for_format_conversion=use_neutron_for_format_conversion,
fetch_constants_to_sram=fetch_constants_to_sram,
dump_kernel_selection_code=dump_kernel_selection_code,
use_new_flow_neutron_c=use_new_flow_neutron_c,
)
.build()
)
Expand All @@ -175,6 +184,7 @@ def preprocess( # noqa C901
use_neutron_for_format_conversion = None
fetch_constants_to_sram = False
dump_kernel_selection_code = None
use_new_flow_neutron_c = False
for spec in compile_spec:
if spec.key == "output_format":
output_format = spec.value.decode()
Expand All @@ -188,6 +198,8 @@ def preprocess( # noqa C901
fetch_constants_to_sram = spec.value.decode() == "True"
if spec.key == "dump_kernel_selection_code":
dump_kernel_selection_code = spec.value.decode() == "True"
if spec.key == "use_new_flow_neutron_c":
use_new_flow_neutron_c = spec.value.decode() == "True"

# Check that the output format is set in the compile spec
if not output_format:
Expand Down Expand Up @@ -220,7 +232,11 @@ def preprocess( # noqa C901
)

neutron_model = NeutronConverterManager(dump_kernel_selection_code).convert(
tflite_model, target, delegation_tag, fetch_constants_to_sram
tflite_model,
target,
delegation_tag,
fetch_constants_to_sram,
use_new_flow_neutron_c,
)

# Dump the tflite file if logging level is enabled
Expand Down
2 changes: 2 additions & 0 deletions backends/nxp/tests/executorch_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ def to_quantized_edge_program(
use_quant_state_dict: bool = True,
fetch_constants_to_sram: bool = False,
dump_kernel_selection_code: bool = False,
use_new_flow_neutron_c: bool = False,
) -> EdgeProgramManager:
_neutron_target_spec = NeutronTargetSpec(target)
if get_quantizer_fn is None:
Expand Down Expand Up @@ -160,6 +161,7 @@ def to_quantized_edge_program(
use_neutron_for_format_conversion=use_neutron_for_format_conversion,
fetch_constants_to_sram=fetch_constants_to_sram,
dump_kernel_selection_code=dump_kernel_selection_code,
use_new_flow_neutron_c=use_new_flow_neutron_c,
)
post_quant_state_dict = (
exir_program_aten__module_quant.state_dict() if use_quant_state_dict else None
Expand Down
17 changes: 17 additions & 0 deletions backends/nxp/tests/test_neutron_converter_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import multiprocessing

import torch
from eiq_neutron_sdk.neutron_converter.neutron_converter import CompilationContext

from executorch import exir
from executorch.backends.nxp.backend.edge_program_converter import (
Expand Down Expand Up @@ -56,3 +59,17 @@ def test_conv2d_neutron_conversion__prefetching(mocker):
assert len(neutron_model_prefetch) != len(
neutron_model_regular
), "The weight prefetching flag does not make a difference!"


def test_neutron_converter_with_experimental_mlir_flow(mocker):
model = LinearModule(True)
input_shape = (1, 1, 32, 32)

process_spy = mocker.spy(multiprocessing, "Process")
to_quantized_edge_program(
model, input_shape, use_new_flow_neutron_c=True
).exported_program()

compilation_context = process_spy.call_args.kwargs["args"][2]
assert isinstance(compilation_context, CompilationContext)
assert compilation_context.compilationOpts.useNewFlowNeutronC
5 changes: 5 additions & 0 deletions backends/nxp/tests_models/executors.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def _run_delegated_executorch_program(
mocker,
use_qat: bool = False,
train_fn: Callable[[torch.fx.GraphModule], None] | None = None,
use_new_flow_neutron_c: bool = False,
) -> ExportedProgram:
if len(input_spec) == 1:
# Single input, use --dataset
Expand Down Expand Up @@ -116,6 +117,7 @@ def wrapper(*args, **kwargs):
delegate_to_npu=True,
use_qat=use_qat,
train_fn=train_fn,
use_new_flow_neutron_c=use_new_flow_neutron_c,
)
except RuntimeError as e:
if "Model converted with neutron-converter has" in str(e):
Expand Down Expand Up @@ -375,6 +377,7 @@ def convert_run_compare(
reference_model: ReferenceModel = ReferenceModel.QUANTIZED_EXECUTORCH_CPP,
use_qat: bool = False,
train_fn: Callable[[torch.fx.GraphModule], None] | None = None,
use_new_flow_neutron_c: bool = False,
):
"""
Run provided program twice with neutron-test and check if results correspond. At first,
Expand All @@ -391,6 +394,7 @@ def convert_run_compare(
:param mocker: Mocker instance used by visualizer.
:param use_qat: If True, applies quantization-aware training before conversion (without the QAT training).
:param train_fn: Train/finetune function for QAT training. Is used only when `use_qat=True`.
:param use_new_flow_neutron_c: Enable experimental MLIR-based flow for Neutron-C with improved INT8 operator support.
"""
assert_NSYS()

Expand Down Expand Up @@ -432,6 +436,7 @@ def convert_run_compare(
mocker,
use_qat=use_qat,
train_fn=train_fn,
use_new_flow_neutron_c=use_new_flow_neutron_c,
)

output_spec = _get_program_output_spec(delegated_program)
Expand Down
7 changes: 6 additions & 1 deletion backends/nxp/tests_models/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def to_quantized_edge_program(
delegate_to_npu=True,
use_qat: bool = False,
train_fn: Callable[[torch.fx.GraphModule], None] | None = None,
use_new_flow_neutron_c: bool = False,
) -> EdgeProgramManager:
assert isinstance(input_spec, list) and all(
isinstance(spec, ModelInputSpec) for spec in input_spec
Expand Down Expand Up @@ -157,7 +158,9 @@ def to_quantized_edge_program(
(
[
NeutronPartitioner(
generate_neutron_compile_spec("imxrt700"),
generate_neutron_compile_spec(
"imxrt700", use_new_flow_neutron_c=use_new_flow_neutron_c
),
neutron_target_spec=neutron_target_spec,
post_quantization_state_dict=exir_program_aten_quant.state_dict(),
)
Expand Down Expand Up @@ -186,6 +189,7 @@ def to_quantized_executorch_program(
delegate_to_npu=True,
use_qat: bool = False,
train_fn: Callable[[torch.fx.GraphModule], None] | None = None,
use_new_flow_neutron_c: bool = False,
) -> ExecutorchProgramManager:
edge_program_manager = to_quantized_edge_program(
model,
Expand All @@ -194,6 +198,7 @@ def to_quantized_executorch_program(
delegate_to_npu,
use_qat=use_qat,
train_fn=train_fn,
use_new_flow_neutron_c=use_new_flow_neutron_c,
)

return edge_program_manager.to_executorch(
Expand Down
8 changes: 8 additions & 0 deletions examples/nxp/aot_neutron_compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,13 @@ def get_model_and_inputs_from_name(model_name: str, use_random_dataset: bool):
action="store_true",
help="This feature allows running models which do not fit into SRAM by offloading them to an external memory.",
)
parser.add_argument(
"--use_new_flow_neutron_c",
required=False,
default=False,
action="store_true",
help="Enable experimental MLIR-based flow for Neutron-C with improved INT8 operator support.",
Comment thread
MartinPavella marked this conversation as resolved.
)

args = parser.parse_args()

Expand Down Expand Up @@ -323,6 +330,7 @@ def get_model_and_inputs_from_name(model_name: str, use_random_dataset: bool):
operators_not_to_delegate=args.operators_not_to_delegate,
fetch_constants_to_sram=args.fetch_constants_to_sram,
dump_kernel_selection_code=args.dump_kernel_selection_code,
use_new_flow_neutron_c=args.use_new_flow_neutron_c,
)
partitioners = (
[
Expand Down
Loading