Skip to content

Commit 7acffcd

Browse files
committed
NXP backend: Add option to use the new Neutron flow.
The new Neutron-C flow is based on MLIR and provides improved support for many operators.
1 parent c48ea12 commit 7acffcd

7 files changed

Lines changed: 69 additions & 13 deletions

File tree

backends/nxp/backend/neutron_converter_manager.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ def convert(
6767
target: str,
6868
delegation_tag: str,
6969
fetch_constants_to_sram: bool = False,
70+
use_new_flow_neutron_c: bool = False,
7071
) -> bytes:
7172
"""
7273
Call Neutron Converter.
@@ -75,6 +76,7 @@ def convert(
7576
:param target: The target platform.
7677
:param delegation_tag: The delegation tag of model partition.
7778
:param fetch_constants_to_sram: Add microcode that fetches weights from external memory.
79+
:param use_new_flow_neutron_c: Enable experimental MLIR-based flow for Neutron-C with improved INT8 operator support.
7880
Note: `fetch_constants_to_sram` allows running models which do not fit into SRAM. Applies to Neutron-C only (microcontrollers).
7981
8082
:return: TFLite model with Neutron microcode as bytes.
@@ -90,6 +92,7 @@ def convert(
9092
)
9193
cctx.compilationOpts.fetchConstantsToSRAM = fetch_constants_to_sram
9294
cctx.compilationOpts.dumpKernelSelectionCode = self.dump_kernel_selection_code
95+
cctx.compilationOpts.useNewFlowNeutronC = use_new_flow_neutron_c
9396

9497
# Try to use multiprocessing for isolation, but fall back to direct execution
9598
# if the environment doesn't support it (e.g., in sandcastle/build environments)

backends/nxp/nxp_backend.py

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def __init__(self):
5050
self.use_neutron_for_format_conversion = True
5151
self.fetch_constants_to_sram = False
5252
self.dump_kernel_selection_code = False
53+
self.use_new_flow_neutron_c = False
5354

5455
def _replace_colons(self, operator: str) -> str:
5556
"""
@@ -65,20 +66,21 @@ def neutron_compile_spec(
6566
use_neutron_for_format_conversion: bool = True,
6667
fetch_constants_to_sram: bool = False,
6768
dump_kernel_selection_code: bool = False,
68-
):
69-
"""
70-
Generate compile spec for Neutron NPU
71-
72-
Args:
73-
config: Neutron accelerator configuration, e.g. "imxrt700"
74-
extra_flags: Extra flags for the Neutron compiler
75-
operators_not_to_delegate: List of operators that should not be delegated
76-
use_neutron_for_format_conversion: If True, the EdgeProgramToIRConverter will insert `Transpose` ops to
69+
use_new_flow_neutron_c: bool = False,
70+
) -> "NeutronCompileSpecBuilder":
71+
"""Generate compile spec for Neutron NPU
72+
73+
:param config: Neutron accelerator configuration, e.g. "imxrt700"
74+
:param extra_flags: Extra flags for the Neutron compiler
75+
:param operators_not_to_delegate: List of operators that should not be delegated
76+
:param use_neutron_for_format_conversion: If True, the EdgeProgramToIRConverter will insert `Transpose` ops to
7777
ensure that the IO matches the executorch partition, which will be
7878
delegated to Neutron.
79-
fetch_constants_to_sram: If True, the Neutron Converter will insert microinstructions to prefetch weights
79+
:param fetch_constants_to_sram: If True, the Neutron Converter will insert microinstructions to prefetch weights
8080
from FLASH to SRAM. This should be used when the whole model does not fit into SRAM.
81-
dump_kernel_selection_code: Whether Neutron converter dumps kernel selection code.
81+
:param dump_kernel_selection_code: Whether Neutron converter dumps kernel selection code.
82+
:param use_new_flow_neutron_c: Enable experimental MLIR-based flow for Neutron-C with improved INT8 operator support.
83+
:return: self for method chaining
8284
"""
8385

8486
self.config = NeutronTargetSpec(config)
@@ -100,6 +102,7 @@ def neutron_compile_spec(
100102
self.use_neutron_for_format_conversion = use_neutron_for_format_conversion
101103
self.fetch_constants_to_sram = fetch_constants_to_sram
102104
self.dump_kernel_selection_code = dump_kernel_selection_code
105+
self.use_new_flow_neutron_c = use_new_flow_neutron_c
103106

104107
return self
105108

@@ -128,6 +131,10 @@ def build(self):
128131
"dump_kernel_selection_code",
129132
f"{self.dump_kernel_selection_code}".encode(),
130133
),
134+
CompileSpec(
135+
"use_new_flow_neutron_c",
136+
f"{self.use_new_flow_neutron_c}".encode(),
137+
),
131138
]
132139

133140
return self.compile_spec
@@ -141,6 +148,7 @@ def generate_neutron_compile_spec(
141148
use_neutron_for_format_conversion: bool = True,
142149
fetch_constants_to_sram: bool = False,
143150
dump_kernel_selection_code: bool = False,
151+
use_new_flow_neutron_c: bool = False,
144152
) -> List[CompileSpec]:
145153
return (
146154
NeutronCompileSpecBuilder()
@@ -151,6 +159,7 @@ def generate_neutron_compile_spec(
151159
use_neutron_for_format_conversion=use_neutron_for_format_conversion,
152160
fetch_constants_to_sram=fetch_constants_to_sram,
153161
dump_kernel_selection_code=dump_kernel_selection_code,
162+
use_new_flow_neutron_c=use_new_flow_neutron_c,
154163
)
155164
.build()
156165
)
@@ -175,6 +184,7 @@ def preprocess( # noqa C901
175184
use_neutron_for_format_conversion = None
176185
fetch_constants_to_sram = False
177186
dump_kernel_selection_code = None
187+
use_new_flow_neutron_c = False
178188
for spec in compile_spec:
179189
if spec.key == "output_format":
180190
output_format = spec.value.decode()
@@ -188,6 +198,8 @@ def preprocess( # noqa C901
188198
fetch_constants_to_sram = spec.value.decode() == "True"
189199
if spec.key == "dump_kernel_selection_code":
190200
dump_kernel_selection_code = spec.value.decode() == "True"
201+
if spec.key == "use_new_flow_neutron_c":
202+
use_new_flow_neutron_c = spec.value.decode() == "True"
191203

192204
# Check that the output format is set in the compile spec
193205
if not output_format:
@@ -220,7 +232,11 @@ def preprocess( # noqa C901
220232
)
221233

222234
neutron_model = NeutronConverterManager(dump_kernel_selection_code).convert(
223-
tflite_model, target, delegation_tag, fetch_constants_to_sram
235+
tflite_model,
236+
target,
237+
delegation_tag,
238+
fetch_constants_to_sram,
239+
use_new_flow_neutron_c,
224240
)
225241

226242
# Dump the tflite file if logging level is enabled

backends/nxp/tests/executorch_pipeline.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ def to_quantized_edge_program(
130130
use_quant_state_dict: bool = True,
131131
fetch_constants_to_sram: bool = False,
132132
dump_kernel_selection_code: bool = False,
133+
use_new_flow_neutron_c: bool = False,
133134
) -> EdgeProgramManager:
134135
_neutron_target_spec = NeutronTargetSpec(target)
135136
if get_quantizer_fn is None:
@@ -160,6 +161,7 @@ def to_quantized_edge_program(
160161
use_neutron_for_format_conversion=use_neutron_for_format_conversion,
161162
fetch_constants_to_sram=fetch_constants_to_sram,
162163
dump_kernel_selection_code=dump_kernel_selection_code,
164+
use_new_flow_neutron_c=use_new_flow_neutron_c,
163165
)
164166
post_quant_state_dict = (
165167
exir_program_aten__module_quant.state_dict() if use_quant_state_dict else None

backends/nxp/tests/test_neutron_converter_manager.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33
# This source code is licensed under the BSD-style license found in the
44
# LICENSE file in the root directory of this source tree.
55

6+
import multiprocessing
7+
68
import torch
9+
from eiq_neutron_sdk.neutron_converter.neutron_converter import CompilationContext
710

811
from executorch import exir
912
from executorch.backends.nxp.backend.edge_program_converter import (
@@ -56,3 +59,17 @@ def test_conv2d_neutron_conversion__prefetching(mocker):
5659
assert len(neutron_model_prefetch) != len(
5760
neutron_model_regular
5861
), "The weight prefetching flag does not make a difference!"
62+
63+
64+
def test_neutron_converter_with_experimental_mlir_flow(mocker):
65+
model = LinearModule(True)
66+
input_shape = (1, 1, 32, 32)
67+
68+
process_spy = mocker.spy(multiprocessing, "Process")
69+
to_quantized_edge_program(
70+
model, input_shape, use_new_flow_neutron_c=True
71+
).exported_program()
72+
73+
compilation_context = process_spy.call_args.kwargs["args"][2]
74+
assert isinstance(compilation_context, CompilationContext)
75+
assert compilation_context.compilationOpts.useNewFlowNeutronC

backends/nxp/tests_models/executors.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ def _run_delegated_executorch_program(
6868
mocker,
6969
use_qat: bool = False,
7070
train_fn: Callable[[torch.fx.GraphModule], None] | None = None,
71+
use_new_flow_neutron_c: bool = False,
7172
) -> ExportedProgram:
7273
if len(input_spec) == 1:
7374
# Single input, use --dataset
@@ -116,6 +117,7 @@ def wrapper(*args, **kwargs):
116117
delegate_to_npu=True,
117118
use_qat=use_qat,
118119
train_fn=train_fn,
120+
use_new_flow_neutron_c=use_new_flow_neutron_c,
119121
)
120122
except RuntimeError as e:
121123
if "Model converted with neutron-converter has" in str(e):
@@ -375,6 +377,7 @@ def convert_run_compare(
375377
reference_model: ReferenceModel = ReferenceModel.QUANTIZED_EXECUTORCH_CPP,
376378
use_qat: bool = False,
377379
train_fn: Callable[[torch.fx.GraphModule], None] | None = None,
380+
use_new_flow_neutron_c: bool = False,
378381
):
379382
"""
380383
Run provided program twice with neutron-test and check if results correspond. At first,
@@ -391,6 +394,7 @@ def convert_run_compare(
391394
:param mocker: Mocker instance used by visualizer.
392395
:param use_qat: If True, applies quantization-aware training before conversion (without the QAT training).
393396
:param train_fn: Train/finetune function for QAT training. Is used only when `use_qat=True`.
397+
:param use_new_flow_neutron_c: Enable experimental MLIR-based flow for Neutron-C with improved INT8 operator support.
394398
"""
395399
assert_NSYS()
396400

@@ -432,6 +436,7 @@ def convert_run_compare(
432436
mocker,
433437
use_qat=use_qat,
434438
train_fn=train_fn,
439+
use_new_flow_neutron_c=use_new_flow_neutron_c,
435440
)
436441

437442
output_spec = _get_program_output_spec(delegated_program)

backends/nxp/tests_models/utils.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ def to_quantized_edge_program(
6767
delegate_to_npu=True,
6868
use_qat: bool = False,
6969
train_fn: Callable[[torch.fx.GraphModule], None] | None = None,
70+
use_new_flow_neutron_c: bool = False,
7071
) -> EdgeProgramManager:
7172
assert isinstance(input_spec, list) and all(
7273
isinstance(spec, ModelInputSpec) for spec in input_spec
@@ -157,7 +158,9 @@ def to_quantized_edge_program(
157158
(
158159
[
159160
NeutronPartitioner(
160-
generate_neutron_compile_spec("imxrt700"),
161+
generate_neutron_compile_spec(
162+
"imxrt700", use_new_flow_neutron_c=use_new_flow_neutron_c
163+
),
161164
neutron_target_spec=neutron_target_spec,
162165
post_quantization_state_dict=exir_program_aten_quant.state_dict(),
163166
)
@@ -186,6 +189,7 @@ def to_quantized_executorch_program(
186189
delegate_to_npu=True,
187190
use_qat: bool = False,
188191
train_fn: Callable[[torch.fx.GraphModule], None] | None = None,
192+
use_new_flow_neutron_c: bool = False,
189193
) -> ExecutorchProgramManager:
190194
edge_program_manager = to_quantized_edge_program(
191195
model,
@@ -194,6 +198,7 @@ def to_quantized_executorch_program(
194198
delegate_to_npu,
195199
use_qat=use_qat,
196200
train_fn=train_fn,
201+
use_new_flow_neutron_c=use_new_flow_neutron_c,
197202
)
198203

199204
return edge_program_manager.to_executorch(

examples/nxp/aot_neutron_compile.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,13 @@ def get_model_and_inputs_from_name(model_name: str, use_random_dataset: bool):
240240
action="store_true",
241241
help="This feature allows running models which do not fit into SRAM by offloading them to an external memory.",
242242
)
243+
parser.add_argument(
244+
"--use_new_flow_neutron_c",
245+
required=False,
246+
default=False,
247+
action="store_true",
248+
help="Enable experimental MLIR-based flow for Neutron-C with improved INT8 operator support.",
249+
)
243250

244251
args = parser.parse_args()
245252

@@ -323,6 +330,7 @@ def get_model_and_inputs_from_name(model_name: str, use_random_dataset: bool):
323330
operators_not_to_delegate=args.operators_not_to_delegate,
324331
fetch_constants_to_sram=args.fetch_constants_to_sram,
325332
dump_kernel_selection_code=args.dump_kernel_selection_code,
333+
use_new_flow_neutron_c=args.use_new_flow_neutron_c,
326334
)
327335
partitioners = (
328336
[

0 commit comments

Comments
 (0)