Skip to content

Commit fd82286

Browse files
committed
NXP backend: Add option to use the new Neutron flow.
The new Neutron C flow is based on MLIR, and it provides improved support for many operators.
1 parent c48ea12 commit fd82286

5 files changed

Lines changed: 58 additions & 12 deletions

File tree

backends/nxp/backend/neutron_converter_manager.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ def convert(
6767
target: str,
6868
delegation_tag: str,
6969
fetch_constants_to_sram: bool = False,
70+
use_new_flow_neutron_c: bool = False,
7071
) -> bytes:
7172
"""
7273
Call Neutron Converter.
@@ -75,6 +76,7 @@ def convert(
7576
:param target: The target platform.
7677
:param delegation_tag: The delegation tag of model partition.
7778
:param fetch_constants_to_sram: Add microcode that fetches weights from external memory.
79+
:param use_new_flow_neutron_c: Enable experimental MLIR-based flow for Neutron-C, which improves INT8 operator support.
7880
This allows running models which do not fit into SRAM. Applies to Neutron-C only (microcontrollers).
7981
8082
:return: TFLite model with Neutron microcode as bytes.
@@ -90,6 +92,7 @@ def convert(
9092
)
9193
cctx.compilationOpts.fetchConstantsToSRAM = fetch_constants_to_sram
9294
cctx.compilationOpts.dumpKernelSelectionCode = self.dump_kernel_selection_code
95+
cctx.compilationOpts.useNewFlowNeutronC = use_new_flow_neutron_c
9396

9497
# Try to use multiprocessing for isolation, but fall back to direct execution
9598
# if the environment doesn't support it (e.g., in sandcastle/build environments)

backends/nxp/nxp_backend.py

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def __init__(self):
5050
self.use_neutron_for_format_conversion = True
5151
self.fetch_constants_to_sram = False
5252
self.dump_kernel_selection_code = False
53+
self.use_new_flow_neutron_c = False
5354

5455
def _replace_colons(self, operator: str) -> str:
5556
"""
@@ -65,20 +66,21 @@ def neutron_compile_spec(
6566
use_neutron_for_format_conversion: bool = True,
6667
fetch_constants_to_sram: bool = False,
6768
dump_kernel_selection_code: bool = False,
68-
):
69-
"""
70-
Generate compile spec for Neutron NPU
71-
72-
Args:
73-
config: Neutron accelerator configuration, e.g. "imxrt700"
74-
extra_flags: Extra flags for the Neutron compiler
75-
operators_not_to_delegate: List of operators that should not be delegated
76-
use_neutron_for_format_conversion: If True, the EdgeProgramToIRConverter will insert `Transpose` ops to
69+
use_new_flow_neutron_c: bool = False,
70+
) -> "NeutronCompileSpecBuilder":
71+
"""Generate compile spec for Neutron NPU
72+
73+
:param config: Neutron accelerator configuration, e.g. "imxrt700"
74+
:param extra_flags: Extra flags for the Neutron compiler
75+
:param operators_not_to_delegate: List of operators that should not be delegated
76+
:param use_neutron_for_format_conversion: If True, the EdgeProgramToIRConverter will insert `Transpose` ops to
7777
ensure that the IO matches the executorch partition, which will be
7878
delegated to Neutron.
79-
fetch_constants_to_sram: If True, the Neutron Converter will insert microinstructions to prefetch weights
79+
:param fetch_constants_to_sram: If True, the Neutron Converter will insert microinstructions to prefetch weights
8080
from FLASH to SRAM. This should be used when the whole model does not fit into SRAM.
81-
dump_kernel_selection_code: Whether Neutron converter dumps kernel selection code.
81+
:param dump_kernel_selection_code: Whether Neutron converter dumps kernel selection code.
82+
:param use_new_flow_neutron_c: Enable experimental MLIR-based flow for Neutron-C, which improves INT8 operator support.
83+
:return: self for method chaining
8284
"""
8385

8486
self.config = NeutronTargetSpec(config)
@@ -100,6 +102,7 @@ def neutron_compile_spec(
100102
self.use_neutron_for_format_conversion = use_neutron_for_format_conversion
101103
self.fetch_constants_to_sram = fetch_constants_to_sram
102104
self.dump_kernel_selection_code = dump_kernel_selection_code
105+
self.use_new_flow_neutron_c = use_new_flow_neutron_c
103106

104107
return self
105108

@@ -128,6 +131,10 @@ def build(self):
128131
"dump_kernel_selection_code",
129132
f"{self.dump_kernel_selection_code}".encode(),
130133
),
134+
CompileSpec(
135+
"use_new_flow_neutron_c",
136+
f"{self.use_new_flow_neutron_c}".encode(),
137+
),
131138
]
132139

133140
return self.compile_spec
@@ -141,6 +148,7 @@ def generate_neutron_compile_spec(
141148
use_neutron_for_format_conversion: bool = True,
142149
fetch_constants_to_sram: bool = False,
143150
dump_kernel_selection_code: bool = False,
151+
use_new_flow_neutron_c: bool = False,
144152
) -> List[CompileSpec]:
145153
return (
146154
NeutronCompileSpecBuilder()
@@ -151,6 +159,7 @@ def generate_neutron_compile_spec(
151159
use_neutron_for_format_conversion=use_neutron_for_format_conversion,
152160
fetch_constants_to_sram=fetch_constants_to_sram,
153161
dump_kernel_selection_code=dump_kernel_selection_code,
162+
use_new_flow_neutron_c=use_new_flow_neutron_c,
154163
)
155164
.build()
156165
)
@@ -175,6 +184,7 @@ def preprocess( # noqa C901
175184
use_neutron_for_format_conversion = None
176185
fetch_constants_to_sram = False
177186
dump_kernel_selection_code = None
187+
use_new_flow_neutron_c = False
178188
for spec in compile_spec:
179189
if spec.key == "output_format":
180190
output_format = spec.value.decode()
@@ -188,6 +198,8 @@ def preprocess( # noqa C901
188198
fetch_constants_to_sram = spec.value.decode() == "True"
189199
if spec.key == "dump_kernel_selection_code":
190200
dump_kernel_selection_code = spec.value.decode() == "True"
201+
if spec.key == "use_new_flow_neutron_c":
202+
use_new_flow_neutron_c = spec.value.decode() == "True"
191203

192204
# Check that the output format is set in the compile spec
193205
if not output_format:
@@ -220,7 +232,11 @@ def preprocess( # noqa C901
220232
)
221233

222234
neutron_model = NeutronConverterManager(dump_kernel_selection_code).convert(
223-
tflite_model, target, delegation_tag, fetch_constants_to_sram
235+
tflite_model,
236+
target,
237+
delegation_tag,
238+
fetch_constants_to_sram,
239+
use_new_flow_neutron_c,
224240
)
225241

226242
# Dump the tflite file if logging level is enabled

backends/nxp/tests/executorch_pipeline.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ def to_quantized_edge_program(
130130
use_quant_state_dict: bool = True,
131131
fetch_constants_to_sram: bool = False,
132132
dump_kernel_selection_code: bool = False,
133+
use_new_flow_neutron_c: bool = False,
133134
) -> EdgeProgramManager:
134135
_neutron_target_spec = NeutronTargetSpec(target)
135136
if get_quantizer_fn is None:
@@ -160,6 +161,7 @@ def to_quantized_edge_program(
160161
use_neutron_for_format_conversion=use_neutron_for_format_conversion,
161162
fetch_constants_to_sram=fetch_constants_to_sram,
162163
dump_kernel_selection_code=dump_kernel_selection_code,
164+
use_new_flow_neutron_c=use_new_flow_neutron_c,
163165
)
164166
post_quant_state_dict = (
165167
exir_program_aten__module_quant.state_dict() if use_quant_state_dict else None

backends/nxp/tests/test_neutron_converter_manager.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33
# This source code is licensed under the BSD-style license found in the
44
# LICENSE file in the root directory of this source tree.
55

6+
import multiprocessing
7+
68
import torch
9+
from eiq_neutron_sdk.neutron_converter.neutron_converter import CompilationContext
710

811
from executorch import exir
912
from executorch.backends.nxp.backend.edge_program_converter import (
@@ -56,3 +59,17 @@ def test_conv2d_neutron_conversion__prefetching(mocker):
5659
assert len(neutron_model_prefetch) != len(
5760
neutron_model_regular
5861
), "The weight prefetching flag does not make a difference!"
62+
63+
64+
def test_neutron_converter_with_experimental_mlir_flow(mocker):
65+
model = LinearModule(True)
66+
input_shape = (1, 1, 32, 32)
67+
68+
process_spy = mocker.spy(multiprocessing, "Process")
69+
to_quantized_edge_program(
70+
model, input_shape, use_new_flow_neutron_c=True
71+
).exported_program()
72+
73+
compilation_context = process_spy.call_args.kwargs["args"][2]
74+
assert isinstance(compilation_context, CompilationContext)
75+
assert compilation_context.compilationOpts.useNewFlowNeutronC

examples/nxp/aot_neutron_compile.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,13 @@ def get_model_and_inputs_from_name(model_name: str, use_random_dataset: bool):
240240
action="store_true",
241241
help="This feature allows running models which do not fit into SRAM by offloading them to an external memory.",
242242
)
243+
parser.add_argument(
244+
"--use_new_flow_neutron_c",
245+
required=False,
246+
default=False,
247+
action="store_true",
248+
help="Enable experimental MLIR-based flow for Neutron-C with improves INT8 operator support.",
249+
)
243250

244251
args = parser.parse_args()
245252

@@ -323,6 +330,7 @@ def get_model_and_inputs_from_name(model_name: str, use_random_dataset: bool):
323330
operators_not_to_delegate=args.operators_not_to_delegate,
324331
fetch_constants_to_sram=args.fetch_constants_to_sram,
325332
dump_kernel_selection_code=args.dump_kernel_selection_code,
333+
use_new_flow_neutron_c=args.use_new_flow_neutron_c,
326334
)
327335
partitioners = (
328336
[

0 commit comments

Comments
 (0)