Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions backends/nxp/backend/neutron_converter_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def convert(
target: str,
delegation_tag: str,
fetch_constants_to_sram: bool = False,
use_new_flow_neutron_c: bool = False,
) -> bytes:
"""
Call Neutron Converter.
Expand All @@ -75,6 +76,7 @@ def convert(
:param target: The target platform.
:param delegation_tag: The delegation tag of model partition.
:param fetch_constants_to_sram: Add microcode that fetches weights from external memory.
This allows running models which do not fit into SRAM. Applies to Neutron-C only (microcontrollers).
:param use_new_flow_neutron_c: Enable experimental MLIR-based flow for Neutron-C with improved INT8 operator support.

:return: TFLite model with Neutron microcode as bytes.
Expand All @@ -90,6 +92,7 @@ def convert(
)
cctx.compilationOpts.fetchConstantsToSRAM = fetch_constants_to_sram
cctx.compilationOpts.dumpKernelSelectionCode = self.dump_kernel_selection_code
cctx.compilationOpts.useNewFlowNeutronC = use_new_flow_neutron_c

# Try to use multiprocessing for isolation, but fall back to direct execution
# if the environment doesn't support it (e.g., in sandcastle/build environments)
Expand Down
40 changes: 28 additions & 12 deletions backends/nxp/nxp_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def __init__(self):
self.use_neutron_for_format_conversion = True
self.fetch_constants_to_sram = False
self.dump_kernel_selection_code = False
self.use_new_flow_neutron_c = False

def _replace_colons(self, operator: str) -> str:
"""
Expand All @@ -65,20 +66,21 @@ def neutron_compile_spec(
use_neutron_for_format_conversion: bool = True,
fetch_constants_to_sram: bool = False,
dump_kernel_selection_code: bool = False,
):
"""
Generate compile spec for Neutron NPU

Args:
config: Neutron accelerator configuration, e.g. "imxrt700"
extra_flags: Extra flags for the Neutron compiler
operators_not_to_delegate: List of operators that should not be delegated
use_neutron_for_format_conversion: If True, the EdgeProgramToIRConverter will insert `Transpose` ops to
use_new_flow_neutron_c: bool = False,
) -> "NeutronCompileSpecBuilder":
"""Generate compile spec for Neutron NPU

:param config: Neutron accelerator configuration, e.g. "imxrt700"
:param extra_flags: Extra flags for the Neutron compiler
:param operators_not_to_delegate: List of operators that should not be delegated
:param use_neutron_for_format_conversion: If True, the EdgeProgramToIRConverter will insert `Transpose` ops to
ensure that the IO matches the executorch partition, which will be
delegated to Neutron.
fetch_constants_to_sram: If True, the Neutron Converter will insert microinstructions to prefetch weights
:param fetch_constants_to_sram: If True, the Neutron Converter will insert microinstructions to prefetch weights
from FLASH to SRAM. This should be used when the whole model does not fit into SRAM.
dump_kernel_selection_code: Whether Neutron converter dumps kernel selection code.
:param dump_kernel_selection_code: Whether Neutron converter dumps kernel selection code.
:param use_new_flow_neutron_c: Enable experimental MLIR-based flow for Neutron-C with improved INT8 operator support.
:return: self for method chaining
"""

self.config = NeutronTargetSpec(config)
Expand All @@ -100,6 +102,7 @@ def neutron_compile_spec(
self.use_neutron_for_format_conversion = use_neutron_for_format_conversion
self.fetch_constants_to_sram = fetch_constants_to_sram
self.dump_kernel_selection_code = dump_kernel_selection_code
self.use_new_flow_neutron_c = use_new_flow_neutron_c

return self

Expand Down Expand Up @@ -128,6 +131,10 @@ def build(self):
"dump_kernel_selection_code",
f"{self.dump_kernel_selection_code}".encode(),
),
CompileSpec(
"use_new_flow_neutron_c",
f"{self.use_new_flow_neutron_c}".encode(),
),
]

return self.compile_spec
Expand All @@ -141,6 +148,7 @@ def generate_neutron_compile_spec(
use_neutron_for_format_conversion: bool = True,
fetch_constants_to_sram: bool = False,
dump_kernel_selection_code: bool = False,
use_new_flow_neutron_c: bool = False,
) -> List[CompileSpec]:
return (
NeutronCompileSpecBuilder()
Expand All @@ -151,6 +159,7 @@ def generate_neutron_compile_spec(
use_neutron_for_format_conversion=use_neutron_for_format_conversion,
fetch_constants_to_sram=fetch_constants_to_sram,
dump_kernel_selection_code=dump_kernel_selection_code,
use_new_flow_neutron_c=use_new_flow_neutron_c,
)
.build()
)
Expand All @@ -175,6 +184,7 @@ def preprocess( # noqa C901
use_neutron_for_format_conversion = None
fetch_constants_to_sram = False
dump_kernel_selection_code = None
use_new_flow_neutron_c = False
for spec in compile_spec:
if spec.key == "output_format":
output_format = spec.value.decode()
Expand All @@ -188,6 +198,8 @@ def preprocess( # noqa C901
fetch_constants_to_sram = spec.value.decode() == "True"
if spec.key == "dump_kernel_selection_code":
dump_kernel_selection_code = spec.value.decode() == "True"
if spec.key == "use_new_flow_neutron_c":
use_new_flow_neutron_c = spec.value.decode() == "True"

# Check that the output format is set in the compile spec
if not output_format:
Expand Down Expand Up @@ -220,7 +232,11 @@ def preprocess( # noqa C901
)

neutron_model = NeutronConverterManager(dump_kernel_selection_code).convert(
tflite_model, target, delegation_tag, fetch_constants_to_sram
tflite_model,
target,
delegation_tag,
fetch_constants_to_sram,
use_new_flow_neutron_c,
)

# Dump the tflite file if logging level is enabled
Expand Down
2 changes: 2 additions & 0 deletions backends/nxp/tests/executorch_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ def to_quantized_edge_program(
use_quant_state_dict: bool = True,
fetch_constants_to_sram: bool = False,
dump_kernel_selection_code: bool = False,
use_new_flow_neutron_c: bool = False,
) -> EdgeProgramManager:
_neutron_target_spec = NeutronTargetSpec(target)
if get_quantizer_fn is None:
Expand Down Expand Up @@ -160,6 +161,7 @@ def to_quantized_edge_program(
use_neutron_for_format_conversion=use_neutron_for_format_conversion,
fetch_constants_to_sram=fetch_constants_to_sram,
dump_kernel_selection_code=dump_kernel_selection_code,
use_new_flow_neutron_c=use_new_flow_neutron_c,
)
post_quant_state_dict = (
exir_program_aten__module_quant.state_dict() if use_quant_state_dict else None
Expand Down
17 changes: 17 additions & 0 deletions backends/nxp/tests/test_neutron_converter_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import multiprocessing

import torch
from eiq_neutron_sdk.neutron_converter.neutron_converter import CompilationContext

from executorch import exir
from executorch.backends.nxp.backend.edge_program_converter import (
Expand Down Expand Up @@ -56,3 +59,17 @@ def test_conv2d_neutron_conversion__prefetching(mocker):
assert len(neutron_model_prefetch) != len(
neutron_model_regular
), "The weight prefetching flag does not make a difference!"


def test_neutron_converter_with_experimental_mlir_flow(mocker):
model = LinearModule(True)
input_shape = (1, 1, 32, 32)

process_spy = mocker.spy(multiprocessing, "Process")
to_quantized_edge_program(
model, input_shape, use_new_flow_neutron_c=True
).exported_program()

compilation_context = process_spy.call_args.kwargs["args"][2]
assert isinstance(compilation_context, CompilationContext)
assert compilation_context.compilationOpts.useNewFlowNeutronC
5 changes: 5 additions & 0 deletions backends/nxp/tests_models/executors.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def _run_delegated_executorch_program(
mocker,
use_qat: bool = False,
train_fn: Callable[[torch.fx.GraphModule], None] | None = None,
use_new_flow_neutron_c: bool = False,
) -> ExportedProgram:
if len(input_spec) == 1:
# Single input, use --dataset
Expand Down Expand Up @@ -116,6 +117,7 @@ def wrapper(*args, **kwargs):
delegate_to_npu=True,
use_qat=use_qat,
train_fn=train_fn,
use_new_flow_neutron_c=use_new_flow_neutron_c,
)
except RuntimeError as e:
if "Model converted with neutron-converter has" in str(e):
Expand Down Expand Up @@ -375,6 +377,7 @@ def convert_run_compare(
reference_model: ReferenceModel = ReferenceModel.QUANTIZED_EXECUTORCH_CPP,
use_qat: bool = False,
train_fn: Callable[[torch.fx.GraphModule], None] | None = None,
use_new_flow_neutron_c: bool = False,
):
"""
Run provided program twice with neutron-test and check if results correspond. At first,
Expand All @@ -391,6 +394,7 @@ def convert_run_compare(
:param mocker: Mocker instance used by visualizer.
:param use_qat: If True, applies quantization-aware training before conversion (without the QAT training).
:param train_fn: Train/finetune function for QAT training. Is used only when `use_qat=True`.
:param use_new_flow_neutron_c: Enable experimental MLIR-based flow for Neutron-C with improved INT8 operator support.
"""
assert_NSYS()

Expand Down Expand Up @@ -432,6 +436,7 @@ def convert_run_compare(
mocker,
use_qat=use_qat,
train_fn=train_fn,
use_new_flow_neutron_c=use_new_flow_neutron_c,
)

output_spec = _get_program_output_spec(delegated_program)
Expand Down
7 changes: 6 additions & 1 deletion backends/nxp/tests_models/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def to_quantized_edge_program(
delegate_to_npu=True,
use_qat: bool = False,
train_fn: Callable[[torch.fx.GraphModule], None] | None = None,
use_new_flow_neutron_c: bool = False,
) -> EdgeProgramManager:
assert isinstance(input_spec, list) and all(
isinstance(spec, ModelInputSpec) for spec in input_spec
Expand Down Expand Up @@ -157,7 +158,9 @@ def to_quantized_edge_program(
(
[
NeutronPartitioner(
generate_neutron_compile_spec("imxrt700"),
generate_neutron_compile_spec(
"imxrt700", use_new_flow_neutron_c=use_new_flow_neutron_c
),
neutron_target_spec=neutron_target_spec,
post_quantization_state_dict=exir_program_aten_quant.state_dict(),
)
Expand Down Expand Up @@ -186,6 +189,7 @@ def to_quantized_executorch_program(
delegate_to_npu=True,
use_qat: bool = False,
train_fn: Callable[[torch.fx.GraphModule], None] | None = None,
use_new_flow_neutron_c: bool = False,
) -> ExecutorchProgramManager:
edge_program_manager = to_quantized_edge_program(
model,
Expand All @@ -194,6 +198,7 @@ def to_quantized_executorch_program(
delegate_to_npu,
use_qat=use_qat,
train_fn=train_fn,
use_new_flow_neutron_c=use_new_flow_neutron_c,
)

return edge_program_manager.to_executorch(
Expand Down
8 changes: 8 additions & 0 deletions examples/nxp/aot_neutron_compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,13 @@ def get_model_and_inputs_from_name(model_name: str, use_random_dataset: bool):
action="store_true",
help="This feature allows running models which do not fit into SRAM by offloading them to an external memory.",
)
parser.add_argument(
"--use_new_flow_neutron_c",
required=False,
default=False,
action="store_true",
help="Enable experimental MLIR-based flow for Neutron-C with improved INT8 operator support.",
Comment thread
MartinPavella marked this conversation as resolved.
)

args = parser.parse_args()

Expand Down Expand Up @@ -323,6 +330,7 @@ def get_model_and_inputs_from_name(model_name: str, use_random_dataset: bool):
operators_not_to_delegate=args.operators_not_to_delegate,
fetch_constants_to_sram=args.fetch_constants_to_sram,
dump_kernel_selection_code=args.dump_kernel_selection_code,
use_new_flow_neutron_c=args.use_new_flow_neutron_c,
)
partitioners = (
[
Expand Down
Loading