Skip to content

Commit 3169302

Browse files
authored
NXP backend: Add nxp backend profiling support (#19225)
### Summary
Add profiling support for the NXP backend.

### Test plan
All CI tests passed, including the new test for the profiling feature.

---------

Signed-off-by: Irina Korchakova <irina.trukhina@nxp.com>
1 parent 58447b2 commit 3169302

32 files changed

Lines changed: 1268 additions & 63 deletions

backends/nxp/backend/edge_program_converter.py

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -74,22 +74,28 @@ class EdgeProgramToIRConverter:
7474
_default_target_spec = NeutronTargetSpec("imxrt700")
7575
_default_delegation_options = CustomDelegationOptions()
7676

77+
def __init__(self):
    """Create a converter whose edge-to-TFLite mapping starts out empty."""
    # Keyed by edge debug handle; each value holds the indices of the TFLite operators
    # generated for that handle. `convert_program` resets and refills it on every call.
    self.edge_to_tflite_map = {}
79+
7780
def convert_program(
7881
self,
7982
edge_program: ExportedProgram,
8083
conversion_config: ConversionConfig = _default_conversion_config,
8184
neutron_target_spec: NeutronTargetSpec = _default_target_spec,
8285
custom_delegation_options: CustomDelegationOptions = _default_delegation_options,
83-
) -> tuple[bytes, dict[str, dict[str, DataFormat]]]:
86+
) -> tuple[bytes, dict[str, dict[str, DataFormat]], dict[int, tuple[int, ...]]]:
8487
"""
8588
Convert ExportedProgram in Edge dialect to IR (TFLite flatbuffers) as bytes.
8689
8790
:param edge_program: Converter ExportedProgram.
8891
:param conversion_config: ConversionConfig instance.
8992
:param neutron_target_spec: Object for querying the target platform to retrieve its properties.
9093
:param custom_delegation_options: Custom user options which affect node delegation.
91-
:return: TFLite flatbuffers as bytes.
94+
:return: TFLite flatbuffers as bytes, I/O formats, and edge-to-tflite mapping.
9295
"""
96+
# Reset the edge to tflite map for each conversion
97+
self.edge_to_tflite_map = {}
98+
9399
parameters_mapping = self.map_inputs_to_parameters(edge_program)
94100
dim_order_map = self.map_nodes_to_dim_order(edge_program)
95101

@@ -113,14 +119,17 @@ def convert_program(
113119
# Apply optimizations and finalize the model.
114120
internal_tflite_model = cc.tflite_builder.finish()
115121

122+
# Get the final edge to tflite mapping after optimization
123+
self.edge_to_tflite_map = cc.tflite_builder.edge_to_tflite_map
124+
116125
# Extract the formats of the model's inputs and outputs.
117126
io_formats = cc.tflite_builder.get_io_formats(edge_program.graph_signature)
118127

119128
# TFLite model generation
120129
flatbuffers_builder = flatbuffers.Builder()
121130
internal_tflite_model.gen_tflite(flatbuffers_builder)
122131

123-
return bytes(flatbuffers_builder.Output()), io_formats
132+
return bytes(flatbuffers_builder.Output()), io_formats, self.edge_to_tflite_map
124133

125134
@staticmethod
126135
def append_placeholders_and_tensors(nodes: list[Node], context: ConversionContext):
@@ -162,7 +171,6 @@ def _process_nodes(self, nodes: list[Node], conversion_context: ConversionContex
162171
exir_ops.edge.quantized_decomposed.dequantize_per_channel.default,
163172
exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
164173
]
165-
166174
for node in nodes:
167175
if node.op == "call_function":
168176
if node.target in qdq_related_functions and "cluster" in node.meta:
@@ -174,7 +182,37 @@ def _process_nodes(self, nodes: list[Node], conversion_context: ConversionContex
174182
# The node was already processed alongside the Q/DQ ops.
175183
pass
176184
elif node.target in functions_converters:
185+
# Get TFLite op count BEFORE conversion
186+
tflite_op_count_before = len(
187+
conversion_context.tflite_builder.get_operators().vector
188+
)
189+
# Convert the node
177190
functions_converters[node.target](conversion_context).convert(node)
191+
# Get TFLite op count AFTER conversion
192+
tflite_op_count_after = len(
193+
conversion_context.tflite_builder.get_operators().vector
194+
)
195+
196+
# Track the mapping - store edge debug handle in operators.
197+
# Get the edge debug handle so it can be associated with newly created operators.
198+
edge_debug_handle = node.meta.get("debug_handle", None)
199+
if (
200+
edge_debug_handle is not None
201+
and tflite_op_count_after > tflite_op_count_before
202+
):
203+
operators = (
204+
conversion_context.tflite_builder.get_operators().vector
205+
)
206+
# Node converters append new operators to the TFLite builder.
207+
# Only operators added during this conversion step (from "before" to "after")
208+
# are tagged with the current edge_debug_handle.
209+
for i in range(tflite_op_count_before, tflite_op_count_after):
210+
# Store edge debug handle in operator's temporary attribute
211+
operators[i].tmp_edge_debug_handle = edge_debug_handle
212+
logger.d(
213+
f"Tagged TFLite ops {list(range(tflite_op_count_before, tflite_op_count_after))} with edge debug_handle={edge_debug_handle} for node '{node.name}'"
214+
)
215+
178216
else:
179217
logger.e(
180218
logger.Code.NOT_IMPLEMENTED,

backends/nxp/backend/ir/converter/builder/model_builder.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,10 @@ class ModelBuilder:
8585

8686
conversion_config: ConversionConfig
8787

88+
edge_to_tflite_map: dict[
89+
int, tuple[int, ...]
90+
] # Mapping edge debug handles to tuple of TFLite operator indices
91+
8892
_default_conversion_config = ConversionConfig()
8993

9094
def __init__(
@@ -105,6 +109,7 @@ def __init__(
105109
self._nchw_tensor_version = {}
106110
self._skipped_output_map = {}
107111
self._zeros_tensor_map = {}
112+
self.edge_to_tflite_map = {}
108113

109114
def create_zeros_tensor(
110115
self, dims: List[int], name: str, dtype: np.dtype, can_reuse: bool = False
@@ -503,6 +508,9 @@ def finish(self) -> tflite_model.Model:
503508
self.conversion_config.optimization_blacklist,
504509
)
505510

511+
# Create the final edge-to-tflite mapping after model optimization
512+
self._create_edge_to_tflite_mapping()
513+
506514
self._keep_one_empty_buffer()
507515

508516
# Remove outputs, which are not produced by any node. Otherwise, there would be errors after inference.
@@ -524,6 +532,29 @@ def finish(self) -> tflite_model.Model:
524532

525533
return self._tfl_model
526534

535+
def _create_edge_to_tflite_mapping(self):
    """Build the edge-to-TFLite mapping and store it in the `edge_to_tflite_map` instance attribute.

    The builder's operators are visited in order. Each operator that carries a non-None
    `tmp_edge_debug_handle` contributes its index to the entry of that debug handle, so the
    result maps every edge debug handle to a tuple of TFLite operator indices.

    This function should be called after all model optimizations have been applied, so that
    the recorded indices match the output TFLite model.
    """

    indices_per_handle = {}
    for op_index, operator in enumerate(self.get_operators().vector):
        # Operators without a handle (attribute missing or None) are not part of the mapping.
        handle = getattr(operator, "tmp_edge_debug_handle", None)
        if handle is None:
            continue
        indices_per_handle.setdefault(handle, []).append(op_index)

    # Freeze the collected index lists into tuples before publishing the mapping.
    self.edge_to_tflite_map = {
        handle: tuple(indices) for handle, indices in indices_per_handle.items()
    }
    logger.i(
        f"\nFinal edge_to_tflite_map after optimization: {self.edge_to_tflite_map}"
    )
557+
527558
def _assign_io_tensor_indices(self, inputs, outputs, allow_inputs_stripping: bool):
528559
for tensor in outputs.tmp_outputs:
529560
try:

backends/nxp/backend/ir/tflite_generator/tflite_model.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,9 @@ class Operator(meta.TFLiteObject):
514514
# If `True`, this is an extra operator added during conversion. It was not present in the original input model.
515515
tmp_added_extra: bool
516516

517+
# Edge program debug handle for mapping edge nodes to TFLite operators
518+
tmp_edge_debug_handle: Optional[int]
519+
517520
def __init__(
518521
self,
519522
inputs: OperatorInputs = None,
@@ -541,6 +544,8 @@ def __init__(
541544
self.tmp_version = 1
542545
self.tmp_added_extra = False
543546

547+
self.tmp_edge_debug_handle = None
548+
544549
def uses_per_channel_quantization(self) -> bool:
545550
"""Determine if this operator uses per-channel quantization."""
546551
for tensor in itertools.chain(self.tmp_inputs, self.tmp_outputs):

backends/nxp/backend/neutron_converter_manager.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,15 @@ def _build_compilation_context(compilation_opts):
2525
cctx.compilationOpts.dumpKernelSelectionCode = compilation_opts[
2626
"dumpKernelSelectionCode"
2727
]
28+
if (
29+
hasattr(cctx.compilationOpts, "useProfiling")
30+
and compilation_opts["useProfiling"]
31+
):
32+
cctx.compilationOpts.useProfiling = compilation_opts["useProfiling"]
33+
cctx.compilationOpts.dumpAfterImport = "console"
34+
cctx.compilationOpts.dumpAfterGenerate = "console"
35+
cctx.compilationOpts.verbose = compilation_opts["useProfiling"]
36+
2837
return cctx
2938

3039

@@ -81,6 +90,7 @@ def convert(
8190
target: str,
8291
delegation_tag: str,
8392
fetch_constants_to_sram: bool = False,
93+
use_profiling: bool = False,
8494
) -> bytes:
8595
"""
8696
Call Neutron Converter.
@@ -89,6 +99,7 @@ def convert(
8999
:param target: The target platform.
90100
:param delegation_tag: The delegation tag of model partition.
91101
:param fetch_constants_to_sram: Add microcode that fetches weights from external memory.
102+
:param use_profiling: Use profiling for neutron delegated model.
92103
`fetch_constants_to_sram` allows running models which do not fit into SRAM. Applies to Neutron-C only (microcontrollers).
93104
94105
:return: TFLite model with Neutron microcode as bytes.
@@ -102,6 +113,7 @@ def convert(
102113
"excludeGraphPasses": "HoistSliceAboveTranspose,MergeTranspose",
103114
"fetchConstantsToSRAM": fetch_constants_to_sram,
104115
"dumpKernelSelectionCode": self.dump_kernel_selection_code,
116+
"useProfiling": use_profiling,
105117
}
106118

107119
# Try to use multiprocessing for isolation, but fall back to direct execution

0 commit comments

Comments
 (0)