Skip to content

Commit 7a2d30c

Browse files
committed
Add nxp backend profiling support
Signed-off-by: Irina Korchakova <irina.trukhina@nxp.com>
1 parent e4ede92 commit 7a2d30c

48 files changed

Lines changed: 973 additions & 102 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

backends/nxp/backend/edge_program_converter.py

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,22 +72,28 @@ class EdgeProgramToIRConverter:
7272
_default_target_spec = NeutronTargetSpec("imxrt700")
7373
_default_delegation_options = CustomDelegationOptions()
7474

75+
def __init__(self):
76+
self.edge_to_tflite_map = {}
77+
7578
def convert_program(
7679
self,
7780
edge_program: ExportedProgram,
7881
conversion_config: ConversionConfig = _default_conversion_config,
7982
neutron_target_spec: NeutronTargetSpec = _default_target_spec,
8083
custom_delegation_options: CustomDelegationOptions = _default_delegation_options,
81-
) -> tuple[bytes, dict[str, DataFormat]]:
84+
) -> tuple[bytes, dict[str, DataFormat], dict[int, tuple[int, ...]]]:
8285
"""
8386
Convert ExportedProgram in Edge dialect to IR (TFLite flatbuffers) as bytes.
8487
8588
:param edge_program: Converter ExportedProgram.
8689
:param conversion_config: ConversionConfig instance.
8790
:param neutron_target_spec: Object for querying the target platform to retrieve its properties.
8891
:param custom_delegation_options: Custom user options which affect node delegation.
89-
:return: TFLite flatbuffers as bytes.
92+
:return: TFLite flatbuffers as bytes, I/O formats, and edge-to-tflite mapping.
9093
"""
94+
# Reset the edge to tflite map for each conversion
95+
self.edge_to_tflite_map = {}
96+
9197
parameters_mapping = self.map_inputs_to_parameters(edge_program)
9298
dim_order_map = self.map_nodes_to_dim_order(edge_program)
9399

@@ -110,14 +116,17 @@ def convert_program(
110116
# Apply optimizations and finalize the model.
111117
internal_tflite_model = cc.tflite_builder.finish()
112118

119+
# Get the final edge to tflite mapping after optimization
120+
self.edge_to_tflite_map = cc.tflite_builder.edge_to_tflite_map
121+
113122
# Extract the formats of the model's inputs and outputs.
114123
io_formats = cc.tflite_builder.get_io_formats(edge_program.graph_signature)
115124

116125
# TFLite model generation
117126
flatbuffers_builder = flatbuffers.Builder()
118127
internal_tflite_model.gen_tflite(flatbuffers_builder)
119128

120-
return bytes(flatbuffers_builder.Output()), io_formats
129+
return bytes(flatbuffers_builder.Output()), io_formats, self.edge_to_tflite_map
121130

122131
@staticmethod
123132
def append_placeholders_and_tensors(nodes: list[Node], context: ConversionContext):
@@ -159,7 +168,6 @@ def _process_nodes(self, nodes: list[Node], conversion_context: ConversionContex
159168
exir_ops.edge.quantized_decomposed.dequantize_per_channel.default,
160169
exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
161170
]
162-
163171
for node in nodes:
164172
if node.op == "call_function":
165173
if node.target in qdq_related_functions and "cluster" in node.meta:
@@ -171,7 +179,22 @@ def _process_nodes(self, nodes: list[Node], conversion_context: ConversionContex
171179
# The node was already processed alongside the Q/DQ ops.
172180
pass
173181
elif node.target in functions_converters:
182+
# Get TFLite op count BEFORE conversion
183+
tflite_op_count_before = len(conversion_context.tflite_builder.get_operators().vector)
184+
# Convert the node
174185
functions_converters[node.target](conversion_context).convert(node)
186+
# Get TFLite op count AFTER conversion
187+
tflite_op_count_after = len(conversion_context.tflite_builder.get_operators().vector)
188+
189+
# Track the mapping - store edge debug handle in operators
190+
edge_debug_handle = node.meta.get("debug_handle", None)
191+
if edge_debug_handle is not None and tflite_op_count_after > tflite_op_count_before:
192+
operators = conversion_context.tflite_builder.get_operators().vector
193+
for i in range(tflite_op_count_before, tflite_op_count_after):
194+
# Store edge debug handle in operator's temporary attribute
195+
operators[i].tmp_edge_debug_handle = edge_debug_handle
196+
logger.i(f"Tagged TFLite ops {list(range(tflite_op_count_before, tflite_op_count_after))} with edge debug_handle={edge_debug_handle} for node '{node.name}'")
197+
175198
else:
176199
logger.e(
177200
logger.Code.NOT_IMPLEMENTED,

backends/nxp/backend/ir/converter/builder/model_builder.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ class ModelBuilder:
8585

8686
conversion_config: ConversionConfig
8787

88+
edge_to_tflite_map: dict[int, tuple[int, ...]] # Mapping edge debug handles to tuple of TFLite operator indices
89+
8890
_default_conversion_config = ConversionConfig()
8991

9092
def __init__(
@@ -105,6 +107,7 @@ def __init__(
105107
self._nchw_tensor_version = {}
106108
self._skipped_output_map = {}
107109
self._zeros_tensor_map = {}
110+
self.edge_to_tflite_map = {}
108111

109112
def create_zeros_tensor(
110113
self, dims: List[int], name: str, dtype: np.dtype, can_reuse: bool = False
@@ -503,6 +506,9 @@ def finish(self) -> tflite_model.Model:
503506
self.conversion_config.optimization_blacklist,
504507
)
505508

509+
# Create the final edge-to-tflite mapping after model optimization
510+
self._create_edge_to_tflite_mapping()
511+
506512
self._keep_one_empty_buffer()
507513

508514
# Remove outputs, which are not produced by any node. Otherwise, there would be errors after inference.
@@ -524,6 +530,24 @@ def finish(self) -> tflite_model.Model:
524530

525531
return self._tfl_model
526532

533+
def _create_edge_to_tflite_mapping(self):
534+
"""Create edge-to-TFLite mapping and save it to the edge_to_tflite_map class variable.
535+
536+
This function should be called after all model optimizations have been applied to match the output TFLite model.
537+
"""
538+
539+
edge_to_tflite_dict = {}
540+
for idx, op in enumerate(self.get_operators().vector):
541+
if hasattr(op, 'tmp_edge_debug_handle') and op.tmp_edge_debug_handle is not None:
542+
debug_handle = op.tmp_edge_debug_handle
543+
if debug_handle not in edge_to_tflite_dict:
544+
edge_to_tflite_dict[debug_handle] = []
545+
edge_to_tflite_dict[debug_handle].append(idx)
546+
547+
# Convert lists to tuples in the dictionary
548+
self.edge_to_tflite_map = {k: tuple(v) for k, v in edge_to_tflite_dict.items()}
549+
logger.i(f"\nFinal edge_to_tflite_map after optimization: {self.edge_to_tflite_map}")
550+
527551
def _assign_io_tensor_indices(self, inputs, outputs, allow_inputs_stripping: bool):
528552
for tensor in outputs.tmp_outputs:
529553
try:

backends/nxp/backend/ir/tflite_generator/tflite_model.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,9 @@ class Operator(meta.TFLiteObject):
514514
# If `True`, this is an extra operator added during conversion. It was not present in the original input model.
515515
tmp_added_extra: bool
516516

517+
# Edge program debug handle for mapping edge nodes to TFLite operators
518+
tmp_edge_debug_handle: Optional[int]
519+
517520
def __init__(
518521
self,
519522
inputs: OperatorInputs = None,
@@ -541,6 +544,8 @@ def __init__(
541544
self.tmp_version = 1
542545
self.tmp_added_extra = False
543546

547+
self.tmp_edge_debug_handle = None
548+
544549
def uses_per_channel_quantization(self) -> bool:
545550
"""Determine if this operator uses per-channel quantization."""
546551
for tensor in itertools.chain(self.tmp_inputs, self.tmp_outputs):

backends/nxp/backend/neutron_converter_manager.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ def convert(
6868
delegation_tag: str,
6969
fetch_constants_to_sram: bool = False,
7070
use_new_flow_neutron_c: bool = False,
71+
use_profiling: bool = False,
7172
) -> bytes:
7273
"""
7374
Call Neutron Converter.
@@ -77,6 +78,7 @@ def convert(
7778
:param delegation_tag: The delegation tag of model partition.
7879
:param fetch_constants_to_sram: Add microcode that fetches weights from external memory.
7980
:param use_new_flow_neutron_c: Enable experimental MLIR-based flow for Neutron-C with improved INT8 operator support.
81+
:param use_profiling: Enable profiling for neutron delegated model.
8082
This allows running models which do not fit into SRAM. Applies to Neutron-C only (microcontrollers).
8183
8284
:return: TFLite model with Neutron microcode as bytes.
@@ -95,6 +97,13 @@ def convert(
9597
if hasattr(cctx.compilationOpts, "useNewFlowNeutronC"):
9698
cctx.compilationOpts.useNewFlowNeutronC = use_new_flow_neutron_c
9799

100+
if use_profiling:
101+
cctx.compilationOpts.useProfiling = use_profiling
102+
cctx.compilationOpts.dumpAfterImport = "console"
103+
cctx.compilationOpts.dumpAfterGenerate = "console"
104+
cctx.compilationOpts.verbose = True
105+
#cctx.compilationOpts.dumpGraphs = 1
106+
98107
# Try to use multiprocessing for isolation, but fall back to direct execution
99108
# if the environment doesn't support it (e.g., in sandcastle/build environments)
100109
try:

0 commit comments

Comments
 (0)