Skip to content

Commit ef02086

Browse files
psiddh and Copilot authored
Update examples/arm/aot_arm_compiler.py
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 0bba44a commit ef02086

1 file changed

Lines changed: 20 additions & 7 deletions

File tree

examples/arm/aot_arm_compiler.py

Lines changed: 20 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -866,13 +866,26 @@ def _to_channels_last(x):
866866
# fully int8 inputs and outputs. This must run before CortexMPassManager
867867
# which renames quantized_decomposed ops to cortex_m variants.
868868
if args.quantize:
869-
print("Applying passes to create a fully int8 quantized model...")
870-
871-
edge = edge.transform([
872-
QuantizeInputs(edge, [0]),
873-
QuantizeOutputs(edge, [0]),
874-
])
875-
869+
# When BundleIO is enabled, the bundled example_inputs/expected_outputs
870+
# are still based on float I/O. Applying QuantizeInputs/QuantizeOutputs
871+
# here would make the ExecuTorch program use int8 I/O while the bundled
872+
# I/O remains float, causing dtype mismatches at runtime. Until the
873+
# BundleIO path is updated to support int8 I/O (or to quantize the
874+
# bundled data using recorded qparams), skip these passes when
875+
# bundleio is enabled.
876+
if getattr(args, "bundleio", False):
877+
logging.warning(
878+
"Skipping QuantizeInputs/QuantizeOutputs because --bundleio is "
879+
"enabled and bundled I/O currently assumes float tensors."
880+
)
881+
else:
882+
print("Applying passes to create a fully int8 quantized model...")
883+
edge = edge.transform(
884+
[
885+
QuantizeInputs(edge, [0]),
886+
QuantizeOutputs(edge, [0]),
887+
]
888+
)
876889
pass_manager = CortexMPassManager(edge.exported_program())
877890
edge._edge_programs["forward"] = pass_manager.transform()
878891

0 commit comments

Comments
 (0)