@@ -866,13 +866,26 @@ def _to_channels_last(x):
866866 # fully int8 inputs and outputs. This must run before CortexMPassManager
867867 # which renames quantized_decomposed ops to cortex_m variants.
868868 if args .quantize :
869- print ("Applying passes to create a fully int8 quantized model..." )
870-
871- edge = edge .transform ([
872- QuantizeInputs (edge , [0 ]),
873- QuantizeOutputs (edge , [0 ]),
874- ])
875-
869+ # When BundleIO is enabled, the bundled example_inputs/expected_outputs
870+ # are still based on float I/O. Applying QuantizeInputs/QuantizeOutputs
871+ # here would make the ExecuTorch program use int8 I/O while the bundled
872+ # I/O remains float, causing dtype mismatches at runtime. Until the
873+ # BundleIO path is updated to support int8 I/O (or to quantize the
874+ # bundled data using recorded qparams), skip these passes when
875+ # bundleio is enabled.
876+ if getattr (args , "bundleio" , False ):
877+ logging .warning (
878+ "Skipping QuantizeInputs/QuantizeOutputs because --bundleio is "
879+ "enabled and bundled I/O currently assumes float tensors."
880+ )
881+ else :
882+ print ("Applying passes to create a fully int8 quantized model..." )
883+ edge = edge .transform (
884+ [
885+ QuantizeInputs (edge , [0 ]),
886+ QuantizeOutputs (edge , [0 ]),
887+ ]
888+ )
876889 pass_manager = CortexMPassManager (edge .exported_program ())
877890 edge ._edge_programs ["forward" ] = pass_manager .transform ()
878891
0 commit comments