Skip to content

Commit 7359cf2

Browse files
committed
Update on "Use caching allocator for runner (#15730)"
Summary: We observed that on iOS it improves perf by 6% because SDPA op does temp allocations. No significant difference on android though. ghstack-source-id: 328001114 exported-using-ghexport Reviewed By: navsud, derekdixu Differential Revision: D86120038 [ghstack-poisoned]
2 parents 704fb2e + 467774d commit 7359cf2

40 files changed

Lines changed: 1997 additions & 115 deletions

backends/apple/mps/runtime/MPSDevice.mm

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ static inline MTLLanguageVersion getMetalLanguageVersion(const id<MTLDevice>& de
138138
ET_CHECK_OR_RETURN_ERROR(
139139
err == Error::Ok,
140140
Internal,
141-
"An error occured occured while compiling library %d", libraryType
141+
"An error occurred while compiling library %d", libraryType
142142
);
143143
}
144144
if (_m_pso_cache.find(kernelName) == _m_pso_cache.end()) {

backends/arm/_passes/insert_rescales_pass.py

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# LICENSE file in the root directory of this source tree.
55

66
import math
7+
import operator
78
from copy import copy
89
from typing import cast, Dict, Optional, Set, Tuple, Type
910

@@ -34,22 +35,67 @@ class InsertRescalePass(ArmPass):
3435

3536
_passes_required_after: Set[Type[ExportPass]] = set()
3637

38+
def _ensure_uint8_io_only(self, graph_module: GraphModule) -> None:
39+
"""Ensure uint8 tensors only appear at IO boundaries.
40+
41+
TOSA has no true uint8 tensor type; unsigned semantics are carried via
42+
RESCALE input/output flags. If uint8 appears for other nodes, it means
43+
unsigned data leaked past IO.
44+
45+
"""
46+
for node in graph_module.graph.nodes:
47+
meta_val = node.meta.get("val")
48+
if not isinstance(meta_val, torch.Tensor):
49+
continue
50+
if meta_val.dtype != torch.uint8:
51+
continue
52+
if node.op in ("placeholder", "output"):
53+
continue
54+
if node.op == "call_function" and node.target == operator.getitem:
55+
if all(user.op == "output" for user in node.users):
56+
continue
57+
if (
58+
node.op == "call_function"
59+
and node.target
60+
== exir_ops.edge.dim_order_ops._to_dim_order_copy.default
61+
):
62+
# dim_order is a view-like transform; allow it to preserve uint8 at IO.
63+
continue
64+
if (
65+
node.op == "call_function"
66+
and node.target == exir_ops.backend.tosa.RESCALE.default
67+
):
68+
continue
69+
raise ValueError(
70+
f"Found internal uint8 tensor at node {node.name} "
71+
f"({node.target}). Uint8 is only allowed at IO boundaries."
72+
)
73+
3774
def fold_dq_q_to_rescale(self, node: Node, user: Node, graph_module: GraphModule):
3875
dq_args = QuantArgs.from_operator(node.target, node.args)
3976
q_args = QuantArgs.from_operator(user.target, user.args)
4077
new_scale = dq_args.scale / q_args.scale
78+
input_unsigned = dq_args.dtype == torch.uint8
79+
output_unsigned = q_args.dtype == torch.uint8
80+
# TOSA has no true uint8 tensors; unsigned semantics are handled via
81+
# the RESCALE flags, so uint8 does not propagate as a tensor dtype.
82+
output_dtype = torch.int8 if output_unsigned else q_args.dtype
4183

4284
with graph_module.graph.inserting_before(node):
4385
rescale_node = create_node(
4486
graph_module.graph,
4587
exir_ops.backend.tosa.RESCALE.default,
4688
(
4789
node.all_input_nodes[0],
48-
q_args.dtype,
90+
output_dtype,
4991
[new_scale],
5092
dq_args.zp,
5193
q_args.zp,
5294
),
95+
kwargs={
96+
"input_unsigned": input_unsigned,
97+
"output_unsigned": output_unsigned,
98+
},
5399
)
54100
rescale_node.meta = copy(user.meta)
55101
user.replace_all_uses_with(rescale_node)
@@ -74,6 +120,9 @@ def call(self, graph_module: GraphModule) -> PassResult:
74120
graph_module.recompile()
75121
return PassResult(graph_module, modified)
76122

123+
    def ensures(self, graph_module: GraphModule) -> None:
        """Post-condition hook: verify uint8 tensors only occur at IO boundaries.

        Delegates to ``_ensure_uint8_io_only``, which raises ``ValueError`` if
        an interior (non-placeholder/output) node carries a uint8 value.
        """
        self._ensure_uint8_io_only(graph_module)
125+
77126

78127
class InsertRescaleInt32Pass(ArmPass):
79128
"""Numerous TOSA ops require inputs and outputs to be 32-bit integers in

backends/arm/common/arm_compile_spec.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ class DebugMode(Enum):
3636
compiler_flags: list[str] = field(default_factory=list)
3737
path_for_intermediates: str | None = None
3838
tosa_debug_mode: DebugMode | None = None
39+
preserve_io_quantization: bool = False
3940

4041
_TOSA_SPEC_KEY = "tosa_spec"
4142
_COMPILE_FLAGS_KEY = "compile_flags"
@@ -44,6 +45,7 @@ class DebugMode(Enum):
4445
_DEBUG_MODE_KEY = "dump_debug_info"
4546
_OUTPUT_REORDER_KEY = "ouput_reorder_workaround"
4647
_TRANSFORM_PIPELINE_CONFIG_KEY = "transform_pipeline_config"
48+
_PRESERVE_IO_QUANT_KEY = "preserve_io_quantization"
4749

4850
def _set_compile_specs(
4951
self,
@@ -53,6 +55,7 @@ def _set_compile_specs(
5355
tosa_debug_mode: DebugMode | None = None,
5456
output_order_workaround: bool = False,
5557
pipeline_config: ArmPassPipelineConfig | None = None,
58+
preserve_io_quantization: bool = False,
5659
):
5760
"""Set all values of dataclass directly."""
5861
self.tosa_spec = tosa_spec
@@ -61,6 +64,8 @@ def _set_compile_specs(
6164
self.tosa_debug_mode = tosa_debug_mode
6265
self._pipeline_config = pipeline_config
6366
self.output_order_workaround = output_order_workaround
67+
self.preserve_io_quantization = preserve_io_quantization
68+
self._warn_if_redundant_preserve_io_quantization()
6469
if output_order_workaround:
6570
warnings.warn(
6671
"ArmCompileSpec(output_order_workaround=True) is deprecated and will be "
@@ -78,6 +83,7 @@ def _from_list(cls, compile_specs: list[CompileSpec]): # noqa: C901
7883
tosa_debug_mode: ArmCompileSpec.DebugMode | None = None
7984
output_order_workaround: bool = False
8085
pipeline_config: ArmPassPipelineConfig | None = None
86+
preserve_io_quantization: bool = False
8187
unknown_specs: dict[str, str] = {}
8288
for spec in compile_specs:
8389
key = spec.key
@@ -128,6 +134,8 @@ def _from_list(cls, compile_specs: list[CompileSpec]): # noqa: C901
128134
"More than one transform pipeline entry in compile spec."
129135
)
130136
pipeline_config = ArmPassPipelineConfig.from_dict(json.loads(val))
137+
elif key == ArmCompileSpec._PRESERVE_IO_QUANT_KEY:
138+
preserve_io_quantization = str(val).lower() in ("1", "true", "yes")
131139
else:
132140
unknown_specs[key] = val
133141

@@ -151,6 +159,7 @@ def _from_list(cls, compile_specs: list[CompileSpec]): # noqa: C901
151159
tosa_debug_mode=tosa_debug_mode,
152160
output_order_workaround=output_order_workaround,
153161
pipeline_config=pipeline_config,
162+
preserve_io_quantization=preserve_io_quantization,
154163
)
155164
cls._from_list_hook(compile_spec, unknown_specs)
156165
compile_spec._validate()
@@ -227,8 +236,35 @@ def _to_list(self):
227236
self._pipeline_config.serialize(),
228237
)
229238
)
239+
compile_spec.append(
240+
CompileSpec(
241+
ArmCompileSpec._PRESERVE_IO_QUANT_KEY,
242+
str(bool(self.preserve_io_quantization)).encode(),
243+
)
244+
)
230245
return compile_spec
231246

247+
    def _set_preserve_io_quantization(self, enabled: bool) -> "ArmCompileSpec":
        """Preserve Q/DQ nodes at IO boundaries when lowering.

        Args:
            enabled: Whether boundary quantize/dequantize nodes should be
                preserved.

        Returns:
            This compile spec, to allow fluent chaining.
        """
        self.preserve_io_quantization = enabled
        # Warn immediately if the flag is a no-op for the current TOSA spec.
        self._warn_if_redundant_preserve_io_quantization()
        return self
252+
253+
def _warn_if_redundant_preserve_io_quantization(self) -> None:
254+
"""Warn when preserve_io_quantization has no effect for INT-only
255+
specs.
256+
"""
257+
if (
258+
self.preserve_io_quantization
259+
and self.tosa_spec.support_integer()
260+
and not self.tosa_spec.support_float()
261+
):
262+
warnings.warn(
263+
"preserve_io_quantization=True is redundant for INT-only TOSA "
264+
"specifications because boundary Q/DQ are already de-tagged.",
265+
stacklevel=3,
266+
)
267+
232268
def _get_pass_pipeline_config(self) -> ArmPassPipelineConfig:
233269
"""Returns configuration that controls how the Arm pass pipeline should
234270
behave.

backends/arm/ethosu/compile_spec.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def _default_system_config_and_memory_mode(
4848
return resolved_system_config, resolved_memory_mode
4949
if "ethos-u65" in target_lower:
5050
resolved_system_config = (
51-
"Ethos_U65_SYS_DRAM_Mid" if system_config is None else system_config
51+
"Ethos_U65_High_End" if system_config is None else system_config
5252
)
5353
resolved_memory_mode = "Sram_Only" if memory_mode is None else memory_mode
5454
return resolved_system_config, resolved_memory_mode

backends/arm/operators/op_tosa_rescale.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,8 @@ def _build_rescale(
161161
rounding_mode: ts.RoundingMode,
162162
per_channel: bool = False,
163163
is_scale32: bool = True,
164+
input_unsigned: bool = False,
165+
output_unsigned: bool = False,
164166
):
165167
"""Insert a TOSA RESCALE operator configured for the quantized path.
166168
@@ -198,8 +200,8 @@ def _build_rescale(
198200
scale32=is_scale32,
199201
rounding_mode=rounding_mode,
200202
per_channel=per_channel,
201-
input_unsigned=False,
202-
output_unsigned=False,
203+
input_unsigned=input_unsigned,
204+
output_unsigned=output_unsigned,
203205
)
204206

205207
tosa_fb.addOperator(
@@ -228,6 +230,14 @@ def define_node(
228230
scales = cast(list[float], node.args[2])
229231
input_zp = cast(int, node.args[3])
230232
output_zp = cast(int, node.args[4])
233+
if "input_unsigned" in node.kwargs:
234+
input_unsigned = cast(bool, node.kwargs.get("input_unsigned", False))
235+
else:
236+
input_unsigned = cast(bool, node.args[5]) if len(node.args) > 5 else False
237+
if "output_unsigned" in node.kwargs:
238+
output_unsigned = cast(bool, node.kwargs.get("output_unsigned", False))
239+
else:
240+
output_unsigned = cast(bool, node.args[6]) if len(node.args) > 6 else False
231241

232242
if (
233243
input_dtype
@@ -244,7 +254,6 @@ def define_node(
244254
raise ValueError(
245255
f"If output dtype is not int8 or int16, output_zp must be 0. Got {ts.DTypeNames[output_dtype]}, {output_zp=}"
246256
)
247-
248257
_build_rescale(
249258
tosa_graph,
250259
scale=scales,
@@ -255,4 +264,6 @@ def define_node(
255264
output_zp=[output_zp],
256265
rounding_mode=ts.RoundingMode.SINGLE_ROUND,
257266
per_channel=len(scales) > 1,
267+
input_unsigned=input_unsigned,
268+
output_unsigned=output_unsigned,
258269
)

backends/arm/quantizer/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
EthosUQuantizer,
1616
get_symmetric_a16w8_quantization_config,
1717
get_symmetric_quantization_config,
18+
get_uint8_io_quantization_config,
1819
TOSAQuantizer,
1920
VgfQuantizer,
2021
)

backends/arm/quantizer/arm_quantizer.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@
105105
"VgfQuantizer",
106106
"get_symmetric_a16w8_quantization_config",
107107
"get_symmetric_quantization_config",
108+
"get_uint8_io_quantization_config",
108109
]
109110

110111
logger = logging.getLogger(__name__)
@@ -234,6 +235,53 @@ def get_symmetric_quantization_config(
234235
return quantization_config
235236

236237

238+
@functools.lru_cache
def get_uint8_io_quantization_config(
    is_qat: bool = False,
    is_dynamic: bool = False,
    eps: float = 2**-16,
) -> QuantizationConfig:
    """Create a uint8 IO quantization config for TOSA backends.

    This config is intended for model inputs/outputs only. Internal tensors
    should remain int8 for TOSA INT lowering.
    """
    extra_args: Dict[str, Any] = {"eps": eps}
    # Pick the activation observer/fake-quant constructor for the four
    # (is_qat, is_dynamic) combinations.
    if is_qat and is_dynamic:
        act_observer_or_fake_quant_ctr = FakeQuantize
        # Dynamic QAT tracks ranges with a moving-average observer.
        extra_args["observer"] = MovingAverageMinMaxObserver.with_args(
            averaging_constant=1
        )
    elif is_qat:
        act_observer_or_fake_quant_ctr = FusedMovingAvgObsFakeQuantize  # type: ignore[assignment]
    elif is_dynamic:
        act_observer_or_fake_quant_ctr = PlaceholderObserver  # type: ignore[assignment]
    else:
        act_observer_or_fake_quant_ctr = HistogramObserver  # type: ignore[assignment]

    uint8_info = torch.iinfo(torch.uint8)
    act_quantization_spec = QuantizationSpec(
        dtype=torch.uint8,
        quant_min=uint8_info.min,
        quant_max=uint8_info.max,
        qscheme=torch.per_tensor_affine,
        is_dynamic=is_dynamic,
        observer_or_fake_quant_ctr=act_observer_or_fake_quant_ctr.with_args(
            **extra_args,
        ),
    )

    # Same spec for input and output activations; weights/bias left unset.
    return TOSAQuantizationConfig(
        act_quantization_spec,
        act_quantization_spec,
        None,
        None,
    )
283+
284+
237285
def get_symmetric_a8w4_quantization_config(
238286
is_per_channel: bool = True, is_qat: bool = True, is_dynamic: bool = False
239287
):

0 commit comments

Comments
 (0)