
Commit 33acf17 (2 parents: e51b858 + 60d57e5)

Update base for Update on "[ET Device Support] TensorImpl carries device info"

This diff extends `TensorImpl` to carry device information, enabling the runtime tensor to track which device its data resides on (CPU, CUDA, etc.). This is a prerequisite for parsing device info from the schema and allocating device memory.

Differential Revision: D93635655 (https://our.internmc.facebook.com/intern/diff/D93635655/)

[ghstack-poisoned]
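The idea in the commit message can be illustrated with a small conceptual sketch. This is Python with entirely hypothetical names (`Device`, `FakeTensorImpl`); the real change is to ExecuTorch's C++ `TensorImpl` and nothing here mirrors the actual API:

```python
# Conceptual sketch only: a tensor impl that carries device info so the
# runtime can tell where its data lives. Hypothetical names throughout.
from dataclasses import dataclass, field


@dataclass
class Device:
    type: str = "cpu"  # e.g. "cpu", "cuda"
    index: int = 0


@dataclass
class FakeTensorImpl:
    sizes: tuple
    # The tensor now tracks the device its data resides on.
    device: Device = field(default_factory=Device)


cpu_t = FakeTensorImpl(sizes=(2, 3))
assert cpu_t.device.type == "cpu"

gpu_t = FakeTensorImpl(sizes=(2, 3), device=Device("cuda", 0))
assert gpu_t.device.type == "cuda"
```

With the device recorded on the impl, downstream code (schema parsing, allocation) can branch on `device.type` instead of assuming CPU.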

60 files changed: 3986 additions and 1294 deletions


.ci/scripts/wheel/pre_build_script.sh

Lines changed: 19 additions & 12 deletions
@@ -9,34 +9,41 @@ set -euxo pipefail
 
 # This script is run before building ExecuTorch binaries
 
-if [[ "$(uname -m)" == "aarch64" ]]; then
-  # On some Linux aarch64 systems, the "atomic" library is not found during linking.
-  # To work around this, replace "atomic" with the literal ${ATOMIC_LIB} so the
-  # build system uses the full path to the atomic library.
-  file="extension/llm/tokenizers/third-party/sentencepiece/src/CMakeLists.txt"
-  sed 's/list(APPEND SPM_LIBS "atomic")/list(APPEND SPM_LIBS ${ATOMIC_LIB})/' \
-    "$file" > "${file}.tmp" && mv "${file}.tmp" "$file"
-
-  grep -n 'list(APPEND SPM_LIBS ${ATOMIC_LIB})' "$file" && \
-    echo "the file $file has been modified for atomic to use full path"
+# Initialize submodules here instead of during checkout so we can use OpenSSL
+# on Windows (schannel fails with SEC_E_ILLEGAL_MESSAGE on some gitlab hosts).
+UNAME_S=$(uname -s)
+if [[ $UNAME_S == *"MINGW"* || $UNAME_S == *"MSYS"* ]]; then
+  git -c http.sslBackend=openssl submodule update --init
+else
+  git submodule update --init
 fi
 
 # Clone nested submodules for tokenizers - this is a workaround for recursive
 # submodule clone failing due to path length limitations on Windows. Eventually,
 # we should update the core job in test-infra to enable long paths before
 # checkout to avoid needing to do this.
 pushd extension/llm/tokenizers
-UNAME_S=$(uname -s)
 if [[ $UNAME_S == *"MINGW"* || $UNAME_S == *"MSYS"* ]]; then
   git -c http.sslBackend=openssl submodule update --init
 else
   git submodule update --init
 fi
 popd
 
+if [[ "$(uname -m)" == "aarch64" ]]; then
+  # On some Linux aarch64 systems, the "atomic" library is not found during linking.
+  # To work around this, replace "atomic" with the literal ${ATOMIC_LIB} so the
+  # build system uses the full path to the atomic library.
+  file="extension/llm/tokenizers/third-party/sentencepiece/src/CMakeLists.txt"
+  sed 's/list(APPEND SPM_LIBS "atomic")/list(APPEND SPM_LIBS ${ATOMIC_LIB})/' \
+    "$file" > "${file}.tmp" && mv "${file}.tmp" "$file"
+
+  grep -n 'list(APPEND SPM_LIBS ${ATOMIC_LIB})' "$file" && \
+    echo "the file $file has been modified for atomic to use full path"
+fi
+
 # On Windows, enable symlinks and re-checkout the current revision to create
 # the symlinked src/ directory. This is needed to build the wheel.
-UNAME_S=$(uname -s)
 if [[ $UNAME_S == *"MINGW"* || $UNAME_S == *"MSYS"* ]]; then
   echo "Enabling symlinks on Windows"
   git config core.symlinks true
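The sed substitution in the aarch64 branch above can be sketched in isolation. This is a Python stand-in for the sed call (the CMake line is quoted from the script; the replacement logic is the same textual substitution):

```python
# Stand-in for the script's sed invocation: swap the quoted "atomic"
# library name for the ${ATOMIC_LIB} placeholder in a CMakeLists line,
# so the build later links against the full path to libatomic.
cmake_line = 'list(APPEND SPM_LIBS "atomic")'

patched = cmake_line.replace(
    'list(APPEND SPM_LIBS "atomic")',
    'list(APPEND SPM_LIBS ${ATOMIC_LIB})',
)

assert patched == 'list(APPEND SPM_LIBS ${ATOMIC_LIB})'
```

The script's `grep -n ... && echo ...` afterwards is just a sanity check that the substitution actually landed in the file.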

.github/workflows/build-wheels-windows.yml

Lines changed: 3 additions & 1 deletion
@@ -64,4 +64,6 @@ jobs:
       smoke-test-script: ${{ matrix.smoke-test-script }}
       trigger-event: ${{ github.event_name }}
       wheel-build-params: "--verbose"
-      submodules: true
+      # Submodules are initialized in pre_build_script.sh with OpenSSL to avoid
+      # schannel SSL errors on Windows when cloning from non-GitHub hosts.
+      submodules: false

.github/workflows/cuda.yml

Lines changed: 3 additions & 0 deletions
@@ -135,6 +135,9 @@ jobs:
           # Run CUDA backend Python tests
           python -m pytest backends/cuda/tests backends/cuda/passes/tests -v -o "addopts="
 
+          # Build Qwen3.5 MoE runner (ExecuTorch already built above)
+          cd examples/models/qwen3_5_moe && cmake --workflow --preset qwen3-5-moe-cuda
+
   export-model-cuda-artifact:
     name: export-model-cuda-artifact
     # Skip this job if the pull request is from a fork (HuggingFace secrets are not available)

Makefile

Lines changed: 11 additions & 1 deletion
@@ -91,7 +91,7 @@
 #
 # ==============================================================================
 
-.PHONY: voxtral-cuda voxtral-cpu voxtral-metal voxtral_realtime-cuda voxtral_realtime-cpu voxtral_realtime-metal whisper-cuda whisper-cuda-debug whisper-cpu whisper-metal parakeet-cuda parakeet-cuda-debug parakeet-cpu parakeet-metal parakeet-vulkan dinov2-cuda dinov2-cuda-debug sortformer-cuda sortformer-cpu silero-vad-cpu llama-cuda llama-cuda-debug llama-cpu llava-cpu gemma3-cuda gemma3-cpu clean help
+.PHONY: voxtral-cuda voxtral-cpu voxtral-metal voxtral_realtime-cuda voxtral_realtime-cpu voxtral_realtime-metal whisper-cuda whisper-cuda-debug whisper-cpu whisper-metal parakeet-cuda parakeet-cuda-debug parakeet-cpu parakeet-metal parakeet-vulkan dinov2-cuda dinov2-cuda-debug sortformer-cuda sortformer-cpu silero-vad-cpu llama-cuda llama-cuda-debug llama-cpu llava-cpu gemma3-cuda gemma3-cpu qwen3_5_moe-cuda clean help
 
 help:
 	@echo "This Makefile adds targets to build runners for various models on various backends. Run using \`make <target>\`. Available targets:"
@@ -121,6 +121,7 @@ help:
 	@echo "  llava-cpu - Build Llava runner with CPU backend"
 	@echo "  gemma3-cuda - Build Gemma3 runner with CUDA backend"
 	@echo "  gemma3-cpu - Build Gemma3 runner with CPU backend"
+	@echo "  qwen3_5_moe-cuda - Build Qwen3.5 MoE runner with CUDA backend"
 	@echo "  clean - Clean build artifacts"
 
 voxtral-cuda:
@@ -362,6 +363,15 @@ gemma3-cpu:
 	@echo "✓ Build complete!"
 	@echo "  Binary: cmake-out/examples/models/gemma3/gemma3_e2e_runner"
 
+qwen3_5_moe-cuda:
+	@echo "==> Building and installing ExecuTorch with CUDA..."
+	cmake --workflow --preset llm-release-cuda
+	@echo "==> Building Qwen3.5 MoE runner with CUDA..."
+	cd examples/models/qwen3_5_moe && cmake --workflow --preset qwen3-5-moe-cuda
+	@echo ""
+	@echo "✓ Build complete!"
+	@echo "  Binary: cmake-out/examples/models/qwen3_5_moe/qwen3_5_moe_runner"
+
 clean:
 	rm -rf cmake-out \
 		extension/llm/tokenizers/build \

backends/arm/common/arm_compile_spec.py

Lines changed: 8 additions & 8 deletions
@@ -117,9 +117,9 @@ def from_list(cls, compile_specs: list[CompileSpec]):  # noqa: C901
             raise ValueError("No tosa_spec in compile spec.")
         if output_format is None:
             raise ValueError("No output_format in compile spec.")
-        if output_format != cls.get_output_format():
+        if output_format != cls._get_output_format():
             raise ValueError(
-                f"Incorrect output format '{output_format}' for {cls.__name__}, expected '{cls.get_output_format()}'"
+                f"Incorrect output format '{output_format}' for {cls.__name__}, expected '{cls._get_output_format()}'"
             )
         if compiler_flags is None:
             compiler_flags = []
@@ -134,17 +134,17 @@ def from_list(cls, compile_specs: list[CompileSpec]):  # noqa: C901
             output_order_workaround=output_order_workaround,
             pipeline_config=pipeline_config,
         )
-        cls.from_list_hook(compile_spec, unknown_specs)
-        compile_spec.validate()
+        cls._from_list_hook(compile_spec, unknown_specs)
+        compile_spec._validate()
         return compile_spec
 
     @classmethod
-    def from_list_hook(cls, compile_spec, specs: dict[str, str]):  # noqa: B027
+    def _from_list_hook(cls, compile_spec, specs: dict[str, str]):  # noqa: B027
         """Allows subclasses to hook into parsing compile spec lists."""
         pass
 
     @abstractmethod
-    def validate(self):
+    def _validate(self):
         """Throws an error if the compile spec is not valid."""
 
     def to_list(self):
@@ -170,7 +170,7 @@ def to_list(self):
         # Add output format to identify kind of compile spec.
         compile_spec.append(
             CompileSpec(
-                ArmCompileSpec._OUTPUT_FORMAT_KEY, self.get_output_format().encode()
+                ArmCompileSpec._OUTPUT_FORMAT_KEY, self._get_output_format().encode()
             )
         )
@@ -285,5 +285,5 @@ def get_output_order_workaround(self) -> bool:
 
     @classmethod
     @abstractmethod
-    def get_output_format(cls) -> str:
+    def _get_output_format(cls) -> str:
         """Returns a constant string that is the output format of the class."""

backends/arm/ethosu/compile_spec.py

Lines changed: 4 additions & 4 deletions
@@ -119,7 +119,7 @@ def __init__(
         )
         tosa_spec = self._tosa_spec_for_target(target_lower)
         self._set_compile_specs(tosa_spec, compiler_flags)
-        self.validate()
+        self._validate()
 
     def to_list(self):
         """Return compile specs including the encoded Ethos-U target."""
@@ -128,11 +128,11 @@ def to_list(self):
         return compile_specs
 
     @classmethod
-    def from_list_hook(cls, compile_spec, specs: dict[str, str]):
+    def _from_list_hook(cls, compile_spec, specs: dict[str, str]):
         """Restore target-specific metadata from serialized compile specs."""
         compile_spec.target = specs.get(cls._TARGET_KEY, None)
 
-    def validate(self):
+    def _validate(self):
         """Validate the configuration against supported Ethos-U settings."""
         if len(self.compiler_flags) == 0:
             raise ValueError(
@@ -144,7 +144,7 @@ def validate(self):
         )
 
     @classmethod
-    def get_output_format(cls) -> str:
+    def _get_output_format(cls) -> str:
         """Return the artifact format emitted by this compile spec."""
         return "vela"

backends/arm/quantizer/arm_quantizer.py

Lines changed: 39 additions & 2 deletions
@@ -71,6 +71,7 @@
     SharedQspecQuantizer,
 )
 from executorch.backends.arm.vgf import VgfCompileSpec
+from executorch.exir._warnings import experimental
 from torch.fx import GraphModule, Node
 from torchao.quantization.pt2e import (
     FakeQuantize,
@@ -441,14 +442,26 @@ def _for_each_filtered_node(
 
 
 class TOSAQuantizer(Quantizer):
-    """Manage quantization annotations for TOSA-compatible backends."""
+    """Manage quantization annotations for TOSA-compatible backends.
+
+    .. warning::
+        Setting ``use_composable_quantizer=True`` enables an experimental API
+        surface that may change without notice.
+
+    """
 
     def __init__(
         self,
         compile_spec_or_tosa_spec,
         use_composable_quantizer: bool = False,
    ) -> None:
-        """Create a TOSA quantizer from a TOSA spec or Arm compile spec."""
+        """Create a TOSA quantizer from a TOSA spec or Arm compile spec.
+
+        .. warning::
+            Setting ``use_composable_quantizer=True`` enables an experimental
+            API surface that may change without notice.
+
+        """
         self.use_composable_quantizer = use_composable_quantizer
         self.quantizer: _TOSAQuantizerV1 | _TOSAQuantizerV2
         if use_composable_quantizer:
@@ -606,6 +619,10 @@ def set_io(
         self.quantizer.set_io(quantization_config)
         return self
 
+    @experimental(
+        "This API is experimental and may change without notice. "
+        "It is only available when use_composable_quantizer=True."
+    )
     def add_quantizer(self, quantizer: Quantizer) -> TOSAQuantizer:
         """Insert a quantizer with highest precedence."""
         if self.use_composable_quantizer:
@@ -614,6 +631,10 @@ def add_quantizer(self, quantizer: Quantizer) -> TOSAQuantizer:
             "add_quantizer is only supported in the composable quantizer implementation."
         )
 
+    @experimental(
+        "This API is experimental and may change without notice. "
+        "It is only available when use_composable_quantizer=True."
+    )
     def set_node_finder(
         self, quantization_config: Optional[QuantizationConfig], node_finder: NodeFinder
     ) -> TOSAQuantizer:
@@ -631,6 +652,10 @@ def set_node_finder(
             "set_node_finder is only supported in the composable quantizer implementation."
         )
 
+    @experimental(
+        "This API is experimental and may change without notice. "
+        "It is only available when use_composable_quantizer=True."
+    )
     def set_node_target(
         self, node_target: OpOverload, quantization_config: Optional[QuantizationConfig]
     ) -> TOSAQuantizer:
@@ -641,6 +666,10 @@ def set_node_target(
             "set_node_target is only supported in the composable quantizer implementation."
         )
 
+    @experimental(
+        "This API is experimental and may change without notice. "
+        "It is only available when use_composable_quantizer=True."
+    )
     def set_node_name(
         self, node_name: str, quantization_config: Optional[QuantizationConfig]
     ) -> TOSAQuantizer:
@@ -1167,6 +1196,10 @@ def set_io(
 class EthosUQuantizer(TOSAQuantizer):
     """Quantizer supported by the Arm Ethos-U backend.
 
+    .. warning::
+        Setting ``use_composable_quantizer=True`` enables an experimental API
+        surface that may change without notice.
+
     Args:
         compile_spec (EthosUCompileSpec): Backend compile specification for
             Ethos-U targets.
@@ -1185,6 +1218,10 @@ def __init__(
 class VgfQuantizer(TOSAQuantizer):
     """Quantizer supported by the Arm Vgf backend.
 
+    .. warning::
+        Setting ``use_composable_quantizer=True`` enables an experimental API
+        surface that may change without notice.
+
     Args:
         compile_spec (VgfCompileSpec): Backend compile specification for Vgf
             targets.
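A decorator in the spirit of the `experimental` marker applied above can be sketched as follows. This is illustrative only: it is not the implementation in `executorch.exir._warnings`, whose actual behavior may differ:

```python
# Sketch of an @experimental-style decorator: wrap a function so that
# calling it emits a warning with the given message. Illustrative only.
import functools
import warnings


def experimental(message: str):
    def decorator(fn):
        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            warnings.warn(message, UserWarning, stacklevel=2)
            return fn(*args, **kwargs)
        return wrapper
    return decorator


@experimental(
    "This API is experimental and may change without notice. "
    "It is only available when use_composable_quantizer=True."
)
def add_quantizer(quantizer):
    return quantizer


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    add_quantizer("dummy")

assert any("experimental" in str(w.message) for w in caught)
```

Marking the methods at the decorator level keeps the warning next to the API surface it guards, rather than scattering warning calls through the method bodies.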

backends/arm/scripts/docgen/docgen.py

Lines changed: 2 additions & 2 deletions
@@ -135,7 +135,7 @@ def generate_ethos_u_docs():
     """Generates documentation for the Ethos-U components in the backend."""
     compilespec_string = get_class_docstring(
         EthosUCompileSpec,
-        ("DebugMode", "to_list", "from_list", "from_list_hook", "validate"),
+        ("DebugMode", "to_list", "from_list"),
     )
     partitioner_string = get_class_docstring(EthosUPartitioner)
     quantizer_string = get_class_docstring(
@@ -190,7 +190,7 @@ def generate_vgf_docs():
     """Generates documentation for the VGF components in the backend."""
     compilespec_string = get_class_docstring(
         VgfCompileSpec,
-        ("DebugMode", "to_list", "from_list", "from_list_hook", "validate"),
+        ("DebugMode", "to_list", "from_list"),
     )
     partitioner_string = get_class_docstring(VgfPartitioner)
     quantizer_string = get_class_docstring(
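The shrunken exclusion tuples make sense once `from_list_hook` and `validate` are underscore-private: a doc generator that skips private members no longer needs to list them explicitly. A sketch with a hypothetical helper (the real `get_class_docstring` may work differently):

```python
# Sketch: a member lister that skips _-prefixed names automatically,
# so only still-public members need an explicit exclusion. The helper
# below is hypothetical, not the docgen module's actual code.
import inspect


class Demo:
    """Example class."""

    def to_list(self): ...
    def _validate(self): ...          # private: skipped automatically
    def _from_list_hook(self): ...    # private: skipped automatically


def public_methods(cls, exclude=()):
    return [
        name
        for name, _ in inspect.getmembers(cls, inspect.isfunction)
        if not name.startswith("_") and name not in exclude
    ]


assert public_methods(Demo) == ["to_list"]
assert public_methods(Demo, exclude=("to_list",)) == []
```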
