Skip to content

Commit bf08ec5

Browse files
authored
Merge branch 'main' into extra_ops_modes
2 parents dba1f4e + c7f1d72 commit bf08ec5

165 files changed

Lines changed: 8155 additions & 1071 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

.ci/scripts/export_model_artifact.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,7 @@ if [ "$MODEL_NAME" = "voxtral_realtime" ]; then
358358
STREAMING_ARG=""
359359
PREPROCESSOR_ARGS="--feature_size 128 --output_file ${OUTPUT_DIR}/preprocessor.pte"
360360
if [ "$USE_STREAMING" = "true" ]; then
361-
STREAMING_ARG="--streaming"
361+
STREAMING_ARG="--streaming --sliding-window 2048"
362362
PREPROCESSOR_ARGS="$PREPROCESSOR_ARGS --streaming"
363363
else
364364
PREPROCESSOR_ARGS="$PREPROCESSOR_ARGS --stack_output --max_audio_len 300"
@@ -424,6 +424,7 @@ if [ "$MODEL_NAME" = "qwen3_5_moe" ]; then
424424
test -f "${OUTPUT_DIR}/model.pte"
425425
test -f "${OUTPUT_DIR}/aoti_cuda_blob.ptd"
426426
ls -al "${OUTPUT_DIR}"
427+
427428
exit 0
428429
fi
429430

.ci/scripts/test_model_e2e.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,7 @@ EOF
354354
fi
355355
;;
356356
qwen3_5_moe)
357-
RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --prompt 'What is the capital of France?' --max_new_tokens 32"
357+
RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --prompt 'What is the capital of France?' --max_new_tokens 128 --temperature 0"
358358
;;
359359
voxtral_realtime)
360360
RUNNER_ARGS="--model_path ${MODEL_DIR}/model.pte --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --preprocessor_path ${MODEL_DIR}/$PREPROCESSOR --audio_path ${MODEL_DIR}/$AUDIO_FILE --temperature 0"

.ci/scripts/wheel/test_linux.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,18 +11,25 @@
1111
from examples.models import Backend, Model
1212

1313
if __name__ == "__main__":
14-
# On Linux x86_64 the wheel is built with the Qualcomm backend.
15-
# Verify that it was registered correctly.
16-
if platform.system() == "Linux" and platform.machine() in ("x86_64", "amd64"):
14+
if platform.system() == "Linux":
1715
from executorch.extension.pybindings.portable_lib import (
1816
_get_registered_backend_names,
1917
)
2018

2119
registered = _get_registered_backend_names()
20+
21+
# QNN backend is only available on x86_64.
22+
if platform.machine() in ("x86_64", "amd64"):
23+
assert (
24+
"QnnBackend" in registered
25+
), f"QnnBackend not found in registered backends: {registered}"
26+
print("✓ QnnBackend is registered")
27+
28+
# OpenVINO backend is available on all Linux architectures.
2229
assert (
23-
"QnnBackend" in registered
24-
), f"QnnBackend not found in registered backends: {registered}"
25-
print("✓ QnnBackend is registered")
30+
"OpenvinoBackend" in registered
31+
), f"OpenvinoBackend not found in registered backends: {registered}"
32+
print("✓ OpenvinoBackend is registered")
2633

2734
test_base.run_tests(
2835
model_tests=[

.ci/scripts/wheel/test_linux_aarch64.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,20 @@
1212
# coremltools does not support linux aarch64 yet and install from the source fails on runtime
1313
# https://github.com/apple/coremltools/issues/1254
1414
# https://github.com/apple/coremltools/issues/2195
15+
16+
from executorch.extension.pybindings.portable_lib import (
17+
_get_registered_backend_names,
18+
)
19+
20+
registered = _get_registered_backend_names()
21+
22+
# OpenVINO backend uses dlopen (no build-time SDK dependency), so it
23+
# is compiled into the wheel on all Linux architectures.
24+
assert (
25+
"OpenvinoBackend" in registered
26+
), f"OpenvinoBackend not found in registered backends: {registered}"
27+
print("✓ OpenvinoBackend is registered")
28+
1529
test_base.run_tests(
1630
model_tests=[
1731
test_base.ModelTest(
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
name: Test Cadence
2+
3+
permissions:
4+
id-token: write
5+
contents: read
6+
7+
on:
8+
workflow_call:
9+
inputs:
10+
docker-image:
11+
description: 'Docker image to use'
12+
required: false
13+
type: string
14+
default: ci-image:executorch-ubuntu-22.04-clang12
15+
runner:
16+
description: 'Runner type'
17+
required: false
18+
type: string
19+
default: linux.8xlarge.memory
20+
ref:
21+
description: 'Git ref to checkout'
22+
required: false
23+
type: string
24+
default: ${{ github.sha }}
25+
timeout:
26+
description: 'Job timeout in minutes'
27+
required: false
28+
type: number
29+
default: 90
30+
31+
jobs:
32+
test-aot:
33+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
34+
with:
35+
job-name: test-aot
36+
runner: ${{ inputs.runner }}
37+
docker-image: ${{ inputs.docker-image }}
38+
submodules: recursive
39+
ref: ${{ inputs.ref }}
40+
timeout: ${{ inputs.timeout }}
41+
script: |
42+
set -eux
43+
conda create -y -n cadence_test python=3.12 > /dev/null
44+
conda activate cadence_test
45+
46+
./install_requirements.sh > /dev/null
47+
pip install -e . --no-build-isolation > /dev/null
48+
pip install beartype later pyre_extensions pytest-xdist
49+
50+
python -m pytest backends/cadence/aot/tests/ -v -n auto
51+
52+
test-ops:
53+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
54+
with:
55+
job-name: test-ops
56+
runner: ${{ inputs.runner }}
57+
docker-image: ${{ inputs.docker-image }}
58+
submodules: recursive
59+
ref: ${{ inputs.ref }}
60+
timeout: ${{ inputs.timeout }}
61+
download-artifact: cadence-runner-build
62+
script: |
63+
set -eux
64+
conda create -y -n cadence_test python=3.12 > /dev/null
65+
conda activate cadence_test
66+
67+
./install_requirements.sh > /dev/null
68+
pip install -e . --no-build-isolation > /dev/null
69+
pip install beartype later pyre_extensions pytest-xdist
70+
71+
# Use the pre-built runner from the build job
72+
mkdir -p cmake-out/backends/cadence
73+
cp "${RUNNER_ARTIFACT_DIR}/cadence_runner" cmake-out/backends/cadence/cadence_runner
74+
chmod +x cmake-out/backends/cadence/cadence_runner
75+
76+
export PYTHONPATH="${PYTHONPATH:-}:$(pwd)/backends/cadence/utils/FACTO"
77+
python -m pytest examples/cadence/operators/ -v -n auto

.github/workflows/android-release-artifacts.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ jobs:
165165
contents: read
166166
steps:
167167
- name: configure aws credentials
168-
uses: aws-actions/configure-aws-credentials@v1.7.0
168+
uses: aws-actions/configure-aws-credentials@v4
169169
with:
170170
role-to-assume: arn:aws:iam::308535385114:role/gha_executorch_upload-frameworks-android
171171
aws-region: us-east-1

.github/workflows/apple.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ jobs:
239239
python-version: '3.11'
240240
cache: pip
241241
- name: configure aws credentials
242-
uses: aws-actions/configure-aws-credentials@v1.7.0
242+
uses: aws-actions/configure-aws-credentials@v4
243243
with:
244244
role-to-assume: arn:aws:iam::308535385114:role/gha_executorch_upload-frameworks-ios
245245
aws-region: us-east-1

.github/workflows/build-cadence-runner.yml

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: Build Cadence
1+
name: Cadence Build & Test
22

33
on:
44
pull_request:
@@ -13,7 +13,7 @@ concurrency:
1313
cancel-in-progress: true
1414

1515
jobs:
16-
cpu:
16+
cpu-build:
1717
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
1818
permissions:
1919
id-token: write
@@ -25,6 +25,7 @@ jobs:
2525
submodules: recursive
2626
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
2727
timeout: 90
28+
upload-artifact: cadence-runner-build
2829
script: |
2930
set -eux
3031
# The generic Linux job chooses to use base env, not the one setup by the image
@@ -33,3 +34,15 @@ jobs:
3334
3435
./install_requirements.sh > /dev/null
3536
bash backends/cadence/build_cadence_runner.sh
37+
38+
# Copy runner binary to artifact dir for downstream test jobs
39+
cp cmake-out/backends/cadence/cadence_runner "${RUNNER_ARTIFACT_DIR}/"
40+
41+
cpu-test:
42+
needs: cpu-build
43+
permissions:
44+
id-token: write
45+
contents: read
46+
uses: ./.github/workflows/_test_cadence.yml
47+
with:
48+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}

.github/workflows/cuda-windows.yml

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,11 @@ jobs:
4848
- model_repo: "nvidia"
4949
model_name: "parakeet-tdt"
5050
quant: "quantized-int4-weight-only"
51-
- model_repo: "nvidia"
52-
model_name: "diar_streaming_sortformer_4spk-v2"
53-
quant: "non-quantized"
51+
# TODO: sortformer produces 0 segments on Windows after D97788666.
52+
# Temporarily disabled until root cause is debugged.
53+
# - model_repo: "nvidia"
54+
# model_name: "diar_streaming_sortformer_4spk-v2"
55+
# quant: "non-quantized"
5456
- model_repo: "mistralai"
5557
model_name: "Voxtral-Mini-4B-Realtime-2602"
5658
quant: "quantized-int4-tile-packed"
@@ -129,9 +131,11 @@ jobs:
129131
- model_repo: "nvidia"
130132
model_name: "parakeet-tdt"
131133
quant: "quantized-int4-weight-only"
132-
- model_repo: "nvidia"
133-
model_name: "diar_streaming_sortformer_4spk-v2"
134-
quant: "non-quantized"
134+
# TODO: sortformer produces 0 segments on Windows after D97788666.
135+
# Temporarily disabled until root cause is debugged.
136+
# - model_repo: "nvidia"
137+
# model_name: "diar_streaming_sortformer_4spk-v2"
138+
# quant: "non-quantized"
135139
- model_repo: "mistralai"
136140
model_name: "Voxtral-Mini-4B-Realtime-2602"
137141
quant: "quantized-int4-tile-packed"

.github/workflows/cuda.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,8 @@ jobs:
145145
# Run CUDA backend Python tests
146146
python -m pytest backends/cuda/tests backends/cuda/passes/tests -v -o "addopts="
147147
148-
# Run quantize roundtrip tests (Qwen 3.5 MoE save/load prequantized)
149-
python -m pytest examples/models/qwen3_5_moe/test_quantize_roundtrip.py -v -o "addopts="
148+
# Run Qwen 3.5 MoE tests (quantize roundtrip + TurboQuant KV cache)
149+
python -m pytest examples/models/qwen3_5_moe/test_quantize_roundtrip.py examples/models/qwen3_5_moe/test_turboquant.py -v -o "addopts="
150150
151151
export-model-cuda-artifact:
152152
name: export-model-cuda-artifact

0 commit comments

Comments (0)