Skip to content

Commit bf08ec5

Browse files
authored
Merge branch 'main' into extra_ops_modes
2 parents dba1f4e + c7f1d72 commit bf08ec5

165 files changed

Lines changed: 8155 additions & 1071 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

.ci/scripts/export_model_artifact.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,7 @@ if [ "$MODEL_NAME" = "voxtral_realtime" ]; then
358358
STREAMING_ARG=""
359359
PREPROCESSOR_ARGS="--feature_size 128 --output_file ${OUTPUT_DIR}/preprocessor.pte"
360360
if [ "$USE_STREAMING" = "true" ]; then
361-
STREAMING_ARG="--streaming"
361+
STREAMING_ARG="--streaming --sliding-window 2048"
362362
PREPROCESSOR_ARGS="$PREPROCESSOR_ARGS --streaming"
363363
else
364364
PREPROCESSOR_ARGS="$PREPROCESSOR_ARGS --stack_output --max_audio_len 300"
@@ -424,6 +424,7 @@ if [ "$MODEL_NAME" = "qwen3_5_moe" ]; then
424424
test -f "${OUTPUT_DIR}/model.pte"
425425
test -f "${OUTPUT_DIR}/aoti_cuda_blob.ptd"
426426
ls -al "${OUTPUT_DIR}"
427+
427428
exit 0
428429
fi
429430

.ci/scripts/test_model_e2e.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,7 @@ EOF
354354
fi
355355
;;
356356
qwen3_5_moe)
357-
RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --prompt 'What is the capital of France?' --max_new_tokens 32"
357+
RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --prompt 'What is the capital of France?' --max_new_tokens 128 --temperature 0"
358358
;;
359359
voxtral_realtime)
360360
RUNNER_ARGS="--model_path ${MODEL_DIR}/model.pte --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --preprocessor_path ${MODEL_DIR}/$PREPROCESSOR --audio_path ${MODEL_DIR}/$AUDIO_FILE --temperature 0"

.ci/scripts/wheel/test_linux.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,18 +11,25 @@
1111
from examples.models import Backend, Model
1212

1313
if __name__ == "__main__":
14-
# On Linux x86_64 the wheel is built with the Qualcomm backend.
15-
# Verify that it was registered correctly.
16-
if platform.system() == "Linux" and platform.machine() in ("x86_64", "amd64"):
14+
if platform.system() == "Linux":
1715
from executorch.extension.pybindings.portable_lib import (
1816
_get_registered_backend_names,
1917
)
2018

2119
registered = _get_registered_backend_names()
20+
21+
# QNN backend is only available on x86_64.
22+
if platform.machine() in ("x86_64", "amd64"):
23+
assert (
24+
"QnnBackend" in registered
25+
), f"QnnBackend not found in registered backends: {registered}"
26+
print("✓ QnnBackend is registered")
27+
28+
# OpenVINO backend is available on all Linux architectures.
2229
assert (
23-
"QnnBackend" in registered
24-
), f"QnnBackend not found in registered backends: {registered}"
25-
print("✓ QnnBackend is registered")
30+
"OpenvinoBackend" in registered
31+
), f"OpenvinoBackend not found in registered backends: {registered}"
32+
print("✓ OpenvinoBackend is registered")
2633

2734
test_base.run_tests(
2835
model_tests=[

.ci/scripts/wheel/test_linux_aarch64.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,20 @@
1212
# coremltools does not support linux aarch64 yet and install from the source fails on runtime
1313
# https://github.com/apple/coremltools/issues/1254
1414
# https://github.com/apple/coremltools/issues/2195
15+
16+
from executorch.extension.pybindings.portable_lib import (
17+
_get_registered_backend_names,
18+
)
19+
20+
registered = _get_registered_backend_names()
21+
22+
# OpenVINO backend uses dlopen (no build-time SDK dependency), so it
23+
# is compiled into the wheel on all Linux architectures.
24+
assert (
25+
"OpenvinoBackend" in registered
26+
), f"OpenvinoBackend not found in registered backends: {registered}"
27+
print("✓ OpenvinoBackend is registered")
28+
1529
test_base.run_tests(
1630
model_tests=[
1731
test_base.ModelTest(
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
name: Test Cadence
2+
3+
permissions:
4+
id-token: write
5+
contents: read
6+
7+
on:
8+
workflow_call:
9+
inputs:
10+
docker-image:
11+
description: 'Docker image to use'
12+
required: false
13+
type: string
14+
default: ci-image:executorch-ubuntu-22.04-clang12
15+
runner:
16+
description: 'Runner type'
17+
required: false
18+
type: string
19+
default: linux.8xlarge.memory
20+
ref:
21+
description: 'Git ref to checkout'
22+
required: false
23+
type: string
24+
default: ${{ github.sha }}
25+
timeout:
26+
description: 'Job timeout in minutes'
27+
required: false
28+
type: number
29+
default: 90
30+
31+
jobs:
32+
test-aot:
33+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
34+
with:
35+
job-name: test-aot
36+
runner: ${{ inputs.runner }}
37+
docker-image: ${{ inputs.docker-image }}
38+
submodules: recursive
39+
ref: ${{ inputs.ref }}
40+
timeout: ${{ inputs.timeout }}
41+
script: |
42+
set -eux
43+
conda create -y -n cadence_test python=3.12 > /dev/null
44+
conda activate cadence_test
45+
46+
./install_requirements.sh > /dev/null
47+
pip install -e . --no-build-isolation > /dev/null
48+
pip install beartype later pyre_extensions pytest-xdist
49+
50+
python -m pytest backends/cadence/aot/tests/ -v -n auto
51+
52+
test-ops:
53+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
54+
with:
55+
job-name: test-ops
56+
runner: ${{ inputs.runner }}
57+
docker-image: ${{ inputs.docker-image }}
58+
submodules: recursive
59+
ref: ${{ inputs.ref }}
60+
timeout: ${{ inputs.timeout }}
61+
download-artifact: cadence-runner-build
62+
script: |
63+
set -eux
64+
conda create -y -n cadence_test python=3.12 > /dev/null
65+
conda activate cadence_test
66+
67+
./install_requirements.sh > /dev/null
68+
pip install -e . --no-build-isolation > /dev/null
69+
pip install beartype later pyre_extensions pytest-xdist
70+
71+
# Use the pre-built runner from the build job
72+
mkdir -p cmake-out/backends/cadence
73+
cp "${RUNNER_ARTIFACT_DIR}/cadence_runner" cmake-out/backends/cadence/cadence_runner
74+
chmod +x cmake-out/backends/cadence/cadence_runner
75+
76+
export PYTHONPATH="${PYTHONPATH:-}:$(pwd)/backends/cadence/utils/FACTO"
77+
python -m pytest examples/cadence/operators/ -v -n auto

.github/workflows/android-release-artifacts.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ jobs:
165165
contents: read
166166
steps:
167167
- name: configure aws credentials
168-
uses: aws-actions/configure-aws-credentials@v1.7.0
168+
uses: aws-actions/configure-aws-credentials@v4
169169
with:
170170
role-to-assume: arn:aws:iam::308535385114:role/gha_executorch_upload-frameworks-android
171171
aws-region: us-east-1

.github/workflows/apple.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ jobs:
239239
python-version: '3.11'
240240
cache: pip
241241
- name: configure aws credentials
242-
uses: aws-actions/configure-aws-credentials@v1.7.0
242+
uses: aws-actions/configure-aws-credentials@v4
243243
with:
244244
role-to-assume: arn:aws:iam::308535385114:role/gha_executorch_upload-frameworks-ios
245245
aws-region: us-east-1

.github/workflows/build-cadence-runner.yml

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: Build Cadence
1+
name: Cadence Build & Test
22

33
on:
44
pull_request:
@@ -13,7 +13,7 @@ concurrency:
1313
cancel-in-progress: true
1414

1515
jobs:
16-
cpu:
16+
cpu-build:
1717
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
1818
permissions:
1919
id-token: write
@@ -25,6 +25,7 @@ jobs:
2525
submodules: recursive
2626
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
2727
timeout: 90
28+
upload-artifact: cadence-runner-build
2829
script: |
2930
set -eux
3031
# The generic Linux job chooses to use base env, not the one setup by the image
@@ -33,3 +34,15 @@ jobs:
3334
3435
./install_requirements.sh > /dev/null
3536
bash backends/cadence/build_cadence_runner.sh
37+
38+
# Copy runner binary to artifact dir for downstream test jobs
39+
cp cmake-out/backends/cadence/cadence_runner "${RUNNER_ARTIFACT_DIR}/"
40+
41+
cpu-test:
42+
needs: cpu-build
43+
permissions:
44+
id-token: write
45+
contents: read
46+
uses: ./.github/workflows/_test_cadence.yml
47+
with:
48+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}

.github/workflows/cuda-windows.yml

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,11 @@ jobs:
4848
- model_repo: "nvidia"
4949
model_name: "parakeet-tdt"
5050
quant: "quantized-int4-weight-only"
51-
- model_repo: "nvidia"
52-
model_name: "diar_streaming_sortformer_4spk-v2"
53-
quant: "non-quantized"
51+
# TODO: sortformer produces 0 segments on Windows after D97788666.
52+
# Temporarily disabled until root cause is debugged.
53+
# - model_repo: "nvidia"
54+
# model_name: "diar_streaming_sortformer_4spk-v2"
55+
# quant: "non-quantized"
5456
- model_repo: "mistralai"
5557
model_name: "Voxtral-Mini-4B-Realtime-2602"
5658
quant: "quantized-int4-tile-packed"
@@ -129,9 +131,11 @@ jobs:
129131
- model_repo: "nvidia"
130132
model_name: "parakeet-tdt"
131133
quant: "quantized-int4-weight-only"
132-
- model_repo: "nvidia"
133-
model_name: "diar_streaming_sortformer_4spk-v2"
134-
quant: "non-quantized"
134+
# TODO: sortformer produces 0 segments on Windows after D97788666.
135+
# Temporarily disabled until root cause is debugged.
136+
# - model_repo: "nvidia"
137+
# model_name: "diar_streaming_sortformer_4spk-v2"
138+
# quant: "non-quantized"
135139
- model_repo: "mistralai"
136140
model_name: "Voxtral-Mini-4B-Realtime-2602"
137141
quant: "quantized-int4-tile-packed"

.github/workflows/cuda.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,8 @@ jobs:
145145
# Run CUDA backend Python tests
146146
python -m pytest backends/cuda/tests backends/cuda/passes/tests -v -o "addopts="
147147
148-
# Run quantize roundtrip tests (Qwen 3.5 MoE save/load prequantized)
149-
python -m pytest examples/models/qwen3_5_moe/test_quantize_roundtrip.py -v -o "addopts="
148+
# Run Qwen 3.5 MoE tests (quantize roundtrip + TurboQuant KV cache)
149+
python -m pytest examples/models/qwen3_5_moe/test_quantize_roundtrip.py examples/models/qwen3_5_moe/test_turboquant.py -v -o "addopts="
150150
151151
export-model-cuda-artifact:
152152
name: export-model-cuda-artifact

0 commit comments

Comments (0)