Skip to content

Commit 5c70037

Browse files
authored
Merge branch 'main' into fix/16032-tensors-same-dim-order-semantic-equivalence
2 parents fe33ca5 + 22867a9 commit 5c70037

1,376 files changed

Lines changed: 112528 additions & 28866 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
a9592258daacad7423fd5f39aaa59c6e36471520
1+
585799cf7039d376d2ac4848b5ef0b501f60679e
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
659af3c353e49b35c191cdd2dba3b3c79d0e6822
1+
release/2.11

.ci/docker/common/install_cuda_windows_cross_compile.sh

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -48,20 +48,23 @@ get_torch_cuda_version() {
4848
}
4949

5050
install_windows_cuda() {
51-
# Get CUDA version from torch
52-
TORCH_CUDA_VERSION=$(get_torch_cuda_version)
51+
# Use CUDA_VERSION env var if set (from Docker build arg), otherwise query PyTorch
52+
if [ -n "${CUDA_VERSION:-}" ]; then
53+
echo "Using CUDA version from environment: ${CUDA_VERSION}"
54+
CUDA_MAJOR_MINOR=$(echo "${CUDA_VERSION}" | cut -d. -f1,2)
55+
else
56+
TORCH_CUDA_VERSION=$(get_torch_cuda_version)
57+
58+
if [ -z "${TORCH_CUDA_VERSION}" ] || [ "${TORCH_CUDA_VERSION}" = "None" ]; then
59+
echo "ERROR: Could not detect CUDA version from PyTorch."
60+
echo "Make sure PyTorch with CUDA support is installed or set CUDA_VERSION."
61+
exit 1
62+
fi
5363

54-
if [ -z "${TORCH_CUDA_VERSION}" ] || [ "${TORCH_CUDA_VERSION}" = "None" ]; then
55-
echo "ERROR: Could not detect CUDA version from PyTorch."
56-
echo "Make sure PyTorch with CUDA support is installed before running this script."
57-
exit 1
64+
echo "Detected PyTorch CUDA version: ${TORCH_CUDA_VERSION}"
65+
CUDA_MAJOR_MINOR=$(echo "${TORCH_CUDA_VERSION}" | cut -d. -f1,2)
5866
fi
5967

60-
echo "Detected PyTorch CUDA version: ${TORCH_CUDA_VERSION}"
61-
62-
# Extract major.minor version (e.g., "12.8" from "12.8.1" or "12.8")
63-
CUDA_MAJOR_MINOR=$(echo "${TORCH_CUDA_VERSION}" | cut -d. -f1,2)
64-
6568
# Look up the full version and driver version
6669
if [ -z "${CUDA_DRIVER_MAP[${CUDA_MAJOR_MINOR}]}" ]; then
6770
echo "ERROR: CUDA version ${CUDA_MAJOR_MINOR} is not in the known version map."

.ci/docker/common/install_pytorch.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,9 @@ install_pytorch_and_domains() {
3232
pip_install "$(echo dist/*.whl)"
3333

3434
# Grab the pinned audio and vision commits from PyTorch
35-
TORCHAUDIO_VERSION=$(cat .github/ci_commit_pins/audio.txt)
35+
TORCHAUDIO_VERSION=release/2.11
3636
export TORCHAUDIO_VERSION
37-
TORCHVISION_VERSION=$(cat .github/ci_commit_pins/vision.txt)
37+
TORCHVISION_VERSION=release/0.26
3838
export TORCHVISION_VERSION
3939

4040
install_domains

.ci/docker/ubuntu/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ COPY ./common/install_cuda_windows_cross_compile.sh install_cuda_windows_cross_c
105105
COPY ./common/utils.sh utils.sh
106106
RUN if [ -n "${CUDA_WINDOWS_CROSS_COMPILE}" ]; then \
107107
CUDA_VERSION=${CUDA_VERSION} bash ./install_cuda.sh && \
108-
bash ./install_cuda_windows_cross_compile.sh; \
108+
CUDA_VERSION=${CUDA_VERSION} bash ./install_cuda_windows_cross_compile.sh; \
109109
fi
110110
RUN rm -f install_cuda.sh install_cuda_windows_cross_compile.sh utils.sh
111111
# Set up CUDA environment for Linux compilation (nvcc, etc.)

.ci/scripts/build-qnn-sdk.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ set_up_aot() {
4040
-DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
4141
-DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \
4242
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
43+
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
44+
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED_AOT=ON \
4345
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
4446
-DPYTHON_EXECUTABLE=python3
4547
cmake --build $PWD --target "PyQnnManagerAdaptor" -j$(nproc)

.ci/scripts/export_model_artifact.sh

Lines changed: 78 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Arguments:
2424
- google/gemma-3-4b-it
2525
- nvidia/diar_streaming_sortformer_4spk-v2
2626
- nvidia/parakeet-tdt
27+
- facebook/dinov2-small-imagenet1k-1-layer
2728
2829
quant_name Quantization type (optional, default: non-quantized)
2930
Options:
@@ -167,6 +168,14 @@ case "$HF_MODEL" in
167168
PREPROCESSOR_FEATURE_SIZE=""
168169
PREPROCESSOR_OUTPUT=""
169170
;;
171+
facebook/dinov2-small-imagenet1k-1-layer)
172+
MODEL_NAME="dinov2"
173+
TASK=""
174+
MAX_SEQ_LEN=""
175+
EXTRA_PIP=""
176+
PREPROCESSOR_FEATURE_SIZE=""
177+
PREPROCESSOR_OUTPUT=""
178+
;;
170179
mistralai/Voxtral-Mini-4B-Realtime-2602)
171180
MODEL_NAME="voxtral_realtime"
172181
TASK=""
@@ -175,9 +184,17 @@ case "$HF_MODEL" in
175184
PREPROCESSOR_FEATURE_SIZE=""
176185
PREPROCESSOR_OUTPUT=""
177186
;;
187+
SocialLocalMobile/Qwen3.5-35B-A3B-HQQ-INT4)
188+
MODEL_NAME="qwen3_5_moe"
189+
TASK=""
190+
MAX_SEQ_LEN=""
191+
EXTRA_PIP=""
192+
PREPROCESSOR_FEATURE_SIZE=""
193+
PREPROCESSOR_OUTPUT=""
194+
;;
178195
*)
179196
echo "Error: Unsupported model '$HF_MODEL'"
180-
echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, openai/whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}, google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt"
197+
echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, openai/whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}, google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer, SocialLocalMobile/Qwen3.5-35B-A3B-HQQ-INT4"
181198
exit 1
182199
;;
183200
esac
@@ -293,6 +310,23 @@ if [ "$MODEL_NAME" = "sortformer" ]; then
293310
exit 0
294311
fi
295312

313+
# DINOv2 uses a custom export script
314+
if [ "$MODEL_NAME" = "dinov2" ]; then
315+
pip install -r examples/models/dinov2/install_requirements.txt
316+
317+
python -m executorch.examples.models.dinov2.export_dinov2 \
318+
--backend "$DEVICE" \
319+
--output-dir "${OUTPUT_DIR}"
320+
321+
test -f "${OUTPUT_DIR}/model.pte"
322+
if [ "$DEVICE" = "cuda" ] || [ "$DEVICE" = "cuda-windows" ]; then
323+
test -f "${OUTPUT_DIR}/aoti_cuda_blob.ptd"
324+
fi
325+
ls -al "${OUTPUT_DIR}"
326+
echo "::endgroup::"
327+
exit 0
328+
fi
329+
296330
# Voxtral Realtime uses a custom export script
297331
if [ "$MODEL_NAME" = "voxtral_realtime" ]; then
298332
pip install safetensors huggingface_hub
@@ -308,6 +342,7 @@ if [ "$MODEL_NAME" = "voxtral_realtime" ]; then
308342
VR_QUANT_ARGS="--qlinear-encoder 8da4w --qlinear 8da4w --qlinear-group-size 32 --qembedding 8w"
309343
elif [ "$QUANT_NAME" = "quantized-int4-metal" ]; then
310344
VR_QUANT_ARGS="--qlinear-encoder fpa4w --qlinear fpa4w"
345+
VR_DTYPE_ARGS="--dtype bf16"
311346
elif [ "$QUANT_NAME" = "quantized-int4-tile-packed" ]; then
312347
VR_QUANT_ARGS="--qlinear-encoder 4w --qlinear-encoder-packing-format tile_packed_to_4d --qlinear 4w --qlinear-packing-format tile_packed_to_4d --qembedding 8w"
313348
VR_DTYPE_ARGS="--dtype bf16"
@@ -323,7 +358,7 @@ if [ "$MODEL_NAME" = "voxtral_realtime" ]; then
323358
STREAMING_ARG=""
324359
PREPROCESSOR_ARGS="--feature_size 128 --output_file ${OUTPUT_DIR}/preprocessor.pte"
325360
if [ "$USE_STREAMING" = "true" ]; then
326-
STREAMING_ARG="--streaming"
361+
STREAMING_ARG="--streaming --sliding-window 2048"
327362
PREPROCESSOR_ARGS="$PREPROCESSOR_ARGS --streaming"
328363
else
329364
PREPROCESSOR_ARGS="$PREPROCESSOR_ARGS --stack_output --max_audio_len 300"
@@ -347,11 +382,52 @@ if [ "$MODEL_NAME" = "voxtral_realtime" ]; then
347382
fi
348383
# Copy tokenizer from downloaded model weights
349384
cp "$LOCAL_MODEL_DIR/tekken.json" "${OUTPUT_DIR}/tekken.json"
385+
rm -rf "$LOCAL_MODEL_DIR"
350386
ls -al "${OUTPUT_DIR}"
351387
echo "::endgroup::"
352388
exit 0
353389
fi
354390

391+
# Qwen 3.5 MoE uses a prequantized checkpoint and custom export script
392+
if [ "$MODEL_NAME" = "qwen3_5_moe" ]; then
393+
pip install safetensors huggingface_hub
394+
pip install -r examples/models/qwen3_5_moe/requirements.txt
395+
396+
# Download prequantized model outside OUTPUT_DIR to avoid uploading on failure
397+
LOCAL_MODEL_DIR=$(mktemp -d)
398+
INDUCTOR_CACHE=$(mktemp -d)
399+
trap 'rm -rf "$LOCAL_MODEL_DIR" "$INDUCTOR_CACHE"' EXIT
400+
401+
python -c "from huggingface_hub import snapshot_download; snapshot_download('${HF_MODEL}', local_dir='${LOCAL_MODEL_DIR}')"
402+
403+
# Sanity check: run inference on the prequantized model
404+
echo "::group::Inference sanity check"
405+
python -m executorch.examples.models.qwen3_5_moe.inference \
406+
--prequantized "$LOCAL_MODEL_DIR" \
407+
--prompt "What is the capital of France?" \
408+
--max-new-tokens 32 \
409+
--temperature 0 \
410+
--no-compile
411+
echo "::endgroup::"
412+
413+
# Copy tokenizer for the runner
414+
cp "$LOCAL_MODEL_DIR/tokenizer.json" "${OUTPUT_DIR}/tokenizer.json"
415+
416+
# Export to .pte/.ptd (short cache dir avoids objcopy symbol length issues)
417+
echo "::group::Export"
418+
TORCHINDUCTOR_CACHE_DIR="$INDUCTOR_CACHE" \
419+
python -m executorch.examples.models.qwen3_5_moe.export \
420+
--prequantized "$LOCAL_MODEL_DIR" \
421+
--output-dir "${OUTPUT_DIR}"
422+
echo "::endgroup::"
423+
424+
test -f "${OUTPUT_DIR}/model.pte"
425+
test -f "${OUTPUT_DIR}/aoti_cuda_blob.ptd"
426+
ls -al "${OUTPUT_DIR}"
427+
428+
exit 0
429+
fi
430+
355431
MAX_SEQ_LEN_ARG=""
356432
if [ -n "$MAX_SEQ_LEN" ]; then
357433
MAX_SEQ_LEN_ARG="--max_seq_len $MAX_SEQ_LEN"

.ci/scripts/setup-openvino.sh

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,41 @@ set -ex
1010
# shellcheck source=/dev/null
1111
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
1212

13+
# Parse arguments
14+
USE_NIGHTLY=false
15+
for arg in "$@"; do
16+
case $arg in
17+
--nightly) USE_NIGHTLY=true ;;
18+
esac
19+
done
20+
1321
# Download and install OpenVINO from release packages
14-
OPENVINO_VERSION="2025.3"
15-
OPENVINO_BUILD="2025.3.0.19807.44526285f24"
16-
OPENVINO_URL="https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION}/linux/openvino_toolkit_ubuntu22_${OPENVINO_BUILD}_x86_64.tgz"
22+
OPENVINO_VERSION="2026.0"
23+
OPENVINO_BUILD="2026.0.0.20965.c6d6a13a886"
24+
OPENVINO_STABLE_URL="https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION}/linux/openvino_toolkit_ubuntu22_${OPENVINO_BUILD}_x86_64.tgz"
25+
26+
OPENVINO_NIGHTLY_BUILD_ID="2026.1.0-21310-c694fbc2b6d"
27+
OPENVINO_NIGHTLY_BUILD="2026.1.0.dev20260312"
28+
OPENVINO_NIGHTLY_URL="https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/${OPENVINO_NIGHTLY_BUILD_ID}/openvino_toolkit_ubuntu22_${OPENVINO_NIGHTLY_BUILD}_x86_64.tgz"
29+
30+
if [ "${USE_NIGHTLY}" = true ]; then
31+
OPENVINO_URL="${OPENVINO_NIGHTLY_URL}"
32+
OPENVINO_EXTRACTED_DIR="openvino_toolkit_ubuntu22_${OPENVINO_NIGHTLY_BUILD}_x86_64"
33+
echo "Using OpenVINO nightly build: ${OPENVINO_NIGHTLY_BUILD_ID}"
34+
else
35+
OPENVINO_URL="${OPENVINO_STABLE_URL}"
36+
OPENVINO_EXTRACTED_DIR="openvino_toolkit_ubuntu22_${OPENVINO_BUILD}_x86_64"
37+
echo "Using OpenVINO stable release: ${OPENVINO_BUILD}"
38+
fi
1739

1840
curl -Lo /tmp/openvino_toolkit.tgz --retry 3 --fail ${OPENVINO_URL}
1941
tar -xzf /tmp/openvino_toolkit.tgz
20-
mv openvino_toolkit_ubuntu22_${OPENVINO_BUILD}_x86_64 openvino
42+
mv "${OPENVINO_EXTRACTED_DIR}" openvino
2143

44+
set +u
2245
source openvino/setupvars.sh
23-
cd backends/openvino
24-
pip install -r requirements.txt
25-
cd scripts
46+
set -u
47+
pip install -r backends/openvino/requirements.txt
48+
pushd backends/openvino/scripts
2649
./openvino_build.sh --enable_python
50+
popd

.ci/scripts/setup-qnn-deps.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,5 @@ source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/install_q
1111

1212
setup_libcpp 12
1313
setup_android_ndk
14-
install_qnn
14+
install_qnn
15+
pip install -r backends/qualcomm/requirements.txt

.ci/scripts/test_backend.sh

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ if [[ "$FLOW" == *qnn* ]]; then
4646
export LD_LIBRARY_PATH"=$QNN_X86_LIB_DIR:$QNN_SDK_ROOT/lib/x86_64-linux-clang/:${LD_LIBRARY_PATH:-}"
4747

4848
# TODO Get SDK root from install scripts
49-
EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_QNN=ON -DQNN_SDK_ROOT=$QNN_SDK_ROOT"
49+
EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_QNN=ON -DQNN_SDK_ROOT=$QNN_SDK_ROOT -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON"
5050
fi
5151

5252
if [[ "$FLOW" == *vulkan* ]]; then
@@ -78,6 +78,12 @@ if [[ "$FLOW" == *arm* ]]; then
7878
fi
7979
fi
8080

81+
if [[ "$FLOW" == *openvino* ]]; then
82+
# Setup OpenVINO environment
83+
source .ci/scripts/setup-openvino.sh --nightly
84+
EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_OPENVINO=ON"
85+
fi
86+
8187
if [[ $IS_MACOS -eq 1 ]]; then
8288
SETUP_SCRIPT=.ci/scripts/setup-macos.sh
8389
else

0 commit comments

Comments
 (0)