Skip to content

Commit 7dd30f2

Browse files
authored
Merge branch 'main' into seperate-sampler-targets
2 parents f36eb60 + 12bb0e7 commit 7dd30f2

959 files changed

Lines changed: 70182 additions & 9541 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.ci/docker/build.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,11 @@ case "${IMAGE_NAME}" in
8484
CUDA_VERSION=12.8
8585
SKIP_PYTORCH=yes
8686
;;
87+
executorch-ubuntu-24.04-gcc14)
88+
LINTRUNNER=""
89+
OS_VERSION=24.04
90+
GCC_VERSION=14
91+
;;
8792
*)
8893
echo "Invalid image name ${IMAGE_NAME}"
8994
exit 1

.ci/docker/common/install_cuda.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,15 @@ apt-get update
3838
# - libcublas-dev: cuBLAS development files
3939
# - libcusparse-dev: cuSPARSE development files
4040
# - libcufft-dev: cuFFT development files
41+
# - libcurand-dev: cuRAND development files
4142
apt-get install -y --no-install-recommends \
4243
"cuda-nvcc-${CUDA_VERSION_DASH}" \
4344
"cuda-cudart-dev-${CUDA_VERSION_DASH}" \
4445
"cuda-nvrtc-dev-${CUDA_VERSION_DASH}" \
4546
"libcublas-dev-${CUDA_VERSION_DASH}" \
4647
"libcusparse-dev-${CUDA_VERSION_DASH}" \
47-
"libcufft-dev-${CUDA_VERSION_DASH}"
48+
"libcufft-dev-${CUDA_VERSION_DASH}" \
49+
"libcurand-dev-${CUDA_VERSION_DASH}"
4850

4951
# Clean up
5052
apt-get clean

.ci/docker/common/install_user.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@
77

88
set -ex
99

10+
# On Ubuntu 24.04, there is a `ubuntu` user with id=1000
11+
if id ubuntu >/dev/null && [[ "$(id -u ubuntu)" == 1000 ]]; then
12+
sudo userdel --remove ubuntu;
13+
fi
14+
1015
# Same as ec2-user
1116
echo "ci-user:x:1000:1000::/var/lib/ci-user:" >> /etc/passwd
1217
echo "ci-user:x:1000:" >> /etc/group

.ci/scripts/download_hf_hub.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
#!/bin/bash
22

3+
# Disable HF Xet storage to avoid stalled downloads on CI runners
4+
export HF_HUB_DISABLE_XET=1
5+
36
# Function to download files from the Hugging Face Hub
47
# Arguments:
58
# 1. model_id: The Hugging Face repository ID (e.g., "organization/model_name")

.ci/scripts/export_model_artifact.sh

Lines changed: 84 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ if [ -z "${1:-}" ]; then
6767
exit 1
6868
fi
6969

70+
# Disable HF Xet storage to avoid stalled downloads on CI runners
71+
export HF_HUB_DISABLE_XET=1
72+
7073
set -eux
7174

7275
DEVICE="$1"
@@ -192,9 +195,17 @@ case "$HF_MODEL" in
192195
PREPROCESSOR_FEATURE_SIZE=""
193196
PREPROCESSOR_OUTPUT=""
194197
;;
198+
SocialLocalMobile/gemma-4-31B-it-HQQ-INT4)
199+
MODEL_NAME="gemma4_31b"
200+
TASK=""
201+
MAX_SEQ_LEN=""
202+
EXTRA_PIP=""
203+
PREPROCESSOR_FEATURE_SIZE=""
204+
PREPROCESSOR_OUTPUT=""
205+
;;
195206
*)
196207
echo "Error: Unsupported model '$HF_MODEL'"
197-
echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, openai/whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}, google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer, SocialLocalMobile/Qwen3.5-35B-A3B-HQQ-INT4"
208+
echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, openai/whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}, google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer, SocialLocalMobile/Qwen3.5-35B-A3B-HQQ-INT4, SocialLocalMobile/gemma-4-31B-it-HQQ-INT4"
198209
exit 1
199210
;;
200211
esac
@@ -415,8 +426,80 @@ if [ "$MODEL_NAME" = "qwen3_5_moe" ]; then
415426

416427
# Export to .pte/.ptd (short cache dir avoids objcopy symbol length issues)
417428
echo "::group::Export"
429+
EXPORT_LOG=$(mktemp)
418430
TORCHINDUCTOR_CACHE_DIR="$INDUCTOR_CACHE" \
419431
python -m executorch.examples.models.qwen3_5_moe.export \
432+
--prequantized "$LOCAL_MODEL_DIR" \
433+
--output-dir "${OUTPUT_DIR}" \
434+
--dense-prefill dequant \
435+
--moe-activation-dtype int8 2>&1 | tee "$EXPORT_LOG"
436+
EXPORT_RC=${PIPESTATUS[0]}
437+
echo "::endgroup::"
438+
439+
if [ "$EXPORT_RC" -ne 0 ]; then
440+
echo "ERROR: Qwen3.5 MoE export failed (exit $EXPORT_RC)"
441+
rm -f "$EXPORT_LOG"
442+
exit "$EXPORT_RC"
443+
fi
444+
445+
# Gate peak GPU memory so we keep the export viable on consumer GPUs
446+
# (e.g. RTX 4090 with 24 GB). The export script prints a machine-
447+
# parseable marker line "EXPORT_GPU_PEAK_MEMORY_MB: <float>".
448+
EXPORT_GPU_PEAK_MB_LIMIT="${EXPORT_GPU_PEAK_MB_LIMIT:-20480}"
449+
PEAK_LINE=$(grep -E '^EXPORT_GPU_PEAK_MEMORY_MB:' "$EXPORT_LOG" | tail -1)
450+
rm -f "$EXPORT_LOG"
451+
if [ -z "$PEAK_LINE" ]; then
452+
echo "ERROR: export did not emit EXPORT_GPU_PEAK_MEMORY_MB marker; cannot enforce GPU memory budget"
453+
exit 1
454+
fi
455+
PEAK_MB=$(echo "$PEAK_LINE" | awk '{print $2}')
456+
echo "Export GPU peak memory: ${PEAK_MB} MB (limit ${EXPORT_GPU_PEAK_MB_LIMIT} MB)"
457+
if awk -v p="$PEAK_MB" -v l="$EXPORT_GPU_PEAK_MB_LIMIT" 'BEGIN{exit !(p>l)}'; then
458+
echo "ERROR: export exceeded GPU memory budget (${PEAK_MB} MB > ${EXPORT_GPU_PEAK_MB_LIMIT} MB)"
459+
echo " — this would prevent the model from being exported on a 24 GB consumer GPU."
460+
exit 1
461+
fi
462+
463+
test -f "${OUTPUT_DIR}/model.pte"
464+
test -f "${OUTPUT_DIR}/aoti_cuda_blob.ptd"
465+
ls -al "${OUTPUT_DIR}"
466+
467+
exit 0
468+
fi
469+
470+
# Gemma 4 31B uses a prequantized checkpoint and custom export script
471+
if [ "$MODEL_NAME" = "gemma4_31b" ]; then
472+
pip install safetensors huggingface_hub gguf
473+
474+
# Download prequantized model outside OUTPUT_DIR to avoid uploading on failure
475+
LOCAL_MODEL_DIR=$(mktemp -d)
476+
INDUCTOR_CACHE=$(mktemp -d)
477+
trap 'rm -rf "$LOCAL_MODEL_DIR" "$INDUCTOR_CACHE"' EXIT
478+
479+
python -c "from huggingface_hub import snapshot_download; snapshot_download('${HF_MODEL}', local_dir='${LOCAL_MODEL_DIR}')"
480+
481+
# Sanity check: run inference on the prequantized model
482+
echo "::group::Inference sanity check"
483+
INFERENCE_OUTPUT=$(python -m executorch.examples.models.gemma4_31b.inference \
484+
--prequantized "$LOCAL_MODEL_DIR" \
485+
--prompt "What is the capital of France?" \
486+
--max-new-tokens 32 \
487+
--temperature 0 \
488+
--no-compile 2>&1)
489+
echo "$INFERENCE_OUTPUT"
490+
if ! echo "$INFERENCE_OUTPUT" | grep -q "Paris"; then
491+
echo "ERROR: Inference sanity check failed — expected 'Paris' in output"
492+
exit 1
493+
fi
494+
echo "::endgroup::"
495+
496+
# Copy tokenizer for the runner
497+
cp "$LOCAL_MODEL_DIR/tokenizer.json" "${OUTPUT_DIR}/tokenizer.json"
498+
499+
# Export to .pte/.ptd (short cache dir avoids objcopy symbol length issues)
500+
echo "::group::Export"
501+
TORCHINDUCTOR_CACHE_DIR="$INDUCTOR_CACHE" \
502+
python -m executorch.examples.models.gemma4_31b.export \
420503
--prequantized "$LOCAL_MODEL_DIR" \
421504
--output-dir "${OUTPUT_DIR}"
422505
echo "::endgroup::"

.ci/scripts/setup-macos.sh

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,6 @@ setup_macos_env_variables
116116
# buck2 atm
117117
install_buck
118118
brew install libomp
119-
install_pip_dependencies
120119

121120
# TODO(huydhn): Unlike our self-hosted runner, GitHub runner doesn't have access
122121
# to our infra, so compiler caching needs to be setup differently using GitHub
@@ -125,10 +124,17 @@ if [[ -z "${GITHUB_RUNNER:-}" ]]; then
125124
install_sccache
126125
fi
127126

127+
# Install pinned torch before requirements-ci.txt so torchsr's transitive
128+
# torch dep is satisfied by the existing install and pip does not pull a
129+
# separate copy from PyPI. sccache is initialized above so source-build
130+
# cache misses still hit the cache.
128131
print_cmake_info
129132
install_pytorch_and_domains
130-
# We build PyTorch from source here instead of using nightly. This allows CI to test against
131-
# the pinned commit from PyTorch
133+
134+
install_pip_dependencies
135+
136+
# install_executorch's --use-pt-pinned-commit skips re-installing torch since
137+
# install_pytorch_and_domains already installed the pinned build above.
132138
if [[ "$EDITABLE" == "true" ]]; then
133139
install_executorch --use-pt-pinned-commit --editable
134140
else

.ci/scripts/test_backend.sh

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ export PYTHON_EXECUTABLE=python
3535

3636
# CMake options to use, in addition to the defaults.
3737
EXTRA_BUILD_ARGS=""
38+
PYTEST_RETRY_ARGS=()
3839

3940
if [[ "$FLOW" == *qnn* ]]; then
4041
# Setup QNN sdk and deps - note that this is a bit hacky due to the nature of the
@@ -57,6 +58,9 @@ if [[ "$FLOW" == *vulkan* ]]; then
5758
fi
5859

5960
if [[ "$FLOW" == *arm* ]]; then
61+
if [[ "$SUITE" == "operators" ]]; then
62+
PYTEST_RETRY_ARGS=(--reruns 2 --reruns-delay 1)
63+
fi
6064

6165
# Setup ARM deps.
6266
if [[ "$FLOW" == *vgf* ]]; then
@@ -95,6 +99,11 @@ GOLDEN_DIR="${ARTIFACT_DIR}/golden-artifacts"
9599
export GOLDEN_ARTIFACTS_DIR="${GOLDEN_DIR}"
96100

97101
EXIT_CODE=0
98-
${CONDA_RUN_CMD} pytest -c /dev/null -n auto backends/test/suite/$SUITE/ -m flow_$FLOW --json-report --json-report-file="$REPORT_FILE" || EXIT_CODE=$?
102+
PYTEST_ARGS=(-c /dev/null -n auto)
103+
if [[ ${#PYTEST_RETRY_ARGS[@]} -gt 0 ]]; then
104+
PYTEST_ARGS+=("${PYTEST_RETRY_ARGS[@]}")
105+
fi
106+
PYTEST_ARGS+=("backends/test/suite/$SUITE/" -m "flow_$FLOW" --json-report --json-report-file="$REPORT_FILE")
107+
${CONDA_RUN_CMD} pytest "${PYTEST_ARGS[@]}" || EXIT_CODE=$?
99108
# Generate markdown summary.
100109
${CONDA_RUN_CMD} python -m executorch.backends.test.suite.generate_markdown_summary_json "$REPORT_FILE" > ${GITHUB_STEP_SUMMARY:-"step_summary.md"} --exit-code $EXIT_CODE
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
#!/usr/bin/env python3
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
from __future__ import annotations
9+
10+
"""Validate that backend Python modules can be imported.
11+
12+
The workflow passes backend-specific paths and package prefixes so the same
13+
checker can be reused for different backends.
14+
"""
15+
16+
import argparse
17+
import importlib
18+
import sys
19+
from pathlib import Path
20+
21+
22+
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the backend import checker.

    Args:
        argv: Argument strings to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``, which is how the script entry point calls it.
            Passing an explicit list makes the parser reusable from other code.

    Returns:
        Namespace with the attributes ``name``, ``package_root``,
        ``package_prefix`` and ``skip_segment``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--name",
        required=True,
        help="Display name for log messages, for example `QNN`.",
    )
    parser.add_argument(
        "--package-root",
        required=True,
        help="Path to the backend package root, relative to ExecuTorch root.",
    )
    parser.add_argument(
        "--package-prefix",
        required=True,
        help="Python package prefix, for example `executorch.backends.qualcomm`.",
    )
    parser.add_argument(
        "--skip-segment",
        action="append",
        # With action="append", values given on the command line are added
        # after these defaults; they do not replace them.
        default=["fb", "test", "tests"],
        help="Package path segment to skip while walking modules.",
    )
    return parser.parse_args(argv)
46+
47+
48+
def resolve_executorch_root() -> Path:
    """Locate the ExecuTorch root by walking up from this script.

    The root is the nearest ancestor directory of this file that contains both
    a ``backends`` and an ``examples`` directory.

    Raises:
        RuntimeError: If no ancestor directory matches.
    """
    script_path = Path(__file__).resolve()
    matching_ancestors = (
        ancestor
        for ancestor in script_path.parents
        if (ancestor / "backends").is_dir() and (ancestor / "examples").is_dir()
    )
    root = next(matching_ancestors, None)
    if root is None:
        raise RuntimeError(f"Could not locate ExecuTorch root from {script_path}")
    return root
55+
56+
57+
def resolve_directory(executorch_root: Path, relative_path: str) -> Path:
    """Return ``executorch_root / relative_path`` after checking it is a directory.

    Raises:
        RuntimeError: If the joined path is not an existing directory.
    """
    candidate = executorch_root / relative_path
    if candidate.is_dir():
        return candidate
    raise RuntimeError(
        f"Directory `{relative_path}` was not found under {executorch_root}"
    )
64+
65+
66+
def normalize_package_prefix(package_prefix: str) -> str:
    """Return ``package_prefix`` with every trailing dot removed.

    Stripping all trailing dots (not just one) keeps a prefix such as
    ``"pkg.."`` from producing dotted names with an empty component once
    module suffixes are appended to it.
    """
    return package_prefix.rstrip(".")
68+
69+
70+
def should_skip_path(path: Path, skip_segments: list[str]) -> bool:
    """Decide whether ``path`` is excluded by any of ``skip_segments``.

    A path is skipped when a segment equals one of its components, equals the
    file stem, or is a ``<segment>_`` prefix of the file stem.
    """
    components = path.parts
    file_stem = path.stem
    for segment in skip_segments:
        if segment in components:
            return True
        if file_stem == segment or file_stem.startswith(f"{segment}_"):
            return True
    return False
78+
79+
80+
def discover_modules(
    package_root: Path,
    package_prefix: str,
    skip_segments: list[str],
) -> list[str]:
    """List the dotted module names for the ``*.py`` files under ``package_root``.

    Files are visited in sorted path order. Paths rejected by
    ``should_skip_path`` are left out. An ``__init__.py`` maps to the dotted
    name of its directory (``package_prefix`` itself at the top level); any
    other file maps to ``package_prefix`` plus its path without the suffix.
    """
    discovered = []
    for source_file in sorted(package_root.rglob("*.py")):
        rel = source_file.relative_to(package_root)
        if should_skip_path(rel, skip_segments):
            continue

        # A package is imported through its directory name, not "__init__".
        if rel.name == "__init__.py":
            dotted_parts = rel.parent.parts
        else:
            dotted_parts = rel.with_suffix("").parts
        discovered.append(".".join((package_prefix, *dotted_parts)))
    return discovered
103+
104+
105+
def main() -> None:
    """Import every discovered backend module and report the failures.

    Exits with status 1 when no modules are found or when at least one import
    fails; returns normally when every module imports.
    """
    args = parse_args()
    executorch_root = resolve_executorch_root()
    package_root = resolve_directory(executorch_root, args.package_root)
    package_prefix = normalize_package_prefix(args.package_prefix)

    modules = discover_modules(package_root, package_prefix, args.skip_segment)
    total_modules = len(modules)
    if total_modules == 0:
        print(f"No {args.name} Python modules found under {package_root}")
        sys.exit(1)

    # One (module name, exception type name, exception message) per failure.
    failures: list[tuple[str, str, str]] = []
    for index, name in enumerate(modules, 1):
        print(f"[{index}/{total_modules}] importing {name}", flush=True)
        try:
            importlib.import_module(name)
        except (Exception, SystemExit) as error:
            # SystemExit does not derive from Exception. Without catching it, a
            # module that calls sys.exit() at import time would end the checker
            # early (possibly with status 0) and leave later modules unchecked.
            failures.append((name, type(error).__name__, str(error)))

    if failures:
        print(f"{len(failures)}/{total_modules} {args.name} import failure(s):")
        for name, error_type, message in failures:
            print(f"  FAIL: {name} -- {error_type}: {message}")
        sys.exit(1)

    print(f"All {total_modules} {args.name} modules imported successfully")
132+
133+
134+
if __name__ == "__main__":
135+
main()

.ci/scripts/test_coreml_bc.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ source "${REPO_ROOT}/.ci/scripts/utils.sh"
2323
# Create a conda environment with Python 3.10 for compatibility with old ET versions
2424
# ET 1.0.0 only supports Python >=3.10,<3.13
2525
CONDA_ENV_NAME="coreml_bc_test_env"
26-
conda create -y -n "${CONDA_ENV_NAME}" python=3.10
26+
conda create -y -n "${CONDA_ENV_NAME}" python=3.10 pip packaging
2727

2828
# Use conda run to execute commands in the new environment
2929
CONDA_RUN="conda run --no-capture-output -n ${CONDA_ENV_NAME}"
@@ -69,7 +69,7 @@ git submodule sync --recursive
6969
git submodule update --init --recursive
7070

7171
# Install executorch
72-
${CONDA_RUN} pip install --upgrade pip
72+
${CONDA_RUN} python -m pip install --upgrade pip
7373
${CONDA_RUN} python install_executorch.py
7474

7575
# Step 3: Export model
@@ -129,7 +129,7 @@ git submodule update --init --recursive
129129

130130
# Step 5: Install current version
131131
echo "=== Step 5: Installing current ET version ==="
132-
${CONDA_RUN} pip install --upgrade pip
132+
${CONDA_RUN} python -m pip install --upgrade pip
133133
${CONDA_RUN} python install_executorch.py
134134

135135
# Step 6: Run the old pte file

0 commit comments

Comments
 (0)