Skip to content

Commit 232574c

Browse files
authored
Merge branch 'main' into gemma4-mlx-install-path
2 parents e527890 + acffcb0 commit 232574c

399 files changed

Lines changed: 34544 additions & 5491 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.ci/docker/build.sh

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,18 @@ esac
9292
TORCH_VERSION=$(cat ci_commit_pins/pytorch.txt)
9393
BUILD_DOCS=1
9494

95+
# Pull channel + spec/url helpers out of torch_pin.py so install_pytorch.sh
96+
# (which runs inside the docker build, where torch_pin.py isn't available)
97+
# can decide between wheel install (test/release) and source build (nightly).
98+
# Self-hosted runners often have python3 but not the unversioned python alias.
99+
PYTHON_BIN=$(command -v python3 || command -v python)
100+
TORCH_PIN_HELPERS=$(cd ../.. && "$PYTHON_BIN" -c "from torch_pin import CHANNEL, torch_spec, torchaudio_spec, torchvision_spec, torch_index_url_base; print(CHANNEL); print(torch_spec()); print(torchaudio_spec()); print(torchvision_spec()); print(torch_index_url_base())")
101+
TORCH_CHANNEL=$(echo "${TORCH_PIN_HELPERS}" | sed -n '1p')
102+
TORCH_SPEC=$(echo "${TORCH_PIN_HELPERS}" | sed -n '2p')
103+
TORCHAUDIO_SPEC=$(echo "${TORCH_PIN_HELPERS}" | sed -n '3p')
104+
TORCHVISION_SPEC=$(echo "${TORCH_PIN_HELPERS}" | sed -n '4p')
105+
TORCH_INDEX_URL=$(echo "${TORCH_PIN_HELPERS}" | sed -n '5p')
106+
95107
# Copy requirements-lintrunner.txt from root to here
96108
cp ../../requirements-lintrunner.txt ./
97109

@@ -104,6 +116,11 @@ docker build \
104116
--build-arg "PYTHON_VERSION=${PYTHON_VERSION}" \
105117
--build-arg "MINICONDA_VERSION=${MINICONDA_VERSION}" \
106118
--build-arg "TORCH_VERSION=${TORCH_VERSION}" \
119+
--build-arg "TORCH_CHANNEL=${TORCH_CHANNEL}" \
120+
--build-arg "TORCH_SPEC=${TORCH_SPEC}" \
121+
--build-arg "TORCHAUDIO_SPEC=${TORCHAUDIO_SPEC}" \
122+
--build-arg "TORCHVISION_SPEC=${TORCHVISION_SPEC}" \
123+
--build-arg "TORCH_INDEX_URL=${TORCH_INDEX_URL}" \
107124
--build-arg "BUCK2_VERSION=${BUCK2_VERSION}" \
108125
--build-arg "LINTRUNNER=${LINTRUNNER:-}" \
109126
--build-arg "BUILD_DOCS=${BUILD_DOCS}" \
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
release/2.11
1+
release/2.11

.ci/docker/common/install_cuda.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,15 @@ apt-get update
3838
# - libcublas-dev: cuBLAS development files
3939
# - libcusparse-dev: cuSPARSE development files
4040
# - libcufft-dev: cuFFT development files
41+
# - libcurand-dev: cuRAND development files
4142
apt-get install -y --no-install-recommends \
4243
"cuda-nvcc-${CUDA_VERSION_DASH}" \
4344
"cuda-cudart-dev-${CUDA_VERSION_DASH}" \
4445
"cuda-nvrtc-dev-${CUDA_VERSION_DASH}" \
4546
"libcublas-dev-${CUDA_VERSION_DASH}" \
4647
"libcusparse-dev-${CUDA_VERSION_DASH}" \
47-
"libcufft-dev-${CUDA_VERSION_DASH}"
48+
"libcufft-dev-${CUDA_VERSION_DASH}" \
49+
"libcurand-dev-${CUDA_VERSION_DASH}"
4850

4951
# Clean up
5052
apt-get clean

.ci/docker/common/install_pytorch.sh

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,24 @@ install_domains() {
1717
}
1818

1919
install_pytorch_and_domains() {
20+
if [ "${TORCH_CHANNEL}" != "nightly" ]; then
21+
# Test/release: install the published wheels directly. The specs and URL
22+
# are passed in as docker build args (computed from torch_pin.py by
23+
# .ci/docker/build.sh). RC wheels at /whl/test/ get re-uploaded under the
24+
# same version, so use --no-cache-dir there to avoid stale cache hits.
25+
local cache_flag=""
26+
if [ "${TORCH_CHANNEL}" = "test" ]; then
27+
cache_flag="--no-cache-dir"
28+
fi
29+
pip_install --force-reinstall ${cache_flag} \
30+
"${TORCH_SPEC}" "${TORCHVISION_SPEC}" "${TORCHAUDIO_SPEC}" \
31+
--index-url "${TORCH_INDEX_URL}/cpu"
32+
return
33+
fi
34+
35+
# Nightly: build pytorch from source against the pinned SHA in pytorch.txt
36+
# so we catch upstream regressions, then install audio/vision from the
37+
# commits that pytorch itself pins.
2038
git clone https://github.com/pytorch/pytorch.git
2139

2240
# Fetch the target commit
@@ -27,14 +45,19 @@ install_pytorch_and_domains() {
2745
chown -R ci-user .
2846

2947
export _GLIBCXX_USE_CXX11_ABI=1
48+
# PyTorch's FindARM.cmake hard-fails when the SVE+BF16 compile probe
49+
# doesn't pass — gcc-11 in this image is too old to accept the combined
50+
# NEON/SVE/bfloat16 intrinsics the probe exercises. Executorch's aarch64
51+
# runtime targets (phones, embedded) don't use SVE, so bypass the check.
52+
export BUILD_IGNORE_SVE_UNAVAILABLE=1
3053
# Then build and install PyTorch
3154
conda_run python setup.py bdist_wheel
3255
pip_install "$(echo dist/*.whl)"
3356

34-
# Grab the pinned audio and vision commits from PyTorch
35-
TORCHAUDIO_VERSION=release/2.11
57+
# Defer to PyTorch's own pinned audio/vision commits.
58+
TORCHAUDIO_VERSION=$(cat .github/ci_commit_pins/audio.txt)
3659
export TORCHAUDIO_VERSION
37-
TORCHVISION_VERSION=release/0.26
60+
TORCHVISION_VERSION=$(cat .github/ci_commit_pins/vision.txt)
3861
export TORCHVISION_VERSION
3962

4063
install_domains

.ci/docker/ubuntu/Dockerfile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,11 @@ ENV SCCACHE_S3_KEY_PREFIX executorch
6464
ENV SCCACHE_REGION us-east-1
6565

6666
ARG TORCH_VERSION
67+
ARG TORCH_CHANNEL
68+
ARG TORCH_SPEC
69+
ARG TORCHAUDIO_SPEC
70+
ARG TORCHVISION_SPEC
71+
ARG TORCH_INDEX_URL
6772
ARG SKIP_PYTORCH
6873
COPY ./common/install_pytorch.sh install_pytorch.sh
6974
COPY ./common/utils.sh utils.sh

.ci/scripts/export_model_artifact.sh

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -415,12 +415,40 @@ if [ "$MODEL_NAME" = "qwen3_5_moe" ]; then
415415

416416
# Export to .pte/.ptd (short cache dir avoids objcopy symbol length issues)
417417
echo "::group::Export"
418+
EXPORT_LOG=$(mktemp)
418419
TORCHINDUCTOR_CACHE_DIR="$INDUCTOR_CACHE" \
419420
python -m executorch.examples.models.qwen3_5_moe.export \
420421
--prequantized "$LOCAL_MODEL_DIR" \
421-
--output-dir "${OUTPUT_DIR}"
422+
--output-dir "${OUTPUT_DIR}" \
423+
--dense-prefill dequant \
424+
--moe-activation-dtype int8 2>&1 | tee "$EXPORT_LOG"
425+
EXPORT_RC=${PIPESTATUS[0]}
422426
echo "::endgroup::"
423427

428+
if [ "$EXPORT_RC" -ne 0 ]; then
429+
echo "ERROR: Qwen3.5 MoE export failed (exit $EXPORT_RC)"
430+
rm -f "$EXPORT_LOG"
431+
exit "$EXPORT_RC"
432+
fi
433+
434+
# Gate peak GPU memory so we keep the export viable on consumer GPUs
435+
# (e.g. RTX 4090 with 24 GB). The export script prints a machine-
436+
# parseable marker line "EXPORT_GPU_PEAK_MEMORY_MB: <float>".
437+
EXPORT_GPU_PEAK_MB_LIMIT="${EXPORT_GPU_PEAK_MB_LIMIT:-20480}"
438+
PEAK_LINE=$(grep -E '^EXPORT_GPU_PEAK_MEMORY_MB:' "$EXPORT_LOG" | tail -1)
439+
rm -f "$EXPORT_LOG"
440+
if [ -z "$PEAK_LINE" ]; then
441+
echo "ERROR: export did not emit EXPORT_GPU_PEAK_MEMORY_MB marker; cannot enforce GPU memory budget"
442+
exit 1
443+
fi
444+
PEAK_MB=$(echo "$PEAK_LINE" | awk '{print $2}')
445+
echo "Export GPU peak memory: ${PEAK_MB} MB (limit ${EXPORT_GPU_PEAK_MB_LIMIT} MB)"
446+
if awk -v p="$PEAK_MB" -v l="$EXPORT_GPU_PEAK_MB_LIMIT" 'BEGIN{exit !(p>l)}'; then
447+
echo "ERROR: export exceeded GPU memory budget (${PEAK_MB} MB > ${EXPORT_GPU_PEAK_MB_LIMIT} MB)"
448+
echo " — this would prevent the model from being exported on a 24 GB consumer GPU."
449+
exit 1
450+
fi
451+
424452
test -f "${OUTPUT_DIR}/model.pte"
425453
test -f "${OUTPUT_DIR}/aoti_cuda_blob.ptd"
426454
ls -al "${OUTPUT_DIR}"
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
#!/usr/bin/env python3
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
from __future__ import annotations
9+
10+
"""Validate that backend Python modules can be imported.
11+
12+
The workflow passes backend-specific paths and package prefixes so the same
13+
checker can be reused for different backends.
14+
"""
15+
16+
import argparse
17+
import importlib
18+
import sys
19+
from pathlib import Path
20+
21+
22+
def parse_args() -> argparse.Namespace:
    """Parse command-line options for the backend import checker.

    Returns:
        argparse.Namespace with `name`, `package_root`, `package_prefix`,
        and `skip_segment` (a list of path segments to skip).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--name",
        required=True,
        help="Display name for log messages, for example `QNN`.",
    )
    parser.add_argument(
        "--package-root",
        required=True,
        help="Path to the backend package root, relative to ExecuTorch root.",
    )
    parser.add_argument(
        "--package-prefix",
        required=True,
        help="Python package prefix, for example `executorch.backends.qualcomm`.",
    )
    parser.add_argument(
        "--skip-segment",
        action="append",
        # NOTE(review): with action="append" and a non-empty default, any
        # --skip-segment given on the command line is APPENDED to this list
        # rather than replacing it, so callers can never opt back in to
        # `fb`/`test`/`tests`. Confirm extend-only semantics are intended.
        default=["fb", "test", "tests"],
        help="Package path segment to skip while walking modules.",
    )
    return parser.parse_args()
46+
47+
48+
def resolve_executorch_root() -> Path:
    """Walk upward from this file to the ExecuTorch repository root.

    The root is recognized as the first ancestor directory that contains
    both a `backends/` and an `examples/` directory.

    Raises:
        RuntimeError: if no ancestor looks like the repository root.
    """
    here = Path(__file__).resolve()
    for candidate in here.parents:
        looks_like_root = (candidate / "backends").is_dir() and (
            candidate / "examples"
        ).is_dir()
        if looks_like_root:
            return candidate
    raise RuntimeError(f"Could not locate ExecuTorch root from {here}")
55+
56+
57+
def resolve_directory(executorch_root: Path, relative_path: str) -> Path:
    """Join `relative_path` onto the repo root, insisting it is a directory.

    Raises:
        RuntimeError: if the joined path does not exist or is not a directory.
    """
    candidate = executorch_root / relative_path
    if candidate.is_dir():
        return candidate
    raise RuntimeError(
        f"Directory `{relative_path}` was not found under {executorch_root}"
    )
64+
65+
66+
def normalize_package_prefix(package_prefix: str) -> str:
    """Drop a single trailing dot so the prefix composes with module paths."""
    if package_prefix.endswith("."):
        return package_prefix[:-1]
    return package_prefix
68+
69+
70+
def should_skip_path(path: Path, skip_segments: list[str]) -> bool:
    """Decide whether `path` should be excluded from module discovery.

    A path is skipped when any of its segments matches a skip entry, or
    when its filename stem equals an entry or starts with `<entry>_`
    (e.g. `test_foo.py` is skipped for entry `test`).
    """
    stem = path.stem
    for segment in skip_segments:
        if segment in path.parts:
            return True
        if stem == segment or stem.startswith(segment + "_"):
            return True
    return False
78+
79+
80+
def discover_modules(
    package_root: Path,
    package_prefix: str,
    skip_segments: list[str],
) -> list[str]:
    """Map every non-skipped `*.py` file under `package_root` to the dotted
    module name it imports as, rooted at `package_prefix`.

    `__init__.py` files resolve to their package name (the bare prefix at
    the package root itself); every other file resolves to
    `<prefix>.<dotted relative path>`. Results follow sorted path order.
    """
    discovered = []
    for candidate in sorted(package_root.rglob("*.py")):
        rel = candidate.relative_to(package_root)
        if should_skip_path(rel, skip_segments):
            continue

        if rel.name == "__init__.py":
            # A package's __init__ imports as the package itself.
            dotted = ".".join(rel.parent.parts)
            discovered.append(
                f"{package_prefix}.{dotted}" if dotted else package_prefix
            )
        else:
            dotted = ".".join(rel.with_suffix("").parts)
            discovered.append(f"{package_prefix}.{dotted}")
    return discovered
103+
104+
105+
def main() -> None:
    """Entry point: import every discovered backend module and report.

    Exits non-zero when no modules are found or when any module fails to
    import; otherwise prints a success summary.
    """
    args = parse_args()
    root = resolve_executorch_root()
    pkg_root = resolve_directory(root, args.package_root)
    prefix = normalize_package_prefix(args.package_prefix)

    modules = discover_modules(pkg_root, prefix, args.skip_segment)
    count = len(modules)
    # An empty module list almost certainly means a misconfigured
    # --package-root, so treat it as a failure rather than a silent pass.
    if not modules:
        print(f"No {args.name} Python modules found under {pkg_root}")
        sys.exit(1)

    failures: list[tuple[str, str, str]] = []
    for position, module_name in enumerate(modules, 1):
        print(f"[{position}/{count}] importing {module_name}", flush=True)
        try:
            importlib.import_module(module_name)
        except Exception as exc:
            failures.append((module_name, type(exc).__name__, str(exc)))

    if failures:
        print(f"{len(failures)}/{count} {args.name} import failure(s):")
        for module_name, exc_type, message in failures:
            print(f" FAIL: {module_name} -- {exc_type}: {message}")
        sys.exit(1)

    print(f"All {count} {args.name} modules imported successfully")
132+
133+
134+
# Allow the checker to be invoked directly as a script.
if __name__ == "__main__":
    main()

.ci/scripts/test_cortex_m_e2e.sh

Lines changed: 12 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -6,76 +6,20 @@
66
# This source code is licensed under the BSD-style license found in the
77
# LICENSE file in the root directory of this source tree.
88

9-
# End-to-end test for Cortex-M backend: export a model via aot_arm_compiler
10-
# with cortex-m55+int8 target, then run the .bpte on Corstone-300 FVP.
11-
#
12-
# Usage: bash .ci/scripts/test_cortex_m_e2e.sh <model_name>
13-
# Example: bash .ci/scripts/test_cortex_m_e2e.sh mv2
9+
# CI wrapper: export a model for the Cortex-M backend and run it on the
10+
# Corstone-300 FVP via examples/arm/run.sh. The real work (export, runner
11+
# build, FVP launch, Test_result: PASS/FAIL check) is done by run.sh and
12+
# the run_fvp.sh it invokes.
1413

15-
set -eux
14+
set -eu
1615

1716
MODEL=$1
18-
mkdir -p "./cortex_m_e2e/${MODEL}"
19-
WORK_DIR=$(realpath "./cortex_m_e2e/${MODEL}")
17+
script_dir=$(realpath "$(dirname "${BASH_SOURCE[0]}")")
18+
et_root_dir=$(realpath "${script_dir}/../..")
2019

21-
echo "=== Exporting ${MODEL} with cortex-m55+int8 ==="
22-
python -m backends.arm.scripts.aot_arm_compiler \
23-
-m "${MODEL}" \
20+
# Quantization is the default for the cortex-m55+int8 target; run.sh's
21+
# arg parser only recognizes --no_quantize, so we omit any explicit flag.
22+
bash "${et_root_dir}/examples/arm/run.sh" \
23+
--model_name="${MODEL}" \
2424
--target=cortex-m55+int8 \
25-
--quantize \
26-
--bundleio \
27-
--intermediates="${WORK_DIR}/intermediates" \
28-
--output="${WORK_DIR}/${MODEL}.bpte"
29-
30-
BPTE="${WORK_DIR}/${MODEL}.bpte"
31-
test -f "${BPTE}" || { echo "FAIL: ${BPTE} not produced"; exit 1; }
32-
echo "=== Exported ${BPTE} ($(stat --printf='%s' "${BPTE}") bytes) ==="
33-
34-
ELF="arm_test/arm_semihosting_executor_runner_corstone-300/arm_executor_runner"
35-
test -f "${ELF}" || { echo "FAIL: executor runner not found at ${ELF}"; exit 1; }
36-
37-
LOG_FILE=$(mktemp)
38-
39-
# Create a tiny dummy input file — the runner requires -i but BundleIO
40-
# ignores it and uses the embedded test inputs instead.
41-
dd if=/dev/zero of="${WORK_DIR}/dummy.bin" bs=4 count=1 2>/dev/null
42-
43-
echo "=== Running ${MODEL} on Corstone-300 FVP ==="
44-
FVP_Corstone_SSE-300_Ethos-U55 \
45-
-C ethosu.num_macs=128 \
46-
-C mps3_board.visualisation.disable-visualisation=1 \
47-
-C mps3_board.telnetterminal0.start_telnet=0 \
48-
-C mps3_board.uart0.out_file='-' \
49-
-C mps3_board.uart0.shutdown_on_eot=1 \
50-
-C cpu0.semihosting-enable=1 \
51-
-C cpu0.semihosting-stack_base=0 \
52-
-C cpu0.semihosting-heap_limit=0 \
53-
-C "cpu0.semihosting-cwd=${WORK_DIR}" \
54-
-C "ethosu.extra_args='--fast'" \
55-
-C "cpu0.semihosting-cmd_line='executor_runner -m ${MODEL}.bpte -i dummy.bin -o out'" \
56-
-a "${ELF}" \
57-
--timelimit 300 2>&1 | tee "${LOG_FILE}" || true
58-
59-
echo "=== Checking FVP output ==="
60-
61-
if grep -q "Test_result: PASS" "${LOG_FILE}"; then
62-
echo "=== SUCCESS: ${MODEL} e2e BundleIO test PASSED on FVP ==="
63-
rm "${LOG_FILE}"
64-
exit 0
65-
fi
66-
67-
if grep -q "Test_result: FAIL" "${LOG_FILE}"; then
68-
echo "FAIL: ${MODEL} BundleIO output mismatch"
69-
rm "${LOG_FILE}"
70-
exit 1
71-
fi
72-
73-
if grep -qE "(^[EF][: ].*$)|(^.*Hard fault.*$)|(^.*Assertion.*$)" "${LOG_FILE}"; then
74-
echo "FAIL: ${MODEL} FVP run hit a fatal error"
75-
rm "${LOG_FILE}"
76-
exit 1
77-
fi
78-
79-
echo "FAIL: ${MODEL} no BundleIO test result found in FVP output"
80-
rm "${LOG_FILE}"
81-
exit 1
25+
--bundleio

0 commit comments

Comments
 (0)