Skip to content

Commit 363089e

Browse files
Update (base update)
[ghstack-poisoned]
2 parents a1afd65 + 68bb668 commit 363089e

460 files changed

Lines changed: 20794 additions & 5070 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/build-cadence-runner.yml

Lines changed: 28 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -19,36 +19,18 @@ concurrency:
1919
cancel-in-progress: true
2020

2121
jobs:
22-
gate:
23-
runs-on: ubuntu-latest
24-
outputs:
25-
run-cadence: ${{ steps.decide.outputs.run }}
26-
steps:
27-
- id: decide
28-
env:
29-
EVENT: ${{ github.event_name }}
30-
IS_FORK: ${{ github.event.pull_request.head.repo.full_name != github.repository }}
31-
HAS_CLA: ${{ contains(github.event.pull_request.labels.*.name, 'CLA Signed') }}
32-
HAS_EXPORT: ${{ contains(github.event.pull_request.labels.*.name, 'meta-exported') }}
33-
run: |
34-
run=false
35-
case "${EVENT}" in
36-
push|schedule|workflow_dispatch)
37-
run=true
38-
;;
39-
pull_request)
40-
[ "${IS_FORK}" = "false" ] && run=true
41-
;;
42-
pull_request_target)
43-
if [ "${IS_FORK}" = "true" ] && [ "${HAS_CLA}" = "true" ] && [ "${HAS_EXPORT}" = "true" ]; then
44-
run=true
45-
fi
46-
;;
47-
esac
48-
echo "run=${run}" >> "${GITHUB_OUTPUT}"
49-
22+
# Same-repo PRs run on pull_request, which reads the PR's own workflow AND code
23+
# -- so CI changes, new test jobs, code, and tests are all validated pre-merge.
24+
# Fork PRs can't get credentials (OIDC) on pull_request, so Meta-exported forks
25+
# (labeled CLA Signed + meta-exported) run on pull_request_target instead. The
26+
# run condition is inlined per job (GitHub Actions has no YAML anchors and env
27+
# is unavailable in job-level if), so keep the copies in sync.
5028
cpu-build:
51-
if: github.event_name != 'pull_request_target'
29+
if: >-
30+
github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ||
31+
(github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository) ||
32+
(github.event_name == 'pull_request_target' && github.event.pull_request.head.repo.full_name != github.repository &&
33+
contains(github.event.pull_request.labels.*.name, 'CLA Signed') && contains(github.event.pull_request.labels.*.name, 'meta-exported'))
5234
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
5335
permissions:
5436
id-token: write
@@ -58,7 +40,7 @@ jobs:
5840
runner: linux.2xlarge
5941
docker-image: ci-image:executorch-ubuntu-22.04-clang12
6042
submodules: recursive
61-
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
43+
ref: ${{ (github.event_name == 'pull_request' || github.event_name == 'pull_request_target') && github.event.pull_request.head.sha || github.sha }}
6244
timeout: 90
6345
upload-artifact: cadence-runner-build
6446
script: |
@@ -75,21 +57,28 @@ jobs:
7557
7658
cpu-test:
7759
needs: cpu-build
78-
if: github.event_name != 'pull_request_target'
60+
if: >-
61+
github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ||
62+
(github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository) ||
63+
(github.event_name == 'pull_request_target' && github.event.pull_request.head.repo.full_name != github.repository &&
64+
contains(github.event.pull_request.labels.*.name, 'CLA Signed') && contains(github.event.pull_request.labels.*.name, 'meta-exported'))
7965
permissions:
8066
id-token: write
8167
contents: read
8268
uses: ./.github/workflows/_test_cadence.yml
8369
with:
84-
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
70+
ref: ${{ (github.event_name == 'pull_request' || github.event_name == 'pull_request_target') && github.event.pull_request.head.sha || github.sha }}
8571

8672
# Cross-compile cadence_executor_runner for each Cadence Xtensa core, one job
8773
# per backend so they show as separate lines (no matrix grouping). Shared logic
8874
# lives in _xtensa_build.yml. fusion_g3 is omitted until the upstream fusion_g3
8975
# <-> nnlib-FusionG3 API skew is fixed (its runner does not link).
9076
hifi-build:
91-
needs: gate
92-
if: needs.gate.outputs.run-cadence == 'true'
77+
if: >-
78+
github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ||
79+
(github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository) ||
80+
(github.event_name == 'pull_request_target' && github.event.pull_request.head.repo.full_name != github.repository &&
81+
contains(github.event.pull_request.labels.*.name, 'CLA Signed') && contains(github.event.pull_request.labels.*.name, 'meta-exported'))
9382
permissions:
9483
id-token: write
9584
contents: read
@@ -99,8 +88,11 @@ jobs:
9988
ref: ${{ (github.event_name == 'pull_request' || github.event_name == 'pull_request_target') && github.event.pull_request.head.sha || github.sha }}
10089

10190
vision-build:
102-
needs: gate
103-
if: needs.gate.outputs.run-cadence == 'true'
91+
if: >-
92+
github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ||
93+
(github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository) ||
94+
(github.event_name == 'pull_request_target' && github.event.pull_request.head.repo.full_name != github.repository &&
95+
contains(github.event.pull_request.labels.*.name, 'CLA Signed') && contains(github.event.pull_request.labels.*.name, 'meta-exported'))
10496
permissions:
10597
id-token: write
10698
contents: read

.github/workflows/mlx.yml

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,11 @@ jobs:
6666
echo "::endgroup::"
6767
6868
echo "::group::Build test runners"
69-
${CONDA_RUN} cmake --build cmake-out --target op_test_runner multi_thread_test_runner -j$(( $(sysctl -n hw.ncpu) - 1 ))
69+
${CONDA_RUN} cmake --build cmake-out --target op_test_runner multi_thread_test_runner mlx_mutable_state_test -j$(( $(sysctl -n hw.ncpu) - 1 ))
70+
echo "::endgroup::"
71+
72+
echo "::group::Run mutable-state (multi-session) unit test"
73+
./cmake-out/backends/mlx/test/mlx_mutable_state_test
7074
echo "::endgroup::"
7175
7276
echo "::group::Run op unit tests"
@@ -161,6 +165,29 @@ jobs:
161165
fi
162166
echo "::endgroup::"
163167
168+
echo "::group::Verify chunked == unchunked prefill"
169+
QWEN_TINY_PTE=/tmp/qwen35_moe_mlx_tiny/model.pte \
170+
${CONDA_RUN} python -m pytest \
171+
examples/models/qwen3_5_moe/test_chunked_prefill.py -v
172+
echo "::endgroup::"
173+
174+
echo "::group::Build Qwen 3.5 MoE MLX C++ runner"
175+
# Validates the MLX C++ runner build wiring (compile + link + metallib).
176+
# The tiny model has no compatible tokenizer (vocab 256, random weights),
177+
# so we don't run C++ inference here — only confirm it builds.
178+
${CONDA_RUN} make qwen3_5_moe-mlx
179+
RUNNER=cmake-out/examples/models/qwen3_5_moe/qwen3_5_moe_runner
180+
if [ ! -x "$RUNNER" ]; then
181+
echo "Failed: runner not found at $RUNNER"
182+
exit 1
183+
fi
184+
if [ ! -f "$(dirname "$RUNNER")/mlx.metallib" ]; then
185+
echo "Failed: mlx.metallib not copied next to runner"
186+
exit 1
187+
fi
188+
echo "Success: built $RUNNER"
189+
echo "::endgroup::"
190+
164191
backend-tester:
165192
needs: run-decision
166193
if: |

.github/workflows/pull.yml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -816,6 +816,33 @@ jobs:
816816
# Test test_arm_backend.sh with test
817817
backends/arm/test/test_arm_backend.sh "${ARM_TEST}"
818818
819+
test-arm-backend-public-api-backward-compatibility:
820+
name: test-arm-backend-public-api-backward-compatibility
821+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
822+
permissions:
823+
id-token: write
824+
contents: read
825+
with:
826+
runner: linux.2xlarge.memory
827+
docker-image: ci-image:executorch-ubuntu-22.04-arm-sdk
828+
submodules: 'recursive'
829+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
830+
timeout: 120
831+
script: |
832+
# The generic Linux job chooses to use base env, not the one set up by the image
833+
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
834+
conda activate "${CONDA_ENV}"
835+
836+
source .ci/scripts/utils.sh
837+
install_executorch "--use-pt-pinned-commit"
838+
839+
.ci/scripts/setup-arm-baremetal-tools.sh --enable-mlsdk-deps --install-mlsdk-deps-with-pip
840+
source examples/arm/arm-scratch/setup_path.sh
841+
842+
backends/arm/scripts/public_api_manifest/validate_all_public_api_manifests.sh
843+
844+
python backends/arm/test/public_api_bc/run_public_api_bc_scenarios.py
845+
819846
test-llama-runner-qnn-linux:
820847
name: test-llama-runner-qnn-linux
821848
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main

Makefile

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@
9191
#
9292
# ==============================================================================
9393

94-
.PHONY: voxtral-cuda voxtral-cpu voxtral-metal voxtral-mlx voxtral_realtime-cuda voxtral_realtime-cpu voxtral_realtime-metal voxtral_realtime-mlx voxtral_tts-cpu voxtral_tts-cuda whisper-cuda whisper-cuda-debug whisper-cpu whisper-metal parakeet-cuda parakeet-cuda-debug parakeet-cpu parakeet-metal parakeet-mlx parakeet-vulkan dinov2-cuda dinov2-cuda-debug sortformer-cuda sortformer-cpu silero-vad-cpu llama-cuda llama-cuda-debug llama-cpu lfm_2_5-mlx llava-cpu gemma3-cuda gemma3-cpu gemma4_31b-cuda gemma4_31b-mlx qwen3_5_moe-cuda qwen3_5_moe-metal clean help
94+
.PHONY: voxtral-cuda voxtral-cpu voxtral-metal voxtral-mlx voxtral_realtime-cuda voxtral_realtime-cpu voxtral_realtime-metal voxtral_realtime-mlx voxtral_tts-cpu voxtral_tts-cuda whisper-cuda whisper-cuda-debug whisper-cpu whisper-metal parakeet-cuda parakeet-cuda-debug parakeet-cpu parakeet-metal parakeet-mlx parakeet-vulkan dinov2-cuda dinov2-cuda-debug sortformer-cuda sortformer-cpu silero-vad-cpu llama-cuda llama-cuda-debug llama-cpu lfm_2_5-mlx llava-cpu gemma3-cuda gemma3-cpu gemma4_31b-cuda gemma4_31b-mlx qwen3_5_moe-cuda qwen3_5_moe-metal qwen3_5_moe-mlx clean help
9595

9696
help:
9797
@echo "This Makefile adds targets to build runners for various models on various backends. Run using \`make <target>\`. Available targets:"
@@ -131,6 +131,7 @@ help:
131131
@echo " gemma4_31b-mlx - Build Gemma 4 31B runner with MLX backend"
132132
@echo " qwen3_5_moe-cuda - Build Qwen3.5 MoE runner with CUDA backend"
133133
@echo " qwen3_5_moe-metal - Build Qwen3.5 MoE runner with Metal backend"
134+
@echo " qwen3_5_moe-mlx - Build Qwen3.5 MoE runner with MLX backend"
134135
@echo " clean - Clean build artifacts"
135136

136137
voxtral-cuda:
@@ -467,6 +468,15 @@ qwen3_5_moe-metal:
467468
@echo "✓ Build complete!"
468469
@echo " Binary: cmake-out/examples/models/qwen3_5_moe/qwen3_5_moe_runner"
469470

471+
qwen3_5_moe-mlx:
472+
@echo "==> Building and installing ExecuTorch with MLX..."
473+
cmake --workflow --preset mlx-release
474+
@echo "==> Building Qwen3.5 MoE runner with MLX..."
475+
cd examples/models/qwen3_5_moe && cmake --workflow --preset qwen3-5-moe-mlx
476+
@echo ""
477+
@echo "✓ Build complete!"
478+
@echo " Binary: cmake-out/examples/models/qwen3_5_moe/qwen3_5_moe_runner"
479+
470480
clean:
471481
rm -rf cmake-out \
472482
extension/llm/tokenizers/build \

backends/aoti/aoti_backend.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
# LICENSE file in the root directory of this source tree.
66

77
import contextlib
8+
import hashlib
89
import os
910
import typing
1011
from abc import ABC, abstractmethod
@@ -276,18 +277,21 @@ def preprocess(
276277

277278
# Create named data store
278279
named_data_store = NamedDataStore()
279-
method_name = cls.method_name_from_compile_specs(compile_specs)
280280

281-
named_data_store.add_named_data(method_name + "_so_blob", so_data, 1, None)
281+
# Key each blob by a content hash so partitions in one method get distinct
282+
# keys (a method-name-only key collides). Runtime recovers them from
283+
# processed_bytes below.
284+
so_blob_key = hashlib.sha256(so_data).hexdigest() + "_so_blob"
285+
weights_blob_key = hashlib.sha256(blob_data).hexdigest() + "_weights_blob"
286+
287+
named_data_store.add_named_data(so_blob_key, so_data, 1, None)
282288
# Determine whether to save named data externally based on backend setting
283289
# External: save to separate .ptd file, otherwise merge with .pte file
284290
external_tag = (
285291
f"aoti_{device_name}_blob" if cls.save_data_externally() else None
286292
)
287293

288-
named_data_store.add_named_data(
289-
method_name + "_weights_blob", blob_data, 1, external_tag
290-
)
294+
named_data_store.add_named_data(weights_blob_key, blob_data, 1, external_tag)
291295

292296
# Clean up the generated files
293297
os.remove(so_path)
@@ -299,8 +303,11 @@ def preprocess(
299303
# the next preprocess call (e.g. for the next method).
300304
cls.release_moved_tensors(device_edge_program, compile_specs)
301305

306+
# The runtime cannot recompute these hash keys, so carry them (one per line).
307+
processed_bytes = (so_blob_key + "\n" + weights_blob_key).encode("utf-8")
308+
302309
return PreprocessResult(
303-
processed_bytes=b"",
310+
processed_bytes=processed_bytes,
304311
debug_handle_map={},
305312
data_store_output=named_data_store.get_named_data_store_output(),
306313
)

backends/aoti/aoti_delegate_handle.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,15 @@
1010

1111
#include <executorch/runtime/core/error.h>
1212
#include <executorch/runtime/core/evalue.h>
13+
#include <executorch/runtime/core/freeable_buffer.h>
1314
#include <string>
1415

1516
namespace executorch {
1617
namespace backends {
1718
namespace aoti {
1819

1920
using executorch::runtime::Error;
21+
using executorch::runtime::FreeableBuffer;
2022
using executorch::runtime::etensor::Tensor;
2123

2224
extern "C" {
@@ -148,6 +150,30 @@ struct AOTIDelegateHandle {
148150
update_user_managed_constant_buffer_pairs;
149151
};
150152

153+
// New-format payload is "<so_key>\n<weights_key>"; an empty payload is a
154+
// pre-this-change artifact, so fall back to the legacy method-name keys.
155+
inline Error resolve_blob_keys(
156+
const FreeableBuffer* processed,
157+
const std::string& method_name,
158+
std::string& so_blob_key,
159+
std::string& weights_blob_key) {
160+
if (processed != nullptr && processed->size() > 0) {
161+
const std::string keys(
162+
static_cast<const char*>(processed->data()), processed->size());
163+
const size_t newline = keys.find('\n');
164+
if (newline == std::string::npos) {
165+
return Error::Internal;
166+
}
167+
so_blob_key = keys.substr(0, newline);
168+
weights_blob_key = keys.substr(newline + 1);
169+
} else {
170+
so_blob_key = method_name.empty() ? "so_blob" : method_name + "_so_blob";
171+
weights_blob_key =
172+
method_name.empty() ? "weights_blob" : method_name + "_weights_blob";
173+
}
174+
return Error::Ok;
175+
}
176+
151177
} // namespace aoti
152178
} // namespace backends
153179
} // namespace executorch

backends/aoti/tests/TARGETS

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,18 @@ load("@fbcode_macros//build_defs/lib:re_test_utils.bzl", "re_test_utils")
33

44
oncall("executorch")
55

6+
cpp_unittest(
7+
name = "test_resolve_blob_keys",
8+
srcs = [
9+
"test_resolve_blob_keys.cpp",
10+
],
11+
deps = [
12+
"//executorch/backends/aoti:delegate_handle",
13+
"//executorch/runtime/core:core",
14+
"//executorch/runtime/core:evalue",
15+
],
16+
)
17+
618
cpp_unittest(
719
name = "test_common_shims",
820
srcs = [
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include <executorch/backends/aoti/aoti_delegate_handle.h>
10+
11+
#include <gtest/gtest.h>
12+
#include <string>
13+
14+
#include <executorch/runtime/core/error.h>
15+
#include <executorch/runtime/core/freeable_buffer.h>
16+
17+
using executorch::backends::aoti::resolve_blob_keys;
18+
using executorch::runtime::Error;
19+
using executorch::runtime::FreeableBuffer;
20+
21+
// A well-formed new-format payload yields exactly the two keys it carries.
TEST(ResolveBlobKeysTest, ParsesKeysFromPayload) {
  const std::string raw_keys = "aaa_so_blob\nbbb_weights_blob";
  // No free function: the buffer only borrows raw_keys' storage.
  FreeableBuffer buffer(raw_keys.data(), raw_keys.size(), nullptr);

  std::string resolved_so;
  std::string resolved_weights;
  const Error status =
      resolve_blob_keys(&buffer, "forward", resolved_so, resolved_weights);

  ASSERT_EQ(status, Error::Ok);
  EXPECT_EQ(resolved_so, "aaa_so_blob");
  EXPECT_EQ(resolved_weights, "bbb_weights_blob");
}
32+
33+
// An empty payload marks a pre-this-change artifact, so the keys must be
// derived from the method name.
TEST(ResolveBlobKeysTest, FallsBackToMethodNameKeysWhenEmpty) {
  FreeableBuffer empty_buffer; // default-constructed: size 0

  std::string resolved_so;
  std::string resolved_weights;
  const Error status = resolve_blob_keys(
      &empty_buffer, "forward", resolved_so, resolved_weights);

  ASSERT_EQ(status, Error::Ok);
  EXPECT_EQ(resolved_so, "forward_so_blob");
  EXPECT_EQ(resolved_weights, "forward_weights_blob");
}
43+
44+
// A non-empty payload without the newline separator is rejected.
TEST(ResolveBlobKeysTest, FailsOnMalformedPayload) {
  const std::string raw_keys = "missing_the_newline_separator";
  // No free function: the buffer only borrows raw_keys' storage.
  FreeableBuffer buffer(raw_keys.data(), raw_keys.size(), nullptr);

  std::string resolved_so;
  std::string resolved_weights;
  const Error status =
      resolve_blob_keys(&buffer, "forward", resolved_so, resolved_weights);

  EXPECT_EQ(status, Error::Internal);
}

0 commit comments

Comments
 (0)