Skip to content

Commit e40ab21

Browse files
Metal CI: Add Qwen 3.5 MoE tiny model integration test
Export the tiny model with --backend metal, build the C++ runner, and verify both decode (T=1) and prefill (T>2) complete successfully. Uses a byte-level tokenizer matching the tiny model's vocab_size=256. Authored with Claude.
1 parent 6be4fb5 commit e40ab21

1 file changed

Lines changed: 85 additions & 0 deletions

File tree

.github/workflows/metal.yml

Lines changed: 85 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,8 @@ on:
1212
- .github/workflows/metal.yml
1313
- backends/apple/metal/**
1414
- backends/aoti/**
15+
- examples/models/qwen3_5_moe/**
16+
- extension/llm/export/**
1517
workflow_dispatch:
1618

1719
concurrency:
@@ -59,6 +61,89 @@ jobs:
5961
${CONDA_RUN} python -m unittest backends.apple.metal.tests.test_modules.TestMetalBackendModules
6062
echo "::endgroup::"
6163
64+
# Integration test for the Qwen 3.5 MoE tiny model on the Metal backend:
# exports the model with --backend metal, builds the C++ runner, writes a
# byte-level tokenizer.json, and runs the runner twice, requiring a "Decode:"
# line in the output of each run.
test-metal-qwen35-moe-tiny:
  name: test-metal-qwen35-moe-tiny
  uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  with:
    runner: macos-m2-stable
    python-version: '3.11'
    submodules: 'recursive'
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    timeout: 120
    script: |
      set -eux

      echo "::group::Setup ExecuTorch"
      PYTHON_EXECUTABLE=python ${CONDA_RUN} EXECUTORCH_BUILD_KERNELS_TORCHAO=1 TORCHAO_BUILD_EXPERIMENTAL_MPS=1 ./install_executorch.sh
      echo "::endgroup::"

      # Isolate Inductor cache per job to prevent PCH conflicts.
      # Assign first, export second: "export VAR=$(cmd)" returns the status of
      # export, so a failing mktemp would go unnoticed even under "set -e".
      TMPDIR=$(mktemp -d "${RUNNER_TEMP}/tmpdir_XXXXXX")
      export TMPDIR
      TORCHINDUCTOR_CACHE_DIR=$(mktemp -d "${RUNNER_TEMP}/inductor_cache_XXXXXX")
      export TORCHINDUCTOR_CACHE_DIR

      # Keep the exported model and tokenizer inside the per-job temp dir
      # instead of a fixed /tmp path, so runs on the same machine cannot see
      # each other's artifacts.
      MODEL_DIR="${TMPDIR}/qwen35_moe_metal_tiny"
      mkdir -p "${MODEL_DIR}"
      RUNNER_BIN=./cmake-out/examples/models/qwen3_5_moe/qwen3_5_moe_runner

      echo "::group::Export Qwen 3.5 MoE (tiny model, Metal)"
      ${CONDA_RUN} python -m executorch.examples.models.qwen3_5_moe.export \
        --tiny-test \
        --backend metal \
        --qlinear fpa4w \
        --output-dir "${MODEL_DIR}"
      echo "::endgroup::"

      echo "::group::Build Metal runtime and Qwen 3.5 MoE runner"
      ${CONDA_RUN} cmake --workflow --preset llm-release-metal
      cd examples/models/qwen3_5_moe
      ${CONDA_RUN} cmake --workflow --preset qwen3-5-moe-metal
      cd -
      echo "::endgroup::"

      # Create a byte-level tokenizer for the tiny model (vocab_size=256).
      # Each ID 0..255 gets its own token string: printable ASCII maps to the
      # character itself, every other value to a <0xNN> placeholder.
      # The output path is passed as argv[1] so it is defined in one place.
      ${CONDA_RUN} python -c "
      import json
      import sys

      pieces = [chr(i) if 32 <= i < 127 else f'<0x{i:02X}>' for i in range(256)]
      vocab = {piece: i for i, piece in enumerate(pieces)}
      merges = []
      tokenizer = {
          'version': '1.0',
          'model': {'type': 'BPE', 'vocab': vocab, 'merges': merges},
          'added_tokens': [{'id': i, 'content': piece, 'single_word': False, 'lstrip': False, 'rstrip': False, 'normalized': False, 'special': False} for i, piece in enumerate(pieces)],
      }
      with open(sys.argv[1], 'w') as f:
          json.dump(tokenizer, f)
      print('Created byte-level tokenizer.json')
      " "${MODEL_DIR}/tokenizer.json"

      # Run the runner on one prompt and require a "Decode:" line in its output.
      #   $1 - label used in the Success/Failed messages
      #   $2 - prompt text
      # The exit status is captured with "|| status=$?" so that "set -e" does
      # not abort before the captured output has been printed; otherwise a
      # crashing runner would leave no log in the CI output.
      run_inference() {
        local label="$1"
        local prompt="$2"
        local output
        local status=0
        output=$("${RUNNER_BIN}" \
          --model_path "${MODEL_DIR}/model.pte" \
          --tokenizer_path "${MODEL_DIR}/tokenizer.json" \
          --prompt "${prompt}" \
          --temperature 0 \
          --max_new_tokens 4 2>&1) || status=$?
        echo "${output}"
        if [ "${status}" -ne 0 ]; then
          echo "Failed: runner exited with status ${status} (${label})"
          return 1
        fi
        if echo "${output}" | grep -q "Decode:"; then
          echo "Success: ${label} completed"
        else
          echo "Failed: ${label} did not complete"
          return 1
        fi
      }

      # NOTE(review): with the byte-level tokenizer above, "Hello" presumably
      # encodes to more than one token, so this run likely exercises a
      # multi-token prefill as well as T=1 decode steps - confirm the intent.
      echo "::group::Run Qwen 3.5 MoE inference (T=1 decode)"
      run_inference "decode" "Hello"
      echo "::endgroup::"

      echo "::group::Run Qwen 3.5 MoE inference (T>2 prefill + decode)"
      run_inference "prefill + decode" "one two three"
      echo "::endgroup::"
146+
62147
export-model-metal-artifact:
63148
name: export-model-metal-artifact
64149
# Skip this job if the pull request is from a fork (HuggingFace secrets are not available)

0 commit comments

Comments
 (0)