|
12 | 12 | - .github/workflows/metal.yml |
13 | 13 | - backends/apple/metal/** |
14 | 14 | - backends/aoti/** |
| 15 | + - examples/models/qwen3_5_moe/** |
| 16 | + - extension/llm/export/** |
15 | 17 | workflow_dispatch: |
16 | 18 |
|
17 | 19 | concurrency: |
|
59 | 61 | ${CONDA_RUN} python -m unittest backends.apple.metal.tests.test_modules.TestMetalBackendModules |
60 | 62 | echo "::endgroup::" |
61 | 63 |
|
# End-to-end smoke test for the tiny Qwen 3.5 MoE model on the Metal backend:
# install ExecuTorch, export the tiny model, build the runner, generate a
# byte-level tokenizer, then run two short generations and require a
# "Decode:" line in the runner output of each.
test-metal-qwen35-moe-tiny:
  name: test-metal-qwen35-moe-tiny
  uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  with:
    runner: macos-m2-stable
    python-version: '3.11'
    submodules: 'recursive'
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    timeout: 120
    script: |
      set -eux

      # NOTE(review): ${CONDA_RUN} is used unquoted as a command prefix
      # throughout this script, exactly as in the original; it is presumably
      # a multi-word command that relies on word splitting - confirm against
      # the reusable macos_job workflow before quoting it.

      # Directory shared by the export step, the tokenizer generator and the
      # runner invocations.
      MODEL_DIR=/tmp/qwen35_moe_metal_tiny
      RUNNER_BIN=./cmake-out/examples/models/qwen3_5_moe/qwen3_5_moe_runner

      # Run the runner once and verify that decoding finished.
      #   $1 - prompt text passed to --prompt
      #   $2 - phase label used in the Success/Failed messages
      # The runner's combined stdout/stderr is always printed, even when the
      # runner exits non-zero. A bare `output=$(cmd)` under `set -e` would
      # abort the script before the output could be shown.
      run_inference() {
        local prompt="$1"
        local label="$2"
        local output
        local status=0
        output=$("${RUNNER_BIN}" \
          --model_path "${MODEL_DIR}/model.pte" \
          --tokenizer_path "${MODEL_DIR}/tokenizer.json" \
          --prompt "${prompt}" \
          --temperature 0 \
          --max_new_tokens 4 2>&1) || status=$?
        printf '%s\n' "${output}"
        if [ "${status}" -ne 0 ]; then
          echo "Failed: runner exited with status ${status}"
          exit 1
        fi
        # Substring match instead of `echo | grep -q`: no pipeline involved.
        case "${output}" in
          *"Decode:"*)
            echo "Success: ${label} completed"
            ;;
          *)
            echo "Failed: ${label} did not complete"
            exit 1
            ;;
        esac
      }

      echo "::group::Setup ExecuTorch"
      PYTHON_EXECUTABLE=python ${CONDA_RUN} EXECUTORCH_BUILD_KERNELS_TORCHAO=1 TORCHAO_BUILD_EXPERIMENTAL_MPS=1 ./install_executorch.sh
      echo "::endgroup::"

      # Isolate Inductor cache per job to prevent PCH conflicts.
      # Assignment and export are separate statements so that a failing
      # mktemp aborts the script instead of being masked by `export`.
      TMPDIR=$(mktemp -d "${RUNNER_TEMP}/tmpdir_XXXXXX")
      export TMPDIR
      TORCHINDUCTOR_CACHE_DIR=$(mktemp -d "${RUNNER_TEMP}/inductor_cache_XXXXXX")
      export TORCHINDUCTOR_CACHE_DIR

      echo "::group::Export Qwen 3.5 MoE (tiny model, Metal)"
      ${CONDA_RUN} python -m executorch.examples.models.qwen3_5_moe.export \
        --tiny-test \
        --backend metal \
        --qlinear fpa4w \
        --output-dir "${MODEL_DIR}"
      echo "::endgroup::"

      echo "::group::Build Metal runtime and Qwen 3.5 MoE runner"
      ${CONDA_RUN} cmake --workflow --preset llm-release-metal
      # Subshell keeps the directory change local to the second preset build.
      (
        cd examples/models/qwen3_5_moe
        ${CONDA_RUN} cmake --workflow --preset qwen3-5-moe-metal
      )
      echo "::endgroup::"

      # Create a byte-level tokenizer for the tiny model (vocab_size=256).
      # Maps each byte value to its own token ID so any prompt produces valid IDs.
      # The output directory is passed as an argument (sys.argv[1]). The Python
      # source sits inside a double-quoted shell string, so it must not
      # contain double quotes, dollar signs, backticks or backslashes.
      ${CONDA_RUN} python -c "
      import json
      import os
      import sys

      def token_text(i):
          # Printable ASCII maps to itself; every other byte gets a <0xNN> name.
          return chr(i) if 32 <= i < 127 else f'<0x{i:02X}>'

      vocab = {token_text(i): i for i in range(256)}
      merges = []
      tokenizer = {
          'version': '1.0',
          'model': {'type': 'BPE', 'vocab': vocab, 'merges': merges},
          'added_tokens': [
              {
                  'id': i,
                  'content': token_text(i),
                  'single_word': False,
                  'lstrip': False,
                  'rstrip': False,
                  'normalized': False,
                  'special': False,
              }
              for i in range(256)
          ],
      }
      with open(os.path.join(sys.argv[1], 'tokenizer.json'), 'w') as f:
          json.dump(tokenizer, f)
      print('Created byte-level tokenizer.json')
      " "${MODEL_DIR}"

      echo "::group::Run Qwen 3.5 MoE inference (T=1 decode)"
      run_inference "Hello" "decode"
      echo "::endgroup::"

      echo "::group::Run Qwen 3.5 MoE inference (T>2 prefill + decode)"
      run_inference "one two three" "prefill + decode"
      echo "::endgroup::"

|
62 | 147 | export-model-metal-artifact: |
63 | 148 | name: export-model-metal-artifact |
64 | 149 | # Skip this job if the pull request is from a fork (HuggingFace secrets are not available) |
|
0 commit comments