|
| 1 | +name: Integration (Mac M4) |
| 2 | + |
| 3 | +# Self-hosted runner workflow that runs the integration suite under |
| 4 | +# tests/integration/ against real Qwen3-0.6B on Apple Silicon. |
| 5 | +# |
| 6 | +# Trigger model: |
| 7 | +# - Pull-request events. Only fires when the PR carries the |
| 8 | +# ``needs-mac-m4`` label (auto-applied by .github/workflows/ |
| 9 | +# auto-label-mac.yaml when a PR touches inference_engine/, |
| 10 | +# sdks/, proto/, or tests/integration/). PRs that don't touch |
| 11 | +# verifier-dependent code skip this gate entirely so the runner |
| 12 | +# pool isn't burned on doc-only or CI-only PRs. |
| 13 | +# - Manual workflow_dispatch for re-runs from the Actions UI. |
| 14 | +# |
| 15 | +# Runner requirements (self-hosted): |
| 16 | +# - macOS 14+ on Apple Silicon (M-series). |
| 17 | +# - Labels: [self-hosted, macOS, ARM64, kakeya-mac-m4]. |
| 18 | +# - Pre-warmed HF cache containing Qwen/Qwen3-0.6B at |
| 19 | +# ~/.cache/huggingface/hub/ (avoids 10-minute first-run download). |
| 20 | +# - Python 3.12+ on PATH. |
| 21 | +# - At least 24 GB unified memory and ~50 GB free disk. |
| 22 | +# |
| 23 | +# See docs/ops/mac-m4-runner-setup.md for the one-time runner setup. |
| 24 | + |
| 25 | +on: |
| 26 | + pull_request: |
| 27 | + # Only PRs targeting main. ``labeled`` is included so that adding the needs-mac-m4 label to an already-open PR starts a run. |
| 28 | + types: [opened, synchronize, reopened, labeled] |
| 29 | + branches: [main] |
| 30 | + workflow_dispatch: {} |
| 31 | + |
| 32 | +# Cancel superseded runs on the same PR — saves runner time when |
| 33 | +# the contributor pushes a new commit before the previous run |
| 34 | +# finishes. |
| 35 | +concurrency: |
| 36 | + group: integration-${{ github.ref }} |
| 37 | + cancel-in-progress: true |
| 38 | + |
| 39 | +jobs: |
| 40 | + integration: |
| 41 | + name: pytest -m integration on Mac M4 |
| 42 | + # Only fire on labeled PRs (this saves the runner pool from |
| 43 | + # doc-only / CI-only PRs that don't touch verifier-dependent |
| 44 | + # code). The auto-label workflow adds 'needs-mac-m4' on file |
| 45 | + # paths that warrant the GA gate. |
| 46 | + if: | |
| 47 | + github.event_name == 'workflow_dispatch' || |
| 48 | + contains(github.event.pull_request.labels.*.name, 'needs-mac-m4') |
| 49 | + runs-on: [self-hosted, macOS, ARM64, kakeya-mac-m4] |
| 50 | + timeout-minutes: 90 |
| 51 | + steps: |
| 52 | + - uses: actions/checkout@v4 |
| 53 | + with: |
| 54 | + # Full history so the runner can compare against base for |
| 55 | + # any future rebase-based gating. |
| 56 | + fetch-depth: 0 |
| 57 | + |
| 58 | + - name: Verify host shape |
| 59 | + run: | |
| 60 | + echo "=== sysctl ===" |
| 61 | + sysctl -n hw.model || true |
| 62 | + sysctl -n hw.memsize || true |
| 63 | + sysctl -n machdep.cpu.brand_string || true |
| 64 | + echo "=== python ===" |
| 65 | + python3 --version |
| 66 | + python3 -c "import platform; print(platform.machine(), platform.platform())" |
| 67 | +
|
| 68 | + - name: Verify Qwen3-0.6B in HF cache |
| 69 | + run: | |
| 70 | + # Don't download here; the runner is expected to be |
| 71 | + # pre-warmed. The test step runs with HF_HUB_OFFLINE=1, so a |
| 72 | + # missing model cannot be fetched later and the tests would fail. Surface a |
| 73 | + # clear error early. |
| 74 | + set -e |
| 75 | + MODEL_DIR="$HOME/.cache/huggingface/hub/models--Qwen--Qwen3-0.6B" |
| 76 | + if [ ! -d "$MODEL_DIR" ]; then |
| 77 | + echo "::error::HF cache miss for Qwen/Qwen3-0.6B." |
| 78 | + echo "::error::Pre-warm the runner: python3 -c 'from transformers import AutoModelForCausalLM, AutoTokenizer; AutoModelForCausalLM.from_pretrained(\"Qwen/Qwen3-0.6B\"); AutoTokenizer.from_pretrained(\"Qwen/Qwen3-0.6B\")'" |
| 79 | + exit 1 |
| 80 | + fi |
| 81 | + echo "Found $MODEL_DIR" |
| 82 | +
|
| 83 | + - name: Install Python dependencies |
| 84 | + run: | |
| 85 | + # The runner is expected to have a long-lived venv. |
| 86 | + # If a per-run venv is preferred, swap to ``python3 -m venv .venv``. |
| 87 | + python3 -m pip install --upgrade pip |
| 88 | + python3 -m pip install -e . |
| 89 | + python3 -m pip install pytest pytest-asyncio pytest-timeout coverage |
| 90 | +
|
| 91 | + - name: Run integration suite |
| 92 | + env: |
| 93 | + PYTHONPATH: .:sdks/python |
| 94 | + # No HF download in tests; if we hit a cache miss it's a |
| 95 | + # bug or a stale runner. |
| 96 | + HF_HUB_OFFLINE: "1" |
| 97 | + run: | |
| 98 | + mkdir -p results/platform-tests |
| 99 | + stamp=$(date +%s) |
| 100 | + # Record the stamp for the upload step BEFORE pytest runs: the default shell exits on the first failing command, so nothing after a failing pytest would execute. |
| 101 | + echo "artifact_stamp=${stamp}" >> "$GITHUB_OUTPUT" |
| 102 | + python3 -m pytest \ |
| 103 | + -m integration \ |
| 104 | + tests/integration/ \ |
| 105 | + --junitxml="results/platform-tests/integration-mac-m4-${stamp}.junit.xml" \ |
| 106 | + -v |
| 107 | + id: pytest_run |
| 108 | + |
| 109 | + - name: Upload JUnit + log artifacts |
| 110 | + if: always() |
| 111 | + uses: actions/upload-artifact@v4 |
| 112 | + with: |
| 113 | + name: integration-mac-m4-${{ steps.pytest_run.outputs.artifact_stamp || github.run_id }} |
| 114 | + path: | |
| 115 | + results/platform-tests/integration-mac-m4-*.junit.xml |
| 116 | + retention-days: 30 |
| 117 | + |
| 118 | + - name: Surface failure summary |
| 119 | + if: failure() |
| 120 | + run: | |
| 121 | + # Print every failed/errored test case from the JUnit report so the |
| 122 | + # failure is visible in the action log, not just inside the artifact. |
| 123 | + for f in results/platform-tests/integration-mac-m4-*.junit.xml; do |
| 124 | + # An earlier step may have failed before pytest wrote a report; the unmatched glob then stays literal, so skip it instead of crashing. |
| 125 | + [ -f "$f" ] || { echo "No JUnit report found at $f"; continue; } |
| 126 | + echo "=== $f ===" |
| 127 | + python3 - "$f" <<'PY' |
| 128 | + import sys, xml.etree.ElementTree as ET |
| 129 | + r = ET.parse(sys.argv[1]).getroot() |
| 130 | + for tc in r.iter("testcase"): |
| 131 | + for child in tc: |
| 132 | + if child.tag in ("failure", "error"): |
| 133 | + print(f"[{child.tag.upper()}] {tc.get('classname')}::{tc.get('name')}") |
| 134 | + msg = (child.get("message") or "").splitlines() |
| 135 | + if msg: |
| 136 | + print(f" {msg[0][:180]}") |
| 137 | + PY |
| 138 | + done |
0 commit comments