Revert "Merge branch 'feature/swiftbuddy-mempalace-v1' into main" #88
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow header: display name, triggers, token scope and run concurrency.
name: CI Pipeline

# Runs for pushes to main and for pull requests that target main.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

# Least privilege: the jobs visible in this workflow only check out code,
# restore/save caches and upload artifacts, so a read-only GITHUB_TOKEN is
# sufficient. Widen this per job if a step ever needs to write.
permissions:
  contents: read

# One active run per ref: a newer run for the same ref cancels the one that
# is still in progress.
concurrency:
  group: ci-${{ github.ref }}
  cancel-in-progress: true
jobs:
  # Core pipeline: release build, binary sanity check, C++ and Swift unit
  # tests, then the server end-to-end suite.
  ci:
    runs-on: macos-15
    timeout-minutes: 40
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive

      # Best effort: `|| true` keeps a failed component download from
      # failing the job.
      - name: Install Metal Toolchain
        run: xcodebuild -downloadComponent MetalToolchain || true

      - name: Cache Swift packages
        uses: actions/cache@v4
        with:
          path: .build
          # The product name is baked into the key, so a rename
          # (e.g. mlx-server→SwiftLM) busts the cache automatically and
          # prevents stale PCH errors.
          key: ${{ runner.os }}-spm-SwiftLM-v2-${{ hashFiles('Package.resolved') }}
          restore-keys: |
            ${{ runner.os }}-spm-SwiftLM-v2-

      - name: Clear stale module cache
        # Guards against: "PCH was compiled with module cache path
        # '…mlx-server…' but the path is currently '…SwiftLM…'" after the
        # repo rename. Errors are silenced and never fail the step.
        run: find .build -type d -name ModuleCache -exec rm -rf {} + 2>/dev/null || true

      - name: Resolve dependencies
        run: swift package resolve

      - name: Build (Release)
        run: swift build -c release

      # Lists and inspects the release executable; fails if it is missing.
      - name: Verify binary
        run: |
          ls -lh .build/release/SwiftLM
          file .build/release/SwiftLM

      - name: TurboQuant unit tests
        run: |
          # Compile and run standalone C++ unit tests for the TurboQuant
          # KV cache compression algorithm (ported from TheTom/llama-cpp-turboquant).
          # Tests: centroids, WHT self-inverse, rotation orthogonality,
          # 3-bit pack/unpack, V-cache SNR, K-cache IP SNR, fp16 round-trip.
          clang++ -std=c++17 -O2 -o /tmp/tq_test tests/test_turbo_quant.cpp
          /tmp/tq_test

      - name: Build Test Harness
        run: swift build --build-tests

      # Installs the `mlx` Python package into a throwaway venv purely to
      # obtain mlx.metallib, then copies that file next to the built
      # binaries.
      - name: Install MLX Metal library
        run: |
          python3 -m venv /tmp/mlx_venv
          /tmp/mlx_venv/bin/pip install --quiet mlx
          # Inject metallib for production e2e runner
          cp /tmp/mlx_venv/lib/python*/site-packages/mlx/lib/mlx.metallib .build/release/
          # Distribute metallib exclusively to XCTest bundles so it satisfies memory.cpp current_binary_dir() constraints natively.
          find .build -type d -name "MacOS" -exec cp /tmp/mlx_venv/lib/python*/site-packages/mlx/lib/mlx.metallib {}/ \;

      # Reuses the test products built by "Build Test Harness" (--skip-build).
      - name: SwiftBuddy Tests (MemPalace & Lifecycle)
        run: swift test --skip-build --filter SwiftBuddyTests --disable-swift-testing

      # Static key: the Hugging Face cache is keyed by model name only.
      - name: Cache MLX model
        uses: actions/cache@v4
        with:
          path: ~/.cache/huggingface
          key: mlx-model-qwen2.5-0.5b-4bit

      # Runs the server suite up to three times; the first passing attempt
      # ends the step successfully.
      - name: Run E2E tests
        env:
          HF_HUB_DOWNLOAD_TIMEOUT: "600"
        run: |
          chmod +x tests/test-server.sh
          # Retry up to 2 times for transient HuggingFace download failures
          for attempt in 1 2 3; do
            echo "Attempt $attempt of 3..."
            if tests/test-server.sh .build/release/SwiftLM 15413; then
              exit 0
            fi
            if [ "$attempt" -lt 3 ]; then
              echo "Test failed, retrying in 10s..."
              sleep 10
            fi
          done
          echo "All attempts failed"
          exit 1

      # Only runs when an earlier step in this job has failed.
      - name: Upload test logs on failure
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: ci-test-logs
          path: /tmp/SwiftLM-test-*.log
          retention-days: 7
# ── Speculative Decoding E2E (dual-model: 0.8B draft + 4B main) ──
# Uses the standard macos-15 runner (7 GB RAM).
# We test the 4B main model which safely fits within memory.
  speculative-decoding:
    runs-on: macos-15
    timeout-minutes: 45
    needs: ci  # Only run after core CI passes
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive

      # Best effort: `|| true` keeps a failed component download from
      # failing the job.
      - name: Install Metal Toolchain
        run: xcodebuild -downloadComponent MetalToolchain || true

      # Same key scheme as the `ci` job so the SwiftPM build cache is shared.
      - name: Cache Swift packages
        uses: actions/cache@v4
        with:
          path: .build
          key: ${{ runner.os }}-spm-SwiftLM-v2-${{ hashFiles('Package.resolved') }}
          restore-keys: |
            ${{ runner.os }}-spm-SwiftLM-v2-

      # Errors are silenced and never fail the step.
      - name: Clear stale module cache
        run: find .build -type d -name ModuleCache -exec rm -rf {} + 2>/dev/null || true

      - name: Resolve dependencies
        run: swift package resolve

      - name: Build (Release)
        run: swift build -c release

      # Installs the `mlx` Python package into a throwaway venv purely to
      # obtain mlx.metallib, then copies it next to the release binary.
      - name: Install MLX Metal library
        run: |
          python3 -m venv /tmp/mlx_venv
          /tmp/mlx_venv/bin/pip install --quiet mlx
          cp /tmp/mlx_venv/lib/python*/site-packages/mlx/lib/mlx.metallib .build/release/

      - name: Cache MLX models (draft + main)
        uses: actions/cache@v4
        with:
          path: ~/.cache/huggingface
          # The key is static, and actions/cache does not re-save after an
          # exact key hit, so the key must change whenever the model set
          # changes. It previously ended in "-9b"; it now names the
          # 0.8B draft + 4B main pair described in the job header.
          # NOTE(review): confirm the model pair against
          # tests/test-speculative.sh.
          key: mlx-speculative-qwen35-0.8b-4b

      # Runs the suite up to three times; the first passing attempt ends
      # the step successfully.
      - name: Run speculative decoding E2E
        env:
          HF_HUB_DOWNLOAD_TIMEOUT: "900"
          SWIFTLM_TOP_K: "4"
        run: |
          chmod +x tests/test-speculative.sh
          for attempt in 1 2 3; do
            echo "Attempt $attempt of 3..."
            if tests/test-speculative.sh .build/release/SwiftLM 15414; then
              exit 0
            fi
            if [ "$attempt" -lt 3 ]; then
              echo "Test failed, retrying in 10s..."
              sleep 10
            fi
          done
          echo "All attempts failed"
          exit 1

      # Only runs when an earlier step in this job has failed.
      - name: Upload speculative test logs on failure
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: speculative-test-logs
          path: /tmp/SwiftLM-test-speculative.log
          retention-days: 7
# ── Speculative Decoding Memory Evaluation ──
# Evaluates peak memory compression/efficiency with the 9B model and
# NUM_DRAFT_TOKENS=2 (not set in this workflow — presumably configured in
# tests/test-speculative-eval.sh; confirm). The job is allowed to OOM/fail:
# `continue-on-error` keeps a failure here from failing the workflow run.
  speculative-decoding-eval:
    runs-on: macos-15
    timeout-minutes: 45
    needs: ci
    continue-on-error: true
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive

      # Best effort: `|| true` keeps a failed component download from
      # failing the job.
      - name: Install Metal Toolchain
        run: xcodebuild -downloadComponent MetalToolchain || true

      # Same key scheme as the `ci` job so the SwiftPM build cache is shared.
      - name: Cache Swift packages
        uses: actions/cache@v4
        with:
          path: .build
          key: ${{ runner.os }}-spm-SwiftLM-v2-${{ hashFiles('Package.resolved') }}
          restore-keys: |
            ${{ runner.os }}-spm-SwiftLM-v2-

      # Errors are silenced and never fail the step.
      - name: Clear stale module cache
        run: find .build -type d -name ModuleCache -exec rm -rf {} + 2>/dev/null || true

      - name: Resolve dependencies
        run: swift package resolve

      - name: Build (Release)
        run: swift build -c release

      # Installs the `mlx` Python package into a throwaway venv purely to
      # obtain mlx.metallib, then copies it next to the release binary.
      - name: Install MLX Metal library
        run: |
          python3 -m venv /tmp/mlx_venv
          /tmp/mlx_venv/bin/pip install --quiet mlx
          cp /tmp/mlx_venv/lib/python*/site-packages/mlx/lib/mlx.metallib .build/release/

      # Runs the evaluation up to three times; the first passing attempt
      # ends the step successfully.
      - name: Run speculative evaluation E2E
        env:
          HF_HUB_DOWNLOAD_TIMEOUT: "900"
          SWIFTLM_TOP_K: "4"
        run: |
          chmod +x tests/test-speculative-eval.sh
          for attempt in 1 2 3; do
            echo "Attempt $attempt of 3..."
            if tests/test-speculative-eval.sh .build/release/SwiftLM 15414; then
              exit 0
            fi
            if [ "$attempt" -lt 3 ]; then
              echo "Test failed, retrying in 10s..."
              sleep 10
            fi
          done
          echo "All attempts failed"
          exit 1

      # Only runs when an earlier step in this job has failed.
      - name: Upload speculative eval logs on failure
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: speculative-eval-logs
          path: /tmp/SwiftLM-test-speculative-eval.log
          retention-days: 7