# Workflow file for run #88:
# Revert "Merge branch 'feature/swiftbuddy-mempalace-v1' into main"

# CI for the SwiftLM package: triggered by pushes to main and by pull
# requests that target main.
name: CI Pipeline

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

# Least privilege for GITHUB_TOKEN: the steps in this workflow only check
# out the repository, use the Actions cache and upload artifacts.
# NOTE(review): confirm the tests/*.sh scripts never need a write-scoped
# token before merging.
permissions:
  contents: read

# At most one active run per ref; a newer run on the same ref cancels the
# one still in progress.
concurrency:
  group: ci-${{ github.ref }}
  cancel-in-progress: true
jobs:
  # Core pipeline: release build, standalone TurboQuant C++ tests, the
  # SwiftBuddy XCTest suite and finally the server end-to-end test.
  ci:
    runs-on: macos-15
    timeout-minutes: 40
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Install Metal Toolchain
        # Best effort: `|| true` keeps the job going if the download fails.
        run: xcodebuild -downloadComponent MetalToolchain || true

      - name: Cache Swift packages
        uses: actions/cache@v4
        with:
          # The product name is part of the key, so any rename
          # (e.g. mlx-server→SwiftLM) busts the cache automatically and
          # prevents stale PCH errors.
          key: ${{ runner.os }}-spm-SwiftLM-v2-${{ hashFiles('Package.resolved') }}
          restore-keys: |
            ${{ runner.os }}-spm-SwiftLM-v2-
          path: .build

      - name: Clear stale module cache
        # Guards against: "PCH was compiled with module cache path
        # '…mlx-server…' but the path is currently '…SwiftLM…'" after the
        # repo rename. Errors are silenced and never fail the step.
        run: |
          find .build -type d -name ModuleCache -exec rm -rf {} + 2>/dev/null || true

      - name: Resolve dependencies
        run: swift package resolve

      - name: Build (Release)
        run: swift build -c release

      - name: Verify binary
        run: |
          ls -lh .build/release/SwiftLM
          file .build/release/SwiftLM

      # Compiles and runs the standalone C++ unit tests for the TurboQuant
      # KV cache compression algorithm (ported from
      # TheTom/llama-cpp-turboquant). Covered: centroids, WHT self-inverse,
      # rotation orthogonality, 3-bit pack/unpack, V-cache SNR,
      # K-cache IP SNR, fp16 round-trip.
      - name: TurboQuant unit tests
        run: |
          clang++ -std=c++17 -O2 -o /tmp/tq_test tests/test_turbo_quant.cpp
          /tmp/tq_test

      - name: Build Test Harness
        run: swift build --build-tests

      - name: Install MLX Metal library
        run: |
          python3 -m venv /tmp/mlx_venv
          /tmp/mlx_venv/bin/pip install --quiet mlx
          # The release binary (production e2e runner) gets the metallib
          # placed next to it.
          cp /tmp/mlx_venv/lib/python*/site-packages/mlx/lib/mlx.metallib .build/release/
          # Also copy it into every "MacOS" directory under .build (the
          # XCTest bundles) so the memory.cpp current_binary_dir() lookup
          # is satisfied.
          find .build -type d -name "MacOS" -exec cp /tmp/mlx_venv/lib/python*/site-packages/mlx/lib/mlx.metallib {}/ \;

      - name: SwiftBuddy Tests (MemPalace & Lifecycle)
        run: swift test --skip-build --filter SwiftBuddyTests --disable-swift-testing

      - name: Cache MLX model
        uses: actions/cache@v4
        with:
          key: mlx-model-qwen2.5-0.5b-4bit
          path: ~/.cache/huggingface

      - name: Run E2E tests
        env:
          HF_HUB_DOWNLOAD_TIMEOUT: '600'
        run: |
          chmod +x tests/test-server.sh
          # Three attempts in total (i.e. up to 2 retries) to ride out
          # transient HuggingFace download failures.
          max_attempts=3
          attempt=1
          while [ "$attempt" -le "$max_attempts" ]; do
            echo "Attempt $attempt of $max_attempts..."
            if tests/test-server.sh .build/release/SwiftLM 15413; then
              exit 0
            fi
            if [ "$attempt" -lt "$max_attempts" ]; then
              echo "Test failed, retrying in 10s..."
              sleep 10
            fi
            attempt=$((attempt + 1))
          done
          echo "All attempts failed"
          exit 1

      - name: Upload test logs on failure
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: ci-test-logs
          path: /tmp/SwiftLM-test-*.log
          retention-days: 7
# ── Speculative Decoding E2E (dual-model: 0.8B draft + 4B main) ──
# Uses the standard macos-15 runner (7 GB RAM).
# We test the 4B main model which safely fits within memory.
  speculative-decoding:
    runs-on: macos-15
    timeout-minutes: 45
    needs: ci  # Only run after core CI passes
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Install Metal Toolchain
        # Best effort: `|| true` keeps the job going if the download fails.
        run: xcodebuild -downloadComponent MetalToolchain || true

      - name: Cache Swift packages
        uses: actions/cache@v4
        with:
          path: .build
          key: ${{ runner.os }}-spm-SwiftLM-v2-${{ hashFiles('Package.resolved') }}
          restore-keys: |
            ${{ runner.os }}-spm-SwiftLM-v2-

      - name: Clear stale module cache
        # Errors are silenced and never fail the step.
        run: |
          find .build -type d -name ModuleCache -exec rm -rf {} + 2>/dev/null || true

      - name: Resolve dependencies
        run: swift package resolve

      - name: Build (Release)
        run: swift build -c release

      - name: Install MLX Metal library
        run: |
          python3 -m venv /tmp/mlx_venv
          /tmp/mlx_venv/bin/pip install --quiet mlx
          # The release binary gets the metallib placed next to it.
          cp /tmp/mlx_venv/lib/python*/site-packages/mlx/lib/mlx.metallib .build/release/

      - name: Cache MLX models (draft + main)
        uses: actions/cache@v4
        with:
          path: ~/.cache/huggingface
          # The key names the model pair this job runs (0.8B draft + 4B
          # main). An exact key hit is restored but never re-saved, so the
          # key must change whenever the models change; otherwise the new
          # weights are downloaded again on every run.
          # NOTE(review): confirm the pair against tests/test-speculative.sh.
          key: mlx-speculative-qwen35-0.8b-4b

      - name: Run speculative decoding E2E
        env:
          HF_HUB_DOWNLOAD_TIMEOUT: "900"
          SWIFTLM_TOP_K: "4"
        run: |
          chmod +x tests/test-speculative.sh
          # Three attempts in total (i.e. up to 2 retries); the first
          # passing attempt ends the step successfully.
          max_attempts=3
          attempt=1
          while [ "$attempt" -le "$max_attempts" ]; do
            echo "Attempt $attempt of $max_attempts..."
            if tests/test-speculative.sh .build/release/SwiftLM 15414; then
              exit 0
            fi
            if [ "$attempt" -lt "$max_attempts" ]; then
              echo "Test failed, retrying in 10s..."
              sleep 10
            fi
            attempt=$((attempt + 1))
          done
          echo "All attempts failed"
          exit 1

      - name: Upload speculative test logs on failure
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: speculative-test-logs
          path: /tmp/SwiftLM-test-speculative.log
          retention-days: 7
# ── Speculative Decoding Memory Evaluation ──
# Runs the 9B model with NUM_DRAFT_TOKENS=2 to check peak
# memory compression/efficiency. Allowed to OOM/fail.
# NOTE(review): NUM_DRAFT_TOKENS is not set anywhere in this workflow;
# presumably tests/test-speculative-eval.sh sets it — confirm.
  speculative-decoding-eval:
    runs-on: macos-15
    timeout-minutes: 45
    needs: ci
    # A failure of this job does not fail the workflow run.
    continue-on-error: true
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Install Metal Toolchain
        # Best effort: `|| true` keeps the job going if the download fails.
        run: xcodebuild -downloadComponent MetalToolchain || true

      - name: Cache Swift packages
        uses: actions/cache@v4
        with:
          key: ${{ runner.os }}-spm-SwiftLM-v2-${{ hashFiles('Package.resolved') }}
          restore-keys: |
            ${{ runner.os }}-spm-SwiftLM-v2-
          path: .build

      - name: Clear stale module cache
        # Errors are silenced and never fail the step.
        run: |
          find .build -type d -name ModuleCache -exec rm -rf {} + 2>/dev/null || true

      - name: Resolve dependencies
        run: swift package resolve

      - name: Build (Release)
        run: swift build -c release

      - name: Install MLX Metal library
        run: |
          python3 -m venv /tmp/mlx_venv
          /tmp/mlx_venv/bin/pip install --quiet mlx
          # The release binary gets the metallib placed next to it.
          cp /tmp/mlx_venv/lib/python*/site-packages/mlx/lib/mlx.metallib .build/release/

      # Unlike the other jobs, this one has no ~/.cache/huggingface cache
      # step, so any models the script needs are presumably fetched fresh.
      - name: Run speculative evaluation E2E
        env:
          HF_HUB_DOWNLOAD_TIMEOUT: '900'
          SWIFTLM_TOP_K: '4'
        run: |
          chmod +x tests/test-speculative-eval.sh
          # Three attempts in total (i.e. up to 2 retries); the first
          # passing attempt ends the step successfully.
          max_attempts=3
          attempt=1
          while [ "$attempt" -le "$max_attempts" ]; do
            echo "Attempt $attempt of $max_attempts..."
            if tests/test-speculative-eval.sh .build/release/SwiftLM 15414; then
              exit 0
            fi
            if [ "$attempt" -lt "$max_attempts" ]; then
              echo "Test failed, retrying in 10s..."
              sleep 10
            fi
            attempt=$((attempt + 1))
          done
          echo "All attempts failed"
          exit 1

      - name: Upload speculative eval logs on failure
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: speculative-eval-logs
          path: /tmp/SwiftLM-test-speculative-eval.log
          retention-days: 7