SharpAI · solderzzc · Mar 31, 2026 · Mar 23, 2026 · Mar 23, 2026 · Mar 23, 2026
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -2,7 +2,7 @@ name: Build
 
 on:
   push:
-    branches: [main]
+    branches: [main, develop, feature/*]  # "*" does not match "/": feature/a/b is not covered
   pull_request:
     branches: [main]
 
@@ -11,24 +11,43 @@ jobs:
     runs-on: macos-15
     steps:
       - uses: actions/checkout@v4
+        with:
+          submodules: recursive  # also check out the mlx-swift-lm submodule (see .gitmodules)
 
       - name: Install Metal Toolchain
         run: xcodebuild -downloadComponent MetalToolchain || true
 
+      - name: Cache Swift packages
+        uses: actions/cache@v4
+        with:
+          path: .build
+          # The product name is hard-coded in the key: on a rename (e.g. mlx-server→SwiftLM),
+          # update it here and in restore-keys to bust the cache and avoid stale PCH errors.
+          key: ${{ runner.os }}-spm-SwiftLM-${{ hashFiles('Package.resolved') }}
+          restore-keys: |
+            ${{ runner.os }}-spm-SwiftLM-
+
       - name: Resolve dependencies
         run: swift package resolve
 
+      - name: Clear stale module cache
+        # Best-effort: failures are ignored (`|| true`). Prevents, after the repo rename:
+        # "PCH was compiled with module cache path '…mlx-server…' but the path is currently '…SwiftLM…'".
+        run: find .build -type d -name ModuleCache -exec rm -rf {} + 2>/dev/null || true
+
       - name: Build (Release)
         run: swift build -c release
 
       - name: Verify binary
         run: |
-          ls -lh .build/release/mlx-server
-          file .build/release/mlx-server
+          ls -lh .build/release/SwiftLM
+          file .build/release/SwiftLM
 
-      - name: Upload binary
-        uses: actions/upload-artifact@v4
-        with:
-          name: mlx-server-arm64
-          path: .build/release/mlx-server
-          retention-days: 30
+      - name: TurboQuant unit tests
+        run: |
+          # Compile and run standalone C++ unit tests for the TurboQuant
+          # KV cache compression algorithm (ported from TheTom/llama-cpp-turboquant).
+          # Tests: centroids, WHT self-inverse, rotation orthogonality,
+          #        3-bit pack/unpack, V-cache SNR, K-cache IP SNR, fp16 round-trip.
+          clang++ -std=c++17 -O2 -o /tmp/tq_test tests/test_turbo_quant.cpp
+          /tmp/tq_test
diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml
@@ -0,0 +1,85 @@
+name: E2E Tests
+
+on:
+  push:
+    branches: [main, feature/*]  # "*" does not match "/": feature/a/b is not covered
+  pull_request:
+    branches: [main]
+
+concurrency:
+  group: e2e-${{ github.ref }}  # one concurrency group per ref
+  cancel-in-progress: true  # a newer run in the same group cancels the in-progress one
+
+jobs:
+  e2e:
+    runs-on: macos-15
+    timeout-minutes: 30  # job-wide cap: covers the release build plus up to 3 E2E attempts
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive  # also check out the mlx-swift-lm submodule (see .gitmodules)
+
+      - name: Cache Swift packages
+        uses: actions/cache@v4
+        with:
+          path: .build
+          key: ${{ runner.os }}-spm-SwiftLM-${{ hashFiles('Package.resolved') }}  # same key as build.yml; product name is hard-coded
+          restore-keys: |
+            ${{ runner.os }}-spm-SwiftLM-
+
+      - name: Clear stale module cache
+        # Best-effort: failures are ignored (`|| true`). Prevents, after the repo rename:
+        # "PCH was compiled with module cache path '…mlx-server…' but the path is currently '…SwiftLM…'".
+        run: find .build -type d -name ModuleCache -exec rm -rf {} + 2>/dev/null || true
+
+      - name: Build (Release)
+        run: swift build -c release
+
+      - name: Install MLX Metal library  # copies mlx.metallib from the pip-installed mlx package next to the release binary
+        run: |
+          python3 -m venv /tmp/mlx_venv
+          /tmp/mlx_venv/bin/pip install --quiet mlx
+          cp /tmp/mlx_venv/lib/python*/site-packages/mlx/lib/mlx.metallib .build/release/
+
+      - name: Cache MLX model
+        uses: actions/cache@v4
+        with:
+          path: ~/.cache/huggingface  # NOTE(review): presumably where tests/test-server.sh downloads the model — confirm
+          key: mlx-model-qwen2.5-0.5b-4bit  # static key (no hash): change this string to invalidate the cache
+
+      - name: TurboQuant unit tests
+        run: |
+          # Fast pre-flight: verify compression math before expensive model download.
+          # Tests: Lloyd-Max centroids, WHT correctness, rotation orthogonality,
+          #        3-bit pack/unpack, V-cache SNR (14.6 dB), K-cache IP SNR (13.7 dB), fp16.
+          # No external deps — compiles standalone with clang++.
+          clang++ -std=c++17 -O2 -o /tmp/tq_test tests/test_turbo_quant.cpp
+          /tmp/tq_test
+
+      - name: Run E2E tests  # up to 3 attempts, 10s apart; stops at the first success
+        env:
+          HF_HUB_DOWNLOAD_TIMEOUT: "600"  # NOTE(review): presumably seconds, consumed by the model download — confirm
+        run: |
+          chmod +x tests/test-server.sh
+          # Retry up to 2 times for transient HuggingFace download failures
+          for attempt in 1 2 3; do
+            echo "Attempt $attempt of 3..."
+            if tests/test-server.sh .build/release/SwiftLM 15413; then
+              exit 0
+            fi
+            if [ "$attempt" -lt 3 ]; then
+              echo "Test failed, retrying in 10s..."
+              sleep 10
+            fi
+          done
+          echo "All attempts failed"
+          exit 1
+
+      - name: Upload test logs on failure
+        if: failure()  # only when an earlier step in this job has failed
+        uses: actions/upload-artifact@v4
+        with:
+          name: e2e-test-logs
+          path: /tmp/SwiftLM-test-*.log  # NOTE(review): assumes tests/test-server.sh writes logs matching this glob — confirm
+          retention-days: 7
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -31,6 +31,7 @@ jobs:
       - uses: actions/checkout@v4
         with:
           fetch-depth: 0  # Full history for build number
+          submodules: recursive  # also check out the mlx-swift-lm submodule (see .gitmodules)
 
       - name: Install Metal Toolchain
         run: xcodebuild -downloadComponent MetalToolchain || true
@@ -64,31 +65,31 @@ jobs:
 
       - name: Verify binary
         run: |
-          ls -lh .build/release/mlx-server
-          file .build/release/mlx-server
-          .build/release/mlx-server --help || true
+          ls -lh .build/release/SwiftLM
+          file .build/release/SwiftLM
+          .build/release/SwiftLM --help || true
 
       - name: Package binary
         run: |
           mkdir -p release
-          cp .build/release/mlx-server release/
+          cp .build/release/SwiftLM release/
           cp LICENSE README.md release/
           cd release
-          tar -czvf ../mlx-server-${{ steps.tag.outputs.name }}-macos-arm64.tar.gz .
+          tar -czvf ../SwiftLM-${{ steps.tag.outputs.name }}-macos-arm64.tar.gz .
 
       - name: Upload artifact
         uses: actions/upload-artifact@v4
         with:
-          name: mlx-server-${{ steps.tag.outputs.name }}-macos-arm64
-          path: mlx-server-${{ steps.tag.outputs.name }}-macos-arm64.tar.gz
+          name: SwiftLM-${{ steps.tag.outputs.name }}-macos-arm64
+          path: SwiftLM-${{ steps.tag.outputs.name }}-macos-arm64.tar.gz
           retention-days: 90
 
       - name: Prepare release notes
         id: notes
         run: |
           CHANGELOG=$(cat /tmp/changelog.txt)
           cat > /tmp/release_notes.md << 'RELEASE_EOF'
-          ## mlx-server ${{ steps.tag.outputs.full }}
+          ## SwiftLM ${{ steps.tag.outputs.full }}
 
           <details open>
 
@@ -104,25 +105,25 @@ jobs:
 
           ### Download
 
-          - [macOS Apple Silicon (arm64)](https://github.com/SharpAI/mlx-server/releases/download/${{ steps.tag.outputs.name }}/mlx-server-${{ steps.tag.outputs.name }}-macos-arm64.tar.gz)
+          - [macOS Apple Silicon (arm64)](https://github.com/SharpAI/SwiftLM/releases/download/${{ steps.tag.outputs.name }}/SwiftLM-${{ steps.tag.outputs.name }}-macos-arm64.tar.gz)
 
           ### Quick Start
           ```bash
-          tar -xzf mlx-server-${{ steps.tag.outputs.name }}-macos-arm64.tar.gz
-          ./mlx-server --model mlx-community/Qwen2.5-3B-Instruct-4bit --port 5413
+          tar -xzf SwiftLM-${{ steps.tag.outputs.name }}-macos-arm64.tar.gz
+          ./SwiftLM --model mlx-community/Qwen2.5-3B-Instruct-4bit --port 5413
           ```
 
-          > **Note:** Requires `mlx.metallib` next to the binary for GPU compute. See [README](https://github.com/SharpAI/mlx-server#metal-shader-library) for setup.
+          > **Note:** Requires `mlx.metallib` next to the binary for GPU compute. See [README](https://github.com/SharpAI/SwiftLM#metal-shader-library) for setup.
           RELEASE_EOF
 
       - name: Create release
         if: ${{ github.event_name == 'push' || github.event.inputs.create_release == 'true' }}
         uses: softprops/action-gh-release@v2
         with:
           tag_name: ${{ steps.tag.outputs.name }}
-          name: "mlx-server ${{ steps.tag.outputs.name }}"
+          name: "SwiftLM ${{ steps.tag.outputs.name }}"
           body_path: /tmp/release_notes.md
           files: |
-            mlx-server-${{ steps.tag.outputs.name }}-macos-arm64.tar.gz
+            SwiftLM-${{ steps.tag.outputs.name }}-macos-arm64.tar.gz
           draft: false
           prerelease: false
diff --git a/.gitignore b/.gitignore
@@ -13,3 +13,10 @@ DerivedData/
 # IDE
 .vscode/
 .idea/
+
+# Temporary Artifacts & Logs
+*.log
+*.metallib
+*.pid
+curl_out.txt
+sample.txt
diff --git a/.gitmodules b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "mlx-swift-lm"]
+	path = mlx-swift-lm
+	url = https://github.com/SharpAI/mlx-swift-lm.git