Merge PR #14: GitHub Actions CI

cursoragent · FluffyAIcode · cursoragent · commit 1bd487e0f2cb · 2026-05-24T12:00:16.000Z
Co-authored-by: FluffyAIcode <FluffyAIcode@users.noreply.github.com>
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -0,0 +1,142 @@
+name: CI
+
+# Runs on every push to main and on every PR targeting main.
+#
+# Scope: platform-neutral unit tests with 100% line coverage on the
+# library modules we actually ship for this commit. We deliberately
+# DO NOT run:
+#
+#   * tests/core/                  — needs HuggingFace weights
+#   * tests/system/                — same, plus is slow
+#   * tests/inference_engine/proposer/ — uses real Qwen3 sparse
+#                                        proposer; HF-cache-bound
+#   * tests/backends/mlx/test_{verifier,proposer,cache,torch_bridge}.py
+#                                  — Apple-Silicon only
+#
+# Mac and CUDA contributors run the full suite locally via
+# scripts/run_platform_tests.sh and push the platform-test reports to
+# the PR branch as evidence; this CI workflow guards the platform-
+# neutral surface so a regression there cannot land on main.
+
+# Triggers: pushes to main, pull requests targeting main, and manual
+# runs from the Actions tab (workflow_dispatch takes no inputs).
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+  workflow_dispatch: {}
+
+# Least privilege: no job in this workflow writes to the repository,
+# issues or pull requests — they only check out the code, run tests
+# and upload artifacts — so restrict GITHUB_TOKEN to read-only access
+# to repository contents instead of inheriting the repo default.
+permissions:
+  contents: read
+
+# Cancel superseded runs on the same branch — saves CI time on
+# rapid-fire pushes. The group key is workflow name + git ref, so runs
+# on different branches / PRs never cancel each other. Note that this
+# also applies to back-to-back pushes to main.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  # Runs the platform-neutral pytest targets on Linux and fails the
+  # build unless the measured packages reach 100% line coverage.
+  unit-tests:
+    name: unit tests + 100% coverage
+    runs-on: ubuntu-latest
+    strategy:
+      # Only one Python version today; fail-fast is disabled so that,
+      # if more versions are added to the matrix, one failing entry
+      # does not cancel the others.
+      fail-fast: false
+      matrix:
+        python-version: ["3.12"]
+    steps:
+      - name: Check out
+        uses: actions/checkout@v4
+
+      # `cache: pip` turns on setup-python's built-in pip cache so
+      # repeated runs do not re-download every wheel.
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: pip
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+
+      # Logs the resolved versions of the heavyweight dependencies for
+      # debugging; the step also fails early if any of them is missing
+      # because the imports themselves would raise.
+      - name: Show installed key versions
+        run: |
+          python -c "import torch, fastapi, pydantic, prometheus_client, transformers; \
+            print('torch', torch.__version__); \
+            print('fastapi', fastapi.__version__); \
+            print('pydantic', pydantic.VERSION); \
+            print('transformers', transformers.__version__); \
+            print('prometheus_client', __import__('importlib.metadata', fromlist=['version']).version('prometheus_client'))"
+
+      # The repository itself is not pip-installed above, so its root
+      # is put on PYTHONPATH to make `inference_engine` and `training`
+      # importable. The terminal report uses `term-missing` so that a
+      # failure of the 100% gate lists the uncovered line numbers
+      # directly in the job log. Machine-readable reports go to
+      # coverage.xml and junit.xml for the upload step below.
+      - name: Run platform-neutral test suite with 100% coverage
+        env:
+          PYTHONPATH: .
+        run: |
+          pytest \
+            tests/inference_engine/server/ \
+            tests/inference_engine/memory/ \
+            tests/inference_engine/scheduler/ \
+            tests/inference_engine/pipeline/ \
+            tests/training/repr_align/ \
+            tests/backends/mlx/test_env.py \
+            --cov=inference_engine.server \
+            --cov=inference_engine.memory \
+            --cov=inference_engine.scheduler \
+            --cov=inference_engine.pipeline \
+            --cov=training.repr_align \
+            --cov-report=term-missing \
+            --cov-report=xml:coverage.xml \
+            --cov-report=xml:coverage.xml \
+            --cov-fail-under=100 \
+            --junitxml=junit.xml \
+            -v
+
+      # `if: always()` uploads the reports even when pytest failed, so
+      # a red run can still be inspected. Missing files only warn,
+      # because an earlier step may have failed before pytest produced
+      # them.
+      - name: Upload coverage artifact
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: coverage-py${{ matrix.python-version }}
+          path: |
+            coverage.xml
+            junit.xml
+          if-no-files-found: warn
+          retention-days: 14
+
+  # Smoke test: every listed module must import cleanly on a Linux
+  # runner with only requirements.txt installed. It has no `needs:`,
+  # so it does not wait for the unit-tests job. It also covers modules
+  # such as inference_engine.proposer whose test directories are not
+  # run by this workflow.
+  package-import-smoke:
+    name: package import smoke
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out
+        uses: actions/checkout@v4
+      # Same Python version as the unit-tests matrix, hard-coded here
+      # because this job has no matrix of its own.
+      - name: Set up Python 3.12
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+          cache: pip
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+      # PYTHONPATH=. makes the repository root importable, since the
+      # project itself is not pip-installed. The trailing `; \` on each
+      # line joins all imports into a single `python -c` program, so
+      # the first failing import aborts the step with its traceback
+      # and the final print only runs if every import succeeded.
+      - name: Import every shipping subpackage
+        env:
+          PYTHONPATH: .
+        run: |
+          python -c "import inference_engine; \
+            import inference_engine.server; \
+            import inference_engine.server.app; \
+            import inference_engine.server.config; \
+            import inference_engine.server.engine; \
+            import inference_engine.server.metrics; \
+            import inference_engine.server.errors; \
+            import inference_engine.server.auth; \
+            import inference_engine.server.tokenizer; \
+            import inference_engine.server.streaming; \
+            import inference_engine.server.schemas; \
+            import inference_engine.memory; \
+            import inference_engine.memory.slab; \
+            import inference_engine.memory.pool; \
+            import inference_engine.scheduler; \
+            import inference_engine.scheduler.config; \
+            import inference_engine.scheduler.scheduler; \
+            import inference_engine.scheduler.session; \
+            import inference_engine.pipeline; \
+            import inference_engine.pipeline.coordinator; \
+            import inference_engine.proposer; \
+            import inference_engine.proposer.sparse_logits; \
+            import inference_engine.backends.mlx.env; \
+            import training.repr_align; \
+            print('all imports succeeded')"
diff --git a/README.md b/README.md
@@ -1,5 +1,10 @@
 # DLM Proposer + AR Verifier — runnable KV-cache-saving framework
 
+[![CI](https://github.com/FluffyAIcode/Kakeya-LLM-Inference-engine/actions/workflows/ci.yaml/badge.svg?branch=main)](https://github.com/FluffyAIcode/Kakeya-LLM-Inference-engine/actions/workflows/ci.yaml)
+[![Release](https://img.shields.io/badge/release-v0.1.0-blue)](https://github.com/FluffyAIcode/Kakeya-LLM-Inference-engine/releases/tag/v0.1.0)
+[![Platform](https://img.shields.io/badge/platform-Apple%20Silicon-lightgrey)](docs/local-inference-engine.md)
+[![ADRs](https://img.shields.io/badge/ADRs-0001%20%7C%200002-green)](docs/adr/)
+
 Runs the speculative-decoding architecture designed in the prior product
 discussion using **real, public** weights:
 
@@ -387,6 +392,26 @@ admissions, and releases all slabs before the process exits.
 Configuration is via env vars (all prefixed `KAKEYA_*`): see the
 docstring of [`inference_engine/server/config.py`](inference_engine/server/config.py).
 
+## Continuous integration
+
+Every push to `main` and every PR targeting `main` runs the platform-neutral test
+suite on GitHub Actions ([`.github/workflows/ci.yaml`](.github/workflows/ci.yaml)),
+enforcing **100% line coverage** on the shipping library modules:
+
+```
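+inference_engine.server  inference_engine.memory  inference_engine.scheduler
+inference_engine.pipeline  training.repr_align
+```
+
+`inference_engine.proposer` is import-checked by the CI smoke job, but its
+coverage is not measured in CI because its tests need real weights.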
+
+Tests that need real Qwen3 weights (`tests/core/`, `tests/system/`,
+`tests/inference_engine/proposer/`) are run locally on hosts with the
+HuggingFace cache populated; backend-specific suites
+(`tests/backends/mlx/test_{verifier,proposer,cache,torch_bridge}.py`)
+run on Apple Silicon contributors' machines via
+`scripts/run_platform_tests.sh --backend mlx`. The CI workflow
+guards the platform-neutral surface so a regression there cannot
+land on `main`.
+
 ## Architecture Decision Records
 
 Design decisions that the rest of the codebase depends on are recorded