Production-hardening: fix v2.1.0rc1 quantization-state and architecture-fallback bugs #15
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CI workflow.
#
# Three jobs:
#   * lint  - runs ``ruff check`` over the repository.
#   * tests - runs ``pytest tests/`` on a Python version matrix.
#   * build - builds the sdist + wheel, validates them with ``twine check``
#             and uploads ``dist/`` as an artifact. Runs only after ``lint``
#             and ``tests`` have succeeded (see ``needs``).
name: CI

# Triggers: pushes and pull requests targeting ``main``, plus manual runs.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  workflow_dispatch:

# Cancel in-progress runs of the same workflow on the same branch / PR.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

# Workflow-wide token scope: read-only access to repository contents.
permissions:
  contents: read

jobs:
  lint:
    name: Ruff lint
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
          cache: pip
      - name: Install ruff
        run: |
          python -m pip install --upgrade pip
          pip install "ruff>=0.5.0"
      - name: Run ruff
        run: ruff check .

  tests:
    name: Tests (Python ${{ matrix.python-version }})
    runs-on: ubuntu-latest
    strategy:
      # Let every matrix entry run to completion even if another one fails.
      fail-fast: false
      matrix:
        # Quoted so that "3.10" stays a string instead of the float 3.1.
        python-version: ["3.10", "3.11", "3.12"]
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: pip
      - name: Install minimal runtime deps
        # We deliberately install CPU-only ``torch`` to keep CI fast and avoid
        # pulling CUDA / cuDNN wheels. ``bitsandbytes`` is also skipped (it is
        # GPU-only). Tests stub out the heavy I/O and never touch a real GPU.
        run: |
          python -m pip install --upgrade pip
          pip install --index-url https://download.pytorch.org/whl/cpu "torch>=2.0.0"
          pip install \
            "transformers>=4.36.0" \
            "datasets>=2.14.0" \
            "accelerate>=0.24.0" \
            "peft>=0.6.0" \
            "scipy>=1.10.0" \
            "scikit-learn>=1.3.0" \
            "tqdm>=4.65.0" \
            "rich>=13.0.0" \
            "huggingface_hub>=0.20.0" \
            "psutil" \
            "gguf" \
            "py-cpuinfo" \
            "pytest>=7.4.0"
      - name: Install QuantLLM (no deps; we already installed them above)
        # ``--no-deps`` skips re-resolving the heavy dependency set (notably
        # ``bitsandbytes``, which is GPU-only and not needed by the test
        # suite). The import-only install is what makes ``import quantllm``
        # work in the test workers.
        run: pip install --no-deps -e .
      - name: Run pytest
        env:
          # Quoted so the environment variable is the string "0".
          QUANTLLM_BANNER: "0"
        run: pytest tests/ -ra

  build:
    name: Build sdist + wheel
    runs-on: ubuntu-latest
    # Only package the project once linting and the full test matrix pass.
    needs: [lint, tests]
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
          cache: pip
      - name: Install build tooling
        run: |
          python -m pip install --upgrade pip
          pip install build twine
      - name: Build distribution
        run: python -m build
      - name: Validate artifacts
        run: twine check dist/*
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          # The commit SHA is part of the artifact name.
          name: dist-${{ github.sha }}
          path: dist/
          retention-days: 14