# QuantLLM/.github/workflows/ci.yml (main branch of codewithdark-git/QuantLLM)
# Continuous-integration workflow; "CI" is the name shown for its runs.
name: CI

# Triggers: pushes to main, pull requests targeting main, and manual dispatch.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  workflow_dispatch:

# One concurrency group per workflow + git ref; a newer run in the same group
# cancels the one that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

# Grant the workflow token only read access to repository contents.
permissions:
  contents: read

jobs:
  # Static analysis: lint the checkout with ruff on Python 3.11.
  lint:
    runs-on: ubuntu-latest
    name: Ruff lint
    steps:
      # Fetch the repository so ruff has files to inspect.
      - uses: actions/checkout@v4

      # Provision the interpreter and enable setup-python's pip cache.
      - uses: actions/setup-python@v5
        with:
          cache: 'pip'
          python-version: '3.11'

      # Upgrade pip first, then install ruff at 0.5.0 or newer.
      - name: Install ruff
        run: |
          python -m pip install --upgrade pip
          pip install "ruff>=0.5.0"

      # Lint everything under the current directory.
      - name: Run ruff
        run: ruff check .

  # Runs the pytest suite on Python 3.10, 3.11 and 3.12 with CPU-only
  # dependencies.
  tests:
    name: Tests (Python ${{ matrix.python-version }})
    runs-on: ubuntu-latest
    # Cap each matrix leg. Without an explicit limit a hung test would hold
    # its runner until GitHub's default 360-minute job timeout. Raise this
    # value if the suite legitimately outgrows it.
    timeout-minutes: 30
    strategy:
      # Let the remaining Python versions finish even if one of them fails.
      fail-fast: false
      matrix:
        python-version: ["3.10", "3.11", "3.12"]
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: pip
      - name: Install minimal runtime deps
        # We deliberately install CPU-only ``torch`` to keep CI fast and avoid
        # pulling CUDA / cuDNN wheels. ``bitsandbytes`` is also skipped (it is
        # GPU-only). Tests stub out the heavy I/O and never touch a real GPU.
        run: |
          python -m pip install --upgrade pip
          pip install --index-url https://download.pytorch.org/whl/cpu "torch>=2.0.0"
          pip install \
            "transformers>=4.36.0" \
            "datasets>=2.14.0" \
            "accelerate>=0.24.0" \
            "peft>=0.6.0" \
            "scipy>=1.10.0" \
            "scikit-learn>=1.3.0" \
            "tqdm>=4.65.0" \
            "rich>=13.0.0" \
            "huggingface_hub>=0.20.0" \
            "psutil" \
            "gguf" \
            "py-cpuinfo" \
            "pytest>=7.4.0"
      - name: Install QuantLLM (no deps; we already installed them above)
        # ``--no-deps`` skips re-resolving the heavy dependency set (notably
        # ``bitsandbytes``, which is GPU-only and not needed by the test
        # suite). The import-only install is what makes ``import quantllm``
        # work in the test workers.
        run: pip install --no-deps -e .
      - name: Run pytest
        env:
          # Passed to the test process as the string "0".
          # NOTE(review): presumably disables QuantLLM's startup banner;
          # confirm against the package.
          QUANTLLM_BANNER: "0"
        # ``-ra`` prints a short summary line for every non-passing test.
        run: pytest tests/ -ra

  # Packaging check: starts only after the lint and tests jobs have succeeded.
  build:
    needs:
      - lint
      - tests
    runs-on: ubuntu-latest
    name: Build sdist + wheel
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          cache: 'pip'
          python-version: '3.11'

      # ``build`` creates the distributions; ``twine`` validates them.
      - name: Install build tooling
        run: |
          python -m pip install --upgrade pip
          pip install build twine

      # The later steps read the resulting files from dist/.
      - name: Build distribution
        run: python -m build

      - name: Validate artifacts
        run: twine check dist/*

      # Keep the built files for 14 days in an artifact named after the
      # commit SHA.
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          path: dist/
          name: dist-${{ github.sha }}
          retention-days: 14