Skip to content

Commit f14b8a6

Browse files
committed
a66601
1 parent 620479f commit f14b8a6

1 file changed

Lines changed: 4 additions & 111 deletions

File tree

.github/workflows/ci-gpu.yml

Lines changed: 4 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,13 @@ env:
2626
PIP_DISABLE_PIP_VERSION_CHECK: "1"
2727
APP_RELOAD: "false"
2828
APP_DEVICE: "cuda:0"
29-
3029
HF_HOME: ${{ github.workspace }}/models_cache/huggingface
3130
TORCH_HOME: ${{ github.workspace }}/models_cache/torch
3231
TRANSFORMERS_CACHE: ${{ github.workspace }}/models_cache/huggingface/hub
3332

3433
jobs:
35-
3634
windows-gpu:
3735
name: Windows GPU • py${{ matrix.python }}
38-
3936
runs-on: [self-hosted, windows, gpu, cuda]
4037
timeout-minutes: 45
4138
strategy:
@@ -58,25 +55,21 @@ jobs:
5855
path: |
5956
models_cache/huggingface
6057
models_cache/torch
61-
key: ${{ runner.os }}-gpu-caches-${{ hashFiles('requirements*.txt') }}
58+
key: windows-gpu-caches-${{ hashFiles('requirements*.txt') }}
6259
restore-keys: |
63-
${{ runner.os }}-gpu-caches-
60+
windows-gpu-caches-
6461
6562
- name: Install deps
66-
shell: powershell
6763
run: |
6864
python -m pip install -U pip ruff
6965
pip install -r requirements.txt
70-
if (Test-Path requirements-dev.txt) { pip install -r requirements-dev.txt }
66+
if exist requirements-dev.txt ( pip install -r requirements-dev.txt )
7167
7268
- name: Install PyTorch CUDA wheels
73-
shell: powershell
7469
run: |
75-
7670
pip install --extra-index-url https://download.pytorch.org/whl/cu124 torch torchvision torchaudio
7771
7872
- name: Verify CUDA is available
79-
shell: powershell
8073
run: |
8174
python - << 'PY'
8275
import torch, sys
@@ -86,38 +79,22 @@ jobs:
8679
PY
8780
8881
- name: Ruff (lint + fmt check)
89-
shell: powershell
9082
run: |
9183
ruff --version
9284
ruff check .
9385
ruff format --check
9486
9587
- name: Pytest (GPU tests)
96-
shell: powershell
9788
env:
9889
DEVICE: cuda
9990
run: |
100-
10191
pytest -q -m "gpu or gpu_cuda" --maxfail=1 -k "not slow" --durations=10
10292
10393
- name: Smoke test API on GPU (/health)
104-
shell: powershell
10594
env:
10695
DEVICE: cuda
10796
run: |
108-
$proc = Start-Process -FilePath uvicorn -ArgumentList "app.main:app --host 127.0.0.1 --port 8001" -PassThru
109-
try {
110-
$ok = $false
111-
for ($i = 0; $i -lt 40; $i++) {
112-
try {
113-
$r = Invoke-WebRequest -Uri "http://127.0.0.1:8001/health" -TimeoutSec 2 -UseBasicParsing
114-
if ($r.StatusCode -eq 200) { $ok = $true; break }
115-
} catch { Start-Sleep -Milliseconds 500 }
116-
}
117-
if (-not $ok) { exit 1 }
118-
} finally {
119-
if ($proc) { Stop-Process -Id $proc.Id -Force }
120-
}
97+
python scripts/smoke_test.py
12198
12299
- name: Upload logs (on failure)
123100
if: failure()
@@ -128,87 +105,3 @@ jobs:
128105
logs/**/*.log
129106
.pytest_cache/**
130107
.ruff_cache/**
131-
132-
133-
ubuntu-gpu:
134-
name: Ubuntu GPU • py${{ matrix.python }}
135-
runs-on: [self-hosted, linux, gpu, cuda]
136-
timeout-minutes: 45
137-
strategy:
138-
fail-fast: false
139-
matrix:
140-
python: ['3.12']
141-
steps:
142-
- uses: actions/checkout@v4
143-
144-
- name: Setup Python
145-
uses: actions/setup-python@v5
146-
with:
147-
python-version: ${{ matrix.python }}
148-
cache: pip
149-
150-
- name: Cache model caches (HF/Torch)
151-
uses: actions/cache@v4
152-
with:
153-
path: |
154-
models_cache/huggingface
155-
models_cache/torch
156-
key: linux-gpu-caches-${{ hashFiles('requirements*.txt') }}
157-
restore-keys: |
158-
linux-gpu-caches-
159-
160-
- name: Install deps
161-
shell: bash
162-
run: |
163-
python -m pip install -U pip ruff
164-
pip install -r requirements.txt
165-
if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi
166-
167-
- name: Install PyTorch CUDA wheels
168-
shell: bash
169-
run: |
170-
pip install --extra-index-url https://download.pytorch.org/whl/cu124 torch torchvision torchaudio
171-
172-
- name: Verify CUDA is available
173-
shell: bash
174-
run: |
175-
python - << 'PY'
176-
import torch, sys
177-
print("CUDA available:", torch.cuda.is_available())
178-
print("CUDA device count:", torch.cuda.device_count())
179-
sys.exit(0 if torch.cuda.is_available() else 1)
180-
PY
181-
182-
- name: Ruff (lint + fmt check)
183-
shell: bash
184-
run: |
185-
ruff --version
186-
ruff check .
187-
ruff format --check
188-
189-
- name: Pytest (GPU tests)
190-
shell: bash
191-
env:
192-
DEVICE: cuda
193-
run: |
194-
pytest -q -m "gpu or gpu_cuda" --maxfail=1 -k "not slow" --durations=10
195-
196-
- name: Smoke test API on GPU (/health)
197-
shell: bash
198-
env:
199-
DEVICE: cuda
200-
run: |
201-
uvicorn app.main:app --host 127.0.0.1 --port 8001 &
202-
pid=$!
203-
python - << 'PY'
204-
import time, urllib.request, sys
205-
for _ in range(40):
206-
try:
207-
with urllib.request.urlopen('http://127.0.0.1:8001/health', timeout=2) as r:
208-
print(r.read())
209-
sys.exit(0)
210-
except Exception:
211-
time.sleep(0.5)
212-
sys.exit(1)
213-
PY
214-
kill $pid || true

0 commit comments

Comments
 (0)