2626 PIP_DISABLE_PIP_VERSION_CHECK : " 1"
2727 APP_RELOAD : " false"
2828 APP_DEVICE : " cuda:0"
29-
3029 HF_HOME : ${{ github.workspace }}/models_cache/huggingface
3130 TORCH_HOME : ${{ github.workspace }}/models_cache/torch
3231 TRANSFORMERS_CACHE : ${{ github.workspace }}/models_cache/huggingface/hub
3332
3433jobs :
35-
3634 windows-gpu :
3735 name : Windows GPU • py${{ matrix.python }}
38-
3936 runs-on : [self-hosted, windows, gpu, cuda]
4037 timeout-minutes : 45
4138 strategy :
@@ -58,25 +55,21 @@ jobs:
5855 path : |
5956 models_cache/huggingface
6057 models_cache/torch
61- key : ${{ runner.os }} -gpu-caches-${{ hashFiles('requirements*.txt') }}
58+ key : windows -gpu-caches-${{ hashFiles('requirements*.txt') }}
6259 restore-keys : |
63- ${{ runner.os }} -gpu-caches-
60+ windows -gpu-caches-
6461
6562 - name : Install deps
66- shell : powershell
6763 run : |
6864 python -m pip install -U pip ruff
6965 pip install -r requirements.txt
70- if (Test-Path requirements-dev.txt) { pip install -r requirements-dev.txt }
66+ if exist requirements-dev.txt ( pip install -r requirements-dev.txt )
7167
7268 - name : Install PyTorch CUDA wheels
73- shell : powershell
7469 run : |
75-
7670 pip install --extra-index-url https://download.pytorch.org/whl/cu124 torch torchvision torchaudio
7771
7872 - name : Verify CUDA is available
79- shell : powershell
8073 run : |
8174 python - << 'PY'
8275 import torch, sys
@@ -86,38 +79,22 @@ jobs:
8679 PY
8780
8881 - name : Ruff (lint + fmt check)
89- shell : powershell
9082 run : |
9183 ruff --version
9284 ruff check .
9385 ruff format --check
9486
9587 - name : Pytest (GPU tests)
96- shell : powershell
9788 env :
9889 DEVICE : cuda
9990 run : |
100-
10191 pytest -q -m "gpu or gpu_cuda" --maxfail=1 -k "not slow" --durations=10
10292
10393 - name : Smoke test API on GPU (/health)
104- shell : powershell
10594 env :
10695 DEVICE : cuda
10796 run : |
108- $proc = Start-Process -FilePath uvicorn -ArgumentList "app.main:app --host 127.0.0.1 --port 8001" -PassThru
109- try {
110- $ok = $false
111- for ($i = 0; $i -lt 40; $i++) {
112- try {
113- $r = Invoke-WebRequest -Uri "http://127.0.0.1:8001/health" -TimeoutSec 2 -UseBasicParsing
114- if ($r.StatusCode -eq 200) { $ok = $true; break }
115- } catch { Start-Sleep -Milliseconds 500 }
116- }
117- if (-not $ok) { exit 1 }
118- } finally {
119- if ($proc) { Stop-Process -Id $proc.Id -Force }
120- }
97+ python scripts/smoke_test.py
12198
12299 - name : Upload logs (on failure)
123100 if : failure()
@@ -128,87 +105,3 @@ jobs:
128105 logs/**/*.log
129106 .pytest_cache/**
130107 .ruff_cache/**
131-
132-
133- ubuntu-gpu :
134- name : Ubuntu GPU • py${{ matrix.python }}
135- runs-on : [self-hosted, linux, gpu, cuda]
136- timeout-minutes : 45
137- strategy :
138- fail-fast : false
139- matrix :
140- python : ['3.12']
141- steps :
142- - uses : actions/checkout@v4
143-
144- - name : Setup Python
145- uses : actions/setup-python@v5
146- with :
147- python-version : ${{ matrix.python }}
148- cache : pip
149-
150- - name : Cache model caches (HF/Torch)
151- uses : actions/cache@v4
152- with :
153- path : |
154- models_cache/huggingface
155- models_cache/torch
156- key : linux-gpu-caches-${{ hashFiles('requirements*.txt') }}
157- restore-keys : |
158- linux-gpu-caches-
159-
160- - name : Install deps
161- shell : bash
162- run : |
163- python -m pip install -U pip ruff
164- pip install -r requirements.txt
165- if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi
166-
167- - name : Install PyTorch CUDA wheels
168- shell : bash
169- run : |
170- pip install --extra-index-url https://download.pytorch.org/whl/cu124 torch torchvision torchaudio
171-
172- - name : Verify CUDA is available
173- shell : bash
174- run : |
175- python - << 'PY'
176- import torch, sys
177- print("CUDA available:", torch.cuda.is_available())
178- print("CUDA device count:", torch.cuda.device_count())
179- sys.exit(0 if torch.cuda.is_available() else 1)
180- PY
181-
182- - name : Ruff (lint + fmt check)
183- shell : bash
184- run : |
185- ruff --version
186- ruff check .
187- ruff format --check
188-
189- - name : Pytest (GPU tests)
190- shell : bash
191- env :
192- DEVICE : cuda
193- run : |
194- pytest -q -m "gpu or gpu_cuda" --maxfail=1 -k "not slow" --durations=10
195-
196- - name : Smoke test API on GPU (/health)
197- shell : bash
198- env :
199- DEVICE : cuda
200- run : |
201- uvicorn app.main:app --host 127.0.0.1 --port 8001 &
202- pid=$!
203- python - << 'PY'
204- import time, urllib.request, sys
205- for _ in range(40):
206- try:
207- with urllib.request.urlopen('http://127.0.0.1:8001/health', timeout=2) as r:
208- print(r.read())
209- sys.exit(0)
210- except Exception:
211- time.sleep(0.5)
212- sys.exit(1)
213- PY
214- kill $pid || true
0 commit comments