Skip to content

Commit 487119b

Browse files
authored
Update ci-cd.yml
1 parent 9edf45b commit 487119b

1 file changed

Lines changed: 221 additions & 0 deletions

File tree

.github/workflows/ci-cd.yml

Lines changed: 221 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1 +1,222 @@
1+
# Display name of the workflow.
name: CI/CD Pipeline

# Events that start the workflow.
on:
  # Commits pushed to either long-lived branch.
  push:
    branches:
      - main
      - develop
  # Pull requests that target those same branches.
  pull_request:
    branches:
      - main
      - develop
  # Release events of type `created`.
  release:
    types:
      - created
10+
11+
# Workflow-level variables, read by steps through the `env` context.
env:
  # Quoted so the value stays the string "3.10" instead of the float 3.1.
  PYTHON_VERSION: "3.10"
  # NOTE(review): no step visible in this workflow reads PYTORCH_VERSION;
  # confirm whether the torch installs were meant to be pinned to it.
  PYTORCH_VERSION: "2.0.0"
14+
15+
jobs:
16+
# Static-analysis gate: formatting, import order, lint, security scan and
# type checking, all run against the repository root.
code-quality:
  name: Code Quality Checks
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v3

    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: ${{ env.PYTHON_VERSION }}

    # pylint is installed here although no step in this job invokes it.
    - name: Install dependencies
      run: |
        pip install black isort flake8 mypy pylint bandit

    - name: Black formatting check
      run: black --check .

    - name: isort import sorting check
      run: isort --check-only .

    # Only syntax errors and undefined names are selected (E9, F63, F7, F82).
    - name: Flake8 linting
      run: flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics

    - name: Security check with bandit
      run: bandit -r . -f json -o bandit-report.json

    # bandit sends its findings to the JSON file rather than the log, so the
    # report is published even when the scan step fails; otherwise a red run
    # gives no hint about what was found.
    - name: Upload bandit report
      if: always()
      uses: actions/upload-artifact@v3
      with:
        name: bandit-report
        path: bandit-report.json

    # Type errors stay non-blocking, but the step is now marked as failed
    # instead of being silently forced green with `|| true`.
    - name: Type checking with mypy
      continue-on-error: true
      run: mypy . --ignore-missing-imports
45+
46+
# CPU-only pytest run, repeated once per interpreter in the matrix.
unit-tests:
  name: Unit Tests
  runs-on: ubuntu-latest
  strategy:
    # Without this, the first failing interpreter cancels the remaining
    # matrix entries and hides whether the failure is version-specific.
    fail-fast: false
    matrix:
      # Quoted so "3.10" is not read as the float 3.1.
      python-version:
        - '3.8'
        - '3.9'
        - '3.10'
  steps:
    - uses: actions/checkout@v3

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}

    # torch comes from the CPU wheel index; the project itself is installed
    # in editable mode after its requirements.
    - name: Install dependencies
      run: |
        pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
        pip install -r requirements.txt
        pip install pytest pytest-cov pytest-xdist
        pip install -e .

    # Produces both an XML and an HTML coverage report; only the XML file is
    # consumed by the next step.
    - name: Run unit tests
      run: |
        pytest test_distributed.py -v --cov=. --cov-report=xml --cov-report=html

    - name: Upload coverage to Codecov
      uses: codecov/codecov-action@v3
      with:
        file: ./coverage.xml
75+
76+
# End-to-end training runs on a self-hosted GPU runner; push events only.
integration-tests:
  name: Integration Tests (GPU)
  if: github.event_name == 'push'
  runs-on:
    - self-hosted
    - gpu
  steps:
    - uses: actions/checkout@v3

    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: ${{ env.PYTHON_VERSION }}

    # torch is taken from the cu118 wheel index for this job.
    - name: Install dependencies
      run: |
        pip install torch torchvision --index-url https://download.pytorch.org/whl/cu118
        pip install -r requirements.txt
        pip install -e .

    # One-epoch run launched as a single process.
    - name: Run single GPU tests
      run: |
        python production_train.py --batch-size 16 --epochs 1

    # Same script launched through torchrun with two processes on this node.
    - name: Run multi-GPU tests
      run: |
        torchrun --nproc_per_node=2 production_train.py --batch-size 16 --epochs 1 --strategy ddp

    - name: Run benchmarks
      run: |
        python run_benchmark.py --gpus 1 2 --strategies ddp --batch-sizes 32
105+
106+
# Builds the container image once the quality and unit-test jobs have passed.
docker-build:
  name: Build Docker Image
  needs:
    - code-quality
    - unit-tests
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v3

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v2

    # NOTE(review): this step and the tags below depend on repository
    # secrets; confirm they are available for every event that triggers
    # this job (e.g. pull requests from forks).
    - name: Login to DockerHub
      uses: docker/login-action@v2
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_TOKEN }}

    - name: Build and push
      uses: docker/build-push-action@v4
      with:
        context: .
        # The image is pushed only for push events on main; every other run
        # passes `false` here.
        push: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
        # Two tags: a moving `latest` and one named after the commit SHA.
        tags: |
          ${{ secrets.DOCKERHUB_USERNAME }}/distributed-training:latest
          ${{ secrets.DOCKERHUB_USERNAME }}/distributed-training:${{ github.sha }}
        # Layer cache is read from and written to a `buildcache` tag in the
        # same repository.
        cache-from: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/distributed-training:buildcache
        cache-to: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/distributed-training:buildcache,mode=max
132+
133+
# Benchmark sweep on a self-hosted GPU runner; pull-request events only.
# Results are published as a workflow artifact.
performance-benchmarks:
  name: Performance Benchmarks
  runs-on: [self-hosted, gpu]
  if: github.event_name == 'pull_request'
  steps:
    - uses: actions/checkout@v3

    # Matches the integration-tests job, which runs on the same runner
    # labels: select the interpreter explicitly instead of using whatever
    # Python the self-hosted machine happens to expose.
    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: ${{ env.PYTHON_VERSION }}

    # Uses the same cu118 wheel index as the integration-tests job so both
    # GPU jobs exercise the same torch build.
    - name: Install dependencies
      run: |
        pip install torch torchvision --index-url https://download.pytorch.org/whl/cu118
        pip install -r requirements.txt
        pip install -e .

    - name: Run performance benchmarks
      run: |
        python run_benchmark.py --gpus 1 2 4 --strategies ddp fsdp --output-dir benchmark-results

    # The path matches the --output-dir passed to the benchmark script.
    - name: Upload benchmark results
      uses: actions/upload-artifact@v3
      with:
        name: benchmark-results
        path: benchmark-results/
155+
156+
# Filesystem vulnerability scan with Trivy; findings are uploaded as SARIF
# to GitHub code scanning.
security-scan:
  name: Security Scanning
  runs-on: ubuntu-latest
  # upload-sarif needs write access to security events; declaring it here
  # keeps the upload working when the repository's default token is
  # read-only.
  permissions:
    contents: read
    security-events: write
  steps:
    - uses: actions/checkout@v3

    # NOTE(review): `@master` is a moving ref; consider pinning to a release
    # tag or commit SHA.
    - name: Run Trivy vulnerability scanner
      uses: aquasecurity/trivy-action@master
      with:
        scan-type: 'fs'
        scan-ref: '.'
        format: 'sarif'
        output: 'trivy-results.sarif'

    # Reads the file written by the scanner step above.
    - name: Upload Trivy results to GitHub Security tab
      uses: github/codeql-action/upload-sarif@v2
      with:
        sarif_file: 'trivy-results.sarif'
174+
175+
# Applies the Kubernetes manifest to the `staging` namespace for runs whose
# ref is the develop branch, after the image build and GPU tests.
deploy-staging:
  name: Deploy to Staging
  needs:
    - docker-build
    - integration-tests
  if: github.ref == 'refs/heads/develop'
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v3

    # NOTE(review): no step visible in this job supplies cluster
    # credentials or a kubeconfig; confirm how kubectl reaches the cluster.
    - name: Configure kubectl
      uses: azure/setup-kubectl@v3

    # Apply the manifest, then wait for the StatefulSet rollout to finish.
    - name: Deploy to staging
      run: |
        kubectl apply -f k8s-deployment.yaml --namespace=staging
        kubectl rollout status statefulset/distributed-training --namespace=staging
190+
191+
# Applies the Kubernetes manifest to the `production` namespace on release
# events, then runs a smoke test inside the first pod.
deploy-production:
  name: Deploy to Production
  runs-on: ubuntu-latest
  needs: [docker-build, integration-tests, performance-benchmarks]
  # integration-tests only runs on push and performance-benchmarks only on
  # pull_request, so both are always skipped on a release event. With the
  # default success() check a skipped dependency skips this job too, which
  # means it could never run. The status function below replaces that
  # default: the image build must have succeeded, and no dependency may
  # have failed or been cancelled, but skipped ones are tolerated.
  if: >-
    github.event_name == 'release' &&
    !cancelled() &&
    needs.docker-build.result == 'success' &&
    !contains(needs.*.result, 'failure') &&
    !contains(needs.*.result, 'cancelled')
  steps:
    - uses: actions/checkout@v3

    # NOTE(review): no step visible in this job supplies cluster
    # credentials or a kubeconfig; confirm how kubectl reaches the cluster.
    - name: Configure kubectl
      uses: azure/setup-kubectl@v3

    # Apply the manifest, then wait for the StatefulSet rollout to finish.
    - name: Deploy to production
      run: |
        kubectl apply -f k8s-deployment.yaml --namespace=production
        kubectl rollout status statefulset/distributed-training --namespace=production

    # Imports torch inside pod distributed-training-0 and prints its version.
    - name: Run smoke tests
      run: |
        kubectl exec -n production distributed-training-0 -- python -c "import torch; print(f'PyTorch {torch.__version__}')"
210+
211+
# Posts the pipeline outcome to Slack; runs even when upstream jobs fail.
notification:
  name: Send Notifications
  runs-on: ubuntu-latest
  needs: [code-quality, unit-tests, integration-tests]
  if: always()
  env:
    # `job.status` describes this notification job, not the jobs it waits
    # for, so the reported outcome is derived from the `needs` results:
    # any failure wins, then any cancellation, otherwise success (skipped
    # jobs count as success).
    PIPELINE_STATUS: >-
      ${{ contains(needs.*.result, 'failure') && 'failure'
      || contains(needs.*.result, 'cancelled') && 'cancelled'
      || 'success' }}
  steps:
    - name: Send Slack notification
      uses: 8398a7/action-slack@v3
      with:
        status: ${{ env.PIPELINE_STATUS }}
        text: 'CI/CD Pipeline Status: ${{ env.PIPELINE_STATUS }}'
      env:
        # action-slack v3 reads the webhook from this environment variable;
        # it has no `webhook_url` input.
        SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK }}

0 commit comments

Comments
 (0)