Skip to content

Commit 9edf45b

Browse files
authored
Update ci-cd.yml
1 parent d5b6c11 commit 9edf45b

1 file changed

Lines changed: 0 additions & 221 deletions

File tree

.github/workflows/ci-cd.yml

Lines changed: 0 additions & 221 deletions
Original file line number | Diff line number | Diff line change
@@ -1,222 +1 @@
1-
# Workflow identity, triggers, and workflow-wide environment values.
name: CI/CD Pipeline

# Runs on pushes and pull requests for main/develop, and on created releases.
on:
  push:
    branches:
      - main
      - develop
  pull_request:
    branches:
      - main
      - develop
  release:
    types:
      - created

# Values read by jobs through the `env` context.
env:
  # Quoted so the value stays the string 3.10 instead of the float 3.1.
  PYTHON_VERSION: "3.10"
  # NOTE(review): no job visible in this workflow references PYTORCH_VERSION;
  # every `pip install torch` is unpinned. Confirm whether pinning was intended.
  PYTORCH_VERSION: "2.0.0"
14-
15-
jobs:
16-
# Static checks: formatting, import order, lint, security scan, type check.
code-quality:
  name: Code Quality Checks
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v3

    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: ${{ env.PYTHON_VERSION }}

    - name: Install dependencies
      run: |
        pip install black isort flake8 mypy pylint bandit

    - name: Black formatting check
      run: black --check .

    - name: isort import sorting check
      run: isort --check-only .

    # Only the E9/F63/F7/F82 selections are enabled for this run.
    - name: Flake8 linting
      run: flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics

    - name: Security check with bandit
      run: bandit -r . -f json -o bandit-report.json

    # The JSON report only exists on the runner; keep it so findings can be
    # read after the job ends, including when the bandit step itself fails.
    - name: Upload bandit report
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: bandit-report
        path: bandit-report.json
        # An earlier step may have failed before bandit produced the file.
        if-no-files-found: ignore

    # Advisory check: a mypy failure is shown on the step but does not fail
    # the job. This replaces `|| true`, which hid the failure entirely.
    - name: Type checking with mypy
      continue-on-error: true
      run: mypy . --ignore-missing-imports
45-
46-
# Runs pytest with coverage once per Python version listed in the matrix.
unit-tests:
  name: Unit Tests
  strategy:
    matrix:
      # Quoted so each entry stays a string ("3.10" must not become 3.1).
      python-version:
        - "3.8"
        - "3.9"
        - "3.10"
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v3

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}

    # torch/torchvision come from the PyTorch CPU wheel index; the project
    # itself is installed in editable mode.
    - name: Install dependencies
      run: |
        pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
        pip install -r requirements.txt
        pip install pytest pytest-cov pytest-xdist
        pip install -e .

    # Writes both an XML and an HTML coverage report.
    - name: Run unit tests
      run: |
        pytest test_distributed.py -v --cov=. --cov-report=xml --cov-report=html

    - name: Upload coverage to Codecov
      uses: codecov/codecov-action@v3
      with:
        file: ./coverage.xml
75-
76-
# GPU training and benchmark runs on a self-hosted runner; push events only.
integration-tests:
  name: Integration Tests (GPU)
  if: github.event_name == 'push'
  runs-on:
    - self-hosted
    - gpu
  steps:
    - uses: actions/checkout@v3

    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: ${{ env.PYTHON_VERSION }}

    # torch/torchvision are installed from the cu118 wheel index.
    - name: Install dependencies
      run: |
        pip install torch torchvision --index-url https://download.pytorch.org/whl/cu118
        pip install -r requirements.txt
        pip install -e .

    # One-epoch training run launched as a single process.
    - name: Run single GPU tests
      run: |
        python production_train.py --batch-size 16 --epochs 1

    # Same script launched through torchrun with two processes per node.
    - name: Run multi-GPU tests
      run: |
        torchrun --nproc_per_node=2 production_train.py --batch-size 16 --epochs 1 --strategy ddp

    - name: Run benchmarks
      run: |
        python run_benchmark.py --gpus 1 2 --strategies ddp --batch-sizes 32
105-
106-
# Builds the image after code-quality and unit-tests; pushes only from main.
docker-build:
  name: Build Docker Image
  needs:
    - code-quality
    - unit-tests
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v3

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v2

    # Credentials come from repository secrets.
    - name: Login to DockerHub
      uses: docker/login-action@v2
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_TOKEN }}

    - name: Build and push
      uses: docker/build-push-action@v4
      with:
        context: .
        # True only for push events on refs/heads/main; otherwise the image
        # is built without being pushed.
        push: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
        # Two tags per build: `latest` and the commit SHA.
        tags: |
          ${{ secrets.DOCKERHUB_USERNAME }}/distributed-training:latest
          ${{ secrets.DOCKERHUB_USERNAME }}/distributed-training:${{ github.sha }}
        # Layer cache is read from and written to the `buildcache` tag.
        cache-from: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/distributed-training:buildcache
        cache-to: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/distributed-training:buildcache,mode=max
132-
133-
# Pull-request benchmark run on a self-hosted GPU runner; results are stored
# as a workflow artifact.
# NOTE(review): this job executes pull-request code on a self-hosted runner.
# Confirm that untrusted (fork) pull requests cannot reach this runner.
performance-benchmarks:
  name: Performance Benchmarks
  runs-on: [self-hosted, gpu]
  if: github.event_name == 'pull_request'
  steps:
    - uses: actions/checkout@v3

    # Pin the interpreter the same way the integration-tests job does, rather
    # than using whatever Python happens to be on the runner.
    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: ${{ env.PYTHON_VERSION }}

    # Use the cu118 wheel index, matching the integration-tests job, so both
    # GPU jobs run against the same PyTorch build flavour.
    - name: Install dependencies
      run: |
        pip install torch torchvision --index-url https://download.pytorch.org/whl/cu118
        pip install -r requirements.txt
        pip install -e .

    - name: Run performance benchmarks
      run: |
        python run_benchmark.py --gpus 1 2 4 --strategies ddp fsdp --output-dir benchmark-results

    # upload-artifact@v3 has been retired by GitHub, so v4 is required.
    # Upload also runs after a failed benchmark so partial results are kept.
    - name: Upload benchmark results
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: benchmark-results
        path: benchmark-results/
155-
156-
# Filesystem vulnerability scan; findings are published as SARIF to the
# repository's code-scanning results.
security-scan:
  name: Security Scanning
  runs-on: ubuntu-latest
  # upload-sarif needs security-events: write. The read scopes cover checkout
  # and the workflow-run lookup the upload performs on private repositories.
  permissions:
    contents: read
    actions: read
    security-events: write
  steps:
    - uses: actions/checkout@v3

    # Pinned to a release tag instead of the mutable `master` branch so the
    # scanner code cannot change underneath the workflow.
    - name: Run Trivy vulnerability scanner
      uses: aquasecurity/trivy-action@0.28.0
      with:
        scan-type: 'fs'
        scan-ref: '.'
        format: 'sarif'
        output: 'trivy-results.sarif'

    # Runs even if the scan step fails, so findings still reach the
    # Security tab.
    - name: Upload Trivy results to GitHub Security tab
      if: always()
      uses: github/codeql-action/upload-sarif@v3
      with:
        sarif_file: 'trivy-results.sarif'
174-
175-
# Applies the Kubernetes manifest to the staging namespace for the develop
# branch, after docker-build and integration-tests.
deploy-staging:
  name: Deploy to Staging
  if: github.ref == 'refs/heads/develop'
  needs:
    - docker-build
    - integration-tests
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v3

    # NOTE(review): no step visible here supplies cluster credentials;
    # confirm how kubectl authenticates on this runner.
    - name: Configure kubectl
      uses: azure/setup-kubectl@v3

    # Apply the manifest, then wait for the StatefulSet rollout to complete.
    - name: Deploy to staging
      run: |
        kubectl apply -f k8s-deployment.yaml --namespace=staging
        kubectl rollout status statefulset/distributed-training --namespace=staging
190-
191-
# Applies the Kubernetes manifest to the production namespace on release
# events, then runs a smoke test inside the first pod.
deploy-production:
  name: Deploy to Production
  runs-on: ubuntu-latest
  needs: [docker-build, integration-tests, performance-benchmarks]
  # integration-tests runs only on push and performance-benchmarks only on
  # pull_request, so both are skipped on a release event. A plain
  # `github.event_name == 'release'` condition would then always be skipped
  # through `needs`. `!failure() && !cancelled()` lets the job run when its
  # prerequisites succeeded or were skipped, and still blocks it when any
  # ancestor job failed or the run was cancelled.
  # NOTE(review): as a result, releases deploy without the GPU jobs having
  # run in the same workflow run. Confirm that is acceptable, or extend
  # those jobs' `if` conditions to include release events.
  if: ${{ !failure() && !cancelled() && github.event_name == 'release' }}
  steps:
    - uses: actions/checkout@v3

    # NOTE(review): no step visible here supplies cluster credentials;
    # confirm how kubectl authenticates on this runner.
    - name: Configure kubectl
      uses: azure/setup-kubectl@v3

    # Apply the manifest, then wait for the StatefulSet rollout to complete.
    - name: Deploy to production
      run: |
        kubectl apply -f k8s-deployment.yaml --namespace=production
        kubectl rollout status statefulset/distributed-training --namespace=production

    # Imports torch inside pod distributed-training-0 and prints its version.
    - name: Run smoke tests
      run: |
        kubectl exec -n production distributed-training-0 -- python -c "import torch; print(f'PyTorch {torch.__version__}')"
210-
211-
# Posts the pipeline outcome to Slack after the lint and test jobs finish,
# whatever their result.
notification:
  name: Send Notifications
  runs-on: ubuntu-latest
  needs: [code-quality, unit-tests, integration-tests]
  if: always()
  env:
    # `job.status` describes this notification job, not the pipeline, so it
    # cannot show an upstream failure. Derive the outcome from the needed
    # jobs instead; a skipped job counts as neither failure nor cancellation.
    PIPELINE_STATUS: >-
      ${{
      contains(needs.*.result, 'failure') && 'failure'
      || contains(needs.*.result, 'cancelled') && 'cancelled'
      || 'success'
      }}
  steps:
    - name: Send Slack notification
      uses: 8398a7/action-slack@v3
      with:
        status: ${{ env.PIPELINE_STATUS }}
        text: 'CI/CD Pipeline Status: ${{ env.PIPELINE_STATUS }}'
      env:
        # action-slack@v3 reads the webhook from this environment variable
        # rather than from a `webhook_url` input.
        SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK }}

0 commit comments

Comments
 (0)