# CI/CD pipeline for the distributed-training project.
#
# Triggers: pushes and pull requests targeting main/develop, and newly
# created releases.
#
# Job graph:
#   code-quality, unit-tests  -> docker-build
#   docker-build + integration-tests -> deploy-staging    (develop pushes)
#   docker-build + integration-tests -> deploy-production (releases)
#   performance-benchmarks and security-scan run independently.
#   notification always runs last and reports the combined test result.
name: CI/CD Pipeline

on:
  push:
    branches: [main, develop]
  pull_request:
    branches: [main, develop]
  release:
    types: [created]

env:
  # Quoted so YAML does not read 3.10 as the float 3.1.
  PYTHON_VERSION: '3.10'
  # NOTE(review): declared but not referenced by any job below; every torch
  # install is unpinned. Either wire this into the pip commands or remove it.
  PYTORCH_VERSION: '2.0.0'

jobs:
  # Formatting, linting, security and type checks on the whole repository.
  code-quality:
    name: Code Quality Checks
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install dependencies
        run: |
          pip install black isort flake8 mypy pylint bandit

      - name: Black formatting check
        run: black --check .

      - name: isort import sorting check
        run: isort --check-only .

      # Only syntax errors and undefined names are selected, so this step
      # blocks on real breakage rather than on style.
      - name: Flake8 linting
        run: flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics

      - name: Security check with bandit
        run: bandit -r . -f json -o bandit-report.json

      # The JSON report is the only place the findings are written, so keep
      # it even when the bandit step above fails the job.
      - name: Upload bandit report
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: bandit-report
          path: bandit-report.json
          if-no-files-found: ignore

      # Advisory only: a failure is shown on the step but does not fail the
      # job (replaces the previous `|| true`, which hid failures entirely).
      - name: Type checking with mypy
        continue-on-error: true
        run: mypy . --ignore-missing-imports

  # CPU-only unit tests across the supported Python versions.
  unit-tests:
    name: Unit Tests
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ['3.8', '3.9', '3.10']
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
          pip install -r requirements.txt
          pip install pytest pytest-cov pytest-xdist
          pip install -e .

      - name: Run unit tests
        run: |
          pytest test_distributed.py -v --cov=. --cov-report=xml --cov-report=html

      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3
        with:
          file: ./coverage.xml

  # GPU smoke runs on the self-hosted runner.
  integration-tests:
    name: Integration Tests (GPU)
    runs-on: [self-hosted, gpu]
    # Runs on release as well as push: deploy-production depends on this job,
    # and a skipped dependency would cause that deployment to be skipped too.
    if: github.event_name == 'push' || github.event_name == 'release'
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install dependencies
        run: |
          pip install torch torchvision --index-url https://download.pytorch.org/whl/cu118
          pip install -r requirements.txt
          pip install -e .

      - name: Run single GPU tests
        run: |
          python production_train.py --batch-size 16 --epochs 1

      - name: Run multi-GPU tests
        run: |
          torchrun --nproc_per_node=2 production_train.py --batch-size 16 --epochs 1 --strategy ddp

      - name: Run benchmarks
        run: |
          python run_benchmark.py --gpus 1 2 --strategies ddp --batch-sizes 32

  # Builds the image on every event; pushes it only for pushes to main.
  docker-build:
    name: Build Docker Image
    runs-on: ubuntu-latest
    needs: [code-quality, unit-tests]
    steps:
      - uses: actions/checkout@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2

      - name: Login to DockerHub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Build and push
        uses: docker/build-push-action@v4
        with:
          context: .
          push: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
          tags: |
            ${{ secrets.DOCKERHUB_USERNAME }}/distributed-training:latest
            ${{ secrets.DOCKERHUB_USERNAME }}/distributed-training:${{ github.sha }}
          cache-from: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/distributed-training:buildcache
          cache-to: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/distributed-training:buildcache,mode=max

  # Multi-GPU benchmark sweep, run for pull requests only.
  performance-benchmarks:
    name: Performance Benchmarks
    runs-on: [self-hosted, gpu]
    if: github.event_name == 'pull_request'
    steps:
      - uses: actions/checkout@v3

      # Same interpreter selection as integration-tests, which uses the same
      # runner labels.
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install dependencies
        run: |
          pip install torch torchvision
          pip install -r requirements.txt
          pip install -e .

      - name: Run performance benchmarks
        run: |
          python run_benchmark.py --gpus 1 2 4 --strategies ddp fsdp --output-dir benchmark-results

      # v3 of the artifact actions is no longer served on github.com.
      - name: Upload benchmark results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results
          path: benchmark-results/

  # Filesystem vulnerability scan, published to the Security tab.
  security-scan:
    name: Security Scanning
    runs-on: ubuntu-latest
    # upload-sarif needs write access to code-scanning results; the default
    # token may be read-only.
    permissions:
      contents: read
      security-events: write
    steps:
      - uses: actions/checkout@v3

      # Pinned to a release tag instead of the mutable master branch.
      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@0.28.0
        with:
          scan-type: 'fs'
          scan-ref: '.'
          format: 'sarif'
          output: 'trivy-results.sarif'

      - name: Upload Trivy results to GitHub Security tab
        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: 'trivy-results.sarif'

  # Rolls the manifest out to the staging namespace for the develop branch.
  deploy-staging:
    name: Deploy to Staging
    runs-on: ubuntu-latest
    needs: [docker-build, integration-tests]
    if: github.ref == 'refs/heads/develop'
    steps:
      - uses: actions/checkout@v3

      # NOTE(review): no step in this job supplies cluster credentials or a
      # kubeconfig; confirm how kubectl is expected to authenticate.
      - name: Configure kubectl
        uses: azure/setup-kubectl@v3

      - name: Deploy to staging
        run: |
          kubectl apply -f k8s-deployment.yaml --namespace=staging
          kubectl rollout status statefulset/distributed-training --namespace=staging

  # Rolls the manifest out to the production namespace when a release is
  # created, then runs a smoke test inside the first pod.
  deploy-production:
    name: Deploy to Production
    runs-on: ubuntu-latest
    # performance-benchmarks is not listed: it only runs for pull requests,
    # so on a release it is skipped and would skip this job with it.
    needs: [docker-build, integration-tests]
    if: github.event_name == 'release'
    steps:
      - uses: actions/checkout@v3

      # NOTE(review): no step in this job supplies cluster credentials or a
      # kubeconfig; confirm how kubectl is expected to authenticate.
      - name: Configure kubectl
        uses: azure/setup-kubectl@v3

      - name: Deploy to production
        run: |
          kubectl apply -f k8s-deployment.yaml --namespace=production
          kubectl rollout status statefulset/distributed-training --namespace=production

      - name: Run smoke tests
        run: |
          kubectl exec -n production distributed-training-0 -- python -c "import torch; print(f'PyTorch {torch.__version__}')"

  # Posts the combined result of the check and test jobs to Slack.
  notification:
    name: Send Notifications
    runs-on: ubuntu-latest
    needs: [code-quality, unit-tests, integration-tests]
    if: always()
    env:
      # job.status would describe this job only (always success here), so
      # the reported status is derived from the jobs listed in `needs`.
      PIPELINE_STATUS: ${{ contains(needs.*.result, 'failure') && 'failure' || contains(needs.*.result, 'cancelled') && 'cancelled' || 'success' }}
    steps:
      - name: Send Slack notification
        uses: 8398a7/action-slack@v3
        with:
          status: ${{ env.PIPELINE_STATUS }}
          text: 'CI/CD Pipeline Status: ${{ env.PIPELINE_STATUS }}'
        env:
          # action-slack v3 reads the webhook from this environment variable.
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK }}