forked from vllm-project/semantic-router
-
Notifications
You must be signed in to change notification settings - Fork 0
616 lines (509 loc) · 24.4 KB
/
k8s-integration-test.yml
File metadata and controls
616 lines (509 loc) · 24.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
name: Kubernetes Integration Test

# This workflow tests the CORE semantic-router Kubernetes deployment.
#
# Test Scope:
# ✅ Core deployment (namespace, pvc, deployment, service, configmap)
# ✅ Manifest validation (kubeconform)
# ✅ Service connectivity (gRPC, metrics, API ports)
# ✅ Security scanning (Trivy, Checkov)
# ✅ Basic syntax validation for observability and ai-gateway configs
#
# Out of Scope (planned for follow-up PRs):
# 🔄 Observability stack deployment (Prometheus + Grafana)
# 🔄 AI Gateway end-to-end testing (Envoy Gateway + InferencePool)

# Triggers: pull requests touching the listed paths, manual dispatch, and a
# nightly schedule.
on:
  pull_request:
    paths:
      - "deploy/kubernetes/**"
      - ".github/workflows/k8s-integration-test.yml"
      - "Dockerfile.extproc"
      - "tools/kind/**"
  workflow_dispatch:  # Allow manual triggering
  schedule:
    # Run nightly at 3:00 AM UTC
    - cron: "0 3 * * *"

# Tool versions shared by every job in this workflow.
env:
  KIND_VERSION: v0.20.0
  KUBECTL_VERSION: v1.28.0
  KUSTOMIZE_VERSION: v5.2.1

jobs:
# Job (belongs under the workflow's top-level `jobs:` mapping).
# Renders deploy/kubernetes with Kustomize, validates the output with
# kubeconform and publishes the rendered manifests as an artifact.
validate-manifests:
  name: Validate Kubernetes Manifests
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Setup Kustomize
      run: |
        # pipefail + curl -f: a failed download must fail the step instead of
        # piping an error page (or nothing) into bash.
        set -o pipefail
        # Install the version pinned in the workflow-level env; the installer
        # takes the version without the leading "v".
        curl -fsSL "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh" | bash -s -- "${KUSTOMIZE_VERSION#v}"
        sudo mv kustomize /usr/local/bin/
        kustomize version

    - name: Validate Kustomize build
      run: |
        echo "Building kustomization..."
        kustomize build deploy/kubernetes > /tmp/k8s-manifests.yaml
        echo "Kustomize build successful!"
        echo "Generated manifests:"
        cat /tmp/k8s-manifests.yaml

    - name: Setup kubeconform
      run: |
        wget https://github.com/yannh/kubeconform/releases/latest/download/kubeconform-linux-amd64.tar.gz
        tar xf kubeconform-linux-amd64.tar.gz
        sudo mv kubeconform /usr/local/bin/
        kubeconform -v

    - name: Validate manifests with kubeconform
      run: |
        # Without pipefail a failing `kustomize build` would hand kubeconform
        # an empty stream and the step would still pass.
        set -o pipefail
        echo "Validating Kubernetes manifests..."
        # Schema version follows KUBECTL_VERSION (leading "v" stripped).
        kustomize build deploy/kubernetes | \
          kubeconform -strict -summary \
            -kubernetes-version "${KUBECTL_VERSION#v}" \
            -schema-location default \
            -schema-location 'https://raw.githubusercontent.com/datreeio/CRDs-catalog/main/{{.Group}}/{{.ResourceKind}}_{{.ResourceAPIVersion}}.json' \
            -skip CustomResourceDefinition \
            -ignore-missing-schemas

    - name: Upload validated manifests
      uses: actions/upload-artifact@v4
      with:
        name: k8s-manifests
        path: /tmp/k8s-manifests.yaml
        retention-days: 5
# Job (belongs under the workflow's top-level `jobs:` mapping).
# Builds the extproc image, loads it into a kind cluster, deploys a CI-only
# overlay of deploy/kubernetes and checks that the pod initializes, becomes
# Ready and answers on its ports. Logs are collected on failure and the
# namespace is always deleted at the end.
kind-integration-test:
  name: kind Cluster Integration Test
  runs-on: ubuntu-latest
  needs: validate-manifests
  timeout-minutes: 45  # Increased to account for model downloads
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    - name: Create kind cluster
      uses: helm/kind-action@v1.8.0
      with:
        version: ${{ env.KIND_VERSION }}
        config: tools/kind/kind-config.yaml
        cluster_name: semantic-router-test
        wait: 120s

    - name: Build semantic-router image
      uses: docker/build-push-action@v5
      with:
        context: .
        file: ./Dockerfile.extproc
        tags: ghcr.io/vllm-project/semantic-router/extproc:test
        load: true
        cache-from: type=gha
        cache-to: type=gha,mode=max

    - name: Load image into kind cluster
      run: |
        echo "Loading image into kind cluster..."
        kind load docker-image ghcr.io/vllm-project/semantic-router/extproc:test --name semantic-router-test
        echo "Image loaded successfully!"

    - name: Verify cluster
      run: |
        kubectl cluster-info
        kubectl get nodes
        kubectl version

    - name: Setup Kustomize
      run: |
        # pipefail + curl -f: a failed download must fail the step.
        set -o pipefail
        # Install the version pinned in the workflow-level env; the installer
        # takes the version without the leading "v".
        curl -fsSL "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh" | bash -s -- "${KUSTOMIZE_VERSION#v}"
        sudo mv kustomize /usr/local/bin/

    - name: Create temporary kustomization for testing
      run: |
        # Create a test overlay directory
        mkdir -p deploy/kubernetes/test-overlay
        cd deploy/kubernetes/test-overlay

        # Copy all base resources to overlay directory
        cp ../namespace.yaml ./
        cp ../service.yaml ./
        cp ../config.yaml ./
        cp ../tools_db.json ./

        # Copy resources for CI testing
        cp ../deployment.yaml ./deployment.yaml
        cp ../pvc.yaml ./pvc.yaml

        # Optimize init container for CI testing
        # NOTE(review): the YAML fragments injected by the substitutions below
        # must carry the indentation deployment.yaml actually uses; the
        # whitespace inside these replacement strings looks collapsed and
        # should be confirmed against the manifest.
        # 1. Update pip install to include hf_transfer for faster downloads
        perl -i -pe 's/pip install --no-cache-dir huggingface_hub\[cli\]/pip install --no-cache-dir "huggingface_hub[cli]" hf_transfer/g' deployment.yaml

        # 2. Enable HF_HUB_ENABLE_HF_TRANSFER for faster downloads
        perl -i -pe 's/(env:)/\1\n - name: HF_HUB_ENABLE_HF_TRANSFER\n value: "1"/g' deployment.yaml

        # 3. Simplify the download logic - remove directory checks since CI always starts fresh
        # Replace the entire args section with a simpler version
        perl -i -0pe 's/args:\s*\n\s*-\s*\|\s*\n\s*set -e.*?ls -la \/app\/models\//args:\n - |\n set -e\n echo "Installing Hugging Face CLI..."\n pip install --no-cache-dir "huggingface_hub[cli]" hf_transfer\n \n echo "Downloading models to persistent volume..."\n cd \/app\/models\n \n echo "Downloading category classifier model..."\n hf download LLM-Semantic-Router\/category_classifier_modernbert-base_model --local-dir category_classifier_modernbert-base_model\n \n echo "Downloading PII classifier model..."\n hf download LLM-Semantic-Router\/pii_classifier_modernbert-base_model --local-dir pii_classifier_modernbert-base_model\n \n echo "Downloading jailbreak classifier model..."\n hf download LLM-Semantic-Router\/jailbreak_classifier_modernbert-base_model --local-dir jailbreak_classifier_modernbert-base_model\n \n echo "Downloading PII token classifier model..."\n hf download LLM-Semantic-Router\/pii_classifier_modernbert-base_presidio_token_model --local-dir pii_classifier_modernbert-base_presidio_token_model\n \n echo "All models downloaded successfully!"\n ls -la \/app\/models\//gs' deployment.yaml

        echo "✓ Updated init container with optimized model download for CI"

        # Create kustomization with local resources
        cat > kustomization.yaml << EOF
        apiVersion: kustomize.config.k8s.io/v1beta1
        kind: Kustomization

        resources:
          - namespace.yaml
          - pvc.yaml
          - deployment.yaml
          - service.yaml

        configMapGenerator:
          - name: semantic-router-config
            files:
              - config.yaml
              - tools_db.json

        namespace: vllm-semantic-router-system

        # Use the same image that was loaded into kind cluster
        images:
          - name: ghcr.io/vllm-project/semantic-router/extproc
            newTag: test

        # Reduce resource requirements for CI testing and set imagePullPolicy
        patches:
          # Patch for main container
          - patch: |-
              - op: replace
                path: /spec/template/spec/containers/0/resources/requests/memory
                value: "2Gi"
              - op: replace
                path: /spec/template/spec/containers/0/resources/requests/cpu
                value: "1"
              - op: replace
                path: /spec/template/spec/containers/0/resources/limits/memory
                value: "4Gi"
              - op: replace
                path: /spec/template/spec/containers/0/resources/limits/cpu
                value: "2"
              - op: add
                path: /spec/template/spec/containers/0/imagePullPolicy
                value: "IfNotPresent"
            target:
              kind: Deployment
              name: semantic-router
          # Patch for init container - increase resources for faster downloads
          - patch: |-
              - op: replace
                path: /spec/template/spec/initContainers/0/resources/requests/memory
                value: "1Gi"
              - op: replace
                path: /spec/template/spec/initContainers/0/resources/requests/cpu
                value: "500m"
              - op: replace
                path: /spec/template/spec/initContainers/0/resources/limits/memory
                value: "2Gi"
              - op: replace
                path: /spec/template/spec/initContainers/0/resources/limits/cpu
                value: "1"
            target:
              kind: Deployment
              name: semantic-router
        EOF

        echo "=== Generated kustomization.yaml ==="
        cat kustomization.yaml
        echo "=== Files in overlay directory ==="
        ls -la

    - name: Pre-flight check for Hugging Face connectivity
      run: |
        # Best-effort probe: unreachable hosts only produce warnings.
        echo "Testing Hugging Face Hub connectivity..."
        curl -I https://huggingface.co || {
          echo "⚠️ Warning: Cannot reach huggingface.co"
        }

        # Test one of the model repos
        curl -I https://huggingface.co/LLM-Semantic-Router/category_classifier_modernbert-base_model || {
          echo "⚠️ Warning: Cannot reach model repository"
        }
        echo "✓ Connectivity check completed"

    - name: Deploy to kind cluster
      run: |
        # Without pipefail a failing `kustomize build` would let
        # `kubectl apply` succeed on an empty stream.
        set -o pipefail
        echo "Deploying semantic-router to kind cluster..."
        kustomize build deploy/kubernetes/test-overlay | kubectl apply -f -

        echo "Waiting for namespace to be active..."
        kubectl wait --for=jsonpath='{.status.phase}'=Active namespace/vllm-semantic-router-system --timeout=60s

        echo "Deployment initiated. Checking resources..."
        kubectl get all -n vllm-semantic-router-system

    - name: Wait for deployment readiness
      run: |
        echo "Waiting for deployment to be ready (this may take a few minutes)..."
        echo "Note: Using PVC for model storage, init container will download models"

        # Wait for PVC to be bound
        echo "Waiting for PVC to be bound..."
        kubectl wait --for=jsonpath='{.status.phase}'=Bound pvc/semantic-router-models -n vllm-semantic-router-system --timeout=120s || {
          echo "PVC binding timeout. Checking PVC status..."
          kubectl describe pvc -n vllm-semantic-router-system
          exit 1
        }

        # Wait for pods to be created
        echo "Waiting for pods to be created..."
        timeout 120 bash -c 'until kubectl get pods -n vllm-semantic-router-system | grep -q semantic-router; do echo "Waiting for pod creation..."; sleep 5; done'

        # Show pod status
        kubectl get pods -n vllm-semantic-router-system

        # Wait for init container to complete (model download)
        # Increased timeout to 15 minutes for model downloads
        echo "Waiting for init container to complete (downloading models, this may take 10-15 minutes)..."
        kubectl wait --for=condition=Initialized pods -l app=semantic-router -n vllm-semantic-router-system --timeout=900s || {
          echo "❌ Init container did not complete in time. Showing logs..."
          kubectl logs -n vllm-semantic-router-system -l app=semantic-router -c model-downloader --tail=200 || true
          echo ""
          echo "Checking pod status..."
          kubectl describe pods -n vllm-semantic-router-system -l app=semantic-router
          exit 1
        }

        # Show init container logs and verify models were downloaded
        echo "=== Init Container Logs ==="
        kubectl logs -n vllm-semantic-router-system -l app=semantic-router -c model-downloader --tail=100 || true

        # Verify models were actually downloaded
        echo ""
        echo "=== Verifying Model Downloads ==="
        POD_NAME=$(kubectl get pods -n vllm-semantic-router-system -l app=semantic-router -o jsonpath='{.items[0].metadata.name}')

        # Check if models directory has content
        echo "Checking models directory content..."
        kubectl exec -n vllm-semantic-router-system "$POD_NAME" -- ls -la /app/models/ || {
          echo "⚠️ Warning: Could not list models directory"
        }

        # Count model directories (should be 4).
        # `grep -c` already prints 0 when nothing matches (and exits 1), so
        # only neutralize its exit status; an extra `echo 0` would yield
        # "0<newline>0" and break the numeric comparison below.
        MODEL_COUNT=$(kubectl exec -n vllm-semantic-router-system "$POD_NAME" -- sh -c 'ls -1 /app/models/ | grep -c "model" || true')
        echo "Found $MODEL_COUNT model directories"

        if [ "$MODEL_COUNT" -lt 4 ]; then
          echo "❌ Error: Expected 4 model directories, found $MODEL_COUNT"
          echo "Init container may have failed to download all models"
          exit 1
        fi

        echo "✓ All models verified successfully"

        # Wait for main container to be ready
        echo ""
        echo "Waiting for main container to be ready..."
        kubectl wait --for=condition=Ready pods -l app=semantic-router -n vllm-semantic-router-system --timeout=300s || {
          echo "❌ Pod did not become ready in time. Showing status and logs..."
          kubectl describe pods -n vllm-semantic-router-system -l app=semantic-router
          kubectl logs -n vllm-semantic-router-system -l app=semantic-router --tail=200 || true
          exit 1
        }

        echo "✅ Deployment is ready!"

    - name: Verify deployment
      run: |
        echo "=== Verifying Deployment ==="

        # Check deployment status
        kubectl get deployment -n vllm-semantic-router-system semantic-router -o wide

        # Check pod status
        kubectl get pods -n vllm-semantic-router-system -o wide

        # Check services
        kubectl get svc -n vllm-semantic-router-system

        # Check configmaps
        kubectl get configmap -n vllm-semantic-router-system

        # Verify pod is running
        POD_STATUS=$(kubectl get pods -n vllm-semantic-router-system -l app=semantic-router -o jsonpath='{.items[0].status.phase}')
        if [ "$POD_STATUS" != "Running" ]; then
          echo "Error: Pod is not running. Status: $POD_STATUS"
          kubectl describe pods -n vllm-semantic-router-system -l app=semantic-router
          exit 1
        fi
        echo "✓ Pod is running"

        # Verify all containers are ready (inspects the first container status)
        READY_CONTAINERS=$(kubectl get pods -n vllm-semantic-router-system -l app=semantic-router -o jsonpath='{.items[0].status.containerStatuses[0].ready}')
        if [ "$READY_CONTAINERS" != "true" ]; then
          echo "Error: Container is not ready"
          kubectl describe pods -n vllm-semantic-router-system -l app=semantic-router
          exit 1
        fi
        echo "✓ All containers are ready"

    - name: Test service connectivity
      run: |
        echo "=== Testing Service Connectivity ==="

        # Get pod name
        POD_NAME=$(kubectl get pods -n vllm-semantic-router-system -l app=semantic-router -o jsonpath='{.items[0].metadata.name}')
        echo "Pod name: $POD_NAME"

        # The port probes below are advisory: a closed port only logs a warning.
        # Test gRPC port
        echo "Testing gRPC port (50051)..."
        kubectl exec -n vllm-semantic-router-system "$POD_NAME" -- timeout 5 nc -zv localhost 50051 || {
          echo "Warning: gRPC port test failed"
        }

        # Test metrics port
        echo "Testing metrics port (9190)..."
        kubectl exec -n vllm-semantic-router-system "$POD_NAME" -- timeout 5 nc -zv localhost 9190 || {
          echo "Warning: Metrics port test failed"
        }

        # Test classify API port
        echo "Testing classify API port (8080)..."
        kubectl exec -n vllm-semantic-router-system "$POD_NAME" -- timeout 5 nc -zv localhost 8080 || {
          echo "Warning: Classify API port test failed"
        }

        # Port forward for external testing
        echo "Setting up port-forward for testing..."
        kubectl port-forward -n vllm-semantic-router-system svc/semantic-router 8080:8080 &
        PF_PID=$!
        sleep 5

        # Test HTTP endpoint (if available)
        echo "Testing HTTP endpoint..."
        curl -v http://localhost:8080/health || echo "Health endpoint not available or not implemented"

        # Cleanup port-forward
        kill "$PF_PID" || true

        echo "✓ Service connectivity tests completed"

    - name: Check logs
      if: always()
      run: |
        echo "=== Deployment Logs ==="
        kubectl logs -n vllm-semantic-router-system -l app=semantic-router --tail=200 --all-containers=true || true

        echo "=== Events ==="
        kubectl get events -n vllm-semantic-router-system --sort-by='.lastTimestamp' || true

    - name: Export cluster logs on failure
      if: failure()
      run: |
        echo "=== Exporting cluster information for debugging ==="
        mkdir -p /tmp/k8s-logs

        # Export pod descriptions
        kubectl describe pods -n vllm-semantic-router-system > /tmp/k8s-logs/pod-descriptions.txt || true

        # Export deployment description
        kubectl describe deployment -n vllm-semantic-router-system > /tmp/k8s-logs/deployment-description.txt || true

        # Export all logs
        kubectl logs -n vllm-semantic-router-system -l app=semantic-router --all-containers=true --previous > /tmp/k8s-logs/previous-logs.txt || true
        kubectl logs -n vllm-semantic-router-system -l app=semantic-router --all-containers=true > /tmp/k8s-logs/current-logs.txt || true

        # Export events
        kubectl get events -n vllm-semantic-router-system --sort-by='.lastTimestamp' > /tmp/k8s-logs/events.txt || true

        # Export resource status
        kubectl get all -n vllm-semantic-router-system -o yaml > /tmp/k8s-logs/all-resources.yaml || true

    - name: Upload cluster logs
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: k8s-cluster-logs
        path: /tmp/k8s-logs/
        retention-days: 7

    - name: Cleanup
      if: always()
      run: |
        echo "Cleaning up resources..."
        kubectl delete namespace vllm-semantic-router-system --timeout=60s || true
# Job (belongs under the workflow's top-level `jobs:` mapping).
# Renders the base, observability and AI Gateway configurations and checks
# that the expected resource kinds and config files show up in the output.
test-with-custom-config:
  name: Test with Custom Configuration
  runs-on: ubuntu-latest
  needs: validate-manifests
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Setup Kustomize
      run: |
        # pipefail + curl -f: a failed download must fail the step.
        set -o pipefail
        # Install the version pinned in the workflow-level env; the installer
        # takes the version without the leading "v".
        curl -fsSL "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh" | bash -s -- "${KUSTOMIZE_VERSION#v}"
        sudo mv kustomize /usr/local/bin/

    - name: Test kustomize with different overlays
      run: |
        echo "Testing base kustomization..."
        kustomize build deploy/kubernetes > /tmp/base-manifests.yaml

        echo "Validating generated resources..."
        # Check if all expected resources are present; the first missing kind
        # fails the step.
        for resource in "Namespace" "Deployment" "Service" "ConfigMap"; do
          if ! grep -q "kind: $resource" /tmp/base-manifests.yaml; then
            echo "Error: $resource not found"
            exit 1
          fi
        done

        echo "✓ All expected resources are present"

    - name: Verify ConfigMap generation
      run: |
        echo "Checking ConfigMap generation..."
        kustomize build deploy/kubernetes | grep -A 20 "kind: ConfigMap"

        # Verify config files are included (missing files only warn)
        if ! kustomize build deploy/kubernetes | grep -q "config.yaml"; then
          echo "Warning: config.yaml might not be properly included in ConfigMap"
        fi

        if ! kustomize build deploy/kubernetes | grep -q "tools_db.json"; then
          echo "Warning: tools_db.json might not be properly included in ConfigMap"
        fi

    - name: Validate observability kustomization
      run: |
        echo "Validating observability stack kustomization..."
        if [ -d "deploy/kubernetes/observability" ]; then
          kustomize build deploy/kubernetes/observability > /tmp/observability-manifests.yaml
          echo "✓ Observability kustomization is valid"

          # Verify expected resources (missing kinds only warn)
          for resource in "Deployment" "Service" "ConfigMap" "PersistentVolumeClaim"; do
            if ! grep -q "kind: $resource" /tmp/observability-manifests.yaml; then
              echo "Warning: $resource not found in observability manifests"
            fi
          done
        else
          echo "Observability directory not found, skipping..."
        fi

    - name: Validate AI Gateway configurations
      run: |
        echo "Validating AI Gateway configurations..."

        # Check if ai-gateway directory exists
        if [ -d "deploy/kubernetes/ai-gateway" ]; then
          # Validate configuration yamls (without CRDs)
          for yaml_file in deploy/kubernetes/ai-gateway/configuration/*.yaml; do
            if [ -f "$yaml_file" ]; then
              echo "Checking $yaml_file..."
              # Basic YAML syntax check; problems are reported as warnings
              kubectl create --dry-run=client -f "$yaml_file" || echo "Warning: Issues with $yaml_file"
            fi
          done

          # Validate inference-pool manifests (skip CRD validation as they may not be installed)
          for yaml_file in deploy/kubernetes/ai-gateway/inference-pool/*.yaml; do
            if [ -f "$yaml_file" ]; then
              echo "Checking $yaml_file for YAML syntax..."
              # Just check if it's valid YAML
              kubectl create --dry-run=client -f "$yaml_file" 2>&1 | grep -q "no matches for kind" && echo "✓ $yaml_file syntax valid (CRD not installed)" || echo "Validated $yaml_file"
            fi
          done

          echo "✓ AI Gateway configuration validation completed"
        else
          echo "AI Gateway directory not found, skipping..."
        fi
# Job (belongs under the workflow's top-level `jobs:` mapping).
# Scans deploy/kubernetes with Trivy (SARIF uploaded to code scanning) and
# Checkov. Both scanners are configured to report only, not to fail the build.
security-scan:
  name: Security Scan for K8s Manifests
  runs-on: ubuntu-latest
  needs: validate-manifests
  # upload-sarif needs write access to security events; checkout needs to
  # read the repository contents.
  permissions:
    contents: read
    security-events: write
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Setup Kustomize
      run: |
        # pipefail + curl -f: a failed download must fail the step.
        set -o pipefail
        # Install the version pinned in the workflow-level env; the installer
        # takes the version without the leading "v".
        curl -fsSL "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh" | bash -s -- "${KUSTOMIZE_VERSION#v}"
        sudo mv kustomize /usr/local/bin/

    # NOTE(review): `@master` tracks a moving branch; consider pinning this
    # action to a release tag or commit SHA.
    - name: Run Trivy security scan
      uses: aquasecurity/trivy-action@master
      with:
        scan-type: "config"
        scan-ref: "deploy/kubernetes"
        format: "sarif"
        output: "trivy-results.sarif"
        severity: "CRITICAL,HIGH"
        exit-code: "0"  # Don't fail on vulnerabilities, just report

    - name: Upload Trivy results to GitHub Security
      uses: github/codeql-action/upload-sarif@v3
      # Run even after a failed scan, but only when a report was written.
      if: always() && hashFiles('trivy-results.sarif') != ''
      with:
        sarif_file: "trivy-results.sarif"

    # NOTE(review): `@master` tracks a moving branch; consider pinning this
    # action to a release tag or commit SHA.
    - name: Run Checkov scan
      uses: bridgecrewio/checkov-action@master
      with:
        directory: deploy/kubernetes
        framework: kubernetes
        output_format: cli
        soft_fail: true  # Don't fail the build
# Job (belongs under the workflow's top-level `jobs:` mapping).
# Always runs after the other jobs, prints each job's result and fails the
# workflow unless every required job succeeded. The security scan is printed
# for information only and does not gate the outcome.
summary:
  name: Test Summary
  runs-on: ubuntu-latest
  needs:
    - validate-manifests
    - kind-integration-test
    - test-with-custom-config
    - security-scan
  if: always()
  steps:
    - name: Check test results
      # Job results are handed over as environment variables rather than
      # being expanded directly into the script text.
      env:
        VALIDATE_RESULT: ${{ needs.validate-manifests.result }}
        KIND_RESULT: ${{ needs.kind-integration-test.result }}
        CUSTOM_CONFIG_RESULT: ${{ needs.test-with-custom-config.result }}
        SECURITY_RESULT: ${{ needs.security-scan.result }}
      run: |
        echo "=== Kubernetes Integration Test Summary ==="
        echo "Manifest Validation: $VALIDATE_RESULT"
        echo "kind Integration Test: $KIND_RESULT"
        echo "Custom Config Test: $CUSTOM_CONFIG_RESULT"
        echo "Security Scan: $SECURITY_RESULT"

        # Anything other than "success" (failure, cancelled, skipped) means a
        # required job did not pass, so it must not be reported as green.
        for result in "$VALIDATE_RESULT" "$KIND_RESULT" "$CUSTOM_CONFIG_RESULT"; do
          if [[ "$result" != "success" ]]; then
            echo "❌ Some tests failed"
            exit 1
          fi
        done

        echo "✅ All tests passed"