-
Notifications
You must be signed in to change notification settings - Fork 0
658 lines (573 loc) · 30.2 KB
/
deploy-private-aks.yml
File metadata and controls
658 lines (573 loc) · 30.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
name: Private AKS PoC - Deploy, Log, Teardown

# Manually-triggered proof of concept: provision a VNet + self-hosted runner VM,
# deploy a private AKS cluster into the same VNet, validate private API access,
# collect caller-IP evidence, then tear everything down (cost control).
on:
  workflow_dispatch:
    inputs:
      location:
        description: 'Azure region (canadacentral or canadaeast)'
        default: 'canadacentral'
        type: string
      wait_minutes:
        description: 'Minutes to wait before teardown (cost control)'
        default: '30'
        type: string

permissions:
  id-token: write  # OIDC token for azure/login from GitHub-hosted runners
  contents: read

env:
  LOCATION: ${{ github.event.inputs.location || 'canadacentral' }}
  # Shared infrastructure resource group (VNet, runner VM, MI)
  INFRA_RG: rg-aks-poc-infra-${{ github.run_id }}
  RUNNER_VM: vm-runner-${{ github.run_id }}
  RUNNER_LABEL: aks-poc-runner-${{ github.run_id }}
  MI_NAME: mi-aks-poc-deployer
  # Network — runner and AKS share one VNet
  VNET_NAME: vnet-aks-poc
  SUBNET_AKS: subnet-aks
  SUBNET_RUNNER: subnet-runner
  # AKS deployment resources (separate RG for clean AKS teardown)
  AKS_RG: rg-aks-poc-${{ github.run_id }}
  CLUSTER_NAME: aks-poc-${{ github.run_id }}

jobs:
# ═══════════════════════════════════════════════════════════════
# Job 1: Provision VNet + Runner VM on GitHub-hosted runner
# ═══════════════════════════════════════════════════════════════
# Creates the shared VNet, managed identity, and runner VM in
# subnet-runner. The AKS cluster will later deploy into
# subnet-aks in the same VNet, giving the runner private
# network access to the Kubernetes API server.
# ═══════════════════════════════════════════════════════════════
setup-runner:
runs-on: ubuntu-latest
timeout-minutes: 30
outputs:
mi_client_id: ${{ steps.identity.outputs.mi_client_id }}
steps:
# ── 1a. Azure Login (OIDC from GitHub-hosted runner) ───────
- name: Azure Login
uses: azure/login@v2
with:
client-id: ${{ secrets.AZURE_CLIENT_ID }}
tenant-id: ${{ secrets.AZURE_TENANT_ID }}
subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
# ── 1b. Create infrastructure resource group ───────────────
- name: Create infrastructure resource group
run: |
az group create \
--name "$INFRA_RG" \
--location "$LOCATION" \
--tags purpose=aks-poc component=infra run=${{ github.run_id }}
# ── 1c. Create shared VNet with runner + AKS subnets ──────
- name: Create VNet and subnets
id: network
run: |
az network vnet create \
--resource-group "$INFRA_RG" \
--name "$VNET_NAME" \
--address-prefixes 10.224.0.0/16 \
--subnet-name "$SUBNET_AKS" \
--subnet-prefixes 10.224.0.0/24
az network vnet subnet create \
--resource-group "$INFRA_RG" \
--vnet-name "$VNET_NAME" \
--name "$SUBNET_RUNNER" \
--address-prefixes 10.224.1.0/24
SUBNET_AKS_ID=$(az network vnet subnet show \
--resource-group "$INFRA_RG" \
--vnet-name "$VNET_NAME" \
--name "$SUBNET_AKS" \
--query id -o tsv)
echo "subnet_aks_id=$SUBNET_AKS_ID" >> "$GITHUB_OUTPUT"
SUBNET_RUNNER_ID=$(az network vnet subnet show \
--resource-group "$INFRA_RG" \
--vnet-name "$VNET_NAME" \
--name "$SUBNET_RUNNER" \
--query id -o tsv)
echo "SUBNET_RUNNER_ID=$SUBNET_RUNNER_ID" >> "$GITHUB_ENV"
# ── 1d. Create user-assigned managed identity ──────────────
- name: Create managed identity
id: identity
run: |
az identity create \
--name "$MI_NAME" \
--resource-group "$INFRA_RG" \
--location "$LOCATION"
MI_CLIENT_ID=$(az identity show \
--name "$MI_NAME" \
--resource-group "$INFRA_RG" \
--query clientId -o tsv)
MI_PRINCIPAL_ID=$(az identity show \
--name "$MI_NAME" \
--resource-group "$INFRA_RG" \
--query principalId -o tsv)
MI_RESOURCE_ID=$(az identity show \
--name "$MI_NAME" \
--resource-group "$INFRA_RG" \
--query id -o tsv)
echo "mi_client_id=$MI_CLIENT_ID" >> "$GITHUB_OUTPUT"
echo "MI_PRINCIPAL_ID=$MI_PRINCIPAL_ID" >> "$GITHUB_ENV"
echo "MI_RESOURCE_ID=$MI_RESOURCE_ID" >> "$GITHUB_ENV"
# ── 1e. Assign RBAC roles to the managed identity ─────────
- name: Assign RBAC roles
run: |
SUBSCRIPTION_ID=$(az account show --query id -o tsv)
az role assignment create \
--assignee-object-id "$MI_PRINCIPAL_ID" \
--assignee-principal-type ServicePrincipal \
--role "Contributor" \
--scope "/subscriptions/$SUBSCRIPTION_ID"
az role assignment create \
--assignee-object-id "$MI_PRINCIPAL_ID" \
--assignee-principal-type ServicePrincipal \
--role "Monitoring Reader" \
--scope "/subscriptions/$SUBSCRIPTION_ID"
az role assignment create \
--assignee-object-id "$MI_PRINCIPAL_ID" \
--assignee-principal-type ServicePrincipal \
--role "User Access Administrator" \
--scope "/subscriptions/$SUBSCRIPTION_ID"
# ── 1f. Create the runner VM in subnet-runner ──────────────
- name: Create runner VM
run: |
cat > /tmp/cloud-init.yaml <<'CLOUD_INIT'
#cloud-config
package_update: true
package_upgrade: true
packages:
- curl
- jq
- unzip
- apt-transport-https
- ca-certificates
- gnupg
- lsb-release
runcmd:
- curl -sL https://aka.ms/InstallAzureCLIDeb | bash
- az aks install-cli
- mkdir -p /home/azureuser/actions-runner
- chown azureuser:azureuser /home/azureuser/actions-runner
- |
RUNNER_VERSION=$(curl -s https://api.github.com/repos/actions/runner/releases/latest | jq -r '.tag_name' | sed 's/^v//')
curl -sL "https://github.com/actions/runner/releases/download/v${RUNNER_VERSION}/actions-runner-linux-x64-${RUNNER_VERSION}.tar.gz" \
-o /tmp/actions-runner.tar.gz
tar xzf /tmp/actions-runner.tar.gz -C /home/azureuser/actions-runner
chown -R azureuser:azureuser /home/azureuser/actions-runner
rm -f /tmp/actions-runner.tar.gz
- /home/azureuser/actions-runner/bin/installdependencies.sh
CLOUD_INIT
az vm create \
--resource-group "$INFRA_RG" \
--name "$RUNNER_VM" \
--image Canonical:0001-com-ubuntu-server-jammy:22_04-lts-gen2:latest \
--size Standard_B2s \
--admin-username azureuser \
--generate-ssh-keys \
--assign-identity "$MI_RESOURCE_ID" \
--subnet "$SUBNET_RUNNER_ID" \
--public-ip-address "" \
--tags purpose=aks-poc component=runner \
--custom-data /tmp/cloud-init.yaml
# ── 1g. Wait for cloud-init to complete ────────────────────
- name: Wait for cloud-init
run: |
echo "Waiting for cloud-init to finish on the VM..."
for i in $(seq 1 30); do
STATUS=$(az vm run-command invoke \
--resource-group "$INFRA_RG" \
--name "$RUNNER_VM" \
--command-id RunShellScript \
--scripts "cloud-init status 2>/dev/null | grep -q 'done' && echo 'DONE' || echo 'WAITING'" \
--query "value[0].message" -o tsv 2>/dev/null || echo "WAITING")
if echo "$STATUS" | grep -q "DONE"; then
echo "Cloud-init completed."
break
fi
echo "Attempt $i/30: still waiting..."
sleep 30
done
# ── 1h. Register as a GitHub Actions self-hosted runner ────
- name: Register GitHub Actions runner
env:
GH_PAT: ${{ secrets.GH_PAT }}
run: |
REG_TOKEN=$(curl -s -X POST \
-H "Authorization: token $GH_PAT" \
-H "Accept: application/vnd.github+json" \
"https://api.github.com/repos/${{ github.repository }}/actions/runners/registration-token" \
| jq -r '.token')
if [ -z "$REG_TOKEN" ] || [ "$REG_TOKEN" = "null" ]; then
echo "::error::Failed to obtain runner registration token"
exit 1
fi
az vm run-command invoke \
--resource-group "$INFRA_RG" \
--name "$RUNNER_VM" \
--command-id RunShellScript \
--scripts "
cd /home/azureuser/actions-runner
sudo -u azureuser ./config.sh \
--url https://github.com/${{ github.repository }} \
--token $REG_TOKEN \
--name $RUNNER_VM \
--labels $RUNNER_LABEL \
--unattended \
--replace
./svc.sh install azureuser
./svc.sh start
"
# ── 1i. Wait for runner to come online ─────────────────────
- name: Wait for runner to come online
env:
GH_PAT: ${{ secrets.GH_PAT }}
run: |
echo "Waiting for self-hosted runner '$RUNNER_VM' to come online..."
for i in $(seq 1 20); do
ONLINE=$(curl -s \
-H "Authorization: token $GH_PAT" \
-H "Accept: application/vnd.github+json" \
"https://api.github.com/repos/${{ github.repository }}/actions/runners" \
| jq -r ".runners[] | select(.name == \"$RUNNER_VM\") | .status")
if [ "$ONLINE" = "online" ]; then
echo "Runner is online!"
exit 0
fi
echo "Attempt $i/20: runner status=${ONLINE:-not found}, waiting 15s..."
sleep 15
done
echo "::error::Runner did not come online within expected time"
exit 1
# ═══════════════════════════════════════════════════════════════
# Job 2: Deploy Private AKS + Validate on self-hosted runner
# ═══════════════════════════════════════════════════════════════
# Runs on the VM in subnet-runner (same VNet as subnet-aks).
# The runner can reach the private AKS API server endpoint
# because both subnets share the VNet and the private DNS zone
# is linked to it. Managed identity tokens are acquired via
# IMDS — Conditional Access location policies never apply.
# ═══════════════════════════════════════════════════════════════
deploy-and-log:
needs: setup-runner
runs-on: [self-hosted, "aks-poc-runner-${{ github.run_id }}"]
timeout-minutes: 60
steps:
# ── 2a. Checkout ───────────────────────────────────────────
- name: Checkout repository
uses: actions/checkout@v4
# ── 2b. Azure Login (Managed Identity on the runner VM) ────
- name: Azure Login (Managed Identity)
uses: azure/login@v2
with:
auth-type: IDENTITY
client-id: ${{ needs.setup-runner.outputs.mi_client_id }}
tenant-id: ${{ secrets.AZURE_TENANT_ID }}
subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
# ── 2c. Record Runner IP and Network Info ──────────────────
- name: Record runner network info
run: |
mkdir -p logs
RUNNER_IP=$(curl -s --max-time 10 ifconfig.me || echo "NO_PUBLIC_IP")
echo "RUNNER_IP=$RUNNER_IP" >> $GITHUB_ENV
echo "=== Runner Network Info ===" | tee logs/runner-network.log
echo "Public IP : $RUNNER_IP" | tee -a logs/runner-network.log
echo "Hostname : $(hostname)" | tee -a logs/runner-network.log
echo "Private IP: $(hostname -I | awk '{print $1}')" | tee -a logs/runner-network.log
echo "Subnet : $SUBNET_RUNNER (10.224.1.0/24)" | tee -a logs/runner-network.log
# ── 2d. Create AKS Resource Group ─────────────────────────
- name: Create AKS Resource Group
run: |
az group create \
--name "$AKS_RG" \
--location "$LOCATION" \
--tags purpose=aks-poc component=aks created=$(date -u +%Y-%m-%dT%H:%M:%SZ) run=${{ github.run_id }}
# ── 2e. Record Deployment Start Time ──────────────────────
- name: Record start time
run: |
echo "DEPLOY_START_TIME=$(date -u +%Y-%m-%dT%H:%M:%SZ)" >> $GITHUB_ENV
# ── 2f. Deploy Private AKS into shared VNet ────────────────
- name: Deploy Private AKS Cluster
id: deploy
run: |
SUBNET_AKS_ID=$(az network vnet subnet show \
--resource-group "$INFRA_RG" \
--vnet-name "$VNET_NAME" \
--name "$SUBNET_AKS" \
--query id -o tsv)
echo "Deploying private AKS cluster into subnet-aks..."
echo " Subnet ID: $SUBNET_AKS_ID"
if [ -z "$SUBNET_AKS_ID" ]; then
echo "::error::Failed to resolve subnet ID for $SUBNET_AKS in $VNET_NAME"
exit 1
fi
az aks create \
--resource-group "$AKS_RG" \
--name "$CLUSTER_NAME" \
--node-count 1 \
--node-vm-size Standard_B2s \
--network-plugin azure \
--vnet-subnet-id "$SUBNET_AKS_ID" \
--dns-service-ip 10.2.0.10 \
--service-cidr 10.2.0.0/24 \
--enable-private-cluster \
--enable-managed-identity \
--generate-ssh-keys \
--tier free 2>&1 | tee logs/aks-create.log
if [ "${PIPESTATUS[0]}" -ne 0 ]; then
echo "DEPLOY_FAILED=true" >> $GITHUB_ENV
echo "deploy_status=failed" >> "$GITHUB_OUTPUT"
else
echo "deploy_status=succeeded" >> "$GITHUB_OUTPUT"
fi
# ── 2g. Collect AKS Cluster Info ───────────────────────────
- name: Collect AKS cluster info
if: env.DEPLOY_FAILED != 'true'
run: |
echo "=== AKS Cluster Details ===" | tee logs/aks-cluster-info.log
az aks show \
--resource-group "$AKS_RG" \
--name "$CLUSTER_NAME" \
--query "{name:name, location:location, kubernetesVersion:kubernetesVersion, privateFqdn:privateFqdn, fqdn:fqdn, powerState:powerState.code, provisioningState:provisioningState, nodeResourceGroup:nodeResourceGroup, networkPlugin:networkProfile.networkPlugin, networkPolicy:networkProfile.networkPolicy, serviceCidr:networkProfile.serviceCidr, dnsServiceIP:networkProfile.dnsServiceIP, enablePrivateCluster:apiServerAccessProfile.enablePrivateCluster, identityType:identity.type}" \
-o json | tee -a logs/aks-cluster-info.log
# ── 2h. kubectl Validation ─────────────────────────────────
- name: Validate cluster with kubectl
if: env.DEPLOY_FAILED != 'true'
run: |
echo "=== kubectl Validation ===" | tee logs/kubectl-validation.log
echo "--- Getting credentials (private cluster) ---" | tee -a logs/kubectl-validation.log
az aks get-credentials \
--resource-group "$AKS_RG" \
--name "$CLUSTER_NAME" \
--overwrite-existing
echo "" | tee -a logs/kubectl-validation.log
echo "--- Cluster Info ---" | tee -a logs/kubectl-validation.log
kubectl cluster-info 2>&1 | tee -a logs/kubectl-validation.log
echo "" | tee -a logs/kubectl-validation.log
echo "--- Nodes ---" | tee -a logs/kubectl-validation.log
kubectl get nodes -o wide 2>&1 | tee -a logs/kubectl-validation.log
echo "" | tee -a logs/kubectl-validation.log
echo "--- System Pods ---" | tee -a logs/kubectl-validation.log
kubectl get pods -n kube-system -o wide 2>&1 | tee -a logs/kubectl-validation.log
echo "" | tee -a logs/kubectl-validation.log
echo "--- API Server Endpoint (private) ---" | tee -a logs/kubectl-validation.log
kubectl config view --minify -o jsonpath='{.clusters[0].cluster.server}' 2>&1 | tee -a logs/kubectl-validation.log
echo "" | tee -a logs/kubectl-validation.log
echo "" | tee -a logs/kubectl-validation.log
echo "--- Component Statuses ---" | tee -a logs/kubectl-validation.log
kubectl get cs 2>&1 | tee -a logs/kubectl-validation.log || true
echo "" | tee -a logs/kubectl-validation.log
echo "--- Namespaces ---" | tee -a logs/kubectl-validation.log
kubectl get namespaces 2>&1 | tee -a logs/kubectl-validation.log
# Verify private FQDN resolves from within the VNet
echo "" | tee -a logs/kubectl-validation.log
echo "--- Private FQDN DNS Resolution ---" | tee -a logs/kubectl-validation.log
PRIVATE_FQDN=$(az aks show --resource-group "$AKS_RG" --name "$CLUSTER_NAME" --query privateFqdn -o tsv)
echo "Private FQDN: $PRIVATE_FQDN" | tee -a logs/kubectl-validation.log
nslookup "$PRIVATE_FQDN" 2>&1 | tee -a logs/kubectl-validation.log || true
# Mark validation result
if kubectl get nodes --no-headers 2>/dev/null | grep -q "Ready"; then
echo "KUBECTL_VALIDATION=passed" >> $GITHUB_ENV
echo "VALIDATION: PASSED — nodes are Ready and reachable via private endpoint" | tee -a logs/kubectl-validation.log
else
echo "KUBECTL_VALIDATION=failed" >> $GITHUB_ENV
echo "VALIDATION: FAILED — could not verify node readiness" | tee -a logs/kubectl-validation.log
fi
# ── 2i. Log IPs (Activity Log) ────────────────────────────
- name: Log IPs (Activity Log)
if: always()
run: |
echo "=== Runner VM Outbound IP ===" | tee logs/ip-activity-log.log
echo "Runner IP: ${RUNNER_IP:-UNKNOWN}" | tee -a logs/ip-activity-log.log
echo "" | tee -a logs/ip-activity-log.log
echo "Waiting 60s for Activity Log propagation..." | tee -a logs/ip-activity-log.log
sleep 60
echo "=== ARM Operation Caller IPs (ContainerService) ===" | tee -a logs/ip-activity-log.log
az monitor activity-log list \
--resource-group "$AKS_RG" \
--start-time "$DEPLOY_START_TIME" \
--query "[?contains(operationName.value, 'Microsoft.ContainerService')].{op:operationName.value, caller:caller, clientIp:httpRequest.clientIpAddress, status:status.value, time:eventTimestamp}" \
-o table 2>&1 | tee -a logs/ip-activity-log.log || echo "Activity log query failed for ContainerService" | tee -a logs/ip-activity-log.log
echo "" | tee -a logs/ip-activity-log.log
echo "=== ARM Operation Caller IPs (Network) ===" | tee -a logs/ip-activity-log.log
az monitor activity-log list \
--resource-group "$AKS_RG" \
--start-time "$DEPLOY_START_TIME" \
--query "[?contains(operationName.value, 'Microsoft.Network')].{op:operationName.value, caller:caller, clientIp:httpRequest.clientIpAddress, status:status.value, time:eventTimestamp}" \
-o table 2>&1 | tee -a logs/ip-activity-log.log || echo "Activity log query failed for Network" | tee -a logs/ip-activity-log.log
echo "" | tee -a logs/ip-activity-log.log
echo "=== IP Comparison ===" | tee -a logs/ip-activity-log.log
echo "Runner IP: ${RUNNER_IP:-UNKNOWN}" | tee -a logs/ip-activity-log.log
echo "Compare the clientIp values above against the runner IP to verify traffic routes." | tee -a logs/ip-activity-log.log
# ── 2j. Log IPs (Entra Sign-In) ───────────────────────────
- name: Log IPs (Entra Sign-In Logs)
if: always()
continue-on-error: true
run: |
echo "=== Entra ID Sign-In IPs (requires P1/P2) ===" | tee logs/ip-signin-log.log
MI_CLIENT_ID="${{ needs.setup-runner.outputs.mi_client_id }}"
az rest --method get \
--url "https://graph.microsoft.com/v1.0/auditLogs/signIns?\$filter=createdDateTime ge $DEPLOY_START_TIME and appId eq '${MI_CLIENT_ID}'" \
--query "value[].{ip:ipAddress, app:appDisplayName, time:createdDateTime, status:status.errorCode}" \
-o table 2>&1 | tee -a logs/ip-signin-log.log || echo "Sign-in log query failed (may require Entra P1/P2)" | tee -a logs/ip-signin-log.log
# ── 2k. Upload Logs as Artifacts ──────────────────────────
- name: Upload logs
if: always()
uses: actions/upload-artifact@v4
with:
name: aks-poc-logs-${{ github.run_id }}
path: logs/
retention-days: 30
# ── 2l. Write Job Summary ──────────────────────────────────
- name: Write job summary
if: always()
run: |
DEPLOY_STATUS="${{ steps.deploy.outputs.deploy_status || 'unknown' }}"
KUBECTL_STATUS="${KUBECTL_VALIDATION:-skipped}"
# Collect cluster info for summary
K8S_VERSION=""
PRIVATE_FQDN=""
NODE_COUNT=""
PROVISIONING_STATE=""
if [ "$DEPLOY_STATUS" = "succeeded" ]; then
K8S_VERSION=$(az aks show --resource-group "$AKS_RG" --name "$CLUSTER_NAME" --query kubernetesVersion -o tsv 2>/dev/null || echo "N/A")
PRIVATE_FQDN=$(az aks show --resource-group "$AKS_RG" --name "$CLUSTER_NAME" --query privateFqdn -o tsv 2>/dev/null || echo "N/A")
NODE_COUNT=$(az aks show --resource-group "$AKS_RG" --name "$CLUSTER_NAME" --query agentPoolProfiles[0].count -o tsv 2>/dev/null || echo "N/A")
PROVISIONING_STATE=$(az aks show --resource-group "$AKS_RG" --name "$CLUSTER_NAME" --query provisioningState -o tsv 2>/dev/null || echo "N/A")
fi
# Build status badges
if [ "$DEPLOY_STATUS" = "succeeded" ]; then
DEPLOY_BADGE="✅ Succeeded"
else
DEPLOY_BADGE="❌ Failed"
fi
if [ "$KUBECTL_STATUS" = "passed" ]; then
KUBECTL_BADGE="✅ Passed"
elif [ "$KUBECTL_STATUS" = "skipped" ]; then
KUBECTL_BADGE="⏭️ Skipped"
else
KUBECTL_BADGE="❌ Failed"
fi
cat >> "$GITHUB_STEP_SUMMARY" <<EOF
## Private AKS PoC — Run Summary
### Deployment Result
| Check | Status |
|-------|--------|
| AKS Cluster Creation | $DEPLOY_BADGE |
| kubectl Validation | $KUBECTL_BADGE |
### Cluster Details
| Property | Value |
|----------|-------|
| Cluster Name | \`$CLUSTER_NAME\` |
| Resource Group | \`$AKS_RG\` |
| Location | \`$LOCATION\` |
| Kubernetes Version | \`${K8S_VERSION:-N/A}\` |
| Private FQDN | \`${PRIVATE_FQDN:-N/A}\` |
| Node Count | \`${NODE_COUNT:-N/A}\` |
| Provisioning State | \`${PROVISIONING_STATE:-N/A}\` |
| Node VM Size | \`Standard_B2s\` |
| Network Plugin | \`azure\` |
| AKS Tier | \`free\` |
### Network Architecture
| Component | Subnet | CIDR |
|-----------|--------|------|
| AKS Nodes | \`subnet-aks\` | \`10.224.0.0/24\` |
| Runner VM | \`subnet-runner\` | \`10.224.1.0/24\` |
| K8s Services | internal | \`10.2.0.0/24\` |
| VNet | \`$VNET_NAME\` | \`10.224.0.0/16\` |
### Identity & Authentication
| Property | Value |
|----------|-------|
| Runner Auth | Managed Identity (IMDS) |
| AKS Auth | Managed Identity |
| MI Client ID | \`${{ needs.setup-runner.outputs.mi_client_id }}\` |
| CA Bypass | ✅ MI tokens via IMDS — not evaluated by CA |
### IP Verification
| Source | IP |
|--------|----|
| Runner VM Outbound | \`${RUNNER_IP:-UNKNOWN}\` |
> Activity Log and Entra Sign-In IP details are in the uploaded artifacts.
### Runner VM
| Property | Value |
|----------|-------|
| VM Name | \`$RUNNER_VM\` |
| Infrastructure RG | \`$INFRA_RG\` |
| VNet Placement | \`$VNET_NAME / $SUBNET_RUNNER\` |
| Private API Access | Runner is in the same VNet as AKS — can reach private endpoint |
### Key Validation Points
1. **Managed Identity bypasses CA**: Token acquisition via IMDS (\`169.254.169.254\`) stays within Azure fabric
2. **Private cluster API access**: Runner VM in \`subnet-runner\` can reach AKS API via private endpoint in \`subnet-aks\`
3. **kubectl works**: Cluster is fully operational and manageable from the VNet
---
*Run ID: \`${{ github.run_id }}\` | Triggered by: \`${{ github.actor }}\` | Deploy start: \`$DEPLOY_START_TIME\`*
EOF
# ── 2m. Wait Before Teardown ──────────────────────────────
- name: Wait before teardown
if: env.DEPLOY_FAILED != 'true'
run: |
WAIT=${{ github.event.inputs.wait_minutes || '30' }}
echo "Waiting ${WAIT} minutes before teardown..."
sleep $((WAIT * 60))
# ── 2n. Teardown AKS Resources ────────────────────────────
- name: Teardown AKS resources
if: always()
run: |
echo "Deleting resource group $AKS_RG..."
az group delete --name "$AKS_RG" --yes --no-wait
echo "Resource group deletion initiated."
# ── 2o. Azure Logout ───────────────────────────────────────
- name: Azure Logout
if: always()
run: az logout
# ═══════════════════════════════════════════════════════════════
# Job 3: Teardown runner infrastructure (GitHub-hosted runner)
# ═══════════════════════════════════════════════════════════════
# Always runs — deregisters the self-hosted runner from the repo
# and deletes the infrastructure resource group (VNet, VM, MI).
# ═══════════════════════════════════════════════════════════════
teardown-runner:
needs: [setup-runner, deploy-and-log]
if: always()
runs-on: ubuntu-latest
timeout-minutes: 15
steps:
# ── 3a. Azure Login ────────────────────────────────────────
- name: Azure Login
uses: azure/login@v2
with:
client-id: ${{ secrets.AZURE_CLIENT_ID }}
tenant-id: ${{ secrets.AZURE_TENANT_ID }}
subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
# ── 3b. Deregister runner from GitHub ──────────────────────
- name: Deregister self-hosted runner
continue-on-error: true
env:
GH_PAT: ${{ secrets.GH_PAT }}
run: |
RUNNER_ID=$(curl -s \
-H "Authorization: token $GH_PAT" \
-H "Accept: application/vnd.github+json" \
"https://api.github.com/repos/${{ github.repository }}/actions/runners" \
| jq -r ".runners[] | select(.name == \"$RUNNER_VM\") | .id")
if [ -n "$RUNNER_ID" ] && [ "$RUNNER_ID" != "null" ]; then
curl -s -X DELETE \
-H "Authorization: token $GH_PAT" \
-H "Accept: application/vnd.github+json" \
"https://api.github.com/repos/${{ github.repository }}/actions/runners/$RUNNER_ID"
echo "Runner deregistered (ID: $RUNNER_ID)."
else
echo "Runner not found or already removed."
fi
# ── 3c. Safety-net: ensure AKS RG is deleted ──────────────
- name: Ensure AKS resources are deleted
continue-on-error: true
run: |
if az group show --name "$AKS_RG" --output none 2>/dev/null; then
echo "AKS resource group still exists — deleting..."
az group delete --name "$AKS_RG" --yes --no-wait
fi
# ── 3d. Delete infrastructure resource group ───────────────
- name: Delete infrastructure resource group
run: |
az group delete --name "$INFRA_RG" --yes --no-wait
echo "Infrastructure resource group deletion initiated."
# ── 3e. Azure Logout ───────────────────────────────────────
- name: Azure Logout
if: always()
run: az logout