Skip to content

Commit d4ad417

Browse files
committed
Better health checks
1 parent 855faf5 commit d4ad417

2 files changed

Lines changed: 19 additions & 10 deletions

File tree

.github/workflows/deploy.yml

Lines changed: 17 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -169,18 +169,27 @@ jobs:
169169
local svc="$1" timeout="${2:-300}"
170170
local end=$((SECONDS + timeout))
171171
while (( SECONDS < end )); do
172-
local desired running state
173-
desired="$(sudo docker service inspect "$svc" --format '{{.Spec.Mode.Replicated.Replicas}}' 2>/dev/null || echo "")"
174-
running="$(sudo docker service ps "$svc" --filter desired-state=running --format '{{.CurrentState}}' 2>/dev/null | grep -c '^Running' || true)"
172+
local state
175173
state="$(sudo docker service inspect "$svc" --format '{{if .UpdateStatus}}{{.UpdateStatus.State}}{{end}}' 2>/dev/null || echo "")"
176-
echo " $svc: desired=$desired running=$running state=$state"
177-
if [[ -n "$desired" && "$running" == "$desired" ]] && { [[ -z "$state" ]] || [[ "$state" == "completed" ]]; }; then
178-
echo " $svc rollout complete"
179-
return 0
174+
echo " $svc: update_state=$state"
175+
if [[ "$state" == "rollback_started" || "$state" == "rollback_completed" ]]; then
176+
echo " ERROR: $svc rolled back!"
177+
sudo docker service ps "$svc" --no-trunc --format '{{.Name}} {{.CurrentState}} {{.Error}}' | head -10
178+
return 1
180179
fi
181-
sleep 5
180+
if [[ "$state" == "completed" ]] || [[ -z "$state" ]]; then
181+
# Verify all running tasks are healthy (not just started)
182+
local unhealthy
183+
unhealthy="$(sudo docker service ps "$svc" --filter desired-state=running --format '{{.CurrentState}}' 2>/dev/null | grep -cv '^Running' || true)"
184+
if [[ "$unhealthy" == "0" ]]; then
185+
echo " $svc rollout complete"
186+
return 0
187+
fi
188+
fi
189+
sleep 10
182190
done
183191
echo "Rollout timeout for $svc"
192+
sudo docker service ps "$svc" --no-trunc --format '{{.Name}} {{.CurrentState}} {{.Error}}' | head -10
184193
return 1
185194
}
186195

docker-compose.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -38,11 +38,11 @@ services:
3838
tmpfs:
3939
- /tmp:size=64m
4040
healthcheck:
41-
test: ["CMD-SHELL", "node -e \"require('http').get('http://127.0.0.1:3000/api/health', r => process.exit(r.statusCode < 400 ? 0 : 1)).on('error', () => process.exit(1));\""]
41+
test: ["CMD-SHELL", "node -e \"const h=require('http'),check=(port,path)=>new Promise((res,rej)=>{h.get('http://127.0.0.1:'+port+path,r=>r.statusCode<400?res():rej()).on('error',rej)});Promise.all([check(3000,'/api/health'),check(4321,'/')]).then(()=>process.exit(0)).catch(()=>process.exit(1));\""]
4242
interval: 30s
4343
timeout: 5s
4444
retries: 3
45-
start_period: 15s
45+
start_period: 30s
4646
logging:
4747
driver: json-file
4848
options:

0 commit comments

Comments
 (0)