From 90e0bf8d1034f080e0963ce4f96a331c3c7cf459 Mon Sep 17 00:00:00 2001 From: Anthony Ettinger Date: Sun, 21 Jun 2026 08:14:18 +0000 Subject: [PATCH] fix(deploy): restart orphaned podcast worker + prevent start-limiter wedge The bittorrented-podcast-worker systemd service is a separate long-running process that `next start` does not recreate. When it crash-looped during the June-7 mise/pnpm path drift it hit systemd's start limiter, went `failed`, and stayed dead for ~2 weeks: podcast episode ingestion stopped on 2026-06-07 because deploy-droplet.yml only restarts the main app + iptv worker, never the podcast worker, and its unit lacked StartLimitIntervalSec=0. - deploy-droplet.yml: reset-failed + restart bittorrented-podcast-worker on every deploy (and reset-failed the iptv worker for parity); report its status. - setup-server.sh: add StartLimitIntervalSec=0 to the podcast worker unit so a transient crash self-heals instead of wedging permanently. Co-Authored-By: Claude Opus 4.8 --- .github/workflows/deploy-droplet.yml | 12 +++++++++++- scripts/setup-server.sh | 3 +++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/.github/workflows/deploy-droplet.yml b/.github/workflows/deploy-droplet.yml index 42f8046..8049078 100644 --- a/.github/workflows/deploy-droplet.yml +++ b/.github/workflows/deploy-droplet.yml @@ -193,9 +193,18 @@ jobs: sudo systemctl restart bittorrented || echo "Warning: Could not restart main service" echo "✓ Main service restart attempted" + sudo systemctl reset-failed bittorrented-iptv-worker || true sudo systemctl restart bittorrented-iptv-worker || echo "Warning: Could not restart IPTV worker" echo "✓ IPTV worker restart attempted" - + + # The podcast worker is a long-running process that is NOT recreated by + # `next start`; if it crash-loops (e.g. the mise/pnpm path drift) it hits + # systemd's start limiter and stays `failed` forever because nothing here + # restarts it. Reset the limiter and restart it on every deploy. 
+ sudo systemctl reset-failed bittorrented-podcast-worker || true + sudo systemctl restart bittorrented-podcast-worker || echo "Warning: Could not restart podcast worker" + echo "✓ Podcast worker restart attempted" + echo "" echo "=== Verifying deployment ===" # Give the unit a moment to either become active or crash so the @@ -212,6 +221,7 @@ jobs: echo "failed" > "$STATUS_FILE"; exit 1 fi systemctl is-active bittorrented-iptv-worker || echo "IPTV worker status unknown" + systemctl is-active bittorrented-podcast-worker || echo "Podcast worker status unknown" echo "Waiting for HTTP health check..." HEALTH_OK=false diff --git a/scripts/setup-server.sh b/scripts/setup-server.sh index b538e9a..ada81c9 100644 --- a/scripts/setup-server.sh +++ b/scripts/setup-server.sh @@ -933,6 +933,9 @@ WorkingDirectory=${DEPLOY_PATH} ExecStart=/bin/bash -c 'set -a; source ${DEPLOY_PATH}/.env; set +a; exec ${PNPM_HOME}/pnpm podcast-worker' Restart=on-failure RestartSec=30 +# Disable systemd's start limiter so a crash-looping worker keeps retrying and self-heals. +# This is [Service]: StartLimitIntervalSec= is [Unit]-only and ignored here, so use the legacy name. +StartLimitInterval=0 Environment=NODE_ENV=production Environment=PATH=${PNPM_HOME}:/usr/local/bin:/usr/bin:/bin