diff --git a/bin/setup-policy-routes.sh b/bin/setup-policy-routes.sh index 7c51089..b0e3b95 100755 --- a/bin/setup-policy-routes.sh +++ b/bin/setup-policy-routes.sh @@ -50,12 +50,18 @@ refresh) start) register_networkd_reloader counter=0 + max_wait=3000 # 5 minute timeout to avoid infinite loop if sysfs node never appears while [ ! -e "/sys/class/net/${iface}" ]; do if ((counter % 1000 == 0)); then debug "Waiting for sysfs node to exist for ${iface} (iteration $counter)" fi sleep 0.1 - ((counter++)) + ((counter++)) || true + if ((counter >= max_wait)); then + error "Timed out waiting for sysfs node for ${iface} after $((counter / 10)) seconds" + /usr/bin/systemctl disable --now refresh-policy-routes@${iface}.timer 2>/dev/null || true + exit 2 + fi done debug "Starting configuration for $iface" debug /lib/systemd/systemd-networkd-wait-online -i "$iface" diff --git a/lib/lib.sh b/lib/lib.sh index efc2ef3..b03a665 100644 --- a/lib/lib.sh +++ b/lib/lib.sh @@ -627,10 +627,22 @@ maybe_reload_networkd() { register_networkd_reloader() { local -i registered=1 cnt=0 - local -i max=10000 + local -i max=3000 # 300s (3000 × 0.1s); matches sysfs wait timeout in setup-policy-routes.sh local -r lockfile="${lockdir}/${iface}" local old_opts=$- + # If the existing lock owner is no longer alive, remove the stale lockfile + # so subsequent invocations don't spin for up to 300 seconds waiting on a + # process that will never release it. + if [ -f "${lockfile}" ]; then + local existing_pid + existing_pid=$(cat "${lockfile}" 2>/dev/null) + if [ -n "$existing_pid" ] && ! 
kill -0 "$existing_pid" 2>/dev/null; then + debug "Removing stale lock from dead process $existing_pid for ${iface}" + rm -f "${lockfile}" + fi + fi + # Disable -o errexit in the following block so we can capture # nonzero exit codes from a redirect without considering them # fatal errors diff --git a/systemd/system/policy-routes@.service b/systemd/system/policy-routes@.service index 675e0e7..5093bbd 100644 --- a/systemd/system/policy-routes@.service +++ b/systemd/system/policy-routes@.service @@ -17,4 +17,5 @@ User=root ExecStart=/usr/bin/setup-policy-routes %i start Restart=on-failure RestartSec=1 +RestartPreventExitStatus=2 KillMode=process