From bdabb507ed737b947baff0a455a77a9b8c3114f0 Mon Sep 17 00:00:00 2001 From: Shani Elharrar Date: Thu, 14 Apr 2022 08:52:17 +0300 Subject: [PATCH 1/3] refactor: Use bash variable replace instead of sed --- docker-entrypoint | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-entrypoint b/docker-entrypoint index eb482bf..f82e625 100755 --- a/docker-entrypoint +++ b/docker-entrypoint @@ -11,7 +11,7 @@ WEBHOOK_URL=${WEBHOOK_URL:-""} # only use unix domain socket if no TCP endpoint is defined case "${DOCKER_SOCK}" in - "tcp://"*) HTTP_ENDPOINT="$(echo ${DOCKER_SOCK} | sed 's#tcp://#https://#')" + "tcp://"*) HTTP_ENDPOINT="${DOCKER_SOCK//tcp:\/\//https://}" CA="--cacert /certs/ca.pem" CLIENT_KEY="--key /certs/client-key.pem" CLIENT_CERT="--cert /certs/client-cert.pem" From 25b33e4c7414e262883e0ad07efd516429488354 Mon Sep 17 00:00:00 2001 From: Shani Elharrar Date: Thu, 14 Apr 2022 08:53:03 +0300 Subject: [PATCH 2/3] shellcheck fixes --- docker-entrypoint | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docker-entrypoint b/docker-entrypoint index f82e625..a47cc7a 100755 --- a/docker-entrypoint +++ b/docker-entrypoint @@ -26,6 +26,7 @@ AUTOHEAL_START_PERIOD=${AUTOHEAL_START_PERIOD:-0} AUTOHEAL_INTERVAL=${AUTOHEAL_INTERVAL:-5} AUTOHEAL_DEFAULT_STOP_TIMEOUT=${AUTOHEAL_DEFAULT_STOP_TIMEOUT:-10} +# shellcheck disable=2086 docker_curl() { curl --max-time "${CURL_TIMEOUT}" --no-buffer -s \ ${CA} ${CLIENT_KEY} ${CLIENT_CERT} \ @@ -58,18 +59,18 @@ restart_container() { } notify_webhook() { - local text="$@" + local text="$1" if [ -n "$WEBHOOK_URL" ] then # execute webhook requests as background process to prevent healer from blocking - curl -X POST -H "Content-type: application/json" -d "$(generate_webhook_payload $text)" $WEBHOOK_URL + curl -X POST -H "Content-type: application/json" -d "$(generate_webhook_payload "$text")" "$WEBHOOK_URL" fi } # 
https://towardsdatascience.com/proper-ways-to-pass-environment-variables-in-json-for-curl-post-f797d2698bf3 generate_webhook_payload() { - local text="$@" + local text="$1" cat < Date: Thu, 14 Apr 2022 08:54:10 +0300 Subject: [PATCH 3/3] Feature: Allow starting exited containers --- Dockerfile | 1 + README.md | 1 + docker-entrypoint | 49 +++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 49 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 1485535..e3917e5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,6 +9,7 @@ ENV AUTOHEAL_CONTAINER_LABEL=autoheal \ AUTOHEAL_START_PERIOD=0 \ AUTOHEAL_INTERVAL=5 \ AUTOHEAL_DEFAULT_STOP_TIMEOUT=10 \ + AUTOHEAL_START_EXITED_CONTAINERS=false \ DOCKER_SOCK=/var/run/docker.sock \ CURL_TIMEOUT=30 \ WEBHOOK_URL="" diff --git a/README.md b/README.md index f9bcdcb..86519e6 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,7 @@ AUTOHEAL_CONTAINER_LABEL=autoheal AUTOHEAL_INTERVAL=5 # check every 5 seconds AUTOHEAL_START_PERIOD=0 # wait 0 seconds before first health check AUTOHEAL_DEFAULT_STOP_TIMEOUT=10 # Docker waits max 10 seconds (the Docker default) for a container to stop before killing during restarts (container overridable via label, see below) +AUTOHEAL_START_EXITED_CONTAINERS=false # set "true" to start docker containers that match the label which are in "exited" state DOCKER_SOCK=/var/run/docker.sock # Unix socket for curl requests to Docker API CURL_TIMEOUT=30 # --max-time seconds for curl requests to Docker API WEBHOOK_URL="" # post message to the webhook if a container was restarted (or restart failed) diff --git a/docker-entrypoint b/docker-entrypoint index a47cc7a..f2fcc27 100755 --- a/docker-entrypoint +++ b/docker-entrypoint @@ -25,6 +25,7 @@ AUTOHEAL_CONTAINER_LABEL=${AUTOHEAL_CONTAINER_LABEL:-autoheal} AUTOHEAL_START_PERIOD=${AUTOHEAL_START_PERIOD:-0} AUTOHEAL_INTERVAL=${AUTOHEAL_INTERVAL:-5} AUTOHEAL_DEFAULT_STOP_TIMEOUT=${AUTOHEAL_DEFAULT_STOP_TIMEOUT:-10} 
+AUTOHEAL_START_EXITED_CONTAINERS=${AUTOHEAL_START_EXITED_CONTAINERS:-false} # shellcheck disable=2086 docker_curl() { @@ -46,10 +47,19 @@ get_container_info() { else label_filter=",\"label\":\[\"${AUTOHEAL_CONTAINER_LABEL}=true\"\]" fi - url="${HTTP_ENDPOINT}/containers/json?filters=\{\"health\":\[\"unhealthy\"\]${label_filter}\}" + url="${HTTP_ENDPOINT}/containers/json?filters=\{$1${label_filter}\}" docker_curl "$url" } +get_unhealthy_container_info() { + get_container_info "\"health\":\[\"unhealthy\"\]" +} + +get_exit_container_info() { + get_container_info "\"status\":\[\"exited\"\]" +} + + # shellcheck disable=2039 restart_container() { local container_id="$1" @@ -58,6 +68,14 @@ restart_container() { docker_curl -f -X POST "${HTTP_ENDPOINT}/containers/${container_id}/restart?t=${timeout}" } +# shellcheck disable=2039 +start_container() { + local container_id="$1" + + docker_curl -f -X POST "${HTTP_ENDPOINT}/containers/${container_id}/start" +} + + notify_webhook() { local text="$1" @@ -97,7 +115,7 @@ if [ "$1" = "autoheal" ] && [ -e "$DOCKER_SOCK" ];then while true do STOP_TIMEOUT=".Labels[\"autoheal.stop.timeout\"] // $AUTOHEAL_DEFAULT_STOP_TIMEOUT" - get_container_info | \ + get_unhealthy_container_info | \ jq -r "foreach .[] as \$CONTAINER([];[]; \$CONTAINER | .Id, .Names[0], .State, ${STOP_TIMEOUT})" | \ while read -r CONTAINER_ID && read -r CONTAINER_NAME && read -r CONTAINER_STATE && read -r TIMEOUT do @@ -122,6 +140,33 @@ if [ "$1" = "autoheal" ] && [ -e "$DOCKER_SOCK" ];then fi fi done + + if [ "$AUTOHEAL_START_EXITED_CONTAINERS" = "true" ] + then + get_exit_container_info | \ + jq -r "foreach .[] as \$CONTAINER([];[]; \$CONTAINER | .Id, .Names[0])" | \ + while read -r CONTAINER_ID && read -r CONTAINER_NAME + do + # shellcheck disable=2039 + CONTAINER_SHORT_ID=${CONTAINER_ID:0:12} + DATE=$(date +%d-%m-%Y" "%H:%M:%S) + + if [ "$CONTAINER_NAME" = "null" ] + then + echo "$DATE Container name of (${CONTAINER_SHORT_ID}) is null, which implies container does 
not exist - don't restart" >&2 + else + echo "$DATE Container $CONTAINER_NAME (${CONTAINER_SHORT_ID}) found to be exited - Starting container now" + if ! start_container "$CONTAINER_ID" + then + echo "$DATE Starting container $CONTAINER_SHORT_ID failed" >&2 + notify_webhook "Container ${CONTAINER_NAME:1} (${CONTAINER_SHORT_ID}) found to be exited. Failed to start the container!" & + else + notify_webhook "Container ${CONTAINER_NAME:1} (${CONTAINER_SHORT_ID}) found to be exited. Successfully started the container!" & + fi + fi + done + fi + sleep "$AUTOHEAL_INTERVAL" done