diff --git a/charts/shield/Chart.yaml b/charts/shield/Chart.yaml
index 49221b8d9..5f45199a8 100644
--- a/charts/shield/Chart.yaml
+++ b/charts/shield/Chart.yaml
@@ -13,5 +13,5 @@ maintainers:
- name: mavimo
email: marcovito.moscaritolo@sysdig.com
type: application
-version: 1.37.1
+version: 1.38.0
appVersion: "1.0.0"
diff --git a/charts/shield/README.md b/charts/shield/README.md
index e157a66d0..6c3a0145e 100644
--- a/charts/shield/README.md
+++ b/charts/shield/README.md
@@ -297,7 +297,10 @@ The following table lists the configurable parameters of the `shield` chart and
| gke_autopilot.allowlist_version | The Allowlist version label applied to host-shield workloads. Must match an AllowlistSynchronizer the cluster knows about. (Replaces the deprecated top-level "gke_autopilot_allowlist", which is still honored and takes precedence when set.) | sysdig-agent-v1.1.4 |
| gke_autopilot.allowlist_waiter.enabled | Enable the waiter Job | false |
| gke_autopilot.allowlist_waiter.timeout | Maximum time the Job will block on the AllowlistSynchronizer reaching Ready | 120s |
+| gke_autopilot.allowlist_waiter.active_deadline_seconds | Maximum seconds the waiter Job (all Pod attempts, retries included) may stay active before Kubernetes terminates its Pods and marks the Job failed. Acts as a Job-level guard against a Pod hanging before the inner `kubectl wait` timeout fires (image-pull stalls, scheduler delays, admission webhook hangs). Should be greater than `timeout` to leave headroom for pod startup. Takes precedence over the Job's retry limit, so no retry starts once it has elapsed. | 300 |
| gke_autopilot.allowlist_waiter.service_account_name | Override the name of the waiter ServiceAccount (defaults to -allowlist-waiter) | |
+| gke_autopilot.allowlist_waiter.create_rbac | Create the RBAC resources (ServiceAccount, ClusterRole, ClusterRoleBinding) for the allowlist waiter Job. Set to false to manage them externally. | true |
+| gke_autopilot.allowlist_waiter.rbac_annotations | Additional annotations applied to the waiter SA/ClusterRole/ClusterRoleBinding | {} |
| gke_autopilot.allowlist_waiter.image.registry | The registry where the kubectl image is stored | quay.io |
| gke_autopilot.allowlist_waiter.image.repository | The repository where the kubectl image is stored | sysdig/kubectl |
| gke_autopilot.allowlist_waiter.image.tag | The tag for the kubectl image | 1.34.3-1.6.21 |
diff --git a/charts/shield/templates/host/gke-allowlist-waiter-clusterrole.yaml b/charts/shield/templates/host/gke-allowlist-waiter-clusterrole.yaml
index 20f2390d7..6bc08241c 100644
--- a/charts/shield/templates/host/gke-allowlist-waiter-clusterrole.yaml
+++ b/charts/shield/templates/host/gke-allowlist-waiter-clusterrole.yaml
@@ -1,5 +1,5 @@
{{- if (include "host.allowlist_waiter.enabled" .) -}}
-{{- if .Values.host.rbac.create }}
+{{- if .Values.gke_autopilot.allowlist_waiter.create_rbac }}
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
@@ -9,8 +9,8 @@ metadata:
annotations:
helm.sh/hook: "pre-install,pre-upgrade"
helm.sh/hook-weight: "-5"
- helm.sh/hook-delete-policy: "before-hook-creation,hook-succeeded"
- {{- with .Values.host.rbac.annotations }}
+ helm.sh/hook-delete-policy: "before-hook-creation"
+ {{- with .Values.gke_autopilot.allowlist_waiter.rbac_annotations }}
{{- toYaml . | nindent 4 }}
{{- end }}
rules:
diff --git a/charts/shield/templates/host/gke-allowlist-waiter-clusterrolebinding.yaml b/charts/shield/templates/host/gke-allowlist-waiter-clusterrolebinding.yaml
index b469d4157..9f195e98a 100644
--- a/charts/shield/templates/host/gke-allowlist-waiter-clusterrolebinding.yaml
+++ b/charts/shield/templates/host/gke-allowlist-waiter-clusterrolebinding.yaml
@@ -1,5 +1,5 @@
{{- if (include "host.allowlist_waiter.enabled" .) -}}
-{{- if .Values.host.rbac.create }}
+{{- if .Values.gke_autopilot.allowlist_waiter.create_rbac }}
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
@@ -9,8 +9,8 @@ metadata:
annotations:
helm.sh/hook: "pre-install,pre-upgrade"
helm.sh/hook-weight: "-5"
- helm.sh/hook-delete-policy: "before-hook-creation,hook-succeeded"
- {{- with .Values.host.rbac.annotations }}
+ helm.sh/hook-delete-policy: "before-hook-creation"
+ {{- with .Values.gke_autopilot.allowlist_waiter.rbac_annotations }}
{{- toYaml . | nindent 4 }}
{{- end }}
roleRef:
diff --git a/charts/shield/templates/host/gke-allowlist-waiter-job.yaml b/charts/shield/templates/host/gke-allowlist-waiter-job.yaml
index 968951752..23e0529c3 100644
--- a/charts/shield/templates/host/gke-allowlist-waiter-job.yaml
+++ b/charts/shield/templates/host/gke-allowlist-waiter-job.yaml
@@ -9,9 +9,10 @@ metadata:
annotations:
helm.sh/hook: "pre-install,pre-upgrade"
helm.sh/hook-weight: "5"
- helm.sh/hook-delete-policy: "before-hook-creation,hook-succeeded"
+ helm.sh/hook-delete-policy: "before-hook-creation,hook-succeeded,hook-failed"
spec:
backoffLimit: 3
+ activeDeadlineSeconds: {{ .Values.gke_autopilot.allowlist_waiter.active_deadline_seconds }}
template:
metadata:
name: {{ include "host.allowlist_waiter.fullname" . }}
@@ -35,12 +36,26 @@ spec:
- /bin/bash
- -c
- |
- set -euo pipefail
- echo "Waiting for AllowlistSynchronizer/sysdig-agent-allowlist-synchronizer to become Ready..."
+ set -uo pipefail
+ NAMESPACE={{ .Release.Namespace }}
+ SYNC_NAME=sysdig-agent-allowlist-synchronizer
+ TIMEOUT={{ .Values.gke_autopilot.allowlist_waiter.timeout }}
+
+ echo "Waiting for AllowlistSynchronizer/$SYNC_NAME to become Ready (timeout $TIMEOUT)..."
kubectl wait --for=condition=Ready \
- allowlistsynchronizer/sysdig-agent-allowlist-synchronizer \
- -n {{ .Release.Namespace }} \
- --timeout={{ .Values.gke_autopilot.allowlist_waiter.timeout }}
+ allowlistsynchronizer/"$SYNC_NAME" \
+ -n "$NAMESPACE" \
+ --timeout="$TIMEOUT"
+ ec=$?
+
+ if [ "$ec" -ne 0 ]; then
+ echo "---" >&2
+ echo "kubectl wait failed (exit $ec). Dumping AllowlistSynchronizer state for diagnostics:" >&2
+ kubectl describe -n "$NAMESPACE" allowlistsynchronizer/"$SYNC_NAME" >&2 || true
+ echo "---" >&2
+ kubectl get -n "$NAMESPACE" allowlistsynchronizer/"$SYNC_NAME" -o yaml >&2 || true
+ exit "$ec"
+ fi
echo "AllowlistSynchronizer is Ready."
resources:
{{- toYaml .Values.gke_autopilot.allowlist_waiter.resources | nindent 12 }}
diff --git a/charts/shield/templates/host/gke-allowlist-waiter-serviceaccount.yaml b/charts/shield/templates/host/gke-allowlist-waiter-serviceaccount.yaml
index 46e401057..039c8c7e2 100644
--- a/charts/shield/templates/host/gke-allowlist-waiter-serviceaccount.yaml
+++ b/charts/shield/templates/host/gke-allowlist-waiter-serviceaccount.yaml
@@ -1,5 +1,5 @@
{{- if (include "host.allowlist_waiter.enabled" .) -}}
-{{- if .Values.host.rbac.create }}
+{{- if .Values.gke_autopilot.allowlist_waiter.create_rbac }}
apiVersion: v1
kind: ServiceAccount
metadata:
@@ -10,8 +10,8 @@ metadata:
annotations:
helm.sh/hook: "pre-install,pre-upgrade"
helm.sh/hook-weight: "-5"
- helm.sh/hook-delete-policy: "before-hook-creation,hook-succeeded"
- {{- with .Values.host.rbac.annotations }}
+ helm.sh/hook-delete-policy: "before-hook-creation"
+ {{- with .Values.gke_autopilot.allowlist_waiter.rbac_annotations }}
{{- toYaml . | nindent 4 }}
{{- end }}
{{- end }}
diff --git a/charts/shield/tests/host/gke-allowlist-synchronizer_test.yaml b/charts/shield/tests/host/gke-allowlist-synchronizer_test.yaml
index 07d9eb255..7fcaa95eb 100644
--- a/charts/shield/tests/host/gke-allowlist-synchronizer_test.yaml
+++ b/charts/shield/tests/host/gke-allowlist-synchronizer_test.yaml
@@ -65,10 +65,28 @@ tests:
count: 0
template: templates/host/gke-allowlist-waiter-job.yaml
- - it: Does not render waiter SA/CR/CRB when host.rbac.create is false
+ - it: Renders waiter SA/CR/CRB even when host.rbac.create is false (decoupled from host RBAC)
set:
gke_autopilot.allowlist_waiter.enabled: true
host.rbac.create: false
+ asserts:
+ - hasDocuments:
+ count: 1
+ template: templates/host/gke-allowlist-waiter-serviceaccount.yaml
+ - hasDocuments:
+ count: 1
+ template: templates/host/gke-allowlist-waiter-clusterrole.yaml
+ - hasDocuments:
+ count: 1
+ template: templates/host/gke-allowlist-waiter-clusterrolebinding.yaml
+ - hasDocuments:
+ count: 1
+ template: templates/host/gke-allowlist-waiter-job.yaml
+
+ - it: Does not render waiter SA/CR/CRB when gke_autopilot.allowlist_waiter.create_rbac is false
+ set:
+ gke_autopilot.allowlist_waiter.enabled: true
+ gke_autopilot.allowlist_waiter.create_rbac: false
asserts:
- hasDocuments:
count: 0
@@ -83,6 +101,25 @@ tests:
count: 1
template: templates/host/gke-allowlist-waiter-job.yaml
+ - it: Propagates gke_autopilot.allowlist_waiter.rbac_annotations to all waiter RBAC objects
+ set:
+ gke_autopilot.allowlist_waiter.enabled: true
+ gke_autopilot.allowlist_waiter.rbac_annotations:
+ custom.example.com/owner: shield-team
+ asserts:
+ - equal:
+ path: metadata.annotations["custom.example.com/owner"]
+ value: shield-team
+ template: templates/host/gke-allowlist-waiter-serviceaccount.yaml
+ - equal:
+ path: metadata.annotations["custom.example.com/owner"]
+ value: shield-team
+ template: templates/host/gke-allowlist-waiter-clusterrole.yaml
+ - equal:
+ path: metadata.annotations["custom.example.com/owner"]
+ value: shield-team
+ template: templates/host/gke-allowlist-waiter-clusterrolebinding.yaml
+
- it: Renders the waiter ServiceAccount with hook annotations
set:
gke_autopilot.allowlist_waiter.enabled: true
@@ -102,7 +139,7 @@ tests:
value: "-5"
- equal:
path: metadata.annotations["helm.sh/hook-delete-policy"]
- value: before-hook-creation,hook-succeeded
+ value: before-hook-creation
template: templates/host/gke-allowlist-waiter-serviceaccount.yaml
- it: Renders the waiter ClusterRole limited to allowlistsynchronizers
@@ -119,6 +156,9 @@ tests:
- equal:
path: metadata.annotations["helm.sh/hook-weight"]
value: "-5"
+ - equal:
+ path: metadata.annotations["helm.sh/hook-delete-policy"]
+ value: before-hook-creation
- contains:
path: rules
content:
@@ -146,6 +186,9 @@ tests:
- equal:
path: metadata.annotations["helm.sh/hook-weight"]
value: "-5"
+ - equal:
+ path: metadata.annotations["helm.sh/hook-delete-policy"]
+ value: before-hook-creation
- equal:
path: roleRef.kind
value: ClusterRole
@@ -182,7 +225,7 @@ tests:
value: "5"
- equal:
path: metadata.annotations["helm.sh/hook-delete-policy"]
- value: before-hook-creation,hook-succeeded
+ value: before-hook-creation,hook-succeeded,hook-failed
- equal:
path: spec.template.spec.serviceAccountName
value: release-name-shield-host-allowlist-waiter
@@ -194,7 +237,17 @@ tests:
pattern: "^quay\\.io/sysdig/kubectl:[^\\s]+$"
- matchRegex:
path: spec.template.spec.containers[0].command[2]
- pattern: "--timeout=120s"
+ pattern: "TIMEOUT=120s"
+ # The waiter script must dump AllowlistSynchronizer state on wait failure
+ # so the next on-caller has actionable diagnostics instead of a bare exit.
+ - matchRegex:
+ path: spec.template.spec.containers[0].command[2]
+ pattern: "kubectl describe .* allowlistsynchronizer"
+ # Job-level guard against pod hangs before the inner `kubectl wait` timeout fires
+ # (image-pull stalls, scheduler delays, admission webhook hangs).
+ - equal:
+ path: spec.activeDeadlineSeconds
+ value: 300
# The waiter Pod must NOT carry cloud.google.com/matching-allowlist —
# otherwise GKE Autopilot would block it on the very allowlist it waits to load.
- notExists:
@@ -203,6 +256,16 @@ tests:
path: spec.template.metadata.labels["autopilot.gke.io/no-connect"]
template: templates/host/gke-allowlist-waiter-job.yaml
+ - it: Honors gke_autopilot.allowlist_waiter.active_deadline_seconds override
+ set:
+ gke_autopilot.allowlist_waiter.enabled: true
+ gke_autopilot.allowlist_waiter.active_deadline_seconds: 600
+ asserts:
+ - equal:
+ path: spec.activeDeadlineSeconds
+ value: 600
+ template: templates/host/gke-allowlist-waiter-job.yaml
+
- it: Renders imagePullSecrets when gke_autopilot.allowlist_waiter.image.pull_secrets is set
set:
gke_autopilot.allowlist_waiter.enabled: true
diff --git a/charts/shield/values.yaml b/charts/shield/values.yaml
index b51203d0b..3940728a7 100644
--- a/charts/shield/values.yaml
+++ b/charts/shield/values.yaml
@@ -660,8 +660,17 @@ gke_autopilot:
enabled: false
# Maximum time the Job will block on the AllowlistSynchronizer reaching Ready
timeout: 120s
+ # Maximum seconds the waiter Job (all Pod attempts, retries included) may stay active before
+ # Kubernetes terminates its Pods and marks the Job failed. Acts as a Job-level guard against a Pod
+ # hanging before the inner `kubectl wait` timeout fires (image-pull stalls, scheduler delays, admission webhook hangs).
+ # Should be greater than `timeout` to leave headroom for pod startup. Takes precedence over the Job's retry limit, so no retry starts once it has elapsed.
+ active_deadline_seconds: 300
# Override the name of the waiter ServiceAccount (defaults to -allowlist-waiter)
service_account_name:
+ # Create the RBAC resources (ServiceAccount, ClusterRole, ClusterRoleBinding) for the allowlist waiter Job. Set to false to manage them externally.
+ create_rbac: true
+ # Additional annotations applied to the waiter SA/ClusterRole/ClusterRoleBinding
+ rbac_annotations: {}
image:
# The registry where the kubectl image is stored
registry: quay.io