From 9bfdfd2c8b42038a6bd390e3962457e4e9977753 Mon Sep 17 00:00:00 2001 From: Gerard Toonstra Date: Tue, 12 May 2026 17:49:59 +0200 Subject: [PATCH] fix: Notify k8s of broken pool executors --- charts/datafold-manager/Chart.yaml | 2 +- charts/datafold-manager/values.yaml | 2 +- charts/datafold/Chart.yaml | 2 +- .../worker-temporal/templates/deployment.yaml | 23 ++++++++++++++++++- .../charts/worker-temporal/values.yaml | 14 +++++++++++ 5 files changed, 39 insertions(+), 4 deletions(-) diff --git a/charts/datafold-manager/Chart.yaml b/charts/datafold-manager/Chart.yaml index ba286a8..ae6f263 100644 --- a/charts/datafold-manager/Chart.yaml +++ b/charts/datafold-manager/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: datafold-manager description: Helm chart for Datafold Operator type: application -version: 0.1.100 +version: 0.1.101 appVersion: "1.0.0" icon: https://www.datafold.com/logo.png diff --git a/charts/datafold-manager/values.yaml b/charts/datafold-manager/values.yaml index 1602914..a379909 100644 --- a/charts/datafold-manager/values.yaml +++ b/charts/datafold-manager/values.yaml @@ -18,7 +18,7 @@ operator: # Operator image configuration image: repository: us-docker.pkg.dev/datadiff-mm/datafold/datafold-operator - tag: "1.1.70" + tag: "1.1.71" pullPolicy: Always # Operator deployment configuration diff --git a/charts/datafold/Chart.yaml b/charts/datafold/Chart.yaml index cb11625..6ce0870 100644 --- a/charts/datafold/Chart.yaml +++ b/charts/datafold/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: datafold description: Helm chart package to deploy Datafold on kubernetes. 
type: application -version: 0.10.84 +version: 0.10.85 appVersion: "1.0.0" icon: https://www.datafold.com/logo.png diff --git a/charts/datafold/charts/worker-temporal/templates/deployment.yaml b/charts/datafold/charts/worker-temporal/templates/deployment.yaml index f656847..2c7deb0 100644 --- a/charts/datafold/charts/worker-temporal/templates/deployment.yaml +++ b/charts/datafold/charts/worker-temporal/templates/deployment.yaml @@ -103,14 +103,35 @@ spec: - name: TEMPORAL_METRICS_BIND_ADDRESS value: "0.0.0.0:{{ .Values.metrics.port }}" {{- end }} + {{- if .Values.health.enabled }} + - name: TEMPORAL_HEALTH_PORT + value: {{ .Values.health.port | quote }} + {{- end }} {{- with .Values.extraEnv }} {{- toYaml . | nindent 12 }} {{- end }} - {{- if .Values.metrics.enabled }} + {{- if or .Values.metrics.enabled .Values.health.enabled }} ports: + {{- if .Values.metrics.enabled }} - name: metrics containerPort: {{ .Values.metrics.port }} protocol: TCP + {{- end }} + {{- if .Values.health.enabled }} + - name: health + containerPort: {{ .Values.health.port }} + protocol: TCP + {{- end }} + {{- end }} + {{- if .Values.health.enabled }} + livenessProbe: + httpGet: + path: / + port: {{ .Values.health.port }} + initialDelaySeconds: {{ .Values.health.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.health.livenessProbe.periodSeconds }} + failureThreshold: {{ .Values.health.livenessProbe.failureThreshold }} + timeoutSeconds: {{ .Values.health.livenessProbe.timeoutSeconds }} {{- end }} resources: {{- toYaml .Values.resources | nindent 12 }} diff --git a/charts/datafold/charts/worker-temporal/values.yaml b/charts/datafold/charts/worker-temporal/values.yaml index 863fd4b..68f694c 100644 --- a/charts/datafold/charts/worker-temporal/values.yaml +++ b/charts/datafold/charts/worker-temporal/values.yaml @@ -76,6 +76,20 @@ metrics: enabled: true port: 9090 +health: + # Enables the HTTP liveness probe endpoint (TEMPORAL_HEALTH_PORT). 
+ # When the activity executor becomes broken the endpoint returns 503. After + # failureThreshold consecutive failed probes the kubelet sends SIGTERM and + # restarts the container in place. The old process can drain for up to + # terminationGracePeriodSeconds so in-flight work on healthy threads can finish. + enabled: true + port: 8091 + livenessProbe: + initialDelaySeconds: 30 + periodSeconds: 15 + failureThreshold: 4 + timeoutSeconds: 5 + extraEnv: [] volumes: []