Skip to content

Commit bd9eead

Browse files
fix: Make init-job run as a plain Job on install and as a hook only on upgrade
There are multiple issues reporting that the init-job is not run as expected. This is because of a deadlock: the CRDB StatefulSet requires the init-job to run, but Helm only runs the init-job once the StatefulSet is considered ready. The optional use of the `--wait` flag of `helm install` causes differing observations. This PR attempts to fix the problem by deploying the Job as a plain Job instead of a hook when the Chart is initially installed.
1 parent aa9552e commit bd9eead

1 file changed

Lines changed: 24 additions & 47 deletions

File tree

cockroachdb/templates/job.init.yaml

Lines changed: 24 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,10 @@ metadata:
1919
{{- toYaml . | nindent 4 }}
2020
{{- end }}
2121
annotations:
22-
helm.sh/hook: post-install,post-upgrade
23-
helm.sh/hook-delete-policy: before-hook-creation
22+
{{- if not .Release.IsInstall }}
23+
helm.sh/hook: post-upgrade
24+
helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded
25+
{{- end }}
2426
{{- with .Values.init.jobAnnotations }}
2527
{{- toYaml . | nindent 4 }}
2628
{{- end }}
@@ -97,57 +99,32 @@ spec:
9799
- name: cluster-init
98100
image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
99101
imagePullPolicy: {{ .Values.image.pullPolicy | quote }}
100-
# Run the command in an `while true` loop because this Job is bound
101-
# to come up before the CockroachDB Pods (due to the time needed to
102-
# get PersistentVolumes attached to Nodes), and sleeping 5 seconds
103-
# between attempts is much better than letting the Pod fail when
104-
# the init command does and waiting out Kubernetes' non-configurable
105-
# exponential back-off for Pod restarts.
106102
# Command completes either when cluster initialization succeeds,
107103
# or when cluster has been initialized already.
108104
command:
109105
- /bin/bash
110106
- -c
111-
- >-
107+
- |
112108
{{- if $isClusterInitEnabled }}
113-
initCluster() {
114-
while true; do
115-
local output=$(
116-
set -x;
117-
118-
/cockroach/cockroach init \
119-
{{- if .Values.tls.enabled }}
120-
--certs-dir=/cockroach-certs/ \
121-
{{- else }}
122-
--insecure \
123-
{{- end }}
124-
{{- with index .Values.conf "cluster-name" }}
125-
--cluster-name={{.}} \
126-
{{- end }}
127-
--host={{ template "cockroachdb.fullname" . }}-0.{{ template "cockroachdb.fullname" . -}}
128-
:{{ .Values.service.ports.grpc.internal.port | int64 }} \
129-
{{- if .Values.init.pcr.enabled -}}
130-
{{- if .Values.init.pcr.isPrimary }}
131-
--virtualized \
132-
{{- else }}
133-
--virtualized-empty \
134-
{{- end }}
135-
{{- end }}
136-
2>&1);
137-
138-
local exitCode="$?";
139-
echo $output;
140-
141-
if [[ "$output" =~ .*"Cluster successfully initialized".* || "$output" =~ .*"cluster has already been initialized".* ]]; then
142-
break;
143-
fi
144-
145-
echo "Cluster is not ready to be initialized, retrying in 5 seconds"
146-
sleep 5;
147-
done
148-
}
149-
150-
initCluster;
109+
set -x;
110+
/cockroach/cockroach init \
111+
{{- if .Values.tls.enabled }}
112+
--certs-dir=/cockroach-certs/
113+
{{- else }}
114+
--insecure
115+
{{- end }} \
116+
{{- with index .Values.conf "cluster-name" }}
117+
--cluster-name={{.}} \
118+
{{- end }}
119+
--host={{ template "cockroachdb.fullname" . }}-0.{{ template "cockroachdb.fullname" . -}}
120+
:{{ .Values.service.ports.grpc.internal.port | int64 }}
121+
{{- if .Values.init.pcr.enabled -}} \
122+
{{- if .Values.init.pcr.isPrimary }}
123+
--virtualized
124+
{{- else }}
125+
--virtualized-empty
126+
{{- end }}
127+
{{- end }} 2>&1 | tee /tmp/output.txt || grep "ERROR: cluster has already been initialized" /tmp/output.txt
151128
{{- end }}
152129
153130
{{- if $isDatabaseProvisioningEnabled }}

0 commit comments

Comments
 (0)