Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions pkg/kotsadm/objects/kotsadm_objects.go
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,34 @@ func updateKotsadmDeploymentScriptsPath(existing *appsv1.Deployment) {
}
}

// waitForRqliteInitContainer returns an init container that polls the rqlite
// readiness endpoint before schemahero-plan runs. This prevents CrashLoopBackOff
// when kotsadm and rqlite restart simultaneously (e.g., during EC upgrades).
//
// The polling loop gives up once 5 minutes of wall-clock time have elapsed so
// rqlite failures surface as a clear init error rather than an indefinite hang.
// The deadline is checked between probes, so the total wait can overrun by the
// duration of the final probe plus one sleep interval.
// Ref: https://app.shortcut.com/replicated/story/138103
func waitForRqliteInitContainer(deployOptions types.DeployOptions) corev1.Container {
	// The script is assembled from adjacent raw-string pieces purely for
	// readability; the result is a single line with no trailing newline.
	//
	//   - wget is given -T 5 so a stalled connection cannot block one probe
	//     for an unbounded time.
	//   - elapsed is recomputed from `date +%s` instead of being incremented
	//     by the sleep interval, so time spent inside wget counts toward the
	//     timeout as well.
	const waitScript = `elapsed=0; timeout=300; start=$(date +%s); ` +
		`while [ $elapsed -lt $timeout ]; do ` +
		`if wget -q -T 5 -O- http://kotsadm-rqlite:4001/readyz 2>/dev/null | grep -q "ok"; then ` +
		`echo "rqlite is ready (${elapsed}s)"; exit 0; fi; ` +
		`echo "Waiting for rqlite... (${elapsed}s/${timeout}s)"; ` +
		`sleep 2; elapsed=$(( $(date +%s) - start )); done; ` +
		`echo "ERROR: rqlite not ready after ${timeout}s"; exit 1`

	return corev1.Container{
		// Uses the same kotsadm-migrations image reference as the init
		// containers that follow it in the pod spec.
		Image:           GetAdminConsoleImage(deployOptions, "kotsadm-migrations"),
		ImagePullPolicy: corev1.PullIfNotPresent,
		Name:            "wait-for-rqlite",
		Command:         []string{"sh", "-c"},
		Args:            []string{waitScript},
		Resources: corev1.ResourceRequirements{
			Limits: corev1.ResourceList{
				"memory": resource.MustParse("50Mi"),
			},
			Requests: corev1.ResourceList{
				"cpu":    resource.MustParse("10m"),
				"memory": resource.MustParse("10Mi"),
			},
		},
		SecurityContext: k8sutil.SecureContainerContext(deployOptions.StrictSecurityContext),
	}
}

func KotsadmDeployment(deployOptions types.DeployOptions) (*appsv1.Deployment, error) {
securityContext := k8sutil.SecurePodContext(1001, 1001, deployOptions.StrictSecurityContext)
if deployOptions.IsOpenShift {
Expand Down Expand Up @@ -493,6 +521,7 @@ func KotsadmDeployment(deployOptions types.DeployOptions) (*appsv1.Deployment, e
RestartPolicy: corev1.RestartPolicyAlways,
ImagePullSecrets: pullSecrets,
InitContainers: []corev1.Container{
waitForRqliteInitContainer(deployOptions),
{
Image: GetAdminConsoleImage(deployOptions, "kotsadm-migrations"),
ImagePullPolicy: corev1.PullIfNotPresent,
Expand Down Expand Up @@ -1086,6 +1115,7 @@ func KotsadmStatefulSet(deployOptions types.DeployOptions, size resource.Quantit
RestartPolicy: corev1.RestartPolicyAlways,
ImagePullSecrets: pullSecrets,
InitContainers: []corev1.Container{
waitForRqliteInitContainer(deployOptions),
{
Image: GetAdminConsoleImage(deployOptions, "kotsadm-migrations"),
ImagePullPolicy: corev1.PullIfNotPresent,
Expand Down
44 changes: 44 additions & 0 deletions pkg/kotsadm/objects/kotsadm_objects_test.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
package kotsadm

import (
"strings"
"testing"

"github.com/replicatedhq/kots/pkg/kotsadm/types"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -226,3 +229,44 @@ func Test_updateKotsadmDeploymentScriptsPath(t *testing.T) {
})
}
}

// Test_waitForRqliteInitContainer verifies the wait-for-rqlite init container
// definition: its name, pull policy, shell entry point, the key properties of
// the polling script, its resource settings, and its security context.
func Test_waitForRqliteInitContainer(t *testing.T) {
	opts := types.DeployOptions{
		Namespace:             "default",
		StrictSecurityContext: true,
	}
	c := waitForRqliteInitContainer(opts)

	assert.Equal(t, "wait-for-rqlite", c.Name)
	assert.Equal(t, corev1.PullIfNotPresent, c.ImagePullPolicy)
	assert.Equal(t, []string{"sh", "-c"}, c.Command)
	// Stop here if the script is missing; the checks below index Args[0].
	require.Len(t, c.Args, 1)
	script := c.Args[0]

	// Polls /readyz
	assert.Contains(t, script, "kotsadm-rqlite:4001/readyz")
	// Has a timeout (not an infinite loop)
	assert.Contains(t, script, "timeout=300")
	// Exits non-zero on timeout
	assert.True(t, strings.HasSuffix(script, "exit 1"), "script should end with a non-zero exit")

	// Resource requests and limits carry the expected values. The
	// ResourceList accessors return a non-nil zero quantity when the key is
	// absent, so a NotNil check cannot detect a missing entry; compare the
	// concrete quantities instead.
	assert.Equal(t, "10m", c.Resources.Requests.Cpu().String())
	assert.Equal(t, "10Mi", c.Resources.Requests.Memory().String())
	assert.Equal(t, "50Mi", c.Resources.Limits.Memory().String())

	// Security context is set (opts enables StrictSecurityContext)
	assert.NotNil(t, c.SecurityContext)
}

// Test_kotsadmDeploymentHasWaitForRqlite verifies that the Deployment built by
// KotsadmDeployment lists wait-for-rqlite as its first init container,
// immediately followed by schemahero-plan.
func Test_kotsadmDeploymentHasWaitForRqlite(t *testing.T) {
	opts := types.DeployOptions{
		Namespace: "default",
	}
	dep, err := KotsadmDeployment(opts)
	require.NoError(t, err)
	// Guard the dereference below so a nil result fails the test instead of panicking.
	require.NotNil(t, dep)

	initContainers := dep.Spec.Template.Spec.InitContainers
	// Must hold before indexing [0] and [1]; GreaterOrEqual reports both
	// operands on failure.
	require.GreaterOrEqual(t, len(initContainers), 4, "expected at least 4 init containers, got %d", len(initContainers))
	assert.Equal(t, "wait-for-rqlite", initContainers[0].Name, "wait-for-rqlite should be the first init container")
	assert.Equal(t, "schemahero-plan", initContainers[1].Name, "schemahero-plan should follow wait-for-rqlite")
}
Loading