Skip to content

Commit ebb4d4c

Browse files
committed
comments - adding a fallback
1 parent 6c932c3 commit ebb4d4c

9 files changed

Lines changed: 425 additions & 39 deletions

File tree

README.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,10 @@ Notes:
8080
- `namespace` selects the namespace inside the chosen cluster; it does not choose the cluster itself, and defaults to `default` when omitted
8181
- `unschedulable_timeout` controls how long a Pod may remain unschedulable before the task is failed early; it defaults to `30s`, and `0s` disables that fail-fast behavior
8282
- `image_pull_policy` defaults to `IfNotPresent`
83-
- the Kubernetes backend mounts task sidecars with native image volumes; sidecar mounts are read-only, and Kubernetes/runtime support for the built-in `ImageVolume` Pod volume source is required
84-
- Kubernetes `1.35+` is the recommended and tested target for this image-volume path; Kubernetes `1.33`-`1.34` may work if `ImageVolume` is enabled and the container runtime supports image volumes
85-
- the worker runs a short-lived startup preflight Job and waits for either preflight success or an early controller, mount, or admission failure, so incompatible cluster/runtime policy failures surface before the worker starts accepting tasks
83+
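- by default, the Kubernetes backend materializes sidecars by running one root init container per sidecar that copies the sidecar image's filesystem into an `emptyDir` volume, matching the existing behavior; the namespace's Pod Security or admission policy must therefore allow root init containers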
84+
- set `use_image_volumes: true` to opt into native image volumes for sidecars; in that mode, sidecar mounts are read-only and Kubernetes/runtime support for the built-in `ImageVolume` Pod volume source is required
85+
- Kubernetes `1.35+` is the recommended and tested target for `use_image_volumes: true`; Kubernetes `1.33`-`1.34` may work if `ImageVolume` is enabled and the container runtime supports image volumes
86+
- the worker runs a short-lived startup preflight Job for the configured sidecar-loading mode and waits for either preflight success or an early controller, mount, or admission failure, so incompatible cluster/runtime policy failures surface before the worker starts accepting tasks
8687
- `preflight_image` defaults to `busybox:1.36`; set it if your cluster only allows pulling startup-preflight images from an internal or allowlisted registry
8788
- `pod_template` accepts standard Kubernetes PodSpec YAML and is the declarative way to configure task pod scheduling, service accounts, image pull secrets, resources, and environment
8889
- when using `pod_template`, define a container named `task` if you want to customize the main task container directly; otherwise the worker appends its own `task` container to the PodSpec
@@ -144,7 +145,7 @@ Recommended namespace-scoped permissions for the worker are:
144145
- get `pods/log`
145146
- list `events`
146147

147-
The worker Deployment's `ServiceAccount` is separate from the task Job `serviceAccountName` you may set inside `backend.kubernetes.pod_template` / `kubernetesBackend.podTemplate`. The worker `Deployment` defaults to non-root, and task Jobs mount sidecars via native image volumes instead of root init containers. Kubernetes `1.35+` is the recommended and tested target for this path; Kubernetes `1.33`-`1.34` may work if `ImageVolume` is enabled and the container runtime supports image volumes. If your cluster restricts image sources for admission or policy reasons, set `kubernetesBackend.preflightImage` in the chart to an allowlisted image for the startup preflight Job, and configure task `imagePullSecrets` inside `podTemplate` when needed.
148+
The worker Deployment's `ServiceAccount` is separate from the task Job `serviceAccountName` you may set inside `backend.kubernetes.pod_template` / `kubernetesBackend.podTemplate`. The worker `Deployment` defaults to non-root. By default, task Jobs still materialize sidecars with root init containers; set `kubernetesBackend.useImageVolumes=true` to opt into native image volumes instead. Kubernetes `1.35+` is the recommended and tested target for that opt-in path, while Kubernetes `1.33`-`1.34` may work if `ImageVolume` is enabled and the container runtime supports image volumes. If your cluster restricts image sources for admission or policy reasons, set `kubernetesBackend.preflightImage` in the chart to an allowlisted image for the startup preflight Job, and configure task `imagePullSecrets` inside `podTemplate` when needed.
148149

149150
### Go Install
150151

charts/oz-agent-worker/templates/configmap.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ data:
1919
default_image: {{ .Values.kubernetesBackend.defaultImage | quote }}
2020
{{- end }}
2121
image_pull_policy: {{ .Values.kubernetesBackend.imagePullPolicy | quote }}
22+
use_image_volumes: {{ .Values.kubernetesBackend.useImageVolumes }}
2223
unschedulable_timeout: {{ .Values.kubernetesBackend.unschedulableTimeout | quote }}
2324
{{- if .Values.kubernetesBackend.preflightImage }}
2425
preflight_image: {{ .Values.kubernetesBackend.preflightImage | quote }}

charts/oz-agent-worker/values.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ kubernetesBackend:
6868
namespace: ""
6969
defaultImage: ""
7070
imagePullPolicy: IfNotPresent
71+
useImageVolumes: false
7172
preflightImage: ""
7273
setupCommand: ""
7374
teardownCommand: ""

internal/config/config.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ type KubernetesConfig struct {
5858
Kubeconfig string `yaml:"kubeconfig"`
5959
DefaultImage string `yaml:"default_image" validate:"omitempty,no_whitespace"`
6060
ImagePullPolicy string `yaml:"image_pull_policy" validate:"omitempty,oneof=Always Never IfNotPresent"`
61+
UseImageVolumes bool `yaml:"use_image_volumes"`
6162
PreflightImage string `yaml:"preflight_image" validate:"omitempty,no_whitespace"`
6263
SetupCommand string `yaml:"setup_command"`
6364
TeardownCommand string `yaml:"teardown_command"`

internal/config/config_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,7 @@ backend:
271271
namespace: "agents"
272272
kubeconfig: "/tmp/kubeconfig"
273273
image_pull_policy: "IfNotPresent"
274+
use_image_volumes: true
274275
preflight_image: "registry.internal/platform/preflight:1.0"
275276
setup_command: "printf 'SETUP=done\n' > \"$OZ_ENVIRONMENT_FILE\""
276277
teardown_command: "rm -rf \"$OZ_WORKSPACE_ROOT/tmp\""
@@ -315,6 +316,9 @@ backend:
315316
if cfg.Backend.Kubernetes.ImagePullPolicy != "IfNotPresent" {
316317
t.Errorf("image_pull_policy = %q, want %q", cfg.Backend.Kubernetes.ImagePullPolicy, "IfNotPresent")
317318
}
319+
if !cfg.Backend.Kubernetes.UseImageVolumes {
320+
t.Fatal("expected use_image_volumes to be true")
321+
}
318322
if cfg.Backend.Kubernetes.PreflightImage != "registry.internal/platform/preflight:1.0" {
319323
t.Errorf("preflight_image = %q, want %q", cfg.Backend.Kubernetes.PreflightImage, "registry.internal/platform/preflight:1.0")
320324
}

internal/worker/kubernetes.go

Lines changed: 164 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ const (
3232
startupPreflightPollInterval = 500 * time.Millisecond
3333
startupPreflightTimeout = 15 * time.Second
3434
kubernetesBackendTypeName = "kubernetes"
35+
sidecarCopyTargetMountPath = "/target"
3536
kubernetesStartupPreflightImage = "busybox:1.36"
3637
startupPreflightImageVolumeName = "preflight-image"
3738
startupPreflightImageMountPath = "/preflight-image"
@@ -52,6 +53,7 @@ type KubernetesBackendConfig struct {
5253
Kubeconfig string
5354
DefaultImage string
5455
ImagePullPolicy string
56+
UseImageVolumes bool
5557
PreflightImage string
5658
SetupCommand string
5759
TeardownCommand string
@@ -117,7 +119,7 @@ func NewKubernetesBackend(ctx context.Context, config KubernetesBackendConfig) (
117119

118120
// ExecuteTask runs the agent in a Kubernetes Job.
119121
func (b *KubernetesBackend) ExecuteTask(ctx context.Context, params *TaskParams) error {
120-
if err := validateTaskSidecars(params.Sidecars); err != nil {
122+
if err := validateTaskSidecars(params.Sidecars, b.config.UseImageVolumes); err != nil {
121123
return err
122124
}
123125

@@ -154,16 +156,55 @@ func (b *KubernetesBackend) ExecuteTask(ctx context.Context, params *TaskParams)
154156

155157
var initContainers []corev1.Container
156158
for i, sidecar := range params.Sidecars {
157-
volumeName := fmt.Sprintf("sidecar-%d-image", i)
158-
volumes = append(volumes, imageVolume(volumeName, sidecar.Image, pullPolicy))
159+
if b.config.UseImageVolumes {
160+
volumeName := fmt.Sprintf("sidecar-%d-image", i)
161+
volumes = append(volumes, imageVolume(volumeName, sidecar.Image, pullPolicy))
162+
163+
volumeMount := corev1.VolumeMount{
164+
Name: volumeName,
165+
MountPath: sidecar.MountPath,
166+
ReadOnly: true,
167+
}
168+
mainVolumeMounts = append(mainVolumeMounts, volumeMount)
169+
setupVolumeMounts = append(setupVolumeMounts, volumeMount)
170+
continue
171+
}
159172

160-
volumeMount := corev1.VolumeMount{
161-
Name: volumeName,
173+
dataVolumeName := fmt.Sprintf("sidecar-%d-data", i)
174+
volumes = append(volumes, corev1.Volume{
175+
Name: dataVolumeName,
176+
VolumeSource: corev1.VolumeSource{
177+
EmptyDir: &corev1.EmptyDirVolumeSource{},
178+
},
179+
})
180+
initContainers = append(initContainers, corev1.Container{
181+
Name: fmt.Sprintf("copy-sidecar-%d", i),
182+
Image: sidecar.Image,
183+
ImagePullPolicy: pullPolicy,
184+
Command: []string{
185+
"/bin/sh",
186+
"-c",
187+
kubernetesSidecarMaterializationScript(),
188+
},
189+
SecurityContext: rootSecurityContext(),
190+
VolumeMounts: []corev1.VolumeMount{
191+
{
192+
Name: dataVolumeName,
193+
MountPath: sidecarCopyTargetMountPath,
194+
},
195+
},
196+
})
197+
198+
mainVolumeMounts = append(mainVolumeMounts, corev1.VolumeMount{
199+
Name: dataVolumeName,
162200
MountPath: sidecar.MountPath,
163-
ReadOnly: true,
164-
}
165-
mainVolumeMounts = append(mainVolumeMounts, volumeMount)
166-
setupVolumeMounts = append(setupVolumeMounts, volumeMount)
201+
ReadOnly: !sidecar.ReadWrite,
202+
})
203+
setupVolumeMounts = append(setupVolumeMounts, corev1.VolumeMount{
204+
Name: dataVolumeName,
205+
MountPath: sidecar.MountPath,
206+
ReadOnly: !sidecar.ReadWrite,
207+
})
167208
}
168209

169210
if b.config.SetupCommand != "" {
@@ -497,7 +538,7 @@ func workspaceVolumeName() string {
497538
return "workspace"
498539
}
499540

500-
func validateTaskSidecars(sidecars []types.SidecarMount) error {
541+
func validateTaskSidecars(sidecars []types.SidecarMount, useImageVolumes bool) error {
501542
seenMountPaths := make(map[string]bool)
502543
for _, sidecar := range sidecars {
503544
if sidecar.Image == "" {
@@ -512,7 +553,7 @@ func validateTaskSidecars(sidecars []types.SidecarMount) error {
512553
if seenMountPaths[sidecar.MountPath] {
513554
return fmt.Errorf("duplicate mount path %s for additional sidecar %s", sidecar.MountPath, sidecar.Image)
514555
}
515-
if sidecar.ReadWrite {
556+
if useImageVolumes && sidecar.ReadWrite {
516557
return fmt.Errorf("additional sidecar %s cannot request a read-write mount: kubernetes image volumes are read-only", sidecar.Image)
517558
}
518559
seenMountPaths[sidecar.MountPath] = true
@@ -550,22 +591,42 @@ func (b *KubernetesBackend) startupPreflightJob() *batchv1.Job {
550591
pullPolicy := normalizePullPolicy(b.config.ImagePullPolicy)
551592
podSpec := b.basePodSpec()
552593
podSpec.RestartPolicy = corev1.RestartPolicyNever
553-
podSpec.InitContainers = nil
554-
podSpec.Volumes = append(podSpec.Volumes, imageVolume(startupPreflightImageVolumeName, b.config.PreflightImage, pullPolicy))
555-
podSpec.Containers = []corev1.Container{
556-
{
557-
Name: "main",
558-
Image: b.config.PreflightImage,
559-
ImagePullPolicy: pullPolicy,
560-
Command: []string{"/bin/sh", "-c", "test -d " + startupPreflightImageMountPath},
561-
VolumeMounts: []corev1.VolumeMount{
562-
{
563-
Name: startupPreflightImageVolumeName,
564-
MountPath: startupPreflightImageMountPath,
565-
ReadOnly: true,
594+
if b.config.UseImageVolumes {
595+
podSpec.InitContainers = nil
596+
podSpec.Volumes = append(podSpec.Volumes, imageVolume(startupPreflightImageVolumeName, b.config.PreflightImage, pullPolicy))
597+
podSpec.Containers = []corev1.Container{
598+
{
599+
Name: "main",
600+
Image: b.config.PreflightImage,
601+
ImagePullPolicy: pullPolicy,
602+
Command: []string{"/bin/sh", "-c", "test -d " + startupPreflightImageMountPath},
603+
VolumeMounts: []corev1.VolumeMount{
604+
{
605+
Name: startupPreflightImageVolumeName,
606+
MountPath: startupPreflightImageMountPath,
607+
ReadOnly: true,
608+
},
566609
},
567610
},
568-
},
611+
}
612+
} else {
613+
podSpec.InitContainers = []corev1.Container{
614+
{
615+
Name: "root-init-preflight",
616+
Image: b.config.PreflightImage,
617+
ImagePullPolicy: pullPolicy,
618+
Command: []string{"/bin/sh", "-c", "true"},
619+
SecurityContext: rootSecurityContext(),
620+
},
621+
}
622+
podSpec.Containers = []corev1.Container{
623+
{
624+
Name: "main",
625+
Image: b.config.PreflightImage,
626+
ImagePullPolicy: pullPolicy,
627+
Command: []string{"/bin/sh", "-c", "true"},
628+
},
629+
}
569630
}
570631
job := &batchv1.Job{
571632
ObjectMeta: metav1.ObjectMeta{
@@ -583,6 +644,57 @@ func (b *KubernetesBackend) startupPreflightJob() *batchv1.Job {
583644
}
584645

585646
func (b *KubernetesBackend) waitForStartupPreflight(logCtx, ctx context.Context, job *batchv1.Job) error {
647+
if b.config.UseImageVolumes {
648+
return b.waitForImageVolumeStartupPreflight(logCtx, ctx, job)
649+
}
650+
return b.waitForLegacyStartupPreflight(logCtx, ctx, job)
651+
}
652+
653+
func (b *KubernetesBackend) waitForLegacyStartupPreflight(logCtx, ctx context.Context, job *batchv1.Job) error {
654+
podSelector := fmt.Sprintf("job-name=%s", job.Name)
655+
eventSelector := fmt.Sprintf("involvedObject.uid=%s", job.UID)
656+
ticker := time.NewTicker(startupPreflightPollInterval)
657+
defer ticker.Stop()
658+
659+
for {
660+
select {
661+
case <-ctx.Done():
662+
if ctx.Err() == context.DeadlineExceeded {
663+
return fmt.Errorf("timed out waiting for startup preflight Job %q to create a Pod or surface a controller failure", job.Name)
664+
}
665+
return ctx.Err()
666+
default:
667+
}
668+
669+
pods, err := b.clientset.CoreV1().Pods(b.config.Namespace).List(ctx, metav1.ListOptions{
670+
LabelSelector: podSelector,
671+
})
672+
if err != nil {
673+
return fmt.Errorf("failed to list startup preflight Pods: %w", err)
674+
}
675+
if len(pods.Items) > 0 {
676+
return nil
677+
}
678+
679+
events, err := b.clientset.CoreV1().Events(b.config.Namespace).List(ctx, metav1.ListOptions{
680+
FieldSelector: eventSelector,
681+
})
682+
if err != nil {
683+
return fmt.Errorf("failed to list startup preflight events: %w", err)
684+
}
685+
if err := startupPreflightFailureFromEvents(job.Name, events.Items); err != nil {
686+
log.Warnf(logCtx, "Startup preflight Job %s failed before creating a Pod: %v", job.Name, err)
687+
return err
688+
}
689+
690+
select {
691+
case <-ctx.Done():
692+
case <-ticker.C:
693+
}
694+
}
695+
}
696+
697+
func (b *KubernetesBackend) waitForImageVolumeStartupPreflight(logCtx, ctx context.Context, job *batchv1.Job) error {
586698
podSelector := fmt.Sprintf("job-name=%s", job.Name)
587699
eventSelector := fmt.Sprintf("involvedObject.uid=%s", job.UID)
588700
ticker := time.NewTicker(startupPreflightPollInterval)
@@ -660,7 +772,10 @@ func startupPreflightFailureFromEvents(jobName string, events []corev1.Event) er
660772
}
661773

662774
func (b *KubernetesBackend) startupPreflightError(err error) error {
663-
return fmt.Errorf("kubernetes startup preflight failed: the kubernetes backend requires creating task Jobs that mount sidecars via image volumes; verify service account/RBAC, Pod Security or admission policy, and Kubernetes/runtime image-volume support for namespace %q: %w", b.config.Namespace, err)
775+
if b.config.UseImageVolumes {
776+
return fmt.Errorf("kubernetes startup preflight failed: the kubernetes backend requires creating task Jobs that mount sidecars via image volumes; verify service account/RBAC, Pod Security or admission policy, and Kubernetes/runtime image-volume support for namespace %q: %w", b.config.Namespace, err)
777+
}
778+
return fmt.Errorf("kubernetes startup preflight failed: the kubernetes backend requires creating task Jobs with a root init container for sidecar materialization; verify service account/RBAC and Pod Security or admission policy for namespace %q: %w", b.config.Namespace, err)
664779
}
665780

666781
func (b *KubernetesBackend) baseLabels(taskID string) map[string]string {
@@ -931,6 +1046,20 @@ func kubernetesTaskWrapperScript() string {
9311046
}, "\n")
9321047
}
9331048

1049+
// kubernetesSidecarMaterializationScript returns the shell script run by the
// sidecar copy init container. It archives the container's root filesystem
// with tar, skipping the listed paths (including ./target itself, so the copy
// destination is not archived into itself), and pipes the archive into a
// second tar that extracts it under /target without restoring the original
// ownership or permissions.
func kubernetesSidecarMaterializationScript() string {
	// Paths, relative to /, that are left out of the archive.
	excludedPaths := []string{
		"./target",
		"./proc",
		"./sys",
		"./dev",
		"./.dockerenv",
		"./var/run/secrets",
		"./run/secrets",
	}

	var script strings.Builder
	script.WriteString("tar \\\n")
	for _, path := range excludedPaths {
		script.WriteString(" --exclude=" + path + " \\\n")
	}
	script.WriteString(" -C / -cf - . | tar --no-same-owner --no-same-permissions -C /target -xf -")
	return script.String()
}
1062+
9341063
// mergeKubernetesEnvVars merges base and override env var slices.
9351064
// Override entries take precedence on name conflict.
9361065
func mergeKubernetesEnvVars(base, override []corev1.EnvVar) []corev1.EnvVar {
@@ -950,6 +1079,15 @@ func mergeKubernetesEnvVars(base, override []corev1.EnvVar) []corev1.EnvVar {
9501079
return result
9511080
}
9521081

1082+
func rootSecurityContext() *corev1.SecurityContext {
1083+
rootUser := int64(0)
1084+
rootGroup := int64(0)
1085+
return &corev1.SecurityContext{
1086+
RunAsUser: &rootUser,
1087+
RunAsGroup: &rootGroup,
1088+
}
1089+
}
1090+
9531091
func (b *KubernetesBackend) shouldFailUnschedulablePod(pod *corev1.Pod) bool {
9541092
if b.config.UnschedulableTimeout == nil || *b.config.UnschedulableTimeout <= 0 {
9551093
return false

0 commit comments

Comments
 (0)