From cdc70ed524fa81234905485b7ef20b8f638304f3 Mon Sep 17 00:00:00 2001 From: Luke Bond Date: Mon, 23 Mar 2026 17:33:28 +0000 Subject: [PATCH 1/2] feat: allow canary image to be configured in helm --- CLAUDE.md | 2 + .../templates/deployment.yaml | 4 ++ charts/restate-operator-helm/values.yaml | 1 + .../94-configurable-canary-image.md | 40 +++++++++++++++++++ src/controllers/mod.rs | 6 +++ src/controllers/restatecluster/controller.rs | 3 ++ .../restatecluster/reconcilers/compute.rs | 18 ++++++++- src/main.rs | 9 +++++ 8 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 release-notes/unreleased/94-configurable-canary-image.md diff --git a/CLAUDE.md b/CLAUDE.md index a23a167..cb95829 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -260,6 +260,8 @@ kubectl delete pod -n restate-operator -l app=restate-operator - `image.repository` - Image repository (default: `ghcr.io/restatedev/restate-operator`) - `image.pullPolicy` - Pull policy (default: `IfNotPresent`) - `awsPodIdentityAssociationCluster` - Enables EKS Pod Identity support +- `gcpWorkloadIdentity` - Enables GCP Workload Identity via Config Connector +- `canaryImage` - Container image for canary jobs (default: `busybox:uclibc`); must provide `grep` and `wget` - `operatorNamespace` - Namespace where operator runs - `operatorLabelName/Value` - Labels for network policy selectors diff --git a/charts/restate-operator-helm/templates/deployment.yaml b/charts/restate-operator-helm/templates/deployment.yaml index 24c9d70..70060f6 100644 --- a/charts/restate-operator-helm/templates/deployment.yaml +++ b/charts/restate-operator-helm/templates/deployment.yaml @@ -64,6 +64,10 @@ spec: - name: GCP_WORKLOAD_IDENTITY value: "true" {{- end }} + {{- if .Values.canaryImage }} + - name: CANARY_IMAGE + value: {{ .Values.canaryImage }} + {{- end }} {{- if .Values.clusterDns }} - name: CLUSTER_DNS value: {{ .Values.clusterDns }} diff --git a/charts/restate-operator-helm/values.yaml b/charts/restate-operator-helm/values.yaml index 0b43c65..7123deb 100644 --- a/charts/restate-operator-helm/values.yaml +++ b/charts/restate-operator-helm/values.yaml @@ -16,6 +16,7 @@ podAnnotations: {} awsPodIdentityAssociationCluster: null gcpWorkloadIdentity: null clusterDns: null # defaults to "cluster.local" in the operator binary +canaryImage: null # defaults to "busybox:uclibc"; image must provide grep and wget podSecurityContext: fsGroup: 2000 diff --git a/release-notes/unreleased/94-configurable-canary-image.md b/release-notes/unreleased/94-configurable-canary-image.md new file mode 100644 index 0000000..2f1e1e4 --- /dev/null +++ b/release-notes/unreleased/94-configurable-canary-image.md @@ -0,0 +1,40 @@ +# Release Notes for Issue #94: Configurable canary image + +## New Feature + +### What Changed +The container image used for PIA and Workload Identity canary jobs is now +configurable via the `canaryImage` Helm value, `CANARY_IMAGE` environment +variable, or `--canary-image` CLI flag. Previously `busybox:uclibc` was +hardcoded, which fails in environments that cannot pull from Docker Hub. + +### Why This Matters +Air-gapped or restricted environments require all images to be pulled from +a private registry. The hardcoded image caused canary pods to enter +ImagePullBackOff, blocking RestateCluster reconciliation. + +### Impact on Users +- **Existing deployments**: No impact. The default remains `busybox:uclibc`. +- **Restricted environments**: Can now point to a private registry mirror. + +### Migration Guidance +If your nodes cannot pull from Docker Hub, set the canary image in your +Helm values: + +```yaml +canaryImage: my-registry.example.com/busybox:uclibc +``` + +The simplest approach is to mirror the default image to your private registry: + +```bash +docker pull busybox:uclibc +docker tag busybox:uclibc my-registry.example.com/busybox:uclibc +docker push my-registry.example.com/busybox:uclibc +``` + +If using a different image, it must provide `grep` and `wget` (used by the +AWS PIA and GCP Workload Identity canary jobs respectively). + +### Related Issues +- Issue #94: Cannot configure image URI for PIA canary pods diff --git a/src/controllers/mod.rs b/src/controllers/mod.rs index 26e86c9..5d1f76a 100644 --- a/src/controllers/mod.rs +++ b/src/controllers/mod.rs @@ -53,9 +53,13 @@ pub struct State { /// The cluster DNS suffix (e.g. "cluster.local") pub cluster_dns: String, + + /// The container image to use for canary jobs + pub canary_image: String, } /// State wrapper around the controller outputs for the web server +#[allow(clippy::too_many_arguments)] impl State { pub fn new( aws_pod_identity_association_cluster: Option, @@ -65,6 +69,7 @@ impl State { operator_label_value: Option, tunnel_client_default_image: String, cluster_dns: String, + canary_image: String, ) -> Self { Self { diagnostics: Arc::new(RwLock::new(Diagnostics::default())), @@ -76,6 +81,7 @@ impl State { operator_label_value, tunnel_client_default_image, cluster_dns, + canary_image, } } diff --git a/src/controllers/restatecluster/controller.rs b/src/controllers/restatecluster/controller.rs index 39243e0..646fc99 100644 --- a/src/controllers/restatecluster/controller.rs +++ b/src/controllers/restatecluster/controller.rs @@ -76,6 +76,8 @@ pub(super) struct Context { pub gcp_workload_identity: bool, /// The cluster DNS suffix (e.g. "cluster.local") pub cluster_dns: String, + /// The container image to use for canary jobs + pub canary_image: String, /// Diagnostics read by the web server pub diagnostics: Arc>, /// Prometheus metrics @@ -108,6 +110,7 @@ impl Context { secret_provider_class_installed, gcp_workload_identity: state.gcp_workload_identity, cluster_dns: state.cluster_dns.clone(), + canary_image: state.canary_image.clone(), diagnostics: state.diagnostics.clone(), metrics, }) diff --git a/src/controllers/restatecluster/reconcilers/compute.rs b/src/controllers/restatecluster/reconcilers/compute.rs index 70e8708..78137ff 100644 --- a/src/controllers/restatecluster/reconcilers/compute.rs +++ b/src/controllers/restatecluster/reconcilers/compute.rs @@ -557,6 +557,7 @@ pub async fn reconcile_compute( spec.compute.tolerations.as_ref(), &job_api, &pod_api, + &ctx.canary_image, ) .await?; @@ -672,6 +673,7 @@ pub async fn reconcile_compute( base_metadata, spec.compute.tolerations.as_ref(), &job_api, + &ctx.canary_image, ) .await?; @@ -825,6 +827,8 @@ async fn apply_pod_identity_association( struct CanaryConfig { /// Job name, e.g. "restate-pia-canary" name: &'static str, + /// Container image to use for the canary pod + image: String, /// Command to run in the canary container command: Vec, /// Reason prefix for NotReady conditions, e.g. "PodIdentityAssociation" @@ -859,7 +863,7 @@ fn canary_job_spec( service_account_name: Some("restate".into()), containers: vec![Container { name: "canary".into(), - image: Some("busybox:uclibc".into()), + image: Some(config.image.clone()), command: Some(config.command.clone()), ..Default::default() }], @@ -965,9 +969,11 @@ async fn check_pia( tolerations: Option<&Vec>, job_api: &Api, pod_api: &Api, + canary_image: &str, ) -> Result<(), Error> { let config = CanaryConfig { name: "restate-pia-canary", + image: canary_image.into(), command: vec![ "grep".into(), "-q".into(), @@ -1182,9 +1188,11 @@ async fn check_workload_identity( base_metadata: &ObjectMeta, tolerations: Option<&Vec>, job_api: &Api, + canary_image: &str, ) -> Result<(), Error> { let config = CanaryConfig { name: "restate-wi-canary", + image: canary_image.into(), command: vec![ "wget".into(), "--header".into(), @@ -1680,6 +1688,7 @@ mod tests { fn test_canary_job_spec_structure() { let config = CanaryConfig { name: "test-canary", + image: "my-registry/busybox:latest".into(), command: vec!["echo".into(), "hello".into()], reason_prefix: "Test", failure_message: "test failed", @@ -1697,7 +1706,10 @@ mod tests { let container = &pod_spec.containers[0]; assert_eq!(container.name, "canary"); - assert_eq!(container.image.as_deref(), Some("busybox:uclibc")); + assert_eq!( + container.image.as_deref(), + Some("my-registry/busybox:latest") + ); assert_eq!( container.command.as_ref().unwrap(), &vec!["echo".to_string(), "hello".to_string()] @@ -1708,6 +1720,7 @@ mod tests { fn test_canary_job_spec_label() { let config = CanaryConfig { name: "my-canary", + image: "busybox:uclibc".into(), command: vec!["true".into()], reason_prefix: "Test", failure_message: "", @@ -1734,6 +1747,7 @@ mod tests { }]; let config = CanaryConfig { name: "test-canary", + image: "busybox:uclibc".into(), command: vec!["true".into()], reason_prefix: "Test", failure_message: "", diff --git a/src/main.rs b/src/main.rs index 2f17e86..5e2e2bd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -61,6 +61,14 @@ struct Arguments { default_value = "cluster.local" )] cluster_dns: String, + + #[arg( + long = "canary-image", + env = "CANARY_IMAGE", + value_name = "IMAGE", + default_value = "busybox:uclibc" + )] + canary_image: String, } #[get("/metrics")] @@ -109,6 +117,7 @@ async fn main() -> anyhow::Result<()> { args.operator_label_value, args.tunnel_client_default_image, args.cluster_dns, + args.canary_image, ); let client = Client::try_default() From 7342dfd120b85304085a756169f5470031c3ba54 Mon Sep 17 00:00:00 2001 From: Luke Bond Date: Wed, 25 Mar 2026 11:21:43 +0000 Subject: [PATCH 2/2] chore: update README.md with requirements for a custom canary image --- README.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/README.md b/README.md index 16e7a83..555209c 100644 --- a/README.md +++ b/README.md @@ -642,6 +642,27 @@ cluster name, by setting `awsPodIdentityAssociationCluster` in the helm chart. I be installed or the operator will fail to start. Then, you may provide `awsPodIdentityAssociationRoleArn` in the `RestateCluster` spec. +### Canary Image + +Both EKS Pod Identity and GCP Workload Identity use a canary job to validate that credentials are available before +starting the Restate cluster. By default, this uses the `busybox:uclibc` image from Docker Hub. In environments where +nodes cannot pull from Docker Hub (e.g. air-gapped or restricted registries), you can override this with the +`canaryImage` Helm value: + +```yaml +canaryImage: my-private-registry.example.com/busybox:uclibc +``` + +The simplest approach is to mirror the default image: + +```bash +docker pull busybox:uclibc +docker tag busybox:uclibc my-private-registry.example.com/busybox:uclibc +docker push my-private-registry.example.com/busybox:uclibc +``` + +If using a different base image, it must provide `grep` and `wget`. + ### EKS Security Groups for Pods [EKS Security Groups for Pods](https://docs.aws.amazon.com/eks/latest/userguide/security-groups-for-pods.html) allows