test(e2e): shared multi-cluster e2e harness

scotwells · claude · scotwells · commit cfc79cb6aaa6 · 2026-06-05T10:24:43.000-05:00
Add the shared e2e environment helper plus the kind and Chainsaw
configuration and kubeconfig/cluster-secret scripts that stand up a
multi-cluster control plane, so federated scheduling can be exercised
end to end.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
diff --git a/hack/e2e/kind-control-plane.yaml b/hack/e2e/kind-control-plane.yaml
@@ -0,0 +1,17 @@
+# Kind config for the compute-control-plane management cluster.
+#
+# The control-plane node publishes container port 32443 on host port 32443
+# (macOS host), so the Karmada API server NodePort service (nodePort: 32443)
+# is reachable at https://localhost:32443 with no extra port-forwarding.
+#
+# The port matches KARMADA_API_NODEPORT in Taskfile.yaml.
+---
+apiVersion: kind.x-k8s.io/v1alpha4
+kind: Cluster
+nodes:
+  - role: control-plane
+    extraPortMappings:
+      - hostPort: 32443
+        containerPort: 32443  # Karmada API server NodePort
+        listenAddress: '127.0.0.1'
+        protocol: TCP
diff --git a/hack/e2e/make-internal-kubeconfig.sh b/hack/e2e/make-internal-kubeconfig.sh
@@ -0,0 +1,60 @@
+#!/usr/bin/env bash
+# make-internal-kubeconfig.sh <input-kubeconfig> <output-kubeconfig> <kind-cluster-name>
+#
+# Produces a kubeconfig variant that uses the Kind node's Docker container IP
+# instead of localhost. This variant is stored in Karmada so the controller
+# manager (running inside Docker) can reach member cluster API servers across
+# the kind bridge network.
+#
+# Background: Kind maps each cluster's API server to a random localhost port
+# on the developer machine. Inside Docker containers, "localhost" refers to the
+# container's own loopback — not the host. We therefore swap the server address
+# to the Kind control-plane container's Docker bridge IP (e.g. 172.18.0.x) and
+# set insecure-skip-tls-verify because the node certificate does not include
+# the Docker bridge IP in its SANs.
+#
+# Usage:
+#   hack/e2e/make-internal-kubeconfig.sh \
+#     tmp/e2e/kubeconfigs/pop-dfw.yaml \
+#     tmp/e2e/kubeconfigs/pop-dfw-internal.yaml \
+#     compute-pop-dfw
+
+set -euo pipefail
+
+INPUT="${1:?usage: $0 <input-kubeconfig> <output-kubeconfig> <kind-cluster-name>}"
+OUTPUT="${2:?usage: $0 <input-kubeconfig> <output-kubeconfig> <kind-cluster-name>}"
+CLUSTER_NAME="${3:?usage: $0 <input-kubeconfig> <output-kubeconfig> <kind-cluster-name>}"
+
+CONTAINER_NAME="${CLUSTER_NAME}-control-plane"
+
+# Resolve the container's Docker bridge IP.
+DOCKER_IP=$(docker inspect \
+  -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' \
+  "${CONTAINER_NAME}" 2>/dev/null || true)
+
+if [ -z "${DOCKER_IP}" ]; then
+  echo "ERROR: Could not resolve Docker IP for container '${CONTAINER_NAME}'." >&2
+  echo "       Is the Kind cluster '${CLUSTER_NAME}' running?" >&2
+  exit 1
+fi
+
+echo "  ${CLUSTER_NAME}: Docker IP ${DOCKER_IP} → ${OUTPUT}"
+
+python3 - "${INPUT}" "${OUTPUT}" "${DOCKER_IP}" <<'PYEOF'
+import sys, yaml
+
+src, dst, docker_ip = sys.argv[1], sys.argv[2], sys.argv[3]
+with open(src) as f:
+    cfg = yaml.safe_load(f)
+
+for entry in cfg.get('clusters') or []:  # tolerate `clusters: null`
+    # Kind API server always listens on port 6443 inside the container.
+    entry['cluster']['server'] = f'https://{docker_ip}:6443'
+    # Skip TLS verification and drop both CA forms: client-go refuses a
+    # cluster that sets a CA together with insecure-skip-tls-verify.
+    entry['cluster']['insecure-skip-tls-verify'] = True
+    for ca_key in ('certificate-authority-data', 'certificate-authority'):
+        entry['cluster'].pop(ca_key, None)
+with open(dst, 'w') as f:
+    yaml.dump(cfg, f, default_flow_style=False)
+PYEOF
diff --git a/hack/e2e/patch-cluster-secret.sh b/hack/e2e/patch-cluster-secret.sh
@@ -0,0 +1,90 @@
+#!/usr/bin/env bash
+# patch-cluster-secret.sh <karmada-kubeconfig> <cluster-name> <internal-kubeconfig>
+#
+# After "karmadactl join", Karmada stores the member cluster's kubeconfig in a
+# Secret referenced by the Cluster object's spec.secretRef, and sets
+# spec.apiEndpoint to the localhost address it resolved from the external
+# kubeconfig. The Karmada controller manager runs inside Docker and cannot use
+# localhost to reach POP cell API servers.
+#
+# This script:
+#   1. Replaces the kubeconfig in the Secret with the Docker-IP variant so that
+#      the Karmada controller can make API calls to the member cluster.
+#   2. Patches spec.apiEndpoint on the Cluster object so that health checks also
+#      use the Docker bridge IP instead of localhost.
+#
+# Usage:
+#   hack/e2e/patch-cluster-secret.sh \
+#     tmp/e2e/kubeconfigs/karmada.yaml \
+#     compute-pop-dfw \
+#     tmp/e2e/kubeconfigs/pop-dfw-internal.yaml
+
+set -euo pipefail
+
+KARMADA_KUBECONFIG="${1:?usage: $0 <karmada-kubeconfig> <cluster-name> <internal-kubeconfig>}"
+CLUSTER_NAME="${2:?usage: $0 <karmada-kubeconfig> <cluster-name> <internal-kubeconfig>}"
+INTERNAL_KUBECONFIG="${3:?usage: $0 <karmada-kubeconfig> <cluster-name> <internal-kubeconfig>}"
+
+# Run kubectl against the Karmada API server.
+karmada_kubectl() {
+  kubectl --kubeconfig="${KARMADA_KUBECONFIG}" "$@"
+}
+
+# Print one field of the Cluster object's spec.secretRef; a failed lookup is
+# silenced and prints nothing.
+secret_ref_field() {
+  karmada_kubectl get cluster "${CLUSTER_NAME}" \
+    -o jsonpath="{.spec.secretRef.$1}" 2>/dev/null || true
+}
+
+# ------------------------------------------------------------------
+# Look up the Secret referenced by the Cluster object
+# ------------------------------------------------------------------
+SECRET_NAME=$(secret_ref_field name)
+
+if [[ -z "${SECRET_NAME}" ]]; then
+  echo "ERROR: Could not find spec.secretRef.name on cluster '${CLUSTER_NAME}'." >&2
+  echo "       Has karmadactl join completed successfully?" >&2
+  exit 1
+fi
+
+# The namespace falls back to karmada-system when secretRef.namespace is empty.
+SECRET_NAMESPACE=$(secret_ref_field namespace)
+SECRET_NAMESPACE="${SECRET_NAMESPACE:-karmada-system}"
+
+echo "  Patching secret ${SECRET_NAMESPACE}/${SECRET_NAME} with Docker-IP kubeconfig..."
+
+# ------------------------------------------------------------------
+# Swap in the Docker-IP kubeconfig: the Secret manifest is rendered
+# client-side (--dry-run=client) and piped into `kubectl apply`.
+# ------------------------------------------------------------------
+karmada_kubectl create secret generic "${SECRET_NAME}" \
+  --namespace="${SECRET_NAMESPACE}" \
+  --from-file=kubeconfig="${INTERNAL_KUBECONFIG}" \
+  --dry-run=client -o yaml \
+  | karmada_kubectl apply -f -
+
+echo "  Secret ${SECRET_NAMESPACE}/${SECRET_NAME} updated — Karmada controller will use Docker bridge IP"
+
+# ------------------------------------------------------------------
+# Point spec.apiEndpoint at the server URL of the internal kubeconfig
+# so that Karmada's cluster-status controller probes an address it can
+# reach. Left unpatched, the endpoint keeps the localhost address
+# recorded by karmadactl join and the cluster never becomes Ready.
+# ------------------------------------------------------------------
+DOCKER_SERVER=$(kubectl --kubeconfig="${INTERNAL_KUBECONFIG}" \
+  config view --minify -o jsonpath='{.clusters[0].cluster.server}')
+
+if [[ -z "${DOCKER_SERVER}" ]]; then
+  echo "ERROR: Could not read server URL from ${INTERNAL_KUBECONFIG}" >&2
+  exit 1
+fi
+
+# JSON merge patch that rewrites only spec.apiEndpoint.
+API_ENDPOINT_PATCH="{\"spec\":{\"apiEndpoint\":\"${DOCKER_SERVER}\"}}"
+echo "  Patching spec.apiEndpoint on cluster '${CLUSTER_NAME}' → ${DOCKER_SERVER}..."
+karmada_kubectl patch cluster "${CLUSTER_NAME}" \
+  --type=merge \
+  -p "${API_ENDPOINT_PATCH}"
+
+echo "  Cluster '${CLUSTER_NAME}' patched — health checks will now use Docker bridge IP"
diff --git a/test/e2e/chainsaw-config.yaml b/test/e2e/chainsaw-config.yaml
@@ -0,0 +1,47 @@
+# Chainsaw global configuration for the compute federation e2e test suite.
+#
+# Prerequisites
+# ─────────────
+# Run `task e2e:up` to create the Kind clusters and populate kubeconfigs under
+# tmp/e2e/kubeconfigs/ before running Chainsaw.
+#
+# Running
+# ───────
+# From the repository root via Taskfile (recommended):
+#
+#   task e2e:test
+#
+# Or directly:
+#
+#   KUBECONFIG=tmp/e2e/kubeconfigs/control-plane.yaml \
+#   chainsaw test --config test/e2e/chainsaw-config.yaml test/e2e/
+#
+# The KUBECONFIG env var sets the "default" cluster (control-plane cell).
+# Additional clusters (downstream, pop-dfw, pop-ord) are declared below and
+# referenced by name in individual test steps via `cluster: downstream` etc.
+#
+# Kubeconfig paths below are relative to the working directory where Chainsaw is
+# invoked (the project root), NOT relative to this config file's location.
+apiVersion: chainsaw.kyverno.io/v1alpha1
+kind: Configuration
+metadata:
+  name: chainsaw
+spec:
+  timeouts:  # one timeout per Chainsaw operation type, set suite-wide
+    apply: 30s
+    assert: 60s
+    cleanup: 60s
+    delete: 30s
+    error: 30s
+    exec: 30s
+  clusters:  # extra clusters; a test step selects one with `cluster: <name>`
+    # Downstream control plane. WorkloadDeployments, PropagationPolicies,
+    # and Instance write-backs live here.
+    downstream:
+      kubeconfig: tmp/e2e/kubeconfigs/downstream.yaml
+    # POP DFW cell — downstream member cluster labelled topology.datum.net/city-code=dfw.
+    pop-dfw:
+      kubeconfig: tmp/e2e/kubeconfigs/pop-dfw.yaml
+    # POP ORD cell — downstream member cluster labelled topology.datum.net/city-code=ord.
+    pop-ord:
+      kubeconfig: tmp/e2e/kubeconfigs/pop-ord.yaml
diff --git a/test/e2e/env/README.md b/test/e2e/env/README.md
diff --git a/test/e2e/env/env.go b/test/e2e/env/env.go