diff --git a/config/rbac/cluster/role.yaml b/config/rbac/cluster/role.yaml index b26a01297b..fb845ce5c1 100644 --- a/config/rbac/cluster/role.yaml +++ b/config/rbac/cluster/role.yaml @@ -119,6 +119,16 @@ rules: - get - update - watch +- apiGroups: + - discovery.k8s.io + resources: + - endpointslices + verbs: + - create + - delete + - list + - patch + - watch - apiGroups: - pgv2.percona.com resources: diff --git a/config/rbac/namespace/role.yaml b/config/rbac/namespace/role.yaml index 650c48240b..f1f9f922f5 100644 --- a/config/rbac/namespace/role.yaml +++ b/config/rbac/namespace/role.yaml @@ -119,6 +119,16 @@ rules: - get - update - watch +- apiGroups: + - discovery.k8s.io + resources: + - endpointslices + verbs: + - create + - delete + - list + - patch + - watch - apiGroups: - pgv2.percona.com resources: diff --git a/deploy/bundle.yaml b/deploy/bundle.yaml index eca85afdf2..45fba8fd88 100644 --- a/deploy/bundle.yaml +++ b/deploy/bundle.yaml @@ -69405,6 +69405,16 @@ rules: - get - update - watch +- apiGroups: + - discovery.k8s.io + resources: + - endpointslices + verbs: + - create + - delete + - list + - patch + - watch - apiGroups: - pgv2.percona.com resources: diff --git a/deploy/cw-bundle.yaml b/deploy/cw-bundle.yaml index 313b06a725..69975f8f4c 100644 --- a/deploy/cw-bundle.yaml +++ b/deploy/cw-bundle.yaml @@ -69405,6 +69405,16 @@ rules: - get - update - watch +- apiGroups: + - discovery.k8s.io + resources: + - endpointslices + verbs: + - create + - delete + - list + - patch + - watch - apiGroups: - pgv2.percona.com resources: diff --git a/deploy/cw-rbac.yaml b/deploy/cw-rbac.yaml index cd97b571ce..00b0753c9c 100644 --- a/deploy/cw-rbac.yaml +++ b/deploy/cw-rbac.yaml @@ -123,6 +123,16 @@ rules: - get - update - watch +- apiGroups: + - discovery.k8s.io + resources: + - endpointslices + verbs: + - create + - delete + - list + - patch + - watch - apiGroups: - pgv2.percona.com resources: diff --git a/deploy/rbac.yaml b/deploy/rbac.yaml index d698698d8e..433ff9cd33 100644 --- a/deploy/rbac.yaml +++ b/deploy/rbac.yaml @@ -123,6 +123,16 @@ rules: - get - update - watch +- apiGroups: + - discovery.k8s.io + resources: + - endpointslices + verbs: + - create + - delete + - list + - patch + - watch - apiGroups: - pgv2.percona.com resources: diff --git a/e2e-tests/functions b/e2e-tests/functions index 3a2f665020..02ec53acf9 100644 --- a/e2e-tests/functions +++ b/e2e-tests/functions @@ -49,6 +49,8 @@ check_operator_panic() { } deploy_operator() { + deploy_coredns + local cw_prefix="" destroy_operator @@ -411,7 +413,7 @@ run_psql_command() { local command=${1} local uri=${2} local driver=${3:-postgres} - + kubectl -n ${NAMESPACE} exec $(get_client_pod) -- \ psql -v ON_ERROR_STOP=1 -t -q "${driver}://${uri}" -c "${command}" } @@ -1209,94 +1211,127 @@ wait_for_generation() { # Reads all environment variables from a pod (single kubectl exec call) get_envs_from_pod() { - local namespace=$1 - local pod=$2 - local container=$3 + local namespace=$1 + local pod=$2 + local container=$3 - if [ -n "$container" ]; then - kubectl exec -n "$namespace" "$pod" -c "$container" -- printenv 2>/dev/null || true - else - kubectl exec -n "$namespace" "$pod" -- printenv 2>/dev/null || true - fi + if [ -n "$container" ]; then + kubectl exec -n "$namespace" "$pod" -c "$container" -- printenv 2>/dev/null || true + else + kubectl exec -n "$namespace" "$pod" -- printenv 2>/dev/null || true + fi } # Verifies a single variable in given env content check_env_in_pod() { - local check_type=$1 - local pod=$2 - local var_name=$3 - 
local expected_value=$4 - local env_content=$5 - - local actual_value - actual_value=$(echo "$env_content" | grep -E "^${var_name}=" | cut -d'=' -f2- || true) - - if [[ "$check_type" == "add" ]]; then - if [ "$actual_value" != "$expected_value" ]; then - echo "ERROR: $var_name in $pod — expected '$expected_value', got '${actual_value:-}'" - return 1 - else - echo "OK: $var_name=$actual_value in $pod" - fi - elif [[ "$check_type" == "delete" ]]; then - if [ -n "$actual_value" ]; then - echo "ERROR: $var_name exists in $pod (should not exist)" - return 1 - else - echo "OK: $var_name deleted in $pod" - fi - else - echo "ERROR: unknown check type '$check_type'" - return 1 - fi + local check_type=$1 + local pod=$2 + local var_name=$3 + local expected_value=$4 + local env_content=$5 + + local actual_value + actual_value=$(echo "$env_content" | grep -E "^${var_name}=" | cut -d'=' -f2- || true) + + if [[ $check_type == "add" ]]; then + if [ "$actual_value" != "$expected_value" ]; then + echo "ERROR: $var_name in $pod — expected '$expected_value', got '${actual_value:-}'" + return 1 + else + echo "OK: $var_name=$actual_value in $pod" + fi + elif [[ $check_type == "delete" ]]; then + if [ -n "$actual_value" ]; then + echo "ERROR: $var_name exists in $pod (should not exist)" + return 1 + else + echo "OK: $var_name deleted in $pod" + fi + else + echo "ERROR: unknown check type '$check_type'" + return 1 + fi } # Checks multiple env vars in one or more components check_envs_for_component() { - local check_type=$1 # add | delete - local component=$2 # instance | pgbouncer | repohost - local vars=("${@:3}") # everything after the 2nd argument - - case "$component" in - instance) - POD=$(kubectl get -n "${NAMESPACE}" pod -l postgres-operator.crunchydata.com/instance-set=instance1 -o 'jsonpath={.items[0].metadata.name}') - CONTAINER="" - ;; - pgbouncer) - POD=$(kubectl get -n "${NAMESPACE}" pod -l postgres-operator.crunchydata.com/role=pgbouncer -o 'jsonpath={.items[0].metadata.name}') - CONTAINER="pgbouncer" - ;; - repohost) - POD=$(kubectl get -n "${NAMESPACE}" pod -l postgres-operator.crunchydata.com/data=pgbackrest -o 'jsonpath={.items[0].metadata.name}') - CONTAINER="pgbackrest" - ;; - *) - echo "ERROR: unknown component '$component'" - return 1 - ;; - esac - - echo "Fetching environment variables for $component pod $POD..." 
- local env_content - env_content=$(get_envs_from_pod "${NAMESPACE}" "$POD" "$CONTAINER") - - local errors=0 - for var_entry in "${vars[@]}"; do - if [[ "$check_type" == "add" ]]; then - local var_name="${var_entry%%=*}" - local var_expected="${var_entry#*=}" - check_env_in_pod add "$POD" "$var_name" "$var_expected" "$env_content" || errors=$((errors+1)) - else - check_env_in_pod delete "$POD" "$var_entry" "" "$env_content" || errors=$((errors+1)) - fi - done - - if (( errors > 0 )); then - echo "$errors environment check(s) failed for component '$component'" - return 1 - else - echo "All environment checks passed for component '$component'" - fi + local check_type=$1 # add | delete + local component=$2 # instance | pgbouncer | repohost + local vars=("${@:3}") # everything after the 2nd argument + + case "$component" in + instance) + POD=$(kubectl get -n "${NAMESPACE}" pod -l postgres-operator.crunchydata.com/instance-set=instance1 -o 'jsonpath={.items[0].metadata.name}') + CONTAINER="" + ;; + pgbouncer) + POD=$(kubectl get -n "${NAMESPACE}" pod -l postgres-operator.crunchydata.com/role=pgbouncer -o 'jsonpath={.items[0].metadata.name}') + CONTAINER="pgbouncer" + ;; + repohost) + POD=$(kubectl get -n "${NAMESPACE}" pod -l postgres-operator.crunchydata.com/data=pgbackrest -o 'jsonpath={.items[0].metadata.name}') + CONTAINER="pgbackrest" + ;; + *) + echo "ERROR: unknown component '$component'" + return 1 + ;; + esac + + echo "Fetching environment variables for $component pod $POD..." + local env_content + env_content=$(get_envs_from_pod "${NAMESPACE}" "$POD" "$CONTAINER") + + local errors=0 + for var_entry in "${vars[@]}"; do + if [[ $check_type == "add" ]]; then + local var_name="${var_entry%%=*}" + local var_expected="${var_entry#*=}" + check_env_in_pod add "$POD" "$var_name" "$var_expected" "$env_content" || errors=$((errors + 1)) + else + check_env_in_pod delete "$POD" "$var_entry" "" "$env_content" || errors=$((errors + 1)) + fi + done + + if ((errors > 0)); then + echo "$errors environment check(s) failed for component '$component'" + return 1 + else + echo "All environment checks passed for component '$component'" + fi +} + +deploy_coredns() { + # Only the "upgrade-consistency" test needs CoreDNS to be installed. + # For all other tests, if CoreDNS was installed by this script earlier, uninstall it. + local coredns_found=false + if kubectl -n kube-system get deploy coredns >/dev/null 2>&1; then + coredns_found=true + fi + + if [[ $test_name != "upgrade-consistency" ]]; then + ! 
$coredns_found && return
+
+		helm uninstall coredns -n kube-system >/dev/null 2>&1 || true
+		kubectl scale deployment --replicas=2 kube-dns --namespace=kube-system
+		kubectl scale deployment --replicas=1 kube-dns-autoscaler --namespace=kube-system
+		return
+	fi
+
+	$coredns_found && return
+
+	helm repo add coredns https://coredns.github.io/helm
+	helm install coredns coredns/coredns \
+		-n kube-system \
+		--set k8sAppLabelOverride=kube-dns
+	kubectl scale deployment --replicas=0 kube-dns-autoscaler kube-dns --namespace=kube-system
 }
 
 detect_k8s_platform() {
@@ -1348,21 +1383,21 @@ enable_hugepages() {
 }
 
 enable_hugepages_gke() {
-    local hugepage_count=$1
+	local hugepage_count=$1
 
-    echo "Hugepages configuration is now handled during cluster creation"
-    echo "Skipping runtime configuration for GKE"
+	echo "Hugepages configuration is now handled during cluster creation"
+	echo "Skipping runtime configuration for GKE"
 
-    echo "Verifying hugepages configuration..."
-    local node_name=$(kubectl get nodes -o jsonpath='{.items[0].metadata.name}')
+	echo "Verifying hugepages configuration..."
+	local node_name=$(kubectl get nodes -o jsonpath='{.items[0].metadata.name}')
 
-    if kubectl get node ${node_name} -o jsonpath='{.status.allocatable.hugepages-2Mi}' | grep -q '[0-9]'; then
-        echo "✓ Hugepages are already configured on nodes"
-        return 0
-    else
-        echo "✗ Warning: Hugepages not found in node allocatable resources"
-        return 1
-    fi
+	if kubectl get node ${node_name} -o jsonpath='{.status.allocatable.hugepages-2Mi}' | grep -q '[0-9]'; then
+		echo "✓ Hugepages are already configured on nodes"
+		return 0
+	else
+		echo "✗ Warning: Hugepages not found in node allocatable resources"
+		return 1
+	fi
 }
 
 enable_hugepages_eks() {
@@ -1530,128 +1565,128 @@ spec:
         path: /etc/sysctl.d/99-hugepages.conf
 EOF
 
-    [ $? -eq 0 ] || return 1
+	[ $? -eq 0 ] || return 1
 
-    echo "MachineConfig created"
-    echo "Waiting for worker pool to update (~10 minutes)..."
+	echo "MachineConfig created"
+	echo "Waiting for worker pool to update (~10 minutes)..."
- kubectl wait --for=condition=Updated mcp/worker --timeout=900s 2>/dev/null || { - echo "Update taking longer than expected" - return 1 - } + kubectl wait --for=condition=Updated mcp/worker --timeout=900s 2>/dev/null || { + echo "Update taking longer than expected" + return 1 + } - echo "Worker pool updated" + echo "Worker pool updated" - sleep 10 - verify_hugepages_on_nodes + sleep 10 + verify_hugepages_on_nodes } verify_hugepages_on_nodes() { - echo "Verifying hugepages on nodes" + echo "Verifying hugepages on nodes" - # Get first worker node, fallback to first non-master, fallback to any node - local node_name=$( - kubectl get nodes -l node-role.kubernetes.io/worker -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || \ - kubectl get nodes -l '!node-role.kubernetes.io/master,!node-role.kubernetes.io/control-plane' -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || \ - kubectl get nodes -o jsonpath='{.items[0].metadata.name}' - ) + # Get first worker node, fallback to first non-master, fallback to any node + local node_name=$( + kubectl get nodes -l node-role.kubernetes.io/worker -o jsonpath='{.items[0].metadata.name}' 2>/dev/null \ + || kubectl get nodes -l '!node-role.kubernetes.io/master,!node-role.kubernetes.io/control-plane' -o jsonpath='{.items[0].metadata.name}' 2>/dev/null \ + || kubectl get nodes -o jsonpath='{.items[0].metadata.name}' + ) - if [ -z "${node_name}" ]; then - echo "No nodes found" - return 1 - fi + if [ -z "${node_name}" ]; then + echo "No nodes found" + return 1 + fi - echo "Checking node: ${node_name}" + echo "Checking node: ${node_name}" - local hugepages_capacity=$(kubectl get node ${node_name} \ - -o jsonpath='{.status.capacity.hugepages-2Mi}') + local hugepages_capacity=$(kubectl get node ${node_name} \ + -o jsonpath='{.status.capacity.hugepages-2Mi}') - if [ -n "${hugepages_capacity}" ] && [ "${hugepages_capacity}" != "0" ]; then - echo "Node has hugepages capacity: ${hugepages_capacity}" - return 0 - else - echo "No hugepages capacity found on node ${node_name}" - return 1 - fi + if [ -n "${hugepages_capacity}" ] && [ "${hugepages_capacity}" != "0" ]; then + echo "Node has hugepages capacity: ${hugepages_capacity}" + return 0 + else + echo "No hugepages capacity found on node ${node_name}" + return 1 + fi } verify_hugepages_in_pod() { - local pod_name=$1 - local namespace=$2 - local container=${3:-postgres} + local pod_name=$1 + local namespace=$2 + local container=${3:-postgres} - echo "Verifying hugepages in pod ${pod_name}" + echo "Verifying hugepages in pod ${pod_name}" - # Check /proc/meminfo - local hugepages_total=$(kubectl exec ${pod_name} -n ${namespace} -c ${container} -- \ - grep HugePages_Total /proc/meminfo | awk '{print $2}') + # Check /proc/meminfo + local hugepages_total=$(kubectl exec ${pod_name} -n ${namespace} -c ${container} -- \ + grep HugePages_Total /proc/meminfo | awk '{print $2}') - local hugepages_free=$(kubectl exec ${pod_name} -n ${namespace} -c ${container} -- \ - grep HugePages_Free /proc/meminfo | awk '{print $2}') + local hugepages_free=$(kubectl exec ${pod_name} -n ${namespace} -c ${container} -- \ + grep HugePages_Free /proc/meminfo | awk '{print $2}') - echo "HugePages_Total: ${hugepages_total}" - echo "HugePages_Free: ${hugepages_free}" + echo "HugePages_Total: ${hugepages_total}" + echo "HugePages_Free: ${hugepages_free}" - if [ "${hugepages_total}" -gt 0 ]; then - echo "Hugepages are available in pod" - return 0 - else - echo "No hugepages in pod" - return 1 - fi + if [ "${hugepages_total}" -gt 0 ]; then + echo 
"Hugepages are available in pod" + return 0 + else + echo "No hugepages in pod" + return 1 + fi } verify_postgresql_hugepages_setting() { - local cluster_name=$1 - local expected_value=${2:-try} + local cluster_name=$1 + local expected_value=${2:-try} - echo "Verifying PostgreSQL huge_pages setting..." + echo "Verifying PostgreSQL huge_pages setting..." - local huge_pages=$(run_psql_local \ - "SHOW huge_pages;" \ - "postgres:$(get_psql_user_pass ${cluster_name}-pguser-postgres)@$(get_psql_user_host ${cluster_name}-pguser-postgres)") + local huge_pages=$(run_psql_local \ + "SHOW huge_pages;" \ + "postgres:$(get_psql_user_pass ${cluster_name}-pguser-postgres)@$(get_psql_user_host ${cluster_name}-pguser-postgres)") - echo "huge_pages: ${huge_pages}" + echo "huge_pages: ${huge_pages}" - if [[ "${huge_pages}" == *"${expected_value}"* ]]; then - echo "PostgreSQL huge_pages is set to '${expected_value}'" - return 0 - else - echo "PostgreSQL huge_pages not set to '${expected_value}' (value: ${huge_pages})" - return 1 - fi + if [[ ${huge_pages} == *"${expected_value}"* ]]; then + echo "PostgreSQL huge_pages is set to '${expected_value}'" + return 0 + else + echo "PostgreSQL huge_pages not set to '${expected_value}' (value: ${huge_pages})" + return 1 + fi } verify_hugepages_usage() { - local pod_name=$1 - local namespace=$2 - local container=${3:-database} + local pod_name=$1 + local namespace=$2 + local container=${3:-database} - echo "Checking hugepages usage..." + echo "Checking hugepages usage..." - kubectl -n ${namespace} exec ${pod_name} -c ${container} -- \ - grep HugePages /proc/meminfo + kubectl -n ${namespace} exec ${pod_name} -c ${container} -- \ + grep HugePages /proc/meminfo - local hugepages_total=$(kubectl -n ${namespace} exec ${pod_name} -c ${container} -- \ - grep HugePages_Total /proc/meminfo | awk '{print $2}') + local hugepages_total=$(kubectl -n ${namespace} exec ${pod_name} -c ${container} -- \ + grep HugePages_Total /proc/meminfo | awk '{print $2}') - local hugepages_free=$(kubectl -n ${namespace} exec ${pod_name} -c ${container} -- \ - grep HugePages_Free /proc/meminfo | awk '{print $2}') + local hugepages_free=$(kubectl -n ${namespace} exec ${pod_name} -c ${container} -- \ + grep HugePages_Free /proc/meminfo | awk '{print $2}') - local hugepages_used=$((hugepages_total - hugepages_free)) + local hugepages_used=$((hugepages_total - hugepages_free)) - echo "" - echo "HugePages usage:" - echo " Total: ${hugepages_total}" - echo " Used: ${hugepages_used}" + echo "" + echo "HugePages usage:" + echo " Total: ${hugepages_total}" + echo " Used: ${hugepages_used}" - if [ "${hugepages_used}" -gt 0 ]; then - echo "PostgreSQL is using hugepages" - return 0 - else - echo "Hugepages available but NOT being used by PostgreSQL" - return 1 - fi + if [ "${hugepages_used}" -gt 0 ]; then + echo "PostgreSQL is using hugepages" + return 0 + else + echo "Hugepages available but NOT being used by PostgreSQL" + return 1 + fi } function vault_tls() { diff --git a/e2e-tests/tests/upgrade-consistency/00-deploy-operator.yaml b/e2e-tests/tests/upgrade-consistency/00-deploy-operator.yaml index bcdb235dc7..9176fb5311 100644 --- a/e2e-tests/tests/upgrade-consistency/00-deploy-operator.yaml +++ b/e2e-tests/tests/upgrade-consistency/00-deploy-operator.yaml @@ -9,4 +9,4 @@ commands: source ../../functions init_temp_dir # do this only in the first TestStep - deploy_operator + PGO_FEATURE_GATES="EndpointSlices=true" deploy_operator diff --git a/e2e-tests/tests/upgrade-consistency/02-assert.yaml 
b/e2e-tests/tests/upgrade-consistency/02-assert.yaml index 45ce641352..3c97535ee8 100644 --- a/e2e-tests/tests/upgrade-consistency/02-assert.yaml +++ b/e2e-tests/tests/upgrade-consistency/02-assert.yaml @@ -1,6 +1,6 @@ apiVersion: kuttl.dev/v1beta1 kind: TestAssert -timeout: 300 +timeout: 240 --- kind: StatefulSet apiVersion: apps/v1 @@ -133,3 +133,80 @@ status: ready: 3 size: 3 state: ready +--- +apiVersion: v1 +kind: Endpoints +metadata: + labels: + postgres-operator.crunchydata.com/cluster: upgrade-consistency + postgres-operator.crunchydata.com/patroni: upgrade-consistency-ha + name: upgrade-consistency-ha +--- +apiVersion: v1 +kind: Endpoints +metadata: + labels: + postgres-operator.crunchydata.com/cluster: upgrade-consistency + postgres-operator.crunchydata.com/patroni: upgrade-consistency-ha + name: upgrade-consistency-ha-config +--- +apiVersion: v1 +kind: Endpoints +metadata: + labels: + postgres-operator.crunchydata.com/cluster: upgrade-consistency + postgres-operator.crunchydata.com/patroni: upgrade-consistency-ha + name: upgrade-consistency-ha-failover +--- +apiVersion: v1 +kind: Endpoints +metadata: + labels: + app.kubernetes.io/component: pgbouncer + app.kubernetes.io/instance: upgrade-consistency + app.kubernetes.io/managed-by: percona-postgresql-operator + app.kubernetes.io/name: percona-postgresql + app.kubernetes.io/part-of: percona-postgresql + pgv2.percona.com/version: 2.8.2 + postgres-operator.crunchydata.com/cluster: upgrade-consistency + postgres-operator.crunchydata.com/role: pgbouncer + name: upgrade-consistency-pgbouncer +--- +apiVersion: v1 +kind: Endpoints +metadata: + labels: + postgres-operator.crunchydata.com/cluster: upgrade-consistency + service.kubernetes.io/headless: "" + name: upgrade-consistency-pods +--- +apiVersion: v1 +kind: Endpoints +metadata: + labels: + app.kubernetes.io/component: pg + app.kubernetes.io/instance: upgrade-consistency + app.kubernetes.io/managed-by: percona-postgresql-operator + app.kubernetes.io/name: percona-postgresql + app.kubernetes.io/part-of: percona-postgresql + postgres-operator.crunchydata.com/cluster: upgrade-consistency + postgres-operator.crunchydata.com/role: primary + name: upgrade-consistency-primary + ownerReferences: + - apiVersion: postgres-operator.crunchydata.com/v1beta1 + blockOwnerDeletion: true + controller: true + kind: PostgresCluster +--- +apiVersion: v1 +kind: Endpoints +metadata: + labels: + app.kubernetes.io/component: pg + app.kubernetes.io/instance: upgrade-consistency + app.kubernetes.io/managed-by: percona-postgresql-operator + app.kubernetes.io/name: percona-postgresql + app.kubernetes.io/part-of: percona-postgresql + postgres-operator.crunchydata.com/cluster: upgrade-consistency + postgres-operator.crunchydata.com/role: replica + name: upgrade-consistency-replicas diff --git a/e2e-tests/tests/upgrade-consistency/03-assert.yaml b/e2e-tests/tests/upgrade-consistency/03-assert.yaml index 1382b3ea26..305b66f677 100644 --- a/e2e-tests/tests/upgrade-consistency/03-assert.yaml +++ b/e2e-tests/tests/upgrade-consistency/03-assert.yaml @@ -133,3 +133,196 @@ status: ready: 3 size: 3 state: ready +--- +addressType: IPv4 +apiVersion: discovery.k8s.io/v1 +endpoints: + - conditions: + ready: true + targetRef: + kind: Pod +kind: EndpointSlice +metadata: + labels: + endpointslice.kubernetes.io/managed-by: endpointslicemirroring-controller.k8s.io + kubernetes.io/service-name: upgrade-consistency-ha + postgres-operator.crunchydata.com/cluster: upgrade-consistency + postgres-operator.crunchydata.com/patroni: 
upgrade-consistency-ha + ownerReferences: + - apiVersion: v1 + blockOwnerDeletion: true + controller: true + kind: Endpoints +ports: + - name: postgres + port: 5432 + protocol: TCP +--- +addressType: IPv4 +apiVersion: discovery.k8s.io/v1 +endpoints: + - conditions: + ready: true + serving: true + terminating: false + targetRef: + kind: Pod + - conditions: + ready: true + serving: true + terminating: false + targetRef: + kind: Pod + - conditions: + ready: true + serving: true + terminating: false + targetRef: + kind: Pod +kind: EndpointSlice +metadata: + generateName: upgrade-consistency-pgbouncer- + labels: + app.kubernetes.io/component: pgbouncer + app.kubernetes.io/instance: upgrade-consistency + app.kubernetes.io/managed-by: percona-postgresql-operator + app.kubernetes.io/name: percona-postgresql + app.kubernetes.io/part-of: percona-postgresql + endpointslice.kubernetes.io/managed-by: endpointslice-controller.k8s.io + kubernetes.io/service-name: upgrade-consistency-pgbouncer + pgv2.percona.com/version: 2.9.0 + postgres-operator.crunchydata.com/cluster: upgrade-consistency + postgres-operator.crunchydata.com/role: pgbouncer + ownerReferences: + - apiVersion: v1 + blockOwnerDeletion: true + controller: true + kind: Service +ports: + - name: pgbouncer + port: 5432 + protocol: TCP +--- +addressType: IPv4 +apiVersion: discovery.k8s.io/v1 +endpoints: + - conditions: + ready: true + serving: true + terminating: false + targetRef: + kind: Pod + - conditions: + ready: true + serving: true + terminating: false + targetRef: + kind: Pod + - conditions: + ready: true + serving: true + terminating: false + targetRef: + kind: Pod + - conditions: + ready: true + serving: true + terminating: false + targetRef: + kind: Pod + - conditions: + ready: true + serving: true + terminating: false + targetRef: + kind: Pod + - conditions: + ready: true + serving: true + terminating: false + targetRef: + kind: Pod + - conditions: + ready: true + serving: true + terminating: false + targetRef: + kind: Pod +kind: EndpointSlice +metadata: + generateName: upgrade-consistency-pods- + labels: + endpointslice.kubernetes.io/managed-by: endpointslice-controller.k8s.io + kubernetes.io/service-name: upgrade-consistency-pods + postgres-operator.crunchydata.com/cluster: upgrade-consistency + service.kubernetes.io/headless: "" + ownerReferences: + - apiVersion: v1 + blockOwnerDeletion: true + controller: true + kind: Service +ports: null +--- +addressType: IPv4 +apiVersion: discovery.k8s.io/v1 +endpoints: + - conditions: {} +kind: EndpointSlice +metadata: + name: upgrade-consistency-primary + labels: + app.kubernetes.io/component: pg + app.kubernetes.io/instance: upgrade-consistency + app.kubernetes.io/managed-by: percona-postgresql-operator + app.kubernetes.io/name: percona-postgresql + app.kubernetes.io/part-of: percona-postgresql + kubernetes.io/service-name: upgrade-consistency-primary + postgres-operator.crunchydata.com/cluster: upgrade-consistency + postgres-operator.crunchydata.com/role: primary + ownerReferences: + - apiVersion: postgres-operator.crunchydata.com/v1beta1 + blockOwnerDeletion: true + controller: true + kind: PostgresCluster +ports: + - name: postgres + port: 5432 + protocol: TCP +--- +addressType: IPv4 +apiVersion: discovery.k8s.io/v1 +endpoints: + - conditions: + ready: true + serving: true + terminating: false + targetRef: + kind: Pod + - conditions: + ready: true + serving: true + terminating: false + targetRef: + kind: Pod +kind: EndpointSlice +metadata: + generateName: upgrade-consistency-replicas- + 
labels: + app.kubernetes.io/component: pg + app.kubernetes.io/instance: upgrade-consistency + app.kubernetes.io/managed-by: percona-postgresql-operator + app.kubernetes.io/name: percona-postgresql + app.kubernetes.io/part-of: percona-postgresql + endpointslice.kubernetes.io/managed-by: endpointslice-controller.k8s.io + kubernetes.io/service-name: upgrade-consistency-replicas + postgres-operator.crunchydata.com/cluster: upgrade-consistency + postgres-operator.crunchydata.com/role: replica + ownerReferences: + - apiVersion: v1 + blockOwnerDeletion: true + controller: true + kind: Service +ports: + - name: postgres + port: 5432 + protocol: TCP diff --git a/e2e-tests/tests/upgrade-consistency/03-errors.yaml b/e2e-tests/tests/upgrade-consistency/03-errors.yaml new file mode 100644 index 0000000000..85d74f94a8 --- /dev/null +++ b/e2e-tests/tests/upgrade-consistency/03-errors.yaml @@ -0,0 +1,11 @@ +apiVersion: discovery.k8s.io/v1 +kind: EndpointSlice +metadata: + generateName: upgrade-consistency-primary- + labels: + endpointslice.kubernetes.io/managed-by: endpointslice-controller.k8s.io +--- +apiVersion: v1 +kind: Endpoints +metadata: + name: upgrade-consistency-primary diff --git a/internal/controller/postgrescluster/cluster.go b/internal/controller/postgrescluster/cluster.go index 4dc8a5f4d0..d82817ebac 100644 --- a/internal/controller/postgrescluster/cluster.go +++ b/internal/controller/postgrescluster/cluster.go @@ -10,10 +10,12 @@ import ( "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" + discoveryv1 "k8s.io/api/discovery/v1" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" + "github.com/percona/percona-postgresql-operator/v2/internal/feature" "github.com/percona/percona-postgresql-operator/v2/internal/initialize" "github.com/percona/percona-postgresql-operator/v2/internal/naming" "github.com/percona/percona-postgresql-operator/v2/internal/patroni" @@ -88,18 +90,18 @@ func (r *Reconciler) reconcileClusterPodService( return clusterPodService, err } -// generateClusterPrimaryService returns a v1.Service and v1.Endpoints that +// generateClusterPrimaryService returns a v1.Service and discoveryv1.EndpointSlice that // resolve to the PostgreSQL primary instance. func (r *Reconciler) generateClusterPrimaryService( cluster *v1beta1.PostgresCluster, leader *corev1.Service, -) (*corev1.Service, *corev1.Endpoints, error) { +) (*corev1.Service, *discoveryv1.EndpointSlice, *corev1.Endpoints, error) { // We want to name and label our primary Service consistently. When Patroni is // using Endpoints for its DCS, however, they and any Service that uses them // must use the same name as the Patroni "scope" which has its own constraints. // // To stay free from those constraints, our primary Service resolves to the // ClusterIP of the Service created in Reconciler.reconcilePatroniLeaderLease - // when Patroni is using Endpoints. + // when Patroni is using EndpointSlices. service := &corev1.Service{ObjectMeta: naming.ClusterPrimaryService(cluster)} service.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("Service")) @@ -116,18 +118,26 @@ func (r *Reconciler) generateClusterPrimaryService( err := errors.WithStack(r.setControllerReference(cluster, service)) - // Endpoints for a Service have the same name as the Service. Copy labels, - // annotations, and ownership, too. 
- endpoints := &corev1.Endpoints{} - service.ObjectMeta.DeepCopyInto(&endpoints.ObjectMeta) - endpoints.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("Endpoints")) + deprecatedEndpoints := &corev1.Endpoints{} + service.ObjectMeta.DeepCopyInto(&deprecatedEndpoints.ObjectMeta) + deprecatedEndpoints.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("Endpoints")) + + // EndpointSlice for a Service. Copy labels, annotations, and ownership. + endpointSlice := &discoveryv1.EndpointSlice{} + service.ObjectMeta.DeepCopyInto(&endpointSlice.ObjectMeta) + endpointSlice.SetGroupVersionKind(discoveryv1.SchemeGroupVersion.WithKind("EndpointSlice")) + + if endpointSlice.Labels == nil { + endpointSlice.Labels = make(map[string]string) + } + endpointSlice.Labels[discoveryv1.LabelServiceName] = service.Name if leader == nil { // TODO(cbandy): We need to build a different kind of Service here. - return nil, nil, errors.New("Patroni DCS other than Kubernetes Endpoints is not implemented") + return nil, nil, nil, errors.New("Patroni DCS other than Kubernetes EndpointSlices is not implemented") } - // Allocate no IP address (headless) and manage the Endpoints ourselves. + // Allocate no IP address (headless) and manage the EndpointSlice ourselves. // - https://docs.k8s.io/concepts/services-networking/service/#headless-services // - https://docs.k8s.io/concepts/services-networking/service/#services-without-selectors service.Spec.ClusterIP = corev1.ClusterIPNone @@ -141,24 +151,39 @@ func (r *Reconciler) generateClusterPrimaryService( }} // Resolve to the ClusterIP for which Patroni has configured the Endpoints. - endpoints.Subsets = []corev1.EndpointSubset{{ + deprecatedEndpoints.Subsets = []corev1.EndpointSubset{{ Addresses: []corev1.EndpointAddress{{IP: leader.Spec.ClusterIP}}, }} - // Copy the EndpointPorts from the ServicePorts. + // Set the address type for the EndpointSlice + endpointSlice.AddressType = discoveryv1.AddressTypeIPv4 + + endpointSlice.Endpoints = []discoveryv1.Endpoint{{ + Addresses: []string{leader.Spec.ClusterIP}, + }} + for _, sp := range service.Spec.Ports { - endpoints.Subsets[0].Ports = append(endpoints.Subsets[0].Ports, + deprecatedEndpoints.Subsets[0].Ports = append(deprecatedEndpoints.Subsets[0].Ports, corev1.EndpointPort{ Name: sp.Name, Port: sp.Port, Protocol: sp.Protocol, + }, + ) + port := sp.Port + endpointSlice.Ports = append(endpointSlice.Ports, + discoveryv1.EndpointPort{ + Name: &sp.Name, + Port: &port, + Protocol: &sp.Protocol, }) } - return service, endpoints, err + return service, endpointSlice, deprecatedEndpoints, err } // +kubebuilder:rbac:groups="",resources="endpoints",verbs={create,patch} +// +kubebuilder:rbac:groups="discovery.k8s.io",resources="endpointslices",verbs={create,patch} // +kubebuilder:rbac:groups="",resources="services",verbs={create,patch} // The OpenShift RestrictedEndpointsAdmission plugin requires special @@ -166,18 +191,22 @@ func (r *Reconciler) generateClusterPrimaryService( // - https://github.com/openshift/origin/pull/9383 // +kubebuilder:rbac:groups="",resources="endpoints/restricted",verbs={create} -// reconcileClusterPrimaryService writes the Service and Endpoints that resolve +// reconcileClusterPrimaryService writes the Service and EndpointSlice that resolve // to the PostgreSQL primary instance. 
func (r *Reconciler) reconcileClusterPrimaryService( ctx context.Context, cluster *v1beta1.PostgresCluster, leader *corev1.Service, ) (*corev1.Service, error) { - service, endpoints, err := r.generateClusterPrimaryService(cluster, leader) + service, endpointSlice, deprecatedEndpoints, err := r.generateClusterPrimaryService(cluster, leader) if err == nil { err = errors.WithStack(r.apply(ctx, service)) } if err == nil { - err = errors.WithStack(r.apply(ctx, endpoints)) + if cluster.CompareVersion("2.9.0") >= 0 && feature.Enabled(ctx, feature.EndpointSlices) { + err = errors.WithStack(r.apply(ctx, endpointSlice)) + } else { + err = errors.WithStack(r.apply(ctx, deprecatedEndpoints)) + } } return service, err } diff --git a/internal/controller/postgrescluster/cluster_test.go b/internal/controller/postgrescluster/cluster_test.go index 8165d0354f..b6e8435d70 100644 --- a/internal/controller/postgrescluster/cluster_test.go +++ b/internal/controller/postgrescluster/cluster_test.go @@ -13,6 +13,7 @@ import ( appsv1 "k8s.io/api/apps/v1" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" + discoveryv1 "k8s.io/api/discovery/v1" rbacv1 "k8s.io/api/rbac/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" @@ -263,9 +264,11 @@ func TestCustomLabels(t *testing.T) { MatchLabels: map[string]string{ naming.LabelCluster: cluster.Name, }, - MatchExpressions: []metav1.LabelSelectorRequirement{{ - Key: naming.LabelPGBackRest, - Operator: metav1.LabelSelectorOpExists}, + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: naming.LabelPGBackRest, + Operator: metav1.LabelSelectorOpExists, + }, }, }) assert.NilError(t, err) @@ -514,9 +517,11 @@ func TestCustomAnnotations(t *testing.T) { MatchLabels: map[string]string{ naming.LabelCluster: cluster.Name, }, - MatchExpressions: []metav1.LabelSelectorRequirement{{ - Key: naming.LabelPGBackRest, - Operator: metav1.LabelSelectorOpExists}, + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: naming.LabelPGBackRest, + Operator: metav1.LabelSelectorOpExists, + }, }, }) assert.NilError(t, err) @@ -594,10 +599,10 @@ func TestGenerateClusterPrimaryService(t *testing.T) { leader := &corev1.Service{} leader.Spec.ClusterIP = "1.9.8.3" - _, _, err := reconciler.generateClusterPrimaryService(cluster, nil) + _, _, _, err := reconciler.generateClusterPrimaryService(cluster, nil) assert.ErrorContains(t, err, "not implemented") - alwaysExpect := func(t testing.TB, service *corev1.Service, endpoints *corev1.Endpoints) { + alwaysExpect := func(t testing.TB, service *corev1.Service, endpointSlice *discoveryv1.EndpointSlice) { assert.Assert(t, cmp.MarshalMatches(service.TypeMeta, ` apiVersion: v1 kind: Service @@ -632,9 +637,14 @@ ownerReferences: assert.Assert(t, service.Spec.Selector == nil, "got %v", service.Spec.Selector) - assert.Assert(t, cmp.MarshalMatches(endpoints, ` -apiVersion: v1 -kind: Endpoints + assert.Assert(t, cmp.MarshalMatches(endpointSlice, ` +addressType: IPv4 +apiVersion: discovery.k8s.io/v1 +endpoints: +- addresses: + - 1.9.8.3 + conditions: {} +kind: EndpointSlice metadata: labels: app.kubernetes.io/component: pg @@ -642,6 +652,7 @@ metadata: app.kubernetes.io/managed-by: percona-postgresql-operator app.kubernetes.io/name: percona-postgresql app.kubernetes.io/part-of: percona-postgresql + kubernetes.io/service-name: pg5-primary postgres-operator.crunchydata.com/cluster: pg5 postgres-operator.crunchydata.com/role: primary name: pg5-primary @@ -653,19 +664,16 @@ metadata: kind: 
PostgresCluster name: pg5 uid: "" -subsets: -- addresses: - - ip: 1.9.8.3 - ports: - - name: postgres - port: 2600 - protocol: TCP +ports: +- name: postgres + port: 2600 + protocol: TCP `)) } - service, endpoints, err := reconciler.generateClusterPrimaryService(cluster, leader) + service, endpointSlice, _, err := reconciler.generateClusterPrimaryService(cluster, leader) assert.NilError(t, err) - alwaysExpect(t, service, endpoints) + alwaysExpect(t, service, endpointSlice) t.Run("LeaderLoadBalancer", func(t *testing.T) { leader := leader.DeepCopy() @@ -676,9 +684,9 @@ subsets: {IP: "1.2.3.4", Hostname: "only.the.first"}, } - service, endpoints, err := reconciler.generateClusterPrimaryService(cluster, leader) + service, endpointSlice, _, err := reconciler.generateClusterPrimaryService(cluster, leader) assert.NilError(t, err) - alwaysExpect(t, service, endpoints) + alwaysExpect(t, service, endpointSlice) // generateClusterPrimaryService no longer sets ExternalIPs or ExternalName from // LoadBalancer-type leader service diff --git a/internal/controller/postgrescluster/controller.go b/internal/controller/postgrescluster/controller.go index 9ee8b8d6ea..64d6451d6e 100644 --- a/internal/controller/postgrescluster/controller.go +++ b/internal/controller/postgrescluster/controller.go @@ -14,6 +14,7 @@ import ( appsv1 "k8s.io/api/apps/v1" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" + discoveryv1 "k8s.io/api/discovery/v1" policyv1 "k8s.io/api/policy/v1" rbacv1 "k8s.io/api/rbac/v1" "k8s.io/apimachinery/pkg/api/equality" @@ -571,6 +572,7 @@ func (r *Reconciler) SetupWithManager(mgr manager.Manager) error { For(&v1beta1.PostgresCluster{}). Owns(&corev1.ConfigMap{}, configMapPredicate). // K8SPG-712 Owns(&corev1.Endpoints{}). + Owns(&discoveryv1.EndpointSlice{}). Owns(&corev1.PersistentVolumeClaim{}). Owns(&corev1.Secret{}). Owns(&corev1.Service{}). diff --git a/internal/feature/features.go b/internal/feature/features.go index 36f76aacf6..90639520a2 100644 --- a/internal/feature/features.go +++ b/internal/feature/features.go @@ -95,6 +95,9 @@ const ( // Support VolumeSnapshots VolumeSnapshots = "VolumeSnapshots" + // Support EndpointSlices + EndpointSlices = "EndpointSlices" + // K8SPG-771 // This feature gate enables the use of snapshot based backups. 
// NOTE: This feature is different from VolumeSnapshots which is implemented by @@ -117,6 +120,7 @@ func NewGate() MutableGate { PGUpgradeCPUConcurrency: {Default: false, PreRelease: featuregate.Alpha}, TablespaceVolumes: {Default: false, PreRelease: featuregate.Alpha}, VolumeSnapshots: {Default: false, PreRelease: featuregate.Alpha}, + EndpointSlices: {Default: false, PreRelease: featuregate.Alpha}, BackupSnapshots: {Default: false, PreRelease: featuregate.Alpha}, }); err != nil { panic(err) diff --git a/percona/controller/pgcluster/controller.go b/percona/controller/pgcluster/controller.go index 15bdcaabd4..d4d69490a7 100644 --- a/percona/controller/pgcluster/controller.go +++ b/percona/controller/pgcluster/controller.go @@ -34,6 +34,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/source" "github.com/percona/percona-postgresql-operator/v2/internal/controller/runtime" + "github.com/percona/percona-postgresql-operator/v2/internal/feature" "github.com/percona/percona-postgresql-operator/v2/internal/logging" "github.com/percona/percona-postgresql-operator/v2/internal/naming" "github.com/percona/percona-postgresql-operator/v2/internal/postgres" @@ -218,6 +219,8 @@ func (r *PGClusterReconciler) watchSecrets() handler.TypedFuncs[*corev1.Secret, // +kubebuilder:rbac:groups=apps,resources=replicasets,verbs=create;delete;get;list;patch;watch // +kubebuilder:rbac:groups=pgv2.percona.com,resources=perconapgclusters/finalizers,verbs=update // +kubebuilder:rbac:groups=batch,resources=jobs,verbs=create;list;update +// +kubebuilder:rbac:groups=discovery.k8s.io,resources=endpointslices,verbs=list;delete +// +kubebuilder:rbac:groups="",resources="endpoints",verbs=get;delete // +kubebuilder:rbac:groups="",resources="pods",verbs=create;delete // +kubebuilder:rbac:groups="",resources="persistentvolumeclaims",verbs=create;update @@ -327,6 +330,10 @@ func (r *PGClusterReconciler) Reconcile(ctx context.Context, request reconcile.R return reconcile.Result{}, errors.Wrap(err, "reconcile scheduled backups") } + if err := r.reconcileEndpoints(ctx, cr); err != nil { + return reconcile.Result{}, errors.Wrap(err, "reconcile deprecated endpoints") + } + if err := r.reconcilePVCs(ctx, cr); err != nil { return reconcile.Result{}, errors.Wrap(err, "reconcile pvcs") } @@ -389,6 +396,52 @@ func (r *PGClusterReconciler) Reconcile(ctx context.Context, request reconcile.R return ctrl.Result{}, nil } +// reconcileEndpoints ensures that the deprecated `Endpoints` resource created by the operator in previous versions +// is removed once version 2.9.0 is reached. +// +// In earlier versions, `endpointslicemirroring-controller.k8s.io` mirrored this deprecated `Endpoints` resource to +// `EndpointSlices`. Since the operator now creates a new `EndpointSlice`, we should remove the mirrored `EndpointSlice` +// created from the deprecated `Endpoints` resource. 
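+//
+// Illustrative example (the cluster name is hypothetical): for a cluster "pg1", the mirrored slice carries an
+// ownerReference to the "pg1-primary" `Endpoints` object, so once that `Endpoints` is deleted below,
+// Kubernetes garbage collection removes the mirrored slice along with it.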
+func (r *PGClusterReconciler) reconcileEndpoints(ctx context.Context, cr *v2.PerconaPGCluster) error { + if cr.CompareVersion("2.9.0") < 0 || !feature.Enabled(ctx, feature.EndpointSlices) { + return nil + } + + e := new(corev1.Endpoints) + err := r.Client.Get(ctx, types.NamespacedName{ + Namespace: cr.Namespace, + Name: cr.Name + "-primary", + }, e) + if client.IgnoreNotFound(err) != nil { + return errors.Wrap(err, "failed to get deprecated primary endpoints") + } + if !k8serrors.IsNotFound(err) { + if err := r.Client.Delete(ctx, e); err != nil { + return errors.Wrap(err, "failed to delete deprecated endpoint") + } + } + + endpoints := new(corev1.EndpointsList) + if err := r.Client.List(ctx, endpoints, + client.InNamespace(cr.Namespace), + client.MatchingLabels(map[string]string{naming.LabelCluster: cr.Name}), + ); err != nil { + return errors.Wrap(err, "failed to list endpoints") + } + + for _, e := range endpoints.Items { + if e.GenerateName != cr.Name+"-primary-" { + continue + } + + if err := r.Client.Delete(ctx, &e); err != nil { + return errors.Wrap(err, "failed to delete deprecated endpoint") + } + } + + return nil +} + func (r *PGClusterReconciler) reconcileTLS(ctx context.Context, cr *v2.PerconaPGCluster) error { if err := r.validateTLS(ctx, cr); err != nil { return errors.Wrap(err, "validate TLS")
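For reference, a minimal standalone sketch of the EndpointSlice shape that generateClusterPrimaryService now produces for the primary Service: one IPv4 endpoint resolving to the Patroni leader's ClusterIP, ports copied from the Service, and the kubernetes.io/service-name label tying the slice to the headless, selector-less Service. The package and function names below are illustrative and not part of this change.

package example

import (
	corev1 "k8s.io/api/core/v1"
	discoveryv1 "k8s.io/api/discovery/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// primaryEndpointSlice builds an EndpointSlice that points a primary Service
// at the leader Service's ClusterIP, mirroring the logic added in cluster.go.
func primaryEndpointSlice(service, leader *corev1.Service) *discoveryv1.EndpointSlice {
	slice := &discoveryv1.EndpointSlice{
		ObjectMeta: metav1.ObjectMeta{
			Name:      service.Name,
			Namespace: service.Namespace,
			Labels: map[string]string{
				// Associates the slice with its Service for kube-proxy and DNS.
				discoveryv1.LabelServiceName: service.Name,
			},
		},
		AddressType: discoveryv1.AddressTypeIPv4,
		Endpoints: []discoveryv1.Endpoint{{
			Addresses: []string{leader.Spec.ClusterIP},
		}},
	}

	// Copy the Service ports; EndpointPort fields are pointers, so take the
	// address of a per-iteration copy rather than of the loop variable.
	for i := range service.Spec.Ports {
		sp := service.Spec.Ports[i]
		slice.Ports = append(slice.Ports, discoveryv1.EndpointPort{
			Name:     &sp.Name,
			Port:     &sp.Port,
			Protocol: &sp.Protocol,
		})
	}
	return slice
}

As in the diff above, the object the operator actually applies also gets its GroupVersionKind set and its labels, annotations, and owner references copied from the primary Service before server-side apply.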