From 35006d918ac59237d47752a7e245ac43de23ab88 Mon Sep 17 00:00:00 2001 From: Monika Jakhar Date: Sat, 7 Mar 2026 18:36:38 +0530 Subject: [PATCH] feat(e2e): add backward compatibility e2e tests Signed-off-by: Monika Jakhar --- .github/scripts/build-all-images.sh | 42 ++--- .github/scripts/deploy-fluid-to-kind.sh | 25 +-- .github/scripts/gha-backward-compatibility.sh | 166 ++++++++++++++++++ .../workflows/backward-compatibility-e2e.yml | 79 +++++++++ .github/workflows/pr-quota-limit.yml | 2 +- 5 files changed, 280 insertions(+), 34 deletions(-) create mode 100644 .github/scripts/gha-backward-compatibility.sh create mode 100644 .github/workflows/backward-compatibility-e2e.yml diff --git a/.github/scripts/build-all-images.sh b/.github/scripts/build-all-images.sh index 37c83dc54fa..e66723219fc 100755 --- a/.github/scripts/build-all-images.sh +++ b/.github/scripts/build-all-images.sh @@ -1,40 +1,40 @@ #!/bin/bash set -e -function get_image_tag() { +get_image_tag() { version=$(grep "^VERSION := " ./Makefile) - version=${version#VERSION := } + version="${version#VERSION := }" git_sha=$(git rev-parse --short HEAD || echo "HEAD") - export IMAGE_TAG=${version}-${git_sha} + export IMAGE_TAG="${version}-${git_sha}" } -function build_images() { +build_images() { images=( - ${IMG_REPO}/dataset-controller:${IMAGE_TAG} - ${IMG_REPO}/application-controller:${IMAGE_TAG} - ${IMG_REPO}/alluxioruntime-controller:${IMAGE_TAG} - ${IMG_REPO}/jindoruntime-controller:${IMAGE_TAG} - ${IMG_REPO}/goosefsruntime-controller:${IMAGE_TAG} - ${IMG_REPO}/juicefsruntime-controller:${IMAGE_TAG} - ${IMG_REPO}/thinruntime-controller:${IMAGE_TAG} - ${IMG_REPO}/efcruntime-controller:${IMAGE_TAG} - ${IMG_REPO}/vineyardruntime-controller:${IMAGE_TAG} - ${IMG_REPO}/cacheruntime-controller:${IMAGE_TAG} - ${IMG_REPO}/fluid-csi:${IMAGE_TAG} - ${IMG_REPO}/fluid-webhook:${IMAGE_TAG} - ${IMG_REPO}/fluid-crd-upgrader:${IMAGE_TAG} + "${IMG_REPO}/dataset-controller:${IMAGE_TAG}" + 
"${IMG_REPO}/application-controller:${IMAGE_TAG}" + "${IMG_REPO}/alluxioruntime-controller:${IMAGE_TAG}" + "${IMG_REPO}/jindoruntime-controller:${IMAGE_TAG}" + "${IMG_REPO}/goosefsruntime-controller:${IMAGE_TAG}" + "${IMG_REPO}/juicefsruntime-controller:${IMAGE_TAG}" + "${IMG_REPO}/thinruntime-controller:${IMAGE_TAG}" + "${IMG_REPO}/efcruntime-controller:${IMAGE_TAG}" + "${IMG_REPO}/vineyardruntime-controller:${IMAGE_TAG}" + "${IMG_REPO}/cacheruntime-controller:${IMAGE_TAG}" + "${IMG_REPO}/fluid-csi:${IMAGE_TAG}" + "${IMG_REPO}/fluid-webhook:${IMAGE_TAG}" + "${IMG_REPO}/fluid-crd-upgrader:${IMAGE_TAG}" ) make docker-build-all - for img in ${images[@]}; do - echo "Loading image $img to kind cluster..." - kind load docker-image $img --name ${KIND_CLUSTER} + for img in "${images[@]}"; do + echo "Loading image ${img} to kind cluster..." + kind load docker-image "${img}" --name "${KIND_CLUSTER}" done } -function cleanup_docker_caches() { +cleanup_docker_caches() { echo ">>> System disk usage after building fluid images" df -h echo ">>> Cleaning docker caches..." diff --git a/.github/scripts/deploy-fluid-to-kind.sh b/.github/scripts/deploy-fluid-to-kind.sh index 667c2592a95..baf81be0581 100755 --- a/.github/scripts/deploy-fluid-to-kind.sh +++ b/.github/scripts/deploy-fluid-to-kind.sh @@ -1,24 +1,25 @@ -#!/bin/bash -set -e +#! 
/bin/bash -function get_image_tag() { +get_image_tag() { + local version="" version=$(grep "^VERSION := " ./Makefile) - version=${version#VERSION := } + version="${version#VERSION := }" + local git_sha="" git_sha=$(git rev-parse --short HEAD || echo "HEAD") - export IMAGE_TAG=${version}-${git_sha} + export IMAGE_TAG="${version}-${git_sha}" } -function deploy_fluid() { - echo "Replacing image tags in values.yaml with $IMAGE_TAG" - sed -i -E "s/version: &defaultVersion v[0-9]\.[0-9]\.[0-9]-[a-z0-9]+$/version: \&defaultVersion $IMAGE_TAG/g" charts/fluid/fluid/values.yaml - kubectl create ns fluid-system - helm install --create-namespace --set runtime.jindo.smartdata.imagePrefix=registry-cn-hongkong.ack.aliyuncs.com/acs --set runtime.jindo.fuse.imagePrefix=registry-cn-hongkong.ack.aliyuncs.com/acs fluid charts/fluid/fluid +deploy_fluid() { + echo "Replacing image tags in values.yaml with ${IMAGE_TAG}" + sed -i -E "s/version: &defaultVersion .+$/version: \&defaultVersion ${IMAGE_TAG}/g" charts/fluid/fluid/values.yaml + kubectl create ns fluid-system || true + helm upgrade --install --namespace fluid-system --create-namespace --set runtime.jindo.smartdata.imagePrefix=registry-cn-hongkong.ack.aliyuncs.com/acs --set runtime.jindo.fuse.imagePrefix=registry-cn-hongkong.ack.aliyuncs.com/acs fluid charts/fluid/fluid } -function main() { +main() { get_image_tag - if [[ -z "$IMAGE_TAG" ]];then + if [[ -z "${IMAGE_TAG}" ]]; then echo "Failed to get image tag, exiting..." 
exit 1 fi diff --git a/.github/scripts/gha-backward-compatibility.sh b/.github/scripts/gha-backward-compatibility.sh new file mode 100644 index 00000000000..e7170089273 --- /dev/null +++ b/.github/scripts/gha-backward-compatibility.sh @@ -0,0 +1,166 @@ +#!/bin/bash +set -o pipefail + + +syslog() { + echo ">>> ${1}" + return 0 +} + +panic() { + local err_msg="${1}" + syslog "backward compatibility test failed: ${err_msg}" + exit 1 +} + +check_control_plane_status() { + echo "=== Unique image tags used by Fluid control plane ===" + kubectl get pod -n fluid-system -o jsonpath=' + {range .items[*]}{range .spec.containers[*]}{.image}{"\n"}{end}{range .spec.initContainers[*]}{.image}{"\n"}{end}{end}' \ + | sed 's/.*://' \ + | sort -u + + # Poll every 5s; "timeout" is a poll count, so 360 polls is roughly 30 minutes + local timeout=360 + local counter=0 + local status_interval=36 + + while true; do + total_pods=$(kubectl get pod -n fluid-system --no-headers | grep -cv "Completed") + running_pods=$(kubectl get pod -n fluid-system --no-headers | grep -c "Running") + not_running_pods=$((total_pods - running_pods)) + + if ((counter % status_interval == 0)); then + syslog "[Status Check $((counter / status_interval))] Pod status: ${running_pods}/${total_pods} running (${not_running_pods} not ready)" + if [[ "${not_running_pods}" -gt 0 ]]; then + echo "=== Not running pods ===" + kubectl get pods -n fluid-system \ + --field-selector=status.phase!=Running,status.phase!=Succeeded \ + -o=custom-columns='NAME:.metadata.name,STATUS:.status.phase,REASON:.status.reason' + fi + fi + + if [[ "${total_pods}" -ne 0 ]] && [[ "${total_pods}" -eq "${running_pods}" ]]; then + break + fi + + if [[ "${counter}" -ge "${timeout}" ]]; then + panic "Timeout waiting for control plane after ${counter} checks!" + fi + + sleep 5 + ((counter++)) + done + syslog "Fluid control plane is ready after ${counter} checks!"
+} + +wait_dataset_bound() { + local dataset_name="${1}" + local deadline=180 + local log_interval=4 # log every 20s (4 iterations * 5s) + local counter=0 + + syslog "Waiting for dataset ${dataset_name} to be Bound..." + + while true; do + # We don't use 'set -e' here so we can handle the case where the object or field is missing + last_state=$(kubectl get dataset "${dataset_name}" -n default -ojsonpath='{.status.phase}' 2>/dev/null || echo "Unknown") + + if [[ "${last_state}" == "Bound" ]]; then + break + fi + + if [[ $((counter % log_interval)) -eq 0 ]]; then + syslog "checking dataset.status.phase==Bound (elapsed: $((counter * 5))s, current state: ${last_state})" + fi + + ((counter++)) + if [[ $((counter * 5)) -ge "${deadline}" ]]; then + panic "timeout for ${deadline}s waiting for dataset ${dataset_name} to become bound!" + fi + + sleep 5 + done + syslog "Found dataset ${dataset_name} status.phase==Bound" +} + +wait_job_completed() { + local job_name="${1}" + local deadline=600 # 10 minutes + local counter=0 + while true; do + # Handle missing fields gracefully + succeed=$(kubectl get job "${job_name}" -ojsonpath='{.status.succeeded}' 2>/dev/null || echo "0") + failed=$(kubectl get job "${job_name}" -ojsonpath='{.status.failed}' 2>/dev/null || echo "0") + + # Ensure variables are treated as integers + [[ -z "${succeed}" ]] && succeed=0 + [[ -z "${failed}" ]] && failed=0 + + if [[ "${failed}" -gt 0 ]]; then + panic "job ${job_name} failed when accessing data" + fi + if [[ "${succeed}" -gt 0 ]]; then + break + fi + + ((counter++)) + if [[ $((counter * 5)) -ge "${deadline}" ]]; then + panic "timeout for ${deadline}s waiting for job ${job_name} completion!" 
+ fi + sleep 5 + done + syslog "Found succeeded job ${job_name}" +} + +setup_old_fluid() { + syslog "Setting up older version of Fluid from charts" + helm repo add fluid https://fluid-cloudnative.github.io/charts + helm repo update fluid + + # Namespace may already exist; that error is deliberately ignored + kubectl create ns fluid-system || true + + helm install fluid fluid/fluid --namespace fluid-system --wait || panic "failed to install the released Fluid chart" + check_control_plane_status +} + +create_dataset() { + syslog "Creating alluxio dataset..." + kubectl apply -f test/gha-e2e/alluxio/dataset.yaml || panic "failed to create the alluxio dataset" + # give it 15s to let the CRDs and controllers settle + sleep 15 + wait_dataset_bound "zookeeper" +} + +upgrade_fluid() { + syslog "Upgrading Fluid to the locally built current version..." + ./.github/scripts/deploy-fluid-to-kind.sh || panic "failed to deploy the locally built Fluid" + check_control_plane_status +} + +verify_backward_compatibility() { + syslog "Verifying backward compatibility..." + # Ensure the dataset created earlier is still bound + wait_dataset_bound "zookeeper" + + # create job to access data over the runtime + kubectl apply -f test/gha-e2e/alluxio/job.yaml || panic "failed to create the data access job" + wait_job_completed "fluid-test" + + # Clean up + kubectl delete -f test/gha-e2e/alluxio/ +} + +main() { + syslog "[BACKWARD COMPATIBILITY TEST STARTS AT $(date)]" + + setup_old_fluid + create_dataset + upgrade_fluid + verify_backward_compatibility + + syslog "[BACKWARD COMPATIBILITY TEST SUCCEEDED AT $(date)]" +} + +main diff --git a/.github/workflows/backward-compatibility-e2e.yml b/.github/workflows/backward-compatibility-e2e.yml new file mode 100644 index 00000000000..d599dd1cbf5 --- /dev/null +++ b/.github/workflows/backward-compatibility-e2e.yml @@ -0,0 +1,79 @@ +name: E2E Backward Compatibility Check +on: + pull_request: + branches: [master, release-*] + paths-ignore: + - "docs/**" + - "addons/**" + - "sdk/**" + - "static/**" + +permissions: + contents: read + actions: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + GO_VERSION: 1.24.12 + +jobs:
+ backward-compat-test: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + kubernetes-version: + ["v1.33.2", "v1.30.13", "v1.28.15", "v1.24.17", "v1.22.17"] + env: + GOPATH: ${{ github.workspace }} + GO111MODULE: auto + KIND_CLUSTER: fluid-cluster + defaults: + run: + working-directory: ${{ env.GOPATH }}/src/github.com/fluid-cloudnative/fluid + + steps: + - name: Set up Go + uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0 + with: + go-version: ${{ env.GO_VERSION }} + + - name: Set up Helm + uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # v4.3.1 + + - name: Checkout code + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + path: ${{ env.GOPATH }}/src/github.com/fluid-cloudnative/fluid + + - name: Create k8s Kind Cluster + uses: helm/kind-action@92086f6be054225fa813e0a4b13787fc9088faab # v1.13.0 + with: + version: v0.29.0 + node_image: kindest/node:${{ matrix.kubernetes-version }} + cluster_name: ${{ env.KIND_CLUSTER }} + kubectl_version: ${{ matrix.kubernetes-version }} + + - name: Build current fluid docker images + env: + IMG_REPO: fluidcloudnative + run: | + echo ">>> System disk usage before build fluid images" + df -h + ./.github/scripts/build-all-images.sh + + - name: Run backward compatibility e2e tests + timeout-minutes: 40 + run: | + bash ./.github/scripts/gha-backward-compatibility.sh + + - name: Dump environment + if: ${{ !cancelled() }} + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + with: + name: gha-backward-compat-logs-${{ github.job }}-${{ matrix.kubernetes-version }} + path: "src/github.com/fluid-cloudnative/fluid/e2e-tmp/testcase-*.tgz" + retention-days: 14 diff --git a/.github/workflows/pr-quota-limit.yml b/.github/workflows/pr-quota-limit.yml index e950d70acd4..a052a1994ab 100644 --- a/.github/workflows/pr-quota-limit.yml +++ b/.github/workflows/pr-quota-limit.yml @@ -16,7 +16,7 @@ jobs: issues: write steps: - name: 
Check PR quota - // Use action version v7.0.1 + # Use action version v7.0.1 uses: actions/github-script@60a0d8304218317a38b4124020f343a0d555a1eb with: script: |