Skip to content

Commit 9aec00d

Browse files
feat(e2e): add backward compatibility e2e tests
Signed-off-by: Monika Jakhar <jakharmonika364@gmail.com>
1 parent a69c888 commit 9aec00d

File tree

4 files changed

+276
-32
lines changed

4 files changed

+276
-32
lines changed

.github/scripts/build-all-images.sh

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,39 @@
11
#!/bin/bash
22
set -e
33

4-
function get_image_tag() {
4+
get_image_tag() {
55
version=$(grep "^VERSION := " ./Makefile)
6-
version=${version#VERSION := }
6+
version="${version#VERSION := }"
77

88
git_sha=$(git rev-parse --short HEAD || echo "HEAD")
9-
export IMAGE_TAG=${version}-${git_sha}
9+
export IMAGE_TAG="${version}-${git_sha}"
1010
}
1111

12-
function build_images() {
12+
build_images() {
1313
images=(
14-
${IMG_REPO}/dataset-controller:${IMAGE_TAG}
15-
${IMG_REPO}/application-controller:${IMAGE_TAG}
16-
${IMG_REPO}/alluxioruntime-controller:${IMAGE_TAG}
17-
${IMG_REPO}/jindoruntime-controller:${IMAGE_TAG}
18-
${IMG_REPO}/goosefsruntime-controller:${IMAGE_TAG}
19-
${IMG_REPO}/juicefsruntime-controller:${IMAGE_TAG}
20-
${IMG_REPO}/thinruntime-controller:${IMAGE_TAG}
21-
${IMG_REPO}/efcruntime-controller:${IMAGE_TAG}
22-
${IMG_REPO}/vineyardruntime-controller:${IMAGE_TAG}
23-
${IMG_REPO}/fluid-csi:${IMAGE_TAG}
24-
${IMG_REPO}/fluid-webhook:${IMAGE_TAG}
25-
${IMG_REPO}/fluid-crd-upgrader:${IMAGE_TAG}
14+
"${IMG_REPO}/dataset-controller:${IMAGE_TAG}"
15+
"${IMG_REPO}/application-controller:${IMAGE_TAG}"
16+
"${IMG_REPO}/alluxioruntime-controller:${IMAGE_TAG}"
17+
"${IMG_REPO}/jindoruntime-controller:${IMAGE_TAG}"
18+
"${IMG_REPO}/goosefsruntime-controller:${IMAGE_TAG}"
19+
"${IMG_REPO}/juicefsruntime-controller:${IMAGE_TAG}"
20+
"${IMG_REPO}/thinruntime-controller:${IMAGE_TAG}"
21+
"${IMG_REPO}/efcruntime-controller:${IMAGE_TAG}"
22+
"${IMG_REPO}/vineyardruntime-controller:${IMAGE_TAG}"
23+
"${IMG_REPO}/fluid-csi:${IMAGE_TAG}"
24+
"${IMG_REPO}/fluid-webhook:${IMAGE_TAG}"
25+
"${IMG_REPO}/fluid-crd-upgrader:${IMAGE_TAG}"
2626
)
2727

2828
make docker-build-all
2929

30-
for img in ${images[@]}; do
31-
echo "Loading image $img to kind cluster..."
32-
kind load docker-image $img --name ${KIND_CLUSTER}
30+
for img in "${images[@]}"; do
31+
echo "Loading image ${img} to kind cluster..."
32+
kind load docker-image "${img}" --name "${KIND_CLUSTER}"
3333
done
3434
}
3535

36-
function cleanup_docker_caches() {
36+
cleanup_docker_caches() {
3737
echo ">>> System disk usage after building fluid images"
3838
df -h
3939
echo ">>> Cleaning docker caches..."

.github/scripts/deploy-fluid-to-kind.sh

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,25 @@
1-
#!/bin/bash
2-
set -e
1+
#! /bin/bash
32

4-
function get_image_tag() {
3+
get_image_tag() {
4+
local version=""
55
version=$(grep "^VERSION := " ./Makefile)
6-
version=${version#VERSION := }
6+
version="${version#VERSION := }"
77

8+
local git_sha=""
89
git_sha=$(git rev-parse --short HEAD || echo "HEAD")
9-
export IMAGE_TAG=${version}-${git_sha}
10+
export IMAGE_TAG="${version}-${git_sha}"
1011
}
1112

12-
function deploy_fluid() {
13-
echo "Replacing image tags in values.yaml with $IMAGE_TAG"
14-
sed -i -E "s/version: &defaultVersion v[0-9]\.[0-9]\.[0-9]-[a-z0-9]+$/version: \&defaultVersion $IMAGE_TAG/g" charts/fluid/fluid/values.yaml
15-
kubectl create ns fluid-system
16-
helm install --create-namespace --set runtime.jindo.smartdata.imagePrefix=registry-cn-hongkong.ack.aliyuncs.com/acs --set runtime.jindo.fuse.imagePrefix=registry-cn-hongkong.ack.aliyuncs.com/acs fluid charts/fluid/fluid
13+
deploy_fluid() {
14+
echo "Replacing image tags in values.yaml with ${IMAGE_TAG}"
15+
sed -i -E "s/version: &defaultVersion .+$/version: \&defaultVersion ${IMAGE_TAG}/g" charts/fluid/fluid/values.yaml
16+
kubectl create ns fluid-system || true
17+
helm upgrade --install --namespace fluid-system --create-namespace --set runtime.jindo.smartdata.imagePrefix=registry-cn-hongkong.ack.aliyuncs.com/acs --set runtime.jindo.fuse.imagePrefix=registry-cn-hongkong.ack.aliyuncs.com/acs fluid charts/fluid/fluid
1718
}
1819

19-
function main() {
20+
main() {
2021
get_image_tag
21-
if [[ -z "$IMAGE_TAG" ]];then
22+
if [[ -z "${IMAGE_TAG}" ]]; then
2223
echo "Failed to get image tag, exiting..."
2324
exit 1
2425
fi
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
#!/bin/bash
2+
3+
# Write one log line to stdout, prefixed with ">>> ".
# Arguments: $1 - message text
syslog() {
  local message="${1}"
  printf '%s\n' ">>> ${message}"
}
6+
7+
# Report a fatal failure through syslog and terminate the script.
# Arguments: $1 - reason for the failure
# Exits with status 1; never returns to the caller.
panic() {
  local failure=""
  printf -v failure 'backward compatibility test failed: %s' "${1}"
  syslog "${failure}"
  exit 1
}
12+
13+
#######################################
# Print the image tags used by pods in the fluid-system namespace, then
# block until every pod that is not Completed is listed as Running.
# Polls every 5 seconds and calls panic (exit 1) after 360 polls.
# Arguments: none
# Outputs:   image tags and periodic status lines on stdout
#######################################
check_control_plane_status() {
  echo "=== Unique image tags used by Fluid control plane ==="
  kubectl get pod -n fluid-system -o jsonpath='
{range .items[*]}{range .spec.containers[*]}{.image}{"\n"}{end}{range .spec.initContainers[*]}{.image}{"\n"}{end}{end}' \
    | sed 's/.*://' \
    | sort -u

  # Timeout counter (30 minutes = 360*5 seconds)
  local timeout=360
  local counter=0
  local status_interval=36
  local pod_list=""
  local total_pods=0
  local running_pods=0
  local not_running_pods=0

  while true; do
    # Take a single snapshot per poll so both counts describe the same set
    # of pods; two separate listings can disagree while pods are rolling.
    pod_list=$(kubectl get pod -n fluid-system --no-headers)
    # printf '%s' (no trailing newline) keeps an empty listing at 0 lines;
    # a here-string would add a newline and make grep -cv count 1.
    total_pods=$(printf '%s' "${pod_list}" | grep -cv "Completed")
    running_pods=$(printf '%s' "${pod_list}" | grep -c "Running")
    not_running_pods=$((total_pods - running_pods))

    if ((counter % status_interval == 0)); then
      syslog "[Status Check $((counter / status_interval))] Pod status: ${running_pods}/${total_pods} running (${not_running_pods} not ready)"
      if [[ "${not_running_pods}" -gt 0 ]]; then
        echo "=== Not running pods ==="
        kubectl get pods -n fluid-system \
          --field-selector=status.phase!=Running \
          -o=custom-columns='NAME:.metadata.name,STATUS:.status.phase,REASON:.status.reason'
      fi
    fi

    # An empty namespace is "not ready yet", not "all pods running".
    if [[ "${total_pods}" -ne 0 ]] && [[ "${total_pods}" -eq "${running_pods}" ]]; then
      break
    fi

    if [[ "${counter}" -ge "${timeout}" ]]; then
      panic "Timeout waiting for control plane after ${counter} checks!"
    fi

    sleep 5
    counter=$((counter + 1))
  done
  syslog "Fluid control plane is ready after ${counter} checks!"
}
53+
54+
#######################################
# Block until a dataset's status.phase is "Bound".
# Polls every 5 seconds, logs the current state every third poll, and calls
# panic (exit 1) once the logged elapsed time reaches the deadline.
# Arguments:
#   $1 - dataset name
#   $2 - namespace (optional, default: default)
#   $3 - deadline in seconds (optional, default: 180)
# Outputs:   progress lines on stdout
#######################################
wait_dataset_bound() {
  local dataset_name="${1}"
  local namespace="${2:-default}"
  local deadline="${3:-180}"
  local poll_interval=5
  local log_every=3
  local polls=0
  local elapsed=0
  local last_state=""

  syslog "Waiting for dataset ${dataset_name} to be Bound..."

  while true; do
    # A missing object or kubectl error is reported as "Unknown" and retried
    # rather than aborting the wait.
    last_state=$(kubectl get dataset "${dataset_name}" -n "${namespace}" -ojsonpath='{.status.phase}' 2>/dev/null || echo "Unknown")

    if [[ "${last_state}" == "Bound" ]]; then
      break
    fi

    # Log (and evaluate the deadline) on every third poll after the first.
    if ((polls > 0 && polls % log_every == 0)); then
      elapsed=$((polls * poll_interval))
      syslog "checking dataset.status.phase==Bound (elapsed: ${elapsed}s, current state: ${last_state})"
      if ((elapsed >= deadline)); then
        panic "timeout for ${deadline}s waiting for dataset ${dataset_name} to become bound!"
      fi
    fi

    polls=$((polls + 1))
    sleep "${poll_interval}"
  done
  syslog "Found dataset ${dataset_name} status.phase==Bound"
}
84+
85+
#######################################
# Block until a Kubernetes job reports at least one succeeded pod.
# Polls every 5 seconds. Calls panic (exit 1) as soon as the job reports a
# failed pod, or when the poll count times 5 reaches the deadline.
# Arguments:
#   $1 - job name
#   $2 - deadline in seconds (optional, default: 600, i.e. 10 minutes)
# Outputs:   a success line on stdout
#######################################
wait_job_completed() {
  local job_name="${1}"
  local deadline="${2:-600}"
  local poll_interval=5
  local counter=0
  local succeed=""
  local failed=""

  while true; do
    # A missing job or kubectl error yields "0" so the loop keeps polling.
    succeed=$(kubectl get job "${job_name}" -ojsonpath='{.status.succeeded}' 2>/dev/null || echo "0")
    failed=$(kubectl get job "${job_name}" -ojsonpath='{.status.failed}' 2>/dev/null || echo "0")

    # An absent status field prints nothing; treat that as zero.
    succeed="${succeed:-0}"
    failed="${failed:-0}"

    if [[ "${failed}" -gt 0 ]]; then
      panic "job ${job_name} failed when accessing data"
    fi
    if [[ "${succeed}" -gt 0 ]]; then
      break
    fi

    counter=$((counter + 1))
    if ((counter * poll_interval >= deadline)); then
      panic "timeout for ${deadline}s waiting for job ${job_name} completion!"
    fi
    sleep "${poll_interval}"
  done
  syslog "Found succeeded job ${job_name}"
}
113+
114+
#######################################
# Install the Fluid chart published in the fluid helm repository into the
# fluid-system namespace and wait for its control plane to come up.
# Any helm failure calls panic (exit 1) instead of falling through to the
# control-plane polling loop.
# Arguments: none
#######################################
setup_old_fluid() {
  syslog "Setting up older version of Fluid from charts"
  helm repo add fluid https://fluid-cloudnative.github.io/charts \
    || panic "failed to add the fluid helm repository"
  helm repo update fluid \
    || panic "failed to update the fluid helm repository"

  # An already existing namespace is fine; any other creation failure is not.
  if ! kubectl get ns fluid-system >/dev/null 2>&1; then
    kubectl create ns fluid-system \
      || panic "failed to create namespace fluid-system"
  fi

  helm install fluid fluid/fluid --namespace fluid-system --wait \
    || panic "failed to install the fluid chart from the helm repository"
  check_control_plane_status
}
125+
126+
#######################################
# Apply the alluxio dataset manifest and wait for the "zookeeper" dataset
# to become Bound. Calls panic (exit 1) if the manifest cannot be applied.
# Arguments: none
#######################################
create_dataset() {
  syslog "Creating alluxio dataset..."
  # Fail here rather than letting a rejected manifest surface later as a
  # "dataset not bound" timeout.
  kubectl apply -f test/gha-e2e/alluxio/dataset.yaml \
    || panic "failed to apply test/gha-e2e/alluxio/dataset.yaml"
  # give it 15s to let the CRDs and controllers settle
  sleep 15
  wait_dataset_bound "zookeeper"
}
133+
134+
#######################################
# Run the deploy script to move the cluster to the locally built Fluid
# version, then wait for the control plane to be running again.
# Calls panic (exit 1) if the deploy script exits non-zero.
# Arguments: none
#######################################
upgrade_fluid() {
  syslog "Upgrading Fluid to the locally built current version..."
  # Without this check a failed upgrade falls through to the status check,
  # which could pass against the pods of the release installed earlier and
  # report a compatibility success for an upgrade that never happened.
  local deploy_rc=0
  ./.github/scripts/deploy-fluid-to-kind.sh || deploy_rc=$?
  if ((deploy_rc != 0)); then
    panic "deploy-fluid-to-kind.sh exited with status ${deploy_rc}"
  fi
  check_control_plane_status
}
139+
140+
#######################################
# After the upgrade, confirm that the "zookeeper" dataset is still Bound and
# that the "fluid-test" job can complete against it, then delete the test
# resources. Calls panic (exit 1) if the job manifest cannot be applied.
# Arguments: none
#######################################
verify_backward_compatibility() {
  syslog "Verifying backward compatibility..."
  # Ensure the dataset created earlier is still bound
  wait_dataset_bound "zookeeper"

  # create job to access data over the runtime
  kubectl apply -f test/gha-e2e/alluxio/job.yaml \
    || panic "failed to apply test/gha-e2e/alluxio/job.yaml"
  wait_job_completed "fluid-test"

  # Clean up. This is best-effort: the verification has already passed, so
  # a cleanup failure is logged but does not fail the test.
  kubectl delete -f test/gha-e2e/alluxio/ \
    || syslog "warning: failed to delete resources under test/gha-e2e/alluxio/"
}
152+
153+
# Entry point: run the four test phases in order, with a timestamped
# banner before the first phase and after the last one.
main() {
  local started_at=""
  local finished_at=""
  local phase=""

  started_at="$(date)"
  syslog "[BACKWARD COMPATIBILITY TEST STARTS AT ${started_at}]"

  for phase in \
    setup_old_fluid \
    create_dataset \
    upgrade_fluid \
    verify_backward_compatibility; do
    "${phase}"
  done

  finished_at="$(date)"
  syslog "[BACKWARD COMPATIBILITY TEST SUCCEEDED AT ${finished_at}]"
}
163+
164+
main
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
name: E2E Backward Compatibility Check
2+
on:
3+
pull_request:
4+
branches: [master, release-*]
5+
paths-ignore:
6+
- "docs/**"
7+
- "addons/**"
8+
- "sdk/**"
9+
- "static/**"
10+
11+
permissions:
12+
contents: read
13+
actions: read
14+
15+
concurrency:
16+
group: ${{ github.workflow }}-${{ github.ref }}
17+
cancel-in-progress: true
18+
19+
env:
20+
GO_VERSION: 1.24.12
21+
22+
jobs:
23+
backward-compat-test:
24+
runs-on: ubuntu-latest
25+
strategy:
26+
fail-fast: false
27+
matrix:
28+
kubernetes-version:
29+
["v1.33.2", "v1.30.13", "v1.28.15", "v1.24.17", "v1.22.17"]
30+
env:
31+
GOPATH: ${{ github.workspace }}
32+
GO111MODULE: auto
33+
KIND_CLUSTER: fluid-cluster
34+
defaults:
35+
run:
36+
working-directory: ${{ env.GOPATH }}/src/github.com/fluid-cloudnative/fluid
37+
38+
steps:
39+
- name: Set up Go
40+
uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0
41+
with:
42+
go-version: ${{ env.GO_VERSION }}
43+
44+
- name: Set up Helm
45+
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # v4.3.1
46+
47+
- name: Checkout code
48+
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
49+
with:
50+
path: ${{ env.GOPATH }}/src/github.com/fluid-cloudnative/fluid
51+
52+
- name: Create k8s Kind Cluster
53+
uses: helm/kind-action@92086f6be054225fa813e0a4b13787fc9088faab # v1.13.0
54+
with:
55+
version: v0.29.0
56+
node_image: kindest/node:${{ matrix.kubernetes-version }}
57+
cluster_name: ${{ env.KIND_CLUSTER }}
58+
kubectl_version: ${{ matrix.kubernetes-version }}
59+
60+
- name: Build current fluid docker images
61+
env:
62+
IMG_REPO: fluidcloudnative
63+
run: |
64+
echo ">>> System disk usage before build fluid images"
65+
df -h
66+
./.github/scripts/build-all-images.sh
67+
68+
- name: Run backward compatibility e2e tests
69+
timeout-minutes: 40
70+
run: |
71+
bash ./.github/scripts/gha-backward-compatibility.sh
72+
73+
- name: Dump environment
74+
if: ${{ !cancelled() }}
75+
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
76+
with:
77+
name: gha-backward-compat-logs-${{ github.job }}-${{ matrix.kubernetes-version }}
78+
path: "src/github.com/fluid-cloudnative/fluid/e2e-tmp/testcase-*.tgz"
79+
retention-days: 14

0 commit comments

Comments
 (0)