Skip to content

Commit 1aca347

Browse files
authored
chore: add retry and throttle resilience to canary script (#3303)
1 parent d6d9f52 commit 1aca347

1 file changed

Lines changed: 117 additions & 52 deletions

File tree

scripts/run_canary_in_devicefarm.sh

Lines changed: 117 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
#!/bin/bash

# Have the AWS CLI retry Device Farm API calls that get throttled
# (~60s total backoff).
AWS_MAX_ATTEMPTS=8
export AWS_MAX_ATTEMPTS

# Inputs: the Device Farm project and device count come from the environment,
# the module under test from the first positional argument.
project_arn="${DEVICEFARM_PROJECT_ARN}"
max_devices="${NUMBER_OF_DEVICES_TO_TEST}"
module_name="$1"
@@ -16,38 +20,69 @@ if [[ -z "${max_devices}" ]]; then
1620
max_devices=1
1721
fi
1822

#######################################
# Create a Device Farm upload slot and print its URL and ARN.
# Retries with random jitter whenever the call does not yield an upload ARN
# (for example when the CreateUpload API is throttled).
# Globals:   project_arn (read), file_name (read)
# Arguments: $1 - Device Farm upload type (e.g. ANDROID_APP)
# Outputs:   stdout: the "url arn" pair exactly as printed by the AWS CLI on
#                    success, or a single empty line on failure
#            stderr: AWS CLI error output and retry progress messages
# Returns:   0 on success, 1 once every attempt has failed
#######################################
function createUploadWithRetry {
  local test_type=$1
  local max_upload_attempts=3
  local upload_attempt upload_response jitter
  local -a parts

  for ((upload_attempt = 1; upload_attempt <= max_upload_attempts; upload_attempt++)); do
    # Capture stdout only. CLI stderr is left attached to the caller's stderr
    # so the real error reaches the build log, and so that a warning printed
    # alongside a successful response cannot shift the url/arn fields.
    upload_response=$(aws devicefarm create-upload --type "$test_type" \
      --content-type="application/octet-stream" \
      --project-arn="$project_arn" \
      --name="$file_name" \
      --query="upload.[url, arn]" \
      --region="us-west-2" \
      --output=text)

    # A valid response has two fields: the upload URL, then the upload ARN.
    parts=()
    read -r -a parts <<< "$upload_response"
    if [[ "${parts[1]:-}" == arn:* ]]; then
      echo "$upload_response"
      return 0
    fi

    if ((upload_attempt < max_upload_attempts)); then
      # 30-89s of jitter so parallel builds do not retry in lockstep.
      jitter=$((30 + RANDOM % 60))
      echo "[RUN_IN_DEVICEFARM] CreateUpload returned no upload ARN, possibly throttled (attempt $upload_attempt/$max_upload_attempts). Retrying in ${jitter}s..." >&2
      sleep "$jitter"
    fi
  done

  echo "[RUN_IN_DEVICEFARM] CreateUpload failed after $max_upload_attempts attempts" >&2
  echo ""
  return 1
}
3152

3253
echo 'Uploading test package'
# Reserve an upload slot for the instrumentation test package. The function
# prints the upload URL followed by the upload ARN.
read -r -a result <<< "$(createUploadWithRetry "INSTRUMENTATION_TEST_PACKAGE")"
test_package_url=${result[0]}
test_package_upload_arn=${result[1]}
if [[ -z "$test_package_upload_arn" ]]; then
  echo "Failed to create test package upload (see logs above). Exiting."
  exit 1
fi
# --fail turns an HTTP error from the upload URL into a non-zero exit so a
# rejected PUT is caught here rather than while polling the upload status.
if ! curl --fail -H "Content-Type:application/octet-stream" -T "$full_path" "$test_package_url"; then
  echo "Failed to upload test package to Device Farm. Exiting." >&2
  exit 1
fi

# The same file is registered a second time as the app under test.
echo 'Uploading app package'
read -r -a result <<< "$(createUploadWithRetry "ANDROID_APP")"
app_package_url=${result[0]}
app_package_upload_arn=${result[1]}
if [[ -z "$app_package_upload_arn" ]]; then
  echo "Failed to create app package upload (see logs above). Exiting."
  exit 1
fi
if ! curl --fail -H "Content-Type:application/octet-stream" -T "$full_path" "$app_package_url"; then
  echo "Failed to upload app package to Device Farm. Exiting." >&2
  exit 1
fi

# Wait for uploads to complete. Polling is bounded so an upload that never
# reaches SUCCEEDED/FAILED (or a get-upload call that keeps failing) cannot
# hang the build; override the limit with UPLOAD_WAIT_TIMEOUT_SECONDS.
upload_wait_timeout=${UPLOAD_WAIT_TIMEOUT_SECONDS:-600}
upload_poll_interval=5
for arn in "$test_package_upload_arn" "$app_package_upload_arn"; do
  waited=0
  while true; do
    upload_status=$(aws devicefarm get-upload --arn "$arn" --region="us-west-2" --query="upload.status" --output text)
    if [[ "$upload_status" == "SUCCEEDED" ]]; then
      break
    elif [[ "$upload_status" == "FAILED" ]]; then
      echo "Upload failed for $arn"
      exit 1
    fi
    if ((waited >= upload_wait_timeout)); then
      echo "Timed out after ${waited}s waiting for upload $arn (last status: ${upload_status:-unknown})" >&2
      exit 1
    fi
    sleep "$upload_poll_interval"
    waited=$((waited + upload_poll_interval))
  done
done
5186

5287
# Get oldest device we can test against.
5388
minDevice=$(aws devicefarm list-devices \
@@ -109,44 +144,74 @@ function stopDuplicates {
109144
}
110145
stopDuplicates
111146

# Most modules complete within 30 minutes
job_timeout=30

# Retry the Device Farm test run on failure.
max_run_attempts=2
final_result='NONE'

# Number of consecutive get-run calls that may return nothing before the
# current attempt is abandoned instead of polling forever.
max_poll_failures=10

# ARN of the most recent run that was actually scheduled; used for the report.
last_run_arn=''

# Device filter: run on up to $max_devices of the three selected devices.
device_selection=$(printf '{"filters":[{"attribute":"ARN","operator":"IN","values":["%s","%s","%s"]}],"maxDevices":%s}' \
  "$minDevice" "$middleDevice" "$latestDevice" "$max_devices")

for ((run_attempt = 1; run_attempt <= max_run_attempts; run_attempt++)); do
  echo "============================================================"
  echo "[RUN_IN_DEVICEFARM] Canary attempt $run_attempt/$max_run_attempts for $module_name"
  echo "============================================================"

  echo "[RUN_IN_DEVICEFARM] Scheduling canary test run..."
  run_arn=$(aws devicefarm schedule-run --project-arn="$project_arn" \
    --app-arn="$app_package_upload_arn" \
    --device-selection-configuration="$device_selection" \
    --name="$file_name-$CODEBUILD_SOURCE_VERSION" \
    --test="type=INSTRUMENTATION,testPackageArn=$test_package_upload_arn,filter=$canary_test_name" \
    --execution-configuration="jobTimeoutMinutes=$job_timeout,videoCapture=false" \
    --query="run.arn" \
    --output=text \
    --region="us-west-2")

  status='NONE'
  result='NONE'

  if [[ "$run_arn" != arn:* ]]; then
    # Without a run ARN every get-run call would fail and the poll loop
    # below would never see COMPLETED, so count this as a failed attempt.
    echo "[RUN_IN_DEVICEFARM] Failed to schedule canary test run (attempt $run_attempt/$max_run_attempts)." >&2
  else
    last_run_arn=$run_arn
    echo "[RUN_IN_DEVICEFARM] Run ARN: $run_arn"
    echo "[RUN_IN_DEVICEFARM] Waiting for canary test run to complete..."

    poll_failures=0
    while true; do
      run_status_response=$(aws devicefarm get-run --arn="$run_arn" --region="us-west-2" --query="run.[status, result]" --output text)
      read -r -a result_arr <<< "$run_status_response"
      status=${result_arr[0]}
      result=${result_arr[1]}
      if [[ "$status" == "COMPLETED" ]]; then
        break
      fi
      if [[ -z "$status" ]]; then
        # get-run returned nothing; tolerate a few of these in a row, then
        # give up on this attempt.
        poll_failures=$((poll_failures + 1))
        if ((poll_failures >= max_poll_failures)); then
          echo "[RUN_IN_DEVICEFARM] get-run failed $poll_failures times in a row; abandoning attempt $run_attempt." >&2
          break
        fi
      else
        poll_failures=0
      fi
      sleep 30
    done
  fi

  final_result=${result:-NONE}
  echo "[RUN_IN_DEVICEFARM] Canary attempt $run_attempt/$max_run_attempts: Status=$status Result=$result"

  if [[ "$result" == "PASSED" ]]; then
    if ((run_attempt > 1)); then
      echo "[RUN_IN_DEVICEFARM] Canary passed on retry (attempt $run_attempt)"
    fi
    break
  fi

  if ((run_attempt < max_run_attempts)); then
    echo "[RUN_IN_DEVICEFARM] Canary did not pass (result=$result). Will retry..."
  else
    echo "[RUN_IN_DEVICEFARM] Canary did not pass after $max_run_attempts attempts."
  fi
done

echo "============================================================"
echo "[RUN_IN_DEVICEFARM] Final canary result for $module_name: $final_result"
echo "============================================================"

# Report on the last run that was actually scheduled; there is nothing to
# report when no attempt got as far as producing a run ARN.
if [[ -n "$last_run_arn" ]]; then
  ./scripts/generate_df_testrun_report --run_arn="$last_run_arn" --module_name="$module_name" --pr="$CODEBUILD_SOURCE_VERSION" --output_path="build/allTests/$module_name/"
else
  echo "[RUN_IN_DEVICEFARM] No test run was scheduled; skipping report generation." >&2
fi

# Exit 0 only when the final attempt passed.
if [[ "$final_result" == "PASSED" ]]; then
  exit 0
fi
exit 1

0 commit comments

Comments
 (0)