Skip to content

Commit 3003141

Browse files
authored
Generalize the pipefail false-negatives fix from #12919 (#12921)
Move output_contains from s3client_test.sh to tests_common.sh and add output_matches, output_matches_i, output_matches_E variants for regex and case-insensitive matching. All use here-strings (<<<) instead of echo|grep pipes, avoiding false negatives from SIGPIPE when pipefail is enabled. Convert 26 of 28 echo|grep -q instances across test scripts. Two chained grep patterns (grep -i | grep -qvi) are left unchanged. Also make backup_tests_common.sh source tests_common.sh so the helpers are available in backup test utilities.
1 parent 5447868 commit 3003141

4 files changed

Lines changed: 68 additions & 31 deletions

File tree

fdbbackup/tests/backup_tests_common.sh

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,12 @@
2222
# Shared between s3_backup_test.sh, s3_backup_bulkdump_bulkload.sh, dir_backup_test.sh, etc.
2323
# These functions work with both S3/blobstore and file-based backup testing
2424

25+
# Source shared test utilities (output_contains, output_matches, etc.)
26+
_BACKUP_COMMON_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
27+
if [[ -f "${_BACKUP_COMMON_DIR}/../../fdbclient/tests/tests_common.sh" ]]; then
28+
source "${_BACKUP_COMMON_DIR}/../../fdbclient/tests/tests_common.sh"
29+
fi
30+
2531
# Helper function to add base arguments (cluster file and logging)
2632
# Uses bash nameref (requires bash 4.3+) to modify the array in place
2733
# $1 name of the array variable to modify (passed by name, not value)
@@ -182,12 +188,12 @@ function run_backup {
182188
set -e
183189

184190
# Check if backup is restorable (differential state) or completed
185-
if echo "${status_output}" | grep -q "is restorable"; then
191+
if output_contains "${status_output}" "is restorable"; then
186192
log "Backup is now restorable after ${elapsed}s"
187193
break
188194
fi
189195

190-
if echo "${status_output}" | grep -q "completed"; then
196+
if output_contains "${status_output}" "completed"; then
191197
log "Backup completed after ${elapsed}s"
192198
break
193199
fi
@@ -196,17 +202,17 @@ function run_backup {
196202
if [[ $((elapsed % 30)) -eq 0 ]]; then
197203
log "Still waiting for backup to become restorable (${elapsed}s elapsed)..."
198204
# Show current state for debugging
199-
if echo "${status_output}" | grep -q "is restorable"; then
205+
if output_contains "${status_output}" "is restorable"; then
200206
log " Status: backup is restorable (should have exited loop)"
201-
elif echo "${status_output}" | grep -q "in progress to"; then
207+
elif output_contains "${status_output}" "in progress to"; then
202208
log " Status: backup running, waiting for snapshot to complete"
203-
elif echo "${status_output}" | grep -q "just started"; then
209+
elif output_contains "${status_output}" "just started"; then
204210
log " Status: backup submitted, tasks starting up"
205211
fi
206212
# Check snapshot mode for debugging
207-
if echo "${status_output}" | grep -q "Snapshot Mode: bulkdump"; then
213+
if output_contains "${status_output}" "Snapshot Mode: bulkdump"; then
208214
log " Snapshot Mode: bulkdump (using BulkDump for snapshots)"
209-
elif echo "${status_output}" | grep -q "Snapshot Mode: both"; then
215+
elif output_contains "${status_output}" "Snapshot Mode: both"; then
210216
log " Snapshot Mode: both (generating both formats)"
211217
fi
212218
fi
@@ -238,7 +244,7 @@ function run_backup {
238244
set -e
239245

240246
# Check if BulkDump snapshot exists (look for bulkDumpJobId in describe output)
241-
if echo "${snapshot_list}" | grep -q "bulkDumpJobId\|,bulk"; then
247+
if output_matches "${snapshot_list}" "bulkDumpJobId\|,bulk"; then
242248
log "BulkDump snapshot found after ${bulkdump_elapsed}s"
243249
break
244250
fi
@@ -255,7 +261,7 @@ function run_backup {
255261
fi
256262

257263
# Check if backup already completed (no need to discontinue)
258-
if echo "${status_output}" | grep -q "completed"; then
264+
if output_contains "${status_output}" "completed"; then
259265
log "Backup already completed - no need to discontinue"
260266
return 0
261267
fi
@@ -268,7 +274,7 @@ function run_backup {
268274
set -e
269275

270276
if [[ $stop_exit_code -ne 0 ]]; then
271-
if echo "${stop_output}" | grep -q "already discontinued\|not running\|unneeded"; then
277+
if output_matches "${stop_output}" "already discontinued\|not running\|unneeded"; then
272278
log "Backup already completed and finalized - this is success!"
273279
else
274280
err "Failed to stop backup: ${stop_output}"
@@ -334,13 +340,13 @@ function run_restore {
334340
# Check if restore completed
335341
# Status output contains "State: completed" or "Phase: Complete" when done
336342
# Also check "No restore" for when restore tag doesn't exist (completed and cleaned up)
337-
if echo "${status_output}" | grep -qi "State:.*completed\|Phase:.*Complete\|No restore"; then
343+
if output_matches_i "${status_output}" "State:.*completed\|Phase:.*Complete\|No restore"; then
338344
log "Restore completed after ${elapsed}s"
339345
return 0
340346
fi
341347

342348
# Check if restore failed (be specific - "LastError: None" contains "Error" so avoid false positives)
343-
if echo "${status_output}" | grep -qi "State:.*aborted"; then
349+
if output_matches_i "${status_output}" "State:.*aborted"; then
344350
err "Restore aborted after ${elapsed}s"
345351
log "Status output:"
346352
echo "${status_output}"
@@ -359,7 +365,7 @@ function run_restore {
359365
if [[ $((elapsed % 30)) -eq 0 ]]; then
360366
log "Still waiting for restore to complete (${elapsed}s elapsed)..."
361367
# Show phase info for debugging
362-
if echo "${status_output}" | grep -qi "Phase:"; then
368+
if output_matches_i "${status_output}" "Phase:"; then
363369
phase_info=$(echo "${status_output}" | grep -i "Phase:" | head -1)
364370
log " ${phase_info}"
365371
fi
@@ -496,11 +502,11 @@ function run_restore_wait {
496502
status_output=$("${local_build_dir}"/bin/fdbrestore status -t "${local_tag}" --dest-cluster-file "${local_scratch_dir}/loopback_cluster/fdb.cluster" --log --logdir="${local_scratch_dir}" 2>&1)
497503
set -e
498504

499-
if echo "${status_output}" | grep -qi "State:.*completed\|Phase:.*Complete\|No restore"; then
505+
if output_matches_i "${status_output}" "State:.*completed\|Phase:.*Complete\|No restore"; then
500506
return 0
501507
fi
502508

503-
if echo "${status_output}" | grep -qi "State:.*aborted"; then
509+
if output_matches_i "${status_output}" "State:.*aborted"; then
504510
return 1
505511
fi
506512

fdbbackup/tests/s3_backup_bulkdump_bulkload.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ function run_validate_restore_audit {
164164
break
165165
fi
166166

167-
if echo "${audit_output}" | grep -qE "1221|1230|1010"; then
167+
if output_matches_E "${audit_output}" "1221|1230|1010"; then
168168
log "Transient error detected, retrying in ${retry_delay}s..."
169169
sleep $retry_delay
170170
continue
@@ -193,12 +193,12 @@ function run_validate_restore_audit {
193193
local status_output
194194
status_output=$("${fdbcli}" -C "${cluster_file}" --exec "get_audit_status validate_restore id ${audit_id}" 2>&1)
195195

196-
if echo "${status_output}" | grep -q "Phase.*2"; then
196+
if output_matches "${status_output}" "Phase.*2"; then
197197
log "Audit completed successfully after ${elapsed}s"
198198
return 0
199199
fi
200200

201-
if echo "${status_output}" | grep -q "Phase.*[34]"; then
201+
if output_matches "${status_output}" "Phase.*[34]"; then
202202
err "Audit failed with status: ${status_output}"
203203
return 1
204204
fi

fdbclient/tests/s3client_test.sh

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,6 @@ function filter_http_debug {
3535
echo "${output}" | grep -v "Contents of" | grep -v "^$" | grep -v "^[[:space:]]*$" | grep -v "HTTP" | grep -v "^\[.*\]" | grep -v "^Request Header:" | grep -v "^Response Header:" | grep -v "^Response Code:" | grep -v "^Response ContentLen:" | grep -v "^-- RESPONSE CONTENT--" | grep -v "^--------" | grep -v "^<?xml" | grep -v "^<.*>" | grep -v "^'$" | grep -v "^++"
3636
}
3737

38-
# Check whether captured output contains a literal string. Avoid echo|grep -q because
39-
# pipefail can turn grep's early success exit into a false negative via SIGPIPE upstream.
40-
function output_contains {
41-
local output="$1"
42-
local needle="$2"
43-
grep -Fq -- "${needle}" <<<"${output}"
44-
}
4538

4639
# Run s3client with proper TLS CA file handling
4740
# This function handles the case where TLS_CA_FILE might be empty
@@ -258,7 +251,7 @@ function test_nonexistent_bucket {
258251
# 1. The command succeeded (status 0)
259252
# 2. The output contains the URL header
260253
# 3. There are no objects listed (no lines after the header)
261-
if echo "${output}" | grep -q "Contents of" &&
254+
if output_contains "${output}" "Contents of" &&
262255
[[ $(filter_http_debug "${output}" | wc -l) -eq 0 ]]; then
263256
# Success - we have the header and no other non-empty lines (excluding HTTP debug lines)
264257
return 0
@@ -301,7 +294,7 @@ function test_nonexistent_resource {
301294
local filtered_output
302295
filtered_output=$(filter_http_debug "${output}")
303296

304-
if ! (echo "${output}" | grep -q "Contents of" &&
297+
if ! (output_contains "${output}" "Contents of" &&
305298
[[ -z "$(echo "${filtered_output}" | tr -d '[:space:]')" ]]); then
306299
err "Failed to detect non-existent resource in S3"
307300
return 1
@@ -315,8 +308,8 @@ function test_nonexistent_resource {
315308
# 1. The command succeeded (status 0)
316309
# 2. The output contains the URL header
317310
# 3. There are no actual object listings (no lines starting with FILE or DIR)
318-
if echo "${output}" | grep -q "Contents of" &&
319-
! echo "${output}" | grep -q "^FILE\|^DIR"; then
311+
if output_contains "${output}" "Contents of" &&
312+
! output_matches "${output}" "^FILE\|^DIR"; then
320313
# Success - we have the header and no object listings
321314
return 0
322315
else
@@ -375,8 +368,8 @@ function test_empty_bucket {
375368
local filtered_output
376369
filtered_output=$(filter_http_debug "${output}")
377370

378-
if echo "${output}" | grep -q "No objects found" ||
379-
(echo "${output}" | grep -q "Contents of" &&
371+
if output_contains "${output}" "No objects found" ||
372+
(output_contains "${output}" "Contents of" &&
380373
[[ -z "$(echo "${filtered_output}" | tr -d '[:space:]')" ]]); then
381374
log "Successfully handled empty bucket listing"
382375
return 0

fdbclient/tests/tests_common.sh

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,44 @@ function err {
9696
echo "$(date -Iseconds) ERROR: ${*}" >&2
9797
}
9898

99+
# Check whether captured output contains a literal string.
100+
# Avoids echo|grep -q because pipefail can turn grep's early-exit into
101+
# a false negative via SIGPIPE on the upstream echo.
102+
# $1 output to search
103+
# $2 literal string to find
104+
function output_contains {
105+
local output="$1"
106+
local needle="$2"
107+
grep -Fq -- "${needle}" <<<"${output}"
108+
}
109+
110+
# Check whether captured output matches a basic regex.
111+
# $1 output to search
112+
# $2 regex pattern
113+
function output_matches {
114+
local output="$1"
115+
local pattern="$2"
116+
grep -q -- "${pattern}" <<<"${output}"
117+
}
118+
119+
# Case-insensitive regex match.
120+
# $1 output to search
121+
# $2 regex pattern
122+
function output_matches_i {
123+
local output="$1"
124+
local pattern="$2"
125+
grep -qi -- "${pattern}" <<<"${output}"
126+
}
127+
128+
# Extended regex match.
129+
# $1 output to search
130+
# $2 extended regex pattern
131+
function output_matches_E {
132+
local output="$1"
133+
local pattern="$2"
134+
grep -qE -- "${pattern}" <<<"${output}"
135+
}
136+
99137
# Check if test data should be preserved (PRESERVE_TEST_DATA=1)
100138
# If yes, prints preservation message and returns 0 (should skip cleanup)
101139
# If no, returns 1 (should continue with normal cleanup)

0 commit comments

Comments
 (0)