Skip to content

Commit 822c719

Browse files
authored
acc: add retry.py helper and avoid eventual-consistency-prone gets (#5694)
Test-only improvements toward making the direct-engine dashboard tests robust against eventual consistency.

- Add `acceptance/bin/retry.py`: retry a command until it succeeds, with `--until SUBSTR` / `--until-not SUBSTR` to also gate on stdout content.
- `publish-failure-stale-content`: wait for the bumped etag to become visible via `retry.py --until-not` instead of a single `lakeview get`.
- `detect-change`: read the etag from the update response instead of a separate `lakeview get`.

This pull request and its description were written by Isaac.
1 parent 2481a10 commit 822c719

3 files changed

Lines changed: 60 additions & 4 deletions

File tree

  • acceptance

acceptance/bin/retry.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#!/usr/bin/env python3
2+
"""Retry a command until it succeeds and its output matches expectations.
3+
4+
Usage: retry.py [--until SUBSTR] [--until-not SUBSTR] CMD [ARGS...]
5+
6+
Retries CMD up to 5 times (configurable via RETRY_MAX_ATTEMPTS env var),
7+
sleeping RETRY_INTERVAL_MS milliseconds (default 500) between attempts.
8+
An attempt is considered successful when the command exits with code 0 and:
9+
--until SUBSTR SUBSTR appears in stdout
10+
--until-not SUBSTR SUBSTR does not appear in stdout
11+
"""
12+
13+
import argparse
14+
import os
15+
import subprocess
16+
import sys
17+
import time
18+
19+
20+
def main():
    """Run a command repeatedly until it succeeds, then relay its last result.

    The command line is ``[--until SUBSTR] [--until-not SUBSTR] CMD [ARGS...]``.
    An attempt counts as successful when CMD exits with code 0, its stdout
    contains the ``--until`` substring (if given) and does not contain the
    ``--until-not`` substring (if given).

    The pause between attempts is read from ``RETRY_INTERVAL_MS``
    (milliseconds, default 500) and the attempt budget from
    ``RETRY_MAX_ATTEMPTS`` (default 5). CMD is always run at least once.

    The stdout, stderr and exit code of the final attempt are forwarded
    unchanged, whether or not that attempt was successful. In particular, if
    the budget runs out while CMD exits 0 but its output never matched, this
    script still exits 0.
    """
    cli = argparse.ArgumentParser(prog="retry.py")
    cli.add_argument("--until")
    cli.add_argument("--until-not")
    # REMAINDER collects CMD together with all of its own arguments.
    cli.add_argument("cmd", nargs=argparse.REMAINDER)
    opts = cli.parse_args()
    if not opts.cmd:
        cli.error("no command given")

    must_contain = opts.until
    must_not_contain = opts.until_not
    command = opts.cmd

    pause_seconds = float(os.environ.get("RETRY_INTERVAL_MS", "500")) / 1000.0
    attempt_budget = int(os.environ.get("RETRY_MAX_ATTEMPTS", "5"))

    def attempt_ok(proc):
        # stdout is captured as bytes, so the substrings are encoded to compare.
        if proc.returncode != 0:
            return False
        if must_contain is not None and must_contain.encode() not in proc.stdout:
            return False
        if must_not_contain is not None and must_not_contain.encode() in proc.stdout:
            return False
        return True

    # The first run is unconditional; only the remaining budget is for retries.
    retries_left = max(attempt_budget - 1, 0)
    while True:
        proc = subprocess.run(command, capture_output=True)
        # The last permitted attempt is relayed as-is, without a success check.
        if retries_left == 0 or attempt_ok(proc):
            break
        retries_left -= 1
        time.sleep(pause_seconds)

    sys.stdout.buffer.write(proc.stdout)
    sys.stderr.buffer.write(proc.stderr)
    sys.exit(proc.returncode)
50+
51+
52+
if __name__ == "__main__":
53+
main()

acceptance/bundle/resources/dashboards/detect-change/script

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,9 @@ $CLI lakeview get "${DASHBOARD_ID}" | jq '{display_name,page_display_name: (.ser
3131
title "Make an out of band modification to the dashboard and confirm that it is detected:\n"
3232
RESOURCE_ID=$($CLI workspace get-status "${DASHBOARD_PATH}" | jq -r '.resource_id')
3333
DASHBOARD_JSON="{\"serialized_dashboard\": \"{}\", \"warehouse_id\": \"$TEST_DEFAULT_WAREHOUSE_ID\"}"
34-
$CLI lakeview update "${RESOURCE_ID}" --json "${DASHBOARD_JSON}" | jq '{lifecycle_state}'
35-
echo "$($CLI lakeview get "$DASHBOARD_ID" | jq -r '.etag'):ETAG_2" >> ACC_REPLS
34+
UPDATE_RESP=$($CLI lakeview update "${RESOURCE_ID}" --json "${DASHBOARD_JSON}")
35+
echo "$UPDATE_RESP" | jq '{lifecycle_state}'
36+
echo "$(echo "$UPDATE_RESP" | jq -r '.etag'):ETAG_2" >> ACC_REPLS
3637

3738
title "Try to redeploy the bundle and confirm that the out of band modification is detected:"
3839
trace $CLI bundle plan

acceptance/bundle/resources/dashboards/publish-failure-stale-content/script

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@ unset MSYS_NO_PATHCONV
1212
trace $CLI bundle deploy
1313
replace_ids.py
1414
DASHBOARD_ID=$($CLI bundle summary --output json | jq -r '.resources.dashboards.dashboard1.id')
15-
add_repl.py "$($CLI lakeview get $DASHBOARD_ID | jq -r '.etag')" ETAG_1
15+
ETAG_1=$($CLI lakeview get $DASHBOARD_ID | jq -r '.etag')
16+
add_repl.py "$ETAG_1" ETAG_1
1617
trace $CLI lakeview get $DASHBOARD_ID | jq '{display_name, etag}'
1718
trace $CLI lakeview get-published $DASHBOARD_ID | jq '{display_name}'
1819
trace $CLI bundle plan -o json | gron.py | grep -E "etag|published"
@@ -28,7 +29,8 @@ update_file.py databricks.yml "my dashboard" "my dashboard renamed"
2829
# SaveState is only called on success, so state retains the pre-PATCH etag.
2930
errcode trace $CLI bundle deploy
3031
trace print_requests.py //lakeview/dashboards
31-
add_repl.py "$($CLI lakeview get $DASHBOARD_ID | jq -r '.etag')" ETAG_2
32+
# The PATCH bumped the remote etag to ETAG_2; retry until it is visible (eventual consistency).
33+
add_repl.py "$(retry.py --until-not "$ETAG_1" $CLI lakeview get $DASHBOARD_ID | jq -r '.etag')" ETAG_2
3234
trace $CLI lakeview get $DASHBOARD_ID | jq '{display_name, etag}'
3335
trace $CLI lakeview get-published $DASHBOARD_ID | jq '{display_name}'
3436
trace $CLI bundle plan -o json | gron.py | grep -E "etag|published"

0 commit comments

Comments
 (0)