Skip to content

Commit d8dff9e

Browse files
authored
Time-based-snapshot test should use polling for asserts (#7766)
1 parent cc8a285 commit d8dff9e

2 files changed

Lines changed: 96 additions & 45 deletions

File tree

tests/e2e_operations.py

Lines changed: 77 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -3063,65 +3063,106 @@ def net_with_min_tx(label, min_tx_interval):
30633063
net.start_and_open(inner_args)
30643064
yield net
30653065

3066-
def get_snapshot_count(net):
3066+
def get_committed_snapshot_files(net):
30673067
primary, _ = net.find_primary()
3068-
snapshots_dir = net.get_committed_snapshots(primary, force_txs=False)
3068+
snapshots_dirs = [
3069+
os.path.join(primary.remote.remote.root, primary.remote.snapshots_dir_name)
3070+
]
3071+
if primary.remote.read_only_snapshots_dir_name is not None:
3072+
snapshots_dirs.append(
3073+
os.path.join(
3074+
primary.remote.remote.root,
3075+
primary.remote.read_only_snapshots_dir_name,
3076+
)
3077+
)
30693078

3070-
return len(os.listdir(snapshots_dir))
3079+
snapshots = set()
3080+
for snapshots_dir in snapshots_dirs:
3081+
if not os.path.isdir(snapshots_dir):
3082+
continue
30713083

3072-
def wait_for_at_least_one(net):
3073-
timeout_s = 10
3074-
end_time = time.time() + timeout_s
3075-
while time.time() < end_time:
3076-
count = get_snapshot_count(net)
3077-
if count > 0:
3078-
return count
3084+
for snapshot_name in os.listdir(snapshots_dir):
3085+
if ccf.ledger.is_snapshot_file_committed(snapshot_name):
3086+
snapshots.add(snapshot_name)
30793087

3080-
time.sleep(0.1)
3081-
raise TimeoutError(f"Expected at least one snapshot after {timeout_s}s")
3088+
return snapshots
3089+
3090+
# Pattern for these tests:
3091+
# 1. wait for any startup triggered txs to commit and net to settle
3092+
# 2. wait for a snapshot with the latest committed tx, or wait 5s
3093+
# 3. record baseline
3094+
# 4. record new snapshots over 10s and compare that to the baseline
30823095

30833096
# min_tx set low
30843097
with net_with_min_tx("_low", 0) as net:
3085-
LOG.info("Started")
3086-
baseline = wait_for_at_least_one(net)
3087-
LOG.info("Got snapshot count")
3098+
time.sleep(1)
3099+
net.get_committed_snapshots(
3100+
net.find_primary()[0],
3101+
force_txs=False,
3102+
wait_for_target_seqno=True,
3103+
timeout=5,
3104+
)
3105+
baseline = get_committed_snapshot_files(net)
30883106
time.sleep(10)
3089-
after = get_snapshot_count(net)
3107+
final = get_committed_snapshot_files(net)
30903108
assert (
3091-
after > baseline
3092-
), f"min_tx_count set to 0 should cause many snapshots, got {after - baseline}"
3109+
len(final - baseline) >= 8
3110+
), f"With min_tx_interval set to 0 we expect snapshots to be generated at around 1 per second, but got {final} snapshots 10s after a baseline of {baseline}, with {final - baseline} new snapshots seen over the test."
30933111

30943112
# min_tx set just right
30953113
with net_with_min_tx("_exact", 2) as net:
3096-
baseline = wait_for_at_least_one(net)
3114+
time.sleep(1)
3115+
try:
3116+
net.get_committed_snapshots(
3117+
net.find_primary()[0],
3118+
force_txs=False,
3119+
wait_for_target_seqno=True,
3120+
timeout=5,
3121+
)
3122+
except TimeoutError:
3123+
pass
3124+
baseline = get_committed_snapshot_files(net)
30973125
time.sleep(10)
3098-
after = get_snapshot_count(net)
3126+
final = get_committed_snapshot_files(net)
30993127
assert (
3100-
after == baseline
3101-
), f"With a exact min_tx we expect to not see any extra snapshots, got {after - baseline}"
3128+
final == baseline
3129+
), f"With min_tx_interval set to 2 we expect no snapshots to be generated without transactions, but got {final} snapshots 10s after a baseline of {baseline}, with {final - baseline} new snapshots seen over the test."
31023130

31033131
# set much higher to show that
31043132
with net_with_min_tx("_high", 10) as net:
3105-
baseline = wait_for_at_least_one(net)
3133+
time.sleep(1)
3134+
try:
3135+
net.get_committed_snapshots(
3136+
net.find_primary()[0],
3137+
force_txs=False,
3138+
wait_for_target_seqno=True,
3139+
timeout=5,
3140+
)
3141+
except TimeoutError:
3142+
pass
3143+
baseline = get_committed_snapshot_files(net)
31063144
time.sleep(10)
3107-
after = get_snapshot_count(net)
3145+
final = get_committed_snapshot_files(net)
31083146
assert (
3109-
after == baseline
3110-
), f"Expect no snapshots when min_tx is high, got {after - baseline}"
3147+
final == baseline
3148+
), f"With min_tx_interval set to 10 we expect no snapshots to be generated without transactions, but got {final} snapshots 10s after a baseline of {baseline}, with {final - baseline} new snapshots seen over the test."
31113149

3112-
net.txs.issue(net, number_txs=1)
3113-
time.sleep(5)
3114-
after = get_snapshot_count(net)
3150+
tx_id = net.txs.issue(net, number_txs=1)
3151+
baseline = get_committed_snapshot_files(net)
3152+
time.sleep(10)
3153+
final = get_committed_snapshot_files(net)
31153154
assert (
3116-
after == baseline
3117-
), f"Expect no snapshots when min_tx is high and only 1 tx issued, got {after - baseline}"
3155+
final == baseline
3156+
), f"With min_tx_interval set to 10 and we expect no snapshots to be generated with only one extra tx, but got {final} snapshots 10s after a baseline of {baseline}, and in total saw {final - baseline} new snapshots over the test."
31183157

31193158
net.txs.issue(net, number_txs=20)
3120-
time.sleep(5)
3121-
after = get_snapshot_count(net)
3122-
assert (
3123-
after > baseline
3124-
), f"Expect at least one snapshot after issuing many txs, got {after - baseline}"
3159+
primary, _ = net.find_primary()
3160+
net.get_committed_snapshots(
3161+
primary,
3162+
target_seqno=tx_id.seqno,
3163+
force_txs=False,
3164+
wait_for_target_seqno=True,
3165+
)
31253166

31263167

31273168
def run_snapshot_persistence_across_primary_failure(const_args):

tests/infra/network.py

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1865,7 +1865,14 @@ def wait_for_primary_unanimity(
18651865
)
18661866
return primary
18671867

1868-
def get_committed_snapshots(self, node, target_seqno=None, force_txs=True):
1868+
def get_committed_snapshots(
1869+
self,
1870+
node,
1871+
target_seqno=None,
1872+
force_txs=True,
1873+
wait_for_target_seqno=False,
1874+
timeout=20,
1875+
):
18691876
# Wait for the snapshot including target_seqno to be committed before
18701877
# copying snapshot directory. Do not issue transactions if force_txs is False
18711878
# and expect snapshot to have already been created.
@@ -1874,8 +1881,10 @@ def get_committed_snapshots(self, node, target_seqno=None, force_txs=True):
18741881
r = c.get("/node/commit").body.json()
18751882
target_seqno = TxID.from_str(r["transaction_id"]).seqno
18761883

1877-
def wait_for_snapshots_to_be_committed(src_dir, list_src_dir_func, timeout=20):
1878-
if not force_txs:
1884+
def wait_for_snapshots_to_be_committed(
1885+
src_dir, list_src_dir_func, timeout=timeout
1886+
):
1887+
if not wait_for_target_seqno and not force_txs:
18791888
return True
18801889

18811890
LOG.info(
@@ -1899,12 +1908,13 @@ def wait_for_snapshots_to_be_committed(src_dir, list_src_dir_func, timeout=20):
18991908
)
19001909
return False
19011910

1902-
# Update state digest as a neutral write operation, to advance commit
1903-
member = self.consortium.get_any_active_member()
1904-
for _ in range(self.args.snapshot_tx_interval // 2):
1905-
r = member.update_ack_state_digest(node)
1906-
with node.client() as c:
1907-
c.wait_for_commit(r)
1911+
if force_txs:
1912+
# Update state digest as a neutral write operation, to advance commit
1913+
member = self.consortium.get_any_active_member()
1914+
for _ in range(self.args.snapshot_tx_interval // 2):
1915+
r = member.update_ack_state_digest(node)
1916+
with node.client() as c:
1917+
c.wait_for_commit(r)
19081918
time.sleep(0.1)
19091919

19101920
return node.get_committed_snapshots(wait_for_snapshots_to_be_committed)

0 commit comments

Comments
 (0)