Skip to content

Commit 6ab690c

Browse files
authored
Don't overlap networks in e2e tests (#7698)
1 parent 92f79c8 commit 6ab690c

3 files changed

Lines changed: 59 additions & 74 deletions

File tree

tests/e2e_operations.py

Lines changed: 55 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -173,69 +173,46 @@ def test_forced_ledger_chunk(network, args):
173173
@reqs.description("Forced snapshot")
174174
@app.scoped_txs()
175175
def test_forced_snapshot(network, args):
176-
inner_args = copy.deepcopy(args)
177-
inner_args.common_read_only_ledger_dir = (
178-
None # Side-effect setting which would break the starting node
179-
)
180-
inner_args.label = f"{inner_args.label}_forced_snapshot"
181-
inner_args.snapshot_tx_interval = (
182-
10000 # Large interval to avoid interference from regular snapshots
183-
)
184-
185-
# Use a separate network to ensure unforced snapshots do not happen
186-
with infra.network.network(
187-
inner_args.nodes,
188-
inner_args.binary_dir,
189-
inner_args.debug_nodes,
190-
pdb=inner_args.pdb,
191-
txs=app.LoggingTxs("user0"),
192-
) as inner_network:
193-
inner_network.start_and_open(inner_args)
194-
195-
primary, _ = inner_network.find_primary()
196-
197-
# Submit some dummy transactions
198-
inner_network.txs.issue(inner_network, number_txs=3)
176+
primary, _ = network.find_primary()
199177

200-
with primary.client() as c:
201-
r = c.get("/node/commit").body.json()
202-
hwm_pre_proposal = TxID.from_str(r["transaction_id"]).seqno
178+
# Submit some dummy transactions
179+
network.txs.issue(network, number_txs=3)
203180

204-
# Ensure there is at least one signature greater than the hwm
205-
inner_network.txs.issue(inner_network, number_txs=1, wait_for_sync=True)
181+
with primary.client() as c:
182+
r = c.get("/node/commit").body.json()
183+
hwm_pre_proposal = TxID.from_str(r["transaction_id"]).seqno
206184

207-
# Submit a proposal to force a snapshot
208-
proposal_body, careful_vote = inner_network.consortium.make_proposal(
209-
"trigger_snapshot"
210-
)
211-
proposal = inner_network.consortium.get_any_active_member().propose(
212-
primary, proposal_body
213-
)
214-
proposal = inner_network.consortium.vote_using_majority(
215-
primary,
216-
proposal,
217-
careful_vote,
218-
)
185+
# Ensure there is at least one signature greater than the hwm
186+
network.txs.issue(network, number_txs=1, wait_for_sync=True)
219187

220-
# Issue some more transactions
221-
inner_network.txs.issue(inner_network, number_txs=5)
188+
# Submit a proposal to force a snapshot
189+
proposal_body, careful_vote = network.consortium.make_proposal("trigger_snapshot")
190+
proposal = network.consortium.get_any_active_member().propose(
191+
primary, proposal_body
192+
)
193+
proposal = network.consortium.vote_using_majority(
194+
primary,
195+
proposal,
196+
careful_vote,
197+
)
222198

223-
snapshots_dir = inner_network.get_committed_snapshots(
224-
primary, target_seqno=hwm_pre_proposal + 1
225-
)
199+
# Issue some more transactions
200+
network.txs.issue(network, number_txs=5)
226201

227-
for s in os.listdir(snapshots_dir):
228-
with ccf.ledger.Snapshot(os.path.join(snapshots_dir, s)) as snapshot:
229-
snapshot_seqno = snapshot.get_public_domain().get_seqno()
230-
if snapshot_seqno > hwm_pre_proposal:
231-
LOG.info(
232-
f"Found a snapshot at {snapshot_seqno} which is after the pre-proposal-high-water-mark {hwm_pre_proposal}"
233-
)
234-
return network
202+
snapshots_dir = network.get_committed_snapshots(
203+
primary, target_seqno=hwm_pre_proposal + 1
204+
)
235205

236-
raise RuntimeError("Could not find matching snapshot file")
206+
for s in os.listdir(snapshots_dir):
207+
with ccf.ledger.Snapshot(os.path.join(snapshots_dir, s)) as snapshot:
208+
snapshot_seqno = snapshot.get_public_domain().get_seqno()
209+
if snapshot_seqno > hwm_pre_proposal:
210+
LOG.info(
211+
f"Found a snapshot at {snapshot_seqno} which is after the pre-proposal-high-water-mark {hwm_pre_proposal}"
212+
)
213+
return
237214

238-
return network
215+
raise RuntimeError("Could not find matching snapshot file")
239216

240217

241218
# https://github.com/microsoft/CCF/issues/1858
@@ -515,28 +492,32 @@ def test_snapshot_repr_digest(network, args):
515492
), f"Expected partial body of {range_end + 1} bytes, got {len(r.body.data())}"
516493

517494

518-
def test_snapshot_selection(network, args):
519-
inner_args = copy.deepcopy(args)
520-
inner_args.common_read_only_ledger_dir = (
495+
def run_manual_snapshot_tests(const_args):
496+
# Use a separate network with explicit args to ensure unforced snapshots do not happen
497+
498+
args = copy.deepcopy(const_args)
499+
args.common_read_only_ledger_dir = (
521500
None # Side-effect setting which would break the starting node
522501
)
523-
inner_args.label = f"{inner_args.label}_snapshot_selection"
524-
inner_args.snapshot_tx_interval = (
502+
args.label = f"{args.label}_manual_snapshots"
503+
args.snapshot_tx_interval = (
525504
10000 # Large interval to avoid interference from regular snapshots
526505
)
527506

528-
# Use a separate network to ensure unforced snapshots do not happen
529507
with infra.network.network(
530-
inner_args.nodes,
531-
inner_args.binary_dir,
532-
inner_args.debug_nodes,
533-
pdb=inner_args.pdb,
534-
) as inner_network:
535-
inner_network.start_and_open(inner_args)
536-
_test_snapshot_selection(inner_network, inner_args)
508+
args.nodes,
509+
args.binary_dir,
510+
args.debug_nodes,
511+
pdb=args.pdb,
512+
txs=app.LoggingTxs("user0"),
513+
) as network:
514+
network.start_and_open(args)
537515

516+
test_snapshot_selection(network, args)
517+
test_forced_snapshot(network, args)
538518

539-
def _test_snapshot_selection(network, args):
519+
520+
def test_snapshot_selection(network, args):
540521
# Add nodes so we have at least 3
541522
while len(network.get_joined_nodes()) < 3:
542523
new_node = network.create_node()
@@ -656,6 +637,10 @@ def find_snapshot(node):
656637
for node in suspended:
657638
node.resume()
658639

640+
# Heal after all the suspensions, before running further tests
641+
network.wait_for_new_primary_in((primary, *backups))
642+
network.wait_for_node_commit_sync()
643+
659644

660645
def test_empty_snapshot(network, args):
661646

@@ -1533,11 +1518,9 @@ def run_file_operations(args):
15331518
test_save_committed_ledger_files(network, args)
15341519
test_parse_snapshot_file(network, args)
15351520
test_forced_ledger_chunk(network, args)
1536-
test_forced_snapshot(network, args)
15371521
test_large_snapshot(network, args)
15381522
test_snapshot_access(network, args)
15391523
test_snapshot_repr_digest(network, args)
1540-
test_snapshot_selection(network, args)
15411524
test_empty_snapshot(network, args)
15421525
test_nulled_snapshot(network, args)
15431526
test_corrupt_snapshot_handling(network, args)
@@ -2914,6 +2897,7 @@ def run_snp_tests(args):
29142897
def run(args):
29152898
run_max_uncommitted_tx_count(args)
29162899
run_file_operations(args)
2900+
run_manual_snapshot_tests(args)
29172901
run_tls_san_checks(args)
29182902
run_config_timeout_check(args)
29192903
run_configuration_file_checks(args)

tests/infra/network.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1782,13 +1782,14 @@ def wait_for_new_primary(
17821782

17831783
def wait_for_new_primary_in(
17841784
self,
1785-
expected_node_ids,
1785+
expected_nodes,
17861786
nodes=None,
17871787
timeout_multiplier=DEFAULT_TIMEOUT_MULTIPLIER,
17881788
):
17891789
# We arbitrarily pick twice the election duration to protect ourselves against the somewhat
17901790
# but not that rare cases when the first round of an election fails (short timeouts are particularly susceptible to this)
17911791
timeout = self.observed_election_duration * timeout_multiplier
1792+
expected_node_ids = [n.node_id for n in expected_nodes]
17921793
LOG.info(
17931794
f"Waiting up to {timeout}s for a new primary in {expected_node_ids} to be elected..."
17941795
)

tests/partitions_test.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ def test_isolate_primary_from_one_backup(network, args):
178178
rules.drop()
179179

180180
# primary should now observe partitioned backup as primary
181-
network.wait_for_new_primary_in({b_0.node_id}, nodes=[p])
181+
network.wait_for_new_primary_in({b_0}, nodes=[p])
182182

183183
LOG.info(f"Check that new primary {b_0.local_node_id} reports stable acks")
184184
last_ack = 0
@@ -884,7 +884,7 @@ def force_become_primary(network, args, target_node):
884884
network.wait_for_node_commit_sync(nodes=backups)
885885
rules.drop()
886886
# Wait for the old primary to observe the new one
887-
network.wait_for_new_primary_in({target_node.node_id}, nodes=[primary])
887+
network.wait_for_new_primary_in({target_node}, nodes=[primary])
888888
primary = target_node
889889

890890
# Ensure a signature has been produced in the new term

0 commit comments

Comments
 (0)