From 9a343211dd15d963d5ec04acf6b2662815eae581 Mon Sep 17 00:00:00 2001 From: cjen1-msft Date: Tue, 21 Apr 2026 14:58:19 +0100 Subject: [PATCH 01/15] Use fetch without copying snapshots by default. --- tests/code_update.py | 25 ++++++---- tests/e2e_common_endpoints.py | 4 +- tests/e2e_logging.py | 2 +- tests/e2e_operations.py | 12 +++-- tests/governance.py | 4 +- tests/infra/network.py | 16 ++++--- .../custom_authorization.py | 1 + tests/limits.py | 2 +- tests/lts_compatibility.py | 46 ++++++++++++++++--- tests/partitions_test.py | 2 +- tests/reconfiguration.py | 34 +++++++++----- tests/recovery.py | 1 + tests/redirects.py | 4 +- 13 files changed, 107 insertions(+), 46 deletions(-) diff --git a/tests/code_update.py b/tests/code_update.py index 0b1e5cc1ed95..bab9178de202 100644 --- a/tests/code_update.py +++ b/tests/code_update.py @@ -371,7 +371,7 @@ def test_tcb_version_tables(network, args): thrown_exception = None try: new_node = network.create_node() - network.join_node(new_node, args.package, args, timeout=3) + network.join_node(new_node, args.package, args, timeout=3, from_snapshot=False) network.trust_node(new_node, args) except TimeoutError as e: thrown_exception = e @@ -392,7 +392,7 @@ def test_tcb_version_tables(network, args): LOG.info("Checking new nodes are allowed to join using expanded api") new_node = network.create_node() - network.join_node(new_node, args.package, args, timeout=3) + network.join_node(new_node, args.package, args, timeout=3, from_snapshot=False) network.trust_node(new_node, args) LOG.info("Change the current cpuid's TCB version using the new API") @@ -413,7 +413,7 @@ def test_tcb_version_tables(network, args): LOG.info("Checking new nodes are allowed to join using hexstring api") new_node = network.create_node() - network.join_node(new_node, args.package, args, timeout=3) + network.join_node(new_node, args.package, args, timeout=3, from_snapshot=False) network.trust_node(new_node, args) @@ -433,6 +433,7 @@ def test_add_node_without_security_policy(network, args): args, timeout=3, snp_uvm_security_context_dir=snp_dir if security_context_dir else None, + from_snapshot=False, ) network.trust_node(new_node, args) return network @@ -459,7 +460,7 @@ def test_add_node_with_stubbed_security_policy(network, args): # If we don't throw an exception, joining was successful new_node = network.create_node() - network.join_node(new_node, args.package, args, timeout=3) + network.join_node(new_node, args.package, args, timeout=3, from_snapshot=False) network.trust_node(new_node, args) # Revert to original state @@ -494,6 +495,7 @@ def test_start_node_with_mismatched_host_data(network, args): args, timeout=3, snp_uvm_security_context_dir=snp_dir if security_context_dir else None, + from_snapshot=False, ) except (TimeoutError, RuntimeError): LOG.info("As expected, node with invalid security policy failed to startup") @@ -518,7 +520,7 @@ def test_add_node_with_untrusted_measurement(network, args): new_node = network.create_node() try: - network.join_node(new_node, args.package, args, timeout=3) + network.join_node(new_node, args.package, args, timeout=3, from_snapshot=False) except infra.network.MeasurementNotFound: LOG.info("As expected, node with untrusted measurement failed to join") else: @@ -586,7 +588,7 @@ def assert_node_join_fails(network, args): """Create a node and assert that joining the network raises HostDataNotFound.""" new_node = network.create_node() try: - network.join_node(new_node, args.package, args, timeout=3) + network.join_node(new_node, args.package, args, timeout=3, from_snapshot=False) except infra.network.HostDataNotFound as e: LOG.info(f"As expected, node join failed: {e.error_line}") assert ( @@ -724,7 +726,7 @@ def test_add_node_via_code_policy(network, args): ), ) new_node = network.create_node() - network.join_node(new_node, joiner_args.package, joiner_args, timeout=3) + network.join_node(new_node, joiner_args.package, joiner_args, timeout=3, from_snapshot=False) network.trust_node(new_node, joiner_args) # Cleanup: restore host data and remove code update policy. @@ -754,7 +756,7 @@ def test_add_node_with_untrusted_host_data(network, args): new_node = network.create_node() try: - network.join_node(new_node, args.package, args, timeout=3) + network.join_node(new_node, args.package, args, timeout=3, from_snapshot=False) except infra.network.HostDataNotFound: LOG.info("As expected, node with untrusted host data failed to join") else: @@ -788,6 +790,7 @@ def test_add_node_with_no_uvm_endorsements(network, args): args, timeout=3, snp_uvm_security_context_dir=snp_dir if security_context_dir else None, + from_snapshot=False, ) except infra.network.MeasurementNotFound: LOG.info("As expected, node with no UVM endorsements failed to join") @@ -810,6 +813,7 @@ def test_add_node_with_no_uvm_endorsements(network, args): args, timeout=3, snp_uvm_security_context_dir=snp_dir if security_context_dir else None, + from_snapshot=False, ) new_node.stop() @@ -840,6 +844,7 @@ def test_add_node_with_different_package(network, args): replacement_package, args, timeout=3, + from_snapshot=False, ) except (infra.network.MeasurementNotFound, infra.network.HostDataNotFound) as err: @@ -994,7 +999,7 @@ def format_expected_host_data(entries): LOG.info("Start fresh nodes running new code") for _ in range(0, len(old_nodes)): new_node = network.create_node() - network.join_node(new_node, replacement_package, args) + network.join_node(new_node, replacement_package, args, from_snapshot=False) network.trust_node(new_node, args) LOG.info("Retire original nodes running old code") @@ -1075,7 +1080,7 @@ def test_add_node_with_no_uvm_endorsements_in_kv(network, args): try: new_node = network.create_node() - network.join_node(new_node, args.package, args, timeout=3) + network.join_node(new_node, args.package, args, timeout=3, from_snapshot=False) except infra.network.UVMEndorsementsNotAuthorised: LOG.info("As expected, node with no UVM endorsements failed to join") else: diff --git a/tests/e2e_common_endpoints.py b/tests/e2e_common_endpoints.py index cde6ed3b5b3f..752241d15b45 100644 --- a/tests/e2e_common_endpoints.py +++ b/tests/e2e_common_endpoints.py @@ -33,7 +33,7 @@ def test_primary(network, args): host_spec.with_args(args) new_backup = network.create_node(host_spec) - network.join_node(new_backup, args.package, args) + network.join_node(new_backup, args.package, args, from_snapshot=False) network.trust_node(new_backup, args) primary_interfaces = primary.host.rpc_interfaces @@ -149,7 +149,7 @@ def test_network_node_info(network, args): host_spec.with_args(args) new_node = network.create_node(host_spec) - network.join_node(new_node, args.package, args) + network.join_node(new_node, args.package, args, from_snapshot=False) with new_node.client(interface_name=operator_rpc_interface) as c: r = c.get("/node/network/nodes/self", allow_redirects=False) diff --git a/tests/e2e_logging.py b/tests/e2e_logging.py index ae6da203f426..741b1458775b 100644 --- a/tests/e2e_logging.py +++ b/tests/e2e_logging.py @@ -1463,7 +1463,7 @@ def test_long_lived_forwarding(network, args): new_node_args = copy.deepcopy(args) new_node_args.node_to_node_message_limit = message_limit - network.join_node(new_node, args.package, new_node_args) + network.join_node(new_node, args.package, new_node_args, from_snapshot=False) network.trust_node(new_node, new_node_args) # Send many messages to new node over long-lived connections, diff --git a/tests/e2e_operations.py b/tests/e2e_operations.py index fe75ac4f6914..ede397172b72 100644 --- a/tests/e2e_operations.py +++ b/tests/e2e_operations.py @@ -686,6 +686,7 @@ def test_empty_snapshot(network, args): snapshots_dir=snapshots_dir, # Don't try to fetch a snapshot, look at the local files fetch_recent_snapshot=False, + from_snapshot=True, ) new_node.stop() @@ -725,6 +726,7 @@ def test_nulled_snapshot(network, args): snapshots_dir=snapshots_dir, # Don't try to fetch a snapshot, look at the local files fetch_recent_snapshot=False, + from_snapshot=True, ) except Exception as e: failed = True @@ -1427,6 +1429,7 @@ def test_ledger_chunk_redirect_gap(network, args): args, # Fetch recent snapshot to speed up joining fetch_recent_snapshot=True, + from_snapshot=False, ) network.trust_node(new_node, args) @@ -1588,7 +1591,7 @@ def run_tls_san_checks(const_args): ) new_node = network.create_node(host_spec) args.subject_alt_names = [f"dNSName:{dummy_san}"] - network.join_node(new_node, args.package, args) + network.join_node(new_node, args.package, args, from_snapshot=False) sans = infra.crypto.get_san_from_pem_cert(new_node.get_tls_certificate_pem()) assert len(sans) == 1, "Expected exactly one SAN" assert sans[0].value == dummy_san @@ -1603,7 +1606,7 @@ def run_tls_san_checks(const_args): dummy_public_rpc_hosts.add(ipaddress.ip_address(dummy_public_rpc_host)) new_node = network.create_node(host_spec) - network.join_node(new_node, args.package, args) + network.join_node(new_node, args.package, args, from_snapshot=False) # Cannot trust the node here as client cannot authenticate dummy public IP in cert with open( os.path.join(network.common_dir, f"{new_node.local_node_id}.pem"), @@ -1966,7 +1969,7 @@ def try_historical_fetch(node, timeout=1): new_node, nargs.package, nargs, - from_snapshot=True, + from_snapshot=False, copy_ledger=False, common_read_only_ledger_dir=temp_dir, # New node will try to read from temp directory ) @@ -2781,7 +2784,7 @@ def run_error_message_on_failure_to_read_aci_sec_context(args): args_copy.snp_endorsements_file = "/a/fake/path" failed = False try: - network.join_node(new_node, args.package, args_copy, timeout=20) + network.join_node(new_node, args.package, args_copy, timeout=20, from_snapshot=False) except infra.network.CollateralFetchTimeout: LOG.info( "Node with invalid quote endorsement servers could not join as expected" @@ -2952,6 +2955,7 @@ def test_backup_snapshot_fetch_max_size(network, args): args, target_node=primary, timeout=5, + from_snapshot=False, backup_snapshot_fetch_enabled=True, backup_snapshot_fetch_max_attempts=1, backup_snapshot_fetch_max_size="1KB", diff --git a/tests/governance.py b/tests/governance.py index cb332964b0c9..32aee7f152c1 100644 --- a/tests/governance.py +++ b/tests/governance.py @@ -133,7 +133,7 @@ def test_no_quote(network, args): } ) ) - network.join_node(untrusted_node, args.package, args) + network.join_node(untrusted_node, args.package, args, from_snapshot=False) with untrusted_node.client( ca=os.path.join( untrusted_node.common_dir, f"{untrusted_node.local_node_id}.pem" @@ -175,7 +175,7 @@ def get_nodes(): ) # NB: This new node joins but is never trusted - network.join_node(untrusted_node, args.package, args) + network.join_node(untrusted_node, args.package, args, from_snapshot=False) nodes = get_nodes() assert untrusted_node.node_id in nodes, nodes diff --git a/tests/infra/network.py b/tests/infra/network.py index b489a60ea5e8..c8aad0e129a7 100644 --- a/tests/infra/network.py +++ b/tests/infra/network.py @@ -368,13 +368,15 @@ def _setup_node( # Note: Copy snapshot before ledger as retrieving the latest snapshot may require # to produce more ledger entries + # Note: from_snapshot is not true in the start_and_open case nor start_in_recovery if from_snapshot: # Only retrieve snapshot from primary if the snapshot directory is not specified if snapshots_dir is None: - primary, _ = self.find_primary( - timeout=args.ledger_recovery_timeout if recovery else 10 - ) - read_only_snapshots_dir = self.get_committed_snapshots(primary) + assert False, "snapshot_dir must be provided when from_snapshot is True" + #primary, _ = self.find_primary( + # timeout=args.ledger_recovery_timeout if recovery else 10 + #) + #read_only_snapshots_dir = self.get_committed_snapshots(primary) if os.listdir(snapshots_dir) or os.listdir(read_only_snapshots_dir): LOG.info( f"Joining from snapshot directories: {snapshots_dir},{read_only_snapshots_dir}" @@ -1603,7 +1605,7 @@ def resize(self, target_count, args): LOG.info(f"Resizing network from {initial_node_count} to {target_count} nodes") while node_count < target_count: new_node = self.create_node() - self.join_node(new_node, args.package, args) + self.join_node(new_node, args.package, args, from_snapshot=False) self.trust_node(new_node, args) node_count += 1 while node_count > target_count: @@ -1870,12 +1872,14 @@ def wait_for_primary_unanimity( def get_committed_snapshots( self, - node, + node = None, target_seqno=None, force_txs=True, wait_for_target_seqno=False, timeout=20, ): + if node is None: + node, _ = self.find_primary() # Wait for the snapshot including target_seqno to be committed before # copying snapshot directory. Do not issue transactions if force_txs is False # and expect snapshot to have already been created. diff --git a/tests/js-custom-authorization/custom_authorization.py b/tests/js-custom-authorization/custom_authorization.py index e76cd883bde9..010e4441923d 100644 --- a/tests/js-custom-authorization/custom_authorization.py +++ b/tests/js-custom-authorization/custom_authorization.py @@ -1092,6 +1092,7 @@ def test_metrics_logging(network, args): new_node, args.package, args, + from_snapshot=False, ) network.trust_node(new_node, args) diff --git a/tests/limits.py b/tests/limits.py index 48e19e6f783e..2c4202b9d0fa 100644 --- a/tests/limits.py +++ b/tests/limits.py @@ -25,7 +25,7 @@ def test_forward_larger_than_default_requests(network, args): } ) ) - network.join_node(new_node, args.package, args) + network.join_node(new_node, args.package, args, from_snapshot=False) network.trust_node(new_node, args) primary, _ = network.find_primary() diff --git a/tests/lts_compatibility.py b/tests/lts_compatibility.py index 541e7fd1bc92..af0384cc9383 100644 --- a/tests/lts_compatibility.py +++ b/tests/lts_compatibility.py @@ -139,7 +139,26 @@ def test_new_service( library_dir=library_dir, version=version, ) - network.join_node(new_node, args.package, args, **kwargs) + if infra.node.CCFVersion(version) < infra.node.CCFVersion("ccf-6.0.0"): + primary, _ = network.find_primary() + snapshots_dir = network.get_committed_snapshots(primary) + network.join_node( + new_node, + args.package, + args, + from_snapshot=False, + snapshots_dir=snapshots_dir, + **kwargs, + ) + else: + network.join_node( + new_node, + args.package, + args, + from_snapshot=False, + fetch_recent_snapshot=True, + **kwargs, + ) network.trust_node( new_node, args, @@ -358,16 +377,31 @@ def run_code_upgrade_from( # Note: alternate between joining from snapshot and replaying entire ledger new_nodes = [] - from_snapshot = True + fetch_recent_snapshot = True for _ in range(0, len(old_nodes)): new_node = network.create_node( binary_dir=to_binary_dir, library_dir=to_library_dir, version=to_version, ) - network.join_node( - new_node, args.package, args, from_snapshot=from_snapshot - ) + if fetch_recent_snapshot: + network.join_node( + new_node, + args.package, + args, + from_snapshot=False, + fetch_recent_snapshot=fetch_recent_snapshot, + ) + else: + primary, _ = network.find_primary() + snapshots_dir = network.get_committed_snapshots(primary) + network.join_node( + new_node, + args.package, + args, + snapshots_dir=snapshots_dir, + from_snapshot=True, + ) network.trust_node( new_node, args, @@ -382,7 +416,7 @@ def run_code_upgrade_from( expected_validity_period_days=DEFAULT_NODE_CERTIFICATE_VALIDITY_DAYS, ignore_proposal_valid_from=True, ) - from_snapshot = not from_snapshot + fetch_recent_snapshot = not fetch_recent_snapshot new_nodes.append(new_node) # Verify that all nodes run the expected CCF version diff --git a/tests/partitions_test.py b/tests/partitions_test.py index 48c4e3d1247a..8e0db48c17d4 100644 --- a/tests/partitions_test.py +++ b/tests/partitions_test.py @@ -266,7 +266,7 @@ def test_new_joiner_helps_liveness(network, args): with contextlib.ExitStack() as stack: # Add a new node, but partition them before trusting them new_node = network.create_node() - network.join_node(new_node, args.package, args, from_snapshot=False) + network.join_node(new_node, args.package, args, from_snapshot=False, fetch_recent_snapshot=True) new_joiner_partition = [new_node] new_joiner_rules = stack.enter_context( network.partitioner.partition([primary, *backups], new_joiner_partition) diff --git a/tests/reconfiguration.py b/tests/reconfiguration.py index f5c2a3920aee..5ff7335bfcbc 100644 --- a/tests/reconfiguration.py +++ b/tests/reconfiguration.py @@ -90,6 +90,7 @@ def test_add_node_invalid_service_cert(network, args): service_cert_file=service_cert_file, timeout=3, stop_on_error=True, + from_snapshot=False, ) except infra.network.ServiceCertificateInvalid: LOG.info( @@ -104,7 +105,7 @@ def test_add_node_invalid_service_cert(network, args): @reqs.description("Adding a valid node") -def test_add_node(network, args, from_snapshot=True): +def test_add_node(network, args, copy_snapshot=False, fetch_recent_snapshot=True): # Add an operator interface for early access/validation operator_rpc_interface = "operator_rpc_interface" @@ -117,12 +118,17 @@ def test_add_node(network, args, from_snapshot=True): new_node = network.create_node(host_spec) + snapshots_dir = None + if copy_snapshot: + primary, _ = network.find_primary() + snapshots_dir = network.get_committed_snapshots(primary) network.join_node( new_node, args.package, args, - from_snapshot=from_snapshot, - fetch_recent_snapshot=from_snapshot, + snapshots_dir=snapshots_dir, + from_snapshot=snapshots_dir is not None, + fetch_recent_snapshot=fetch_recent_snapshot, ) # Verify self-signed node certificate validity period @@ -155,7 +161,7 @@ def test_ignore_first_sigterm(network, args): # assigned IPs for the interfaces, something which the test infra doesn't # support widely yet. new_node = network.create_node() - network.join_node(new_node, args.package, args, ignore_first_sigterm=True) + network.join_node(new_node, args.package, args, ignore_first_sigterm=True, from_snapshot=False) network.trust_node(new_node, args) with new_node.client() as c: @@ -185,7 +191,7 @@ def test_ignore_first_sigterm(network, args): @reqs.description("Adding a node with an invalid certificate validity period") def test_add_node_invalid_validity_period(network, args): new_node = network.create_node() - network.join_node(new_node, args.package, args) + network.join_node(new_node, args.package, args, from_snapshot=False) try: network.trust_node( new_node, @@ -237,6 +243,7 @@ def test_add_node_from_backup(network, args): args.package, args, target_node=network.find_any_backup(), + from_snapshot=False, ) network.trust_node(new_node, args) return network @@ -280,6 +287,7 @@ def test_add_node_endorsements_endpoints(network, args): args.package, args_copy, timeout=per_request_retry_timeout * 4 * len(servers) + 5, + from_snapshot=False, ) except infra.network.CollateralFetchTimeout as e: LOG.info( @@ -315,12 +323,14 @@ def test_add_node_from_snapshot(network, args, copy_ledger=True, from_backup=Fal network.txs.issue(network, number_txs=1, repeat=True) new_node = network.create_node() + snapshots_dir = network.get_committed_snapshots() network.join_node( new_node, args.package, args, copy_ledger=copy_ledger, target_node=network.find_any_backup() if from_backup else None, + snapshots_dir=snapshots_dir, from_snapshot=True, ) network.trust_node(new_node, args) @@ -421,7 +431,7 @@ def test_add_as_many_pending_nodes(network, args): new_nodes = [] for _ in range(number_new_nodes): new_node = network.create_node() - network.join_node(new_node, args.package, args) + network.join_node(new_node, args.package, args, from_snapshot=False) new_nodes.append(new_node) for new_node in new_nodes: @@ -493,7 +503,7 @@ def get_nodes(status): pending_before = get_nodes("Pending") retired_before = get_nodes("Retired") new_node = network.create_node() - network.join_node(new_node, args.package, args, target_node=primary) + network.join_node(new_node, args.package, args, target_node=primary, from_snapshot=False) trusted_after = get_nodes("Trusted") pending_after = get_nodes("Pending") retired_after = get_nodes("Retired") @@ -622,7 +632,7 @@ def test_node_replacement(network, args): f"local://{node_to_replace.get_public_rpc_host()}:{node_to_replace.get_public_rpc_port()}", node_port=node_to_replace.n2n_interface.port, ) - network.join_node(replacement_node, args.package, args) + network.join_node(replacement_node, args.package, args, from_snapshot=False) network.trust_node(replacement_node, args) assert replacement_node.node_id != node_to_replace.node_id @@ -658,7 +668,7 @@ def test_join_straddling_primary_replacement(network, args): test_add_node(network, args) primary, _ = network.find_primary() new_node = network.create_node() - network.join_node(new_node, args.package, args) + network.join_node(new_node, args.package, args, from_snapshot=False) proposal_body = { "actions": [ { @@ -750,7 +760,7 @@ def test_add_node_with_read_only_ledger(network, args): new_node = network.create_node() network.join_node( - new_node, args.package, args, from_snapshot=False, copy_ledger=True + new_node, args.package, args, from_snapshot=False, copy_ledger=True, fetch_recent_snapshot=False ) network.trust_node(new_node, args) return network @@ -846,10 +856,12 @@ def run_join_old_snapshot(const_args): for _ in range(0, 2): new_node = network.create_node() + snapshots_dir = network.get_committed_snapshots() network.join_node( new_node, args.package, args, + snapshots_dir=snapshots_dir, from_snapshot=True, ) network.trust_node(new_node, args) @@ -868,8 +880,8 @@ def run_join_old_snapshot(const_args): args.package, args, from_snapshot=True, - fetch_recent_snapshot=False, snapshots_dir=tmp_dir, + fetch_recent_snapshot=False, timeout=3, ) except infra.network.StartupSeqnoIsOld as e: diff --git a/tests/recovery.py b/tests/recovery.py index 694576243738..f32ec5780743 100644 --- a/tests/recovery.py +++ b/tests/recovery.py @@ -820,6 +820,7 @@ def test_persistence_old_snapshot(network, args): args.package, args, copy_ledger=False, + from_snapshot=True, snapshots_dir=snapshots_dir, ledger_dir=current_ledger_dir, ) diff --git a/tests/redirects.py b/tests/redirects.py index 49e5af6b4e59..51197540fea3 100644 --- a/tests/redirects.py +++ b/tests/redirects.py @@ -152,7 +152,7 @@ def test_redirects_with_static_name_config(network, args): original, _ = network.find_primary() new_node = network.create_node(host_spec) - network.join_node(new_node, args.package, args) + network.join_node(new_node, args.package, args, from_snapshot=False) network.trust_node(new_node, args) req = {"id": 42, "msg": msg} @@ -172,7 +172,7 @@ def test_redirects_with_static_name_config(network, args): LOG.info("Add 2 more nodes with static address redirect config") for _ in range(2): other_node = network.create_node(host_spec) - network.join_node(other_node, args.package, args) + network.join_node(other_node, args.package, args, from_snapshot=False) network.trust_node(other_node, args) LOG.info( From b6eea6adc80cd8ac39cb52707ffc6185ed377de1 Mon Sep 17 00:00:00 2001 From: cjen1-msft Date: Tue, 21 Apr 2026 15:41:48 +0100 Subject: [PATCH 02/15] fix reconfig suite --- tests/reconfiguration.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/reconfiguration.py b/tests/reconfiguration.py index 5ff7335bfcbc..aa84fe613e51 100644 --- a/tests/reconfiguration.py +++ b/tests/reconfiguration.py @@ -798,7 +798,7 @@ def run_all(args): test_add_as_many_pending_nodes(network, args) test_add_node_invalid_service_cert(network, args) - test_add_node(network, args, from_snapshot=False) + test_add_node(network, args, copy_snapshot=True) test_add_node_with_read_only_ledger(network, args) test_join_straddling_primary_replacement(network, args) test_node_replacement(network, args) @@ -806,7 +806,7 @@ def run_all(args): test_add_node_endorsements_endpoints(network, args) test_add_node_on_other_curve(network, args) test_retire_backup(network, args) - test_add_node(network, args) + test_add_node(network, args, copy_snapshot=True) test_retire_primary(network, args) test_add_node_from_snapshot(network, args) From 54dca73b187ee8002ff4874f3ea00afb50c6aa4c Mon Sep 17 00:00:00 2001 From: cjen1-msft Date: Tue, 21 Apr 2026 16:05:43 +0100 Subject: [PATCH 03/15] snags --- tests/infra/network.py | 14 +++++++++----- tests/lts_compatibility.py | 11 +++++------ tests/reconfiguration.py | 20 ++++++++++++++------ 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/tests/infra/network.py b/tests/infra/network.py index c8aad0e129a7..c136654c4e7c 100644 --- a/tests/infra/network.py +++ b/tests/infra/network.py @@ -373,11 +373,15 @@ def _setup_node( # Only retrieve snapshot from primary if the snapshot directory is not specified if snapshots_dir is None: assert False, "snapshot_dir must be provided when from_snapshot is True" - #primary, _ = self.find_primary( + # primary, _ = self.find_primary( # timeout=args.ledger_recovery_timeout if recovery else 10 - #) - #read_only_snapshots_dir = self.get_committed_snapshots(primary) - if os.listdir(snapshots_dir) or os.listdir(read_only_snapshots_dir): + # ) + # read_only_snapshots_dir = self.get_committed_snapshots(primary) + snapshots_exist = snapshots_dir and os.listdir(snapshots_dir) + read_only_snapshots_exist = read_only_snapshots_dir and os.listdir( + read_only_snapshots_dir + ) + if snapshots_exist or read_only_snapshots_exst: LOG.info( f"Joining from snapshot directories: {snapshots_dir},{read_only_snapshots_dir}" ) @@ -1872,7 +1876,7 @@ def wait_for_primary_unanimity( def get_committed_snapshots( self, - node = None, + node=None, target_seqno=None, force_txs=True, wait_for_target_seqno=False, diff --git a/tests/lts_compatibility.py b/tests/lts_compatibility.py index af0384cc9383..2700dc3de5c7 100644 --- a/tests/lts_compatibility.py +++ b/tests/lts_compatibility.py @@ -146,7 +146,7 @@ def test_new_service( new_node, args.package, args, - from_snapshot=False, + from_snapshot=True, snapshots_dir=snapshots_dir, **kwargs, ) @@ -390,17 +390,16 @@ def run_code_upgrade_from( args.package, args, from_snapshot=False, - fetch_recent_snapshot=fetch_recent_snapshot, + fetch_recent_snapshot=True, ) else: - primary, _ = network.find_primary() - snapshots_dir = network.get_committed_snapshots(primary) network.join_node( new_node, args.package, args, - snapshots_dir=snapshots_dir, - from_snapshot=True, + copy_ledger=True, + from_snapshot=False, + fetch_recent_snapshot=False, ) network.trust_node( new_node, diff --git a/tests/reconfiguration.py b/tests/reconfiguration.py index aa84fe613e51..d3ec0edd02cd 100644 --- a/tests/reconfiguration.py +++ b/tests/reconfiguration.py @@ -120,8 +120,7 @@ def test_add_node(network, args, copy_snapshot=False, fetch_recent_snapshot=True snapshots_dir = None if copy_snapshot: - primary, _ = network.find_primary() - snapshots_dir = network.get_committed_snapshots(primary) + snapshots_dir = network.get_committed_snapshots() network.join_node( new_node, args.package, @@ -140,7 +139,7 @@ def test_add_node(network, args, copy_snapshot=False, fetch_recent_snapshot=True validity_period_days=args.maximum_node_certificate_validity_days // 2, ) - if not from_snapshot: + if (not copy_snapshot) or (not fetch_recent_snapshot): with new_node.client() as c: s = c.get("/node/state") body = s.body.json() @@ -161,7 +160,9 @@ def test_ignore_first_sigterm(network, args): # assigned IPs for the interfaces, something which the test infra doesn't # support widely yet. new_node = network.create_node() - network.join_node(new_node, args.package, args, ignore_first_sigterm=True, from_snapshot=False) + network.join_node( + new_node, args.package, args, ignore_first_sigterm=True, from_snapshot=False + ) network.trust_node(new_node, args) with new_node.client() as c: @@ -503,7 +504,9 @@ def get_nodes(status): pending_before = get_nodes("Pending") retired_before = get_nodes("Retired") new_node = network.create_node() - network.join_node(new_node, args.package, args, target_node=primary, from_snapshot=False) + network.join_node( + new_node, args.package, args, target_node=primary, from_snapshot=False + ) trusted_after = get_nodes("Trusted") pending_after = get_nodes("Pending") retired_after = get_nodes("Retired") @@ -760,7 +763,12 @@ def test_add_node_with_read_only_ledger(network, args): new_node = network.create_node() network.join_node( - new_node, args.package, args, from_snapshot=False, copy_ledger=True, fetch_recent_snapshot=False + new_node, + args.package, + args, + from_snapshot=False, + copy_ledger=True, + fetch_recent_snapshot=False, ) network.trust_node(new_node, args) return network From 8d9878ce61c124c4c850bf617855c01b62b23fcf Mon Sep 17 00:00:00 2001 From: cjen1-msft Date: Tue, 21 Apr 2026 16:19:05 +0100 Subject: [PATCH 04/15] fixup merge --- tests/e2e_operations.py | 8 +++++--- tests/infra/basicperf.py | 1 + tests/recovery.py | 4 ++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/e2e_operations.py b/tests/e2e_operations.py index 6483a66e6798..9ee0dafc060d 100644 --- a/tests/e2e_operations.py +++ b/tests/e2e_operations.py @@ -785,6 +785,7 @@ def test_corrupt_snapshot_handling(network, args): args.package, args, snapshots_dir=writable_dir, + from_snapshot=True, fetch_recent_snapshot=False, ) @@ -877,6 +878,7 @@ def test_corrupt_snapshot_handling(network, args): args.package, args, snapshots_dir=restricted_dir, + from_snapshot=True, fetch_recent_snapshot=False, ) @@ -1944,7 +1946,7 @@ def replace_nodes(network, old_nodes, new_package): new_nodes = [] for _ in range(len(old_nodes)): new_node = network.create_node() - network.join_node(new_node, new_package, nargs) + network.join_node(new_node, new_package, nargs, from_snapshot=False) network.trust_node(new_node, nargs) new_nodes.append(new_node) @@ -2002,7 +2004,7 @@ def replace_nodes(network, old_nodes, new_package): # Verify a Dual joiner can still join (allow_dual_signing_joinee=true) LOG.info("Verifying Dual joiner can still join COSE-only (allow dual) network") dual_joiner = network.create_node() - network.join_node(dual_joiner, nargs.package, nargs) + network.join_node(dual_joiner, nargs.package, nargs, from_snapshot=False) network.trust_node(dual_joiner, nargs) LOG.success("Dual joiner successfully joined COSE-only network") @@ -2042,7 +2044,7 @@ def replace_nodes(network, old_nodes, new_package): LOG.info("Verifying Dual joiner is rejected by COSE-only-no-dual network") rejected_joiner = network.create_node() try: - network.join_node(rejected_joiner, nargs.package, nargs, timeout=10) + network.join_node(rejected_joiner, nargs.package, nargs, timeout=10, from_snapshot=False) network.trust_node(rejected_joiner, nargs) assert False, "Dual joiner should have been rejected" except Exception as e: diff --git a/tests/infra/basicperf.py b/tests/infra/basicperf.py index 4fe7ec88d1bb..944687fdf381 100644 --- a/tests/infra/basicperf.py +++ b/tests/infra/basicperf.py @@ -240,6 +240,7 @@ def replace_primary(network, host, old_primary, snapshots_dir, statistics): timeout=10, copy_ledger=False, snapshots_dir=snapshots_dir, + from_snapshots=True, follow_redirect=False, ) LOG.info(f"Shut down primary: {old_primary.local_node_id}") diff --git a/tests/recovery.py b/tests/recovery.py index 74129f127b67..3a930c72fd1e 100644 --- a/tests/recovery.py +++ b/tests/recovery.py @@ -1462,7 +1462,7 @@ def run_recovery_after_cose_upgrade(args): new_nodes = [] for _ in range(len(old_nodes)): new_node = network.create_node() - network.join_node(new_node, cose_only_package, args) + network.join_node(new_node, cose_only_package, args, from_snapshot=False) network.trust_node(new_node, args) new_nodes.append(new_node) @@ -1540,7 +1540,7 @@ def run_recovery_after_cose_upgrade(args): strict_nodes = [] for _ in range(len(phase2_old_nodes)): n = recovered_network.create_node() - recovered_network.join_node(n, cose_strict_package, recovered_args) + recovered_network.join_node(n, cose_strict_package, recovered_args, from_snapshot=False) recovered_network.trust_node(n, recovered_args) strict_nodes.append(n) From 0af8b268b52da6471066ff0a2487ab50b2a3f402 Mon Sep 17 00:00:00 2001 From: cjen1-msft Date: Tue, 21 Apr 2026 16:33:29 +0100 Subject: [PATCH 05/15] fmt --- tests/code_update.py | 4 +++- tests/e2e_operations.py | 8 ++++++-- tests/partitions_test.py | 8 +++++++- tests/recovery.py | 4 +++- 4 files changed, 19 insertions(+), 5 deletions(-) diff --git a/tests/code_update.py b/tests/code_update.py index bab9178de202..e85aaa36a967 100644 --- a/tests/code_update.py +++ b/tests/code_update.py @@ -726,7 +726,9 @@ def test_add_node_via_code_policy(network, args): ), ) new_node = network.create_node() - network.join_node(new_node, joiner_args.package, joiner_args, timeout=3, from_snapshot=False) + network.join_node( + new_node, joiner_args.package, joiner_args, timeout=3, from_snapshot=False + ) network.trust_node(new_node, joiner_args) # Cleanup: restore host data and remove code update policy. diff --git a/tests/e2e_operations.py b/tests/e2e_operations.py index 9ee0dafc060d..5f7fe30cab43 100644 --- a/tests/e2e_operations.py +++ b/tests/e2e_operations.py @@ -2044,7 +2044,9 @@ def replace_nodes(network, old_nodes, new_package): LOG.info("Verifying Dual joiner is rejected by COSE-only-no-dual network") rejected_joiner = network.create_node() try: - network.join_node(rejected_joiner, nargs.package, nargs, timeout=10, from_snapshot=False) + network.join_node( + rejected_joiner, nargs.package, nargs, timeout=10, from_snapshot=False + ) network.trust_node(rejected_joiner, nargs) assert False, "Dual joiner should have been rejected" except Exception as e: @@ -3009,7 +3011,9 @@ def run_error_message_on_failure_to_read_aci_sec_context(args): args_copy.snp_endorsements_file = "/a/fake/path" failed = False try: - network.join_node(new_node, args.package, args_copy, timeout=20, from_snapshot=False) + network.join_node( + new_node, args.package, args_copy, timeout=20, from_snapshot=False + ) except infra.network.CollateralFetchTimeout: LOG.info( "Node with invalid quote endorsement servers could not join as expected" diff --git a/tests/partitions_test.py b/tests/partitions_test.py index 8e0db48c17d4..677018cfdf30 100644 --- a/tests/partitions_test.py +++ b/tests/partitions_test.py @@ -266,7 +266,13 @@ def test_new_joiner_helps_liveness(network, args): with contextlib.ExitStack() as stack: # Add a new node, but partition them before trusting them new_node = network.create_node() - network.join_node(new_node, args.package, args, from_snapshot=False, fetch_recent_snapshot=True) + network.join_node( + new_node, + args.package, + args, + from_snapshot=False, + fetch_recent_snapshot=True, + ) new_joiner_partition = [new_node] new_joiner_rules = stack.enter_context( network.partitioner.partition([primary, *backups], new_joiner_partition) diff --git a/tests/recovery.py b/tests/recovery.py index 3a930c72fd1e..6cd3ea293ff0 100644 --- a/tests/recovery.py +++ b/tests/recovery.py @@ -1540,7 +1540,9 @@ def run_recovery_after_cose_upgrade(args): strict_nodes = [] for _ in range(len(phase2_old_nodes)): n = recovered_network.create_node() - recovered_network.join_node(n, cose_strict_package, recovered_args, from_snapshot=False) + recovered_network.join_node( + n, cose_strict_package, recovered_args, from_snapshot=False + ) recovered_network.trust_node(n, recovered_args) strict_nodes.append(n) From 438599f3c891a586f2c18647fcd2f049952d6b28 Mon Sep 17 00:00:00 2001 From: cjen1-msft Date: Tue, 21 Apr 2026 16:46:47 +0100 Subject: [PATCH 06/15] fix test_add_node cond --- tests/reconfiguration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/reconfiguration.py b/tests/reconfiguration.py index d3ec0edd02cd..f8c626987c58 100644 --- a/tests/reconfiguration.py +++ b/tests/reconfiguration.py @@ -139,7 +139,7 @@ def test_add_node(network, args, copy_snapshot=False, fetch_recent_snapshot=True validity_period_days=args.maximum_node_certificate_validity_days // 2, ) - if (not copy_snapshot) or (not fetch_recent_snapshot): + if not (copy_snapshot or fetch_recent_snapshot): with new_node.client() as c: s = c.get("/node/state") body = s.body.json() From a3694ca550ea7715812d2b59a00bb1d3b843fb87 Mon Sep 17 00:00:00 2001 From: cjen1-msft Date: Tue, 21 Apr 2026 17:01:53 +0100 Subject: [PATCH 07/15] ledger_redirect_gap requires up-to-date snapshot => force generation. --- tests/e2e_operations.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/e2e_operations.py b/tests/e2e_operations.py index 5f7fe30cab43..8296c519ae63 100644 --- a/tests/e2e_operations.py +++ b/tests/e2e_operations.py @@ -1427,6 +1427,9 @@ def test_ledger_chunk_redirect_gap(network, args): r = c.get("/node/commit").body.json() commit_seqno = TxID.from_str(r["transaction_id"]).seqno + # force primary to generate a new snapshot after commit idx + network.get_committed_snapshots(primary) + new_node = network.create_node() network.join_node( new_node, From 8881571edf3d564105dfcf77006caa2b021773ee Mon Sep 17 00:00:00 2001 From: cjen1-msft Date: Wed, 22 Apr 2026 11:43:34 +0100 Subject: [PATCH 08/15] Copy snapshots to fix chunk_redirect_gap --- tests/e2e_operations.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/e2e_operations.py b/tests/e2e_operations.py index 8296c519ae63..68121e938c43 100644 --- a/tests/e2e_operations.py +++ b/tests/e2e_operations.py @@ -1431,13 +1431,16 @@ def test_ledger_chunk_redirect_gap(network, args): network.get_committed_snapshots(primary) new_node = network.create_node() + snapshots = network.get_committed_snapshots() network.join_node( new_node, args.package, args, + # Tmp fix until the primary expresses an opinion + snapshots_dir = snapshots, + from_snapshot=True, # Fetch recent snapshot to speed up joining fetch_recent_snapshot=True, - from_snapshot=False, ) network.trust_node(new_node, args) From 5d229eef7abf0a88bdbec37aa676ba8c2b185f39 Mon Sep 17 00:00:00 2001 From: cjen1-msft Date: Wed, 22 Apr 2026 14:48:07 +0100 Subject: [PATCH 09/15] Fix late_mounted_ledger_check issue. Co-authored-by: Copilot --- tests/e2e_operations.py | 9 +++++++-- tests/infra/basicperf.py | 2 +- tests/infra/network.py | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/e2e_operations.py b/tests/e2e_operations.py index 68121e938c43..a71318d6d872 100644 --- a/tests/e2e_operations.py +++ b/tests/e2e_operations.py @@ -1437,7 +1437,7 @@ def test_ledger_chunk_redirect_gap(network, args): args.package, args, # Tmp fix until the primary expresses an opinion - snapshots_dir = snapshots, + snapshots_dir=snapshots, from_snapshot=True, # Fetch recent snapshot to speed up joining fetch_recent_snapshot=True, @@ -2198,11 +2198,16 @@ def try_historical_fetch(node, timeout=1): # Create a temporary directory to manually construct a ledger in with tempfile.TemporaryDirectory() as temp_dir: new_node = network.create_node() + + # TMP: must copy the snapshots folder to ensure it starts with a recent snapshot + # Only fetch after #7835 is merged + snapshots_dir = network.get_committed_snapshots() network.join_node( new_node, nargs.package, nargs, - from_snapshot=False, + from_snapshot=True, + snapshots_dir=snapshots_dir, copy_ledger=False, common_read_only_ledger_dir=temp_dir, # New node will try to read from temp directory ) diff --git a/tests/infra/basicperf.py b/tests/infra/basicperf.py index 944687fdf381..06d5db499931 100644 --- a/tests/infra/basicperf.py +++ b/tests/infra/basicperf.py @@ -240,7 +240,7 @@ def replace_primary(network, host, old_primary, snapshots_dir, statistics): timeout=10, copy_ledger=False, snapshots_dir=snapshots_dir, - from_snapshots=True, + from_snapshot=True, follow_redirect=False, ) LOG.info(f"Shut down primary: {old_primary.local_node_id}") diff --git a/tests/infra/network.py b/tests/infra/network.py index c136654c4e7c..437523f74885 100644 --- a/tests/infra/network.py +++ b/tests/infra/network.py @@ -381,7 +381,7 @@ def _setup_node( read_only_snapshots_exist = read_only_snapshots_dir and os.listdir( read_only_snapshots_dir ) - if snapshots_exist or read_only_snapshots_exst: + if snapshots_exist or read_only_snapshots_exist: LOG.info( f"Joining from snapshot directories: {snapshots_dir},{read_only_snapshots_dir}" ) From 0abe788f8cc9fca6951c11d062b1460cc9711bfc Mon Sep 17 00:00:00 2001 From: cjen1-msft Date: Wed, 22 Apr 2026 16:31:14 +0100 Subject: [PATCH 10/15] remove the get_committed_snapshots call --- tests/infra/network.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/infra/network.py b/tests/infra/network.py index 437523f74885..87a27ee98ca8 100644 --- a/tests/infra/network.py +++ b/tests/infra/network.py @@ -373,10 +373,6 @@ def _setup_node( # Only retrieve snapshot from primary if the snapshot directory is not specified if snapshots_dir is None: assert False, "snapshot_dir must be provided when from_snapshot is True" - # primary, _ = self.find_primary( - # timeout=args.ledger_recovery_timeout if recovery else 10 - # ) - # read_only_snapshots_dir = self.get_committed_snapshots(primary) snapshots_exist = snapshots_dir and os.listdir(snapshots_dir) read_only_snapshots_exist = read_only_snapshots_dir and os.listdir( read_only_snapshots_dir From 14b648417b26d2edc44415249194225b9e1289a5 Mon Sep 17 00:00:00 2001 From: cjen1-msft Date: Wed, 22 Apr 2026 16:33:19 +0100 Subject: [PATCH 11/15] Change default for from_snapshot Co-authored-by: Copilot --- tests/infra/network.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/infra/network.py b/tests/infra/network.py index 87a27ee98ca8..3f093a7ab213 100644 --- a/tests/infra/network.py +++ b/tests/infra/network.py @@ -350,7 +350,7 @@ def _setup_node( ledger_dir=None, copy_ledger=True, read_only_ledger_dirs=None, - from_snapshot=True, + from_snapshot=False, snapshots_dir=None, **kwargs, ): @@ -426,7 +426,7 @@ def _add_node( ledger_dir=None, copy_ledger=True, read_only_ledger_dirs=None, - from_snapshot=True, + from_snapshot=False, snapshots_dir=None, **kwargs, ): From 34bd21b7e8139e1c89266f1bc8cea09c3d2ec761 Mon Sep 17 00:00:00 2001 From: cjen1-msft Date: Wed, 22 Apr 2026 16:40:34 +0100 Subject: [PATCH 12/15] fixup read_only_snapshots_dir --- tests/infra/network.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/tests/infra/network.py b/tests/infra/network.py index 3f093a7ab213..6ccd6918171a 100644 --- a/tests/infra/network.py +++ b/tests/infra/network.py @@ -373,17 +373,13 @@ def _setup_node( # Only retrieve snapshot from primary if the snapshot directory is not specified if snapshots_dir is None: assert False, "snapshot_dir must be provided when from_snapshot is True" - snapshots_exist = snapshots_dir and os.listdir(snapshots_dir) - read_only_snapshots_exist = read_only_snapshots_dir and os.listdir( - read_only_snapshots_dir - ) - if snapshots_exist or read_only_snapshots_exist: + if snapshots_dir and os.listdir(snapshots_dir): LOG.info( - f"Joining from snapshot directories: {snapshots_dir},{read_only_snapshots_dir}" + f"Joining from snapshot directories: {snapshots_dir}" ) else: LOG.warning( - f"Attempting to join from snapshot but {snapshots_dir},{read_only_snapshots_dir} are empty: defaulting to complete replay of transaction history" + f"Attempting to join from snapshot but {snapshots_dir} is empty: defaulting to complete replay of transaction history" ) else: LOG.info( From 350527149bb45af03c4c4002ddca3879c690d80b Mon Sep 17 00:00:00 2001 From: cjen1-msft Date: Wed, 22 Apr 2026 17:05:14 +0100 Subject: [PATCH 13/15] fmt --- tests/infra/network.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/infra/network.py b/tests/infra/network.py index 6ccd6918171a..98365eac6de4 100644 --- a/tests/infra/network.py +++ b/tests/infra/network.py @@ -374,9 +374,7 @@ def _setup_node( if snapshots_dir is None: assert False, "snapshot_dir must be provided when from_snapshot is True" if snapshots_dir and os.listdir(snapshots_dir): - LOG.info( - f"Joining from snapshot directories: {snapshots_dir}" - ) + LOG.info(f"Joining from snapshot directories: {snapshots_dir}") else: LOG.warning( f"Attempting to join from snapshot but {snapshots_dir} is empty: defaulting to complete replay of transaction history" From 55da7964c12806a45ff06806b2e7dd305468ceda Mon Sep 17 00:00:00 2001 From: cjen1-msft Date: Wed, 22 Apr 2026 17:39:31 +0100 Subject: [PATCH 14/15] snags --- tests/e2e_operations.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/e2e_operations.py b/tests/e2e_operations.py index a71318d6d872..f5b2859512cc 100644 --- a/tests/e2e_operations.py +++ b/tests/e2e_operations.py @@ -1427,10 +1427,8 @@ def test_ledger_chunk_redirect_gap(network, args): r = c.get("/node/commit").body.json() commit_seqno = TxID.from_str(r["transaction_id"]).seqno - # force primary to generate a new snapshot after commit idx - network.get_committed_snapshots(primary) - new_node = network.create_node() + # force primary to generate a new snapshot after commit idx snapshots = network.get_committed_snapshots() network.join_node( new_node, @@ -2199,8 +2197,6 @@ def try_historical_fetch(node, timeout=1): with tempfile.TemporaryDirectory() as temp_dir: new_node = network.create_node() - # TMP: must copy the snapshots folder to ensure it starts with a recent snapshot - # Only fetch after #7835 is merged snapshots_dir = network.get_committed_snapshots() network.join_node( new_node, From 8cb58842abe79a4ea0249399e015812cb58bb99c Mon Sep 17 00:00:00 2001 From: cjen1-msft Date: Tue, 28 Apr 2026 13:31:13 +0100 Subject: [PATCH 15/15] Update tests/infra/basicperf.py --- tests/infra/basicperf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/infra/basicperf.py b/tests/infra/basicperf.py index 09f72fbf160e..f5e63c256382 100644 --- a/tests/infra/basicperf.py +++ b/tests/infra/basicperf.py @@ -240,7 +240,7 @@ def replace_primary(network, host, old_primary, snapshots_dir, statistics): timeout=10, copy_ledger=False, snapshots_dir=snapshots_dir, - from_snapshot=True, + from_snapshot=snapshots_dir is not None, follow_redirect=False, ) LOG.info(f"Shut down primary: {old_primary.local_node_id}")