Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 17 additions & 10 deletions tests/code_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ def test_tcb_version_tables(network, args):
thrown_exception = None
try:
new_node = network.create_node()
network.join_node(new_node, args.package, args, timeout=3)
network.join_node(new_node, args.package, args, timeout=3, from_snapshot=False)
network.trust_node(new_node, args)
except TimeoutError as e:
thrown_exception = e
Expand All @@ -392,7 +392,7 @@ def test_tcb_version_tables(network, args):

LOG.info("Checking new nodes are allowed to join using expanded api")
new_node = network.create_node()
network.join_node(new_node, args.package, args, timeout=3)
network.join_node(new_node, args.package, args, timeout=3, from_snapshot=False)
network.trust_node(new_node, args)

LOG.info("Change the current cpuid's TCB version using the new API")
Expand All @@ -413,7 +413,7 @@ def test_tcb_version_tables(network, args):

LOG.info("Checking new nodes are allowed to join using hexstring api")
new_node = network.create_node()
network.join_node(new_node, args.package, args, timeout=3)
network.join_node(new_node, args.package, args, timeout=3, from_snapshot=False)
network.trust_node(new_node, args)


Expand All @@ -433,6 +433,7 @@ def test_add_node_without_security_policy(network, args):
args,
timeout=3,
snp_uvm_security_context_dir=snp_dir if security_context_dir else None,
from_snapshot=False,
)
network.trust_node(new_node, args)
return network
Expand All @@ -459,7 +460,7 @@ def test_add_node_with_stubbed_security_policy(network, args):

# If we don't throw an exception, joining was successful
new_node = network.create_node()
network.join_node(new_node, args.package, args, timeout=3)
network.join_node(new_node, args.package, args, timeout=3, from_snapshot=False)
network.trust_node(new_node, args)

# Revert to original state
Expand Down Expand Up @@ -494,6 +495,7 @@ def test_start_node_with_mismatched_host_data(network, args):
args,
timeout=3,
snp_uvm_security_context_dir=snp_dir if security_context_dir else None,
from_snapshot=False,
)
except (TimeoutError, RuntimeError):
LOG.info("As expected, node with invalid security policy failed to startup")
Expand All @@ -518,7 +520,7 @@ def test_add_node_with_untrusted_measurement(network, args):

new_node = network.create_node()
try:
network.join_node(new_node, args.package, args, timeout=3)
network.join_node(new_node, args.package, args, timeout=3, from_snapshot=False)
except infra.network.MeasurementNotFound:
LOG.info("As expected, node with untrusted measurement failed to join")
else:
Expand Down Expand Up @@ -586,7 +588,7 @@ def assert_node_join_fails(network, args):
"""Create a node and assert that joining the network raises HostDataNotFound."""
new_node = network.create_node()
try:
network.join_node(new_node, args.package, args, timeout=3)
network.join_node(new_node, args.package, args, timeout=3, from_snapshot=False)
except infra.network.HostDataNotFound as e:
LOG.info(f"As expected, node join failed: {e.error_line}")
assert (
Expand Down Expand Up @@ -724,7 +726,9 @@ def test_add_node_via_code_policy(network, args):
),
)
new_node = network.create_node()
network.join_node(new_node, joiner_args.package, joiner_args, timeout=3)
network.join_node(
new_node, joiner_args.package, joiner_args, timeout=3, from_snapshot=False
)
network.trust_node(new_node, joiner_args)

# Cleanup: restore host data and remove code update policy.
Expand Down Expand Up @@ -754,7 +758,7 @@ def test_add_node_with_untrusted_host_data(network, args):

new_node = network.create_node()
try:
network.join_node(new_node, args.package, args, timeout=3)
network.join_node(new_node, args.package, args, timeout=3, from_snapshot=False)
except infra.network.HostDataNotFound:
LOG.info("As expected, node with untrusted host data failed to join")
else:
Expand Down Expand Up @@ -788,6 +792,7 @@ def test_add_node_with_no_uvm_endorsements(network, args):
args,
timeout=3,
snp_uvm_security_context_dir=snp_dir if security_context_dir else None,
from_snapshot=False,
)
except infra.network.MeasurementNotFound:
LOG.info("As expected, node with no UVM endorsements failed to join")
Expand All @@ -810,6 +815,7 @@ def test_add_node_with_no_uvm_endorsements(network, args):
args,
timeout=3,
snp_uvm_security_context_dir=snp_dir if security_context_dir else None,
from_snapshot=False,
)
new_node.stop()

Expand Down Expand Up @@ -840,6 +846,7 @@ def test_add_node_with_different_package(network, args):
replacement_package,
args,
timeout=3,
from_snapshot=False,
)

except (infra.network.MeasurementNotFound, infra.network.HostDataNotFound) as err:
Expand Down Expand Up @@ -994,7 +1001,7 @@ def format_expected_host_data(entries):
LOG.info("Start fresh nodes running new code")
for _ in range(0, len(old_nodes)):
new_node = network.create_node()
network.join_node(new_node, replacement_package, args)
network.join_node(new_node, replacement_package, args, from_snapshot=False)
network.trust_node(new_node, args)

LOG.info("Retire original nodes running old code")
Expand Down Expand Up @@ -1075,7 +1082,7 @@ def test_add_node_with_no_uvm_endorsements_in_kv(network, args):

try:
new_node = network.create_node()
network.join_node(new_node, args.package, args, timeout=3)
network.join_node(new_node, args.package, args, timeout=3, from_snapshot=False)
except infra.network.UVMEndorsementsNotAuthorised:
LOG.info("As expected, node with no UVM endorsements failed to join")
else:
Expand Down
4 changes: 2 additions & 2 deletions tests/e2e_common_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def test_primary(network, args):
host_spec.with_args(args)

new_backup = network.create_node(host_spec)
network.join_node(new_backup, args.package, args)
network.join_node(new_backup, args.package, args, from_snapshot=False)
network.trust_node(new_backup, args)

primary_interfaces = primary.host.rpc_interfaces
Expand Down Expand Up @@ -149,7 +149,7 @@ def test_network_node_info(network, args):
host_spec.with_args(args)

new_node = network.create_node(host_spec)
network.join_node(new_node, args.package, args)
network.join_node(new_node, args.package, args, from_snapshot=False)

with new_node.client(interface_name=operator_rpc_interface) as c:
r = c.get("/node/network/nodes/self", allow_redirects=False)
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -1543,7 +1543,7 @@ def test_long_lived_forwarding(network, args):

new_node_args = copy.deepcopy(args)
new_node_args.node_to_node_message_limit = message_limit
network.join_node(new_node, args.package, new_node_args)
network.join_node(new_node, args.package, new_node_args, from_snapshot=False)
network.trust_node(new_node, new_node_args)

# Send many messages to new node over long-lived connections,
Expand Down
29 changes: 23 additions & 6 deletions tests/e2e_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -689,6 +689,7 @@ def test_empty_snapshot(network, args):
snapshots_dir=snapshots_dir,
# Don't try to fetch a snapshot, look at the local files
fetch_recent_snapshot=False,
from_snapshot=True,
)
new_node.stop()

Expand Down Expand Up @@ -728,6 +729,7 @@ def test_nulled_snapshot(network, args):
snapshots_dir=snapshots_dir,
# Don't try to fetch a snapshot, look at the local files
fetch_recent_snapshot=False,
from_snapshot=True,
)
except Exception as e:
failed = True
Expand Down Expand Up @@ -783,6 +785,7 @@ def test_corrupt_snapshot_handling(network, args):
args.package,
args,
snapshots_dir=writable_dir,
from_snapshot=True,
fetch_recent_snapshot=False,
)

Expand Down Expand Up @@ -875,6 +878,7 @@ def test_corrupt_snapshot_handling(network, args):
args.package,
args,
snapshots_dir=restricted_dir,
from_snapshot=True,
fetch_recent_snapshot=False,
)

Expand Down Expand Up @@ -1424,10 +1428,15 @@ def test_ledger_chunk_redirect_gap(network, args):
commit_seqno = TxID.from_str(r["transaction_id"]).seqno

new_node = network.create_node()
# force primary to generate a new snapshot after commit idx
snapshots = network.get_committed_snapshots()
network.join_node(
new_node,
args.package,
args,
# Tmp fix until the primary expresses an opinion on which snapshot to use (see #7844)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am confused about why the primary does not express an opinion here. Do we not have backup download enabled?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the "we need the primary to have an opinion about which snapshot it prefers" problem: that opinion needs to be more precise than startup_seqno.

The reason being that if startup_seqno is 0, as it is here, then we replicate the whole ledger via Raft.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is #7844, right? I suggest we mention that inline in the comment here ("Tmp" is just a way to avoid our "TODO" check; the way we actually prevent this shim from living forever is to link to the issue/PR that fixes it), and probably aim to merge that before this, so this shim never hits main.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@eddyashton Yep I'm going to delay merging this for a little bit until we merge #7844 and then this can be clean.

snapshots_dir=snapshots,
from_snapshot=True,
# Fetch recent snapshot to speed up joining
fetch_recent_snapshot=True,
)
Expand Down Expand Up @@ -1591,7 +1600,7 @@ def run_tls_san_checks(const_args):
)
new_node = network.create_node(host_spec)
args.subject_alt_names = [f"dNSName:{dummy_san}"]
network.join_node(new_node, args.package, args)
network.join_node(new_node, args.package, args, from_snapshot=False)
sans = infra.crypto.get_san_from_pem_cert(new_node.get_tls_certificate_pem())
assert len(sans) == 1, "Expected exactly one SAN"
assert sans[0].value == dummy_san
Expand All @@ -1606,7 +1615,7 @@ def run_tls_san_checks(const_args):
dummy_public_rpc_hosts.add(ipaddress.ip_address(dummy_public_rpc_host))

new_node = network.create_node(host_spec)
network.join_node(new_node, args.package, args)
network.join_node(new_node, args.package, args, from_snapshot=False)
# Cannot trust the node here as client cannot authenticate dummy public IP in cert
with open(
os.path.join(network.common_dir, f"{new_node.local_node_id}.pem"),
Expand Down Expand Up @@ -1941,7 +1950,7 @@ def replace_nodes(network, old_nodes, new_package):
new_nodes = []
for _ in range(len(old_nodes)):
new_node = network.create_node()
network.join_node(new_node, new_package, nargs)
network.join_node(new_node, new_package, nargs, from_snapshot=False)
network.trust_node(new_node, nargs)
new_nodes.append(new_node)

Expand Down Expand Up @@ -1999,7 +2008,7 @@ def replace_nodes(network, old_nodes, new_package):
# Verify a Dual joiner can still join (allow_dual_signing_joinee=true)
LOG.info("Verifying Dual joiner can still join COSE-only (allow dual) network")
dual_joiner = network.create_node()
network.join_node(dual_joiner, nargs.package, nargs)
network.join_node(dual_joiner, nargs.package, nargs, from_snapshot=False)
network.trust_node(dual_joiner, nargs)
LOG.success("Dual joiner successfully joined COSE-only network")

Expand Down Expand Up @@ -2039,7 +2048,9 @@ def replace_nodes(network, old_nodes, new_package):
LOG.info("Verifying Dual joiner is rejected by COSE-only-no-dual network")
rejected_joiner = network.create_node()
try:
network.join_node(rejected_joiner, nargs.package, nargs, timeout=10)
network.join_node(
rejected_joiner, nargs.package, nargs, timeout=10, from_snapshot=False
)
network.trust_node(rejected_joiner, nargs)
assert False, "Dual joiner should have been rejected"
except Exception as e:
Expand Down Expand Up @@ -2185,11 +2196,14 @@ def try_historical_fetch(node, timeout=1):
# Create a temporary directory to manually construct a ledger in
with tempfile.TemporaryDirectory() as temp_dir:
new_node = network.create_node()

snapshots_dir = network.get_committed_snapshots()
network.join_node(
new_node,
nargs.package,
nargs,
from_snapshot=True,
snapshots_dir=snapshots_dir,
copy_ledger=False,
common_read_only_ledger_dir=temp_dir, # New node will try to read from temp directory
)
Expand Down Expand Up @@ -3004,7 +3018,9 @@ def run_error_message_on_failure_to_read_aci_sec_context(args):
args_copy.snp_endorsements_file = "/a/fake/path"
failed = False
try:
network.join_node(new_node, args.package, args_copy, timeout=20)
network.join_node(
new_node, args.package, args_copy, timeout=20, from_snapshot=False
)
except infra.network.CollateralFetchTimeout:
LOG.info(
"Node with invalid quote endorsement servers could not join as expected"
Expand Down Expand Up @@ -3175,6 +3191,7 @@ def test_backup_snapshot_fetch_max_size(network, args):
args,
target_node=primary,
timeout=5,
from_snapshot=False,
backup_snapshot_fetch_enabled=True,
backup_snapshot_fetch_max_attempts=1,
backup_snapshot_fetch_max_size="1KB",
Expand Down
4 changes: 2 additions & 2 deletions tests/governance.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def test_no_quote(network, args):
}
)
)
network.join_node(untrusted_node, args.package, args)
network.join_node(untrusted_node, args.package, args, from_snapshot=False)
with untrusted_node.client(
ca=os.path.join(
untrusted_node.common_dir, f"{untrusted_node.local_node_id}.pem"
Expand Down Expand Up @@ -175,7 +175,7 @@ def get_nodes():
)

# NB: This new node joins but is never trusted
network.join_node(untrusted_node, args.package, args)
network.join_node(untrusted_node, args.package, args, from_snapshot=False)

nodes = get_nodes()
assert untrusted_node.node_id in nodes, nodes
Expand Down
1 change: 1 addition & 0 deletions tests/infra/basicperf.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ def replace_primary(network, host, old_primary, snapshots_dir, statistics):
timeout=10,
copy_ledger=False,
snapshots_dir=snapshots_dir,
from_snapshot=snapshots_dir is not None,
follow_redirect=False,
)
LOG.info(f"Shut down primary: {old_primary.local_node_id}")
Expand Down
24 changes: 11 additions & 13 deletions tests/infra/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ def _setup_node(
ledger_dir=None,
copy_ledger=True,
read_only_ledger_dirs=None,
from_snapshot=True,
from_snapshot=False,
snapshots_dir=None,
**kwargs,
):
Expand All @@ -368,20 +368,16 @@ def _setup_node(

# Note: Copy snapshot before ledger, as retrieving the latest snapshot may require
# producing more ledger entries
# Note: from_snapshot is not true in the start_and_open case nor start_in_recovery
if from_snapshot:
# Only retrieve snapshot from primary if the snapshot directory is not specified
if snapshots_dir is None:
primary, _ = self.find_primary(
timeout=args.ledger_recovery_timeout if recovery else 10
)
read_only_snapshots_dir = self.get_committed_snapshots(primary)
if os.listdir(snapshots_dir) or os.listdir(read_only_snapshots_dir):
LOG.info(
f"Joining from snapshot directories: {snapshots_dir},{read_only_snapshots_dir}"
)
assert False, "snapshot_dir must be provided when from_snapshot is True"
if snapshots_dir and os.listdir(snapshots_dir):
LOG.info(f"Joining from snapshot directories: {snapshots_dir}")
else:
LOG.warning(
f"Attempting to join from snapshot but {snapshots_dir},{read_only_snapshots_dir} are empty: defaulting to complete replay of transaction history"
f"Attempting to join from snapshot but {snapshots_dir} is empty: defaulting to complete replay of transaction history"
)
else:
LOG.info(
Expand Down Expand Up @@ -424,7 +420,7 @@ def _add_node(
ledger_dir=None,
copy_ledger=True,
read_only_ledger_dirs=None,
from_snapshot=True,
from_snapshot=False,
snapshots_dir=None,
**kwargs,
):
Expand Down Expand Up @@ -1603,7 +1599,7 @@ def resize(self, target_count, args):
LOG.info(f"Resizing network from {initial_node_count} to {target_count} nodes")
while node_count < target_count:
new_node = self.create_node()
self.join_node(new_node, args.package, args)
self.join_node(new_node, args.package, args, from_snapshot=False)
Comment thread
achamayou marked this conversation as resolved.
self.trust_node(new_node, args)
node_count += 1
while node_count > target_count:
Expand Down Expand Up @@ -1870,12 +1866,14 @@ def wait_for_primary_unanimity(

def get_committed_snapshots(
self,
node,
node=None,
target_seqno=None,
force_txs=True,
wait_for_target_seqno=False,
timeout=20,
):
if node is None:
node, _ = self.find_primary()
# Wait for the snapshot including target_seqno to be committed before
# copying snapshot directory. Do not issue transactions if force_txs is False
# and expect snapshot to have already been created.
Expand Down
1 change: 1 addition & 0 deletions tests/js-custom-authorization/custom_authorization.py
Original file line number Diff line number Diff line change
Expand Up @@ -1092,6 +1092,7 @@ def test_metrics_logging(network, args):
new_node,
args.package,
args,
from_snapshot=False,
)
network.trust_node(new_node, args)

Expand Down
2 changes: 1 addition & 1 deletion tests/limits.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def test_forward_larger_than_default_requests(network, args):
}
)
)
network.join_node(new_node, args.package, args)
network.join_node(new_node, args.package, args, from_snapshot=False)
network.trust_node(new_node, args)

primary, _ = network.find_primary()
Expand Down
Loading
Loading