Skip to content

Commit f8cf8ba

Browse files
Only do local sealing self-healing-open (#7568)
Co-authored-by: Amaury Chamayou <amchamay@microsoft.com>
1 parent aeb2796 commit f8cf8ba

1 file changed

Lines changed: 23 additions & 124 deletions

File tree

tests/e2e_operations.py

Lines changed: 23 additions & 124 deletions
Original file line number | Diff line number | Diff line change
@@ -1748,7 +1748,9 @@ def run(self, src_dir, dst_dir):
17481748
def run_self_healing_open(const_args):
17491749
args = copy.deepcopy(const_args)
17501750
args.nodes = infra.e2e_args.min_nodes(args, f=1)
1751-
args.label += "_self_healing"
1751+
args.label += "_self_healing_open"
1752+
args.enable_local_sealing = True
1753+
17521754
with infra.network.network(
17531755
args.nodes,
17541756
args.binary_dir,
@@ -1757,6 +1759,7 @@ def run_self_healing_open(const_args):
17571759
LOG.info("Start a network and stop it")
17581760
network.start_and_open(args)
17591761
network.save_service_identity(args)
1762+
node_secrets = [node.save_sealed_ledger_secret() for node in network.nodes]
17601763
network.stop_all_nodes()
17611764

17621765
recovery_args = copy.deepcopy(args)
@@ -1768,7 +1771,7 @@ def run_self_healing_open(const_args):
17681771
ledger_dirs[i] = l_dir
17691772
committed_ledger_dirs[i] = c
17701773

1771-
LOG.info("Start a recovery network and stop it")
1774+
LOG.info("Start recovery network")
17721775
with infra.network.network(
17731776
recovery_args.nodes,
17741777
recovery_args.binary_dir,
@@ -1779,6 +1782,7 @@ def run_self_healing_open(const_args):
17791782
recovery_args,
17801783
ledger_dirs=ledger_dirs,
17811784
committed_ledger_dirs=committed_ledger_dirs,
1785+
sealed_ledger_secrets=node_secrets,
17821786
)
17831787
recovered_network.wait_for_self_healing_open_finish()
17841788

@@ -1788,27 +1792,9 @@ def run_self_healing_open(const_args):
17881792

17891793
recovered_network.refresh_service_identity_file(recovery_args)
17901794

1791-
recovered_network.consortium.recover_with_shares(
1792-
recovered_network.find_random_node()
1793-
)
1794-
1795-
LOG.info("Submitted recovery shares")
1796-
1797-
# Wait for all live replicas to report being part of the opened network
1798-
successfully_opened = 0
1799-
for node in recovered_network.get_joined_nodes():
1800-
try:
1801-
recovered_network.wait_for_status(
1802-
node,
1803-
"Open",
1804-
timeout=10,
1805-
)
1806-
recovered_network._wait_for_app_open(node)
1807-
successfully_opened += 1
1808-
except TimeoutError:
1809-
pass
1795+
joined_nodes = recovered_network.get_joined_nodes()
18101796

1811-
assert successfully_opened == 1
1797+
assert len(joined_nodes) == 1, joined_nodes
18121798

18131799
LOG.info("Completed self-healing open successfully")
18141800

@@ -1825,6 +1811,8 @@ def run_self_healing_open_timeout_path(const_args):
18251811
args = copy.deepcopy(const_args)
18261812
args.nodes = infra.e2e_args.min_nodes(args, f=1)
18271813
args.label += "_self_healing_open_timeout"
1814+
args.enable_local_sealing = True
1815+
18281816
with infra.network.network(
18291817
args.nodes,
18301818
args.binary_dir,
@@ -1833,6 +1821,7 @@ def run_self_healing_open_timeout_path(const_args):
18331821
LOG.info("Start a network and stop it")
18341822
network.start_and_open(args)
18351823
network.save_service_identity(args)
1824+
node_secrets = [node.save_sealed_ledger_secret() for node in network.nodes]
18361825
network.stop_all_nodes()
18371826

18381827
recovery_args = copy.deepcopy(args)
@@ -1855,6 +1844,7 @@ def run_self_healing_open_timeout_path(const_args):
18551844
recovery_args,
18561845
ledger_dirs=ledger_dirs,
18571846
committed_ledger_dirs=committed_ledger_dirs,
1847+
sealed_ledger_secrets=node_secrets,
18581848
starting_nodes=0, # Force timeout path by starting only one node
18591849
)
18601850
recovered_network.wait_for_self_healing_open_finish()
@@ -1865,27 +1855,8 @@ def run_self_healing_open_timeout_path(const_args):
18651855

18661856
recovered_network.refresh_service_identity_file(recovery_args)
18671857

1868-
recovered_network.consortium.recover_with_shares(
1869-
recovered_network.find_random_node()
1870-
)
1871-
1872-
LOG.info("Submitted recovery shares")
1873-
1874-
# Wait for all live replicas to report being part of the opened network
1875-
successfully_opened = 0
1876-
for node in recovered_network.get_joined_nodes():
1877-
try:
1878-
recovered_network.wait_for_status(
1879-
node,
1880-
"Open",
1881-
timeout=10,
1882-
)
1883-
recovered_network._wait_for_app_open(node)
1884-
successfully_opened += 1
1885-
except TimeoutError:
1886-
pass
1887-
1888-
assert successfully_opened == 1
1858+
joined_nodes = recovered_network.get_joined_nodes()
1859+
assert len(joined_nodes) == 1, joined_nodes
18891860

18901861
LOG.info("Completed self-healing open successfully")
18911862

@@ -1902,6 +1873,8 @@ def run_self_healing_open_multiple_timeout(const_args):
19021873
args = copy.deepcopy(const_args)
19031874
args.nodes = infra.e2e_args.min_nodes(args, f=1)
19041875
args.label += "_self_healing_open_multiple_timeout"
1876+
args.enable_local_sealing = True
1877+
19051878
with infra.network.network(
19061879
args.nodes,
19071880
args.binary_dir,
@@ -1910,6 +1883,7 @@ def run_self_healing_open_multiple_timeout(const_args):
19101883
LOG.info("Start a network and stop it")
19111884
network.start_and_open(args)
19121885
network.save_service_identity(args)
1886+
node_secrets = [node.save_sealed_ledger_secret() for node in network.nodes]
19131887
network.stop_all_nodes()
19141888

19151889
recovery_args = copy.deepcopy(args)
@@ -1932,14 +1906,15 @@ def run_self_healing_open_multiple_timeout(const_args):
19321906
recovery_args,
19331907
ledger_dirs=ledger_dirs,
19341908
committed_ledger_dirs=committed_ledger_dirs,
1909+
sealed_ledger_secrets=node_secrets,
19351910
suspend_after_start=True, # suspend each node after starting to ensure they don't progress
19361911
)
1937-
# for each node start it and wait until it finishes the self-healing-open on the timeout path
1912+
# for each node: start it and wait until it finishes the self-healing-open on the timeout path
19381913
for node in recovered_network.nodes:
19391914
node.resume()
19401915
recovered_network.wait_for_statuses(
19411916
node,
1942-
["WaitingForRecoveryShares", "Open"],
1917+
["Open"],
19431918
timeout=10,
19441919
verify_ca=False,
19451920
)
@@ -1955,81 +1930,6 @@ def run_self_healing_open_multiple_timeout(const_args):
19551930
assert len(recovered_network.get_joined_nodes()) == len(args.nodes)
19561931

19571932

1958-
def run_self_healing_open_local_unsealing(const_args):
1959-
args = copy.deepcopy(const_args)
1960-
args.nodes = infra.e2e_args.min_nodes(args, f=1)
1961-
args.enable_local_sealing = True
1962-
args.label += "_self_healing_local_unsealing"
1963-
1964-
with infra.network.network(
1965-
args.nodes,
1966-
args.binary_dir,
1967-
args.debug_nodes,
1968-
) as network:
1969-
LOG.info("Start a network and stop it")
1970-
network.start_and_open(args)
1971-
network.save_service_identity(args)
1972-
node_secrets = [node.save_sealed_ledger_secret() for node in network.nodes]
1973-
network.stop_all_nodes()
1974-
1975-
recovery_args = copy.deepcopy(args)
1976-
1977-
ledger_dirs = {}
1978-
committed_ledger_dirs = {}
1979-
for i, node in enumerate(network.nodes):
1980-
l_dir, c = node.get_ledger()
1981-
ledger_dirs[i] = l_dir
1982-
committed_ledger_dirs[i] = c
1983-
1984-
LOG.info("Start a recovery network")
1985-
with infra.network.network(
1986-
recovery_args.nodes,
1987-
recovery_args.binary_dir,
1988-
recovery_args.debug_nodes,
1989-
existing_network=network,
1990-
) as recovered_network:
1991-
recovered_network.start_in_self_healing_open(
1992-
recovery_args,
1993-
ledger_dirs=ledger_dirs,
1994-
committed_ledger_dirs=committed_ledger_dirs,
1995-
sealed_ledger_secrets=node_secrets,
1996-
)
1997-
1998-
recovered_network.wait_for_self_healing_open_finish()
1999-
2000-
# Refresh the declared state of nodes which have shut themselves down to join.
2001-
for node in recovered_network.nodes:
2002-
node.refresh_network_state(verify_ca=False)
2003-
2004-
recovered_network.refresh_service_identity_file(recovery_args)
2005-
2006-
# Wait for all live replicas to report being part of the opened network
2007-
successfully_opened = 0
2008-
for node in recovered_network.get_joined_nodes():
2009-
try:
2010-
recovered_network.wait_for_status(
2011-
node,
2012-
"Open",
2013-
timeout=10,
2014-
)
2015-
recovered_network._wait_for_app_open(node)
2016-
successfully_opened += 1
2017-
except TimeoutError:
2018-
pass
2019-
2020-
assert successfully_opened == 1
2021-
2022-
LOG.info("Completed self-healing open successfully")
2023-
2024-
latest_public_tables, _ = recovered_network.get_latest_ledger_public_state()
2025-
recovery_type = latest_public_tables[
2026-
"public:ccf.gov.self_healing_open.open_kind"
2027-
][b"\x00\x00\x00\x00\x00\x00\x00\x00"].decode("utf-8")
2028-
assert (
2029-
recovery_type == '"Quorum"'
2030-
), f"Network self-healing open type was {recovery_type} instead of Quorum"
2031-
2032-
20331933
def run_read_ledger_on_testdata(args):
20341934
for testdata_dir in os.scandir(args.historical_testdata):
20351935
assert testdata_dir.is_dir()
@@ -2341,7 +2241,9 @@ def run_snp_tests(args):
23412241
run_recovery_unsealing_corrupt(args)
23422242
run_recovery_unsealing_validate_audit(args)
23432243
test_error_message_on_failure_to_read_aci_sec_context(args)
2344-
run_self_healing_open_local_unsealing(args)
2244+
run_self_healing_open(args)
2245+
run_self_healing_open_timeout_path(args)
2246+
run_self_healing_open_multiple_timeout(args)
23452247

23462248

23472249
def run(args):
@@ -2357,8 +2259,5 @@ def run(args):
23572259
run_cose_signatures_config_check(args)
23582260
run_late_mounted_ledger_check(args)
23592261
run_empty_ledger_dir_check(args)
2360-
run_self_healing_open(args)
2361-
run_self_healing_open_timeout_path(args)
2362-
run_self_healing_open_multiple_timeout(args)
23632262
run_read_ledger_on_testdata(args)
23642263
run_propose_request_vote(args)

0 commit comments

Comments
 (0)