Skip to content

Commit 9168ce7

Browse files
cjen1-msft, Copilot, achamayou
authored
Add close_on_error context to wrap critical sections (#7544)
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Amaury Chamayou <amchamay@microsoft.com>
Co-authored-by: Amaury Chamayou <amaury@xargs.fr>
1 parent 3c5ea1f commit 9168ce7

3 files changed

Lines changed: 152 additions & 136 deletions

File tree

tests/e2e_operations.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -724,6 +724,7 @@ def run_file_operations(args):
724724
test_empty_snapshot(network, args)
725725
test_nulled_snapshot(network, args)
726726

727+
# Ensure that the network is still live
727728
primary, _ = network.find_primary()
728729
# Scoped transactions are not handled by historical range queries
729730
network.stop_all_nodes(skip_verification=True)

tests/infra/network.py

Lines changed: 30 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -1100,6 +1100,9 @@ def stop_all_nodes(
11001100
"Fatal error found during node shutdown", node_errors
11011101
)
11021102

1103+
if self.partitioner is not None:
1104+
self.partitioner.cleanup()
1105+
11031106
def setup_join_node(
11041107
self,
11051108
node,
@@ -1956,6 +1959,28 @@ def identity(self, name=None):
19561959
)
19571960

19581961

1962+
# Closes the network on error, logging stack traces and optionally dropping into pdb
1963+
@contextmanager
1964+
def close_on_error(net, pdb=False):
1965+
try:
1966+
yield
1967+
except Exception:
1968+
# Don't try to verify txs on Exception path
1969+
net.txs = None
1970+
1971+
net.log_stack_traces(timeout=10)
1972+
1973+
if pdb:
1974+
import pdb
1975+
1976+
pdb.post_mortem()
1977+
1978+
LOG.info("Stopping network")
1979+
net.stop_all_nodes(skip_verification=True, accept_ledger_diff=True)
1980+
1981+
raise
1982+
1983+
19591984
@contextmanager
19601985
def network(
19611986
hosts,
@@ -1999,22 +2024,9 @@ def network(
19992024
node_data_json_file=node_data_json_file,
20002025
**kwargs,
20012026
)
2002-
try:
2027+
with close_on_error(net, pdb=pdb):
20032028
yield net
2004-
except Exception:
2005-
# Don't try to verify txs on Exception path
2006-
net.txs = None
2007-
2008-
net.log_stack_traces(timeout=10)
2009-
2010-
if pdb:
2011-
import pdb
2012-
2013-
pdb.set_trace()
2014-
else:
2015-
raise
2016-
finally:
2017-
LOG.info("Stopping network")
2018-
net.stop_all_nodes(skip_verification=True, accept_ledger_diff=True)
2019-
if init_partitioner:
2020-
net.partitioner.cleanup()
2029+
LOG.info("Stopping network")
2030+
net.stop_all_nodes(skip_verification=True, accept_ledger_diff=True)
2031+
if init_partitioner:
2032+
net.partitioner.cleanup()

tests/recovery.py

Lines changed: 121 additions & 118 deletions
Original file line number | Diff line number | Diff line change
@@ -417,136 +417,139 @@ def test_recover_service_with_wrong_identity(network, args):
417417
existing_network=network,
418418
)
419419

420-
recovered_network.start_in_recovery(
421-
args,
422-
ledger_dir=current_ledger_dir,
423-
committed_ledger_dirs=committed_ledger_dirs,
424-
snapshots_dir=snapshots_dir,
425-
)
426-
427-
# Must fail with a dedicated error message if requesting a receipt for a TX
428-
# from past epochs, since ledger secrets are not yet available,
429-
# therefore no receipt can be generated.
430-
primary, _ = recovered_network.find_primary()
431-
with primary.client() as cli:
432-
curr_tx_id = ccf.tx_id.TxID.from_str(
433-
cli.get("/node/commit").body.json()["transaction_id"]
420+
with infra.network.close_on_error(recovered_network):
421+
recovered_network.start_in_recovery(
422+
args,
423+
ledger_dir=current_ledger_dir,
424+
committed_ledger_dirs=committed_ledger_dirs,
425+
snapshots_dir=snapshots_dir,
434426
)
435427

436-
response = cli.get(f"/node/receipt?transaction_id={str(before_recovery_tx_id)}")
437-
assert response.status_code == http.HTTPStatus.NOT_FOUND, response
438-
assert (
439-
"not signed by the current service"
440-
in response.body.json()["error"]["message"]
441-
), response
442-
443-
current_service_created_tx_id = ccf.tx_id.TxID.from_str(
444-
cli.get("/node/network").body.json()["current_service_create_txid"]
445-
)
428+
# Must fail with a dedicated error message if requesting a receipt for a TX
429+
# from past epochs, since ledger secrets are not yet available,
430+
# therefore no receipt can be generated.
431+
primary, _ = recovered_network.find_primary()
432+
with primary.client() as cli:
433+
curr_tx_id = ccf.tx_id.TxID.from_str(
434+
cli.get("/node/commit").body.json()["transaction_id"]
435+
)
446436

447-
# TX from the current epoch though can be verified, as soon as the caller
448-
# trusts the current service identity.
449-
receipt = primary.get_receipt(curr_tx_id.view, curr_tx_id.seqno).json()
450-
verify_receipt(receipt, recovered_network.cert, is_signature_tx=True)
437+
response = cli.get(
438+
f"/node/receipt?transaction_id={str(before_recovery_tx_id)}"
439+
)
440+
assert response.status_code == http.HTTPStatus.NOT_FOUND, response
441+
assert (
442+
"not signed by the current service"
443+
in response.body.json()["error"]["message"]
444+
), response
451445

452-
recovered_network.recover(args)
446+
current_service_created_tx_id = ccf.tx_id.TxID.from_str(
447+
cli.get("/node/network").body.json()["current_service_create_txid"]
448+
)
453449

454-
# Needs refreshing, recovery has completed.
455-
with primary.client() as cli:
456-
curr_tx_id = ccf.tx_id.TxID.from_str(
457-
cli.get("/node/commit").body.json()["transaction_id"]
458-
)
450+
# TX from the current epoch though can be verified, as soon as the caller
451+
# trusts the current service identity.
452+
receipt = primary.get_receipt(curr_tx_id.view, curr_tx_id.seqno).json()
453+
verify_receipt(receipt, recovered_network.cert, is_signature_tx=True)
459454

460-
# Check receipts for transactions after multiple recoveries. This test
461-
# relies on previous recoveries and is therefore prone to failures if
462-
# surrounding test calls change.
463-
txids = [
464-
# Last TX before previous recovery
465-
shifted_tx(previous_service_created_tx_id, -2, -1),
466-
# First after previous recovery
467-
previous_service_created_tx_id,
468-
# Random TX before previous and last recovery
469-
shifted_tx(current_service_created_tx_id, -2, -5),
470-
# Last TX before last recovery
471-
shifted_tx(current_service_created_tx_id, -2, -1),
472-
# First TX after last recovery
473-
current_service_created_tx_id,
474-
# Random TX after last recovery
475-
shifted_tx(curr_tx_id, 0, -3),
476-
]
477-
478-
with primary.client("user0") as client:
479-
480-
def pull_with_handle():
481-
# Receipts for previous service instances require back-endorsement.
482-
# In this case it should trigger reading pulling up state
483-
# for previous_service_created_tx_id, which will have an overlapping
484-
# seqno with the target tx, but this has to work just fine due to
485-
# App/Sys handle split.
486-
return client.get(
487-
f"/log/private/historical/handle?seqno={previous_service_created_tx_id.seqno + 1}&handle={previous_service_created_tx_id.seqno}"
488-
).status_code
455+
recovered_network.recover(args)
489456

490-
for _ in range(10):
491-
if pull_with_handle() == http.HTTPStatus.OK:
492-
break
493-
time.sleep(0.5)
494-
else:
495-
assert False, "Could not get a receipt with a custom handle"
496-
497-
for tx in txids:
498-
receipt = primary.get_receipt(tx.view, tx.seqno).json()
499-
500-
try:
501-
verify_receipt(receipt, recovered_network.cert)
502-
except AssertionError:
503-
# May fail due to missing leaf components if it's a signature TX,
504-
# try again with a flag to force skip leaf components verification.
505-
verify_receipt(receipt, recovered_network.cert, is_signature_tx=True)
506-
507-
with primary.client() as cli:
508-
service_cert = cli.get("/node/network").body.json()["service_certificate"]
509-
cert = load_pem_x509_certificate(
510-
service_cert.encode("ascii"), default_backend()
511-
)
457+
# Needs refreshing, recovery has completed.
458+
with primary.client() as cli:
459+
curr_tx_id = ccf.tx_id.TxID.from_str(
460+
cli.get("/node/commit").body.json()["transaction_id"]
461+
)
512462

513-
for tx in txids[0:1]:
514-
response = query_endorsements_chain(primary, tx)
515-
assert response.status_code == http.HTTPStatus.OK, response
516-
endorsements = [
517-
base64.b64decode(x) for x in response.body.json()["endorsements"]
463+
# Check receipts for transactions after multiple recoveries. This test
464+
# relies on previous recoveries and is therefore prone to failures if
465+
# surrounding test calls change.
466+
txids = [
467+
# Last TX before previous recovery
468+
shifted_tx(previous_service_created_tx_id, -2, -1),
469+
# First after previous recovery
470+
previous_service_created_tx_id,
471+
# Random TX before previous and last recovery
472+
shifted_tx(current_service_created_tx_id, -2, -5),
473+
# Last TX before last recovery
474+
shifted_tx(current_service_created_tx_id, -2, -1),
475+
# First TX after last recovery
476+
current_service_created_tx_id,
477+
# Random TX after last recovery
478+
shifted_tx(curr_tx_id, 0, -3),
518479
]
519-
assert len(endorsements) == 2 # 2 recoveries behind
520-
verify_endorsements_chain(
521-
primary,
522-
endorsements,
523-
cert.public_key().public_bytes(
524-
serialization.Encoding.PEM,
525-
serialization.PublicFormat.SubjectPublicKeyInfo,
526-
),
527-
)
528480

529-
for tx in txids[1:4]:
530-
response = query_endorsements_chain(primary, tx)
531-
assert response.status_code == http.HTTPStatus.OK, response
532-
endorsements = [
533-
base64.b64decode(x) for x in response.body.json()["endorsements"]
534-
]
535-
assert len(endorsements) == 1 # 1 recovery behind
536-
verify_endorsements_chain(
537-
primary,
538-
endorsements,
539-
cert.public_key().public_bytes(
540-
serialization.Encoding.PEM,
541-
serialization.PublicFormat.SubjectPublicKeyInfo,
542-
),
543-
)
481+
with primary.client("user0") as client:
482+
483+
def pull_with_handle():
484+
# Receipts for previous service instances require back-endorsement.
485+
# In this case it should trigger reading pulling up state
486+
# for previous_service_created_tx_id, which will have an overlapping
487+
# seqno with the target tx, but this has to work just fine due to
488+
# App/Sys handle split.
489+
return client.get(
490+
f"/log/private/historical/handle?seqno={previous_service_created_tx_id.seqno + 1}&handle={previous_service_created_tx_id.seqno}"
491+
).status_code
492+
493+
for _ in range(10):
494+
if pull_with_handle() == http.HTTPStatus.OK:
495+
break
496+
time.sleep(0.5)
497+
else:
498+
assert False, "Could not get a receipt with a custom handle"
499+
500+
for tx in txids:
501+
receipt = primary.get_receipt(tx.view, tx.seqno).json()
502+
503+
try:
504+
verify_receipt(receipt, recovered_network.cert)
505+
except AssertionError:
506+
# May fail due to missing leaf components if it's a signature TX,
507+
# try again with a flag to force skip leaf components verification.
508+
verify_receipt(receipt, recovered_network.cert, is_signature_tx=True)
509+
510+
with primary.client() as cli:
511+
service_cert = cli.get("/node/network").body.json()["service_certificate"]
512+
cert = load_pem_x509_certificate(
513+
service_cert.encode("ascii"), default_backend()
514+
)
515+
516+
for tx in txids[0:1]:
517+
response = query_endorsements_chain(primary, tx)
518+
assert response.status_code == http.HTTPStatus.OK, response
519+
endorsements = [
520+
base64.b64decode(x) for x in response.body.json()["endorsements"]
521+
]
522+
assert len(endorsements) == 2 # 2 recoveries behind
523+
verify_endorsements_chain(
524+
primary,
525+
endorsements,
526+
cert.public_key().public_bytes(
527+
serialization.Encoding.PEM,
528+
serialization.PublicFormat.SubjectPublicKeyInfo,
529+
),
530+
)
544531

545-
for tx in txids[4:]:
546-
response = query_endorsements_chain(primary, tx)
547-
assert response.status_code == http.HTTPStatus.NOT_FOUND, response
532+
for tx in txids[1:4]:
533+
response = query_endorsements_chain(primary, tx)
534+
assert response.status_code == http.HTTPStatus.OK, response
535+
endorsements = [
536+
base64.b64decode(x) for x in response.body.json()["endorsements"]
537+
]
538+
assert len(endorsements) == 1 # 1 recovery behind
539+
verify_endorsements_chain(
540+
primary,
541+
endorsements,
542+
cert.public_key().public_bytes(
543+
serialization.Encoding.PEM,
544+
serialization.PublicFormat.SubjectPublicKeyInfo,
545+
),
546+
)
548547

549-
return recovered_network
548+
for tx in txids[4:]:
549+
response = query_endorsements_chain(primary, tx)
550+
assert response.status_code == http.HTTPStatus.NOT_FOUND, response
551+
552+
return recovered_network
550553

551554

552555
@reqs.description("Recover a service from local files")

0 commit comments

Comments
 (0)