Skip to content

Commit 21b16d0

Browse files
cjen1-msftCopilotachamayou
authored
Refactor join handler (#7853)
Co-authored-by: Copilot <copilot@github.com> Co-authored-by: Amaury Chamayou <amchamay@microsoft.com>
1 parent 1fc5b74 commit 21b16d0

4 files changed

Lines changed: 309 additions & 117 deletions

File tree

src/node/rpc/node_frontend.h

Lines changed: 72 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -264,11 +264,6 @@ namespace ccf
264264
return duplicate_node_id;
265265
}
266266

267-
bool is_taking_part_in_acking(NodeStatus node_status)
268-
{
269-
return node_status == NodeStatus::TRUSTED;
270-
}
271-
272267
auto add_node(
273268
ccf::kv::Tx& tx,
274269
const std::vector<uint8_t>& node_der,
@@ -457,120 +452,38 @@ namespace ccf
457452
auto accept = [this](auto& args, const nlohmann::json& params) {
458453
const auto in = params.get<JoinNetworkNodeToNode::In>();
459454

455+
// Not part of network => Internal error
460456
if (
461457
!this->node_operation.is_part_of_network() &&
462458
!this->node_operation.is_part_of_public_network() &&
463459
!this->node_operation.is_reading_private_ledger())
464460
{
461+
const std::string payload =
462+
"Target node should be part of network to accept new nodes.";
463+
LOG_INFO_FMT("Join request rejected: {}", payload);
465464
return make_error(
466465
HTTP_STATUS_INTERNAL_SERVER_ERROR,
467466
ccf::errors::InternalError,
468-
"Target node should be part of network to accept new nodes.");
469-
}
470-
471-
// Make sure that the joiner's snapshot is more recent than this node's
472-
// snapshot. Otherwise, the joiner may not be given all the ledger
473-
// secrets required to replay historical transactions.
474-
auto this_startup_seqno =
475-
this->node_operation.get_startup_snapshot_seqno();
476-
if (
477-
in.startup_seqno.has_value() &&
478-
this_startup_seqno > in.startup_seqno.value())
479-
{
480-
return make_error(
481-
HTTP_STATUS_BAD_REQUEST,
482-
ccf::errors::StartupSeqnoIsOld,
483-
fmt::format(
484-
"Node requested to join from seqno {} which is older than this "
485-
"node startup seqno {}. A snapshot at least as recent as {} must "
486-
"be used instead.",
487-
in.startup_seqno.value(),
488-
this_startup_seqno,
489-
this_startup_seqno));
467+
payload);
490468
}
491469

492-
auto nodes = args.tx.rw(this->network.nodes);
470+
// No service => Internal error
493471
auto service = args.tx.rw(this->network.service);
494-
495472
auto active_service = service->get();
496473
if (!active_service.has_value())
497474
{
475+
const std::string payload =
476+
"No service is available to accept new node.";
477+
LOG_INFO_FMT("Join request rejected: {}", payload);
498478
return make_error(
499479
HTTP_STATUS_INTERNAL_SERVER_ERROR,
500480
ccf::errors::InternalError,
501-
"No service is available to accept new node.");
481+
payload);
502482
}
503483

504-
if (
505-
active_service->status == ServiceStatus::OPENING ||
506-
active_service->status == ServiceStatus::RECOVERING)
507-
{
508-
// If the service is opening, new nodes are trusted straight away
509-
NodeStatus joining_node_status = NodeStatus::TRUSTED;
510-
511-
// If the node is already trusted, return network secrets
512-
auto existing_node_info = check_node_exists(
513-
args.tx,
514-
args.rpc_ctx->get_session_context()->caller_cert,
515-
joining_node_status);
516-
if (existing_node_info.has_value())
517-
{
518-
JoinNetworkNodeToNode::Out rep;
519-
rep.node_status = joining_node_status;
520-
rep.network_info = JoinNetworkNodeToNode::Out::NetworkInfo(
521-
node_operation.is_part_of_public_network(),
522-
node_operation.get_last_recovered_signed_idx(),
523-
this->network.ledger_secrets->get(
524-
args.tx, existing_node_info->ledger_secret_seqno),
525-
*this->network.identity,
526-
active_service->status,
527-
existing_node_info->endorsed_certificate,
528-
node_operation.get_cose_signatures_config());
529-
530-
return make_success(rep);
531-
}
532-
533-
if (consensus != nullptr && !this->node_operation.can_replicate())
534-
{
535-
auto primary_id = consensus->primary();
536-
if (primary_id.has_value())
537-
{
538-
const auto address = node::get_redirect_address_for_node(
539-
args, args.tx, primary_id.value());
540-
if (!address.has_value())
541-
{
542-
return already_populated_response();
543-
}
544-
545-
args.rpc_ctx->set_response_header(
546-
http::headers::LOCATION,
547-
fmt::format("https://{}/node/join", address.value()));
548-
549-
return make_error(
550-
HTTP_STATUS_PERMANENT_REDIRECT,
551-
ccf::errors::NodeCannotHandleRequest,
552-
"Node is not primary; cannot handle write");
553-
}
554-
555-
return make_error(
556-
HTTP_STATUS_INTERNAL_SERVER_ERROR,
557-
ccf::errors::InternalError,
558-
"Primary unknown");
559-
}
560-
561-
return add_node(
562-
args.tx,
563-
args.rpc_ctx->get_session_context()->caller_cert,
564-
in,
565-
joining_node_status,
566-
active_service->status);
567-
}
568-
569-
// If the service is open, new nodes are first added as pending and
570-
// then only trusted via member governance. It is expected that a new
571-
// node polls the network to retrieve the network secrets until it is
572-
// trusted
484+
auto nodes = args.tx.ro(network.nodes);
573485

486+
// If already joined => return equivalent response
574487
auto existing_node_info = check_node_exists(
575488
args.tx, args.rpc_ctx->get_session_context()->caller_cert);
576489
if (existing_node_info.has_value())
@@ -583,7 +496,7 @@ namespace ccf
583496
auto node_status = node_info->status;
584497
rep.node_status = node_status;
585498
rep.node_id = existing_node_info->node_id;
586-
if (is_taking_part_in_acking(node_status))
499+
if (node_status == NodeStatus::TRUSTED)
587500
{
588501
rep.network_info = JoinNetworkNodeToNode::Out::NetworkInfo(
589502
node_operation.is_part_of_public_network(),
@@ -595,22 +508,29 @@ namespace ccf
595508
existing_node_info->endorsed_certificate,
596509
node_operation.get_cose_signatures_config());
597510

511+
LOG_DEBUG_FMT(
512+
"Join request accepted: {} already marked as TRUSTED",
513+
existing_node_info->node_id);
598514
return make_success(rep);
599515
}
600516

601517
if (node_status == NodeStatus::PENDING)
602518
{
603519
// Only return node status and ID
520+
LOG_DEBUG_FMT(
521+
"Join request accepted: {} already marked as PENDING",
522+
existing_node_info->node_id);
604523
return make_success(rep);
605524
}
606525

526+
const std::string payload = fmt::format(
527+
"Joining node is not in expected state ({}).", node_status);
528+
LOG_INFO_FMT("Join request rejected: {}", payload);
607529
return make_error(
608-
HTTP_STATUS_BAD_REQUEST,
609-
ccf::errors::InvalidNodeState,
610-
fmt::format(
611-
"Joining node is not in expected state ({}).", node_status));
530+
HTTP_STATUS_BAD_REQUEST, ccf::errors::InvalidNodeState, payload);
612531
}
613532

533+
// Not the primary => Redirect if possible to primary
614534
if (consensus != nullptr && !this->node_operation.can_replicate())
615535
{
616536
auto primary_id = consensus->primary();
@@ -620,31 +540,75 @@ namespace ccf
620540
args, args.tx, primary_id.value());
621541
if (!address.has_value())
622542
{
543+
LOG_INFO_FMT(
544+
"Join request rejected: no redirect address for "
545+
"primary {}",
546+
primary_id.value());
623547
return already_populated_response();
624548
}
625549

626550
args.rpc_ctx->set_response_header(
627551
http::headers::LOCATION,
628552
fmt::format("https://{}/node/join", address.value()));
629553

554+
const std::string payload =
555+
"Node is not primary; cannot handle write";
556+
LOG_INFO_FMT(
557+
"Join request redirected to primary {} at {}: {}",
558+
primary_id.value(),
559+
address.value(),
560+
payload);
630561
return make_error(
631562
HTTP_STATUS_PERMANENT_REDIRECT,
632563
ccf::errors::NodeCannotHandleRequest,
633-
"Node is not primary; cannot handle write");
564+
"payload");
634565
}
635566

567+
const std::string payload = "Primary unknown";
568+
LOG_INFO_FMT("Join request rejected: {}", payload);
636569
return make_error(
637570
HTTP_STATUS_INTERNAL_SERVER_ERROR,
638571
ccf::errors::InternalError,
639-
"Primary unknown");
572+
payload);
573+
}
574+
575+
// Joiner's snapshot too old => StartupSeqnoIsOld
576+
// (cause it to fetch a more recent snapshot)
577+
auto this_startup_seqno =
578+
this->node_operation.get_startup_snapshot_seqno();
579+
if (
580+
in.startup_seqno.has_value() &&
581+
this_startup_seqno > in.startup_seqno.value())
582+
{
583+
// Make sure that the joiner's snapshot is more recent than this
584+
// node's snapshot. Otherwise, the joiner may not be given all the
585+
// ledger secrets required to replay historical transactions.
586+
const std::string payload = fmt::format(
587+
"Node requested to join from seqno {} which is older than this "
588+
"node startup seqno {}. A snapshot at least as recent as {} must "
589+
"be used instead.",
590+
in.startup_seqno.value(),
591+
this_startup_seqno,
592+
this_startup_seqno);
593+
LOG_INFO_FMT("Join request rejected: {}", payload);
594+
return make_error(
595+
HTTP_STATUS_BAD_REQUEST, ccf::errors::StartupSeqnoIsOld, payload);
596+
}
597+
598+
auto joining_node_status = NodeStatus::PENDING;
599+
// If the service is opening, new nodes are trusted straight away
600+
if (
601+
active_service->status == ServiceStatus::OPENING ||
602+
active_service->status == ServiceStatus::RECOVERING)
603+
{
604+
joining_node_status = NodeStatus::TRUSTED;
640605
}
641606

642-
// If the node does not exist, add it to the KV in state pending
643607
return add_node(
644608
args.tx,
645609
args.rpc_ctx->get_session_context()->caller_cert,
646610
in,
647-
NodeStatus::PENDING,
611+
joining_node_status,
648612
active_service->status);
649613
};
650614
make_endpoint("/join", HTTP_POST, json_adapter(accept), no_auth_required)

0 commit comments

Comments
 (0)