Skip to content

Commit c089ce6

Browse files
authored
Merge pull request #288 from Derecho-Project/signature_sst_bug_fix
Fixed bugs in signature verification
2 parents a2d483f + 04a785a commit c089ce6

13 files changed

Lines changed: 184 additions & 69 deletions

File tree

include/derecho/conf/conf.hpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -83,16 +83,16 @@ class Conf {
8383
std::map<const std::string, std::string> config = {
8484
// [DERECHO]
8585
{DERECHO_CONTACT_IP, "127.0.0.1"},
86-
{DERECHO_CONTACT_PORT, "23580"},
86+
{DERECHO_CONTACT_PORT, "14480"},
8787
{DERECHO_RESTART_LEADERS, "127.0.0.1"},
88-
{DERECHO_RESTART_LEADER_PORTS, "23580"},
88+
{DERECHO_RESTART_LEADER_PORTS, "14480"},
8989
{DERECHO_LOCAL_ID, "0"},
9090
{DERECHO_LOCAL_IP, "127.0.0.1"},
91-
{DERECHO_GMS_PORT, "23580"},
92-
{DERECHO_STATE_TRANSFER_PORT, "28366"},
93-
{DERECHO_SST_PORT, "37683"},
94-
{DERECHO_RDMC_PORT, "31675"},
95-
{DERECHO_EXTERNAL_PORT, "32645"},
91+
{DERECHO_GMS_PORT, "14480"},
92+
{DERECHO_STATE_TRANSFER_PORT, "14560"},
93+
{DERECHO_SST_PORT, "14660"},
94+
{DERECHO_RDMC_PORT, "14720"},
95+
{DERECHO_EXTERNAL_PORT, "14880"},
9696
{SUBGROUP_DEFAULT_RDMC_SEND_ALGORITHM, "binomial_send"},
9797
{DERECHO_P2P_LOOP_BUSY_WAIT_BEFORE_SLEEP_MS, "250"},
9898
{DERECHO_SST_POLL_CQ_TIMEOUT_MS, "2000"},
@@ -126,7 +126,7 @@ class Conf {
126126
{LOGGER_DEFAULT_LOG_NAME, "derecho_debug"},
127127
{LOGGER_DEFAULT_LOG_LEVEL, "debug"},
128128
{LOGGER_LOG_TO_TERMINAL, "true"},
129-
{LOGGER_LOG_FILE_DEPTH, "3"}};
129+
{LOGGER_LOG_FILE_DEPTH, "10"}};
130130

131131
public:
132132
// the option for parsing command line with getopt(not GetPot!!!)

include/derecho/core/detail/derecho_sst.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@ class DerechoSST : public sst::SST<DerechoSST> {
289289
void push_row_except_slots();
290290

291291
/**
292-
* Creates a string representation of the local row (not the whole table).
292+
* Creates a string representation of the table for debugging purposes.
293293
* This should be converted to an ostream operator<< to follow standards.
294294
*/
295295
std::string to_string() const;
@@ -389,7 +389,7 @@ void set(volatile char* string_array, const std::string& value);
389389

390390
void increment(volatile int& member);
391391

392-
bool equals(const volatile char& string_array, const std::string& value);
392+
bool equals(const volatile char* string_array, const std::string& value);
393393

394394
} // namespace gmssst
395395

include/derecho/sst/detail/sst_impl.hpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ void SST<DerivedSST>::put(const std::vector<uint32_t> receiver_ranks, size_t off
155155

156156
template <typename DerivedSST>
157157
void SST<DerivedSST>::put_with_completion(const std::vector<uint32_t> receiver_ranks, size_t offset, size_t size) {
158+
dbg_trace(sst_logger, "put_with_completion called with arguments receiver_ranks={}, offset={}, size={}", receiver_ranks, offset, size);
158159
assert(offset + size <= rowLen);
159160
unsigned int num_writes_posted = 0;
160161
std::vector<bool> posted_write_to(num_members, false);
@@ -177,6 +178,7 @@ void SST<DerivedSST>::put_with_completion(const std::vector<uint32_t> receiver_r
177178
// perform a remote RDMA write on the owner of the row
178179
ce_ctxt[index].set_remote_id(res_vec[index]->remote_id);
179180
ce_ctxt[index].set_ce_idx(ce_idx);
181+
dbg_trace(sst_logger, "Created a CE context for write to row {}: {:p} -> {{ {}, {}, {} }}", index, static_cast<void*>(&ce_ctxt[index]), ce_ctxt[index].ce_idx(), ce_ctxt[index].remote_id(), ce_ctxt[index].is_managed());
180182
res_vec[index]->post_remote_write_with_completion(&ce_ctxt[index], offset, size);
181183
posted_write_to[index] = true;
182184
num_writes_posted++;
@@ -216,6 +218,7 @@ void SST<DerivedSST>::put_with_completion(const std::vector<uint32_t> receiver_r
216218
if (result && result.value() == 1) {
217219
polled_successfully_from[index] = true;
218220
} else {
221+
dbg_debug(sst_logger, "put_with_completion marked row {} failed due to not receiving a completion", index);
219222
failed_node_indexes.push_back(index);
220223
}
221224
}
@@ -264,7 +267,11 @@ void SST<DerivedSST>::sync_with_members() const {
264267
for(auto const& id_index : members_by_id) {
265268
std::tie(node_id, sst_index) = id_index;
266269
if(sst_index != my_index && !row_is_frozen[sst_index]) {
267-
sync(node_id);
270+
dbg_debug(sst_logger, "TCP sync with node {}, for row {}", node_id, sst_index);
271+
bool success = sync(node_id);
272+
if(!success) {
273+
dbg_warn(sst_logger, "TCP sync with node {} was unsuccessful", node_id);
274+
}
268275
}
269276
}
270277
}
@@ -279,7 +286,11 @@ void SST<DerivedSST>::sync_with_members(std::vector<uint32_t> row_indices) const
279286
continue;
280287
}
281288
if(!row_is_frozen[row_index]) {
282-
sync(members[row_index]);
289+
dbg_debug(sst_logger, "TCP sync with node {}, for row {}", members[row_index], row_index);
290+
bool success = sync(members[row_index]);
291+
if(!success) {
292+
dbg_warn(sst_logger, "TCP sync with node {} was unsuccessful", members[row_index]);
293+
}
283294
}
284295
}
285296
}

include/derecho/sst/sst.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,9 @@ class SST {
181181

182182
DerivedSST* derived_this;
183183

184+
/** Pointer to the logger for the SST module, which is created in sst::lf_initialize() */
185+
std::shared_ptr<spdlog::logger> sst_logger;
186+
184187
std::vector<std::thread> background_threads;
185188
std::atomic<bool> thread_shutdown;
186189

@@ -236,6 +239,7 @@ class SST {
236239
public:
237240
SST(DerivedSST* derived_class_pointer, const SSTParams& params)
238241
: derived_this(derived_class_pointer),
242+
sst_logger(spdlog::get(LoggerFactory::SST_LOGGER_NAME)),
239243
thread_shutdown(false),
240244
poll_cq_timeout_ms(derecho::getConfUInt32(derecho::Conf::DERECHO_SST_POLL_CQ_TIMEOUT_MS)),
241245
members(params.members),

src/applications/tests/unit_tests/signed_log_test.cpp

Lines changed: 46 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -312,31 +312,35 @@ std::unique_ptr<UnsignedObject> UnsignedObject::from_bytes(mutils::Deserializati
312312
return std::make_unique<UnsignedObject>(*field_ptr, *counter_ptr, test_state_ptr);
313313
}
314314

315+
const int TEST_COORDINATION_PORT = 16000;
316+
315317
/**
316-
* Command-line arguments: <one_field_size> <two_field_size> <unsigned_size> <num_updates>
318+
* Command-line arguments: <one_field_size> <two_field_size> <mixed_field_size> <unsigned_size> <num_updates> <update_size>
317319
* one_field_size: Maximum size of the subgroup that replicates the one-field signed object
318320
* two_field_size: Maximum size of the subgroup that replicates the two-field signed object
319321
* mixed_field_size: Maximum size of the subgroup that replicates the mixed-signed-and-unsigned-field object
320322
* unsigned_size: Maximum size of the subgroup that replicates the persistent-but-not-signed object
321-
* num_updates: Number of randomly-generated 32-byte updates to send to each subgroup
323+
* num_updates: Number of randomly-generated updates to send to each subgroup
324+
* update_size: Size of the updates, in bytes
322325
*/
323326
int main(int argc, char** argv) {
324327
pthread_setname_np(pthread_self(), "test_main");
325328
const std::string characters("abcdefghijklmnopqrstuvwxyz");
326329
std::mt19937 random_generator(getpid());
327330
std::uniform_int_distribution<std::size_t> char_distribution(0, characters.size() - 1);
328-
const int num_args = 5;
331+
const int num_args = 6;
329332
if(argc < (num_args + 1) || (argc > (num_args + 1) && strcmp("--", argv[argc - (num_args + 1)]) != 0)) {
330333
std::cout << "Invalid command line arguments." << std::endl;
331-
std::cout << "Usage: " << argv[0] << " [derecho-config-options -- ] one_field_size two_field_size mixed_field_size unsigned_size num_updates" << std::endl;
334+
std::cout << "Usage: " << argv[0] << " [derecho-config-options -- ] one_field_size two_field_size mixed_field_size unsigned_size num_updates update_size" << std::endl;
332335
return -1;
333336
}
334337

335338
const unsigned int subgroup_1_size = std::stoi(argv[argc - num_args]);
336339
const unsigned int subgroup_2_size = std::stoi(argv[argc - num_args + 1]);
337340
const unsigned int subgroup_mixed_size = std::stoi(argv[argc - num_args + 2]);
338341
const unsigned int subgroup_unsigned_size = std::stoi(argv[argc - num_args + 3]);
339-
const unsigned int num_updates = std::stoi(argv[argc - 1]);
342+
const unsigned int num_updates = std::stoi(argv[argc - num_args + 4]);
343+
const unsigned int update_size = std::stoi(argv[argc - 1]);
340344
derecho::Conf::initialize(argc, argv);
341345

342346
derecho::SubgroupInfo subgroup_info(
@@ -401,7 +405,7 @@ int main(int argc, char** argv) {
401405
test_state.my_subgroup_is_unsigned = false;
402406
//Send random updates
403407
for(unsigned counter = 0; counter < num_updates; ++counter) {
404-
std::string new_string('a', 32);
408+
// std::string's fill constructor is (count, ch): the length goes first, so this
// allocates update_size characters, which std::generate then overwrites.
std::string new_string(update_size, 'a');
405409
std::generate(new_string.begin(), new_string.end(),
406410
[&]() { return characters[char_distribution(random_generator)]; });
407411
object_handle.ordered_send<RPC_NAME(update_state)>(new_string);
@@ -414,8 +418,8 @@ int main(int argc, char** argv) {
414418
test_state.my_subgroup_is_unsigned = false;
415419
//Send random updates
416420
for(unsigned counter = 0; counter < num_updates; ++counter) {
417-
std::string new_foo('a', 32);
418-
std::string new_bar('a', 32);
421+
// std::string's fill constructor is (count, ch): the length goes first, so this
// allocates update_size characters, which std::generate then overwrites.
std::string new_foo(update_size, 'a');
422+
// std::string's fill constructor is (count, ch): the length goes first, so this
// allocates update_size characters, which std::generate then overwrites.
std::string new_bar(update_size, 'a');
419423
std::generate(new_foo.begin(), new_foo.end(),
420424
[&]() { return characters[char_distribution(random_generator)]; });
421425
std::generate(new_bar.begin(), new_bar.end(),
@@ -430,7 +434,7 @@ int main(int argc, char** argv) {
430434
test_state.my_subgroup_is_unsigned = false;
431435
//Send random updates, alternating between the signed, unsigned, and nonpersistent fields
432436
for(unsigned counter = 0; counter < num_updates; ++counter) {
433-
std::string new_string_value('a', 32);
437+
// std::string's fill constructor is (count, ch): the length goes first, so this
// allocates update_size characters, which std::generate then overwrites.
std::string new_string_value(update_size, 'a');
434438
std::generate(new_string_value.begin(), new_string_value.end(),
435439
[&]() { return characters[char_distribution(random_generator)]; });
436440
if(counter % 3 == 0) {
@@ -449,7 +453,7 @@ int main(int argc, char** argv) {
449453
test_state.my_subgroup_is_unsigned = true;
450454
//Send random updates
451455
for(unsigned counter = 0; counter < num_updates; ++counter) {
452-
std::string new_string('a', 32);
456+
// std::string's fill constructor is (count, ch): the length goes first, so this
// allocates update_size characters, which std::generate then overwrites.
std::string new_string(update_size, 'a');
453457
std::generate(new_string.begin(), new_string.end(),
454458
[&]() { return characters[char_distribution(random_generator)]; });
455459
object_handle.ordered_send<RPC_NAME(update_state)>(new_string);
@@ -464,6 +468,37 @@ int main(int argc, char** argv) {
464468
test_state.subgroup_finished_condition.wait(lock, [&]() { return test_state.subgroup_finished; });
465469
}
466470
std::cout << "Done" << std::endl;
467-
group.barrier_sync();
471+
// If this node is the leader, open a socket and wait for all the other nodes to contact it
472+
// Otherwise, open a socket to the leader and exchange IDs to signal that this node is finished
473+
if(group.get_my_rank() == 0) {
474+
tcp::connection_listener listener_socket(TEST_COORDINATION_PORT);
475+
std::set<derecho::node_id_t> nodes_contacted;
476+
nodes_contacted.emplace(group.get_my_id());
477+
std::vector<derecho::node_id_t> members_vector = group.get_members();
478+
std::set<derecho::node_id_t> all_member_ids(members_vector.begin(), members_vector.end());
479+
std::vector<tcp::socket> member_connections;
480+
std::cout << "Waiting for other nodes to signal they are finished (members = " << members_vector << ")" << std::endl;
481+
while(nodes_contacted != all_member_ids) {
482+
member_connections.emplace_back(listener_socket.accept());
483+
derecho::node_id_t finished_member_id;
484+
member_connections.back().read(finished_member_id);
485+
nodes_contacted.emplace(finished_member_id);
486+
std::cout << "Got a connection from node " << finished_member_id << std::endl;
487+
}
488+
std::cout << "All nodes are done with the test, acknowledging so they can exit" << std::endl;
489+
for(auto& connection : member_connections) {
490+
const int done_signal = 1;
491+
connection.write(done_signal);
492+
}
493+
//member_connections sockets will close automatically at the end of this scope
494+
} else {
495+
derecho::ip_addr_t leader_address = group.get_member_addresses().front().ip_address;
496+
std::cout << "Connecting to leader at " << leader_address << " to signal node " << group.get_my_id() << " is done" << std::endl;
497+
tcp::socket leader_connection(leader_address, TEST_COORDINATION_PORT);
498+
leader_connection.write(group.get_my_id());
499+
std::cout << "Waiting for leader to signal the test is done" << std::endl;
500+
int done;
501+
leader_connection.read(done);
502+
}
468503
group.leave(true);
469504
}

src/conf/derecho-sample.cfg

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,21 @@
22
# contact ip - the active leader's ip address
33
contact_ip = 127.0.0.1
44
# contact port - the active leader's gms port
5-
contact_port = 23580
5+
contact_port = 14480
66
# list of leaders to contact during a restart in priority order
77
restart_leaders = 127.0.0.1,127.0.0.1
88
# list of GMS ports of the restart leaders, in the same order
9-
restart_leader_ports = 23580,23581
9+
restart_leader_ports = 14480,14481
1010
# derecho gms port
11-
gms_port = 23580
11+
gms_port = 14480
1212
# derecho state-transfer port
13-
state_transfer_port = 28366
13+
state_transfer_port = 14560
1414
# sst tcp port
15-
sst_port = 37683
15+
sst_port = 14660
1616
# rdmc tcp port
17-
rdmc_port = 31675
17+
rdmc_port = 14720
1818
# externel tcp port listening to external clients
19-
external_port = 32645
19+
external_port = 14880
2020
# Maximum possible node ID value
2121
# Node IDs are 32-bit integers, but all Derecho systems will have
2222
# many fewer nodes than this. Derecho will pre-allocate space for a
@@ -145,8 +145,8 @@ persistence_log_level = info
145145
# Whether logs should be printed to the terminal as well as saved to files (default is true)
146146
log_to_terminal = true
147147
# The number of older log files to save. Log files are rotated automatically
148-
# when the current one reaches 1MB in size. Default is 3.
149-
log_file_depth = 3
148+
# when the current one reaches 1MB in size. Default is 10.
149+
log_file_depth = 10
150150

151151
# optional layout configurations
152152
[LAYOUT]

src/conf/derecho_node-sample.cfg

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,15 @@ local_ip = 127.0.0.1
66
# These ports are optional: nodes will use the values from the group derecho.cfg by default,
77
# but if the port options are specified here they will override the defaults.
88
# derecho gms port
9-
gms_port = 23580
9+
gms_port = 14481
1010
# derecho state-transfer port
11-
state_transfer_port = 28366
11+
state_transfer_port = 14561
1212
# sst tcp port
13-
sst_port = 37683
13+
sst_port = 14661
1414
# rdmc tcp port
15-
rdmc_port = 31675
15+
rdmc_port = 14721
1616
# externel tcp port listening to external clients
17-
external_port = 32645
17+
external_port = 14881
1818

1919

2020
# RDMA section contains configurations of the following

0 commit comments

Comments
 (0)