Skip to content

Commit f98189a

Browse files
committed
clusterer: Fix node pinging regression in 7d74f3d
While commit 7d74f3d fixed links going down during pinging, it introduced a bug where node links could flip/flop between Up/Down state intermittently on some setups. This patch addresses the new issue.
1 parent 10b0e17 commit f98189a

4 files changed

Lines changed: 44 additions & 29 deletions

File tree

modules/clusterer/clusterer.c

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -353,10 +353,7 @@ static int msg_send_retry(bin_packet_t *packet, node_info_t *dest,
353353
gettimeofday(&now, NULL);
354354

355355
/* sent a TCP BIN packet directly to @dest -> delay next ping */
356-
lock_get(chosen_dest->lock);
357-
if (chosen_dest->link_state == LS_UP)
358-
chosen_dest->last_ping = now;
359-
lock_release(chosen_dest->lock);
356+
chosen_dest->last_sent = now;
360357

361358
return 0;
362359
}
@@ -1110,9 +1107,7 @@ void bin_rcv_cl_extra_packets(bin_packet_t *packet, int packet_type,
11101107
}
11111108

11121109
lock_get(node->lock);
1113-
1114-
/* bump "last pong" ts, since we fully read a valid TCP BIN packet */
1115-
node->last_pong = now;
1110+
node->last_recv = now;
11161111

11171112
if (!(node->flags & NODE_STATE_ENABLED)) {
11181113
lock_release(node->lock);
@@ -1250,9 +1245,8 @@ void bin_rcv_cl_packets(bin_packet_t *packet, int packet_type,
12501245
}
12511246

12521247
lock_get(node->lock);
1253-
1254-
/* bump "last pong" ts, since we fully read a valid TCP BIN packet */
1255-
node->last_pong = now;
1248+
if (packet_type != CLUSTERER_PONG)
1249+
node->last_recv = now;
12561250

12571251
if (!(node->flags & NODE_STATE_ENABLED)) {
12581252
lock_release(node->lock);
@@ -1398,9 +1392,7 @@ static void bin_rcv_mod_packets(bin_packet_t *packet, int packet_type,
13981392
}
13991393

14001394
lock_get(node->lock);
1401-
1402-
/* bump "last pong" ts, since we fully read a valid TCP BIN packet */
1403-
node->last_pong = now;
1395+
node->last_recv = now;
14041396

14051397
if (!(node->flags & NODE_STATE_ENABLED)) {
14061398
lock_release(node->lock);

modules/clusterer/clusterer_mod.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,9 @@ void *cl_srg=NULL;
7171

7272
str node_st_sr_ident = str_init("node_states");
7373

74+
long ping_timeout_us;
75+
long ping_interval_us;
76+
7477
/* module interface functions */
7578
static int mod_init(void);
7679
static int child_init(int rank);
@@ -414,6 +417,9 @@ static int mod_init(void)
414417
LM_WARN("Invalid ping_timeout parameter, using default value\n");
415418
ping_timeout = DEFAULT_PING_TIMEOUT;
416419
}
420+
ping_timeout_us = ping_timeout * 1000;
421+
ping_interval_us = ping_interval * 1000000;
422+
417423
if (seed_fb_interval < 0) {
418424
LM_WARN("Invalid seed_fallback_interval parameter, using default value\n");
419425
seed_fb_interval = DEFAULT_SEED_FB_INTERVAL;

modules/clusterer/node_info.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@ struct node_info {
8787
/* fields protected by node lock */
8888
clusterer_link_state link_state; /* state of the "link" with this node */
8989
int last_ping_state; /* state(success/error) of the last ping sent to this node */
90-
struct timeval last_ping; /* last ping sent to this node */
91-
struct timeval last_pong; /* last pong received from this node */
90+
struct timeval last_ping, last_sent; /* last ping/packet sent to this node */
91+
struct timeval last_pong, last_recv; /* last pong/packet received from this node */
9292
struct neighbour *neighbour_list; /* list of directly reachable neighbours */
9393
int ls_seq_no; /* sequence number of the last link state update */
9494
int top_seq_no; /* sequence number of the last topology update message */

modules/clusterer/topology.c

Lines changed: 31 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,12 @@
2828

2929
extern int ping_interval;
3030
extern int node_timeout;
31-
extern int ping_timeout;
31+
extern long ping_timeout_us, ping_interval_us;
3232
extern int clusterer_enable_rerouting;
3333

3434
#define PING_REPLY_INTERVAL(_node) \
35-
((_node)->last_ping.tv_sec*1000000 + (_node)->last_ping.tv_usec \
36-
- (_node)->last_pong.tv_sec*1000000 - (_node)->last_pong.tv_usec)
35+
((_node)->last_pong.tv_sec*1000000 + (_node)->last_pong.tv_usec \
36+
- (_node)->last_ping.tv_sec*1000000 - (_node)->last_ping.tv_usec)
3737

3838
static int send_ping(node_info_t *node, int req_node_list)
3939
{
@@ -185,7 +185,7 @@ static void do_action_trans_5(node_info_t *node, int *link_state_to_set,
185185
void heartbeats_timer(void)
186186
{
187187
struct timeval now;
188-
time_t last_ping_int, ping_reply_int;
188+
time_t last_ping_int, last_sent_int, ping_reply_int;
189189
cluster_info_t *clusters_it;
190190
node_info_t *node;
191191
int ev_actions_required[MAX_NO_CLUSTERS] = {0};
@@ -213,6 +213,7 @@ void heartbeats_timer(void)
213213
gettimeofday(&now, NULL);
214214
ping_reply_int = PING_REPLY_INTERVAL(node);
215215
last_ping_int = TIME_DIFF(node->last_ping, now);
216+
last_sent_int = TIME_DIFF(node->last_sent, now);
216217

217218
prev_ls = -1;
218219
new_ls = -1;
@@ -225,16 +226,21 @@ void heartbeats_timer(void)
225226
/* restart pinging sequence */
226227
do_action_trans_0(node, &new_ls);
227228
} else if (node->link_state == LS_RETRY_SEND_FAIL &&
228-
last_ping_int >= (utime_t)ping_timeout*1000) {
229+
last_ping_int >= (time_t)ping_timeout_us) {
229230
CL_DBG("case 1: RETRY_SEND_FAIL and timeout\n");
230231
prev_ls = node->link_state;
231232
lock_release(node->lock);
232233

233234
/* failed to send previous ping, retry */
234235
do_action_trans_1(node, &new_ls);
235236
} else if ((node->link_state == LS_UP || node->link_state == LS_RESTARTED) &&
236-
(ping_reply_int >= (time_t)ping_timeout*1000) &&
237-
last_ping_int >= (utime_t)ping_timeout*1000) {
237+
/* have yet to receive a ping reply, or it was unacceptably slow */
238+
(ping_reply_int <= 0 || ping_reply_int >= (time_t)ping_timeout_us) &&
239+
/* ... and we're pinging or haven't sent a recent BIN packet */
240+
(node->link_state == LS_RESTARTED
241+
|| last_sent_int >= (time_t)ping_timeout_us) &&
242+
/* ... and a new ping packet is due */
243+
last_ping_int >= (time_t)ping_timeout_us) {
238244
CL_DBG("case 2: LS_UP and timeout\n");
239245
prev_ls = -2;
240246
lock_release(node->lock);
@@ -243,24 +249,25 @@ void heartbeats_timer(void)
243249
do_action_trans_2(node, &new_ls);
244250
ev_actions_required[no_clusters] = 1;
245251
} else if (node->link_state == LS_RETRYING &&
246-
(ping_reply_int >= (time_t)ping_timeout*1000) &&
247-
last_ping_int >= (utime_t)ping_timeout*1000) {
252+
(ping_reply_int <= 0 || ping_reply_int >= (time_t)ping_timeout_us) &&
253+
last_ping_int >= (time_t)ping_timeout_us) {
248254
CL_DBG("case 3: LS_RETRYING and timeout\n");
249255
prev_ls = node->link_state;
250256
lock_release(node->lock);
251257

252258
/* previous ping retry not replied, continue to retry */
253259
do_action_trans_3(node, &new_ls);
254260
} else if (node->link_state == LS_DOWN &&
255-
last_ping_int >= (utime_t)node_timeout*1000000) {
261+
last_ping_int >= (time_t)node_timeout*1000000) {
256262
CL_DBG("case 4: LS_DOWN and timeout\n");
257263
prev_ls = node->link_state;
258264
lock_release(node->lock);
259265

260266
/* ping a failed node after node_timeout since last ping */
261267
do_action_trans_4(node, &new_ls);
262268
} else if (node->link_state == LS_UP &&
263-
last_ping_int >= (utime_t)ping_interval*1000000) {
269+
last_sent_int >= (time_t)ping_interval_us &&
270+
last_ping_int >= (time_t)ping_interval_us) {
264271
CL_DBG("case 5: LS_UP and timeout\n");
265272
prev_ls = node->link_state;
266273
lock_release(node->lock);
@@ -1357,29 +1364,39 @@ void handle_ping(bin_packet_t *received, node_info_t *src_node,
13571364
void handle_pong(bin_packet_t *received, node_info_t *src_node,
13581365
struct timeval rcv_time, int *ev_actions_required)
13591366
{
1367+
time_t last_recv_int;
13601368
int node_list[MAX_NO_NODES], i, nr_nodes;
13611369

13621370
bin_pop_int(received, &nr_nodes);
13631371
for (i=0; i<nr_nodes; i++)
13641372
bin_pop_int(received, &node_list[i]);
13651373

1374+
last_recv_int = TIME_DIFF(src_node->last_recv, rcv_time);
1375+
13661376
lock_get(src_node->lock);
13671377

13681378
src_node->last_pong = rcv_time;
1379+
src_node->last_recv = rcv_time;
13691380

13701381
/* check possible races between setting the appropriate state
13711382
* after sending ping and receiving the reply */
13721383
if ((src_node->link_state == LS_RESTART_PINGING ||
13731384
src_node->link_state == LS_RETRY_SEND_FAIL ||
13741385
src_node->link_state == LS_DOWN) &&
13751386
src_node->last_ping_state == 0 &&
1376-
TIME_DIFF(src_node->last_ping, rcv_time) < (utime_t)ping_timeout*1000)
1387+
TIME_DIFF(src_node->last_ping, rcv_time) < (time_t)ping_timeout_us)
13771388
src_node->link_state = LS_TEMP;
13781389

13791390
/* if the node was retried and a reply was expected, it should be UP again */
1380-
if (src_node->link_state == LS_RESTARTED ||
1391+
if ((src_node->link_state == LS_RESTARTED ||
13811392
src_node->link_state == LS_RETRYING ||
1382-
src_node->link_state == LS_TEMP) {
1393+
src_node->link_state == LS_TEMP) &&
1394+
/* if either this PONG wasn't too late or we received
1395+
* *any* other type of BIN packet in the mean time */
1396+
((PING_REPLY_INTERVAL(src_node) > 0 &&
1397+
PING_REPLY_INTERVAL(src_node) < (time_t)ping_timeout_us)
1398+
|| last_recv_int <= 0 || last_recv_int < (time_t)ping_timeout_us)) {
1399+
13831400
lock_release(src_node->lock);
13841401

13851402
set_link_w_neigh_up(src_node, nr_nodes, node_list);

0 commit comments

Comments
 (0)