Skip to content

Commit 0761377

Browse files
committed
clusterer: Fix node pinging regression in 7d74f3d
While commit 7d74f3d fixed links going down during pinging, it introduced a bug where node links could flip/flop between Up/Down state intermittently on some setups. This patch addresses the new issue. (cherry picked from commit f98189a)
1 parent c346895 commit 0761377

4 files changed

Lines changed: 45 additions & 30 deletions

File tree

modules/clusterer/clusterer.c

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -350,10 +350,7 @@ static int msg_send_retry(bin_packet_t *packet, node_info_t *dest,
350350
gettimeofday(&now, NULL);
351351

352352
/* sent a TCP BIN packet directly to @dest -> delay next ping */
353-
lock_get(chosen_dest->lock);
354-
if (chosen_dest->link_state == LS_UP)
355-
chosen_dest->last_ping = now;
356-
lock_release(chosen_dest->lock);
353+
chosen_dest->last_sent = now;
357354

358355
return 0;
359356
}
@@ -1081,9 +1078,7 @@ void bin_rcv_cl_extra_packets(bin_packet_t *packet, int packet_type,
10811078
}
10821079

10831080
lock_get(node->lock);
1084-
1085-
/* bump "last pong" ts, since we fully read a valid TCP BIN packet */
1086-
node->last_pong = now;
1081+
node->last_recv = now;
10871082

10881083
if (!(node->flags & NODE_STATE_ENABLED)) {
10891084
lock_release(node->lock);
@@ -1217,9 +1212,8 @@ void bin_rcv_cl_packets(bin_packet_t *packet, int packet_type,
12171212
}
12181213

12191214
lock_get(node->lock);
1220-
1221-
/* bump "last pong" ts, since we fully read a valid TCP BIN packet */
1222-
node->last_pong = now;
1215+
if (packet_type != CLUSTERER_PONG)
1216+
node->last_recv = now;
12231217

12241218
if (!(node->flags & NODE_STATE_ENABLED)) {
12251219
lock_release(node->lock);
@@ -1365,9 +1359,7 @@ static void bin_rcv_mod_packets(bin_packet_t *packet, int packet_type,
13651359
}
13661360

13671361
lock_get(node->lock);
1368-
1369-
/* bump "last pong" ts, since we fully read a valid TCP BIN packet */
1370-
node->last_pong = now;
1362+
node->last_recv = now;
13711363

13721364
if (!(node->flags & NODE_STATE_ENABLED)) {
13731365
lock_release(node->lock);
@@ -1885,4 +1877,4 @@ unsigned long clusterer_get_num_nodes(int state)
18851877
lock_stop_read(cl_list_lock);
18861878

18871879
return nodecount;
1888-
}
1880+
}

modules/clusterer/clusterer_mod.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ void *cl_srg=NULL;
7070

7171
str node_st_sr_ident = str_init("node_states");
7272

73+
long ping_timeout_us;
74+
long ping_interval_us;
75+
7376
/* module interface functions */
7477
static int mod_init(void);
7578
static int child_init(int rank);
@@ -411,6 +414,9 @@ static int mod_init(void)
411414
LM_WARN("Invalid ping_timeout parameter, using default value\n");
412415
ping_timeout = DEFAULT_PING_TIMEOUT;
413416
}
417+
ping_timeout_us = ping_timeout * 1000;
418+
ping_interval_us = ping_interval * 1000000;
419+
414420
if (seed_fb_interval < 0) {
415421
LM_WARN("Invalid seed_fallback_interval parameter, using default value\n");
416422
seed_fb_interval = DEFAULT_SEED_FB_INTERVAL;

modules/clusterer/node_info.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@ struct node_info {
8787
/* fields protected by node lock */
8888
clusterer_link_state link_state; /* state of the "link" with this node */
8989
int last_ping_state; /* state(success/error) of the last ping sent to this node */
90-
struct timeval last_ping; /* last ping sent to this node */
91-
struct timeval last_pong; /* last pong received from this node */
90+
struct timeval last_ping, last_sent; /* last ping/packet sent to this node */
91+
struct timeval last_pong, last_recv; /* last pong/packet received from this node */
9292
struct neighbour *neighbour_list; /* list of directly reachable neighbours */
9393
int ls_seq_no; /* sequence number of the last link state update */
9494
int top_seq_no; /* sequence number of the last topology update message */

modules/clusterer/topology.c

Lines changed: 31 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,11 @@
2828

2929
extern int ping_interval;
3030
extern int node_timeout;
31-
extern int ping_timeout;
31+
extern long ping_timeout_us, ping_interval_us;
3232

3333
#define PING_REPLY_INTERVAL(_node) \
34-
((_node)->last_ping.tv_sec*1000000 + (_node)->last_ping.tv_usec \
35-
- (_node)->last_pong.tv_sec*1000000 - (_node)->last_pong.tv_usec)
34+
((_node)->last_pong.tv_sec*1000000 + (_node)->last_pong.tv_usec \
35+
- (_node)->last_ping.tv_sec*1000000 - (_node)->last_ping.tv_usec)
3636

3737
static int send_ping(node_info_t *node, int req_node_list)
3838
{
@@ -184,7 +184,7 @@ static void do_action_trans_5(node_info_t *node, int *link_state_to_set,
184184
void heartbeats_timer(void)
185185
{
186186
struct timeval now;
187-
time_t last_ping_int, ping_reply_int;
187+
time_t last_ping_int, last_sent_int, ping_reply_int;
188188
cluster_info_t *clusters_it;
189189
node_info_t *node;
190190
int ev_actions_required[MAX_NO_CLUSTERS] = {0};
@@ -212,6 +212,7 @@ void heartbeats_timer(void)
212212
gettimeofday(&now, NULL);
213213
ping_reply_int = PING_REPLY_INTERVAL(node);
214214
last_ping_int = TIME_DIFF(node->last_ping, now);
215+
last_sent_int = TIME_DIFF(node->last_sent, now);
215216

216217
prev_ls = -1;
217218
new_ls = -1;
@@ -224,16 +225,21 @@ void heartbeats_timer(void)
224225
/* restart pinging sequence */
225226
do_action_trans_0(node, &new_ls);
226227
} else if (node->link_state == LS_RETRY_SEND_FAIL &&
227-
last_ping_int >= (utime_t)ping_timeout*1000) {
228+
last_ping_int >= (time_t)ping_timeout_us) {
228229
CL_DBG("case 1: RETRY_SEND_FAIL and timeout\n");
229230
prev_ls = node->link_state;
230231
lock_release(node->lock);
231232

232233
/* failed to send previous ping, retry */
233234
do_action_trans_1(node, &new_ls);
234235
} else if ((node->link_state == LS_UP || node->link_state == LS_RESTARTED) &&
235-
(ping_reply_int >= (time_t)ping_timeout*1000) &&
236-
last_ping_int >= (utime_t)ping_timeout*1000) {
236+
/* have yet to receive a ping reply, or it was unacceptably slow */
237+
(ping_reply_int <= 0 || ping_reply_int >= (time_t)ping_timeout_us) &&
238+
/* ... and we're pinging or haven't sent a recent BIN packet */
239+
(node->link_state == LS_RESTARTED
240+
|| last_sent_int >= (time_t)ping_timeout_us) &&
241+
/* ... and a new ping packet is due */
242+
last_ping_int >= (time_t)ping_timeout_us) {
237243
CL_DBG("case 2: LS_UP and timeout\n");
238244
prev_ls = -2;
239245
lock_release(node->lock);
@@ -242,24 +248,25 @@ void heartbeats_timer(void)
242248
do_action_trans_2(node, &new_ls);
243249
ev_actions_required[no_clusters] = 1;
244250
} else if (node->link_state == LS_RETRYING &&
245-
(ping_reply_int >= (time_t)ping_timeout*1000) &&
246-
last_ping_int >= (utime_t)ping_timeout*1000) {
251+
(ping_reply_int <= 0 || ping_reply_int >= (time_t)ping_timeout_us) &&
252+
last_ping_int >= (time_t)ping_timeout_us) {
247253
CL_DBG("case 3: LS_RETRYING and timeout\n");
248254
prev_ls = node->link_state;
249255
lock_release(node->lock);
250256

251257
/* previous ping retry not replied, continue to retry */
252258
do_action_trans_3(node, &new_ls);
253259
} else if (node->link_state == LS_DOWN &&
254-
last_ping_int >= (utime_t)node_timeout*1000000) {
260+
last_ping_int >= (time_t)node_timeout*1000000) {
255261
CL_DBG("case 4: LS_DOWN and timeout\n");
256262
prev_ls = node->link_state;
257263
lock_release(node->lock);
258264

259265
/* ping a failed node after node_timeout since last ping */
260266
do_action_trans_4(node, &new_ls);
261267
} else if (node->link_state == LS_UP &&
262-
last_ping_int >= (utime_t)ping_interval*1000000) {
268+
last_sent_int >= (time_t)ping_interval_us &&
269+
last_ping_int >= (time_t)ping_interval_us) {
263270
CL_DBG("case 5: LS_UP and timeout\n");
264271
prev_ls = node->link_state;
265272
lock_release(node->lock);
@@ -1346,29 +1353,39 @@ void handle_ping(bin_packet_t *received, node_info_t *src_node,
13461353
void handle_pong(bin_packet_t *received, node_info_t *src_node,
13471354
struct timeval rcv_time, int *ev_actions_required)
13481355
{
1356+
time_t last_recv_int;
13491357
int node_list[MAX_NO_NODES], i, nr_nodes;
13501358

13511359
bin_pop_int(received, &nr_nodes);
13521360
for (i=0; i<nr_nodes; i++)
13531361
bin_pop_int(received, &node_list[i]);
13541362

1363+
last_recv_int = TIME_DIFF(src_node->last_recv, rcv_time);
1364+
13551365
lock_get(src_node->lock);
13561366

13571367
src_node->last_pong = rcv_time;
1368+
src_node->last_recv = rcv_time;
13581369

13591370
/* check possible races between setting the appropriate state
13601371
* after sending ping and receiving the reply */
13611372
if ((src_node->link_state == LS_RESTART_PINGING ||
13621373
src_node->link_state == LS_RETRY_SEND_FAIL ||
13631374
src_node->link_state == LS_DOWN) &&
13641375
src_node->last_ping_state == 0 &&
1365-
TIME_DIFF(src_node->last_ping, rcv_time) < (utime_t)ping_timeout*1000)
1376+
TIME_DIFF(src_node->last_ping, rcv_time) < (time_t)ping_timeout_us)
13661377
src_node->link_state = LS_TEMP;
13671378

13681379
/* if the node was retried and a reply was expected, it should be UP again */
1369-
if (src_node->link_state == LS_RESTARTED ||
1380+
if ((src_node->link_state == LS_RESTARTED ||
13701381
src_node->link_state == LS_RETRYING ||
1371-
src_node->link_state == LS_TEMP) {
1382+
src_node->link_state == LS_TEMP) &&
1383+
/* if either this PONG wasn't too late or we received
1384+
* *any* other type of BIN packet in the mean time */
1385+
((PING_REPLY_INTERVAL(src_node) > 0 &&
1386+
PING_REPLY_INTERVAL(src_node) < (time_t)ping_timeout_us)
1387+
|| last_recv_int <= 0 || last_recv_int < (time_t)ping_timeout_us)) {
1388+
13721389
lock_release(src_node->lock);
13731390

13741391
set_link_w_neigh_up(src_node, nr_nodes, node_list);

0 commit comments

Comments
 (0)