Skip to content

Commit 97f27d3

Browse files
committed
Feature: fencer: handle any enforced fencing delay
An enforced fencing delay takes precedence over any pcmk_delay_base/max configured for the corresponding fencing resources. The enforced delay is applied only to the first device in the first fencing topology level. pcmk_delay_base/max delays are now also scheduled with g_timeout_add_seconds(), so start delays are consistently handled in seconds rather than milliseconds.
1 parent 441d47c commit 97f27d3

3 files changed

Lines changed: 64 additions & 30 deletions

File tree

daemons/fenced/fenced_commands.c

Lines changed: 41 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ typedef struct async_command_s {
7878
int default_timeout; /* seconds */
7979
int timeout; /* seconds */
8080

81-
int start_delay; /* milliseconds */
81+
int start_delay; /* seconds */
8282
int delay_id;
8383

8484
char *op;
@@ -121,36 +121,36 @@ static int
121121
get_action_delay_max(stonith_device_t * device, const char * action)
122122
{
123123
const char *value = NULL;
124-
int delay_max_ms = 0;
124+
int delay_max = 0;
125125

126126
if (safe_str_neq(action, "off") && safe_str_neq(action, "reboot")) {
127127
return 0;
128128
}
129129

130130
value = g_hash_table_lookup(device->params, STONITH_ATTR_DELAY_MAX);
131131
if (value) {
132-
delay_max_ms = crm_get_msec(value);
132+
delay_max = crm_parse_interval_spec(value) / 1000;
133133
}
134134

135-
return delay_max_ms;
135+
return delay_max;
136136
}
137137

138138
static int
139139
get_action_delay_base(stonith_device_t * device, const char * action)
140140
{
141141
const char *value = NULL;
142-
int delay_base_ms = 0;
142+
int delay_base = 0;
143143

144144
if (safe_str_neq(action, "off") && safe_str_neq(action, "reboot")) {
145145
return 0;
146146
}
147147

148148
value = g_hash_table_lookup(device->params, STONITH_ATTR_DELAY_BASE);
149149
if (value) {
150-
delay_base_ms = crm_get_msec(value);
150+
delay_base = crm_parse_interval_spec(value) / 1000;
151151
}
152152

153-
return delay_base_ms;
153+
return delay_base;
154154
}
155155

156156
/*!
@@ -241,6 +241,9 @@ create_async_command(xmlNode * msg)
241241
crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options));
242242
crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->default_timeout));
243243
cmd->timeout = cmd->default_timeout;
244+
// Default value -1 means no enforced fencing delay
245+
cmd->start_delay = -1;
246+
crm_element_value_int(msg, F_STONITH_DELAY, &(cmd->start_delay));
244247

245248
cmd->origin = crm_element_value_copy(msg, F_ORIG);
246249
cmd->remote_op_id = crm_element_value_copy(msg, F_STONITH_REMOTE_OP_ID);
@@ -347,7 +350,7 @@ stonith_device_execute(stonith_device_t * device)
347350

348351
if (pending_op && pending_op->delay_id) {
349352
crm_trace
350-
("Operation '%s'%s%s on %s was asked to run too early, waiting for start_delay timeout of %dms",
353+
("Operation '%s'%s%s on %s was asked to run too early, waiting for start_delay timeout of %ds",
351354
pending_op->action, pending_op->victim ? " targeting " : "",
352355
pending_op->victim ? pending_op->victim : "",
353356
device->id, pending_op->start_delay);
@@ -462,6 +465,7 @@ schedule_stonith_command(async_command_t * cmd, stonith_device_t * device)
462465
{
463466
int delay_max = 0;
464467
int delay_base = 0;
468+
bool delay_enforced = (cmd->start_delay >= 0);
465469

466470
CRM_CHECK(cmd != NULL, return);
467471
CRM_CHECK(device != NULL, return);
@@ -494,30 +498,37 @@ schedule_stonith_command(async_command_t * cmd, stonith_device_t * device)
494498
device->pending_ops = g_list_append(device->pending_ops, cmd);
495499
mainloop_set_trigger(device->work);
496500

497-
delay_max = get_action_delay_max(device, cmd->action);
498-
delay_base = get_action_delay_base(device, cmd->action);
499-
if (delay_max == 0) {
500-
delay_max = delay_base;
501-
}
502-
if (delay_max < delay_base) {
503-
crm_warn("Base-delay (%dms) is larger than max-delay (%dms) "
504-
"for %s on %s - limiting to max-delay",
505-
delay_base, delay_max, cmd->action, device->id);
506-
delay_base = delay_max;
501+
// No enforced fencing delay
502+
if (delay_enforced == FALSE) {
503+
delay_max = get_action_delay_max(device, cmd->action);
504+
delay_base = get_action_delay_base(device, cmd->action);
505+
if (delay_max == 0) {
506+
delay_max = delay_base;
507+
}
508+
if (delay_max < delay_base) {
509+
crm_warn("Base-delay (%ds) is larger than max-delay (%ds) "
510+
"for %s on %s - limiting to max-delay",
511+
delay_base, delay_max, cmd->action, device->id);
512+
delay_base = delay_max;
513+
}
514+
if (delay_max > 0) {
515+
// coverity[dont_call] We're not using rand() for security
516+
cmd->start_delay =
517+
((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0)
518+
+ delay_base;
519+
}
507520
}
508-
if (delay_max > 0) {
509-
// coverity[dont_call] We're not using rand() for security
510-
cmd->start_delay =
511-
((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0)
512-
+ delay_base;
513-
crm_notice("Delaying '%s' action%s%s on %s for %dms (timeout=%ds, base=%dms, "
514-
"max=%dms)",
515-
cmd->action,
516-
cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "",
517-
device->id, cmd->start_delay, cmd->timeout,
518-
delay_base, delay_max);
521+
522+
if (cmd->start_delay > 0) {
523+
crm_notice("Delaying '%s' action%s%s on %s for %s%ds (timeout=%ds, base=%ds, "
524+
"max=%ds)",
525+
cmd->action,
526+
cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "",
527+
device->id, delay_enforced ? "enforced " : "",
528+
cmd->start_delay, cmd->timeout,
529+
delay_base, delay_max);
519530
cmd->delay_id =
520-
g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
531+
g_timeout_add_seconds(cmd->start_delay, start_delay_helper, cmd);
521532
}
522533
}
523534

daemons/fenced/fenced_remote.c

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -842,6 +842,11 @@ stonith_topology_next(remote_fencing_op_t * op)
842842
op->client_name, op->originator, op->id);
843843
set_op_device_list(op, tp->levels[op->level]);
844844

845+
// The enforced delay has been applied for the first fencing level
846+
if (op->level > 1 && op->delay > 0) {
847+
op->delay = 0;
848+
}
849+
845850
if (g_list_next(op->devices_list) && safe_str_eq(op->action, "reboot")) {
846851
/* A reboot has been requested for a topology level with multiple
847852
* devices. Instead of rebooting the devices sequentially, we will
@@ -1000,6 +1005,10 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer)
10001005

10011006
crm_element_value_int(request, F_STONITH_TIMEOUT, &(op->base_timeout));
10021007

1008+
// Default value -1 means no enforced fencing delay
1009+
op->delay = -1;
1010+
crm_element_value_int(request, F_STONITH_DELAY, &(op->delay));
1011+
10031012
if (peer && dev) {
10041013
op->id = crm_element_value_copy(dev, F_STONITH_REMOTE_OP_ID);
10051014
} else {
@@ -1448,6 +1457,12 @@ advance_op_topology(remote_fencing_op_t *op, const char *device, xmlNode *msg,
14481457
/* Necessary devices remain, so execute the next one */
14491458
crm_trace("Next targeting %s on behalf of %s@%s (rc was %d)",
14501459
op->target, op->originator, op->client_name, rc);
1460+
1461+
// The enforced delay has been applied for the first device
1462+
if (op->delay > 0) {
1463+
op->delay = 0;
1464+
}
1465+
14511466
call_remote_stonith(op, NULL);
14521467
} else {
14531468
/* We're done with all devices and phases, so finalize operation */
@@ -1503,6 +1518,10 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer)
15031518
crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout);
15041519
crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options);
15051520

1521+
if (op->delay >= 0) {
1522+
crm_xml_add_int(remote_op, F_STONITH_DELAY, op->delay);
1523+
}
1524+
15061525
if (device) {
15071526
timeout_one = TIMEOUT_MULTIPLY_FACTOR *
15081527
get_device_timeout(op, peer, device);

daemons/fenced/pacemaker-fenced.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,10 @@ typedef struct remote_fencing_op_s {
113113
* values associated with the devices this fencing operation may call */
114114
gint total_timeout;
115115

116+
/*! Enforced fencing delay.
117+
* Default value -1 means no enforced fencing delay. */
118+
int delay;
119+
116120
/*! Delegate is the node being asked to perform a fencing action
117121
* on behalf of the node that owns the remote operation. Some operations
118122
* will involve multiple delegates. This value represents the final delegate

0 commit comments

Comments
 (0)