Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions tests/basic/fuse/fuse-daemon-stall-enotconn.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#!/bin/bash

. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc

# Path of the hold file used by this test. Starts empty; it is assigned the
# real path ("$B0/$V0-child-down.hold") further down, and cleanup_test removes
# it with rm -f.
holdfile=""

# Tear-down for this test, installed as the EXIT trap and also called
# explicitly at the end of the script.
#
# Order: delete the hold file, force-unmount $M0 (its output is discarded and
# a failure is ignored), then run the common cleanup.
# NOTE(review): the hold file is presumably removed first so that a client
# still blocked on it is released before the unmount -- confirm.
cleanup_test() {
    rm -f "$holdfile"
    force_umount "$M0" &>/dev/null || :
    cleanup
}

# Print the path of the first file matching mnt-glusterfs-*.log inside the
# directory reported by `$CLI --print-logdir`. Prints nothing when no such
# file exists (the error from ls is discarded).
mount_log() {
    local log_root
    log_root=$($CLI --print-logdir)

    ls "${log_root}"/mnt-glusterfs-*.log 2>/dev/null | head -n 1
}

# Print the number of lines in the client mount log that report the
# debug-disconnect-notify-holdfile hook blocking the RPC_CLNT_DISCONNECT
# notify.
#
# While the mount log does not exist yet (mount_log printed nothing, or the
# file has not been created), print "0" and return 1 instead of running grep
# on a missing path, which would emit an error on stderr and no count at all.
# This mirrors grep -c, which also prints 0 and exits 1 when nothing matches.
function hold_hook_logged {
    local logfile=""

    logfile=$(mount_log)
    if [ ! -f "$logfile" ]; then
        echo 0
        return 1
    fi

    grep -c "debug-disconnect-notify-holdfile is blocking RPC_CLNT_DISCONNECT notify" "$logfile"
}

# Print the number of lines in the client mount log that contain
# "Connected, attached to remote volume", i.e. how many successful brick
# connections the client has logged.
#
# While the mount log does not exist yet (mount_log printed nothing, or the
# file has not been created), print "0" and return 1 instead of running grep
# on a missing path, which would emit an error on stderr and no count at all.
# This mirrors grep -c, which also prints 0 and exits 1 when nothing matches.
function connect_count {
    local logfile=""

    logfile=$(mount_log)
    if [ ! -f "$logfile" ]; then
        echo 0
        return 1
    fi

    grep -c "Connected, attached to remote volume" "$logfile"
}

# Test body: mount a single-brick volume with the client-side
# debug-disconnect-notify-holdfile hook armed, kill and restart the brick, and
# check that the client logs a second successful connect while the hold file
# still exists.
# NOTE(review): cleanup comes from the sourced include.rc; presumably it
# resets state left behind by earlier tests -- confirm.
cleanup

# From here on the hold file path is known; make sure it is removed (and the
# mount torn down) by cleanup_test even if the script exits early.
holdfile="$B0/$V0-child-down.hold"
trap cleanup_test EXIT

TEST glusterd
TEST pidof glusterd

# Single-brick volume with the listed performance options switched off and a
# 2-second ping-timeout.
# NOTE(review): the short ping-timeout is presumably there so the client
# notices the brick going away quickly -- confirm.
TEST $CLI volume create $V0 $H0:$B0/${V0}1
TEST $CLI volume set $V0 performance.quick-read off
TEST $CLI volume set $V0 performance.write-behind off
TEST $CLI volume set $V0 performance.io-cache off
TEST $CLI volume set $V0 performance.stat-prefetch off
TEST $CLI volume set $V0 performance.client-io-threads off
TEST $CLI volume set $V0 ping-timeout 2
TEST $CLI volume start $V0
EXPECT 'Started' volinfo_field $V0 'Status'

# Create the hold file before mounting, and hand its path to the $V0-client-0
# xlator through --xlator-option.
TEST touch "$holdfile"
TEST $GFS --xlator-option="$V0-client-0.debug-disconnect-notify-holdfile=$holdfile" \
--volfile-id=/$V0 --volfile-server=$H0 $M0
# Sanity check that the mount accepts a file creation before the brick is killed.
TEST touch $M0/preflight

# Take the brick down and wait for brick_up_status to report "0".
TEST kill_brick $V0 $H0 $B0/${V0}1
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" brick_up_status $V0 $H0 $B0/${V0}1

# Wait until the client has entered the intentionally stalled disconnect notify.
EXPECT_WITHIN 8 "1" hold_hook_logged

# Bring the brick back while the hold file is still in place.
TEST $CLI volume start $V0 force
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1

# This is the bug reproducer: reconnect should not be gated on synchronous
# disconnect notify. Current code schedules reconnect only after notify
# returns, so the second successful connect never appears while the holdfile
# exists.
EXPECT_WITHIN 8 "2" connect_count

# Disarm the EXIT trap and run the tear-down explicitly.
trap - EXIT
cleanup_test
36 changes: 36 additions & 0 deletions xlators/protocol/client/src/client.c
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,32 @@ client_notify_dispatch(xlator_t *this, int32_t event, void *data, ...)
return ret;
}

/*
 * Test-only hook, called from client_rpc_notify before the disconnect is
 * logged and propagated.
 *
 * If the xlator's private conf names a debug-disconnect-notify-holdfile and
 * access(F_OK) succeeds on that path, log a warning, then poll once per
 * second until access() on the path fails, and log a second warning on
 * release. Returns immediately when there is no private conf, no hold file
 * is configured, or the path is not accessible at the time of the call.
 *
 * The calling thread is blocked for as long as the hold file stays in place.
 */
static void
client_debug_hold_disconnect_notify(xlator_t *this)
{
    clnt_conf_t *priv = this->private;

    if (priv == NULL || priv->debug_disconnect_notify_holdfile == NULL ||
        access(priv->debug_disconnect_notify_holdfile, F_OK) != 0) {
        return;
    }

    gf_log(this->name, GF_LOG_WARNING,
           "debug-disconnect-notify-holdfile is blocking RPC_CLNT_DISCONNECT "
           "notify on %s",
           priv->debug_disconnect_notify_holdfile);

    /* Re-check the path before every one-second nap; leave as soon as it is
     * gone. */
    for (;;) {
        if (access(priv->debug_disconnect_notify_holdfile, F_OK) != 0) {
            break;
        }
        sleep(1);
    }

    gf_log(this->name, GF_LOG_WARNING,
           "debug-disconnect-notify-holdfile released RPC_CLNT_DISCONNECT "
           "notify");
}

int
client_submit_request(xlator_t *this, void *req, call_frame_t *frame,
rpc_clnt_prog_t *prog, int procnum, fop_cbk_fn_t cbkfn,
Expand Down Expand Up @@ -2237,6 +2263,8 @@ client_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
client_mark_fd_bad(this);

if (!conf->skip_notify) {
client_debug_hold_disconnect_notify(this);

if (conf->can_log_disconnect) {
if (!conf->disconnect_err_logged) {
gf_smsg(this->name, GF_LOG_INFO, 0,
Expand Down Expand Up @@ -2426,6 +2454,8 @@ build_client_config(xlator_t *this, clnt_conf_t *conf)
GF_OPTION_INIT("ping-timeout", conf->opt.ping_timeout, time, out);

GF_OPTION_INIT("remote-subvolume", conf->opt.remote_subvolume, path, out);
GF_OPTION_INIT("debug-disconnect-notify-holdfile",
conf->debug_disconnect_notify_holdfile, path, out);
if (!conf->opt.remote_subvolume)
gf_smsg(this->name, GF_LOG_WARNING, EINVAL,
PC_MSG_REMOTE_SUBVOL_NOT_GIVEN, NULL);
Expand Down Expand Up @@ -2932,6 +2962,12 @@ struct volume_options options[] = {
{.key = {"remote-subvolume"},
.type = GF_OPTION_TYPE_ANY,
.default_value = "{{ brick.path }}"},
{
.key = {"debug-disconnect-notify-holdfile"},
.type = GF_OPTION_TYPE_PATH,
.description = "Test-only option that blocks RPC_CLNT_DISCONNECT "
"notify while the given path exists.",
},
{.key = {"frame-timeout", "rpc-timeout"},
.type = GF_OPTION_TYPE_TIME,
.min = 0,
Expand Down
1 change: 1 addition & 0 deletions xlators/protocol/client/src/client.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ typedef struct clnt_conf {
compliance as bricks cleanup any granted
locks when a client disconnects.
*/
char *debug_disconnect_notify_holdfile;
gf_boolean_t connection_to_brick; /*True from attempt to connect to brick
till disconnection to brick*/
pthread_cond_t fini_complete_cond; /* Used to wait till we finish the fini
Expand Down