Skip to content

Commit 3796262

Browse files
committed
session server UPDATE CH thread lifecycle management
Store thread IDs in ch_thread_arg to allow proper pthread_join when stopping threads. Add nc_session_server_ch_client_dispatch_stop_if_dispatched() to centralize thread stopping logic, fixing several locking issues: - Only unlock ch_lock if it was successfully acquired in nc_session_free() - Use config_update_lock to prevent data race in nc_server_destroy() - Properly handle config lock unlocking during thread join to avoid deadlock
1 parent 93e9cf9 commit 3796262

4 files changed

Lines changed: 209 additions & 142 deletions

File tree

src/server_config.c

Lines changed: 76 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -3941,7 +3941,7 @@ config_netconf_client(const struct lyd_node *node, enum nc_operation parent_op,
39413941
struct lyd_node *n;
39423942
enum nc_operation op;
39433943
const char *name;
3944-
LY_ARRAY_COUNT_TYPE i = 0, j = 0;
3944+
LY_ARRAY_COUNT_TYPE i = 0;
39453945
struct nc_ch_client *ch_client = NULL;
39463946

39473947
NC_NODE_GET_OP(node, parent_op, &op);
@@ -3988,30 +3988,6 @@ config_netconf_client(const struct lyd_node *node, enum nc_operation parent_op,
39883988

39893989
/* all children processed, we can now delete the client */
39903990
if (op == NC_OP_DELETE) {
3991-
/* CH THREADS DATA RD LOCK */
3992-
if (nc_rwlock_lock(&server_opts.ch_threads_lock, NC_RWLOCK_READ, NC_CH_THREADS_LOCK_TIMEOUT, __func__) != 1) {
3993-
return 1;
3994-
}
3995-
3996-
/* find the thread data for this CH client, not found <==> CH thread not running */
3997-
LY_ARRAY_FOR(server_opts.ch_threads, j) {
3998-
if (!strcmp(server_opts.ch_threads[j]->client_name, name)) {
3999-
break;
4000-
}
4001-
}
4002-
4003-
if (j < LY_ARRAY_COUNT(server_opts.ch_threads)) {
4004-
/* the CH thread is running, notify it to stop */
4005-
if (nc_mutex_lock(&server_opts.ch_threads[j]->cond_lock, NC_CH_COND_LOCK_TIMEOUT, __func__) != 1) {
4006-
server_opts.ch_threads[j]->thread_running = 0;
4007-
pthread_cond_signal(&server_opts.ch_threads[j]->cond);
4008-
nc_mutex_unlock(&server_opts.ch_threads[j]->cond_lock, __func__);
4009-
}
4010-
}
4011-
4012-
/* CH THREADS DATA UNLOCK */
4013-
nc_rwlock_unlock(&server_opts.ch_threads_lock, __func__);
4014-
40153991
/* we can use 'i' from above */
40163992
if (i < LY_ARRAY_COUNT(config->ch_clients) - 1) {
40173993
config->ch_clients[i] = config->ch_clients[LY_ARRAY_COUNT(config->ch_clients) - 1];
@@ -5330,6 +5306,37 @@ nc_server_config_reconcile_sockets_listen(struct nc_server_config *old_cfg,
53305306

53315307
#ifdef NC_ENABLED_SSH_TLS
53325308

5309+
/**
5310+
* @brief Check if there are any new Call Home clients created in the new configuration.
5311+
*
5312+
* @param[in] old_cfg Old, currently active server configuration.
5313+
* @param[in] new_cfg New server configuration currently being applied.
5314+
* @return 1 if there are new CH clients, 0 otherwise.
5315+
*/
5316+
static int
5317+
nc_server_config_new_ch_clients_created(struct nc_server_config *old_cfg, struct nc_server_config *new_cfg)
5318+
{
5319+
struct nc_ch_client *old_ch_client, *new_ch_client;
5320+
int found;
5321+
5322+
/* check if there are any new clients */
5323+
LY_ARRAY_FOR(new_cfg->ch_clients, struct nc_ch_client, new_ch_client) {
5324+
found = 0;
5325+
LY_ARRAY_FOR(old_cfg->ch_clients, struct nc_ch_client, old_ch_client) {
5326+
if (!strcmp(new_ch_client->name, old_ch_client->name)) {
5327+
found = 1;
5328+
break;
5329+
}
5330+
}
5331+
if (!found) {
5332+
return 1;
5333+
}
5334+
}
5335+
5336+
/* no differences found */
5337+
return 0;
5338+
}
5339+
53335340
/**
53345341
* @brief Atomically dispatch new Call Home clients and reuse existing ones.
53355342
*
@@ -5347,55 +5354,63 @@ nc_server_config_reconcile_chclients_dispatch(struct nc_server_config *old_cfg,
53475354
int found;
53485355
LY_ARRAY_COUNT_TYPE i = 0;
53495356
char **started_clients = NULL, **client_name = NULL;
5357+
int dispatch_new_clients = 1;
53505358

53515359
if (!server_opts.ch_dispatch_data.acquire_ctx_cb || !server_opts.ch_dispatch_data.release_ctx_cb ||
53525360
!server_opts.ch_dispatch_data.new_session_cb) {
5353-
/* Call Home dispatch callbacks not set, nothing to do */
5354-
return 0;
5361+
/* Call Home dispatch callbacks not set, we can't dispatch new clients, but we can still stop deleted ones */
5362+
if (nc_server_config_new_ch_clients_created(old_cfg, new_cfg)) {
5363+
WRN(NULL, "New Call Home clients were created but Call Home dispatch callbacks are not set - "
5364+
"new clients will not be dispatched automatically.");
5365+
}
5366+
dispatch_new_clients = 0;
53555367
}
53565368

53575369
/*
53585370
* == PHASE 1: START NEW CLIENTS ==
53595371
* Start clients present in new_cfg that are not already running.
53605372
* Track successfully started clients for potential rollback.
53615373
*/
5362-
LY_ARRAY_FOR(new_cfg->ch_clients, struct nc_ch_client, new_ch_client) {
5363-
found = 0;
5374+
if (dispatch_new_clients) {
5375+
/* only dispatch if all required CBs are set */
5376+
LY_ARRAY_FOR(new_cfg->ch_clients, struct nc_ch_client, new_ch_client) {
5377+
found = 0;
53645378

5365-
/* CH THREADS LOCK (reading server_opts.ch_threads) */
5366-
if (nc_rwlock_lock(&server_opts.ch_threads_lock, NC_RWLOCK_READ, NC_CH_THREADS_LOCK_TIMEOUT, __func__) != 1) {
5367-
rc = 1;
5368-
goto rollback;
5369-
}
5379+
/* CH THREADS LOCK (reading server_opts.ch_threads) */
5380+
if (nc_rwlock_lock(&server_opts.ch_threads_lock, NC_RWLOCK_READ, NC_CH_THREADS_LOCK_TIMEOUT, __func__) != 1) {
5381+
rc = 1;
5382+
goto rollback;
5383+
}
53705384

5371-
LY_ARRAY_FOR(server_opts.ch_threads, struct nc_server_ch_thread_arg *, ch_thread_arg) {
5372-
if (!strcmp(new_ch_client->name, (*ch_thread_arg)->client_name)) {
5373-
/* already running, do not start again */
5374-
found = 1;
5375-
break;
5385+
LY_ARRAY_FOR(server_opts.ch_threads, struct nc_server_ch_thread_arg *, ch_thread_arg) {
5386+
if (!strcmp(new_ch_client->name, (*ch_thread_arg)->client_name)) {
5387+
/* already running, do not start again */
5388+
found = 1;
5389+
break;
5390+
}
53765391
}
5377-
}
53785392

5379-
/* CH THREADS UNLOCK */
5380-
nc_rwlock_unlock(&server_opts.ch_threads_lock, __func__);
5393+
/* CH THREADS UNLOCK */
5394+
nc_rwlock_unlock(&server_opts.ch_threads_lock, __func__);
53815395

5382-
if (!found) {
5383-
/* this is a new Call Home client, dispatch it */
5384-
rc = _nc_connect_ch_client_dispatch(new_ch_client, server_opts.ch_dispatch_data.acquire_ctx_cb,
5385-
server_opts.ch_dispatch_data.release_ctx_cb, server_opts.ch_dispatch_data.ctx_cb_data,
5386-
server_opts.ch_dispatch_data.new_session_cb, server_opts.ch_dispatch_data.new_session_cb_data);
5387-
if (rc) {
5388-
/* FAILURE! trigger rollback */
5389-
goto rollback;
5390-
}
5396+
if (!found) {
5397+
/* this is a new Call Home client, dispatch it */
5398+
rc = _nc_connect_ch_client_dispatch(new_ch_client, server_opts.ch_dispatch_data.acquire_ctx_cb,
5399+
server_opts.ch_dispatch_data.release_ctx_cb, server_opts.ch_dispatch_data.ctx_cb_data,
5400+
server_opts.ch_dispatch_data.new_session_cb, server_opts.ch_dispatch_data.new_session_cb_data);
5401+
if (rc) {
5402+
/* FAILURE! trigger rollback */
5403+
goto rollback;
5404+
}
53915405

5392-
/* successfully started, track it for potential rollback */
5393-
LY_ARRAY_NEW_GOTO(NULL, started_clients, client_name, rc, rollback);
5394-
*client_name = strdup(new_ch_client->name);
5395-
NC_CHECK_ERRMEM_GOTO(!*client_name, rc = 1, rollback);
5406+
/* successfully started, track it for potential rollback */
5407+
LY_ARRAY_NEW_GOTO(NULL, started_clients, client_name, rc, rollback);
5408+
*client_name = strdup(new_ch_client->name);
5409+
NC_CHECK_ERRMEM_GOTO(!*client_name, rc = 1, rollback);
53965410

5397-
/* ownership transferred to array */
5398-
client_name = NULL;
5411+
/* ownership transferred to array */
5412+
client_name = NULL;
5413+
}
53995414
}
54005415
}
54015416

@@ -5415,27 +5430,10 @@ nc_server_config_reconcile_chclients_dispatch(struct nc_server_config *old_cfg,
54155430

54165431
if (!found) {
54175432
/* this Call Home client was deleted, notify it to stop */
5418-
ch_thread_arg = NULL;
5419-
5420-
/* CH THREADS LOCK (reading server_opts.ch_threads) */
5421-
if (nc_rwlock_lock(&server_opts.ch_threads_lock, NC_RWLOCK_READ, NC_CH_THREADS_LOCK_TIMEOUT, __func__) != 1) {
5422-
/* Continue even if lock fails - best effort cleanup */
5423-
continue;
5424-
}
5425-
LY_ARRAY_FOR(server_opts.ch_threads, struct nc_server_ch_thread_arg *, ch_thread_arg) {
5426-
if (!strcmp(old_ch_client->name, (*ch_thread_arg)->client_name)) {
5427-
/* notify the thread to stop */
5428-
if (nc_mutex_lock(&(*ch_thread_arg)->cond_lock, NC_CH_COND_LOCK_TIMEOUT, __func__) != 1) {
5429-
(*ch_thread_arg)->thread_running = 0;
5430-
pthread_cond_signal(&(*ch_thread_arg)->cond);
5431-
nc_mutex_unlock(&(*ch_thread_arg)->cond_lock, __func__);
5432-
}
5433-
break;
5434-
}
5433+
if ((rc = nc_session_server_ch_client_dispatch_stop_if_dispatched(old_ch_client->name, NC_RWLOCK_WRITE))) {
5434+
ERR(NULL, "Failed to dispatch stop for Call Home client \"%s\".", old_ch_client->name);
5435+
goto rollback;
54355436
}
5436-
/* CH THREADS UNLOCK */
5437-
nc_rwlock_unlock(&server_opts.ch_threads_lock, __func__);
5438-
/* Note: if ch_thread_arg is NULL here, the thread wasn't running. That's fine. */
54395437
}
54405438
}
54415439

@@ -5450,25 +5448,7 @@ nc_server_config_reconcile_chclients_dispatch(struct nc_server_config *old_cfg,
54505448
* to return to the pre-call state.
54515449
*/
54525450
LY_ARRAY_FOR(started_clients, i) {
5453-
ch_thread_arg = NULL;
5454-
5455-
/* CH THREADS LOCK (reading server_opts.ch_threads) */
5456-
if (nc_rwlock_lock(&server_opts.ch_threads_lock, NC_RWLOCK_READ, NC_CH_THREADS_LOCK_TIMEOUT, __func__) != 1) {
5457-
/* Continue even if lock fails - best effort rollback */
5458-
continue;
5459-
}
5460-
LY_ARRAY_FOR(server_opts.ch_threads, struct nc_server_ch_thread_arg *, ch_thread_arg) {
5461-
if (!strcmp(started_clients[i], (*ch_thread_arg)->client_name)) {
5462-
/* notify the newly started thread to stop */
5463-
nc_mutex_lock(&(*ch_thread_arg)->cond_lock, NC_CH_COND_LOCK_TIMEOUT, __func__);
5464-
(*ch_thread_arg)->thread_running = 0;
5465-
pthread_cond_signal(&(*ch_thread_arg)->cond);
5466-
nc_mutex_unlock(&(*ch_thread_arg)->cond_lock, __func__);
5467-
break;
5468-
}
5469-
}
5470-
/* CH THREADS UNLOCK */
5471-
nc_rwlock_unlock(&server_opts.ch_threads_lock, __func__);
5451+
nc_session_server_ch_client_dispatch_stop_if_dispatched(started_clients[i], NC_RWLOCK_WRITE);
54725452
}
54735453
/* rc is already set to non-zero from the failure point */
54745454

src/session.c

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -910,7 +910,7 @@ nc_session_free_transport(struct nc_session *session, int *multisession)
910910
* NC_CLIENT: we are waiting for the server to acknowledge our SSH channel EOF
911911
* by sending us its own SSH channel EOF. */
912912
if (ssh_channel_poll_timeout(session->ti.libssh.channel, NC_SESSION_FREE_SSH_POLL_EOF_TIMEOUT, 0) != SSH_EOF) {
913-
WRN(session, "Timeout for receiving SSH channel EOF from the peer elapsed.");
913+
WRN(session, "Timeout for receiving SSH channel EOF from the %s elapsed.", session->side == NC_CLIENT ? "server" : "client");
914914
}
915915
}
916916
ssh_channel_free(session->ti.libssh.channel);
@@ -1001,7 +1001,7 @@ nc_session_free_transport(struct nc_session *session, int *multisession)
10011001
API void
10021002
nc_session_free(struct nc_session *session, void (*data_free)(void *))
10031003
{
1004-
int r, i, rpc_locked = 0;
1004+
int r, i, rpc_locked = 0, ch_locked = 0;
10051005
int multisession = 0; /* flag for more NETCONF sessions on a single SSH session */
10061006
struct timespec ts;
10071007
NC_STATUS status;
@@ -1012,15 +1012,17 @@ nc_session_free(struct nc_session *session, void (*data_free)(void *))
10121012

10131013
if ((session->side == NC_SERVER) && (session->flags & NC_SESSION_CALLHOME)) {
10141014
/* CH LOCK, continue on error */
1015-
r = nc_mutex_lock(&session->opts.server.ch_lock, NC_SESSION_CH_LOCK_TIMEOUT, __func__);
1015+
if (nc_mutex_lock(&session->opts.server.ch_lock, NC_SESSION_CH_LOCK_TIMEOUT, __func__) == 1) {
1016+
ch_locked = 1;
1017+
}
10161018
}
10171019

10181020
/* store status, so we can check if this session is already closing */
10191021
status = session->status;
10201022

10211023
if ((session->side == NC_SERVER) && (session->flags & NC_SESSION_CALLHOME)) {
10221024
/* CH UNLOCK */
1023-
if (r == 1) {
1025+
if (ch_locked) {
10241026
/* only if we locked it */
10251027
nc_mutex_unlock(&session->opts.server.ch_lock, __func__);
10261028
}
@@ -1060,8 +1062,12 @@ nc_session_free(struct nc_session *session, void (*data_free)(void *))
10601062
nc_client_msgs_free(session);
10611063
}
10621064

1063-
if (session->status == NC_STATUS_RUNNING) {
1064-
/* notify the peer that we're closing the session */
1065+
/* notify the peer that we're closing the session, either if:
1066+
* - session running - normal disconnect from client
1067+
* - session invalid - client disconnected from a Call Home session */
1068+
if ((session->status == NC_STATUS_RUNNING) ||
1069+
((session->side == NC_SERVER) && (session->flags & NC_SESSION_CALLHOME) &&
1070+
(session->status == NC_STATUS_INVALID) && (session->term_reason == NC_SESSION_TERM_CLOSED))) {
10651071
if (session->side == NC_CLIENT) {
10661072
/* graceful close: <close-session> + transport shutdown indication */
10671073
nc_session_free_client_close_graceful(session);
@@ -1073,7 +1079,10 @@ nc_session_free(struct nc_session *session, void (*data_free)(void *))
10731079

10741080
if ((session->side == NC_SERVER) && (session->flags & NC_SESSION_CALLHOME)) {
10751081
/* CH LOCK */
1076-
nc_mutex_lock(&session->opts.server.ch_lock, NC_SESSION_CH_LOCK_TIMEOUT, __func__);
1082+
ch_locked = 0;
1083+
if (nc_mutex_lock(&session->opts.server.ch_lock, NC_SESSION_CH_LOCK_TIMEOUT, __func__) == 1) {
1084+
ch_locked = 1;
1085+
}
10771086
}
10781087

10791088
/* mark session for closing */
@@ -1096,7 +1105,10 @@ nc_session_free(struct nc_session *session, void (*data_free)(void *))
10961105

10971106
if ((session->side == NC_SERVER) && (session->flags & NC_SESSION_CALLHOME)) {
10981107
/* CH UNLOCK */
1099-
nc_mutex_unlock(&session->opts.server.ch_lock, __func__);
1108+
if (ch_locked) {
1109+
/* only if we locked it */
1110+
nc_mutex_unlock(&session->opts.server.ch_lock, __func__);
1111+
}
11001112
}
11011113

11021114
/* transport implementation cleanup */

src/session_p.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,7 @@ struct nc_server_ch_thread_arg {
692692
uint8_t cur_attempt, void *user_data); /**< failed to create a new session cb */
693693
void *new_session_fail_cb_data; /**< new session fail cb data */
694694

695+
pthread_t tid; /**< Thread ID of the Call Home client thread. */
695696
int thread_running; /**< A boolean value that is truthy while the underlying Call Home thread is running */
696697
pthread_mutex_t cond_lock; /**< Condition's lock used for signalling the thread to terminate */
697698
pthread_cond_t cond; /**< Condition used for signalling the thread to terminate */
@@ -1387,6 +1388,17 @@ NC_MSG_TYPE nc_connect_callhome(const char *host, uint16_t port, NC_TRANSPORT_IM
13871388

13881389
#ifdef NC_ENABLED_SSH_TLS
13891390

1391+
/**
1392+
* @brief Stop a dispatched Call Home client thread, if such thread was dispatched for the given client name.
1393+
*
1394+
* @param[in] client_name Name of the Call Home client to stop the thread for.
1395+
* @param[in] config_lock_mode Lock mode of the configuration lock, if it is held by the caller.
1396+
* If NC_RWLOCK_WRITE is passed, config lock will be briefly unlocked and then locked again.
1397+
* The caller MUST ensure that it holds the CONFIG APPLY mutex, so that nobody steals the wrlock from him.
1398+
* @return 0 if the thread was successfully stopped or not found, 1 on error.
1399+
*/
1400+
int nc_session_server_ch_client_dispatch_stop_if_dispatched(const char *client_name, enum nc_rwlock_mode config_lock_mode);
1401+
13901402
/**
13911403
* @brief Dispatch a thread connecting to a listening NETCONF client and creating Call Home sessions.
13921404
*

0 commit comments

Comments
 (0)