Skip to content

Commit b4e96e2

Browse files
committed
cachedb_redis: use per-worker SSL_CTX instead of dereferencing d->ctx
With use_tls=1 on cachedb_redis, every SIP worker that touches a Redis/Valkey connection SIGSEGVs on the first SSL_new. Two independent problems stacked on the same line: 1. The old code indexed d->ctx as a per-process array: SSL_new(((void**)d->ctx)[process_no]) This predates the tls_mgm refactor (upstream 11aa15d, 'refactor TCP/TLS code to have everything in a single process'), after which ctx became a single SSL_CTX *. Every worker with process_no > 0 read garbage past the SSL_CTX struct. 2. Even after fixing the dereference, d->ctx is not safely usable from sibling workers. tls_mgm builds its SSL_CTX in child_init(PROC_TCP_MAIN), which runs *after* SIP workers and the MI process have already forked. So those workers see d->ctx == NULL when their cachedb_redis:child_init -> cachedb_do_init -> redis_init_ssl path fires. Even waiting for TCP_MAIN doesn't help: the SSL_CTX was built post-fork in TCP_MAIN's address space, and OpenSSL embeds pointers into per-process storage inside the CTX (method tables, providers, OSSL_LIB_CTX). Fix: each worker builds its own SSL_CTX from the static configuration on the tls_domain (cert, pkey, ca, verify_cert). CTXs are cached per-worker, keyed by tls_domain pointer, so multiple connections sharing a domain share one CTX inside that worker. Floors min proto at TLS 1.2 to match Valkey/Redis 7+ defaults and the tls_method=TLSv1_2 that most tls_mgm configs already set. Also tightens the adjacent error paths: SSL_free the SSL on redisInitiateSSL failure, and promote the printf in that path to LM_ERR for consistency. Note: a companion parser fix for 'tls-port' in parse_cluster_shards is still needed for TLS cluster topology updates to use the right port on MOVED/ASK redirects, but depends on PR OpenSIPS#3852 (parse_cluster_shards introduction); it can land as a small follow-up once that PR is merged.
1 parent 62d2368 commit b4e96e2

1 file changed

Lines changed: 120 additions & 2 deletions

File tree

modules/cachedb_redis/cachedb_redis_dbase.c

Lines changed: 120 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,11 +106,118 @@ static void tls_print_errstack(void)
106106
}
107107
}
108108

109+
/* Per-process cache of SSL_CTX objects, keyed by tls_domain pointer.
110+
*
111+
* tls_mgm builds its SSL_CTX in init_tls_domains() from
112+
* child_init(PROC_TCP_MAIN). SIP workers and the MI process fork
113+
* before TCP_MAIN runs that init, so when they enter
114+
* cachedb_redis's child_init -> redis_init_ssl path, d->ctx is
115+
* still NULL -- and even once populated, the SSL_CTX object lives
116+
* in TCP_MAIN's heap (OpenSSL embeds pointers into per-process
117+
* state) and is not safe to use from sibling workers.
118+
*
119+
* Each worker builds its own SSL_CTX from the static configuration
120+
* carried on the tls_domain (cert, pkey, ca, verify_cert). The CTX
121+
* is cached per-worker so multiple connections sharing a tls_domain
122+
* share one CTX inside that worker.
123+
*/
124+
struct cachedb_redis_tls_ctx {
125+
struct tls_domain *dom;
126+
SSL_CTX *ctx;
127+
struct cachedb_redis_tls_ctx *next;
128+
};
129+
static struct cachedb_redis_tls_ctx *cachedb_redis_tls_ctx_list;
130+
131+
static SSL_CTX *cachedb_redis_get_ssl_ctx(struct tls_domain *d)
132+
{
133+
struct cachedb_redis_tls_ctx *e;
134+
SSL_CTX *ctx;
135+
136+
for (e = cachedb_redis_tls_ctx_list; e; e = e->next)
137+
if (e->dom == d)
138+
return e->ctx;
139+
140+
ctx = SSL_CTX_new(TLS_client_method());
141+
if (!ctx) {
142+
LM_ERR("SSL_CTX_new(TLS_client_method) failed\n");
143+
tls_print_errstack();
144+
return NULL;
145+
}
146+
147+
/* Floor at TLS 1.2 -- matches Valkey/Redis 7+ defaults and
148+
* aligns with tls_method=TLSv1_2 on tls_mgm. */
149+
if (!SSL_CTX_set_min_proto_version(ctx, TLS1_2_VERSION)) {
150+
LM_ERR("SSL_CTX_set_min_proto_version(TLS1_2) failed\n");
151+
tls_print_errstack();
152+
SSL_CTX_free(ctx);
153+
return NULL;
154+
}
155+
156+
/* Mirror tls_mgm's verify_cert toggle. require_client_cert is a
157+
* server-side knob; on the client we only care whether to verify
158+
* the server cert. */
159+
SSL_CTX_set_verify(ctx,
160+
d->verify_cert ? SSL_VERIFY_PEER : SSL_VERIFY_NONE, NULL);
161+
162+
if (d->cert.s && d->cert.len) {
163+
if (SSL_CTX_use_certificate_chain_file(ctx, d->cert.s) != 1) {
164+
LM_ERR("SSL_CTX_use_certificate_chain_file(%s) failed\n",
165+
d->cert.s);
166+
tls_print_errstack();
167+
SSL_CTX_free(ctx);
168+
return NULL;
169+
}
170+
}
171+
if (d->pkey.s && d->pkey.len) {
172+
if (SSL_CTX_use_PrivateKey_file(ctx, d->pkey.s,
173+
SSL_FILETYPE_PEM) != 1) {
174+
LM_ERR("SSL_CTX_use_PrivateKey_file(%s) failed\n", d->pkey.s);
175+
tls_print_errstack();
176+
SSL_CTX_free(ctx);
177+
return NULL;
178+
}
179+
}
180+
if (d->ca.s && d->ca.len) {
181+
if (SSL_CTX_load_verify_locations(ctx, d->ca.s, NULL) != 1) {
182+
LM_ERR("SSL_CTX_load_verify_locations(%s) failed\n", d->ca.s);
183+
tls_print_errstack();
184+
SSL_CTX_free(ctx);
185+
return NULL;
186+
}
187+
}
188+
if (d->ca_directory) {
189+
if (SSL_CTX_load_verify_locations(ctx, NULL, d->ca_directory) != 1) {
190+
LM_WARN("SSL_CTX_load_verify_locations(dir=%s) failed,"
191+
" continuing without CA dir\n", d->ca_directory);
192+
tls_print_errstack();
193+
}
194+
}
195+
196+
e = pkg_malloc(sizeof(*e));
197+
if (!e) {
198+
LM_ERR("pkg_malloc failed for cachedb_redis_tls_ctx cache entry\n");
199+
SSL_CTX_free(ctx);
200+
return NULL;
201+
}
202+
e->dom = d;
203+
e->ctx = ctx;
204+
e->next = cachedb_redis_tls_ctx_list;
205+
cachedb_redis_tls_ctx_list = e;
206+
207+
LM_DBG("created per-worker SSL_CTX %p for tls_domain '%.*s'"
208+
" (verify=%d cert=%s pkey=%s)\n",
209+
ctx, d->name.len, ZSW(d->name.s), d->verify_cert,
210+
d->cert.s ? d->cert.s : "(none)",
211+
d->pkey.s ? d->pkey.s : "(none)");
212+
return ctx;
213+
}
214+
109215
static int redis_init_ssl(char *url_extra_opts, redisContext *ctx,
110216
struct tls_domain **tls_dom)
111217
{
112218
str tls_dom_name;
113219
SSL *ssl;
220+
SSL_CTX *ssl_ctx;
114221
struct tls_domain *d;
115222

116223
if (tls_dom == NULL || *tls_dom == NULL) {
@@ -139,7 +246,17 @@ static int redis_init_ssl(char *url_extra_opts, redisContext *ctx,
139246
d = *tls_dom;
140247
}
141248

142-
ssl = SSL_new(((void**)d->ctx)[process_no]);
249+
/* Use a per-worker SSL_CTX rather than d->ctx -- d->ctx may be
250+
* NULL when this worker forked before TCP_MAIN ran tls_mgm's
251+
* child_init, and even when set it lives in TCP_MAIN's address
252+
* space. */
253+
ssl_ctx = cachedb_redis_get_ssl_ctx(d);
254+
if (!ssl_ctx) {
255+
tls_api.release_domain(*tls_dom);
256+
return -1;
257+
}
258+
259+
ssl = SSL_new(ssl_ctx);
143260
if (!ssl) {
144261
LM_ERR("failed to create SSL structure (%d:%s)\n", errno, strerror(errno));
145262
tls_print_errstack();
@@ -148,7 +265,8 @@ static int redis_init_ssl(char *url_extra_opts, redisContext *ctx,
148265
}
149266

150267
if (redisInitiateSSL(ctx, ssl) != REDIS_OK) {
151-
printf("Failed to init Redis SSL: %s\n", ctx->errstr);
268+
LM_ERR("Failed to init Redis SSL: %s\n", ctx->errstr);
269+
SSL_free(ssl);
152270
tls_api.release_domain(*tls_dom);
153271
return -1;
154272
}

0 commit comments

Comments
 (0)