Skip to content

Commit d0c2898

Browse files
committed
statd: reliable avahi client reconnect
Add reliable avahi client reconnect via explicit free+recreate, and reduce log verbosity (a single 10 s warning instead of a 3×2 s retry loop). Also filter out binary TXT records (UTF-8 + XML validity check), and add a SIGHUP handler for on-demand reconnect. Signed-off-by: Joachim Wiberg <troglobit@gmail.com>
1 parent ee2057d commit d0c2898

4 files changed

Lines changed: 142 additions & 20 deletions

File tree

package/statd/statd.conf

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,2 @@
11
#set DEBUG=1
2-
32
service name:statd [12345] <pid/confd> statd -f -p /run/statd.pid -n -- Status daemon

src/statd/avahi.c

Lines changed: 126 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -542,16 +542,49 @@ static void resolver_cb(AvahiServiceResolver *r,
542542

543543
svc->port = port;
544544

545-
/* Copy TXT records verbatim */
545+
/* Copy TXT records, skipping any that are not valid UTF-8 or contain
546+
* bytes that are illegal in XML/YANG strings. Apple devices sometimes
547+
* embed raw binary tokens (device keys, protocol blobs) in TXT records;
548+
* passing them to sr_set_item_str() would make it return EINVAL. */
546549
for (s = txtlist; s; s = avahi_string_list_get_next(s)) {
547550
uint8_t *data = avahi_string_list_get_text(s);
548551
size_t len = avahi_string_list_get_size(s);
552+
size_t i;
553+
554+
/* Validate: must be well-formed UTF-8 with no XML-illegal bytes */
555+
for (i = 0; i < len; ) {
556+
uint8_t b = data[i];
557+
int extra;
558+
559+
if (b < 0x80) {
560+
/* ASCII: reject control chars invalid in XML */
561+
if ((b < 0x09) || (b > 0x0D && b < 0x20) || b == 0x7F)
562+
goto skip;
563+
i++;
564+
continue;
565+
}
566+
567+
/* Multi-byte UTF-8 lead byte */
568+
if ((b & 0xE0) == 0xC0) extra = 1;
569+
else if ((b & 0xF0) == 0xE0) extra = 2;
570+
else if ((b & 0xF8) == 0xF0) extra = 3;
571+
else goto skip; /* invalid lead byte */
572+
573+
i++;
574+
for (; extra-- > 0; i++) {
575+
if (i >= len || (data[i] & 0xC0) != 0x80)
576+
goto skip; /* truncated sequence */
577+
}
578+
}
549579

550580
t = calloc(1, sizeof(*t));
551581
if (!t)
552582
break;
553583
snprintf(t->val, sizeof(t->val), "%.*s", (int)len, (char *)data);
554584
LIST_INSERT_HEAD(&svc->txts, t, link);
585+
continue;
586+
skip:
587+
DEBUG("mdns: skipping binary TXT record for '%s' (len=%zu)", name, len);
555588
}
556589

557590
ds_push_resolver(ctx, svc, new_addr);
@@ -714,31 +747,59 @@ static bool mdns_is_enabled(struct mdns_ctx *ctx)
714747
return enabled;
715748
}
716749

750+
static void client_cb(AvahiClient *c, AvahiClientState state, void *userdata);
751+
752+
/*
753+
* Reconnect timer: fires MDNS_RECONN_DELAY seconds after AVAHI_CLIENT_FAILURE.
754+
* Frees the broken client and creates a fresh one. libavahi's own AVAHI_CLIENT_NO_FAIL
755+
* reconnection can miss D-Bus NameOwnerChanged events; explicit free+recreate is
756+
* more reliable (same pattern used by mdns-alias).
757+
*/
758+
#define MDNS_RECONN_DELAY 3.0
759+
760+
static void reconn_cb(struct ev_loop *loop, ev_timer *w, int revents)
761+
{
762+
struct mdns_ctx *ctx = (struct mdns_ctx *)
763+
((char *)w - offsetof(struct mdns_ctx, reconn_timer));
764+
int avahi_err;
765+
766+
(void)loop;
767+
(void)revents;
768+
769+
if (ctx->client) {
770+
avahi_client_free(ctx->client);
771+
ctx->client = NULL;
772+
}
773+
774+
ctx->client = avahi_client_new(&ctx->poll_api, AVAHI_CLIENT_NO_FAIL,
775+
client_cb, ctx, &avahi_err);
776+
if (!ctx->client)
777+
ERROR("mdns: failed to recreate avahi client: %s", avahi_strerror(avahi_err));
778+
}
779+
717780
/*
718-
* Retry timer callback: fires 2 s after AVAHI_CLIENT_FAILURE (and repeats up
719-
* to 3 times). Only logs ERROR once all retries are exhausted AND mDNS is
720-
* enabled in the running config — this avoids noisy errors when the operator
721-
* has simply disabled the mDNS service.
781+
* Log-delay timer: fires MDNS_WARN_DELAY seconds after AVAHI_CLIENT_FAILURE.
782+
* Logs a single warning if mDNS is still enabled in the running config —
783+
* suppresses noise when the operator has simply disabled the mDNS service or
784+
* avahi is just restarting briefly. Reconnection itself is handled by the
785+
* reconn_timer free+recreate (client uses AVAHI_CLIENT_NO_FAIL) — we never give up.
722786
*
723-
* Example log (mDNS enabled, daemon stays down):
724-
* avahi: mDNS daemon not responding (attempt 3/3) — check that the mdns
725-
* service is running
787+
* The delay must exceed libavahi's internal reconnect-poll interval (~5 s) so
788+
* that a normal daemon restart cancels this timer before it fires.
726789
*/
790+
#define MDNS_WARN_DELAY 10.0
791+
727792
static void mdns_retry_cb(struct ev_loop *loop, ev_timer *w, int revents)
728793
{
729794
struct mdns_ctx *ctx = (struct mdns_ctx *)
730795
((char *)w - offsetof(struct mdns_ctx, retry_timer));
731796

797+
(void)loop;
798+
(void)revents;
732799
ctx->fail_count++;
733-
if (ctx->fail_count < 3) {
734-
ev_timer_set(w, 2.0, 0.0);
735-
ev_timer_start(loop, w);
736-
return;
737-
}
738800

739801
if (mdns_is_enabled(ctx))
740-
ERROR("mdns: mDNS daemon not responding (attempt %d/3) — "
741-
"check that the mdns service is running", ctx->fail_count);
802+
WARN("mdns: mDNS daemon not responding, will reconnect automatically");
742803
}
743804

744805
static void client_cb(AvahiClient *c, AvahiClientState state, void *userdata)
@@ -750,6 +811,7 @@ static void client_cb(AvahiClient *c, AvahiClientState state, void *userdata)
750811
switch (state) {
751812
case AVAHI_CLIENT_S_RUNNING:
752813
if (ctx->fail_count > 0) {
814+
ev_timer_stop(ctx->loop, &ctx->reconn_timer);
753815
ev_timer_stop(ctx->loop, &ctx->retry_timer);
754816
NOTE("mdns: mDNS daemon reconnected");
755817
ctx->fail_count = 0;
@@ -779,8 +841,12 @@ static void client_cb(AvahiClient *c, AvahiClientState state, void *userdata)
779841
* will log only if the daemon stays down for MDNS_WARN_DELAY (10 s)
780842
* and mDNS is enabled in the running config.
781843
*/
844+
if (!ev_is_active(&ctx->reconn_timer)) {
845+
ev_timer_init(&ctx->reconn_timer, reconn_cb, MDNS_RECONN_DELAY, 0.0);
846+
ev_timer_start(ctx->loop, &ctx->reconn_timer);
847+
}
782848
if (!ev_is_active(&ctx->retry_timer)) {
783-
ev_timer_init(&ctx->retry_timer, mdns_retry_cb, 2.0, 0.0);
849+
ev_timer_init(&ctx->retry_timer, mdns_retry_cb, MDNS_WARN_DELAY, 0.0);
784850
ev_timer_start(ctx->loop, &ctx->retry_timer);
785851
}
786852

@@ -857,10 +923,54 @@ int mdns_ctx_init(struct mdns_ctx *ctx, struct ev_loop *loop, sr_conn_ctx_t *sr_
857923
return 0;
858924
}
859925

926+
void mdns_ctx_reconnect(struct mdns_ctx *ctx)
927+
{
928+
struct avahi_type_entry *te;
929+
int avahi_err;
930+
931+
if (!mdns_is_enabled(ctx)) {
932+
NOTE("mdns: mDNS is disabled, ignoring reconnect request");
933+
return;
934+
}
935+
936+
NOTE("mdns: reconnecting on request");
937+
938+
ev_timer_stop(ctx->loop, &ctx->reconn_timer);
939+
ev_timer_stop(ctx->loop, &ctx->retry_timer);
940+
ctx->fail_count = 0;
941+
942+
/* Clean up browsers before freeing the client */
943+
while (!LIST_EMPTY(&ctx->type_entries)) {
944+
te = LIST_FIRST(&ctx->type_entries);
945+
avahi_service_browser_free(te->browser);
946+
LIST_REMOVE(te, link);
947+
free(te);
948+
}
949+
if (ctx->type_browser) {
950+
avahi_service_type_browser_free(ctx->type_browser);
951+
ctx->type_browser = NULL;
952+
}
953+
954+
free_all(ctx);
955+
ds_clear_all(ctx);
956+
957+
if (ctx->client) {
958+
avahi_client_free(ctx->client);
959+
ctx->client = NULL;
960+
}
961+
962+
ctx->client = avahi_client_new(&ctx->poll_api, AVAHI_CLIENT_NO_FAIL,
963+
client_cb, ctx, &avahi_err);
964+
if (!ctx->client)
965+
ERROR("mdns: failed to recreate avahi client: %s", avahi_strerror(avahi_err));
966+
}
967+
860968
void mdns_ctx_exit(struct mdns_ctx *ctx)
861969
{
862970
struct avahi_type_entry *te;
863971

972+
if (ev_is_active(&ctx->reconn_timer))
973+
ev_timer_stop(ctx->loop, &ctx->reconn_timer);
864974
if (ev_is_active(&ctx->retry_timer))
865975
ev_timer_stop(ctx->loop, &ctx->retry_timer);
866976

src/statd/avahi.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,14 +58,16 @@ struct mdns_ctx {
5858
AvahiClient *client;
5959
AvahiServiceTypeBrowser *type_browser;
6060
AvahiPoll poll_api; /* libev-backed vtable */
61-
unsigned int fail_count; /* Consecutive avahi-daemon connection failures */
62-
ev_timer retry_timer; /* Deferred error-log timer */
61+
unsigned int fail_count; /* Non-zero while avahi-daemon is absent */
62+
ev_timer reconn_timer; /* Free+recreate client after brief delay */
63+
ev_timer retry_timer; /* Deferred warn-log timer */
6364
LIST_HEAD(, avahi_neighbor) neighbors;
6465
LIST_HEAD(, avahi_service) services; /* Flat list; keyed by 5-tuple */
6566
LIST_HEAD(, avahi_type_entry) type_entries;
6667
};
6768

6869
int mdns_ctx_init(struct mdns_ctx *ctx, struct ev_loop *loop, sr_conn_ctx_t *sr_conn);
70+
void mdns_ctx_reconnect(struct mdns_ctx *ctx);
6971
void mdns_ctx_exit(struct mdns_ctx *ctx);
7072

7173
#endif

src/statd/statd.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,13 @@ static void sigusr1_cb(struct ev_loop *, struct ev_signal *, int)
352352
debug ^= 1;
353353
}
354354

355+
static void sighup_cb(struct ev_loop *, struct ev_signal *w, int)
356+
{
357+
struct statd *statd = w->data;
358+
359+
mdns_ctx_reconnect(&statd->mdns);
360+
}
361+
355362

356363
static void sr_event_cb(struct ev_loop *, struct ev_io *w, int)
357364
{
@@ -455,7 +462,7 @@ static int subscribe_to_all(struct statd *statd)
455462

456463
int main(int argc, char *argv[])
457464
{
458-
struct ev_signal sigint_watcher, sigusr1_watcher;
465+
struct ev_signal sigint_watcher, sigusr1_watcher, sighup_watcher;
459466
int log_opts = LOG_PID | LOG_NDELAY;
460467
struct statd statd = {};
461468
const char *env;
@@ -516,6 +523,10 @@ int main(int argc, char *argv[])
516523
sigusr1_watcher.data = &statd;
517524
ev_signal_start(statd.ev_loop, &sigusr1_watcher);
518525

526+
ev_signal_init(&sighup_watcher, sighup_cb, SIGHUP);
527+
sighup_watcher.data = &statd;
528+
ev_signal_start(statd.ev_loop, &sighup_watcher);
529+
519530
err = journal_start(&statd.journal, statd.sr_query_ses);
520531
if (err) {
521532
sr_session_stop(statd.sr_query_ses);

0 commit comments

Comments
 (0)