Skip to content

Commit 4c454cf

Browse files
authored
Merge pull request #2509 from navidemad/fix/network-pollfds-clobbers-cdp-sockets
Fix CDP server stall/SIGTERM hang in optimized builds (Network drops CDP sockets from poll set)
2 parents 5905319 + e1e49c8 commit 4c454cf

1 file changed

Lines changed: 55 additions & 2 deletions

File tree

src/network/Network.zig

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -722,8 +722,15 @@ pub fn run(self: *Network) void {
722722
break :blk min_timeout;
723723
}
724724

725+
// curl_multi_timeout reports -1 when curl has no timeout
726+
// preference (idle) and 0 when it wants to be serviced
727+
// immediately. Treat both as "no curl-imposed deadline" and
728+
// fall back to min_timeout — otherwise @min(min_timeout, -1)
729+
// would be -1, i.e. poll() blocks forever, starving onTick
730+
// (telemetry's periodic flush) and removing the safety net
731+
// that bounds any missed wakeup to min_timeout.
725732
const curl_timeout = self.getCurlTimeout();
726-
if (curl_timeout == 0) {
733+
if (curl_timeout <= 0) {
727734
break :blk min_timeout;
728735
}
729736

@@ -860,7 +867,16 @@ fn acceptConnections(self: *Network) void {
860867
}
861868

862869
fn preparePollFds(self: *Network, multi: *libcurl.CurlM) void {
863-
const curl_fds = self.pollfds[PSEUDO_POLLFDS..];
870+
// Only the curl slice — NOT through to the end of pollfds. The CDP
871+
// socket fds live in [cdp_start..] and are owned by
872+
// prepareCdpPollFds, which only rebuilds them when cdp_dirty is set
873+
// (a steady-state optimization). Slicing to the end here would
874+
// @memset those fds to -1 every iteration once a multi exists (which
875+
// happens as soon as telemetry sends its first request), silently
876+
// dropping every live CDP socket from the poll set — Network then
877+
// never reads another CDP message (#2508) nor observes peer
878+
// EOF/shutdown (#2507).
879+
const curl_fds = self.pollfds[PSEUDO_POLLFDS..self.cdp_start];
864880
@memset(curl_fds, .{ .fd = -1, .events = 0, .revents = 0 });
865881

866882
var fd_count: c_uint = 0;
@@ -1053,3 +1069,40 @@ fn loadCerts(allocator: Allocator) !libcurl.CurlBlob {
10531069
.flags = 0,
10541070
};
10551071
}
1072+
1073+
const testing = @import("../testing.zig");
1074+
1075+
test "Network: preparePollFds leaves the CDP fd region untouched" {
1076+
// Regression for #2507 / #2508. Once a multi exists (telemetry creates
1077+
// one in optimized builds), preparePollFds runs every loop iteration.
1078+
// It rebuilds only the curl slice [PSEUDO_POLLFDS..cdp_start]; the CDP
1079+
// region [cdp_start..] is owned by prepareCdpPollFds, which keeps its
1080+
// entries across iterations and only rebuilds when cdp_dirty is set.
1081+
// A slice that ran to the end of pollfds @memset those CDP sockets to
1082+
// -1, silently dropping every live CDP connection from the poll set —
1083+
// so Network stopped reading CDP messages (#2508) and never observed
1084+
// peer EOF/shutdown (#2507). curl global is initialized by the test
1085+
// harness (App.init -> Network.init).
1086+
const multi = libcurl.curl_multi_init() orelse return error.FailedToInitMulti;
1087+
defer libcurl.curl_multi_cleanup(multi) catch {};
1088+
1089+
const curl_slots = 4;
1090+
const cdp_slots = 3;
1091+
var pollfds: [PSEUDO_POLLFDS + curl_slots + cdp_slots]posix.pollfd = undefined;
1092+
@memset(&pollfds, .{ .fd = -1, .events = 0, .revents = 0 });
1093+
1094+
// preparePollFds only reads self.pollfds and self.cdp_start.
1095+
var nw: Network = undefined;
1096+
nw.pollfds = &pollfds;
1097+
nw.cdp_start = PSEUDO_POLLFDS + curl_slots;
1098+
1099+
// Two live CDP sockets parked in the CDP region, mimicking the steady
1100+
// state between cdp_dirty rebuilds.
1101+
pollfds[nw.cdp_start] = .{ .fd = 4242, .events = posix.POLL.IN, .revents = 0 };
1102+
pollfds[nw.cdp_start + 1] = .{ .fd = 4243, .events = posix.POLL.IN, .revents = 0 };
1103+
1104+
nw.preparePollFds(multi);
1105+
1106+
try testing.expectEqual(@as(posix.fd_t, 4242), pollfds[nw.cdp_start].fd);
1107+
try testing.expectEqual(@as(posix.fd_t, 4243), pollfds[nw.cdp_start + 1].fd);
1108+
}

0 commit comments

Comments
 (0)