Skip to content

Commit e1e49c8

Browse files
committed
Fix Network dropping CDP sockets from its poll set once a multi exists
preparePollFds cleared and rebuilt the curl portion of `pollfds` every loop iteration, but sliced `pollfds[PSEUDO_POLLFDS..]` — all the way to the end of the array. That range also covers the CDP socket region `[cdp_start..]`, which prepareCdpPollFds owns and only rebuilds when `cdp_dirty` is set (a steady-state optimization). So the @Memset wiped every live CDP socket fd to -1 on each iteration. This only bites once Network owns a curl multi handle, which is created solely by telemetry — and telemetry is disabled in Debug builds, which is why it reproduced only in ReleaseFast/ReleaseSafe (and the nightly). Regular HTTP/navigation runs on the worker's own handles, not Network's multi, so it never triggered the path in Debug. Once the CDP sockets are dropped from the poll set, the Network thread stops reading client messages (#2508, hard stall after the first command) and never observes peer EOF or `conn.shutdown`, so the worker is never told to exit and SIGTERM is ignored after a connection (#2507). Fix: slice only the curl region `[PSEUDO_POLLFDS..cdp_start]`. Also harden the poll timeout: `curl_multi_timeout` returns -1 when curl is idle, and `@min(250, -1)` is -1 (block forever), which starved onTick (telemetry's periodic flush) and turned any missed wakeup into a permanent hang rather than a <=250ms blip. Treat curl_timeout <= 0 as "no deadline" and fall back to the 250ms cap. Fixes #2507 Fixes #2508
1 parent f1b0adf commit e1e49c8

1 file changed

Lines changed: 55 additions & 2 deletions

File tree

src/network/Network.zig

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -722,8 +722,15 @@ pub fn run(self: *Network) void {
722722
break :blk min_timeout;
723723
}
724724

725+
// curl_multi_timeout reports -1 when curl has no timeout
726+
// preference (idle) and 0 when it wants to be serviced
727+
// immediately. Treat both as "no curl-imposed deadline" and
728+
// fall back to min_timeout — otherwise @min(min_timeout, -1)
729+
// would be -1, i.e. poll() blocks forever, starving onTick
730+
// (telemetry's periodic flush) and removing the safety net
731+
// that bounds any missed wakeup to min_timeout.
725732
const curl_timeout = self.getCurlTimeout();
726-
if (curl_timeout == 0) {
733+
if (curl_timeout <= 0) {
727734
break :blk min_timeout;
728735
}
729736

@@ -860,7 +867,16 @@ fn acceptConnections(self: *Network) void {
860867
}
861868

862869
fn preparePollFds(self: *Network, multi: *libcurl.CurlM) void {
863-
const curl_fds = self.pollfds[PSEUDO_POLLFDS..];
870+
// Only the curl slice — NOT through to the end of pollfds. The CDP
871+
// socket fds live in [cdp_start..] and are owned by
872+
// prepareCdpPollFds, which only rebuilds them when cdp_dirty is set
873+
// (a steady-state optimization). Slicing to the end here would
874+
// @memset those fds to -1 every iteration once a multi exists (which
875+
// happens as soon as telemetry sends its first request), silently
876+
// dropping every live CDP socket from the poll set — Network then
877+
// never reads another CDP message (#2508) nor observes peer
878+
// EOF/shutdown (#2507).
879+
const curl_fds = self.pollfds[PSEUDO_POLLFDS..self.cdp_start];
864880
@memset(curl_fds, .{ .fd = -1, .events = 0, .revents = 0 });
865881

866882
var fd_count: c_uint = 0;
@@ -1053,3 +1069,40 @@ fn loadCerts(allocator: Allocator) !libcurl.CurlBlob {
10531069
.flags = 0,
10541070
};
10551071
}
1072+
1073+
const testing = @import("../testing.zig");
1074+
1075+
test "Network: preparePollFds leaves the CDP fd region untouched" {
1076+
// Regression for #2507 / #2508. Once a multi exists (telemetry creates
1077+
// one in optimized builds), preparePollFds runs every loop iteration.
1078+
// It rebuilds only the curl slice [PSEUDO_POLLFDS..cdp_start]; the CDP
1079+
// region [cdp_start..] is owned by prepareCdpPollFds, which keeps its
1080+
// entries across iterations and only rebuilds when cdp_dirty is set.
1081+
// A slice that ran to the end of pollfds @memset those CDP sockets to
1082+
// -1, silently dropping every live CDP connection from the poll set —
1083+
// so Network stopped reading CDP messages (#2508) and never observed
1084+
// peer EOF/shutdown (#2507). curl global is initialized by the test
1085+
// harness (App.init -> Network.init).
1086+
const multi = libcurl.curl_multi_init() orelse return error.FailedToInitMulti;
1087+
defer libcurl.curl_multi_cleanup(multi) catch {};
1088+
1089+
const curl_slots = 4;
1090+
const cdp_slots = 3;
1091+
var pollfds: [PSEUDO_POLLFDS + curl_slots + cdp_slots]posix.pollfd = undefined;
1092+
@memset(&pollfds, .{ .fd = -1, .events = 0, .revents = 0 });
1093+
1094+
// preparePollFds only reads self.pollfds and self.cdp_start.
1095+
var nw: Network = undefined;
1096+
nw.pollfds = &pollfds;
1097+
nw.cdp_start = PSEUDO_POLLFDS + curl_slots;
1098+
1099+
// Two live CDP sockets parked in the CDP region, mimicking the steady
1100+
// state between cdp_dirty rebuilds.
1101+
pollfds[nw.cdp_start] = .{ .fd = 4242, .events = posix.POLL.IN, .revents = 0 };
1102+
pollfds[nw.cdp_start + 1] = .{ .fd = 4243, .events = posix.POLL.IN, .revents = 0 };
1103+
1104+
nw.preparePollFds(multi);
1105+
1106+
try testing.expectEqual(@as(posix.fd_t, 4242), pollfds[nw.cdp_start].fd);
1107+
try testing.expectEqual(@as(posix.fd_t, 4243), pollfds[nw.cdp_start + 1].fd);
1108+
}

0 commit comments

Comments
 (0)