Skip to content

Commit f123631

Browse files
Use IORING_POLL_ADD_MULTI for the wakeup eventfd poll.
Instead of registering a new one-shot poll_add on the wakeup eventfd before every blocking wait, register a multi-shot poll once and let the kernel keep it alive across fires (IORING_POLL_ADD_MULTI, kernel 5.13+). This removes one SQE submission per select() call on supporting kernels. The fallback is selected at compile time: when the io_uring headers do not define IORING_POLL_ADD_MULTI, the #ifdef keeps the one-shot-per-wait path. The wakeup CQE is now identified by a sentinel (the address of wakeup_fd) rather than NULL, so select_process_completions can drain the eventfd and detect when the multi-shot poll needs re-registration (IORING_CQE_F_MORE absent).

Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent cf1a894 commit f123631

1 file changed

Lines changed: 38 additions & 11 deletions

File tree

ext/io/event/selector/uring.c

Lines changed: 38 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,12 @@ struct IO_Event_Selector_URing
4040
// This allows IORING_SETUP_SINGLE_ISSUER: only the owner thread ever submits SQEs.
4141
int wakeup_fd;
4242

43+
// Whether a multi-shot poll on wakeup_fd is currently registered in the ring.
44+
// With IORING_POLL_ADD_MULTI the poll persists across fires so we only need to
45+
// register it once (re-registering only when the kernel cancels it).
46+
// Without multi-shot support this stays 0 and we register before every wait.
47+
int wakeup_registered;
48+
4349
struct timespec idle_duration;
4450

4551
struct IO_Event_Array completions;
@@ -108,6 +114,7 @@ void close_internal(struct IO_Event_Selector_URing *selector)
108114
if (selector->wakeup_fd >= 0) {
109115
close(selector->wakeup_fd);
110116
selector->wakeup_fd = -1;
117+
selector->wakeup_registered = 0;
111118
}
112119

113120
if (selector->ring.ring_fd >= 0) {
@@ -230,6 +237,7 @@ VALUE IO_Event_Selector_URing_allocate(VALUE self) {
230237
selector->pending = 0;
231238
selector->blocked = 0;
232239
selector->wakeup_fd = -1;
240+
selector->wakeup_registered = 0;
233241

234242
IO_Event_List_initialize(&selector->free_list);
235243

@@ -1104,24 +1112,29 @@ static
11041112
int select_internal_without_gvl(struct select_arguments *arguments) {
11051113
struct IO_Event_Selector_URing *selector = arguments->selector;
11061114

1107-
// Register a one-shot poll on the wakeup eventfd before releasing the GVL.
1108-
// This allows wakeup() to signal us by writing to the fd from any thread
1109-
// without touching the ring's SQ (required for IORING_SETUP_SINGLE_ISSUER).
1110-
struct io_uring_sqe *sqe = io_get_sqe(selector);
1111-
io_uring_prep_poll_add(sqe, selector->wakeup_fd, POLLIN);
1112-
io_uring_sqe_set_data(sqe, NULL);
1113-
selector->pending += 1;
1115+
// Register a poll on the wakeup eventfd before releasing the GVL so that
1116+
// wakeup() can signal us with a plain write() from any thread.
1117+
// With IORING_POLL_ADD_MULTI the poll persists across fires and we only
1118+
// register it once; on older kernels we fall back to one-shot per wait.
1119+
if (!selector->wakeup_registered) {
1120+
struct io_uring_sqe *sqe = io_get_sqe(selector);
1121+
io_uring_prep_poll_add(sqe, selector->wakeup_fd, POLLIN);
1122+
#ifdef IORING_POLL_ADD_MULTI
1123+
sqe->len = IORING_POLL_ADD_MULTI;
1124+
selector->wakeup_registered = 1;
1125+
#endif
1126+
// Use the address of wakeup_fd as a unique sentinel so completions
1127+
// can be identified without allocating a full completion struct.
1128+
io_uring_sqe_set_data(sqe, &selector->wakeup_fd);
1129+
selector->pending += 1;
1130+
}
11141131

11151132
io_uring_submit_flush(selector);
11161133

11171134
selector->blocked = 1;
11181135
rb_thread_call_without_gvl(select_internal, (void *)arguments, RUBY_UBF_IO, 0);
11191136
selector->blocked = 0;
11201137

1121-
// Drain the wakeup eventfd so the next poll_add doesn't fire immediately.
1122-
uint64_t value;
1123-
while (read(selector->wakeup_fd, &value, sizeof(value)) > 0) {}
1124-
11251138
if (arguments->result == -ETIME) {
11261139
arguments->result = 0;
11271140
} else if (arguments->result == -EINTR) {
@@ -1159,6 +1172,20 @@ unsigned select_process_completions(struct IO_Event_Selector_URing *selector) {
11591172
continue;
11601173
}
11611174

1175+
// Wakeup eventfd poll completion — drain the fd and check if the
1176+
// multi-shot poll is still live (IORING_CQE_F_MORE set).
1177+
if (cqe->user_data == (uint64_t)(uintptr_t)&selector->wakeup_fd) {
1178+
uint64_t value;
1179+
while (read(selector->wakeup_fd, &value, sizeof(value)) > 0) {}
1180+
#ifdef IORING_POLL_ADD_MULTI
1181+
if (!(cqe->flags & IORING_CQE_F_MORE)) {
1182+
selector->wakeup_registered = 0;
1183+
}
1184+
#endif
1185+
io_uring_cq_advance(ring, 1);
1186+
continue;
1187+
}
1188+
11621189
struct IO_Event_Selector_URing_Completion *completion = (void*)cqe->user_data;
11631190
struct IO_Event_Selector_URing_Waiting *waiting = completion->waiting;
11641191

0 commit comments

Comments
 (0)