1111#include <stdint.h>
1212#include <time.h>
1313
14+ #include "../interrupt.h"
15+
1416#include "pidfd.c"
1517
1618#include <linux/version.h>
@@ -33,9 +35,21 @@ struct IO_Event_Selector_URing
3335
3436 // Flag indicating whether the selector is currently blocked in a system call.
3537 // Set to 1 when blocked in io_uring_wait_cqe_timeout() without GVL, 0 otherwise.
36- // Used by wakeup() to determine if an interrupt signal is needed.
3738 int blocked ;
3839
40+ // Interrupt used to wake the selector from another thread without touching the ring's SQ.
41+ // This allows IORING_SETUP_SINGLE_ISSUER: only the owner thread ever submits SQEs.
42+ // Uses eventfd on Linux, pipe fallback elsewhere.
43+ struct IO_Event_Interrupt interrupt ;
44+
45+ // Whether an async read on interrupt is currently pending in the ring.
46+ // The read is re-submitted before each blocking wait when not registered.
47+ int wakeup_registered ;
48+
49+ // Buffer for the pending async read on the interrupt descriptor.
50+ // Must remain valid for the lifetime of the in-flight SQE.
51+ uint64_t wakeup_value ;
52+
3953 struct timespec idle_duration ;
4054
4155 struct IO_Event_Array completions ;
@@ -101,6 +115,12 @@ void IO_Event_Selector_URing_Type_compact(void *_selector)
101115static
102116void close_internal (struct IO_Event_Selector_URing * selector )
103117{
118+ if (selector -> interrupt .descriptor >= 0 ) {
119+ IO_Event_Interrupt_close (& selector -> interrupt );
120+ selector -> interrupt .descriptor = -1 ;
121+ selector -> wakeup_registered = 0 ;
122+ }
123+
104124 if (selector -> ring .ring_fd >= 0 ) {
105125 io_uring_queue_exit (& selector -> ring );
106126 selector -> ring .ring_fd = -1 ;
@@ -220,6 +240,8 @@ VALUE IO_Event_Selector_URing_allocate(VALUE self) {
220240
221241 selector -> pending = 0 ;
222242 selector -> blocked = 0 ;
243+ selector -> interrupt .descriptor = -1 ;
244+ selector -> wakeup_registered = 0 ;
223245
224246 IO_Event_List_initialize (& selector -> free_list );
225247
@@ -240,14 +262,38 @@ VALUE IO_Event_Selector_URing_initialize(VALUE self, VALUE loop) {
240262 TypedData_Get_Struct (self , struct IO_Event_Selector_URing , & IO_Event_Selector_URing_Type , selector );
241263
242264 IO_Event_Selector_initialize (& selector -> backend , self , loop );
243- int result = io_uring_queue_init (URING_ENTRIES , & selector -> ring , 0 );
265+
266+ unsigned int flags = 0 ;
267+ // IORING_SETUP_SINGLE_ISSUER (kernel 6.0+): only the owner thread submits SQEs.
268+ // Safe here because wakeup() uses eventfd (no ring access from other threads).
269+ #ifdef IORING_SETUP_SINGLE_ISSUER
270+ flags |= IORING_SETUP_SINGLE_ISSUER ;
271+ #endif
272+ // IORING_SETUP_DEFER_TASKRUN (kernel 6.1+, requires SINGLE_ISSUER): defer io_uring
273+ // task work until the owner thread calls into the kernel to wait for completions,
274+ // instead of running it at arbitrary syscall or interrupt boundaries.
275+ #ifdef IORING_SETUP_DEFER_TASKRUN
276+ flags |= IORING_SETUP_DEFER_TASKRUN ;
277+ #endif
278+
279+ int result = io_uring_queue_init (URING_ENTRIES , & selector -> ring , flags );
244280
245281 if (result < 0 ) {
246282 rb_syserr_fail (- result , "IO_Event_Selector_URing_initialize:io_uring_queue_init" );
247283 }
248284
249285 rb_update_max_fd (selector -> ring .ring_fd );
250286
287+ // Interrupt for cross-thread wakeup: another thread calls IO_Event_Interrupt_signal();
288+ // the owner thread submits an async read before each blocking wait so the ring wakes
289+ // up without the waking thread ever touching the SQ.
290+ IO_Event_Interrupt_open (& selector -> interrupt );
291+ if (selector -> interrupt .descriptor < 0 ) {
292+ io_uring_queue_exit (& selector -> ring );
293+ selector -> ring .ring_fd = -1 ;
294+ rb_sys_fail ("IO_Event_Selector_URing_initialize:IO_Event_Interrupt_open" );
295+ }
296+
251297 return self ;
252298}
253299
@@ -1073,11 +1119,25 @@ void * select_internal(void *_arguments) {
10731119
10741120static
10751121int select_internal_without_gvl (struct select_arguments * arguments ) {
1076- io_uring_submit_flush (arguments -> selector );
1122+ struct IO_Event_Selector_URing * selector = arguments -> selector ;
1123+
1124+ // Submit an async read on the wakeup eventfd before releasing the GVL.
1125+ // When wakeup() writes to the fd the read completes, consuming the counter
1126+ // atomically — no separate poll + drain step required.
1127+ // The address of the interrupt struct serves as a unique sentinel in user_data.
1128+ if (!selector -> wakeup_registered ) {
1129+ struct io_uring_sqe * sqe = io_get_sqe (selector );
1130+ io_uring_prep_read (sqe , IO_Event_Interrupt_descriptor (& selector -> interrupt ), & selector -> wakeup_value , sizeof (selector -> wakeup_value ), 0 );
1131+ io_uring_sqe_set_data (sqe , & selector -> interrupt );
1132+ selector -> wakeup_registered = 1 ;
1133+ selector -> pending += 1 ;
1134+ }
10771135
1078- arguments -> selector -> blocked = 1 ;
1136+ io_uring_submit_flush (selector );
1137+
1138+ selector -> blocked = 1 ;
10791139 rb_thread_call_without_gvl (select_internal , (void * )arguments , RUBY_UBF_IO , 0 );
1080- arguments -> selector -> blocked = 0 ;
1140+ selector -> blocked = 0 ;
10811141
10821142 if (arguments -> result == - ETIME ) {
10831143 arguments -> result = 0 ;
@@ -1116,6 +1176,14 @@ unsigned select_process_completions(struct IO_Event_Selector_URing *selector) {
11161176 continue ;
11171177 }
11181178
1179+ // Interrupt read completion — the read already consumed the counter.
1180+ // Clear the flag so the next blocking wait re-submits the read.
1181+ if (cqe -> user_data == (uint64_t )(uintptr_t )& selector -> interrupt ) {
1182+ selector -> wakeup_registered = 0 ;
1183+ io_uring_cq_advance (ring , 1 );
1184+ continue ;
1185+ }
1186+
11191187 struct IO_Event_Selector_URing_Completion * completion = (void * )cqe -> user_data ;
11201188 struct IO_Event_Selector_URing_Waiting * waiting = completion -> waiting ;
11211189
@@ -1201,25 +1269,10 @@ VALUE IO_Event_Selector_URing_wakeup(VALUE self) {
12011269 struct IO_Event_Selector_URing * selector = NULL ;
12021270 TypedData_Get_Struct (self , struct IO_Event_Selector_URing , & IO_Event_Selector_URing_Type , selector );
12031271
1204- // If we are blocking, we can schedule a nop event to wake up the selector:
1272+ // Wake the selector by signalling the interrupt. This is safe from any thread
1273+ // and never touches the ring's SQ, which is required for IORING_SETUP_SINGLE_ISSUER.
12051274 if (selector -> blocked ) {
1206- struct io_uring_sqe * sqe = NULL ;
1207-
1208- while (true) {
1209- sqe = io_uring_get_sqe (& selector -> ring );
1210- if (sqe ) break ;
1211-
1212- rb_thread_schedule ();
1213-
1214- // It's possible we became unblocked already, so we can assume the selector has already cycled at least once:
1215- if (!selector -> blocked ) return Qfalse ;
1216- }
1217-
1218- io_uring_prep_nop (sqe );
1219- // If you don't set this line, the SQE will eventually be recycled and have valid user selector which can cause odd behaviour:
1220- io_uring_sqe_set_data (sqe , NULL );
1221- io_uring_submit (& selector -> ring );
1222-
1275+ IO_Event_Interrupt_signal (& selector -> interrupt );
12231276 return Qtrue ;
12241277 }
12251278
@@ -1230,7 +1283,16 @@ VALUE IO_Event_Selector_URing_wakeup(VALUE self) {
12301283
12311284static int IO_Event_Selector_URing_supported_p (void ) {
12321285 struct io_uring ring ;
1233- int result = io_uring_queue_init (32 , & ring , 0 );
1286+
1287+ unsigned int flags = 0 ;
1288+ #ifdef IORING_SETUP_SINGLE_ISSUER
1289+ flags |= IORING_SETUP_SINGLE_ISSUER ;
1290+ #endif
1291+ #ifdef IORING_SETUP_DEFER_TASKRUN
1292+ flags |= IORING_SETUP_DEFER_TASKRUN ;
1293+ #endif
1294+
1295+ int result = io_uring_queue_init (32 , & ring , flags );
12341296
12351297 if (result < 0 ) {
12361298 rb_warn ("io_uring_queue_init() was available at compile time but failed at run time: %s\n" , strerror (- result ));
0 commit comments