1111#include <stdint.h>
1212#include <time.h>
1313
14+ #include "../interrupt.h"
15+
1416#include "pidfd.c"
1517
1618#include <linux/version.h>
@@ -33,9 +35,21 @@ struct IO_Event_Selector_URing
3335
3436 // Flag indicating whether the selector is currently blocked in a system call.
3537 // Set to 1 when blocked in io_uring_wait_cqe_timeout() without GVL, 0 otherwise.
36- // Used by wakeup() to determine if an interrupt signal is needed.
3738 int blocked ;
3839
40+ // Interrupt used to wake the selector from another thread without touching the ring's SQ.
41+ // This allows IORING_SETUP_SINGLE_ISSUER: only the owner thread ever submits SQEs.
42+ // Uses eventfd on Linux, pipe fallback elsewhere.
43+ struct IO_Event_Interrupt interrupt ;
44+
45+ // Whether an async read on interrupt is currently pending in the ring.
46+ // The read is re-submitted before each blocking wait when not registered.
47+ int wakeup_registered ;
48+
49+ // Buffer for the pending async read on the interrupt descriptor.
50+ // Must remain valid for the lifetime of the in-flight SQE.
51+ uint64_t wakeup_value ;
52+
3953 struct timespec idle_duration ;
4054
4155 struct IO_Event_Array completions ;
@@ -101,6 +115,12 @@ void IO_Event_Selector_URing_Type_compact(void *_selector)
101115static
102116void close_internal (struct IO_Event_Selector_URing * selector )
103117{
118+ if (selector -> interrupt .descriptor >= 0 ) {
119+ IO_Event_Interrupt_close (& selector -> interrupt );
120+ selector -> interrupt .descriptor = -1 ;
121+ selector -> wakeup_registered = 0 ;
122+ }
123+
104124 if (selector -> ring .ring_fd >= 0 ) {
105125 io_uring_queue_exit (& selector -> ring );
106126 selector -> ring .ring_fd = -1 ;
@@ -220,6 +240,8 @@ VALUE IO_Event_Selector_URing_allocate(VALUE self) {
220240
221241 selector -> pending = 0 ;
222242 selector -> blocked = 0 ;
243+ selector -> interrupt .descriptor = -1 ;
244+ selector -> wakeup_registered = 0 ;
223245
224246 IO_Event_List_initialize (& selector -> free_list );
225247
@@ -240,14 +262,42 @@ VALUE IO_Event_Selector_URing_initialize(VALUE self, VALUE loop) {
240262 TypedData_Get_Struct (self , struct IO_Event_Selector_URing , & IO_Event_Selector_URing_Type , selector );
241263
242264 IO_Event_Selector_initialize (& selector -> backend , self , loop );
243- int result = io_uring_queue_init (URING_ENTRIES , & selector -> ring , 0 );
265+
266+ unsigned int flags = 0 ;
267+ // IORING_SETUP_SINGLE_ISSUER (kernel 6.0+): only the owner thread submits SQEs.
268+ // Safe here because wakeup() uses eventfd (no ring access from other threads).
269+ #ifdef IORING_SETUP_SINGLE_ISSUER
270+ flags |= IORING_SETUP_SINGLE_ISSUER ;
271+ #endif
272+ // IORING_SETUP_DEFER_TASKRUN (kernel 6.1+, requires SINGLE_ISSUER): defer io_uring
273+ // task work to the application thread rather than a kernel thread, reducing
274+ // cross-CPU signaling overhead.
275+ // NOTE: DEFER_TASKRUN changes how completions are delivered and can interact with
276+ // Ruby's signal/interrupt mechanism (RUBY_UBF_IO) in ways that cause spurious
277+ // Interrupt exceptions in cancellation tests. Disabled until that interaction is
278+ // fully understood.
279+ // #ifdef IORING_SETUP_DEFER_TASKRUN
280+ // flags |= IORING_SETUP_DEFER_TASKRUN;
281+ // #endif
282+
283+ int result = io_uring_queue_init (URING_ENTRIES , & selector -> ring , flags );
244284
245285 if (result < 0 ) {
246286 rb_syserr_fail (- result , "IO_Event_Selector_URing_initialize:io_uring_queue_init" );
247287 }
248288
249289 rb_update_max_fd (selector -> ring .ring_fd );
250290
291+ // Interrupt for cross-thread wakeup: another thread calls signal(); the owner
292+ // thread submits an async read before each blocking wait so the ring wakes up
293+ // without the waking thread ever touching the SQ.
294+ IO_Event_Interrupt_open (& selector -> interrupt );
295+ if (selector -> interrupt .descriptor < 0 ) {
296+ io_uring_queue_exit (& selector -> ring );
297+ selector -> ring .ring_fd = -1 ;
298+ rb_sys_fail ("IO_Event_Selector_URing_initialize:IO_Event_Interrupt_open" );
299+ }
300+
251301 return self ;
252302}
253303
@@ -1073,11 +1123,25 @@ void * select_internal(void *_arguments) {
10731123
10741124static
10751125int select_internal_without_gvl (struct select_arguments * arguments ) {
1076- io_uring_submit_flush (arguments -> selector );
1126+ struct IO_Event_Selector_URing * selector = arguments -> selector ;
1127+
1128+ // Submit an async read on the wakeup eventfd before releasing the GVL.
1129+ // When wakeup() writes to the fd the read completes, consuming the counter
1130+ // atomically — no separate poll + drain step required.
1131+ // The address of the interrupt struct serves as a unique sentinel in user_data.
1132+ if (!selector -> wakeup_registered ) {
1133+ struct io_uring_sqe * sqe = io_get_sqe (selector );
1134+ io_uring_prep_read (sqe , IO_Event_Interrupt_descriptor (& selector -> interrupt ), & selector -> wakeup_value , sizeof (selector -> wakeup_value ), 0 );
1135+ io_uring_sqe_set_data (sqe , & selector -> interrupt );
1136+ selector -> wakeup_registered = 1 ;
1137+ selector -> pending += 1 ;
1138+ }
10771139
1078- arguments -> selector -> blocked = 1 ;
1140+ io_uring_submit_flush (selector );
1141+
1142+ selector -> blocked = 1 ;
10791143 rb_thread_call_without_gvl (select_internal , (void * )arguments , RUBY_UBF_IO , 0 );
1080- arguments -> selector -> blocked = 0 ;
1144+ selector -> blocked = 0 ;
10811145
10821146 if (arguments -> result == - ETIME ) {
10831147 arguments -> result = 0 ;
@@ -1116,6 +1180,14 @@ unsigned select_process_completions(struct IO_Event_Selector_URing *selector) {
11161180 continue ;
11171181 }
11181182
1183+ // Interrupt read completion — the read already consumed the counter.
1184+ // Clear the flag so the next blocking wait re-submits the read.
1185+ if (cqe -> user_data == (uint64_t )(uintptr_t )& selector -> interrupt ) {
1186+ selector -> wakeup_registered = 0 ;
1187+ io_uring_cq_advance (ring , 1 );
1188+ continue ;
1189+ }
1190+
11191191 struct IO_Event_Selector_URing_Completion * completion = (void * )cqe -> user_data ;
11201192 struct IO_Event_Selector_URing_Waiting * waiting = completion -> waiting ;
11211193
@@ -1201,25 +1273,10 @@ VALUE IO_Event_Selector_URing_wakeup(VALUE self) {
12011273 struct IO_Event_Selector_URing * selector = NULL ;
12021274 TypedData_Get_Struct (self , struct IO_Event_Selector_URing , & IO_Event_Selector_URing_Type , selector );
12031275
1204- // If we are blocking, we can schedule a nop event to wake up the selector:
1276+ // Wake the selector by signalling the interrupt. This is safe from any thread
1277+ // and never touches the ring's SQ, which is required for IORING_SETUP_SINGLE_ISSUER.
12051278 if (selector -> blocked ) {
1206- struct io_uring_sqe * sqe = NULL ;
1207-
1208- while (true) {
1209- sqe = io_uring_get_sqe (& selector -> ring );
1210- if (sqe ) break ;
1211-
1212- rb_thread_schedule ();
1213-
1214- // It's possible we became unblocked already, so we can assume the selector has already cycled at least once:
1215- if (!selector -> blocked ) return Qfalse ;
1216- }
1217-
1218- io_uring_prep_nop (sqe );
1219- // If you don't set this line, the SQE will eventually be recycled and have valid user selector which can cause odd behaviour:
1220- io_uring_sqe_set_data (sqe , NULL );
1221- io_uring_submit (& selector -> ring );
1222-
1279+ IO_Event_Interrupt_signal (& selector -> interrupt );
12231280 return Qtrue ;
12241281 }
12251282
@@ -1230,7 +1287,12 @@ VALUE IO_Event_Selector_URing_wakeup(VALUE self) {
12301287
12311288static int IO_Event_Selector_URing_supported_p (void ) {
12321289 struct io_uring ring ;
1233- int result = io_uring_queue_init (32 , & ring , 0 );
1290+
1291+ unsigned int flags = 0 ;
1292+ #ifdef IORING_SETUP_SINGLE_ISSUER
1293+ flags |= IORING_SETUP_SINGLE_ISSUER ;
1294+ #endif
1295+ int result = io_uring_queue_init (32 , & ring , flags );
12341296
12351297 if (result < 0 ) {
12361298 rb_warn ("io_uring_queue_init() was available at compile time but failed at run time: %s\n" , strerror (- result ));
0 commit comments