Skip to content

Commit 5ca29f7

Browse files
Revert to stack allocation: safe due to GVL ordering, no heap cost on hot path
Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent a401939 commit 5ca29f7

1 file changed

Lines changed: 22 additions & 23 deletions

File tree

ext/io/event/worker_pool.c

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -363,41 +363,44 @@ static VALUE worker_pool_call(VALUE self, VALUE _blocking_operation) {
363363
rb_raise(rb_eArgError, "Invalid blocking operation!");
364364
}
365365

366-
// Heap-allocate the work item. A stack allocation would be destroyed by
367-
// rb_jump_tag (used on the exception path) while the worker thread may
368-
// still be accessing the struct (e.g. inside rb_fiber_scheduler_unblock).
369-
// In practice the GVL prevents concurrent access, but the implicit
370-
// ordering is fragile; heap allocation makes the lifetime explicit and
371-
// safe regardless of future refactoring.
372-
struct IO_Event_WorkerPool_Work *work = RB_ALLOC(struct IO_Event_WorkerPool_Work);
373-
work->blocking_operation = blocking_operation;
374-
work->blocking_operation_value = _blocking_operation;
375-
work->completed = false;
376-
work->scheduler = scheduler;
377-
work->blocker = self;
378-
work->fiber = fiber;
379-
work->next = NULL;
366+
// Stack-allocate the work item. The struct is alive for the entire
367+
// execution of worker_pool_call:
368+
// - While in the queue: worker_pool_mark traces it via pool->work_queue.
369+
// - While executing (dequeued): it is on this fiber's C stack, which
370+
// the conservative GC scanner covers.
371+
// - rb_jump_tag is only called after work.completed is true, meaning
372+
// the worker has already released the GVL — so the stack frame is
373+
// not destroyed while the worker is still using the struct.
374+
struct IO_Event_WorkerPool_Work work = {
375+
.blocking_operation = blocking_operation,
376+
.blocking_operation_value = _blocking_operation,
377+
.completed = false,
378+
.scheduler = scheduler,
379+
.blocker = self,
380+
.fiber = fiber,
381+
.next = NULL
382+
};
380383

381384
// Enqueue work:
382385
pthread_mutex_lock(&pool->mutex);
383-
enqueue_work(pool, work);
386+
enqueue_work(pool, &work);
384387
pthread_cond_signal(&pool->work_available);
385388
pthread_mutex_unlock(&pool->mutex);
386389

387390
// Block the current fiber until work is completed:
388391
int state = 0;
389392
while (true) {
390393
int current_state = 0;
391-
rb_protect(worker_pool_work_begin, (VALUE)work, &current_state);
392-
if (DEBUG) fprintf(stderr, "-- worker_pool_call:work completed=%d, current_state=%d, state=%d\n", work->completed, current_state, state);
394+
rb_protect(worker_pool_work_begin, (VALUE)&work, &current_state);
395+
if (DEBUG) fprintf(stderr, "-- worker_pool_call:work completed=%d, current_state=%d, state=%d\n", work.completed, current_state, state);
393396

394397
// Store the first exception state:
395398
if (!state) {
396399
state = current_state;
397400
}
398401

399402
// If the work is still in the queue, we must wait for a worker to complete it (even if cancelled):
400-
if (work->completed) {
403+
if (work.completed) {
401404
// The work was completed, we can exit the loop:
402405
break;
403406
} else {
@@ -409,11 +412,7 @@ static VALUE worker_pool_call(VALUE self, VALUE _blocking_operation) {
409412
}
410413
}
411414

412-
if (DEBUG) fprintf(stderr, "<- worker_pool_call:work completed=%d, state=%d\n", work->completed, state);
413-
414-
// Work is done — worker has released the GVL and will no longer access
415-
// the struct. Safe to free before re-raising any pending exception.
416-
xfree(work);
415+
if (DEBUG) fprintf(stderr, "<- worker_pool_call:work completed=%d, state=%d\n", work.completed, state);
417416

418417
if (state) {
419418
if (DEBUG) fprintf(stderr, "[worker_pool] exception path: blocking_operation=%p state_tag=%d\n",

0 commit comments

Comments (0)