Skip to content

Commit a77d1fb

Browse files
Add crashing test: blocking_operation_wait exception bypasses cleanup
Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent 16139a4 commit a77d1fb

2 files changed

Lines changed: 65 additions & 26 deletions

File tree

scheduler.c

Lines changed: 4 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1088,15 +1088,6 @@ rb_fiber_scheduler_address_resolve(VALUE scheduler, VALUE hostname)
10881088
* Thread.new { blocking_operation.call }.join
10891089
* end
10901090
*/
1091-
// Helper for rb_protect: calls scheduler.blocking_operation_wait(blocking_operation).
1092-
// args[0] = scheduler VALUE, args[1] = blocking_operation VALUE.
1093-
static VALUE
1094-
scheduler_blocking_operation_wait_call(VALUE _args)
1095-
{
1096-
VALUE *args = (VALUE *)_args;
1097-
return rb_funcall(args[0], id_blocking_operation_wait, 1, args[1]);
1098-
}
1099-
11001091
VALUE rb_fiber_scheduler_blocking_operation_wait(VALUE scheduler, void* (*function)(void *), void *data, rb_unblock_function_t *unblock_function, void *data2, int flags, struct rb_fiber_scheduler_blocking_operation_state *state)
11011092
{
11021093
// Check if scheduler supports blocking_operation_wait before creating the object
@@ -1107,35 +1098,22 @@ VALUE rb_fiber_scheduler_blocking_operation_wait(VALUE scheduler, void* (*functi
11071098
// Create a new BlockingOperation with the blocking operation
11081099
VALUE blocking_operation = rb_fiber_scheduler_blocking_operation_new(function, data, unblock_function, data2, flags, state);
11091100

1110-
rb_fiber_scheduler_blocking_operation_t *operation = get_blocking_operation(blocking_operation);
1111-
1112-
// Use rb_protect so that cleanup runs even when the scheduler raises an exception
1113-
// (e.g. via rb_jump_tag from worker_pool_call). Without this, a longjmp from
1114-
// inside rb_funcall bypasses the cleanup below and RB_GC_GUARD, leaving
1115-
// operation with stale pointers and blocking_operation without a live GC root.
1116-
VALUE call_args[2] = {scheduler, blocking_operation};
1117-
int tag = 0;
1118-
VALUE result = rb_protect(scheduler_blocking_operation_wait_call, (VALUE)call_args, &tag);
1119-
1120-
operation = get_blocking_operation(blocking_operation);
1101+
VALUE result = rb_funcall(scheduler, id_blocking_operation_wait, 1, blocking_operation);
11211102

11221103
// Get the operation data to check if it was executed
1104+
rb_fiber_scheduler_blocking_operation_t *operation = get_blocking_operation(blocking_operation);
11231105
rb_atomic_t current_status = RUBY_ATOMIC_LOAD(operation->status);
11241106

1125-
// Invalidate the operation now that we're done with it — must happen even on
1126-
// exception paths, since operation->state may point to a caller's stack frame.
1107+
// Invalidate the operation now that we're done with it
11271108
operation->function = NULL;
11281109
operation->state = NULL;
11291110
operation->data = NULL;
11301111
operation->data2 = NULL;
11311112
operation->unblock_function = NULL;
11321113

1133-
// Ensure that blocking_operation remains a live GC root through the cleanup above.
1114+
// Ensure that the blocking operation remains visible until this point:
11341115
RB_GC_GUARD(blocking_operation);
11351116

1136-
// Re-raise any exception from the scheduler after cleanup.
1137-
if (tag) rb_jump_tag(tag);
1138-
11391117
// If the blocking operation was never executed, return Qundef to signal the caller to use rb_nogvl instead
11401118
if (current_status == RB_FIBER_SCHEDULER_BLOCKING_OPERATION_STATUS_QUEUED) {
11411119
return Qundef;
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# frozen_string_literal: true
2+
3+
# Test that rb_fiber_scheduler_blocking_operation_wait correctly handles
4+
# exceptions raised by the scheduler's blocking_operation_wait method.
5+
#
6+
# When the scheduler raises (e.g. due to fiber interrupt), the exception
7+
# propagates through rb_funcall in the C function, bypassing the cleanup
8+
# code that nulls out operation->function, operation->state, etc.
9+
# operation->state points to a stack-allocated struct in the caller
10+
# (rb_nogvl), so after the exception unwinds that frame the pointer is
11+
# dangling. Any subsequent dereference — including from dfree during GC
12+
# or a re-execution of the operation — causes a segfault.
13+
14+
require "test/unit"
15+
require_relative "scheduler"
16+
17+
class TestBlockingOperationException < Test::Unit::TestCase
18+
# Scheduler whose blocking_operation_wait raises after completing the work,
19+
# simulating a fiber interrupt arriving after the operation finishes.
20+
class InterruptingScheduler < Scheduler
21+
def blocking_operation_wait(blocking_operation)
22+
super
23+
raise Interrupt, "simulated fiber interrupt"
24+
end
25+
end
26+
27+
def test_blocking_operation_exception_does_not_corrupt_state
28+
skip "IO::Buffer not available" unless defined?(IO::Buffer)
29+
30+
# Use a buffer large enough to trigger the scheduler's blocking_operation_wait
31+
# (rb_nogvl calls the scheduler hook for large copies).
32+
size = 2 * 1024 * 1024 # 2 MiB
33+
source = IO::Buffer.new(size)
34+
dest = IO::Buffer.new(size)
35+
source.clear(65) # fill with 'A'
36+
37+
caught = []
38+
39+
Thread.new do
40+
Fiber.set_scheduler(InterruptingScheduler.new)
41+
42+
Fiber.schedule do
43+
dest.copy(source, 0, size, 0)
44+
rescue Interrupt => e
45+
caught << e
46+
end
47+
end.join
48+
49+
assert_equal 1, caught.size, "Expected exactly one Interrupt to be rescued"
50+
51+
# Trigger GC to detect any use-after-free via the stale operation->state
52+
# pointer. Without the fix, the dfree for the blocking_operation TypedData
53+
# can dereference a now-invalid stack pointer, causing a segfault here or
54+
# silently corrupting a subsequent allocation.
55+
GC.start(full_mark: true, immediate_sweep: true)
56+
GC.compact if GC.respond_to?(:compact)
57+
58+
# If we reach here without crashing, the fix is working.
59+
assert_equal size, dest.size
60+
end
61+
end

0 commit comments

Comments
 (0)