11name : Test Ruby GC Bug — Root Cause Investigation
22
3+ # Goal: understand WHY RB_GC_GUARD fixes the crash.
4+ #
5+ # The upstream fix (8351378bf3) adds RB_GC_GUARD after the last implicit use
6+ # of `blocking_operation` via the derived `operation` pointer. We compare three builds:
7+ #
8+ # BUGGY — remove RB_GC_GUARD from fixed code → should crash
9+ # FIXED — base code (has RB_GC_GUARD already) → should pass
10+ # CASE_C — BUGGY code + __attribute__((optimize("O0"))) on function → does O0 fix it?
11+ #
12+ # If BUGGY crashes and CASE_C does not, it is purely a compiler optimisation issue.
13+
314on : [push, pull_request]
415
516jobs :
617 investigate :
7- name : " CASE_A (O3, crashes) vs CASE_C (O0 on fn )"
18+ name : " BUGGY vs FIXED vs CASE_C (O0)"
819 runs-on : ubuntu-24.04
920
1021 steps :
@@ -17,90 +28,117 @@ jobs:
1728 ruby bison libyaml-dev libgdbm-dev libreadline-dev libncurses5-dev \
1829 liburing-dev autoconf automake gcc make binutils
1930
20- - name : Clone Ruby (bug/blocking-operation-gc — CASE_B is base )
31+ - name : Clone Ruby (bug/blocking-operation-gc — has fix applied )  # Checks out the Ruby fork/branch under test into ./ruby-src.
2132 uses : actions/checkout@v4
2233 with :
2334 repository : samuel-williams-shopify/ruby
2435 ref : bug/blocking-operation-gc
2536 path : ruby-src  # the configure/patch steps below set working-directory to this path
2637
27- # ── CASE_A: NULL, full O3 (expected to crash) ──── ─────────────────────
28- - name : Patch to CASE_A — operation = NULL
38+ # ── FIXED baseline: build with RB_GC_GUARD intact ─────────────────────
39+ - name : Configure + build FIXED (RB_GC_GUARD present)  # Baseline: no source patch is applied before this build; installs under ~/.rubies/ruby-fixed.
2940 working-directory : ruby-src
3041 run : |  # autogen.sh is invoked only in this step; the later build steps re-run ./configure in the same tree
3142 ./autogen.sh
32- ./configure --prefix=$HOME/.rubies/ruby-caseA --enable-shared --disable-install-doc cppflags="-DENABLE_PATH_CHECK=0"
33- sed -i 's/rb_fiber_scheduler_blocking_operation_t \*operation = get_blocking_operation(blocking_operation);/rb_fiber_scheduler_blocking_operation_t *operation = NULL;/' scheduler.c
34- grep -n "operation = NULL" scheduler.c
43+ ./configure --prefix=$HOME/.rubies/ruby-fixed --enable-shared \
44+ --disable-install-doc cppflags="-DENABLE_PATH_CHECK=0"
45+ make -j$(nproc) && make install
46+
47+ # ── BUGGY: remove RB_GC_GUARD to reproduce original crash ─────────────
48+ - name : Patch BUGGY — remove RB_GC_GUARD(blocking_operation)  # Deletes every scheduler.c line containing RB_GC_GUARD(blocking_operation), then prints related lines to the log.
49+ working-directory : ruby-src
50+ run : |  # NOTE(review): nothing below fails if sed matched no line (the grep|tail pipeline's status is presumably tail's) — confirm from the log that the guard really was removed
51+ sed -i '/RB_GC_GUARD(blocking_operation)/d' scheduler.c
52+ echo "--- scheduler.c around the change ---"
53+ grep -n "blocking_operation\|funcall\|RB_GC" scheduler.c | tail -20
3554
36- - name : Build + install CASE_A
55+ - name : Configure + build BUGGY  # Rebuilds the same ruby-src tree after the BUGGY patch step and installs under ~/.rubies/ruby-buggy.
3756 working-directory : ruby-src
38- run : make -j$(nproc) && make install
57+ run : |  # same configure flags as the FIXED build; only --prefix differs
58+ ./configure --prefix=$HOME/.rubies/ruby-buggy --enable-shared \
59+ --disable-install-doc cppflags="-DENABLE_PATH_CHECK=0"
60+ make -j$(nproc) && make install
3961
40- # ── CASE_C: NULL + O0 on function ─ ────────────────────────────────────
41- - name : Patch to CASE_C — CASE_A + __attribute__((optimize("O0")))
62+ # ── CASE_C: BUGGY + O0 on function ────────────────────────────────────
63+ - name : Patch CASE_C — BUGGY + __attribute__((optimize("O0"))) on function  # Runs on the tree already patched for BUGGY and prefixes rb_fiber_scheduler_blocking_operation_wait with the O0 attribute.
4264 working-directory : ruby-src
4365 run : |  # the sed only matches when "VALUE rb_fiber_scheduler_blocking_operation_wait(" starts a line in scheduler.c
44- git checkout scheduler.c
45- sed -i 's/rb_fiber_scheduler_blocking_operation_t \*operation = get_blocking_operation(blocking_operation);/rb_fiber_scheduler_blocking_operation_t *operation = NULL;/' scheduler.c
4666 sed -i 's/^VALUE rb_fiber_scheduler_blocking_operation_wait(/__attribute__((optimize("O0"))) VALUE rb_fiber_scheduler_blocking_operation_wait(/' scheduler.c
47- grep -n 'optimize\|operation = NULL' scheduler.c | head -5
48- ./configure --prefix=$HOME/.rubies/ruby-caseC --enable-shared --disable-install-doc cppflags="-DENABLE_PATH_CHECK=0"
67+ grep -n 'optimize("O0")' scheduler.c  # bare grep, no pipe: exits non-zero and fails the step if the sed above matched nothing, so CASE_C cannot silently equal BUGGY
4968
50- - name : Build + install CASE_C
69+ - name : Configure + build CASE_C (BUGGY + O0)  # Rebuilds the same ruby-src tree after the CASE_C patch step and installs under ~/.rubies/ruby-caseC.
5170 working-directory : ruby-src
52- run : make -j$(nproc) && make install
71+ run : |  # same configure flags as the other two builds; only --prefix differs
72+ ./configure --prefix=$HOME/.rubies/ruby-caseC --enable-shared \
73+ --disable-install-doc cppflags="-DENABLE_PATH_CHECK=0"
74+ make -j$(nproc) && make install
5375
54- # ── Disassemble both ───── ──────────────────────────────────────────────
55- - name : Disassemble CASE_A and CASE_C
76+ # ── Disassemble all three ──────────────────────────────────────────────
77+ - name : Disassemble rb_fiber_scheduler_blocking_operation_wait in all builds  # For each of the three installs, writes the function's objdump listing to /tmp/<build>.asm and prints its line count.
5678 run : |  # NOTE(review): the library file name libruby.so.4.1 is hard-coded — confirm it matches what the branch installs; a missing symbol would just yield an empty .asm file
57- echo "=== CASE_A (O3, operation=NULL) ==="
58- objdump -d --no-show-raw-insn -M intel $HOME/.rubies/ruby-caseA/lib/libruby.so.4.1 \
59- | awk '/^[0-9a-f]+ <rb_fiber_scheduler_blocking_operation_wait>:/{found=1} found{print} found && /^$/{exit}' \
60- > /tmp/case_a.asm
61- cat /tmp/case_a.asm
62- echo ""
63- echo "=== CASE_C (O0 on fn, operation=NULL) ==="
64- objdump -d --no-show-raw-insn -M intel $HOME/.rubies/ruby-caseC/lib/libruby.so.4.1 \
65- | awk '/^[0-9a-f]+ <rb_fiber_scheduler_blocking_operation_wait>:/{found=1} found{print} found && /^$/{exit}' \
66- > /tmp/case_c.asm
67- cat /tmp/case_c.asm
68-
69- - name : Upload disassembly artifacts
79+ for build in fixed buggy caseC; do
80+ echo "=== $build ==="
81+ objdump -d --no-show-raw-insn -M intel \
82+ $HOME/.rubies/ruby-${build}/lib/libruby.so.4.1 \
83+ | awk '/^[0-9a-f]+ <rb_fiber_scheduler_blocking_operation_wait>:/{found=1} found{print} found && /^$/{exit}' \
84+ > /tmp/${build}.asm
85+ wc -l /tmp/${build}.asm
86+ done
87+
88+ - name : Upload disassembly  # Publishes the /tmp/*.asm files written by the disassembly step as a single artifact.
7089 uses : actions/upload-artifact@v4
7190 if : always()  # upload even when an earlier step has failed
7291 with :
73- name : disassembly-A-vs-C
74- path : /tmp/case_*.asm
92+ name : disassembly-fixed-buggy-caseC
93+ path : /tmp/*.asm
94+
95+ # ── Build io-event extension and run tests ─────────────────────────────
96+ - name : Build io-event extension (FIXED Ruby)  # Puts the FIXED Ruby first on PATH, installs the sus gem, then configures and compiles ext/io/event.
97+ run : |
98+ export PATH=$HOME/.rubies/ruby-fixed/bin:$PATH
99+ gem install sus --no-document
100+ (cd ext/io/event && ruby extconf.rb)  # mkmf writes the Makefile into the current directory, so generate it where make -C looks for it
101+ make -C ext/io/event
102+
103+ - name : Run worker_pool test with FIXED Ruby  # Runs test/io/event/worker_pool.rb under the FIXED Ruby and logs the last 5 output lines plus ruby's exit status.
104+ id : test_fixed
105+ run : |
106+ export PATH=$HOME/.rubies/ruby-fixed/bin:$PATH
107+ ruby -I lib -rsus test/io/event/worker_pool.rb 2>&1 | tail -5
108+ echo "exit:${PIPESTATUS[0]}"  # $? here would be tail's status (always 0); PIPESTATUS[0] is ruby's
109+ continue-on-error : true
75110
76- # ── Build io-event extension for each Ruby and test ───────────────────
77- - name : Build io-event for CASE_A
111+ - name : Build io-event extension (BUGGY Ruby)  # Puts the BUGGY Ruby first on PATH, installs the sus gem, then configures and compiles ext/io/event.
78112 run : |
79- export PATH=$HOME/.rubies/ruby-caseA /bin:$PATH
80- gem install bundler --no-document
81- bundle install
82- cd ext/io/event && ruby extconf.rb && make
113+ export PATH=$HOME/.rubies/ruby-buggy/bin:$PATH
114+ gem install sus --no-document
115+ (cd ext/io/event && ruby extconf.rb)  # mkmf writes the Makefile into the current directory, so generate it where make -C looks for it
116+ make -C ext/io/event
83117
84- - name : Run worker_pool test CASE_A (expect crash)
85- id : test_caseA
118+ - name : Run worker_pool test with BUGGY Ruby (expect crash)  # Runs test/io/event/worker_pool.rb under the BUGGY Ruby and logs the last 5 output lines plus ruby's exit status.
119+ id : test_buggy
86120 run : |
87- export PATH=$HOME/.rubies/ruby-caseA /bin:$PATH
88- (bundle exec ruby test/io/event/worker_pool.rb 2>&1 | tail -10 ; echo "exit:$?") || true
121+ export PATH=$HOME/.rubies/ruby-buggy/bin:$PATH
122+ (ruby -I lib -rsus test/io/event/worker_pool.rb 2>&1 | tail -5 ; echo "exit:${PIPESTATUS[0]}") || true  # PIPESTATUS[0] is ruby's status; $? would be tail's, hiding the crash
89123 continue-on-error : true
90124
91- - name : Build io-event for CASE_C
125+ - name : Build io-event extension (CASE_C Ruby)  # Puts the CASE_C Ruby first on PATH, installs the sus gem, then configures and compiles ext/io/event.
92126 run : |
93127 export PATH=$HOME/.rubies/ruby-caseC/bin:$PATH
94- cd ext/io/event && ruby extconf.rb && make
128+ gem install sus --no-document
129+ (cd ext/io/event && ruby extconf.rb)  # mkmf writes the Makefile into the current directory, so generate it where make -C looks for it
130+ make -C ext/io/event
95131
96- - name : Run worker_pool test CASE_C (does O0 fix the crash?)
132+ - name : Run worker_pool test with CASE_C Ruby (does O0 fix the crash?)  # Runs test/io/event/worker_pool.rb under the CASE_C Ruby and logs the last 5 output lines plus ruby's exit status.
97133 id : test_caseC
98134 run : |
99135 export PATH=$HOME/.rubies/ruby-caseC/bin:$PATH
100- (bundle exec ruby test/io/event/worker_pool.rb 2>&1 | tail -10 ; echo "exit:$?") || true
136+ (ruby -I lib -rsus test/io/event/worker_pool.rb 2>&1 | tail -5 ; echo "exit:${PIPESTATUS[0]}") || true  # PIPESTATUS[0] is ruby's status; $? would be tail's, hiding a crash
101137 continue-on-error : true
102138
103139 - name : Summary  # Prints the expected outcome for each case; it only echoes fixed text and does not read the test steps' results.
104140 if : always()  # print the legend even when an earlier step has failed
105141 run : |
106- echo "If CASE_A crashes and CASE_C passes -> optimization level is the cause"
142+ echo "FIXED (RB_GC_GUARD present): should pass"
143+ echo "BUGGY (no RB_GC_GUARD): should crash"
144+ echo "CASE_C (BUGGY + O0 on fn): if passes → optimization is the cause"
0 commit comments