9999# include <mach/mach_port.h>
100100#endif
101101
102+ #if defined(__x86_64__ )
103+ #include <immintrin.h> // Required for _mm_pause
104+ #endif
105+
102106#ifndef VM_CHECK_MODE
103107# define VM_CHECK_MODE RUBY_DEBUG
104108#endif
122126#else
123127#define psweep_debug (...) (void)0
124128#endif
125- #define PSWEEP_LOCK_STATS 0
129+ #define PSWEEP_LOCK_STATS 1
126130#define PSWEEP_COLLECT_TIMINGS 0
127131
128132#ifndef GC_HEAP_FREE_SLOTS
@@ -1194,7 +1198,8 @@ print_lock_stats(void)
11941198typedef struct sweep_step_contention {
11951199 size_t step_count ;
11961200 size_t swept_pages_trylock_success ;
1197- size_t swept_pages_trylock_fail ;
1201+ size_t swept_pages_trylock_fail1 ;
1202+ size_t swept_pages_trylock_fail2 ;
11981203} sweep_step_contention_t ;
11991204
12001205/* [0] = first step (gc_sweep), [1] = continue step (gc_sweep_continue) */
@@ -1212,15 +1217,23 @@ print_sweep_step_contention(void)
12121217 step_contention [0 ].step_count , step_contention [1 ].step_count );
12131218 fprintf (stderr , "%-30s %15zu %15zu\n" , "trylock_success" ,
12141219 step_contention [0 ].swept_pages_trylock_success , step_contention [1 ].swept_pages_trylock_success );
1215- fprintf (stderr , "%-30s %15zu %15zu\n" , "trylock_fail" ,
1216- step_contention [0 ].swept_pages_trylock_fail , step_contention [1 ].swept_pages_trylock_fail );
1220+ fprintf (stderr , "%-30s %15zu %15zu\n" , "trylock_fail1" ,
1221+ step_contention [0 ].swept_pages_trylock_fail1 , step_contention [1 ].swept_pages_trylock_fail1 );
1222+ fprintf (stderr , "%-30s %15zu %15zu\n" , "trylock_fail2" ,
1223+ step_contention [0 ].swept_pages_trylock_fail2 , step_contention [1 ].swept_pages_trylock_fail2 );
12171224
12181225 {
1219- size_t total0 = step_contention [0 ].swept_pages_trylock_success + step_contention [0 ].swept_pages_trylock_fail ;
1220- size_t total1 = step_contention [1 ].swept_pages_trylock_success + step_contention [1 ].swept_pages_trylock_fail ;
1221- fprintf (stderr , "%-30s %14.2f%% %14.2f%%\n" , "trylock_fail_ratio" ,
1222- total0 > 0 ? (double )step_contention [0 ].swept_pages_trylock_fail / total0 * 100.0 : 0 ,
1223- total1 > 0 ? (double )step_contention [1 ].swept_pages_trylock_fail / total1 * 100.0 : 0 );
1226+ size_t total0 = step_contention [0 ].swept_pages_trylock_success + step_contention [0 ].swept_pages_trylock_fail1 ;
1227+ size_t total1 = step_contention [1 ].swept_pages_trylock_success + step_contention [1 ].swept_pages_trylock_fail1 ;
1228+ fprintf (stderr , "%-30s %14.2f%% %14.2f%%\n" , "trylock_fail1_ratio" ,
1229+ total0 > 0 ? (double )step_contention [0 ].swept_pages_trylock_fail1 / total0 * 100.0 : 0 ,
1230+ total1 > 0 ? (double )step_contention [1 ].swept_pages_trylock_fail1 / total1 * 100.0 : 0 );
1231+
1232+ size_t fail1_0 = step_contention [0 ].swept_pages_trylock_fail1 ;
1233+ size_t fail1_1 = step_contention [1 ].swept_pages_trylock_fail1 ;
1234+ fprintf (stderr , "%-30s %14.2f%% %14.2f%%\n" , "trylock_fail2_of_fail1" ,
1235+ fail1_0 > 0 ? (double )step_contention [0 ].swept_pages_trylock_fail2 / fail1_0 * 100.0 : 0 ,
1236+ fail1_1 > 0 ? (double )step_contention [1 ].swept_pages_trylock_fail2 / fail1_1 * 100.0 : 0 );
12241237 }
12251238
12261239 fprintf (stderr , "=====================================================\n\n" );
@@ -5072,6 +5085,7 @@ gc_sweep_finish(rb_objspace_t *objspace)
50725085#endif
50735086}
50745087
5088+
50755089// Dequeue a page swept by the sweep thread. If `free_in_user_thread` is true, then
50765090// dequeue an unswept page to be swept by the Ruby thread. It can also dequeue an unswept
50775091// page if otherwise it would have to wait for the sweep thread. In that case, `dequeued_unswept_page`
@@ -5115,12 +5129,25 @@ gc_sweep_dequeue_page(rb_objspace_t *objspace, rb_heap_t *heap, bool free_in_use
51155129 }
51165130 else {
51175131#if PSWEEP_LOCK_STATS > 0
5118- step_contention [current_step_type ].swept_pages_trylock_fail ++ ;
5132+ step_contention [current_step_type ].swept_pages_trylock_fail1 ++ ;
51195133#endif
51205134 for (volatile int i = 0 ; i < 100 ; i ++ ) {
5135+ #if defined(__x86_64__ )
5136+ _mm_pause ();
5137+ #elif defined(__ARM_ARCH ) && (__ARM_ARCH >= 6 )
5138+ asm volatile ("yield" );
5139+ #endif
5140+ }
5141+ if (rb_native_mutex_trylock (& heap -> swept_pages_lock ) == 0 ) {
5142+ goto swept_pages_lock_inner ;
5143+ }
5144+ else {
5145+ #if PSWEEP_LOCK_STATS > 0
5146+ step_contention [current_step_type ].swept_pages_trylock_fail2 ++ ;
5147+ #endif
5148+ rb_native_mutex_lock (& heap -> swept_pages_lock );
5149+ goto swept_pages_lock_inner ;
51215150 }
5122- rb_native_mutex_lock (& heap -> swept_pages_lock );
5123- goto swept_pages_lock_inner ;
51245151 }
51255152 if (page ) return page ;
51265153
0 commit comments