Skip to content

Commit 9ca8953

Browse files
committed
smarter busy-wait
1 parent 8f9677c commit 9ca8953

1 file changed

Lines changed: 39 additions & 12 deletions

File tree

gc/default/default.c

Lines changed: 39 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,10 @@
9999
# include <mach/mach_port.h>
100100
#endif
101101

102+
#if defined(__x86_64__)
103+
#include <immintrin.h> // Required for _mm_pause
104+
#endif
105+
102106
#ifndef VM_CHECK_MODE
103107
# define VM_CHECK_MODE RUBY_DEBUG
104108
#endif
@@ -122,7 +126,7 @@
122126
#else
123127
#define psweep_debug(...) (void)0
124128
#endif
125-
#define PSWEEP_LOCK_STATS 0
129+
#define PSWEEP_LOCK_STATS 1
126130
#define PSWEEP_COLLECT_TIMINGS 0
127131

128132
#ifndef GC_HEAP_FREE_SLOTS
@@ -1194,7 +1198,8 @@ print_lock_stats(void)
11941198
typedef struct sweep_step_contention {
11951199
size_t step_count;
11961200
size_t swept_pages_trylock_success;
1197-
size_t swept_pages_trylock_fail;
1201+
size_t swept_pages_trylock_fail1;
1202+
size_t swept_pages_trylock_fail2;
11981203
} sweep_step_contention_t;
11991204

12001205
/* [0] = first step (gc_sweep), [1] = continue step (gc_sweep_continue) */
@@ -1212,15 +1217,23 @@ print_sweep_step_contention(void)
12121217
step_contention[0].step_count, step_contention[1].step_count);
12131218
fprintf(stderr, "%-30s %15zu %15zu\n", "trylock_success",
12141219
step_contention[0].swept_pages_trylock_success, step_contention[1].swept_pages_trylock_success);
1215-
fprintf(stderr, "%-30s %15zu %15zu\n", "trylock_fail",
1216-
step_contention[0].swept_pages_trylock_fail, step_contention[1].swept_pages_trylock_fail);
1220+
fprintf(stderr, "%-30s %15zu %15zu\n", "trylock_fail1",
1221+
step_contention[0].swept_pages_trylock_fail1, step_contention[1].swept_pages_trylock_fail1);
1222+
fprintf(stderr, "%-30s %15zu %15zu\n", "trylock_fail2",
1223+
step_contention[0].swept_pages_trylock_fail2, step_contention[1].swept_pages_trylock_fail2);
12171224

12181225
{
1219-
size_t total0 = step_contention[0].swept_pages_trylock_success + step_contention[0].swept_pages_trylock_fail;
1220-
size_t total1 = step_contention[1].swept_pages_trylock_success + step_contention[1].swept_pages_trylock_fail;
1221-
fprintf(stderr, "%-30s %14.2f%% %14.2f%%\n", "trylock_fail_ratio",
1222-
total0 > 0 ? (double)step_contention[0].swept_pages_trylock_fail / total0 * 100.0 : 0,
1223-
total1 > 0 ? (double)step_contention[1].swept_pages_trylock_fail / total1 * 100.0 : 0);
1226+
size_t total0 = step_contention[0].swept_pages_trylock_success + step_contention[0].swept_pages_trylock_fail1;
1227+
size_t total1 = step_contention[1].swept_pages_trylock_success + step_contention[1].swept_pages_trylock_fail1;
1228+
fprintf(stderr, "%-30s %14.2f%% %14.2f%%\n", "trylock_fail1_ratio",
1229+
total0 > 0 ? (double)step_contention[0].swept_pages_trylock_fail1 / total0 * 100.0 : 0,
1230+
total1 > 0 ? (double)step_contention[1].swept_pages_trylock_fail1 / total1 * 100.0 : 0);
1231+
1232+
size_t fail1_0 = step_contention[0].swept_pages_trylock_fail1;
1233+
size_t fail1_1 = step_contention[1].swept_pages_trylock_fail1;
1234+
fprintf(stderr, "%-30s %14.2f%% %14.2f%%\n", "trylock_fail2_of_fail1",
1235+
fail1_0 > 0 ? (double)step_contention[0].swept_pages_trylock_fail2 / fail1_0 * 100.0 : 0,
1236+
fail1_1 > 0 ? (double)step_contention[1].swept_pages_trylock_fail2 / fail1_1 * 100.0 : 0);
12241237
}
12251238

12261239
fprintf(stderr, "=====================================================\n\n");
@@ -5072,6 +5085,7 @@ gc_sweep_finish(rb_objspace_t *objspace)
50725085
#endif
50735086
}
50745087

5088+
50755089
// Dequeue a page swept by the sweep thread. If `free_in_user_thread` is true, then
50765090
// dequeue an unswept page to be swept by the Ruby thread. It can also dequeue an unswept
50775091
// page if otherwise it would have to wait for the sweep thread. In that case, `dequeued_unswept_page`
@@ -5115,12 +5129,25 @@ gc_sweep_dequeue_page(rb_objspace_t *objspace, rb_heap_t *heap, bool free_in_use
51155129
}
51165130
else {
51175131
#if PSWEEP_LOCK_STATS > 0
5118-
step_contention[current_step_type].swept_pages_trylock_fail++;
5132+
step_contention[current_step_type].swept_pages_trylock_fail1++;
51195133
#endif
51205134
for (volatile int i = 0; i < 100; i++) {
5135+
#if defined(__x86_64__)
5136+
_mm_pause();
5137+
#elif defined(__ARM_ARCH) && (__ARM_ARCH >= 6)
5138+
asm volatile("yield");
5139+
#endif
5140+
}
5141+
if (rb_native_mutex_trylock(&heap->swept_pages_lock) == 0) {
5142+
goto swept_pages_lock_inner;
5143+
}
5144+
else {
5145+
#if PSWEEP_LOCK_STATS > 0
5146+
step_contention[current_step_type].swept_pages_trylock_fail2++;
5147+
#endif
5148+
rb_native_mutex_lock(&heap->swept_pages_lock);
5149+
goto swept_pages_lock_inner;
51215150
}
5122-
rb_native_mutex_lock(&heap->swept_pages_lock);
5123-
goto swept_pages_lock_inner;
51245151
}
51255152
if (page) return page;
51265153

0 commit comments

Comments
 (0)