Skip to content

Commit 245f3e8

Browse files
committed
Add deferred free object bitmap per page
1 parent f20ad0e commit 245f3e8

1 file changed

Lines changed: 59 additions & 80 deletions

File tree

gc/default/default.c

Lines changed: 59 additions & 80 deletions
Original file line number | Diff line number | Diff line change
@@ -454,11 +454,6 @@ typedef struct mark_stack {
454454

455455
typedef int (*gc_compact_compare_func)(const void *l, const void *r, void *d);
456456

457-
typedef struct {
458-
rb_darray(VALUE) object_list;
459-
rb_nativethread_lock_t lock;
460-
} deferred_sweep_data_t;
461-
462457
typedef struct rb_heap_struct {
463458
short slot_size;
464459
bits_t slot_bits_mask;
@@ -492,7 +487,6 @@ typedef struct rb_heap_struct {
492487
rb_nativethread_cond_t sweep_page_cond; // associated with global sweep lock
493488
rb_nativethread_lock_t swept_pages_lock;
494489
size_t pre_swept_slots_deferred;
495-
deferred_sweep_data_t deferred_sweep_data;
496490
bool is_finished_sweeping;
497491
bool done_background_sweep;
498492
bool skip_sweep_continue; // skip current sweep continue
@@ -860,6 +854,7 @@ struct heap_page {
860854
/* If set, the object is not movable */
861855
bits_t pinned_bits[HEAP_PAGE_BITMAP_LIMIT];
862856
bits_t age_bits[HEAP_PAGE_BITMAP_LIMIT * RVALUE_AGE_BIT_COUNT];
857+
bits_t deferred_free_bits[HEAP_PAGE_BITMAP_LIMIT];
863858
};
864859

865860
/*
@@ -1074,7 +1069,7 @@ typedef struct lock_stats {
10741069

10751070
static lock_stats_t sweep_lock_stats = {"objspace->sweep_lock", {{0}}, 0};
10761071
static lock_stats_t swept_pages_lock_stats = {"heap->swept_pages_lock", {{0}}, 0};
1077-
static lock_stats_t deferred_sweep_data_lock_stats = {"heap->deferred_sweep_data.lock", {{0}}, 0};
1072+
10781073

10791074
static lock_callsite_stats_t*
10801075
find_or_create_callsite(lock_stats_t *stats, const char *function, int line)
@@ -1125,9 +1120,9 @@ print_lock_stats(void)
11251120
fprintf(stderr, "%-40s %-30s %12s %12s %10s\n", "Lock Name", "Callsite", "Uncontended", "Contended", "Ratio");
11261121
fprintf(stderr, "%-40s %-30s %12s %12s %10s\n", "---------", "--------", "-----------", "---------", "-----");
11271122

1128-
lock_stats_t *all_stats[] = {&sweep_lock_stats, &swept_pages_lock_stats, &deferred_sweep_data_lock_stats};
1123+
lock_stats_t *all_stats[] = {&sweep_lock_stats, &swept_pages_lock_stats};
11291124

1130-
for (int i = 0; i < 3; i++) {
1125+
for (int i = 0; i < 2; i++) {
11311126
lock_stats_t *stats = all_stats[i];
11321127

11331128
/* Sort callsites by total contentions (descending) */
@@ -1955,8 +1950,8 @@ rb_gc_impl_garbage_object_p(void *objspace_ptr, VALUE ptr)
19551950
GC_ASSERT(marked == RVALUE_MARKED_ATOMIC(objspace, ptr));
19561951
return during_lazy_sweep && !marked && RUBY_ATOMIC_LOAD(page->before_sweep);
19571952
}
1958-
// we're currently lazy sweeping with the sweep thread in background mode
19591953
else if (during_lazy_sweep) {
1954+
// we're currently lazy sweeping with the sweep thread
19601955
bool marked = RVALUE_MARKED_ATOMIC(objspace, ptr); // load it atomically so it can't be re-ordered past the next atomic load
19611956
bool before_sweep = RUBY_ATOMIC_LOAD(page->before_sweep);
19621957
bool is_garbage = !marked && before_sweep;
@@ -4095,32 +4090,6 @@ wait_for_background_sweeping_to_finish(rb_objspace_t *objspace, bool abort_curre
40954090
sweep_lock_unlock(&objspace->sweep_lock);
40964091
}
40974092

4098-
// dequeue MIN(left_to_deq, 10) objects from the deferred object list into `obj_buf`, returning the amount dequeued.
4099-
static short
4100-
deq_deferred_sweep_objects(rb_objspace_t *objspace, rb_heap_t *heap, VALUE obj_buf[10], short left_to_deq)
4101-
{
4102-
GC_ASSERT(left_to_deq > 0);
4103-
short to_deq = 10;
4104-
if (left_to_deq < 10) to_deq = left_to_deq;
4105-
#if PSWEEP_LOCK_STATS > 0
4106-
instrumented_lock_acquire(&heap->deferred_sweep_data.lock, &deferred_sweep_data_lock_stats);
4107-
#else
4108-
rb_native_mutex_lock(&heap->deferred_sweep_data.lock);
4109-
#endif
4110-
{
4111-
if ((size_t)to_deq > rb_darray_size(heap->deferred_sweep_data.object_list)) {
4112-
psweep_debug(0, "Error: trying to deq %hi from object_list of size %lu\n", to_deq, rb_darray_size(heap->deferred_sweep_data.object_list));
4113-
}
4114-
GC_ASSERT((size_t)to_deq <= rb_darray_size(heap->deferred_sweep_data.object_list));
4115-
for (short i = 0; i < to_deq; i++) {
4116-
obj_buf[i] = rb_darray_get(heap->deferred_sweep_data.object_list, i);
4117-
}
4118-
}
4119-
rb_darray_shift_n(heap->deferred_sweep_data.object_list, to_deq);
4120-
rb_native_mutex_unlock(&heap->deferred_sweep_data.lock);
4121-
return to_deq;
4122-
}
4123-
41244093
// Free the object in a Ruby thread. Return whether or not we put the slot back on the page's freelist.
41254094
static bool
41264095
deferred_free(rb_objspace_t *objspace, VALUE obj)
@@ -4389,22 +4358,10 @@ heap_page_freelist_append(struct heap_page *page, struct free_slot *freelist)
43894358
static void
43904359
sweep_in_ruby_thread(rb_objspace_t *objspace, struct heap_page *page, VALUE obj, bool nozombie)
43914360
{
4392-
rb_heap_t *heap = page->heap;
4393-
#if PSWEEP_LOCK_STATS > 0
4394-
instrumented_lock_acquire(&heap->deferred_sweep_data.lock, &deferred_sweep_data_lock_stats);
4395-
#else
4396-
rb_native_mutex_lock(&heap->deferred_sweep_data.lock);
4397-
#endif
4398-
{
4399-
page->pre_deferred_free_slots += 1;
4400-
psweep_debug(1, "[sweep] register sweep later: page(%p), obj(%p) %s\n", (void*)page, (void*)obj, rb_obj_info(obj));
4401-
GC_ASSERT(BUILTIN_TYPE(obj) != T_NONE);
4402-
rb_darray_append_without_gc(&heap->deferred_sweep_data.object_list, obj);
4403-
/*if (rb_darray_size(heap->deferred_sweep_data.object_list) > 128) {*/
4404-
/*fprintf(stderr, "deferred sweep data object list size:%lu\n", rb_darray_size(heap->deferred_sweep_data.object_list));*/
4405-
/*}*/
4406-
}
4407-
rb_native_mutex_unlock(&heap->deferred_sweep_data.lock);
4361+
page->pre_deferred_free_slots += 1;
4362+
psweep_debug(1, "[sweep] register sweep later: page(%p), obj(%p) %s\n", (void*)page, (void*)obj, rb_obj_info(obj));
4363+
GC_ASSERT(BUILTIN_TYPE(obj) != T_NONE);
4364+
MARK_IN_BITMAP(page->deferred_free_bits, obj);
44084365
}
44094366

44104367
bool
@@ -4596,6 +4553,7 @@ gc_pre_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *pa
45964553
psweep_debug(1, "[sweep] gc_pre_sweep_page(heap:%p page:%p) start\n", heap, page);
45974554
GC_ASSERT(page->heap == heap);
45984555
page->pre_deferred_free_slots = 0;
4556+
memset(page->deferred_free_bits, 0, sizeof(page->deferred_free_bits));
45994557
page->pre_zombie_slots = 0;
46004558
page->pre_freed_malloc_bytes = 0;
46014559
current_sweep_thread_page = page;
@@ -4718,6 +4676,7 @@ clear_pre_sweep_fields(struct heap_page *page)
47184676
{
47194677
page->pre_freed_slots = 0;
47204678
page->pre_deferred_free_slots = 0;
4679+
memset(page->deferred_free_bits, 0, sizeof(page->deferred_free_bits));
47214680
page->pre_empty_slots = 0;
47224681
page->pre_final_slots = 0;
47234682
page->pre_zombie_slots = 0;
@@ -4800,6 +4759,7 @@ gc_sweep_step_worker(rb_objspace_t *objspace, rb_heap_t *heap)
48004759
{
48014760
if (heap->swept_pages) {
48024761
// NOTE: heap->swept_pages needs to be in swept order for gc_sweep_step to work properly.
4762+
// TODO: Change to LIFO to get better shared memory cache benefits across threads (L2/L3)
48034763
struct heap_page *latest = heap->latest_swept_page;
48044764
GC_ASSERT(latest);
48054765
latest->free_next = sweep_page;
@@ -4909,12 +4869,6 @@ gc_sweep_start_heap(rb_objspace_t *objspace, rb_heap_t *heap)
49094869
heap->is_finished_sweeping = false;
49104870
heap->done_background_sweep = false;
49114871
heap->skip_sweep_continue = false;
4912-
// TODO
4913-
/*rb_darray_clear_and_free_without_gc(heap->deferred_sweep_data.object_list);*/
4914-
/*if (rb_darray_size(heap->deferred_sweep_data.object_list) > 0) {*/
4915-
/*psweep_debug(-1, "Error: gc_sweep_start_heap with object_list of size %lu\n", rb_darray_size(heap->deferred_sweep_data.object_list));*/
4916-
/*}*/
4917-
/*GC_ASSERT(rb_darray_size(heap->deferred_sweep_data.object_list) == 0);*/
49184872

49194873
struct heap_page *page = NULL;
49204874

@@ -5278,31 +5232,59 @@ gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap)
52785232
unsigned short deferred_free_freed = 0;
52795233
unsigned short deferred_to_free = sweep_page->pre_deferred_free_slots;
52805234

5281-
VALUE obj_buf[10];
5282-
short deq_sz = 0;
52835235
psweep_debug(-2, "[gc] gc_sweep_step: (heap:%p %ld, page:%p) free_ruby_th: %d, deferred_to_free:%d, pre_freed:%d, pre_empty:%d\n",
52845236
heap, heap - heaps, sweep_page, free_in_user_thread_p, deferred_to_free, sweep_page->pre_freed_slots, sweep_page->pre_empty_slots);
5285-
int deferred_processed = 0;
5286-
while (deferred_processed < deferred_to_free) {
5287-
deq_sz = deq_deferred_sweep_objects(objspace, heap, obj_buf, deferred_to_free - deferred_processed);
5288-
psweep_debug(1, "[gc] gc_sweep_step(heap:%p %ld, page:%p) deq:%d\n", heap, heap - heaps, sweep_page, deq_sz);
5289-
for (short i = 0; i < deq_sz; i++) {
5290-
VALUE obj = obj_buf[i];
5291-
#if VM_CHECK_MODE > 0
5292-
if (GET_HEAP_PAGE(obj) != sweep_page) {
5293-
psweep_debug(0, "Error! bad heap page (got:%p, expecting:%p) obj type:%s\n", GET_HEAP_PAGE(obj), sweep_page, rb_obj_info(obj));
5294-
}
5295-
GC_ASSERT(GET_HEAP_PAGE(obj) == sweep_page);
5296-
#endif
5297-
if (deferred_free(objspace, obj)) {
5298-
deferred_free_freed++;
5237+
5238+
if (deferred_to_free > 0) {
5239+
uintptr_t p = (uintptr_t)sweep_page->start;
5240+
bits_t *deferred_bits = sweep_page->deferred_free_bits;
5241+
short slot_size = sweep_page->slot_size;
5242+
short slot_bits = slot_size / BASE_SLOT_SIZE;
5243+
bits_t slot_mask = heap->slot_bits_mask;
5244+
5245+
int page_rvalue_count = sweep_page->total_slots * slot_bits;
5246+
int bitmap_plane_count = CEILDIV(NUM_IN_PAGE(p) + page_rvalue_count, BITS_BITLENGTH);
5247+
5248+
// First plane: skip out-of-range slots at head of page
5249+
bits_t bitset = deferred_bits[0];
5250+
bitset >>= NUM_IN_PAGE(p);
5251+
bitset &= slot_mask;
5252+
while (bitset) {
5253+
if (bitset & 1) {
5254+
VALUE obj = (VALUE)p;
5255+
GC_ASSERT(GET_HEAP_PAGE(obj) == sweep_page);
5256+
if (deferred_free(objspace, obj)) {
5257+
deferred_free_freed++;
5258+
}
5259+
else {
5260+
deferred_free_final_slots++;
5261+
}
52995262
}
5300-
else {
5301-
deferred_free_final_slots++;
5263+
p += slot_size;
5264+
bitset >>= slot_bits;
5265+
}
5266+
p = (uintptr_t)sweep_page->start + (BITS_BITLENGTH - NUM_IN_PAGE((uintptr_t)sweep_page->start)) * BASE_SLOT_SIZE;
5267+
5268+
for (int i = 1; i < bitmap_plane_count; i++) {
5269+
bitset = deferred_bits[i] & slot_mask;
5270+
while (bitset) {
5271+
if (bitset & 1) {
5272+
VALUE obj = (VALUE)p;
5273+
GC_ASSERT(GET_HEAP_PAGE(obj) == sweep_page);
5274+
if (deferred_free(objspace, obj)) {
5275+
deferred_free_freed++;
5276+
}
5277+
else {
5278+
deferred_free_final_slots++;
5279+
}
5280+
}
5281+
p += slot_size;
5282+
bitset >>= slot_bits;
53025283
}
5303-
deferred_processed++;
5284+
p = (uintptr_t)sweep_page->start + (BITS_BITLENGTH * (i + 1) - NUM_IN_PAGE((uintptr_t)sweep_page->start)) * BASE_SLOT_SIZE;
53045285
}
53055286
}
5287+
53065288
ctx.final_slots = sweep_page->pre_final_slots + deferred_free_final_slots;
53075289
ctx.freed_slots = sweep_page->pre_freed_slots + deferred_free_freed;
53085290
ctx.empty_slots = sweep_page->pre_empty_slots;
@@ -11219,7 +11201,6 @@ rb_gc_impl_after_fork(void *objspace_ptr, rb_pid_t pid)
1121911201
for (int i = 0; i < HEAP_COUNT; i++) {
1122011202
rb_heap_t *heap = &heaps[i];
1122111203

11222-
rb_native_mutex_initialize(&heap->deferred_sweep_data.lock);
1122311204
rb_native_mutex_initialize(&heap->swept_pages_lock);
1122411205
rb_native_cond_initialize(&heap->sweep_page_cond);
1122511206
heap->pre_sweeping_page = NULL;
@@ -11342,9 +11323,7 @@ rb_gc_impl_objspace_init(void *objspace_ptr)
1134211323
heap->slot_bits_mask = slot_bits_masks[i];
1134311324

1134411325
ccan_list_head_init(&heap->pages);
11345-
rb_native_mutex_initialize(&heap->deferred_sweep_data.lock);
1134611326
rb_native_mutex_initialize(&heap->swept_pages_lock);
11347-
rb_darray_make_without_gc(&heap->deferred_sweep_data.object_list, 0);
1134811327
rb_native_cond_initialize(&heap->sweep_page_cond);
1134911328
}
1135011329

0 commit comments

Comments
 (0)