@@ -454,11 +454,6 @@ typedef struct mark_stack {
454454
455455typedef int (* gc_compact_compare_func )(const void * l , const void * r , void * d );
456456
457- typedef struct {
458- rb_darray (VALUE ) object_list ;
459- rb_nativethread_lock_t lock ;
460- } deferred_sweep_data_t ;
461-
462457typedef struct rb_heap_struct {
463458 short slot_size ;
464459 bits_t slot_bits_mask ;
@@ -492,7 +487,6 @@ typedef struct rb_heap_struct {
492487 rb_nativethread_cond_t sweep_page_cond ; // associated with global sweep lock
493488 rb_nativethread_lock_t swept_pages_lock ;
494489 size_t pre_swept_slots_deferred ;
495- deferred_sweep_data_t deferred_sweep_data ;
496490 bool is_finished_sweeping ;
497491 bool done_background_sweep ;
498492 bool skip_sweep_continue ; // skip current sweep continue
@@ -860,6 +854,7 @@ struct heap_page {
860854 /* If set, the object is not movable */
861855 bits_t pinned_bits [HEAP_PAGE_BITMAP_LIMIT ];
862856 bits_t age_bits [HEAP_PAGE_BITMAP_LIMIT * RVALUE_AGE_BIT_COUNT ];
857+ bits_t deferred_free_bits [HEAP_PAGE_BITMAP_LIMIT ];
863858};
864859
865860/*
@@ -1074,7 +1069,7 @@ typedef struct lock_stats {
10741069
10751070static lock_stats_t sweep_lock_stats = {"objspace->sweep_lock" , {{0 }}, 0 };
10761071static lock_stats_t swept_pages_lock_stats = {"heap->swept_pages_lock" , {{0 }}, 0 };
1077- static lock_stats_t deferred_sweep_data_lock_stats = { "heap->deferred_sweep_data.lock" , {{ 0 }}, 0 };
1072+
10781073
10791074static lock_callsite_stats_t *
10801075find_or_create_callsite (lock_stats_t * stats , const char * function , int line )
@@ -1125,9 +1120,9 @@ print_lock_stats(void)
11251120 fprintf (stderr , "%-40s %-30s %12s %12s %10s\n" , "Lock Name" , "Callsite" , "Uncontended" , "Contended" , "Ratio" );
11261121 fprintf (stderr , "%-40s %-30s %12s %12s %10s\n" , "---------" , "--------" , "-----------" , "---------" , "-----" );
11271122
1128- lock_stats_t * all_stats [] = {& sweep_lock_stats , & swept_pages_lock_stats , & deferred_sweep_data_lock_stats };
1123+ lock_stats_t * all_stats [] = {& sweep_lock_stats , & swept_pages_lock_stats };
11291124
1130- for (int i = 0 ; i < 3 ; i ++ ) {
1125+ for (int i = 0 ; i < 2 ; i ++ ) {
11311126 lock_stats_t * stats = all_stats [i ];
11321127
11331128 /* Sort callsites by total contentions (descending) */
@@ -1955,8 +1950,8 @@ rb_gc_impl_garbage_object_p(void *objspace_ptr, VALUE ptr)
19551950 GC_ASSERT (marked == RVALUE_MARKED_ATOMIC (objspace , ptr ));
19561951 return during_lazy_sweep && !marked && RUBY_ATOMIC_LOAD (page -> before_sweep );
19571952 }
1958- // we're currently lazy sweeping with the sweep thread in background mode
19591953 else if (during_lazy_sweep ) {
1954+ // we're currently lazy sweeping with the sweep thread
19601955 bool marked = RVALUE_MARKED_ATOMIC (objspace , ptr ); // load it atomically so it can't be re-ordered past the next atomic load
19611956 bool before_sweep = RUBY_ATOMIC_LOAD (page -> before_sweep );
19621957 bool is_garbage = !marked && before_sweep ;
@@ -4095,32 +4090,6 @@ wait_for_background_sweeping_to_finish(rb_objspace_t *objspace, bool abort_curre
40954090 sweep_lock_unlock (& objspace -> sweep_lock );
40964091}
40974092
4098- // dequeue MIN(left_to_deq, 10) objects from the deferred object list into `obj_buf`, returning the amount dequeued.
4099- static short
4100- deq_deferred_sweep_objects (rb_objspace_t * objspace , rb_heap_t * heap , VALUE obj_buf [10 ], short left_to_deq )
4101- {
4102- GC_ASSERT (left_to_deq > 0 );
4103- short to_deq = 10 ;
4104- if (left_to_deq < 10 ) to_deq = left_to_deq ;
4105- #if PSWEEP_LOCK_STATS > 0
4106- instrumented_lock_acquire (& heap -> deferred_sweep_data .lock , & deferred_sweep_data_lock_stats );
4107- #else
4108- rb_native_mutex_lock (& heap -> deferred_sweep_data .lock );
4109- #endif
4110- {
4111- if ((size_t )to_deq > rb_darray_size (heap -> deferred_sweep_data .object_list )) {
4112- psweep_debug (0 , "Error: trying to deq %hi from object_list of size %lu\n" , to_deq , rb_darray_size (heap -> deferred_sweep_data .object_list ));
4113- }
4114- GC_ASSERT ((size_t )to_deq <= rb_darray_size (heap -> deferred_sweep_data .object_list ));
4115- for (short i = 0 ; i < to_deq ; i ++ ) {
4116- obj_buf [i ] = rb_darray_get (heap -> deferred_sweep_data .object_list , i );
4117- }
4118- }
4119- rb_darray_shift_n (heap -> deferred_sweep_data .object_list , to_deq );
4120- rb_native_mutex_unlock (& heap -> deferred_sweep_data .lock );
4121- return to_deq ;
4122- }
4123-
41244093// Free the object in a Ruby thread. Return whether or not we put the slot back on the page's freelist.
41254094static bool
41264095deferred_free (rb_objspace_t * objspace , VALUE obj )
@@ -4389,22 +4358,10 @@ heap_page_freelist_append(struct heap_page *page, struct free_slot *freelist)
43894358static void
43904359sweep_in_ruby_thread (rb_objspace_t * objspace , struct heap_page * page , VALUE obj , bool nozombie )
43914360{
4392- rb_heap_t * heap = page -> heap ;
4393- #if PSWEEP_LOCK_STATS > 0
4394- instrumented_lock_acquire (& heap -> deferred_sweep_data .lock , & deferred_sweep_data_lock_stats );
4395- #else
4396- rb_native_mutex_lock (& heap -> deferred_sweep_data .lock );
4397- #endif
4398- {
4399- page -> pre_deferred_free_slots += 1 ;
4400- psweep_debug (1 , "[sweep] register sweep later: page(%p), obj(%p) %s\n" , (void * )page , (void * )obj , rb_obj_info (obj ));
4401- GC_ASSERT (BUILTIN_TYPE (obj ) != T_NONE );
4402- rb_darray_append_without_gc (& heap -> deferred_sweep_data .object_list , obj );
4403- /*if (rb_darray_size(heap->deferred_sweep_data.object_list) > 128) {*/
4404- /*fprintf(stderr, "deferred sweep data object list size:%lu\n", rb_darray_size(heap->deferred_sweep_data.object_list));*/
4405- /*}*/
4406- }
4407- rb_native_mutex_unlock (& heap -> deferred_sweep_data .lock );
4361+ page -> pre_deferred_free_slots += 1 ;
4362+ psweep_debug (1 , "[sweep] register sweep later: page(%p), obj(%p) %s\n" , (void * )page , (void * )obj , rb_obj_info (obj ));
4363+ GC_ASSERT (BUILTIN_TYPE (obj ) != T_NONE );
4364+ MARK_IN_BITMAP (page -> deferred_free_bits , obj );
44084365}
44094366
44104367bool
@@ -4596,6 +4553,7 @@ gc_pre_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *pa
45964553 psweep_debug (1 , "[sweep] gc_pre_sweep_page(heap:%p page:%p) start\n" , heap , page );
45974554 GC_ASSERT (page -> heap == heap );
45984555 page -> pre_deferred_free_slots = 0 ;
4556+ memset (page -> deferred_free_bits , 0 , sizeof (page -> deferred_free_bits ));
45994557 page -> pre_zombie_slots = 0 ;
46004558 page -> pre_freed_malloc_bytes = 0 ;
46014559 current_sweep_thread_page = page ;
@@ -4718,6 +4676,7 @@ clear_pre_sweep_fields(struct heap_page *page)
47184676{
47194677 page -> pre_freed_slots = 0 ;
47204678 page -> pre_deferred_free_slots = 0 ;
4679+ memset (page -> deferred_free_bits , 0 , sizeof (page -> deferred_free_bits ));
47214680 page -> pre_empty_slots = 0 ;
47224681 page -> pre_final_slots = 0 ;
47234682 page -> pre_zombie_slots = 0 ;
@@ -4800,6 +4759,7 @@ gc_sweep_step_worker(rb_objspace_t *objspace, rb_heap_t *heap)
48004759 {
48014760 if (heap -> swept_pages ) {
48024761 // NOTE: heap->swept_pages needs to be in swept order for gc_sweep_step to work properly.
4762+ // TODO: Switch to LIFO order for better shared-cache (L2/L3) locality across threads; gc_sweep_step must first stop depending on swept order (see NOTE above).
48034763 struct heap_page * latest = heap -> latest_swept_page ;
48044764 GC_ASSERT (latest );
48054765 latest -> free_next = sweep_page ;
@@ -4909,12 +4869,6 @@ gc_sweep_start_heap(rb_objspace_t *objspace, rb_heap_t *heap)
49094869 heap -> is_finished_sweeping = false;
49104870 heap -> done_background_sweep = false;
49114871 heap -> skip_sweep_continue = false;
4912- // TODO
4913- /*rb_darray_clear_and_free_without_gc(heap->deferred_sweep_data.object_list);*/
4914- /*if (rb_darray_size(heap->deferred_sweep_data.object_list) > 0) {*/
4915- /*psweep_debug(-1, "Error: gc_sweep_start_heap with object_list of size %lu\n", rb_darray_size(heap->deferred_sweep_data.object_list));*/
4916- /*}*/
4917- /*GC_ASSERT(rb_darray_size(heap->deferred_sweep_data.object_list) == 0);*/
49184872
49194873 struct heap_page * page = NULL ;
49204874
@@ -5278,31 +5232,59 @@ gc_sweep_step(rb_objspace_t *objspace, rb_heap_t *heap)
52785232 unsigned short deferred_free_freed = 0 ;
52795233 unsigned short deferred_to_free = sweep_page -> pre_deferred_free_slots ;
52805234
5281- VALUE obj_buf [10 ];
5282- short deq_sz = 0 ;
52835235 psweep_debug (-2 , "[gc] gc_sweep_step: (heap:%p %ld, page:%p) free_ruby_th: %d, deferred_to_free:%d, pre_freed:%d, pre_empty:%d\n" ,
52845236 heap , heap - heaps , sweep_page , free_in_user_thread_p , deferred_to_free , sweep_page -> pre_freed_slots , sweep_page -> pre_empty_slots );
5285- int deferred_processed = 0 ;
5286- while (deferred_processed < deferred_to_free ) {
5287- deq_sz = deq_deferred_sweep_objects (objspace , heap , obj_buf , deferred_to_free - deferred_processed );
5288- psweep_debug (1 , "[gc] gc_sweep_step(heap:%p %ld, page:%p) deq:%d\n" , heap , heap - heaps , sweep_page , deq_sz );
5289- for (short i = 0 ; i < deq_sz ; i ++ ) {
5290- VALUE obj = obj_buf [i ];
5291- #if VM_CHECK_MODE > 0
5292- if (GET_HEAP_PAGE (obj ) != sweep_page ) {
5293- psweep_debug (0 , "Error! bad heap page (got:%p, expecting:%p) obj type:%s\n" , GET_HEAP_PAGE (obj ), sweep_page , rb_obj_info (obj ));
5294- }
5295- GC_ASSERT (GET_HEAP_PAGE (obj ) == sweep_page );
5296- #endif
5297- if (deferred_free (objspace , obj )) {
5298- deferred_free_freed ++ ;
5237+
5238+ if (deferred_to_free > 0 ) {
5239+ uintptr_t p = (uintptr_t )sweep_page -> start ;
5240+ bits_t * deferred_bits = sweep_page -> deferred_free_bits ;
5241+ short slot_size = sweep_page -> slot_size ;
5242+ short slot_bits = slot_size / BASE_SLOT_SIZE ;
5243+ bits_t slot_mask = heap -> slot_bits_mask ;
5244+
5245+ int page_rvalue_count = sweep_page -> total_slots * slot_bits ;
5246+ int bitmap_plane_count = CEILDIV (NUM_IN_PAGE (p ) + page_rvalue_count , BITS_BITLENGTH );
5247+
5248+ // First plane: skip out-of-range slots at head of page
5249+ bits_t bitset = deferred_bits [0 ];
5250+ bitset >>= NUM_IN_PAGE (p );
5251+ bitset &= slot_mask ;
5252+ while (bitset ) {
5253+ if (bitset & 1 ) {
5254+ VALUE obj = (VALUE )p ;
5255+ GC_ASSERT (GET_HEAP_PAGE (obj ) == sweep_page );
5256+ if (deferred_free (objspace , obj )) {
5257+ deferred_free_freed ++ ;
5258+ }
5259+ else {
5260+ deferred_free_final_slots ++ ;
5261+ }
52995262 }
5300- else {
5301- deferred_free_final_slots ++ ;
5263+ p += slot_size ;
5264+ bitset >>= slot_bits ;
5265+ }
5266+ p = (uintptr_t )sweep_page -> start + (BITS_BITLENGTH - NUM_IN_PAGE ((uintptr_t )sweep_page -> start )) * BASE_SLOT_SIZE ;
5267+
5268+ for (int i = 1 ; i < bitmap_plane_count ; i ++ ) {
5269+ bitset = deferred_bits [i ] & slot_mask ;
5270+ while (bitset ) {
5271+ if (bitset & 1 ) {
5272+ VALUE obj = (VALUE )p ;
5273+ GC_ASSERT (GET_HEAP_PAGE (obj ) == sweep_page );
5274+ if (deferred_free (objspace , obj )) {
5275+ deferred_free_freed ++ ;
5276+ }
5277+ else {
5278+ deferred_free_final_slots ++ ;
5279+ }
5280+ }
5281+ p += slot_size ;
5282+ bitset >>= slot_bits ;
53025283 }
5303- deferred_processed ++ ;
5284+ p = ( uintptr_t ) sweep_page -> start + ( BITS_BITLENGTH * ( i + 1 ) - NUM_IN_PAGE (( uintptr_t ) sweep_page -> start )) * BASE_SLOT_SIZE ;
53045285 }
53055286 }
5287+
53065288 ctx .final_slots = sweep_page -> pre_final_slots + deferred_free_final_slots ;
53075289 ctx .freed_slots = sweep_page -> pre_freed_slots + deferred_free_freed ;
53085290 ctx .empty_slots = sweep_page -> pre_empty_slots ;
@@ -11219,7 +11201,6 @@ rb_gc_impl_after_fork(void *objspace_ptr, rb_pid_t pid)
1121911201 for (int i = 0 ; i < HEAP_COUNT ; i ++ ) {
1122011202 rb_heap_t * heap = & heaps [i ];
1122111203
11222- rb_native_mutex_initialize (& heap -> deferred_sweep_data .lock );
1122311204 rb_native_mutex_initialize (& heap -> swept_pages_lock );
1122411205 rb_native_cond_initialize (& heap -> sweep_page_cond );
1122511206 heap -> pre_sweeping_page = NULL ;
@@ -11342,9 +11323,7 @@ rb_gc_impl_objspace_init(void *objspace_ptr)
1134211323 heap -> slot_bits_mask = slot_bits_masks [i ];
1134311324
1134411325 ccan_list_head_init (& heap -> pages );
11345- rb_native_mutex_initialize (& heap -> deferred_sweep_data .lock );
1134611326 rb_native_mutex_initialize (& heap -> swept_pages_lock );
11347- rb_darray_make_without_gc (& heap -> deferred_sweep_data .object_list , 0 );
1134811327 rb_native_cond_initialize (& heap -> sweep_page_cond );
1134911328 }
1135011329
0 commit comments