Skip to content

Commit 31a3530

Browse files
Merge pull request #71 from ruby/mvh-batch-obj-free-candidates
Buffer obj_free candidates.
2 parents 2023bba + 7889da7 commit 31a3530

4 files changed

Lines changed: 91 additions & 25 deletions

File tree

gc/mmtk/mmtk.c

Lines changed: 59 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,13 +48,20 @@ struct objspace {
4848
unsigned int fork_hook_vm_lock_lev;
4949
};
5050

51+
#define OBJ_FREE_BUF_CAPACITY 128
52+
5153
/*
 * Per-ractor cache for the MMTk GC binding. Instances are allocated in
 * rb_gc_impl_ractor_cache_alloc() and linked into objspace->ractor_caches.
 */
struct MMTk_ractor_cache {
5254
/* Link in the objspace->ractor_caches list. */
struct ccan_list_node list_node;
5355

5456
/* Mutator handle returned by mmtk_bind_mutator() for this cache. */
MMTk_Mutator *mutator;
5557
bool gc_mutator_p;
5658

5759
MMTk_BumpPointer *bump_pointer;
60+
61+
/*
 * Locally buffered obj_free candidates for which obj_can_parallel_free_p()
 * returned true. Entries [0, obj_free_parallel_count) are valid; they are
 * handed to mmtk_add_obj_free_candidates() when the buffer reaches
 * OBJ_FREE_BUF_CAPACITY or when mmtk_flush_obj_free_buffer() is called.
 */
MMTk_ObjectReference obj_free_parallel_buf[OBJ_FREE_BUF_CAPACITY];
62+
/* Number of valid entries in obj_free_parallel_buf. */
size_t obj_free_parallel_count;
63+
/*
 * Same as above, for objects for which obj_can_parallel_free_p()
 * returned false.
 */
MMTk_ObjectReference obj_free_non_parallel_buf[OBJ_FREE_BUF_CAPACITY];
64+
/* Number of valid entries in obj_free_non_parallel_buf. */
size_t obj_free_non_parallel_count;
5865
};
5966

6067
struct MMTk_final_job {
@@ -143,6 +150,8 @@ rb_mmtk_resume_mutators(void)
143150
}
144151
}
145152

153+
static void mmtk_flush_obj_free_buffer(struct MMTk_ractor_cache *cache);
154+
146155
static void
147156
rb_mmtk_block_for_gc(MMTk_VMMutatorThread mutator)
148157
{
@@ -173,6 +182,11 @@ rb_mmtk_block_for_gc(MMTk_VMMutatorThread mutator)
173182

174183
rb_gc_vm_barrier();
175184

185+
struct MMTk_ractor_cache *rc;
186+
ccan_list_for_each(&objspace->ractor_caches, rc, list_node) {
187+
mmtk_flush_obj_free_buffer(rc);
188+
}
189+
176190
objspace->world_stopped = true;
177191

178192
pthread_cond_broadcast(&objspace->cond_world_stopped);
@@ -584,7 +598,7 @@ rb_gc_impl_ractor_cache_alloc(void *objspace_ptr, void *ractor)
584598
}
585599
objspace->live_ractor_cache_count++;
586600

587-
struct MMTk_ractor_cache *cache = malloc(sizeof(struct MMTk_ractor_cache));
601+
struct MMTk_ractor_cache *cache = calloc(1, sizeof(struct MMTk_ractor_cache));
588602
ccan_list_add(&objspace->ractor_caches, &cache->list_node);
589603

590604
cache->mutator = mmtk_bind_mutator(cache);
@@ -601,6 +615,8 @@ rb_gc_impl_ractor_cache_free(void *objspace_ptr, void *cache_ptr)
601615

602616
ccan_list_del(&cache->list_node);
603617

618+
mmtk_flush_obj_free_buffer(cache);
619+
604620
if (ruby_free_at_exit_p()) {
605621
MMTK_ASSERT(objspace->live_ractor_cache_count > 0);
606622
}
@@ -801,6 +817,42 @@ obj_can_parallel_free_p(VALUE obj)
801817
}
802818
}
803819

820+
static void
821+
mmtk_flush_obj_free_buffer(struct MMTk_ractor_cache *cache)
822+
{
823+
if (cache->obj_free_parallel_count > 0) {
824+
mmtk_add_obj_free_candidates(cache->obj_free_parallel_buf,
825+
cache->obj_free_parallel_count, true);
826+
cache->obj_free_parallel_count = 0;
827+
}
828+
if (cache->obj_free_non_parallel_count > 0) {
829+
mmtk_add_obj_free_candidates(cache->obj_free_non_parallel_buf,
830+
cache->obj_free_non_parallel_count, false);
831+
cache->obj_free_non_parallel_count = 0;
832+
}
833+
}
834+
835+
static inline void
836+
mmtk_buffer_obj_free_candidate(struct MMTk_ractor_cache *cache, VALUE obj)
837+
{
838+
if (obj_can_parallel_free_p(obj)) {
839+
cache->obj_free_parallel_buf[cache->obj_free_parallel_count++] = (MMTk_ObjectReference)obj;
840+
if (cache->obj_free_parallel_count >= OBJ_FREE_BUF_CAPACITY) {
841+
mmtk_add_obj_free_candidates(cache->obj_free_parallel_buf,
842+
cache->obj_free_parallel_count, true);
843+
cache->obj_free_parallel_count = 0;
844+
}
845+
}
846+
else {
847+
cache->obj_free_non_parallel_buf[cache->obj_free_non_parallel_count++] = (MMTk_ObjectReference)obj;
848+
if (cache->obj_free_non_parallel_count >= OBJ_FREE_BUF_CAPACITY) {
849+
mmtk_add_obj_free_candidates(cache->obj_free_non_parallel_buf,
850+
cache->obj_free_non_parallel_count, false);
851+
cache->obj_free_non_parallel_count = 0;
852+
}
853+
}
854+
}
855+
804856
VALUE
805857
rb_gc_impl_new_obj(void *objspace_ptr, void *cache_ptr, VALUE klass, VALUE flags, bool wb_protected, size_t alloc_size)
806858
{
@@ -837,7 +889,7 @@ rb_gc_impl_new_obj(void *objspace_ptr, void *cache_ptr, VALUE klass, VALUE flags
837889
mmtk_post_alloc(ractor_cache->mutator, (void*)alloc_obj, alloc_size, MMTK_ALLOCATION_SEMANTICS_DEFAULT);
838890

839891
// TODO: only add when object needs obj_free to be called
840-
mmtk_add_obj_free_candidate(alloc_obj, obj_can_parallel_free_p((VALUE)alloc_obj));
892+
mmtk_buffer_obj_free_candidate(ractor_cache, (VALUE)alloc_obj);
841893

842894
objspace->total_allocated_objects++;
843895

@@ -1277,6 +1329,11 @@ rb_gc_impl_shutdown_call_finalizer(void *objspace_ptr)
12771329

12781330
unsigned int lev = RB_GC_VM_LOCK();
12791331
{
1332+
struct MMTk_ractor_cache *rc;
1333+
ccan_list_for_each(&objspace->ractor_caches, rc, list_node) {
1334+
mmtk_flush_obj_free_buffer(rc);
1335+
}
1336+
12801337
struct MMTk_RawVecOfObjRef registered_candidates = mmtk_get_all_obj_free_candidates();
12811338
for (size_t i = 0; i < registered_candidates.len; i++) {
12821339
VALUE obj = (VALUE)registered_candidates.ptr[i];

gc/mmtk/mmtk.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,9 @@ void mmtk_post_alloc(MMTk_Mutator *mutator,
123123
size_t bytes,
124124
MMTk_AllocationSemantics semantics);
125125

126-
void mmtk_add_obj_free_candidate(MMTk_ObjectReference object, bool can_parallel_free);
126+
void mmtk_add_obj_free_candidates(const MMTk_ObjectReference *objects,
127+
size_t count,
128+
bool can_parallel_free);
127129

128130
void mmtk_declare_weak_references(MMTk_ObjectReference object);
129131

gc/mmtk/src/api.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -297,12 +297,16 @@ pub unsafe extern "C" fn mmtk_post_alloc(
297297
memory_manager::post_alloc::<Ruby>(unsafe { &mut *mutator }, refer, bytes, semantics)
298298
}
299299

300-
// TODO: Replace with buffered mmtk_add_obj_free_candidates
301300
#[no_mangle]
302-
pub extern "C" fn mmtk_add_obj_free_candidate(object: ObjectReference, can_parallel_free: bool) {
301+
pub unsafe extern "C" fn mmtk_add_obj_free_candidates(
302+
objects: *const ObjectReference,
303+
count: usize,
304+
can_parallel_free: bool,
305+
) {
306+
let objects = unsafe { std::slice::from_raw_parts(objects, count) };
303307
binding()
304308
.weak_proc
305-
.add_obj_free_candidate(object, can_parallel_free)
309+
.add_obj_free_candidates_batch(objects, can_parallel_free)
306310
}
307311

308312
// =============== Weak references ===============

gc/mmtk/src/weak_proc.rs

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
use std::sync::atomic::AtomicUsize;
2-
use std::sync::atomic::Ordering;
31
use std::sync::Mutex;
42

53
use mmtk::scheduler::GCWork;
@@ -15,7 +13,6 @@ use crate::Ruby;
1513
pub struct WeakProcessor {
1614
non_parallel_obj_free_candidates: Mutex<Vec<ObjectReference>>,
1715
parallel_obj_free_candidates: Vec<Mutex<Vec<ObjectReference>>>,
18-
parallel_obj_free_candidates_counter: AtomicUsize,
1916

2017
/// Objects that need `obj_free` called when dying.
2118
/// If it is a bottleneck, replace it with a lock-free data structure,
@@ -34,7 +31,6 @@ impl WeakProcessor {
3431
Self {
3532
non_parallel_obj_free_candidates: Mutex::new(Vec::new()),
3633
parallel_obj_free_candidates: vec![Mutex::new(Vec::new())],
37-
parallel_obj_free_candidates_counter: AtomicUsize::new(0),
3834
weak_references: Mutex::new(Vec::new()),
3935
}
4036
}
@@ -48,27 +44,34 @@ impl WeakProcessor {
4844
}
4945
}
5046

51-
/// Add an object as a candidate for `obj_free`.
47+
/// Add a batch of objects as candidates for `obj_free`.
5248
///
53-
/// Multiple mutators can call it concurrently, so it has `&self`.
54-
pub fn add_obj_free_candidate(&self, object: ObjectReference, can_parallel_free: bool) {
49+
/// Amortizes mutex acquisition over the entire batch. Called when a
50+
/// mutator's local buffer is flushed (buffer full or stop-the-world).
51+
pub fn add_obj_free_candidates_batch(
52+
&self,
53+
objects: &[ObjectReference],
54+
can_parallel_free: bool,
55+
) {
56+
if objects.is_empty() {
57+
return;
58+
}
59+
5560
if can_parallel_free {
56-
// Newly allocated objects are placed in parallel_obj_free_candidates using
57-
// round-robin. This may not be ideal for load balancing.
58-
let idx = self
59-
.parallel_obj_free_candidates_counter
60-
.fetch_add(1, Ordering::Relaxed)
61-
% self.parallel_obj_free_candidates.len();
62-
63-
self.parallel_obj_free_candidates[idx]
64-
.lock()
65-
.unwrap()
66-
.push(object);
61+
let num_buckets = self.parallel_obj_free_candidates.len();
62+
for idx in 0..num_buckets {
63+
let mut bucket = self.parallel_obj_free_candidates[idx].lock().unwrap();
64+
for (i, &obj) in objects.iter().enumerate() {
65+
if i % num_buckets == idx {
66+
bucket.push(obj);
67+
}
68+
}
69+
}
6770
} else {
6871
self.non_parallel_obj_free_candidates
6972
.lock()
7073
.unwrap()
71-
.push(object);
74+
.extend_from_slice(objects);
7275
}
7376
}
7477

0 commit comments

Comments
 (0)