Skip to content

Commit b1b9c32

Browse files
committed
gc.c: Fix a race condition in object_id for shareable objects
If an object is shareable and has no capacity left, it isn't safe to store the object ID in fields as it requires an object resize which can't be done unless all field reads are synchronized. So in this case we have to store the ID externally like we used to.
1 parent 6921bdf commit b1b9c32

5 files changed

Lines changed: 211 additions & 12 deletions

File tree

gc.c

Lines changed: 106 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1879,6 +1879,61 @@ static const rb_data_type_t id2ref_tbl_type = {
18791879
.flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY
18801880
};
18811881

1882+
static VALUE obj_to_id_value = 0;
1883+
static st_table *obj_to_id_tbl = NULL;
1884+
1885+
static void mark_hash_values(st_table *tbl);
1886+
1887+
static void
1888+
obj_to_id_tbl_mark(void *data)
1889+
{
1890+
st_table *table = (st_table *)data;
1891+
if (UNLIKELY(!RB_POSFIXABLE(LAST_OBJECT_ID()))) {
1892+
// It's very unlikely, but if enough object ids were generated, keys may be T_BIGNUM
1893+
mark_hash_values(table);
1894+
}
1895+
// We purposedly don't mark keys, as they are weak references.
1896+
// rb_gc_obj_free_vm_weak_references takes care of cleaning them up.
1897+
}
1898+
1899+
static size_t
1900+
obj_to_id_tbl_memsize(const void *data)
1901+
{
1902+
return rb_st_memsize(data);
1903+
}
1904+
1905+
static void
1906+
obj_to_id_tbl_compact(void *data)
1907+
{
1908+
st_table *table = (st_table *)data;
1909+
if (LIKELY(RB_POSFIXABLE(LAST_OBJECT_ID()))) {
1910+
// We know values are all FIXNUM, so no need to update them.
1911+
gc_ref_update_table_keys_only(table);
1912+
}
1913+
else {
1914+
gc_update_table_refs(table);
1915+
}
1916+
}
1917+
1918+
static void
1919+
obj_to_id_tbl_free(void *data)
1920+
{
1921+
id2ref_tbl = NULL; // clear global ref
1922+
st_table *table = (st_table *)data;
1923+
st_free_table(table);
1924+
}
1925+
1926+
static const rb_data_type_t obj_to_id_tbl_type = {
1927+
.wrap_struct_name = "VM/obj_to_id_table",
1928+
.function = {
1929+
.dmark = obj_to_id_tbl_mark,
1930+
.dfree = obj_to_id_tbl_free,
1931+
.dsize = obj_to_id_tbl_memsize,
1932+
.dcompact = obj_to_id_tbl_compact,
1933+
},
1934+
.flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY
1935+
};
1936+
18821937
#define RUBY_ATOMIC_VALUE_LOAD(x) (VALUE)(RUBY_ATOMIC_PTR_LOAD(x))
18831938

18841939
static VALUE
@@ -1901,23 +1956,46 @@ class_object_id(VALUE klass)
19011956
}
19021957

19031958
static VALUE
1904-
object_id0(VALUE obj)
1959+
object_id0(VALUE obj, bool shareable)
19051960
{
19061961
VALUE id = Qfalse;
19071962

1908-
if (rb_shape_has_object_id(rb_obj_shape(obj))) {
1909-
shape_id_t object_id_shape_id = rb_shape_transition_object_id(obj);
1910-
id = rb_obj_field_get(obj, object_id_shape_id);
1963+
rb_shape_t *current_shape = rb_obj_shape(obj);
1964+
1965+
if (rb_shape_has_object_id(current_shape)) {
1966+
shape_id_t object_id_shape_id = rb_shape_object_id(obj);
1967+
1968+
if (RB_UNLIKELY(shareable && RSHAPE(object_id_shape_id)->type == SHAPE_EXTERNAL_OBJ_ID)) {
1969+
RUBY_ASSERT(obj_to_id_tbl);
1970+
st_lookup(obj_to_id_tbl, obj, &id);
1971+
}
1972+
else {
1973+
id = rb_obj_field_get(obj, object_id_shape_id);
1974+
}
19111975
RUBY_ASSERT(id, "object_id missing");
19121976
return id;
19131977
}
19141978

1915-
// rb_shape_object_id_shape may lock if the current shape has
1916-
// multiple children.
1917-
shape_id_t object_id_shape_id = rb_shape_transition_object_id(obj);
1918-
19191979
id = generate_next_object_id();
1920-
rb_obj_field_set(obj, object_id_shape_id, id);
1980+
// If the object is shareable and doesn't have free capacity we can't safely
1981+
// resize it. So we have to store the object_id externally.
1982+
if (shareable && current_shape->next_field_index == current_shape->capacity) {
1983+
shape_id_t object_id_shape_id = rb_shape_transition_external_object_id(obj);
1984+
1985+
if (RB_UNLIKELY(!obj_to_id_tbl)) {
1986+
obj_to_id_tbl = st_init_numtable();
1987+
obj_to_id_value = TypedData_Wrap_Struct(0, &obj_to_id_tbl_type, obj_to_id_tbl);
1988+
}
1989+
st_insert(obj_to_id_tbl, obj, id);
1990+
rb_shape_set_shape_id(obj, object_id_shape_id);
1991+
}
1992+
else {
1993+
// rb_shape_object_id_shape may lock if the current shape has
1994+
// multiple children.
1995+
shape_id_t object_id_shape_id = rb_shape_transition_object_id(obj);
1996+
rb_obj_field_set(obj, object_id_shape_id, id);
1997+
}
1998+
19211999
if (RB_UNLIKELY(id2ref_tbl)) {
19222000
st_insert(id2ref_tbl, (st_data_t)id, (st_data_t)obj);
19232001
}
@@ -1940,12 +2018,12 @@ object_id(VALUE obj)
19402018

19412019
if (UNLIKELY(rb_gc_multi_ractor_p() && rb_ractor_shareable_p(obj))) {
19422020
unsigned int lock_lev = rb_gc_vm_lock();
1943-
VALUE id = object_id0(obj);
2021+
VALUE id = object_id0(obj, true);
19442022
rb_gc_vm_unlock(lock_lev);
19452023
return id;
19462024
}
19472025

1948-
return object_id0(obj);
2026+
return object_id0(obj, false);
19492027
}
19502028

19512029
static void
@@ -2717,6 +2795,14 @@ mark_key(st_data_t key, st_data_t value, st_data_t data)
27172795
return ST_CONTINUE;
27182796
}
27192797

2798+
static int
2799+
mark_value(st_data_t key, st_data_t value, st_data_t data)
2800+
{
2801+
gc_mark_internal((VALUE)value);
2802+
2803+
return ST_CONTINUE;
2804+
}
2805+
27202806
void
27212807
rb_mark_set(st_table *tbl)
27222808
{
@@ -2725,6 +2811,14 @@ rb_mark_set(st_table *tbl)
27252811
st_foreach(tbl, mark_key, (st_data_t)rb_gc_get_objspace());
27262812
}
27272813

2814+
static void
2815+
mark_hash_values(st_table *tbl)
2816+
{
2817+
if (!tbl) return;
2818+
2819+
st_foreach(tbl, mark_value, 0);
2820+
}
2821+
27282822
static int
27292823
mark_keyvalue(st_data_t key, st_data_t value, st_data_t data)
27302824
{
@@ -5512,6 +5606,7 @@ Init_GC(void)
55125606
{
55135607
#undef rb_intern
55145608
rb_gc_register_address(&id2ref_value);
5609+
rb_gc_register_address(&obj_to_id_value);
55155610

55165611
malloc_offset = gc_compute_malloc_offset();
55175612

gc/gc.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,34 @@ gc_ref_update_table_values_only(st_table *tbl)
135135
}
136136
}
137137

138+
static int
139+
hash_foreach_replace_key(st_data_t key, st_data_t value, st_data_t argp, int error)
140+
{
141+
if (rb_gc_location((VALUE)key) != (VALUE)key) {
142+
return ST_REPLACE;
143+
}
144+
return ST_CONTINUE;
145+
}
146+
147+
static int
148+
hash_replace_ref_key(st_data_t *key, st_data_t *value, st_data_t argp, int existing)
149+
{
150+
*key = rb_gc_location((VALUE)*key);
151+
152+
return ST_CONTINUE;
153+
}
154+
155+
static void
156+
gc_ref_update_table_keys_only(st_table *tbl)
157+
{
158+
if (!tbl || tbl->num_entries == 0) return;
159+
160+
// FIXME: this certainly isn't correct. If a key moved, we need to re-hash.
161+
if (st_foreach_with_replace(tbl, hash_foreach_replace_key, hash_replace_ref_key, 0)) {
162+
rb_raise(rb_eRuntimeError, "hash modified during iteration");
163+
}
164+
}
165+
138166
static int
139167
gc_mark_tbl_no_pin_i(st_data_t key, st_data_t value, st_data_t data)
140168
{

shape.c

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747

4848
static ID id_frozen;
4949
static ID id_t_object;
50+
static ID id_external_object_id;
5051
ID ruby_internal_object_id; // extern
5152

5253
#define LEAF 0
@@ -469,6 +470,7 @@ rb_shape_alloc_new_child(ID id, rb_shape_t *shape, enum shape_type shape_type)
469470

470471
switch (shape_type) {
471472
case SHAPE_OBJ_ID:
473+
case SHAPE_EXTERNAL_OBJ_ID:
472474
new_shape->flags |= SHAPE_FL_HAS_OBJECT_ID;
473475
// fallthrough
474476
case SHAPE_IVAR:
@@ -745,22 +747,57 @@ rb_shape_has_object_id(rb_shape_t *shape)
745747
return shape->flags & SHAPE_FL_HAS_OBJECT_ID;
746748
}
747749

750+
shape_id_t
751+
rb_shape_object_id(VALUE obj)
752+
{
753+
rb_shape_t *shape = rb_obj_shape(obj);
754+
RUBY_ASSERT(shape);
755+
RUBY_ASSERT(shape->flags & SHAPE_FL_HAS_OBJECT_ID);
756+
757+
while (shape->type != SHAPE_OBJ_ID && shape->type != SHAPE_EXTERNAL_OBJ_ID) {
758+
shape = RSHAPE(shape->parent_id);
759+
}
760+
return rb_shape_id(shape);
761+
}
762+
748763
shape_id_t
749764
rb_shape_transition_object_id(VALUE obj)
750765
{
751-
rb_shape_t* shape = rb_obj_shape(obj);
766+
rb_shape_t *shape = rb_obj_shape(obj);
752767
RUBY_ASSERT(shape);
753768

754769
if (shape->flags & SHAPE_FL_HAS_OBJECT_ID) {
755770
while (shape->type != SHAPE_OBJ_ID) {
771+
RUBY_ASSERT(shape->type != SHAPE_EXTERNAL_OBJ_ID);
756772
shape = RSHAPE(shape->parent_id);
757773
}
758774
}
759775
else {
760776
bool dont_care;
761777
shape = get_next_shape_internal(shape, ruby_internal_object_id, SHAPE_OBJ_ID, &dont_care, true);
762778
}
779+
780+
RUBY_ASSERT(shape && shape->type == SHAPE_OBJ_ID);
781+
return rb_shape_id(shape);
782+
}
783+
784+
shape_id_t
785+
rb_shape_transition_external_object_id(VALUE obj)
786+
{
787+
rb_shape_t* shape = rb_obj_shape(obj);
763788
RUBY_ASSERT(shape);
789+
790+
if (shape->flags & SHAPE_FL_HAS_OBJECT_ID) {
791+
while (shape->type != SHAPE_EXTERNAL_OBJ_ID) {
792+
RUBY_ASSERT(shape->type != SHAPE_OBJ_ID);
793+
shape = RSHAPE(shape->parent_id);
794+
}
795+
}
796+
else {
797+
bool dont_care;
798+
shape = get_next_shape_internal(shape, id_external_object_id, SHAPE_EXTERNAL_OBJ_ID, &dont_care, true);
799+
}
800+
RUBY_ASSERT(shape && shape->type == SHAPE_EXTERNAL_OBJ_ID);
764801
return rb_shape_id(shape);
765802
}
766803

@@ -924,6 +961,7 @@ shape_get_iv_index(rb_shape_t *shape, ID id, attr_index_t *value)
924961
return false;
925962
case SHAPE_OBJ_TOO_COMPLEX:
926963
case SHAPE_OBJ_ID:
964+
case SHAPE_EXTERNAL_OBJ_ID:
927965
case SHAPE_FROZEN:
928966
rb_bug("Ivar should not exist on transition");
929967
}
@@ -1009,6 +1047,7 @@ shape_traverse_from_new_root(rb_shape_t *initial_shape, rb_shape_t *dest_shape)
10091047
switch ((enum shape_type)dest_shape->type) {
10101048
case SHAPE_IVAR:
10111049
case SHAPE_OBJ_ID:
1050+
case SHAPE_EXTERNAL_OBJ_ID:
10121051
case SHAPE_FROZEN:
10131052
if (!next_shape->edges) {
10141053
return NULL;
@@ -1080,6 +1119,7 @@ rb_shape_rebuild_shape(rb_shape_t *initial_shape, rb_shape_t *dest_shape)
10801119
midway_shape = shape_get_next_iv_shape(midway_shape, dest_shape->edge_name);
10811120
break;
10821121
case SHAPE_OBJ_ID:
1122+
case SHAPE_EXTERNAL_OBJ_ID:
10831123
case SHAPE_ROOT:
10841124
case SHAPE_FROZEN:
10851125
case SHAPE_T_OBJECT:
@@ -1368,6 +1408,7 @@ Init_default_shapes(void)
13681408

13691409
id_frozen = rb_make_internal_id();
13701410
id_t_object = rb_make_internal_id();
1411+
id_external_object_id = rb_make_internal_id();
13711412
ruby_internal_object_id = rb_make_internal_id();
13721413

13731414
#ifdef HAVE_MMAP

shape.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ enum shape_type {
6767
SHAPE_ROOT,
6868
SHAPE_IVAR,
6969
SHAPE_OBJ_ID,
70+
SHAPE_EXTERNAL_OBJ_ID,
7071
SHAPE_FROZEN,
7172
SHAPE_T_OBJECT,
7273
SHAPE_OBJ_TOO_COMPLEX,
@@ -169,6 +170,9 @@ bool rb_shape_transition_remove_ivar(VALUE obj, ID id, VALUE *removed);
169170
shape_id_t rb_shape_transition_add_ivar(VALUE obj, ID id);
170171
shape_id_t rb_shape_transition_add_ivar_no_warnings(VALUE obj, ID id);
171172
shape_id_t rb_shape_transition_object_id(VALUE obj);
173+
shape_id_t rb_shape_transition_external_object_id(VALUE obj);
174+
shape_id_t rb_shape_object_id(VALUE obj);
175+
172176

173177
bool rb_shape_has_object_id(rb_shape_t *shape);
174178
void rb_shape_free_all(void);

test/ruby/test_object_id.rb

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,3 +195,34 @@ def setup
195195
end
196196
end
197197
end
198+
199+
class TestObjectIdRactor < Test::Unit::TestCase
200+
def test_object_id_race_free
201+
assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}")
202+
begin;
203+
Warning[:experimental] = false
204+
class MyClass
205+
attr_reader :a, :b, :c
206+
def initialize
207+
@a = @b = @c = nil
208+
end
209+
end
210+
211+
N = 10_000
212+
objs = Ractor.make_shareable(N.times.map { MyClass.new })
213+
214+
results = 4.times.map{
215+
Ractor.new(objs) { |objs|
216+
vars = []
217+
ids = []
218+
objs.each do |obj|
219+
vars << obj.a << obj.b << obj.c
220+
ids << obj.object_id
221+
end
222+
[vars, ids]
223+
}
224+
}.map(&:take)
225+
assert_equal 1, results.uniq.size
226+
end;
227+
end
228+
end

0 commit comments

Comments
 (0)