@@ -840,9 +840,6 @@ ruby_modular_gc_init(void)
840840 load_modular_gc_func (undefine_finalizer );
841841 load_modular_gc_func (copy_finalizer );
842842 load_modular_gc_func (shutdown_call_finalizer );
843- // Object ID
844- load_modular_gc_func (object_id );
845- load_modular_gc_func (object_id_to_ref );
846843 // Forking
847844 load_modular_gc_func (before_fork );
848845 load_modular_gc_func (after_fork );
@@ -923,9 +920,6 @@ ruby_modular_gc_init(void)
923920# define rb_gc_impl_undefine_finalizer rb_gc_functions.undefine_finalizer
924921# define rb_gc_impl_copy_finalizer rb_gc_functions.copy_finalizer
925922# define rb_gc_impl_shutdown_call_finalizer rb_gc_functions.shutdown_call_finalizer
926- // Object ID
927- # define rb_gc_impl_object_id rb_gc_functions.object_id
928- # define rb_gc_impl_object_id_to_ref rb_gc_functions.object_id_to_ref
929923// Forking
930924# define rb_gc_impl_before_fork rb_gc_functions.before_fork
931925# define rb_gc_impl_after_fork rb_gc_functions.after_fork
@@ -1213,9 +1207,15 @@ rb_data_free(void *objspace, VALUE obj)
12131207 return true;
12141208}
12151209
1210+ static void obj_free_object_id (VALUE obj );
1211+
12161212void
12171213rb_gc_obj_free_vm_weak_references (VALUE obj )
12181214{
1215+ if (FL_TEST_RAW (obj , FL_SEEN_OBJ_ID )) {
1216+ obj_free_object_id (obj );
1217+ }
1218+
12191219 if (FL_TEST (obj , FL_EXIVAR )) {
12201220 rb_free_generic_ivar ((VALUE )obj );
12211221 FL_UNSET (obj , FL_EXIVAR );
@@ -1759,6 +1759,201 @@ rb_gc_pointer_to_heap_p(VALUE obj)
17591759 return rb_gc_impl_pointer_to_heap_p (rb_gc_get_objspace (), (void * )obj );
17601760}
17611761
1762+ #define OBJ_ID_INCREMENT (RUBY_IMMEDIATE_MASK + 1)
1763+ #define OBJ_ID_INITIAL (OBJ_ID_INCREMENT)
1764+
1765+ static unsigned long long next_object_id = OBJ_ID_INITIAL ;
1766+ static VALUE id_to_obj_value = 0 ;
1767+ static st_table * id_to_obj_tbl = NULL ;
1768+
1769+ static int
1770+ object_id_cmp (st_data_t x , st_data_t y )
1771+ {
1772+ if (RB_TYPE_P (x , T_BIGNUM )) {
1773+ return !rb_big_eql (x , y );
1774+ }
1775+ else {
1776+ return x != y ;
1777+ }
1778+ }
1779+
1780+ static st_index_t
1781+ object_id_hash (st_data_t n )
1782+ {
1783+ return FIX2LONG (rb_hash ((VALUE )n ));
1784+ }
1785+
1786+ static const struct st_hash_type object_id_hash_type = {
1787+ object_id_cmp ,
1788+ object_id_hash ,
1789+ };
1790+
1791+ static void gc_mark_tbl_no_pin (st_table * table );
1792+
1793+ static void
1794+ id_to_obj_tbl_mark (void * data )
1795+ {
1796+ st_table * table = (st_table * )data ;
1797+ if (UNLIKELY (!RB_POSFIXABLE (next_object_id ))) {
1798+ // It's very unlikely, but if enough object ids were generated, keys may be T_BIGNUM
1799+ rb_mark_set (table );
1800+ }
1801+ // We purposedly don't mark values, as they are weak references.
1802+ // rb_gc_obj_free_vm_weak_references takes care of cleaning them up.
1803+ }
1804+
1805+ static size_t
1806+ id_to_obj_tbl_memsize (const void * data )
1807+ {
1808+ return rb_st_memsize (data );
1809+ }
1810+
1811+ static void
1812+ id_to_obj_tbl_compact (void * data )
1813+ {
1814+ st_table * table = (st_table * )data ;
1815+ if (LIKELY (RB_POSFIXABLE (next_object_id ))) {
1816+ // We know keys are all FIXNUM, so no need to update them.
1817+ gc_ref_update_table_values_only (table );
1818+ }
1819+ else {
1820+ gc_update_table_refs (table );
1821+ }
1822+ }
1823+
1824+ static const rb_data_type_t id_to_obj_tbl_type = {
1825+ .wrap_struct_name = "VM/id_to_obj_table" ,
1826+ .function = {
1827+ .dmark = id_to_obj_tbl_mark ,
1828+ .dfree = (RUBY_DATA_FUNC )st_free_table ,
1829+ .dsize = id_to_obj_tbl_memsize ,
1830+ .dcompact = id_to_obj_tbl_compact ,
1831+ },
1832+ .flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY
1833+ };
1834+
/* Return the object ID for a heap object, lazily assigning one on first use.
 *
 * IDs come from next_object_id in steps of OBJ_ID_INCREMENT, so they never
 * collide with special constants.  Depending on the object's shape, the id
 * is stored either as a hidden ivar (TOO_COMPLEX shapes) or in the ivar
 * slot added by the object_id shape transition.  The whole operation runs
 * under the VM lock because next_object_id and id_to_obj_tbl are
 * process-global.
 */
static VALUE
object_id(VALUE obj)
{
    VALUE id;

    rb_shape_t *shape = rb_shape_get_shape(obj);
    unsigned int lock_lev;

    if (shape->type == SHAPE_OBJ_TOO_COMPLEX) {
        // we could not lock if the object isn't shareable, but may not be worth the effort
        lock_lev = rb_gc_vm_lock();
        id = rb_attr_get(obj, internal_object_id);
        if (NIL_P(id)) {
            // First request: allocate a fresh id and record it as a hidden ivar.
            id = ULL2NUM(next_object_id);
            next_object_id += OBJ_ID_INCREMENT;
            rb_ivar_set_internal(obj, internal_object_id, id);
            // Keep the lazily-built reverse table in sync, if it exists.
            if (RB_UNLIKELY(id_to_obj_tbl)) {
                st_insert(id_to_obj_tbl, (st_data_t)id, (st_data_t)obj);
            }
            FL_SET_RAW(obj, FL_SEEN_OBJ_ID);
        }

        rb_gc_vm_unlock(lock_lev);
        return id;
    }

    if (rb_shape_has_object_id(shape)) {
        // We could avoid locking if the object isn't shareable
        lock_lev = rb_gc_vm_lock();

        // Id was already assigned: read it back from the shape's ivar slot.
        rb_shape_t *object_id_shape = rb_shape_object_id_shape(obj);
        id = rb_ivar_at(obj, object_id_shape);

        rb_gc_vm_unlock(lock_lev);
        return id;
    }
    else {
        // We could avoid locking if the object isn't shareable
        // but we'll lock anyway to lookup the next shape, and
        // we'd at least need to generate the object_id using atomics.
        lock_lev = rb_gc_vm_lock();

        id = ULL2NUM(next_object_id);
        next_object_id += OBJ_ID_INCREMENT;

        rb_shape_t *object_id_shape = rb_shape_object_id_shape(obj);
        if (object_id_shape->type == SHAPE_OBJ_TOO_COMPLEX) {
            // The transition degraded to TOO_COMPLEX: move ivars into a
            // hash and store the id as a hidden ivar instead.
            rb_evict_ivars_to_hash(obj);
            rb_ivar_set_internal(obj, internal_object_id, id);
        }
        else {
            rb_ivar_set_at_internal(obj, object_id_shape, id);
        }
        if (RB_UNLIKELY(id_to_obj_tbl)) {
            st_insert(id_to_obj_tbl, (st_data_t)id, (st_data_t)obj);
        }
        FL_SET_RAW(obj, FL_SEEN_OBJ_ID);

        rb_gc_vm_unlock(lock_lev);
        return id;
    }
}
1896+
1897+ static void
1898+ build_id_to_obj_i (VALUE obj , void * data )
1899+ {
1900+ st_table * id_to_obj_tbl = (st_table * )data ;
1901+ if (FL_TEST_RAW (obj , FL_SEEN_OBJ_ID )) {
1902+ st_insert (id_to_obj_tbl , rb_obj_id (obj ), obj );
1903+ }
1904+ }
1905+
/* Look up a live object from its object ID (the ObjectSpace._id2ref path).
 *
 * The reverse table id_to_obj_tbl is built lazily on first use: under the
 * VM lock and a ractor barrier, every object carrying FL_SEEN_OBJ_ID is
 * walked and inserted.  Raises RangeError when the id was never handed
 * out, or when it belonged to an object that has been collected.
 */
static VALUE
object_id_to_ref(void *objspace_ptr, VALUE object_id)
{
    rb_objspace_t *objspace = objspace_ptr;

    unsigned int lev = rb_gc_vm_lock();

    if (!id_to_obj_tbl) {
        rb_gc_vm_barrier(); // stop other ractors

        // First lookup ever: materialize the id -> object table from the
        // heap, and wrap it in a TypedData VALUE so the GC marks/compacts it.
        id_to_obj_tbl = st_init_table(&object_id_hash_type);
        id_to_obj_value = TypedData_Wrap_Struct(0, &id_to_obj_tbl_type, id_to_obj_tbl);
        rb_gc_impl_each_object(objspace, build_id_to_obj_i, (void *)id_to_obj_tbl);
    }

    VALUE obj;
    // Values are weak references, so a hit may still be a dead object.
    bool found = st_lookup(id_to_obj_tbl, object_id, &obj) && !rb_gc_impl_garbage_object_p(objspace, obj);

    rb_gc_vm_unlock(lev);

    if (found) {
        return obj;
    }

    // Distinguish "id never allocated" from "object already recycled".
    // NOTE(review): treats the VALUE returned by `>=` as a C boolean
    // (works because Qfalse == 0) — confirm this matches project style.
    if (rb_funcall(object_id, rb_intern(">="), 1, ULL2NUM(next_object_id))) {
        rb_raise(rb_eRangeError, "%+"PRIsVALUE" is not an id value", rb_funcall(object_id, rb_intern("to_s"), 1, INT2FIX(10)));
    }
    else {
        rb_raise(rb_eRangeError, "%+"PRIsVALUE" is a recycled object", rb_funcall(object_id, rb_intern("to_s"), 1, INT2FIX(10)));
    }
}
1937+
1938+ static void
1939+ obj_free_object_id (VALUE obj )
1940+ {
1941+ GC_ASSERT (BUILTIN_TYPE (obj ) == T_NONE || FL_TEST (obj , FL_SEEN_OBJ_ID ));
1942+
1943+ if (RB_UNLIKELY (id_to_obj_tbl )) {
1944+ st_data_t id = (st_data_t )rb_obj_id (obj );
1945+ GC_ASSERT (id );
1946+ FL_UNSET (obj , FL_SEEN_OBJ_ID );
1947+
1948+ if (!st_delete (id_to_obj_tbl , & id , NULL )) {
1949+ rb_bug ("Object ID seen, but not in id_to_obj table: %s" , rb_obj_info (obj ));
1950+ }
1951+ }
1952+ else {
1953+ FL_UNSET (obj , FL_SEEN_OBJ_ID );
1954+ }
1955+ }
1956+
17621957/*
17631958 * call-seq:
17641959 * ObjectSpace._id2ref(object_id) -> an_object
@@ -1806,7 +2001,7 @@ id2ref(VALUE objid)
18062001 }
18072002 }
18082003
1809- VALUE obj = rb_gc_impl_object_id_to_ref (rb_gc_get_objspace (), objid );
2004+ VALUE obj = object_id_to_ref (rb_gc_get_objspace (), objid );
18102005 if (!rb_multi_ractor_p () || rb_ractor_shareable_p (obj )) {
18112006 return obj ;
18122007 }
@@ -1823,7 +2018,7 @@ os_id2ref(VALUE os, VALUE objid)
18232018}
18242019
18252020static VALUE
1826- rb_find_object_id (void * objspace , VALUE obj , VALUE (* get_heap_object_id )(void * , VALUE ))
2021+ rb_find_object_id (void * objspace , VALUE obj , VALUE (* get_heap_object_id )(VALUE ))
18272022{
18282023 if (SPECIAL_CONST_P (obj )) {
18292024#if SIZEOF_LONG == SIZEOF_VOIDP
@@ -1833,11 +2028,11 @@ rb_find_object_id(void *objspace, VALUE obj, VALUE (*get_heap_object_id)(void *,
18332028#endif
18342029 }
18352030
1836- return get_heap_object_id (objspace , obj );
2031+ return get_heap_object_id (obj );
18372032}
18382033
18392034static VALUE
1840- nonspecial_obj_id (void * _objspace , VALUE obj )
2035+ nonspecial_obj_id (VALUE obj )
18412036{
18422037#if SIZEOF_LONG == SIZEOF_VOIDP
18432038 return (VALUE )((SIGNED_VALUE )(obj )|FIXNUM_FLAG );
@@ -1888,7 +2083,7 @@ rb_obj_id(VALUE obj)
18882083 * Otherwise, the object ID is a Numeric that is a non-zero multiple of
18892084 * (RUBY_IMMEDIATE_MASK + 1) which guarantees that it does not collide with
18902085 * any immediates. */
1891- return rb_find_object_id (rb_gc_get_objspace (), obj , rb_gc_impl_object_id );
2086+ return rb_find_object_id (rb_gc_get_objspace (), obj , object_id );
18922087}
18932088
18942089static enum rb_id_table_iterator_result
@@ -4992,6 +5187,8 @@ void
49925187Init_GC (void )
49935188{
49945189#undef rb_intern
5190+ rb_gc_register_address (& id_to_obj_value );
5191+
49955192 malloc_offset = gc_compute_malloc_offset ();
49965193
49975194 rb_mGC = rb_define_module ("GC" );
0 commit comments