Skip to content

Commit a8f3c34

Browse files
committed
ZJIT: Add missing guard on ivar access on T_{DATA,CLASS,MODULE}
T_DATA, T_MODULE, and T_CLASS objects can share the exact same shape. The shape on these objects give an index off of the fields array to get at the ivar. When two objects share the same shape, but differ in the T_* builtin type, however, the way to get to the fields array differ. Previously, we did not guard the builtin type, so the guard allowed using say, loading `t_string[RCLASS_OFFSET_PRIME_FIELDS_OBJ]`. A classic type confusion situation that crashed. Guard the builtin type, in addition to the shape. Note that this is not necessary for T_OBJECTs since they never have the same shape as other builtin types.
1 parent 8824fd3 commit a8f3c34

File tree

8 files changed

+161
-22
lines changed

8 files changed

+161
-22
lines changed

zjit/src/codegen.rs

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2549,7 +2549,7 @@ fn gen_guard_type(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, guard
25492549

25502550
asm.cmp(klass, Opnd::Value(expected_class));
25512551
asm.jne(jit, side_exit);
2552-
} else if guard_type.is_subtype(types::String) {
2552+
} else if guard_type.is_subtype(types::TypedTData) {
25532553
let side = side_exit(jit, state, GuardType(guard_type));
25542554

25552555
// Check special constant
@@ -2560,13 +2560,15 @@ fn gen_guard_type(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, guard
25602560
asm.cmp(val, Qfalse.into());
25612561
asm.je(jit, side.clone());
25622562

2563+
// Check the builtin type and RUBY_TYPED_FL_IS_TYPED_DATA with mask and compare
25632564
let val = asm.load_mem(val);
2564-
25652565
let flags = asm.load(Opnd::mem(VALUE_BITS, val, RUBY_OFFSET_RBASIC_FLAGS));
2566-
let tag = asm.and(flags, Opnd::UImm(RUBY_T_MASK as u64));
2567-
asm.cmp(tag, Opnd::UImm(RUBY_T_STRING as u64));
2566+
let mask = RUBY_T_MASK.to_usize() | RUBY_TYPED_FL_IS_TYPED_DATA.to_usize();
2567+
let expected = RUBY_T_DATA.to_usize() | RUBY_TYPED_FL_IS_TYPED_DATA.to_usize();
2568+
let masked = asm.and(flags, mask.into());
2569+
asm.cmp(masked, expected.into());
25682570
asm.jne(jit, side);
2569-
} else if guard_type.is_subtype(types::Array) {
2571+
} else if let Some(builtin_type) = guard_type.builtin_type_equivalent() {
25702572
let side = side_exit(jit, state, GuardType(guard_type));
25712573

25722574
// Check special constant
@@ -2577,11 +2579,11 @@ fn gen_guard_type(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, guard
25772579
asm.cmp(val, Qfalse.into());
25782580
asm.je(jit, side.clone());
25792581

2582+
// Mask and check the builtin type
25802583
let val = asm.load_mem(val);
2581-
25822584
let flags = asm.load(Opnd::mem(VALUE_BITS, val, RUBY_OFFSET_RBASIC_FLAGS));
25832585
let tag = asm.and(flags, Opnd::UImm(RUBY_T_MASK as u64));
2584-
asm.cmp(tag, Opnd::UImm(RUBY_T_ARRAY as u64));
2586+
asm.cmp(tag, Opnd::UImm(builtin_type as u64));
25852587
asm.jne(jit, side);
25862588
} else if guard_type.bit_equal(types::HeapBasicObject) {
25872589
let side_exit = side_exit(jit, state, GuardType(guard_type));

zjit/src/codegen_tests.rs

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3623,6 +3623,97 @@ fn test_attr_accessor_getivar() {
36233623
assert_snapshot!(assert_compiles("c = C.new; [test(c), test(c)]"), @"[4, 4]");
36243624
}
36253625

3626+
#[test]
3627+
fn test_getivar_t_data_then_string() {
3628+
// This is a regression test for a type confusion miscomp where
3629+
// we end up reading the fields object using an offset off of a
3630+
// string, assuming that it has a the same layout as a T_DATA object.
3631+
// At the time of writing the fields object of strings are stored
3632+
// in a global table, out-of-line of each string.
3633+
// The string and the thread end up sharing one shape ID.
3634+
set_call_threshold(2);
3635+
eval(r#"
3636+
module GetThousand
3637+
def test = @var1000
3638+
end
3639+
class Thread
3640+
include GetThousand
3641+
end
3642+
class String
3643+
include GetThousand
3644+
end
3645+
OBJ = Thread.new { }
3646+
OBJ.join
3647+
STR = +''
3648+
(0..1000).each do |i|
3649+
ivar_name = :"@var#{i}"
3650+
OBJ.instance_variable_set(ivar_name, i)
3651+
STR.instance_variable_set(ivar_name, i)
3652+
end
3653+
OBJ.test; OBJ.test # profile and compile for Thread (T_DATA)
3654+
"#);
3655+
assert_snapshot!(assert_compiles("[STR.test, STR.test]"), @"[1000, 1000]");
3656+
}
3657+
3658+
#[test]
3659+
fn test_getivar_t_object_then_string() {
3660+
// This test construct an object and a string that have the same set of ivars.
3661+
// They wouldn't share the same shape ID, though, and we rely on this fact in
3662+
// our guards.
3663+
set_call_threshold(2);
3664+
eval(r#"
3665+
module GetThousand
3666+
def test = @var1000
3667+
end
3668+
class MyObject
3669+
include GetThousand
3670+
end
3671+
class String
3672+
include GetThousand
3673+
end
3674+
OBJ = MyObject.new
3675+
STR = +''
3676+
(0..1000).each do |i|
3677+
ivar_name = :"@var#{i}"
3678+
OBJ.instance_variable_set(ivar_name, i)
3679+
STR.instance_variable_set(ivar_name, i)
3680+
end
3681+
OBJ.test; OBJ.test # profile and compile for MyObject
3682+
"#);
3683+
assert_snapshot!(assert_compiles("[STR.test, STR.test]"), @"[1000, 1000]");
3684+
}
3685+
3686+
#[test]
3687+
fn test_getivar_t_class_then_string() {
3688+
// This is a regression test for a type confusion miscomp where
3689+
// we end up reading the fields object using an offset off of a
3690+
// string, assuming that it has a the same layout as a T_CLASS object.
3691+
// At the time of writing the fields object of strings are stored
3692+
// in a global table, out-of-line of each string.
3693+
// The string and the class end up sharing one shape ID.
3694+
set_call_threshold(2);
3695+
eval(r#"
3696+
module GetThousand
3697+
def test = @var1000
3698+
end
3699+
class MyClass
3700+
extend GetThousand
3701+
end
3702+
class String
3703+
include GetThousand
3704+
end
3705+
STR = +''
3706+
(0..1000).each do |i|
3707+
ivar_name = :"@var#{i}"
3708+
MyClass.instance_variable_set(ivar_name, i)
3709+
STR.instance_variable_set(ivar_name, i)
3710+
end
3711+
p MyClass.test; p MyClass.test # profile and compile for MyClass
3712+
p STR.test
3713+
"#);
3714+
assert_snapshot!(assert_compiles("[STR.test, STR.test]"), @"[1000, 1000]");
3715+
}
3716+
36263717
#[test]
36273718
fn test_attr_accessor_setivar() {
36283719
eval("

zjit/src/cruby.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -604,6 +604,7 @@ impl VALUE {
604604
unsafe { rb_jit_class_fields_embedded_p(self) }
605605
}
606606

607+
/// Typed `T_DATA` made from `TypedData_Make_Struct()` (e.g. Thread, ARGF)
607608
pub fn typed_data_p(self) -> bool {
608609
!self.special_const_p() &&
609610
self.builtin_type() == RUBY_T_DATA &&

zjit/src/hir.rs

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4431,6 +4431,23 @@ impl Function {
44314431
}
44324432
}
44334433

4434+
/// This puts a guard that establishes the preconditon for [Self::load_ivar]
4435+
fn load_ivar_guard_type(&mut self, block: BlockId, recv: InsnId, recv_type: ProfiledType, state: InsnId) -> InsnId {
4436+
if recv_type.class().is_subclass_of(unsafe { rb_cClass }) == ClassRelationship::Subclass {
4437+
// Check class first since `Class < Module`
4438+
self.push_insn(block, Insn::GuardType { val: recv, guard_type: types::Class, state })
4439+
} else if recv_type.class().is_subclass_of(unsafe { rb_cModule }) == ClassRelationship::Subclass {
4440+
self.push_insn(block, Insn::GuardType { val: recv, guard_type: types::Module, state })
4441+
} else if recv_type.flags().is_typed_data() {
4442+
self.push_insn(block, Insn::GuardType { val: recv, guard_type: types::TypedTData, state })
4443+
} else {
4444+
// HeapBasicObject is wider than T_OBJECT, but shapes for T_OBJECTs are in a pool of
4445+
// its own and are guaranteed to be different from shapes of any other T_* types. So
4446+
// the shape check that follows already covers checking for T_OBJECT.
4447+
self.push_insn(block, Insn::GuardType { val: recv, guard_type: types::HeapBasicObject, state })
4448+
}
4449+
}
4450+
44344451
fn load_ivar(&mut self, block: BlockId, self_val: InsnId, recv_type: ProfiledType, id: ID, state: InsnId) -> InsnId {
44354452
// Too-complex shapes use hash tables; rb_shape_get_iv_index doesn't support them.
44364453
// Callers must filter these out before calling load_ivar.
@@ -4500,7 +4517,7 @@ impl Function {
45004517
self.count(block, Counter::getivar_fallback_too_complex);
45014518
self.push_insn_id(block, insn_id); continue;
45024519
}
4503-
let self_val = self.push_insn(block, Insn::GuardType { val: self_val, guard_type: types::HeapBasicObject, state });
4520+
let self_val = self.load_ivar_guard_type(block, self_val, recv_type, state);
45044521
let shape = self.load_shape(block, self_val);
45054522
self.guard_shape(block, shape, recv_type.shape(), state);
45064523
let replacement = self.load_ivar(block, self_val, recv_type, id, state);
@@ -4529,7 +4546,7 @@ impl Function {
45294546
self.count(block, Counter::definedivar_fallback_too_complex);
45304547
self.push_insn_id(block, insn_id); continue;
45314548
}
4532-
let self_val = self.push_insn(block, Insn::GuardType { val: self_val, guard_type: types::HeapBasicObject, state });
4549+
let self_val = self.load_ivar_guard_type(block, self_val, recv_type, state);
45334550
let shape = self.load_shape(block, self_val);
45344551
self.guard_shape(block, shape, recv_type.shape(), state);
45354552
let mut ivar_index: u16 = 0;
@@ -4601,7 +4618,7 @@ impl Function {
46014618
}
46024619
// Fall through to emitting the ivar write
46034620
}
4604-
let self_val = self.push_insn(block, Insn::GuardType { val: self_val, guard_type: types::HeapBasicObject, state });
4621+
let self_val = self.load_ivar_guard_type(block, self_val, recv_type, state);
46054622
let shape = self.load_shape(block, self_val);
46064623
self.guard_shape(block, shape, recv_type.shape(), state);
46074624
// Current shape contains this ivar

zjit/src/hir/opt_tests.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7346,7 +7346,7 @@ mod hir_opt_tests {
73467346
Jump bb3(v4)
73477347
bb3(v6:BasicObject):
73487348
PatchPoint SingleRactorMode
7349-
v17:HeapBasicObject = GuardType v6, HeapBasicObject
7349+
v17:Module = GuardType v6, Module
73507350
v18:CShape = LoadField v17, :_shape_id@0x1000
73517351
v19:CShape[0x1001] = GuardBitEquals v18, CShape(0x1001)
73527352
PatchPoint RootBoxOnly
@@ -7381,7 +7381,7 @@ mod hir_opt_tests {
73817381
Jump bb3(v4)
73827382
bb3(v6:BasicObject):
73837383
PatchPoint SingleRactorMode
7384-
v17:HeapBasicObject = GuardType v6, HeapBasicObject
7384+
v17:Module = GuardType v6, Module
73857385
v18:CShape = LoadField v17, :_shape_id@0x1000
73867386
v19:CShape[0x1001] = GuardBitEquals v18, CShape(0x1001)
73877387
PatchPoint RootBoxOnly
@@ -7414,7 +7414,7 @@ mod hir_opt_tests {
74147414
Jump bb3(v4)
74157415
bb3(v6:BasicObject):
74167416
PatchPoint SingleRactorMode
7417-
v17:HeapBasicObject = GuardType v6, HeapBasicObject
7417+
v17:Class = GuardType v6, Class
74187418
v18:CShape = LoadField v17, :_shape_id@0x1000
74197419
v19:CShape[0x1001] = GuardBitEquals v18, CShape(0x1001)
74207420
PatchPoint RootBoxOnly
@@ -7449,7 +7449,7 @@ mod hir_opt_tests {
74497449
Jump bb3(v4)
74507450
bb3(v6:BasicObject):
74517451
PatchPoint SingleRactorMode
7452-
v17:HeapBasicObject = GuardType v6, HeapBasicObject
7452+
v17:Class = GuardType v6, Class
74537453
v18:CShape = LoadField v17, :_shape_id@0x1000
74547454
v19:CShape[0x1001] = GuardBitEquals v18, CShape(0x1001)
74557455
PatchPoint RootBoxOnly
@@ -7520,7 +7520,7 @@ mod hir_opt_tests {
75207520
Jump bb3(v4)
75217521
bb3(v6:BasicObject):
75227522
PatchPoint SingleRactorMode
7523-
v17:HeapBasicObject = GuardType v6, HeapBasicObject
7523+
v17:TypedTData = GuardType v6, TypedTData
75247524
v18:CShape = LoadField v17, :_shape_id@0x1000
75257525
v19:CShape[0x1001] = GuardBitEquals v18, CShape(0x1001)
75267526
v20:RubyValue = LoadField v17, :_fields_obj@0x1002
@@ -7556,7 +7556,7 @@ mod hir_opt_tests {
75567556
Jump bb3(v4)
75577557
bb3(v6:BasicObject):
75587558
PatchPoint SingleRactorMode
7559-
v17:HeapBasicObject = GuardType v6, HeapBasicObject
7559+
v17:TypedTData = GuardType v6, TypedTData
75607560
v18:CShape = LoadField v17, :_shape_id@0x1000
75617561
v19:CShape[0x1001] = GuardBitEquals v18, CShape(0x1001)
75627562
v20:RubyValue = LoadField v17, :_fields_obj@0x1002

zjit/src/hir_type/gen_hir_type.rb

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,11 @@ def final_type name, base: $object, c_name: nil
132132
true_exact = final_type "TrueClass", c_name: "rb_cTrueClass"
133133
false_exact = final_type "FalseClass", c_name: "rb_cFalseClass"
134134

135+
# Typed T_DATA objects (RTYPEDDATA_P). These have a distinct memory layout
136+
# for field access (fields_obj at a fixed offset in RTypedData). These
137+
# don't have a common class ancestor below BasicObject.
138+
basic_object.subtype "TypedTData"
139+
135140
# Build the cvalue object universe. This is for C-level types that may be
136141
# passed around when calling into the Ruby VM or after some strength reduction
137142
# of HIR.

zjit/src/hir_type/hir_type.inc.rs

Lines changed: 9 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

zjit/src/hir_type/mod.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
//! High-level intermediate representation types.
22
33
#![allow(non_upper_case_globals)]
4+
use crate::cruby;
45
use crate::cruby::{rb_block_param_proxy, Qfalse, Qnil, Qtrue, RUBY_T_ARRAY, RUBY_T_CLASS, RUBY_T_HASH, RUBY_T_MODULE, RUBY_T_STRING, VALUE};
56
use crate::cruby::{rb_cInteger, rb_cFloat, rb_cArray, rb_cHash, rb_cString, rb_cSymbol, rb_cRange, rb_cModule, rb_zjit_singleton_class_p};
67
use crate::cruby::ClassRelationship;
@@ -213,6 +214,7 @@ impl Type {
213214
else if val.class_of() == unsafe { rb_cSymbol } { bits::DynamicSymbol }
214215
else if let Some(bits) = Self::bits_from_exact_class(val.class_of()) { bits }
215216
else if let Some(bits) = Self::bits_from_subclass(val.class_of()) { bits }
217+
else if val.typed_data_p() { bits::TypedTData }
216218
else {
217219
unreachable!("Class {} is not a subclass of BasicObject! Don't know what to do.",
218220
get_class_name(val.class_of()))
@@ -429,6 +431,24 @@ impl Type {
429431
matches!(self.spec, Specialization::Object(_))
430432
}
431433

434+
/// Find a `T_*` type that is exactly as wide as `self`.
435+
pub fn builtin_type_equivalent(&self) -> Option<cruby::ruby_value_type> {
436+
if self.bit_equal(types::Array) {
437+
Some(cruby::RUBY_T_ARRAY)
438+
} else if self.bit_equal(types::Class) {
439+
Some(cruby::RUBY_T_CLASS)
440+
} else if self.bit_equal(types::Module) {
441+
Some(cruby::RUBY_T_MODULE)
442+
} else if self.bit_equal(types::String) {
443+
Some(cruby::RUBY_T_STRING)
444+
} else if self.bit_equal(types::Hash) {
445+
Some(cruby::RUBY_T_HASH)
446+
} else {
447+
// Note that types::TypedTData is narrower than T_DATA, so not here.
448+
None
449+
}
450+
}
451+
432452
fn is_builtin(class: VALUE) -> bool {
433453
types::ExactBitsAndClass
434454
.iter()

0 commit comments

Comments
 (0)