Skip to content

Commit 8aa579d

Browse files
authored
Properly tag inline bulk-copy loads/stores with alias regions (#13610)
The small-constant-length inline fast path for `{memory,table,array}.copy` emitted region-less loads/stores, while every other access to the same bytes is region-tagged. This led to stale values being forwarded in store-to-load forwarding.
1 parent 56c7d5f commit 8aa579d

6 files changed

Lines changed: 122 additions & 12 deletions

File tree

crates/cranelift/src/func_environ.rs

Lines changed: 50 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,22 @@ impl<'module_environment> FuncEnvironment<'module_environment> {
433433
self.alias_region(func, key)
434434
}
435435

436+
/// Get the alias region for the storage of a bulk-copy entity.
437+
fn bulk_copy_alias_region(
438+
&mut self,
439+
func: &mut Function,
440+
entity: CheckedEntity,
441+
) -> Option<ir::AliasRegion> {
442+
match entity {
443+
CheckedEntity::Memory(index) => Some(self.memory_alias_region(func, index)),
444+
CheckedEntity::Table { table, .. } => Some(self.table_alias_region(func, table)),
445+
CheckedEntity::Array { .. } => Some(self.gc_heap_alias_region(func)),
446+
CheckedEntity::Data { .. } | CheckedEntity::RuntimeData(_) | CheckedEntity::Elem(_) => {
447+
None
448+
}
449+
}
450+
}
451+
436452
pub(crate) fn vmctx_alias_region(
437453
&mut self,
438454
func: &mut Function,
@@ -3687,14 +3703,18 @@ impl FuncEnvironment<'_> {
36873703
dst,
36883704
src,
36893705
const_len: Some(bytes),
3706+
src_entity,
3707+
dst_entity,
36903708
..
36913709
} = op
36923710
{
36933711
if bytes <= INLINE_COPY_MAX_BYTES {
36943712
if self.tunables.consume_fuel {
36953713
self.fuel_consumed += bytes as i64;
36963714
}
3697-
self.emit_inline_memcpy(builder, dst, src, bytes);
3715+
let src_region = self.bulk_copy_alias_region(builder.func, src_entity);
3716+
let dst_region = self.bulk_copy_alias_region(builder.func, dst_entity);
3717+
self.emit_inline_memcpy(builder, dst, src, bytes, src_region, dst_region);
36983718
return;
36993719
}
37003720
}
@@ -4347,6 +4367,8 @@ impl FuncEnvironment<'_> {
43474367
src: src_raw_addr,
43484368
len: len_ptr,
43494369
const_len: const_count,
4370+
src_entity,
4371+
dst_entity,
43504372
},
43514373
);
43524374
Ok(())
@@ -4704,6 +4726,8 @@ impl FuncEnvironment<'_> {
47044726
src: src_elem_addr,
47054727
len: dst_copy_byte_len,
47064728
const_len,
4729+
src_entity,
4730+
dst_entity,
47074731
},
47084732
);
47094733
return Ok(());
@@ -4823,14 +4847,27 @@ impl FuncEnvironment<'_> {
48234847
dst_addr: ir::Value,
48244848
src_addr: ir::Value,
48254849
bytes: u64,
4850+
src_region: Option<ir::AliasRegion>,
4851+
dst_region: Option<ir::AliasRegion>,
48264852
) {
48274853
// `trusted()` (notrap + aligned) is sound: the range is already
48284854
// bounds-checked, and each load feeds only its paired store, so the
48294855
// backend selects unaligned moves regardless of the `aligned` flag.
48304856
// Endianness is pinned to `Little` because Pulley's `v128` load/store
48314857
// only encode the little-endian variant, and matching load/store
48324858
// endianness preserves the destination bytes either way.
4833-
let flags = ir::MemFlagsData::trusted().with_endianness(Endianness::Little);
4859+
//
4860+
// The loads/stores carry the source/destination entity's alias region
4861+
// so alias analysis keeps them in the same disjoint memory category as
4862+
// the entity's other (region-tagged) accesses; otherwise a region-less
4863+
// load here could be forwarded a stale value across an intervening
4864+
// region-tagged store to the same address.
4865+
let load_flags = ir::MemFlagsData::trusted()
4866+
.with_endianness(Endianness::Little)
4867+
.with_alias_region(src_region);
4868+
let store_flags = ir::MemFlagsData::trusted()
4869+
.with_endianness(Endianness::Little)
4870+
.with_alias_region(dst_region);
48344871
const WIDTHS: &[(u64, ir::Type)] = &[
48354872
(16, ir::types::I8X16),
48364873
(8, ir::types::I64),
@@ -4853,10 +4890,10 @@ impl FuncEnvironment<'_> {
48534890
}
48544891
let vals: SmallVec<[ir::Value; 12]> = chunks
48554892
.iter()
4856-
.map(|&(off, ty)| builder.ins().load(ty, flags, src_addr, off))
4893+
.map(|&(off, ty)| builder.ins().load(ty, load_flags, src_addr, off))
48574894
.collect();
48584895
for (&(off, _), val) in chunks.iter().zip(vals) {
4859-
builder.ins().store(flags, val, dst_addr, off);
4896+
builder.ins().store(store_flags, val, dst_addr, off);
48604897
}
48614898
}
48624899

@@ -6319,11 +6356,20 @@ enum BulkOp {
63196356
/// must have type `env.pointer_type()`. `const_len`, when set, is the
63206357
/// statically-known byte length (from a constant wasm length); the inline
63216358
/// fast path in `raw_bulk_memory_operation` uses it to expand small copies.
6359+
///
6360+
/// `src_entity`/`dst_entity` are the source and destination entities,
6361+
/// carried so that the inline fast path can tag its loads/stores with each
6362+
/// entity's alias region (the per-memory or GC-heap region); otherwise a
6363+
/// region-less inline load could be forwarded a stale value across an
6364+
/// intervening region-tagged store to the same address. They are unused on
6365+
/// the libcall path.
63226366
MemoryCopy {
63236367
dst: ir::Value,
63246368
src: ir::Value,
63256369
len: ir::Value,
63266370
const_len: Option<u64>,
6371+
src_entity: CheckedEntity,
6372+
dst_entity: CheckedEntity,
63276373
},
63286374

63296375
/// A `memory.fill` operation, setting all bytes of `dst` to `val`.

tests/disas/array-copy-inline.wat

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -69,12 +69,12 @@
6969
;; @002a v70 = uadd_overflow_trap v52, v92, user2 ; v92 = 28
7070
;; @002a v71 = icmp ugt v70, v62
7171
;; @002a trapnz v71, user2
72-
;; @002a v72 = load.i8x16 notrap aligned little v52
73-
;; @002a v73 = load.i64 notrap aligned little v52+16
74-
;; @002a v74 = load.i32 notrap aligned little v52+24
75-
;; @002a store notrap aligned little v72, v29
76-
;; @002a store notrap aligned little v73, v29+16
77-
;; @002a store notrap aligned little v74, v29+24
72+
;; @002a v72 = load.i8x16 notrap aligned little region1 v52
73+
;; @002a v73 = load.i64 notrap aligned little region1 v52+16
74+
;; @002a v74 = load.i32 notrap aligned little region1 v52+24
75+
;; @002a store notrap aligned little region1 v72, v29
76+
;; @002a store notrap aligned little region1 v73, v29+16
77+
;; @002a store notrap aligned little region1 v74, v29+24
7878
;; @002e jump block1
7979
;;
8080
;; block1:

tests/disas/memory-copy-inline.wat

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
)
1414
;; function u0:0(i64 vmctx, i64, i32, i32) tail {
1515
;; region0 = 8 "VMContext+0x8"
16+
;; region1 = 805306368 "DefinedMemory(StaticModuleIndex(0), DefinedMemoryIndex(0))"
1617
;; gv0 = vmctx
1718
;; gv1 = load.i64 notrap aligned readonly region0 gv0+8
1819
;; gv2 = load.i64 notrap aligned gv1+24
@@ -34,9 +35,9 @@
3435
;; @0024 trapnz v25, heap_oob
3536
;; @0024 v13 = load.i64 notrap aligned readonly can_move v0+56
3637
;; @0024 v30 = iadd v13, v20
37-
;; @0024 v32 = load.i8x16 notrap aligned little v30
38+
;; @0024 v32 = load.i8x16 notrap aligned little region1 v30
3839
;; @0024 v17 = iadd v13, v7
39-
;; @0024 store notrap aligned little v32, v17
40+
;; @0024 store notrap aligned little region1 v32, v17
4041
;; @0028 jump block1
4142
;;
4243
;; block1:
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
;;! bulk_memory = true
2+
3+
;; Regression test for an alias-region miscompile in the inline `memory.copy`
4+
;; fast path (`emit_inline_memcpy`). A small constant-length copy expands to
5+
;; inline loads/stores that must carry the memory's alias region; otherwise the
6+
;; region-less load can be store-to-load forwarded a stale value across the
7+
;; intervening region-tagged `i32.store`, dropping that store.
8+
(module
9+
(memory 1)
10+
(func (export "test") (result i32)
11+
(i32.store (i32.const 16) (i32.const 0x1111)) ;; addr16 = 0x1111
12+
(memory.copy (i32.const 0) (i32.const 16) (i32.const 4)) ;; addr0 = 0x1111 (inline copy)
13+
(i32.store (i32.const 0) (i32.const 0xAAAA)) ;; addr0 = 0xAAAA (region-tagged)
14+
(memory.copy (i32.const 32) (i32.const 0) (i32.const 4)) ;; addr32 = *addr0 (inline copy)
15+
(i32.load (i32.const 32)) ;; must be 0xAAAA
16+
)
17+
)
18+
(assert_return (invoke "test") (i32.const 0xAAAA))
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
;;! gc = true
2+
3+
;; Regression test for an alias-region miscompile in the inline `array.copy`
4+
;; fast path (`emit_inline_memcpy`). A small constant-length copy of an `i32`
5+
;; array must tag its inline loads/stores with the GC-heap alias region;
6+
;; otherwise the region-less load can be store-to-load forwarded a stale value
7+
;; across the intervening region-tagged `array.set`.
8+
;;
9+
;; The array is passed as a function parameter so that the two `array.copy`
10+
;; element addresses share a single SSA value (a fresh `array.new` local or a
11+
;; `global.get` reloads the reference and hides the forwarding).
12+
(module
13+
(type $a (array (mut i32)))
14+
(func $op (param $arr (ref $a)) (result i32)
15+
(array.set $a (local.get $arr) (i32.const 4) (i32.const 0x1111))
16+
(array.copy $a $a (local.get $arr) (i32.const 0) (local.get $arr) (i32.const 4) (i32.const 1))
17+
(array.set $a (local.get $arr) (i32.const 0) (i32.const 0x2222))
18+
(array.copy $a $a (local.get $arr) (i32.const 2) (local.get $arr) (i32.const 0) (i32.const 1))
19+
(array.get $a (local.get $arr) (i32.const 2)) ;; must be 0x2222
20+
)
21+
(func (export "test") (result i32)
22+
(call $op (array.new $a (i32.const 0) (i32.const 8)))
23+
)
24+
)
25+
(assert_return (invoke "test") (i32.const 0x2222))
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
;;! gc = true
2+
;;! bulk_memory = true
3+
4+
;; Regression test for an alias-region miscompile in the inline `table.copy`
5+
;; fast path (`emit_inline_memcpy`). A small constant-length copy of an `i31ref`
6+
;; table must tag its inline loads/stores with the table's alias region;
7+
;; otherwise the region-less load can be store-to-load forwarded a stale value
8+
;; across the intervening region-tagged `table.set`. (`i31ref` gives a clean
9+
;; region-tagged element with no lazy-init libcall to mask the bug.)
10+
(module
11+
(table $t 8 8 i31ref)
12+
(func (export "test") (result i32)
13+
(table.set $t (i32.const 4) (ref.i31 (i32.const 0x1111)))
14+
(table.copy $t $t (i32.const 0) (i32.const 4) (i32.const 1)) ;; t[0] = 0x1111 (inline copy)
15+
(table.set $t (i32.const 0) (ref.i31 (i32.const 0x2222))) ;; t[0] = 0x2222 (region-tagged)
16+
(table.copy $t $t (i32.const 2) (i32.const 0) (i32.const 1)) ;; t[2] = *t[0] (inline copy)
17+
(i31.get_u (table.get $t (i32.const 2))) ;; must be 0x2222
18+
)
19+
)
20+
(assert_return (invoke "test") (i32.const 0x2222))

0 commit comments

Comments
 (0)