Skip to content

Commit 88a07e0

Browse files
authored
Handle ref before expected in Heap2Local for StructCmpxchg (#8566)
When the same optimized allocation flows into both the `ref` and `expected` fields of a StructCmpxchg, we previously (arbitrarily) prioritized optimizing based on the flow through `expected`. But this optimization stores the `ref` in a scratch local and then creates a new struct.get of its value. Since the same optimized allocation is also flowing through the `ref` field, that means we end up trying to do a struct.get on a null, which traps. To fix the problem, prioritize doing the optimization based on the flow through `ref` instead. This drops the other expressions and does not introduce any new accesses of the optimized value. Fixes #8563.
1 parent c32215e commit 88a07e0

File tree

2 files changed

+136
-68
lines changed

2 files changed

+136
-68
lines changed

src/passes/Heap2Local.cpp

Lines changed: 69 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1137,83 +1137,84 @@ struct Struct2Local : PostWalker<Struct2Local> {
11371137

11381138
// The allocation might flow into `ref` or `expected`, but not
11391139
// `replacement`, because then it would be considered to have escaped.
1140-
if (analyzer.getInteraction(curr->expected) ==
1141-
ParentChildInteraction::Flows) {
1142-
// Since the allocation does not escape, it cannot possibly match the
1143-
// value already in the struct. The cmpxchg will just do a read. Drop the
1144-
// other arguments and do the atomic read at the end, when the cmpxchg
1145-
// would have happened. Use a nullable scratch local in case we also
1146-
// optimize `ref` later and need to replace it with a null.
1147-
auto refType = curr->ref->type.with(Nullable);
1148-
auto refScratch = builder.addVar(func, refType);
1149-
auto* setRefScratch = builder.makeLocalSet(refScratch, curr->ref);
1150-
auto* getRefScratch = builder.makeLocalGet(refScratch, refType);
1151-
auto* structGet = builder.makeStructGet(
1152-
curr->index, getRefScratch, curr->order, curr->type);
1153-
auto* block = builder.makeBlock({setRefScratch,
1154-
builder.makeDrop(curr->expected),
1155-
builder.makeDrop(curr->replacement),
1156-
structGet});
1140+
if (analyzer.getInteraction(curr->ref) == ParentChildInteraction::Flows) {
1141+
[[maybe_unused]] auto& field = fields[curr->index];
1142+
auto type = curr->type;
1143+
assert(type == field.type);
1144+
assert(!field.isPacked());
1145+
1146+
// Hold everything in scratch locals, just like for other RMW ops and
1147+
// struct.new. Use a nullable (shared) eqref local for `expected` to
1148+
// accommodate any allowed optimized or unoptimized value there.
1149+
auto expectedType = type;
1150+
if (type.isRef()) {
1151+
expectedType = Type(
1152+
HeapTypes::eq.getBasic(type.getHeapType().getShared()), Nullable);
1153+
}
1154+
auto oldScratch = builder.addVar(func, type);
1155+
auto expectedScratch = builder.addVar(func, expectedType);
1156+
auto replacementScratch = builder.addVar(func, type);
1157+
auto local = localIndexes[curr->index];
1158+
1159+
auto* block = builder.makeBlock(
1160+
{builder.makeDrop(curr->ref),
1161+
builder.makeLocalSet(expectedScratch, curr->expected),
1162+
builder.makeLocalSet(replacementScratch, curr->replacement),
1163+
builder.makeLocalSet(oldScratch, builder.makeLocalGet(local, type))});
1164+
1165+
// Create the check for whether we should do the exchange.
1166+
auto* lhs = builder.makeLocalGet(local, type);
1167+
auto* rhs = builder.makeLocalGet(expectedScratch, expectedType);
1168+
Expression* pred;
1169+
if (type.isRef()) {
1170+
pred = builder.makeRefEq(lhs, rhs);
1171+
} else {
1172+
pred =
1173+
builder.makeBinary(Abstract::getBinary(type, Abstract::Eq), lhs, rhs);
1174+
}
1175+
1176+
// The conditional exchange.
1177+
block->list.push_back(builder.makeIf(
1178+
pred,
1179+
builder.makeLocalSet(local,
1180+
builder.makeLocalGet(replacementScratch, type))));
1181+
1182+
// Unstash the old value.
1183+
block->list.push_back(builder.makeLocalGet(oldScratch, type));
1184+
block->type = type;
11571185
replaceCurrent(block);
1158-
// Record the new data flow into and out of the new scratch local. This is
1159-
// necessary in case `ref` gets processed later so we can detect that it
1160-
// flows to the new struct.atomic.get, which may need to be replaced.
1161-
analyzer.parents.setParent(curr->ref, setRefScratch);
1162-
analyzer.scratchInfo.insert({setRefScratch, getRefScratch});
1163-
analyzer.parents.setParent(getRefScratch, structGet);
11641186
return;
11651187
}
1166-
if (analyzer.getInteraction(curr->ref) != ParentChildInteraction::Flows) {
1188+
if (analyzer.getInteraction(curr->expected) !=
1189+
ParentChildInteraction::Flows) {
11671190
// Since the allocation does not flow from `ref` (handled above) or from
11681191
// `expected`, it must not flow through this cmpxchg at all.
11691192
return;
11701193
}
11711194

1172-
[[maybe_unused]] auto& field = fields[curr->index];
1173-
auto type = curr->type;
1174-
assert(type == field.type);
1175-
assert(!field.isPacked());
1176-
1177-
// Hold everything in scratch locals, just like for other RMW ops and
1178-
// struct.new. Use a nullable (shared) eqref local for `expected` to
1179-
// accommodate any allowed optimized or unoptimized value there.
1180-
auto expectedType = type;
1181-
if (type.isRef()) {
1182-
expectedType =
1183-
Type(HeapTypes::eq.getBasic(type.getHeapType().getShared()), Nullable);
1184-
}
1185-
auto oldScratch = builder.addVar(func, type);
1186-
auto expectedScratch = builder.addVar(func, expectedType);
1187-
auto replacementScratch = builder.addVar(func, type);
1188-
auto local = localIndexes[curr->index];
1189-
1190-
auto* block = builder.makeBlock(
1191-
{builder.makeDrop(curr->ref),
1192-
builder.makeLocalSet(expectedScratch, curr->expected),
1193-
builder.makeLocalSet(replacementScratch, curr->replacement),
1194-
builder.makeLocalSet(oldScratch, builder.makeLocalGet(local, type))});
1195-
1196-
// Create the check for whether we should do the exchange.
1197-
auto* lhs = builder.makeLocalGet(local, type);
1198-
auto* rhs = builder.makeLocalGet(expectedScratch, expectedType);
1199-
Expression* pred;
1200-
if (type.isRef()) {
1201-
pred = builder.makeRefEq(lhs, rhs);
1202-
} else {
1203-
pred =
1204-
builder.makeBinary(Abstract::getBinary(type, Abstract::Eq), lhs, rhs);
1205-
}
1206-
1207-
// The conditional exchange.
1208-
block->list.push_back(
1209-
builder.makeIf(pred,
1210-
builder.makeLocalSet(
1211-
local, builder.makeLocalGet(replacementScratch, type))));
1212-
1213-
// Unstash the old value.
1214-
block->list.push_back(builder.makeLocalGet(oldScratch, type));
1215-
block->type = type;
1195+
// Since the allocation does not escape, it cannot possibly match the value
1196+
// already in the struct. The cmpxchg will just do a read. Drop the other
1197+
// arguments and do the atomic read at the end, when the cmpxchg would have
1198+
// happened. Use a nullable scratch local in case we also optimize `ref`
1199+
// later and need to replace it with a null.
1200+
auto refType = curr->ref->type.with(Nullable);
1201+
auto refScratch = builder.addVar(func, refType);
1202+
auto* setRefScratch = builder.makeLocalSet(refScratch, curr->ref);
1203+
auto* getRefScratch = builder.makeLocalGet(refScratch, refType);
1204+
auto* structGet = builder.makeStructGet(
1205+
curr->index, getRefScratch, curr->order, curr->type);
1206+
auto* block = builder.makeBlock({setRefScratch,
1207+
builder.makeDrop(curr->expected),
1208+
builder.makeDrop(curr->replacement),
1209+
structGet});
12161210
replaceCurrent(block);
1211+
// Record the new data flow into and out of the new scratch local. This is
1212+
// necessary in case `ref` gets processed later so we can detect that it
1213+
// flows to the new struct.atomic.get, which may need to be replaced.
1214+
analyzer.parents.setParent(curr->ref, setRefScratch);
1215+
analyzer.scratchInfo.insert({setRefScratch, getRefScratch});
1216+
analyzer.parents.setParent(getRefScratch, structGet);
1217+
return;
12171218
}
12181219

12191220
void visitArrayCmpxchg(ArrayCmpxchg* curr) {

test/lit/passes/heap2local-rmw.wast

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1354,6 +1354,73 @@
13541354
)
13551355
)
13561356

1357+
(module
1358+
;; CHECK: (type $struct (struct (field (mut (ref null $struct)))))
1359+
(type $struct (struct (field (mut (ref null $struct)))))
1360+
1361+
;; CHECK: (type $1 (func))
1362+
1363+
;; CHECK: (export "test" (func $cmpxchg-ref-and-expected))
1364+
1365+
;; CHECK: (func $cmpxchg-ref-and-expected (type $1)
1366+
;; CHECK-NEXT: (local $local (ref $struct))
1367+
;; CHECK-NEXT: (local $1 (ref null $struct))
1368+
;; CHECK-NEXT: (local $2 (ref null $struct))
1369+
;; CHECK-NEXT: (local $3 eqref)
1370+
;; CHECK-NEXT: (local $4 (ref null $struct))
1371+
;; CHECK-NEXT: (drop
1372+
;; CHECK-NEXT: (block (result (ref null $struct))
1373+
;; CHECK-NEXT: (drop
1374+
;; CHECK-NEXT: (block (result nullref)
1375+
;; CHECK-NEXT: (local.set $1
1376+
;; CHECK-NEXT: (ref.null none)
1377+
;; CHECK-NEXT: )
1378+
;; CHECK-NEXT: (ref.null none)
1379+
;; CHECK-NEXT: )
1380+
;; CHECK-NEXT: )
1381+
;; CHECK-NEXT: (local.set $3
1382+
;; CHECK-NEXT: (ref.null none)
1383+
;; CHECK-NEXT: )
1384+
;; CHECK-NEXT: (local.set $4
1385+
;; CHECK-NEXT: (ref.null none)
1386+
;; CHECK-NEXT: )
1387+
;; CHECK-NEXT: (local.set $2
1388+
;; CHECK-NEXT: (local.get $1)
1389+
;; CHECK-NEXT: )
1390+
;; CHECK-NEXT: (if
1391+
;; CHECK-NEXT: (ref.eq
1392+
;; CHECK-NEXT: (local.get $1)
1393+
;; CHECK-NEXT: (local.get $3)
1394+
;; CHECK-NEXT: )
1395+
;; CHECK-NEXT: (then
1396+
;; CHECK-NEXT: (local.set $1
1397+
;; CHECK-NEXT: (local.get $4)
1398+
;; CHECK-NEXT: )
1399+
;; CHECK-NEXT: )
1400+
;; CHECK-NEXT: )
1401+
;; CHECK-NEXT: (local.get $2)
1402+
;; CHECK-NEXT: )
1403+
;; CHECK-NEXT: )
1404+
;; CHECK-NEXT: )
1405+
(func $cmpxchg-ref-and-expected (export "test")
1406+
(local $local (ref $struct))
1407+
(drop
1408+
;; The allocation flows to both `ref` and `expected` fields. We must
1409+
;; prioritize the optimization for flows through `ref`. Otherwise, if we
1410+
;; did the optimization for `expected` first, we would end up with a
1411+
;; struct.get of the ref value, but the ref value would have been changed
1412+
;; to a null and we would introduce a trap.
1413+
(struct.atomic.rmw.cmpxchg $struct 0
1414+
(local.tee $local
1415+
(struct.new_default $struct)
1416+
)
1417+
(local.get $local)
1418+
(ref.null none)
1419+
)
1420+
)
1421+
)
1422+
)
1423+
13571424
(module
13581425
(type $array (shared (array i8)))
13591426
;; CHECK: (type $struct (shared (struct (field (mut (ref null (shared array)))))))

0 commit comments

Comments
 (0)