diff --git a/src/passes/Heap2Local.cpp b/src/passes/Heap2Local.cpp index adba4f0ae36..7515d1999e9 100644 --- a/src/passes/Heap2Local.cpp +++ b/src/passes/Heap2Local.cpp @@ -1137,83 +1137,84 @@ struct Struct2Local : PostWalker { // The allocation might flow into `ref` or `expected`, but not // `replacement`, because then it would be considered to have escaped. - if (analyzer.getInteraction(curr->expected) == - ParentChildInteraction::Flows) { - // Since the allocation does not escape, it cannot possibly match the - // value already in the struct. The cmpxchg will just do a read. Drop the - // other arguments and do the atomic read at the end, when the cmpxchg - // would have happened. Use a nullable scratch local in case we also - // optimize `ref` later and need to replace it with a null. - auto refType = curr->ref->type.with(Nullable); - auto refScratch = builder.addVar(func, refType); - auto* setRefScratch = builder.makeLocalSet(refScratch, curr->ref); - auto* getRefScratch = builder.makeLocalGet(refScratch, refType); - auto* structGet = builder.makeStructGet( - curr->index, getRefScratch, curr->order, curr->type); - auto* block = builder.makeBlock({setRefScratch, - builder.makeDrop(curr->expected), - builder.makeDrop(curr->replacement), - structGet}); + if (analyzer.getInteraction(curr->ref) == ParentChildInteraction::Flows) { + [[maybe_unused]] auto& field = fields[curr->index]; + auto type = curr->type; + assert(type == field.type); + assert(!field.isPacked()); + + // Hold everything in scratch locals, just like for other RMW ops and + // struct.new. Use a nullable (shared) eqref local for `expected` to + // accommodate any allowed optimized or unoptimized value there. + auto expectedType = type; + if (type.isRef()) { + expectedType = Type( + HeapTypes::eq.getBasic(type.getHeapType().getShared()), Nullable); + } + auto oldScratch = builder.addVar(func, type); + auto expectedScratch = builder.addVar(func, expectedType); + auto replacementScratch = builder.addVar(func, type); + auto local = localIndexes[curr->index]; + + auto* block = builder.makeBlock( + {builder.makeDrop(curr->ref), + builder.makeLocalSet(expectedScratch, curr->expected), + builder.makeLocalSet(replacementScratch, curr->replacement), + builder.makeLocalSet(oldScratch, builder.makeLocalGet(local, type))}); + + // Create the check for whether we should do the exchange. + auto* lhs = builder.makeLocalGet(local, type); + auto* rhs = builder.makeLocalGet(expectedScratch, expectedType); + Expression* pred; + if (type.isRef()) { + pred = builder.makeRefEq(lhs, rhs); + } else { + pred = + builder.makeBinary(Abstract::getBinary(type, Abstract::Eq), lhs, rhs); + } + + // The conditional exchange. + block->list.push_back(builder.makeIf( + pred, + builder.makeLocalSet(local, + builder.makeLocalGet(replacementScratch, type)))); + + // Unstash the old value. + block->list.push_back(builder.makeLocalGet(oldScratch, type)); + block->type = type; replaceCurrent(block); - // Record the new data flow into and out of the new scratch local. This is - // necessary in case `ref` gets processed later so we can detect that it - // flows to the new struct.atomic.get, which may need to be replaced. - analyzer.parents.setParent(curr->ref, setRefScratch); - analyzer.scratchInfo.insert({setRefScratch, getRefScratch}); - analyzer.parents.setParent(getRefScratch, structGet); return; } - if (analyzer.getInteraction(curr->ref) != ParentChildInteraction::Flows) { + if (analyzer.getInteraction(curr->expected) != + ParentChildInteraction::Flows) { // Since the allocation does not flow from `ref`, it must not flow through // this cmpxchg at all. return; } - [[maybe_unused]] auto& field = fields[curr->index]; - auto type = curr->type; - assert(type == field.type); - assert(!field.isPacked()); - - // Hold everything in scratch locals, just like for other RMW ops and - // struct.new. Use a nullable (shared) eqref local for `expected` to - // accommodate any allowed optimized or unoptimized value there. - auto expectedType = type; - if (type.isRef()) { - expectedType = - Type(HeapTypes::eq.getBasic(type.getHeapType().getShared()), Nullable); - } - auto oldScratch = builder.addVar(func, type); - auto expectedScratch = builder.addVar(func, expectedType); - auto replacementScratch = builder.addVar(func, type); - auto local = localIndexes[curr->index]; - - auto* block = builder.makeBlock( - {builder.makeDrop(curr->ref), - builder.makeLocalSet(expectedScratch, curr->expected), - builder.makeLocalSet(replacementScratch, curr->replacement), - builder.makeLocalSet(oldScratch, builder.makeLocalGet(local, type))}); - - // Create the check for whether we should do the exchange. - auto* lhs = builder.makeLocalGet(local, type); - auto* rhs = builder.makeLocalGet(expectedScratch, expectedType); - Expression* pred; - if (type.isRef()) { - pred = builder.makeRefEq(lhs, rhs); - } else { - pred = - builder.makeBinary(Abstract::getBinary(type, Abstract::Eq), lhs, rhs); - } - - // The conditional exchange. - block->list.push_back( - builder.makeIf(pred, - builder.makeLocalSet( - local, builder.makeLocalGet(replacementScratch, type)))); - - // Unstash the old value. - block->list.push_back(builder.makeLocalGet(oldScratch, type)); - block->type = type; + // Since the allocation does not escape, it cannot possibly match the value + // already in the struct. The cmpxchg will just do a read. Drop the other + // arguments and do the atomic read at the end, when the cmpxchg would have + // happened. Use a nullable scratch local in case we also optimize `ref` + // later and need to replace it with a null. + auto refType = curr->ref->type.with(Nullable); + auto refScratch = builder.addVar(func, refType); + auto* setRefScratch = builder.makeLocalSet(refScratch, curr->ref); + auto* getRefScratch = builder.makeLocalGet(refScratch, refType); + auto* structGet = builder.makeStructGet( + curr->index, getRefScratch, curr->order, curr->type); + auto* block = builder.makeBlock({setRefScratch, + builder.makeDrop(curr->expected), + builder.makeDrop(curr->replacement), + structGet}); replaceCurrent(block); + // Record the new data flow into and out of the new scratch local. This is + // necessary in case `ref` gets processed later so we can detect that it + // flows to the new struct.atomic.get, which may need to be replaced. + analyzer.parents.setParent(curr->ref, setRefScratch); + analyzer.scratchInfo.insert({setRefScratch, getRefScratch}); + analyzer.parents.setParent(getRefScratch, structGet); + return; } void visitArrayCmpxchg(ArrayCmpxchg* curr) { diff --git a/test/lit/passes/heap2local-rmw.wast b/test/lit/passes/heap2local-rmw.wast index 5ded32d4378..48fb88df62b 100644 --- a/test/lit/passes/heap2local-rmw.wast +++ b/test/lit/passes/heap2local-rmw.wast @@ -1354,6 +1354,73 @@ ) ) +(module + ;; CHECK: (type $struct (struct (field (mut (ref null $struct))))) + (type $struct (struct (field (mut (ref null $struct))))) + + ;; CHECK: (type $1 (func)) + + ;; CHECK: (export "test" (func $cmpxchg-ref-and-expected)) + + ;; CHECK: (func $cmpxchg-ref-and-expected (type $1) + ;; CHECK-NEXT: (local $local (ref $struct)) + ;; CHECK-NEXT: (local $1 (ref null $struct)) + ;; CHECK-NEXT: (local $2 (ref null $struct)) + ;; CHECK-NEXT: (local $3 eqref) + ;; CHECK-NEXT: (local $4 (ref null $struct)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result (ref null $struct)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $3 + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $4 + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $2 + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (if + ;; CHECK-NEXT: (ref.eq + ;; CHECK-NEXT: (local.get $1) + ;; CHECK-NEXT: (local.get $3) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (then + ;; CHECK-NEXT: (local.set $1 + ;; CHECK-NEXT: (local.get $4) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.get $2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $cmpxchg-ref-and-expected (export "test") + (local $local (ref $struct)) + (drop + ;; The allocation flows to both `ref` and `expected` fields. We must + ;; prioritize the optimization for flows through `ref`. Otherwise, if we + ;; did the optimization for `expected` first, we would end up with a + ;; struct.get of the ref value, but the ref value would have been changed + ;; to a null and we would introduce a trap. + (struct.atomic.rmw.cmpxchg $struct 0 + (local.tee $local + (struct.new_default $struct) + ) + (local.get $local) + (ref.null none) + ) + ) + ) +) + (module (type $array (shared (array i8))) ;; CHECK: (type $struct (shared (struct (field (mut (ref null (shared array)))))))