Skip to content

Commit d67b3db

Browse files
committed
PPC64: P10 prefixed-load fast paths for loadPtr/loadDouble/loadFloat32/loadUnalignedSimd128 (Address)
Adds a HasPOWER10()+34-bit-signed-offset fast path before the movePtr+indexed-load fallback in:

- loadPtr(Address) → as_pld
- loadDouble(Address) → as_plfd
- loadFloat32(Address) → as_plfs
- loadUnalignedSimd128(Address) → as_plxv

Each replaces the 2-insn `movePtr scratch + lXdx` sequence (plus a GPR scratch acquire) with a single 8-byte prefixed load when the offset fits 34-bit signed. Hot on deep stack frames where slot offsets exceed 16-bit signed (loadPtr) and on FP/SIMD reads from constant-offset structure fields.

The DS-form (loadPtr) and 16-bit-signed (loadDouble/Float32) fast paths are still preferred when applicable — they're 4 bytes vs 8 for the prefixed form.

All four emitters (`as_pld` / `as_plfd` / `as_plfs` / `as_plxv`) were already wired and used elsewhere; this commit only plumbs them into the Address-form load helpers.

Verified across the verification matrix:

- Real-P9 (`obj-sm-dbgopt`): wasm/large-memory + wasm/memory + wasm/spec/spec/address + ion + basic 3958/0
- Real-P10 (`power10`): same suite 3958/0
- ARM64 sim FORCE_POWER10: same suite 3958/0

Closes PLAN.md candidates mozilla-firefox#19 (read-side), mozilla-firefox#29, mozilla-firefox#30. Item mozilla-firefox#31 (prefixed-store side) still pending — it needs new emitters (as_pstd/as_pstfd/as_pstfs/as_pstxv) and matching sim decoders.
1 parent ec23585 commit d67b3db

2 files changed

Lines changed: 21 additions & 0 deletions

File tree

js/src/jit/ppc64/MacroAssembler-ppc64-inl.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3315,6 +3315,12 @@ inline void MacroAssembler::cmp32Set(Assembler::Condition cond, Address lhs,
33153315
FaultingCodeOffset MacroAssembler::loadUnalignedSimd128(const Address& src,
33163316
FloatRegister dest) {
33173317
UseScratchRegisterScope temps(asMasm());
3318+
if (HasPOWER10() && is_intN((intptr_t)src.offset, 34)) {
3319+
// POWER10 prefixed load — natural-LE byte order, no GPR scratch.
3320+
return FaultingCodeOffset(
3321+
as_plxv(dest.encoding(), src.base, (int64_t)src.offset, /*R=*/false)
3322+
.getOffset());
3323+
}
33183324
if (HasPOWER9()) {
33193325
// POWER9: lxvx (X-form, indexed) loads 128 bits in correct LE order.
33203326
Register scratch = temps.Acquire();

js/src/jit/ppc64/MacroAssembler-ppc64.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1439,6 +1439,11 @@ class MacroAssemblerPPC64Compat : public MacroAssemblerPPC64 {
14391439
return FaultingCodeOffset(
14401440
as_ld(dest, address.base, address.offset).getOffset());
14411441
}
1442+
if (HasPOWER10() && is_intN((intptr_t)address.offset, 34)) {
1443+
return FaultingCodeOffset(
1444+
as_pld(dest, address.base, (int64_t)address.offset, /*R=*/false)
1445+
.getOffset());
1446+
}
14421447
UseScratchRegisterScope temps(*this);
14431448
Register scratch = temps.Acquire();
14441449
MOZ_ASSERT(scratch != dest);
@@ -1474,6 +1479,11 @@ class MacroAssemblerPPC64Compat : public MacroAssemblerPPC64 {
14741479
return FaultingCodeOffset(
14751480
as_lfd(dest, addr.base, addr.offset).getOffset());
14761481
}
1482+
if (HasPOWER10() && is_intN((intptr_t)addr.offset, 34)) {
1483+
return FaultingCodeOffset(
1484+
as_plfd(dest, addr.base, (int64_t)addr.offset, /*R=*/false)
1485+
.getOffset());
1486+
}
14771487
UseScratchRegisterScope temps(*this);
14781488
Register scratch = temps.Acquire();
14791489
movePtr(ImmWord(addr.offset), scratch);
@@ -1495,6 +1505,11 @@ class MacroAssemblerPPC64Compat : public MacroAssemblerPPC64 {
14951505
return FaultingCodeOffset(
14961506
as_lfs(dest, addr.base, addr.offset).getOffset());
14971507
}
1508+
if (HasPOWER10() && is_intN((intptr_t)addr.offset, 34)) {
1509+
return FaultingCodeOffset(
1510+
as_plfs(dest, addr.base, (int64_t)addr.offset, /*R=*/false)
1511+
.getOffset());
1512+
}
14981513
UseScratchRegisterScope temps(*this);
14991514
Register scratch = temps.Acquire();
15001515
movePtr(ImmWord(addr.offset), scratch);

0 commit comments

Comments
 (0)