Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/cmd/asm/internal/asm/testdata/arm64.s
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,9 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
PRFM 8(R12), PLIL3STRM // 8d0580f9
PRFM (R8), $25 // 190180f9
PRFM 8(R9), $30 // 3e0580f9
RPRFM (R1), R2, PLDKEEP // 3848a2f8
RPRFM (RSP), R4, PSTSTRM // fd4ba4f8
RPRFM (R6), R12, $25 // d978acf8
NOOP // 1f2003d5
HINT $0 // 1f2003d5
DMB $1
Expand Down
4 changes: 4 additions & 0 deletions src/cmd/asm/internal/asm/testdata/arm64error.s
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,10 @@ TEXT errors(SB),$0
AUTIA1716 $45 // ERROR "illegal combination"
AUTIB1716 R0 // ERROR "illegal combination"
SB $1 // ERROR "illegal combination"
RPRFM (R1), RSP, PLDKEEP // ERROR "illegal combination"
RPRFM 2(RSP), R4, PSTSTRM // ERROR "illegal combination"
RPRFM (R2), R3, $100 // ERROR "range prefetch immediate must be 0 to 63"
RPRFM (R5), R6, PLDL1KEEP // ERROR "illegal range prefetch operand"

// VMUL family invalid arrangement tests
VMUL V0.D2, V0.D2, V1.D2 // ERROR "invalid arrangement"
Expand Down
5 changes: 5 additions & 0 deletions src/cmd/compile/internal/ssa/_gen/generic.rules
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,11 @@
(Sub(64|32|16|8) <t> (Mul(64|32|16|8) x y) (Mul(64|32|16|8) x z))
=> (Mul(64|32|16|8) x (Sub(64|32|16|8) <t> y z))

// Canonicalize x+x to x << 1.
// This is often slower since most CPUs have more adders than shifters, but it can enable other optimizations.
// Arches that care about this, like AMD64, convert x << 1 back to x+x in their arch-specific rules, which is useful anyhow.
(Add(64|32|16|8) x x) => (Lsh(64|32|16|8)x64 x (Const64 <types.Types[types.TUINT64]> [1]))

// rewrite shifts of 8/16/32 bit consts into 64 bit consts to reduce
// the number of the other rewrite rules for const shifts
(Lsh64x32 <t> x (Const32 [c])) => (Lsh64x64 x (Const64 <t> [int64(uint32(c))]))
Expand Down
52 changes: 52 additions & 0 deletions src/cmd/compile/internal/ssa/rewritegeneric.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions src/cmd/compile/internal/ssagen/intrinsics.go
Original file line number Diff line number Diff line change
Expand Up @@ -2042,6 +2042,10 @@ func branchTableN(s *state, idx *ssa.Value, intrinsicCall *ir.CallExpr, genOp fu
s.startBlock(bPanic)
s.rtcall(ir.Syms.PanicSimdImm, false, nil)
}
if s.curBlock != nil {
bb := s.endBlock()
bb.AddEdgeTo(jt)
}

s.startBlock(jt)
jt.Kind = ssa.BlockPlain
Expand Down
7 changes: 7 additions & 0 deletions src/cmd/internal/obj/arm64/a.out.go
Original file line number Diff line number Diff line change
Expand Up @@ -1006,6 +1006,7 @@ const (
AREVW
AROR
ARORW
ARPRFM
ASB
ASBC
ASBCS
Expand Down Expand Up @@ -1482,5 +1483,11 @@ const (
SPOP_J
SPOP_JC

// Range PReFetch of Memory (RPRFM)
SPOP_PLDKEEP
SPOP_PSTKEEP
SPOP_PLDSTRM
SPOP_PSTSTRM

SPOP_END
)
1 change: 1 addition & 0 deletions src/cmd/internal/obj/arm64/anames.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

50 changes: 50 additions & 0 deletions src/cmd/internal/obj/arm64/asm7.go
Original file line number Diff line number Diff line change
Expand Up @@ -901,6 +901,8 @@ var optab = []Optab{
{AMSR, C_VCON, C_NONE, C_NONE, C_SPOP, C_NONE, 37, 4, 0, 0, 0},
{APRFM, C_UOREG32K, C_NONE, C_NONE, C_SPOP, C_NONE, 91, 4, 0, 0, 0},
{APRFM, C_UOREG32K, C_NONE, C_NONE, C_LCON, C_NONE, 91, 4, 0, 0, 0},
{ARPRFM, C_ZOREG, C_REG, C_NONE, C_SPOP, C_NONE, 110, 4, 0, 0, 0},
{ARPRFM, C_ZOREG, C_REG, C_NONE, C_LCON, C_NONE, 110, 4, 0, 0, 0},
{ADMB, C_VCON, C_NONE, C_NONE, C_NONE, C_NONE, 51, 4, 0, 0, 0},
{AHINT, C_VCON, C_NONE, C_NONE, C_NONE, C_NONE, 52, 4, 0, 0, 0},
{ASYS, C_VCON, C_NONE, C_NONE, C_NONE, C_NONE, 50, 4, 0, 0, 0},
Expand Down Expand Up @@ -943,6 +945,13 @@ var pstatefield = []struct {
{SPOP_DAIFClr, 3<<16 | 4<<12 | 7<<5},
}

// rprfopfield maps the named range prefetch operations accepted by
// RPRFM to their 6-bit prefetch-operation encodings. Among the four
// defined values, bit 0 distinguishes load (0) from store (1)
// prefetches and bit 2 distinguishes keep (0) from stream (1)
// policy; the 6 bits are later scattered into the instruction word
// by the RPRFM case in asmout.
var rprfopfield = map[SpecialOperand]uint32{
	SPOP_PLDKEEP: 0,
	SPOP_PSTKEEP: 1,
	SPOP_PLDSTRM: 4,
	SPOP_PSTSTRM: 5,
}

var prfopfield = map[SpecialOperand]uint32{
SPOP_PLDL1KEEP: 0,
SPOP_PLDL1STRM: 1,
Expand Down Expand Up @@ -3416,6 +3425,7 @@ func buildop(ctxt *obj.Link) {
AVDUP,
AVMOVI,
APRFM,
ARPRFM,
AVEXT,
AVXAR:
break
Expand Down Expand Up @@ -6088,6 +6098,43 @@ func (c *ctxt7) asmout(p *obj.Prog, out []uint32) (count int) {
rn := uint32(p.Reg & 31)
o1 |= Q<<30 | size<<22 | (rn << 5) | (rd)

case 110: /*rprfm (Rn), Rm, <rprfop/imm6>*/
rn := p.From.Reg
rm := p.Reg
var operation uint32
var ok bool

// Operation is either a 6-bit immediate or named prefetch operation.
if p.To.Type == obj.TYPE_CONST {
operation = uint32(p.To.Offset)
if operation > 63 {
c.ctxt.Diag("range prefetch immediate must be 0 to 63: %v", p)
}
} else {
operation, ok = rprfopfield[SpecialOperand(p.To.Offset)]
if !ok {
c.ctxt.Diag("illegal range prefetch operand, expected PLDKEEP, PSTKEEP, PLDSTRM or PSTSTRM: %v", p)
}
}

// 6-bit placement: the 6-bit value is scattered to match the
// architectural encoding (bits 15,13,12,2-0). This is because the
// instruction word reuses fields from the base load/store hint space.
// option2 (bit5) -> bit15
// option0 (bit4) -> bit13
// S (bit3) -> bit12
// Rt<2:0> (bits2-0) -> bits2-0
// Rt<4:3> are already set by c.opirr() and are fixed for RPRFM.
option2 := (operation & (1 << 5)) << 10
option0 := (operation & (1 << 4)) << 9
s := (operation & (1 << 3)) << 9
rt := (operation & 0x7)

encodedOperation := option2 | option0 | s | rt

o1 = c.opirr(p, p.As)
o1 |= (uint32(rm&31) << 16) | (uint32(rn&31) << 5) | uint32(encodedOperation)

case 127:
// Generic SVE instruction encoding
matched := false
Expand Down Expand Up @@ -7277,6 +7324,9 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 {

case APRFM:
return 0xf9<<24 | 2<<22

case ARPRFM:
return 0xf8<<24 | 5<<21 | 18<<10 | 3<<3
}

c.ctxt.Diag("%v: bad irr %v", p, a)
Expand Down
10 changes: 7 additions & 3 deletions src/cmd/internal/obj/arm64/specialoperand_string.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions src/go/importer/importer.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,10 @@ type Lookup func(path string) (io.ReadCloser, error)
// it is assumed that the translation to canonical import paths is being
// done by the client of the importer.
//
// A lookup function must be provided for correct module-aware operation.
// Deprecated: If lookup is nil, for backwards-compatibility, the importer
// will attempt to resolve imports in the $GOPATH workspace.
// A lookup function must be provided for correct module-aware
// operation. Providing a nil value for lookup is deprecated but, for
// backwards-compatibility, the importer will in this case attempt to
// resolve imports in the $GOPATH workspace.
func ForCompiler(fset *token.FileSet, compiler string, lookup Lookup) types.Importer {
switch compiler {
case "gc":
Expand Down
7 changes: 6 additions & 1 deletion src/internal/runtime/maps/group.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ const (

bitsetLSB = 0x0101010101010101
bitsetMSB = 0x8080808080808080
bitsetL7B = 0x7f7f7f7f7f7f7f7f
bitsetEmpty = bitsetLSB * uint64(ctrlEmpty)
)

Expand Down Expand Up @@ -158,6 +159,11 @@ func (g ctrlGroup) matchH2(h uintptr) bitset {
// Note: On AMD64, this is an intrinsic implemented with SIMD instructions. See
// note on bitset about the packed intrinsified return value.
func ctrlGroupMatchH2(g ctrlGroup, h uintptr) bitset {
v := uint64(g) ^ (bitsetLSB * uint64(h))
if goarch.IsArm64 == 1 {
v = ^v
return bitset((v&bitsetL7B + bitsetLSB) & (v & bitsetMSB))
}
// NB: This generic matching routine produces false positive matches when
// h is 2^N and the control bytes have a seq of 2^N followed by 2^N+1. For
// example: if ctrls==0x0302 and h=02, we'll compute v as 0x0100. When we
Expand All @@ -166,7 +172,6 @@ func ctrlGroupMatchH2(g ctrlGroup, h uintptr) bitset {
// just a rare inefficiency. Note that they only occur if there is a real
// match and never occur on ctrlEmpty, or ctrlDeleted. The subsequent key
// comparisons ensure that there is no correctness issue.
v := uint64(g) ^ (bitsetLSB * uint64(h))
return bitset(((v - bitsetLSB) &^ v) & bitsetMSB)
}

Expand Down
2 changes: 2 additions & 0 deletions src/runtime/cgo/gcc_libinit_unix.c
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,9 @@ x_cgo_thread_start(ThreadStart *arg)
ThreadStart *ts;

/* Make our own copy that can persist after we return. */
_cgo_tsan_acquire();
ts = malloc(sizeof *ts);
_cgo_tsan_release();
if(ts == nil) {
fprintf(stderr, "runtime/cgo: out of memory in thread_start\n");
abort();
Expand Down
44 changes: 23 additions & 21 deletions test/codegen/shift.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,47 +120,49 @@ func rshConst64x32(v int64) int64 {
}

// lshConst32x1Add checks that x+x is canonicalized to a shift and
// merged with the outer shift: (x+x)<<1 must compile to a single
// shift by 2 with no residual ADD.
// (The diff rendering had duplicated the pre-change asmcheck lines;
// only the current annotations are kept.)
func lshConst32x1Add(x int32) int32 {
	// amd64:-"ADD" "SHLL [$]2"
	// loong64:-"ADD" "SLL [$]2"
	// riscv64:-"ADD" "SLLI [$]2"
	// ppc64x:-"ADD" "SLW [$]2"
	return (x + x) << 1
}

// lshConst64x1Add checks that x+x is canonicalized to a shift and
// merged with the outer shift: (x+x)<<1 must compile to a single
// shift by 2 with no residual ADD.
// (The diff rendering had duplicated the pre-change asmcheck lines;
// only the current annotations are kept.)
func lshConst64x1Add(x int64) int64 {
	// amd64:-"ADD" "SHLQ [$]2"
	// loong64:-"ADD" "SLLV [$]2"
	// riscv64:-"ADD" "SLLI [$]2"
	// ppc64x:-"ADD" "SLD [$]2"
	return (x + x) << 1
}

// lshConst32x2Add checks that x+x is canonicalized to a shift and
// merged with the outer shift: (x+x)<<2 must compile to a single
// shift by 3 with no residual ADD.
// (The diff rendering had duplicated the pre-change asmcheck lines;
// only the current annotations are kept.)
func lshConst32x2Add(x int32) int32 {
	// amd64:-"ADD" "SHLL [$]3"
	// loong64:-"ADD" "SLL [$]3"
	// riscv64:-"ADD" "SLLI [$]3"
	// ppc64x:-"ADD" "SLW [$]3"
	return (x + x) << 2
}

// lshConst64x2Add checks that x+x is canonicalized to a shift and
// merged with the outer shift: (x+x)<<2 must compile to a single
// shift by 3 with no residual ADD.
// (The diff rendering had duplicated the pre-change asmcheck lines;
// only the current annotations are kept.)
func lshConst64x2Add(x int64) int64 {
	// amd64:-"ADD" "SHLQ [$]3"
	// loong64:-"ADD" "SLLV [$]3"
	// riscv64:-"ADD" "SLLI [$]3"
	// ppc64x:-"ADD" "SLD [$]3"
	return (x + x) << 2
}

// lshConst32x31Add checks that (x+x)<<31 — an effective shift of a
// 32-bit value by 32 — is recognized as always zero and folded to a
// constant, with no ADD and no shift emitted.
// The stale pre-change annotations (which still expected an ADD on
// ppc64x) contradicted the current ones and are removed; only the
// post-change asmcheck lines are kept.
func lshConst32x31Add(x int32) int32 {
	// amd64:-"ADD" -"SHL" "XORL AX, AX"
	// loong64:-"ADD" -"SLL " "MOVV R0"
	// riscv64:-"ADD" -"SLLI" "MOV [$]0"
	// ppc64x:-"ADD" -"SLW" "MOVD [$]0"
	return (x + x) << 31
}

// lshConst64x63Add checks that (x+x)<<63 — an effective shift of a
// 64-bit value by 64 — is recognized as always zero and folded to a
// constant, with no ADD and no shift emitted.
// (The diff rendering had duplicated the pre-change asmcheck lines;
// only the current annotations are kept.)
func lshConst64x63Add(x int64) int64 {
	// amd64:-"ADD" -"SHL" "XORL AX, AX"
	// loong64:-"ADD" -"SLLV" "MOVV R0"
	// riscv64:-"ADD" -"SLLI" "MOV [$]0"
	return (x + x) << 63
}

Expand Down
Loading