Skip to content

Commit cf5be82

Browse files
author
Sean Fertile
committed
Reland "[PPC64] Add split-stack support."
Recommitting https://reviews.llvm.org/rL344544 after fixing undefined behavior from left-shifting a negative value. Original commit message: This support is slightly different than the X86_64 implementation in that calls to __morestack don't need to get rewritten to calls to __morestack_non_split when a split-stack caller calls a non-split-stack callee. Instead, the size of the stack frame requested by the caller is adjusted prior to the call to __morestack. The amount by which the stack frame is adjusted is tunable through a new --split-stack-adjust-size option. git-svn-id: https://llvm.org/svn/llvm-project/lld/trunk@344622 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent fa9dbc4 commit cf5be82

13 files changed

Lines changed: 601 additions & 12 deletions

ELF/Arch/PPC64.cpp

Lines changed: 114 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,9 @@ class PPC64 final : public TargetInfo {
120120
void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
121121
void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
122122
void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
123+
124+
bool adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End,
125+
uint8_t StOther) const override;
123126
};
124127
} // namespace
125128

@@ -213,6 +216,8 @@ PPC64::PPC64() {
213216

214217
TlsGotRel = R_PPC64_TPREL64;
215218

219+
NeedsMoreStackNonSplit = false;
220+
216221
// We need 64K pages (at least under glibc/Linux, the loader won't
217222
// set different permissions on a finer granularity than that).
218223
DefaultMaxPageSize = 65536;
@@ -761,7 +766,115 @@ void PPC64::relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const {
761766
}
762767
}
763768

769+
// The prologue for a split-stack function is expected to look roughly
770+
// like this:
771+
// .Lglobal_entry_point:
772+
// # TOC pointer initalization.
773+
// ...
774+
// .Llocal_entry_point:
775+
// # load the __private_ss member of the threads tcbhead.
776+
// ld r0,-0x7000-64(r13)
777+
// # subtract the functions stack size from the stack pointer.
778+
// addis r12, r1, ha(-stack-frame size)
779+
// addi r12, r12, l(-stack-frame size)
780+
// # compare needed to actual and branch to allocate_more_stack if more
781+
// # space is needed, otherwise fallthrough to 'normal' function body.
782+
// cmpld cr7,r12,r0
783+
// blt- .Lallocate_more_stack
784+
//
785+
// -) The allocate_more_stack block might be placed after the split-stack
786+
// prologue and the `blt-` replaced with a `bge+ .Lnormal_func_body`
787+
// instead.
788+
// -) If either the addis or addi is not needed due to the stack size being
789+
// smaller then 32K or a multiple of 64K they will be replaced with a nop,
790+
// but there will always be 2 instructions the linker can overwrite for the
791+
// adjusted stack size.
792+
//
793+
// The linkers job here is to increase the stack size used in the addis/addi
794+
// pair by split-stack-size-adjust.
795+
// addis r12, r1, ha(-stack-frame size - split-stack-adjust-size)
796+
// addi r12, r12, l(-stack-frame size - split-stack-adjust-size)
797+
bool PPC64::adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End,
798+
uint8_t StOther) const {
799+
// If the caller has a global entry point adjust the buffer past it. The start
800+
// of the split-stack prologue will be at the local entry point.
801+
Loc += getPPC64GlobalEntryToLocalEntryOffset(StOther);
802+
803+
// At the very least we expect to see a load of some split-stack data from the
804+
// tcb, and 2 instructions that calculate the ending stack address this
805+
// function will require. If there is not enough room for at least 3
806+
// instructions it can't be a split-stack prologue.
807+
if (Loc + 12 >= End)
808+
return false;
809+
810+
// First instruction must be `ld r0, -0x7000-64(r13)`
811+
if (read32(Loc) != 0xe80d8fc0)
812+
return false;
813+
814+
int16_t HiImm = 0;
815+
int16_t LoImm = 0;
816+
// First instruction can be either an addis if the frame size is larger then
817+
// 32K, or an addi if the size is less then 32K.
818+
int32_t FirstInstr = read32(Loc + 4);
819+
if (getPrimaryOpCode(FirstInstr) == 15) {
820+
HiImm = FirstInstr & 0xFFFF;
821+
} else if (getPrimaryOpCode(FirstInstr) == 14) {
822+
LoImm = FirstInstr & 0xFFFF;
823+
} else {
824+
return false;
825+
}
826+
827+
// Second instruction is either an addi or a nop. If the first instruction was
828+
// an addi then LoImm is set and the second instruction must be a nop.
829+
uint32_t SecondInstr = read32(Loc + 8);
830+
if (!LoImm && getPrimaryOpCode(SecondInstr) == 14) {
831+
LoImm = SecondInstr & 0xFFFF;
832+
} else if (SecondInstr != 0x60000000) {
833+
return false;
834+
}
835+
836+
// The register operands of the first instruction should be the stack-pointer
837+
// (r1) as the input (RA) and r12 as the output (RT). If the second
838+
// instruction is not a nop, then it should use r12 as both input and output.
839+
auto CheckRegOperands =
840+
[](uint32_t Instr, uint8_t ExpectedRT, uint8_t ExpectedRA) {
841+
return ((Instr & 0x3E00000) >> 21 == ExpectedRT) &&
842+
((Instr & 0x1F0000) >> 16 == ExpectedRA);
843+
};
844+
if (!CheckRegOperands(FirstInstr, 12, 1))
845+
return false;
846+
if (SecondInstr != 0x60000000 && !CheckRegOperands(SecondInstr, 12, 12))
847+
return false;
848+
849+
int32_t StackFrameSize = (HiImm * 65536) + LoImm;
850+
// Check that the adjusted size doesn't overflow what we can represent with 2
851+
// instructions.
852+
if (StackFrameSize < -2147483648 + Config->SplitStackAdjustSize) {
853+
error(getErrorLocation(Loc) + "split-stack prologue adjustment overflows");
854+
return false;
855+
}
856+
857+
int32_t AdjustedStackFrameSize =
858+
StackFrameSize - Config->SplitStackAdjustSize;
859+
860+
LoImm = AdjustedStackFrameSize & 0xFFFF;
861+
HiImm = (AdjustedStackFrameSize + 0x8000) >> 16;
862+
if (HiImm) {
863+
write32(Loc + 4, 0x3D810000 | (uint16_t)HiImm);
864+
// If the low immediate is zero the second instruction will be a nop.
865+
SecondInstr =
866+
LoImm ? 0x398C0000 | (uint16_t)LoImm : 0x60000000;
867+
write32(Loc + 8, SecondInstr);
868+
} else {
869+
// addi r12, r1, imm
870+
write32(Loc + 4, (0x39810000) | (uint16_t)LoImm);
871+
write32(Loc + 8, 0x60000000);
872+
}
873+
874+
return true;
875+
}
876+
764877
TargetInfo *elf::getPPC64TargetInfo() {
765878
static PPC64 Target;
766879
return &Target;
767-
}
880+
}

ELF/Arch/X86_64.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@ template <class ELFT> class X86_64 : public TargetInfo {
4343
void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
4444
void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
4545
void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
46-
bool adjustPrologueForCrossSplitStack(uint8_t *Loc,
47-
uint8_t *End) const override;
46+
bool adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End,
47+
uint8_t StOther) const override;
4848

4949
private:
5050
void relaxGotNoPic(uint8_t *Loc, uint64_t Val, uint8_t Op,
@@ -482,7 +482,8 @@ namespace {
482482
// B) Or a load of a stack pointer offset with an lea to r10 or r11.
483483
template <>
484484
bool X86_64<ELF64LE>::adjustPrologueForCrossSplitStack(uint8_t *Loc,
485-
uint8_t *End) const {
485+
uint8_t *End,
486+
uint8_t StOther) const {
486487
if (Loc + 8 >= End)
487488
return false;
488489

@@ -509,7 +510,8 @@ bool X86_64<ELF64LE>::adjustPrologueForCrossSplitStack(uint8_t *Loc,
509510

510511
template <>
511512
bool X86_64<ELF32LE>::adjustPrologueForCrossSplitStack(uint8_t *Loc,
512-
uint8_t *End) const {
513+
uint8_t *End,
514+
uint8_t StOther) const {
513515
llvm_unreachable("Target doesn't support split stacks.");
514516
}
515517

ELF/Config.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,7 @@ struct Configuration {
217217
unsigned LTOO;
218218
unsigned Optimize;
219219
unsigned ThinLTOJobs;
220+
int32_t SplitStackAdjustSize;
220221

221222
// The following config options do not directly correspond to any
222223
// particular command line options.

ELF/Driver.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -829,6 +829,7 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) {
829829
Config->SingleRoRx = Args.hasArg(OPT_no_rosegment);
830830
Config->SoName = Args.getLastArgValue(OPT_soname);
831831
Config->SortSection = getSortSection(Args);
832+
Config->SplitStackAdjustSize = args::getInteger(Args, OPT_split_stack_adjust_size, 16384);
832833
Config->Strip = getStrip(Args);
833834
Config->Sysroot = Args.getLastArgValue(OPT_sysroot);
834835
Config->Target1Rel = Args.hasFlag(OPT_target1_rel, OPT_target1_abs, false);
@@ -901,6 +902,9 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) {
901902
if (Config->ThinLTOJobs == 0)
902903
error("--thinlto-jobs: number of threads must be > 0");
903904

905+
if (Config->SplitStackAdjustSize < 0)
906+
error("--split-stack-adjust-size: size must be >= 0");
907+
904908
// Parse ELF{32,64}{LE,BE} and CPU type.
905909
if (auto *Arg = Args.getLastArg(OPT_m)) {
906910
StringRef S = Arg->getValue();

ELF/InputSection.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1009,15 +1009,17 @@ void InputSectionBase::adjustSplitStackFunctionPrologues(uint8_t *Buf,
10091009
if (Defined *F = getEnclosingFunction<ELFT>(Rel.Offset)) {
10101010
Prologues.insert(F);
10111011
if (Target->adjustPrologueForCrossSplitStack(Buf + getOffset(F->Value),
1012-
End))
1012+
End, F->StOther))
10131013
continue;
10141014
if (!getFile<ELFT>()->SomeNoSplitStack)
10151015
error(lld::toString(this) + ": " + F->getName() +
10161016
" (with -fsplit-stack) calls " + Rel.Sym->getName() +
10171017
" (without -fsplit-stack), but couldn't adjust its prologue");
10181018
}
10191019
}
1020-
switchMorestackCallsToMorestackNonSplit(Prologues, MorestackCalls);
1020+
1021+
if (Target->NeedsMoreStackNonSplit)
1022+
switchMorestackCallsToMorestackNonSplit(Prologues, MorestackCalls);
10211023
}
10221024

10231025
template <class ELFT> void InputSection::writeTo(uint8_t *Buf) {

ELF/Options.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,12 @@ defm compress_debug_sections:
4242

4343
defm defsym: Eq<"defsym", "Define a symbol alias">, MetaVarName<"<symbol>=<value>">;
4444

45+
defm split_stack_adjust_size
46+
: Eq<"split-stack-adjust-size",
47+
"Specify adjustment to stack size when a split-stack function calls a "
48+
"non-split-stack function">,
49+
MetaVarName<"<value>">;
50+
4551
defm library_path:
4652
Eq<"library-path", "Add a directory to the library search path">, MetaVarName<"<dir>">;
4753

ELF/Target.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -132,12 +132,11 @@ bool TargetInfo::needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
132132
return false;
133133
}
134134

135-
bool TargetInfo::adjustPrologueForCrossSplitStack(uint8_t *Loc,
136-
uint8_t *End) const {
135+
bool TargetInfo::adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End,
136+
uint8_t StOther) const {
137137
llvm_unreachable("Target doesn't support split stacks.");
138138
}
139139

140-
141140
bool TargetInfo::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const {
142141
return true;
143142
}

ELF/Target.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,10 @@ class TargetInfo {
6969
// The function with a prologue starting at Loc was compiled with
7070
// -fsplit-stack and it calls a function compiled without. Adjust the prologue
7171
// to do the right thing. See https://gcc.gnu.org/wiki/SplitStacks.
72-
virtual bool adjustPrologueForCrossSplitStack(uint8_t *Loc,
73-
uint8_t *End) const;
72+
// The symbol's st_other flags are needed on PowerPC64 for determining the
73+
// offset to the split-stack prologue.
74+
virtual bool adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End,
75+
uint8_t StOther) const;
7476

7577
// Return true if we can reach Dst from Src with Relocation RelocType
7678
virtual bool inBranchRange(RelType Type, uint64_t Src,
@@ -130,6 +132,11 @@ class TargetInfo {
130132
// executable OutputSections.
131133
uint32_t TrapInstr = 0;
132134

135+
// If a target needs to rewrite calls to __morestack to instead call
136+
// __morestack_non_split when a split-stack enabled caller calls a
137+
// non-split-stack callee, this is set to true; otherwise it is false.
138+
bool NeedsMoreStackNonSplit = true;
139+
133140
virtual RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
134141
RelExpr Expr) const;
135142
virtual void relaxGot(uint8_t *Loc, uint64_t Val) const;
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
.abiversion 2
2+
.p2align 2
3+
.global nss_callee
4+
.type nss_callee, @function
5+
nss_callee:
6+
li 3, 1
7+
blr
8+
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# REQUIRES: ppc
2+
3+
# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t1.o
4+
# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/ppc64-no-split-stack.s -o %t2.o
5+
6+
# RUN: not ld.lld --defsym __morestack=0x10010000 %t1.o %t2.o -o %t 2>&1 | FileCheck %s
7+
8+
# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t1.o
9+
# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/ppc64-no-split-stack.s -o %t2.o
10+
11+
# RUN: not ld.lld --defsym __morestack=0x10010000 %t1.o %t2.o -o %t 2>&1 | FileCheck %s
12+
13+
# CHECK: error: {{.*}}.o:(.text): wrong_regs (with -fsplit-stack) calls nss_callee (without -fsplit-stack), but couldn't adjust its prologue
14+
15+
.abiversion 2
16+
.section ".text"
17+
18+
.p2align 2
19+
.global wrong_regs
20+
.type wrong_regs, @function
21+
22+
wrong_regs:
23+
.Lwr_gep:
24+
addis 2, 12, .TOC.-.Lwr_gep@ha
25+
addi 2, 2, .TOC.-.Lwr_gep@l
26+
.localentry wrong_regs, .-wrong_regs
27+
ld 0, -0x7040(13)
28+
addis 5, 2, -1
29+
addi 5, 5, -32
30+
addi 12, 1, -32
31+
nop
32+
cmpld 7, 12, 0
33+
blt- 7, .Lwr_alloc_more
34+
.Lwr_body:
35+
mflr 0
36+
std 0, 16(1)
37+
stdu 1, -32(1)
38+
bl nss_callee
39+
addi 1, 1, 32
40+
ld 0, 16(1)
41+
mtlr 0
42+
blr
43+
.Lwr_alloc_more:
44+
mflr 0
45+
std 0, 16(1)
46+
bl __morestack
47+
ld 0, 16(1)
48+
mtlr 0
49+
blr
50+
b .Lwr_body
51+
.size wrong_regs, .-wrong_regs
52+
53+
.section .note.GNU-split-stack,"",@progbits

0 commit comments

Comments
 (0)