@@ -120,6 +120,9 @@ class PPC64 final : public TargetInfo {
120120 void relaxTlsGdToLe (uint8_t *Loc, RelType Type, uint64_t Val) const override ;
121121 void relaxTlsLdToLe (uint8_t *Loc, RelType Type, uint64_t Val) const override ;
122122 void relaxTlsIeToLe (uint8_t *Loc, RelType Type, uint64_t Val) const override ;
123+
124+ bool adjustPrologueForCrossSplitStack (uint8_t *Loc, uint8_t *End,
125+ uint8_t StOther) const override ;
123126};
124127} // namespace
125128
@@ -213,6 +216,8 @@ PPC64::PPC64() {
213216
214217 TlsGotRel = R_PPC64_TPREL64;
215218
219+ NeedsMoreStackNonSplit = false ;
220+
216221 // We need 64K pages (at least under glibc/Linux, the loader won't
217222 // set different permissions on a finer granularity than that).
218223 DefaultMaxPageSize = 65536 ;
@@ -761,7 +766,115 @@ void PPC64::relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const {
761766 }
762767}
763768
769+ // The prologue for a split-stack function is expected to look roughly
770+ // like this:
771+ // .Lglobal_entry_point:
772+ // # TOC pointer initalization.
773+ // ...
774+ // .Llocal_entry_point:
775+ // # load the __private_ss member of the threads tcbhead.
776+ // ld r0,-0x7000-64(r13)
777+ // # subtract the functions stack size from the stack pointer.
778+ // addis r12, r1, ha(-stack-frame size)
779+ // addi r12, r12, l(-stack-frame size)
780+ // # compare needed to actual and branch to allocate_more_stack if more
781+ // # space is needed, otherwise fallthrough to 'normal' function body.
782+ // cmpld cr7,r12,r0
783+ // blt- .Lallocate_more_stack
784+ //
785+ // -) The allocate_more_stack block might be placed after the split-stack
786+ // prologue and the `blt-` replaced with a `bge+ .Lnormal_func_body`
787+ // instead.
788+ // -) If either the addis or addi is not needed due to the stack size being
789+ // smaller then 32K or a multiple of 64K they will be replaced with a nop,
790+ // but there will always be 2 instructions the linker can overwrite for the
791+ // adjusted stack size.
792+ //
793+ // The linkers job here is to increase the stack size used in the addis/addi
794+ // pair by split-stack-size-adjust.
795+ // addis r12, r1, ha(-stack-frame size - split-stack-adjust-size)
796+ // addi r12, r12, l(-stack-frame size - split-stack-adjust-size)
797+ bool PPC64::adjustPrologueForCrossSplitStack (uint8_t *Loc, uint8_t *End,
798+ uint8_t StOther) const {
799+ // If the caller has a global entry point adjust the buffer past it. The start
800+ // of the split-stack prologue will be at the local entry point.
801+ Loc += getPPC64GlobalEntryToLocalEntryOffset (StOther);
802+
803+ // At the very least we expect to see a load of some split-stack data from the
804+ // tcb, and 2 instructions that calculate the ending stack address this
805+ // function will require. If there is not enough room for at least 3
806+ // instructions it can't be a split-stack prologue.
807+ if (Loc + 12 >= End)
808+ return false ;
809+
810+ // First instruction must be `ld r0, -0x7000-64(r13)`
811+ if (read32 (Loc) != 0xe80d8fc0 )
812+ return false ;
813+
814+ int16_t HiImm = 0 ;
815+ int16_t LoImm = 0 ;
816+ // First instruction can be either an addis if the frame size is larger then
817+ // 32K, or an addi if the size is less then 32K.
818+ int32_t FirstInstr = read32 (Loc + 4 );
819+ if (getPrimaryOpCode (FirstInstr) == 15 ) {
820+ HiImm = FirstInstr & 0xFFFF ;
821+ } else if (getPrimaryOpCode (FirstInstr) == 14 ) {
822+ LoImm = FirstInstr & 0xFFFF ;
823+ } else {
824+ return false ;
825+ }
826+
827+ // Second instruction is either an addi or a nop. If the first instruction was
828+ // an addi then LoImm is set and the second instruction must be a nop.
829+ uint32_t SecondInstr = read32 (Loc + 8 );
830+ if (!LoImm && getPrimaryOpCode (SecondInstr) == 14 ) {
831+ LoImm = SecondInstr & 0xFFFF ;
832+ } else if (SecondInstr != 0x60000000 ) {
833+ return false ;
834+ }
835+
836+ // The register operands of the first instruction should be the stack-pointer
837+ // (r1) as the input (RA) and r12 as the output (RT). If the second
838+ // instruction is not a nop, then it should use r12 as both input and output.
839+ auto CheckRegOperands =
840+ [](uint32_t Instr, uint8_t ExpectedRT, uint8_t ExpectedRA) {
841+ return ((Instr & 0x3E00000 ) >> 21 == ExpectedRT) &&
842+ ((Instr & 0x1F0000 ) >> 16 == ExpectedRA);
843+ };
844+ if (!CheckRegOperands (FirstInstr, 12 , 1 ))
845+ return false ;
846+ if (SecondInstr != 0x60000000 && !CheckRegOperands (SecondInstr, 12 , 12 ))
847+ return false ;
848+
849+ int32_t StackFrameSize = (HiImm * 65536 ) + LoImm;
850+ // Check that the adjusted size doesn't overflow what we can represent with 2
851+ // instructions.
852+ if (StackFrameSize < -2147483648 + Config->SplitStackAdjustSize ) {
853+ error (getErrorLocation (Loc) + " split-stack prologue adjustment overflows" );
854+ return false ;
855+ }
856+
857+ int32_t AdjustedStackFrameSize =
858+ StackFrameSize - Config->SplitStackAdjustSize ;
859+
860+ LoImm = AdjustedStackFrameSize & 0xFFFF ;
861+ HiImm = (AdjustedStackFrameSize + 0x8000 ) >> 16 ;
862+ if (HiImm) {
863+ write32 (Loc + 4 , 0x3D810000 | (uint16_t )HiImm);
864+ // If the low immediate is zero the second instruction will be a nop.
865+ SecondInstr =
866+ LoImm ? 0x398C0000 | (uint16_t )LoImm : 0x60000000 ;
867+ write32 (Loc + 8 , SecondInstr);
868+ } else {
869+ // addi r12, r1, imm
870+ write32 (Loc + 4 , (0x39810000 ) | (uint16_t )LoImm);
871+ write32 (Loc + 8 , 0x60000000 );
872+ }
873+
874+ return true ;
875+ }
876+
764877TargetInfo *elf::getPPC64TargetInfo () {
765878 static PPC64 Target;
766879 return &Target;
767- }
880+ }
0 commit comments