Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp')
-rw-r--r--    contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp    586
1 file changed, 258 insertions(+), 328 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 2ee394e9259d..16536bf23deb 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -39,15 +39,6 @@ EnablePEVectorSpills("ppc-enable-pe-vector-spills",
cl::desc("Enable spills in prologue to vector registers."),
cl::init(false), cl::Hidden);
-/// VRRegNo - Map from a numbered VR register to its enum value.
-///
-static const MCPhysReg VRRegNo[] = {
- PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
- PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
- PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
-};
-
static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
if (STI.isAIXABI())
return STI.isPPC64() ? 16 : 8;
@@ -227,19 +218,14 @@ const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
CALLEE_SAVED_VRS
};
- static const SpillSlot AIXOffsets32[] = {
- CALLEE_SAVED_FPRS,
- CALLEE_SAVED_GPRS32,
- // Add AIX's extra CSR.
- {PPC::R13, -76},
- // TODO: Update when we add vector support for AIX.
- };
+ static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS,
+ CALLEE_SAVED_GPRS32,
+ // Add AIX's extra CSR.
+ {PPC::R13, -76},
+ CALLEE_SAVED_VRS};
static const SpillSlot AIXOffsets64[] = {
- CALLEE_SAVED_FPRS,
- CALLEE_SAVED_GPRS64,
- // TODO: Update when we add vector support for AIX.
- };
+ CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};
if (Subtarget.is64BitELFABI()) {
NumEntries = array_lengthof(ELFOffsets64);
@@ -262,153 +248,11 @@ const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
return AIXOffsets32;
}
-/// RemoveVRSaveCode - We have found that this function does not need any code
-/// to manipulate the VRSAVE register, even though it uses vector registers.
-/// This can happen when the only registers used are known to be live in or out
-/// of the function. Remove all of the VRSAVE related code from the function.
-/// FIXME: The removal of the code results in a compile failure at -O0 when the
-/// function contains a function call, as the GPR containing original VRSAVE
-/// contents is spilled and reloaded around the call. Without the prolog code,
-/// the spill instruction refers to an undefined register. This code needs
-/// to account for all uses of that GPR.
-static void RemoveVRSaveCode(MachineInstr &MI) {
- MachineBasicBlock *Entry = MI.getParent();
- MachineFunction *MF = Entry->getParent();
-
- // We know that the MTVRSAVE instruction immediately follows MI. Remove it.
- MachineBasicBlock::iterator MBBI = MI;
- ++MBBI;
- assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
- MBBI->eraseFromParent();
-
- bool RemovedAllMTVRSAVEs = true;
- // See if we can find and remove the MTVRSAVE instruction from all of the
- // epilog blocks.
- for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
- // If last instruction is a return instruction, add an epilogue
- if (I->isReturnBlock()) {
- bool FoundIt = false;
- for (MBBI = I->end(); MBBI != I->begin(); ) {
- --MBBI;
- if (MBBI->getOpcode() == PPC::MTVRSAVE) {
- MBBI->eraseFromParent(); // remove it.
- FoundIt = true;
- break;
- }
- }
- RemovedAllMTVRSAVEs &= FoundIt;
- }
- }
-
- // If we found and removed all MTVRSAVE instructions, remove the read of
- // VRSAVE as well.
- if (RemovedAllMTVRSAVEs) {
- MBBI = MI;
- assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
- --MBBI;
- assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
- MBBI->eraseFromParent();
- }
-
- // Finally, nuke the UPDATE_VRSAVE.
- MI.eraseFromParent();
-}
-
-// HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
-// instruction selector. Based on the vector registers that have been used,
-// transform this into the appropriate ORI instruction.
-static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) {
- MachineFunction *MF = MI.getParent()->getParent();
- const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
- DebugLoc dl = MI.getDebugLoc();
-
- const MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned UsedRegMask = 0;
- for (unsigned i = 0; i != 32; ++i)
- if (MRI.isPhysRegModified(VRRegNo[i]))
- UsedRegMask |= 1 << (31-i);
-
- // Live in and live out values already must be in the mask, so don't bother
- // marking them.
- for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) {
- unsigned RegNo = TRI->getEncodingValue(LI.first);
- if (VRRegNo[RegNo] == LI.first) // If this really is a vector reg.
- UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
- }
-
- // Live out registers appear as use operands on return instructions.
- for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
- UsedRegMask != 0 && BI != BE; ++BI) {
- const MachineBasicBlock &MBB = *BI;
- if (!MBB.isReturnBlock())
- continue;
- const MachineInstr &Ret = MBB.back();
- for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = Ret.getOperand(I);
- if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
- continue;
- unsigned RegNo = TRI->getEncodingValue(MO.getReg());
- UsedRegMask &= ~(1 << (31-RegNo));
- }
- }
-
- // If no registers are used, turn this into a copy.
- if (UsedRegMask == 0) {
- // Remove all VRSAVE code.
- RemoveVRSaveCode(MI);
- return;
- }
-
- Register SrcReg = MI.getOperand(1).getReg();
- Register DstReg = MI.getOperand(0).getReg();
-
- if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
- if (DstReg != SrcReg)
- BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
- .addReg(SrcReg)
- .addImm(UsedRegMask);
- else
- BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
- .addReg(SrcReg, RegState::Kill)
- .addImm(UsedRegMask);
- } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
- if (DstReg != SrcReg)
- BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
- .addReg(SrcReg)
- .addImm(UsedRegMask >> 16);
- else
- BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
- .addReg(SrcReg, RegState::Kill)
- .addImm(UsedRegMask >> 16);
- } else {
- if (DstReg != SrcReg)
- BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
- .addReg(SrcReg)
- .addImm(UsedRegMask >> 16);
- else
- BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
- .addReg(SrcReg, RegState::Kill)
- .addImm(UsedRegMask >> 16);
-
- BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
- .addReg(DstReg, RegState::Kill)
- .addImm(UsedRegMask & 0xFFFF);
- }
-
- // Remove the old UPDATE_VRSAVE instruction.
- MI.eraseFromParent();
-}
-
static bool spillsCR(const MachineFunction &MF) {
const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
return FuncInfo->isCRSpilled();
}
-static bool spillsVRSAVE(const MachineFunction &MF) {
- const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
- return FuncInfo->isVRSAVESpilled();
-}
-
static bool hasSpills(const MachineFunction &MF) {
const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
return FuncInfo->hasSpills();
@@ -474,7 +318,7 @@ PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
!FI->mustSaveTOC() && // No need to save TOC.
!RegInfo->hasBasePointer(MF); // No special alignment.
- // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
+ // Note: for PPC32 SVR4ABI, we can still generate stackless
// code if all local vars are reg-allocated.
bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
@@ -531,9 +375,10 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
return false;
return MF.getTarget().Options.DisableFramePointerElim(MF) ||
- MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
- (MF.getTarget().Options.GuaranteedTailCallOpt &&
- MF.getInfo<PPCFunctionInfo>()->hasFastCall());
+ MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
+ MF.exposesReturnsTwice() ||
+ (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ MF.getInfo<PPCFunctionInfo>()->hasFastCall());
}
void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
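For reference, MF.exposesReturnsTwice() becomes true when the function calls a returns_twice routine such as setjmp. A minimal, hypothetical caller (an illustration only, not part of this diff) that the added check now forces to keep a frame pointer:

#include <csetjmp>

static std::jmp_buf Env;

int MayReturnTwice() {
  // setjmp is a returns_twice call, so MF.exposesReturnsTwice() is set for
  // this function and the needsFP() change above keeps the frame pointer.
  if (setjmp(Env) == 0)
    return 0;  // first (normal) return
  return 1;    // second return, reached via longjmp(Env, 1)
}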
@@ -681,6 +526,8 @@ PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
// register is available, we can adjust for that by not overlapping the spill
// code. However, if we need to realign the stack (i.e. have a base pointer)
// and the stack frame is large, we need two scratch registers.
+// Also, stack probing requires two scratch registers: one for the old sp, and
+// one for the case of a large frame combined with a large probe size.
bool
PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
@@ -692,8 +539,10 @@ PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
MachineFrameInfo &MFI = MF.getFrameInfo();
Align MaxAlign = MFI.getMaxAlign();
bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
+ const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
- return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
+ return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) ||
+ TLI.hasInlineStackProbe(MF);
}
bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
@@ -736,8 +585,8 @@ bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
// Frame pointers and base pointers complicate matters so don't do anything
// if we have them. For example having a frame pointer will sometimes require
// a copy of r1 into r31 and that makes keeping track of updates to r1 more
- // difficult.
- if (hasFP(MF) || RegInfo->hasBasePointer(MF))
+ // difficult. A similar situation exists with setjmp.
+ if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice())
return false;
// Calls to fast_cc functions use different rules for passing parameters on
@@ -771,24 +620,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
bool isPPC64 = Subtarget.isPPC64();
// Get the ABI.
bool isSVR4ABI = Subtarget.isSVR4ABI();
- bool isAIXABI = Subtarget.isAIXABI();
bool isELFv2ABI = Subtarget.isELFv2ABI();
- assert((isSVR4ABI || isAIXABI) && "Unsupported PPC ABI.");
-
- // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
- // process it.
- if (!isSVR4ABI)
- for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
- if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
- if (isAIXABI)
- report_fatal_error("UPDATE_VRSAVE is unexpected on AIX.");
- HandleVRSaveUpdate(*MBBI, TII);
- break;
- }
- }
-
- // Move MBBI back to the beginning of the prologue block.
- MBBI = MBB.begin();
+ assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI.");
// Work out frame sizes.
unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
@@ -848,12 +681,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
"FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
// Using the same bool variable as below to suppress compiler warnings.
- // Stack probe requires two scratch registers, one for old sp, one for large
- // frame and large probe size.
bool SingleScratchReg = findScratchRegister(
- &MBB, false,
- twoUniqueScratchRegsRequired(&MBB) || TLI.hasInlineStackProbe(MF),
- &ScratchReg, &TempReg);
+ &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg);
assert(SingleScratchReg &&
"Required number of registers not available in this block");
@@ -863,26 +692,18 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
int FPOffset = 0;
if (HasFP) {
- if (isSVR4ABI) {
- MachineFrameInfo &MFI = MF.getFrameInfo();
- int FPIndex = FI->getFramePointerSaveIndex();
- assert(FPIndex && "No Frame Pointer Save Slot!");
- FPOffset = MFI.getObjectOffset(FPIndex);
- } else {
- FPOffset = getFramePointerSaveOffset();
- }
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ int FPIndex = FI->getFramePointerSaveIndex();
+ assert(FPIndex && "No Frame Pointer Save Slot!");
+ FPOffset = MFI.getObjectOffset(FPIndex);
}
int BPOffset = 0;
if (HasBP) {
- if (isSVR4ABI) {
- MachineFrameInfo &MFI = MF.getFrameInfo();
- int BPIndex = FI->getBasePointerSaveIndex();
- assert(BPIndex && "No Base Pointer Save Slot!");
- BPOffset = MFI.getObjectOffset(BPIndex);
- } else {
- BPOffset = getBasePointerSaveOffset();
- }
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ int BPIndex = FI->getBasePointerSaveIndex();
+ assert(BPIndex && "No Base Pointer Save Slot!");
+ BPOffset = MFI.getObjectOffset(BPIndex);
}
int PBPOffset = 0;
@@ -1038,15 +859,15 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, dl,
TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
: PPC::PROBED_STACKALLOC_32))
- .addDef(ScratchReg)
- .addDef(TempReg) // TempReg stores the old sp.
+ .addDef(TempReg)
+ .addDef(ScratchReg) // ScratchReg stores the old sp.
.addImm(NegFrameSize);
// FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we
// update the ScratchReg to meet the assumption that ScratchReg contains
// the NegFrameSize. This solution is rather tricky.
if (!HasRedZone) {
BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
- .addReg(TempReg)
+ .addReg(ScratchReg)
.addReg(SPReg);
HasSTUX = true;
}
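To spell out the arithmetic behind the FIXME above (a sketch, assuming the probe pseudo has already decremented SP by the full frame size): subf RT, RA, RB computes RT = RB - RA, so the inserted instruction yields

  ScratchReg = SPReg - ScratchReg          // ScratchReg held the old sp
             = (old_sp + NegFrameSize) - old_sp
             = NegFrameSize

which is exactly the value the later non-red-zone code expects ScratchReg to hold.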
@@ -1366,7 +1187,6 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
MachineBasicBlock &PrologMBB) const {
- // TODO: Generate CFI instructions.
bool isPPC64 = Subtarget.isPPC64();
const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
@@ -1382,10 +1202,12 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
if (StackAllocMIPos == PrologMBB.end())
return;
const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
+ MachineBasicBlock *CurrentMBB = &PrologMBB;
DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
MachineInstr &MI = *StackAllocMIPos;
int64_t NegFrameSize = MI.getOperand(2).getImm();
- int64_t NegProbeSize = -(int64_t)TLI.getStackProbeSize(MF);
+ unsigned ProbeSize = TLI.getStackProbeSize(MF);
+ int64_t NegProbeSize = -(int64_t)ProbeSize;
assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
int64_t NumBlocks = NegFrameSize / NegProbeSize;
int64_t NegResidualSize = NegFrameSize % NegProbeSize;
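As a worked example (hypothetical numbers, not taken from this diff), a 10000-byte frame probed with a 4096-byte probe size splits into two full probe blocks plus a residual:

#include <cstdint>

int64_t NegFrameSize = -10000;                          // frame size as a negative offset
int64_t NegProbeSize = -4096;                           // -(int64_t)TLI.getStackProbeSize(MF)
int64_t NumBlocks = NegFrameSize / NegProbeSize;        // 2 full probe blocks
int64_t NegResidualSize = NegFrameSize % NegProbeSize;  // -1808 bytes handled separately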
@@ -1394,10 +1216,10 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
Register FPReg = MI.getOperand(1).getReg();
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
bool HasBP = RegInfo->hasBasePointer(MF);
+ Register BPReg = RegInfo->getBaseRegister(MF);
Align MaxAlign = MFI.getMaxAlign();
- // Initialize current frame pointer.
+ bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
- BuildMI(PrologMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
// Subroutines to generate .cfi_* directives.
auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, Register Reg) {
@@ -1437,96 +1259,234 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
// Subroutine to store frame pointer and decrease stack pointer by probe size.
auto allocateAndProbe = [&](MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, int64_t NegSize,
- Register NegSizeReg, bool UseDForm) {
+ Register NegSizeReg, bool UseDForm,
+ Register StoreReg) {
if (UseDForm)
BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
- .addReg(FPReg)
+ .addReg(StoreReg)
.addImm(NegSize)
.addReg(SPReg);
else
BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
- .addReg(FPReg)
+ .addReg(StoreReg)
.addReg(SPReg)
.addReg(NegSizeReg);
};
- // Use FPReg to calculate CFA.
- if (needsCFI)
- buildDefCFA(PrologMBB, {MI}, FPReg, 0);
- // For case HasBP && MaxAlign > 1, we have to align the SP by performing
- // SP = SP - SP % MaxAlign.
+ // Used to probe the stack when realignment is required.
+ // Note that, per the ABI's requirement, *sp must always equal the value of
+ // the back-chain pointer, so only st(w|d)u(x) can be used to update sp.
+ // Following is pseudo code:
+ // final_sp = (sp & align) + negframesize;
+ // neg_gap = final_sp - sp;
+ // while (neg_gap < negprobesize) {
+ // stdu fp, negprobesize(sp);
+ // neg_gap -= negprobesize;
+ // }
+ // stdux fp, sp, neg_gap
+ //
+ // When HasBP && HasRedZone, the back-chain pointer is already saved in BPReg
+ // before the probe code, so we don't need to save it again; this frees one
+ // additional register that can be used to materialize the probe size when we
+ // need to use XForm. Otherwise we cannot materialize the probe size, so we
+ // can only use DForm for now.
+ //
+ // The allocations are:
+ // if (HasBP && HasRedzone) {
+ // r0: materialize the probe size if needed so that we can use XForm.
+ // r12: `neg_gap`
+ // } else {
+ // r0: back-chain pointer
+ // r12: `neg_gap`.
+ // }
+ auto probeRealignedStack = [&](MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ Register ScratchReg, Register TempReg) {
+ assert(HasBP && "The function is supposed to have base pointer when its "
+ "stack is realigned.");
+ assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");
+
+ // FIXME: We can eliminate this limitation if we get more information about
+ // which parts of the red zone are already used. A used red zone can be
+ // treated as probed, but there might be `holes' in the probed red zone,
+ // which could complicate the implementation.
+ assert(ProbeSize >= Subtarget.getRedZoneSize() &&
+ "Probe size should be larger or equal to the size of red-zone so "
+ "that red-zone is not clobbered by probing.");
+
+ Register &FinalStackPtr = TempReg;
+ // FIXME: We currently only support a NegProbeSize that is materializable by
+ // DForm. When HasBP && HasRedZone, we can use XForm if we have an additional
+ // idle register.
+ NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15));
+ assert(isInt<16>(NegProbeSize) &&
+ "NegProbeSize should be materializable by DForm");
+ Register CRReg = PPC::CR0;
+ // The layout of the output assembly is roughly:
+ // bb.0:
+ // ...
+ // sub $scratchreg, $finalsp, r1
+ // cmpdi $scratchreg, <negprobesize>
+ // bge bb.2
+ // bb.1:
+ // stdu <backchain>, <negprobesize>(r1)
+ // sub $scratchreg, $scratchreg, negprobesize
+ // cmpdi $scratchreg, <negprobesize>
+ // blt bb.1
+ // bb.2:
+ // stdux <backchain>, r1, $scratchreg
+ MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
+ MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
+ MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
+ MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
+ MF.insert(MBBInsertPoint, ProbeExitMBB);
+ // bb.2
+ {
+ Register BackChainPointer = HasRedZone ? BPReg : TempReg;
+ allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false,
+ BackChainPointer);
+ if (HasRedZone)
+ // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp; copy BPReg
+ // to TempReg to satisfy it.
+ BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg)
+ .addReg(BPReg)
+ .addReg(BPReg);
+ ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
+ ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
+ }
+ // bb.0
+ {
+ BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg)
+ .addReg(SPReg)
+ .addReg(FinalStackPtr);
+ if (!HasRedZone)
+ BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg);
+ BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg)
+ .addReg(ScratchReg)
+ .addImm(NegProbeSize);
+ BuildMI(&MBB, DL, TII.get(PPC::BCC))
+ .addImm(PPC::PRED_GE)
+ .addReg(CRReg)
+ .addMBB(ProbeExitMBB);
+ MBB.addSuccessor(ProbeLoopBodyMBB);
+ MBB.addSuccessor(ProbeExitMBB);
+ }
+ // bb.1
+ {
+ Register BackChainPointer = HasRedZone ? BPReg : TempReg;
+ allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize,
+ 0, true /*UseDForm*/, BackChainPointer);
+ BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI),
+ ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(-NegProbeSize);
+ BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI),
+ CRReg)
+ .addReg(ScratchReg)
+ .addImm(NegProbeSize);
+ BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
+ .addImm(PPC::PRED_LT)
+ .addReg(CRReg)
+ .addMBB(ProbeLoopBodyMBB);
+ ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
+ ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
+ }
+ // Update liveins.
+ recomputeLiveIns(*ProbeLoopBodyMBB);
+ recomputeLiveIns(*ProbeExitMBB);
+ return ProbeExitMBB;
+ };
+ // For the case HasBP && MaxAlign > 1, we have to realign the SP by
+ // performing SP = SP - SP % MaxAlign, which makes the probe more like a
+ // dynamic probe since the offset subtracted from SP is determined by SP's
+ // runtime value.
if (HasBP && MaxAlign > 1) {
+ // Calculate final stack pointer.
if (isPPC64)
- BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
- .addReg(FPReg)
+ BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
+ .addReg(SPReg)
.addImm(0)
.addImm(64 - Log2(MaxAlign));
else
- BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
- .addReg(FPReg)
+ BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
+ .addReg(SPReg)
.addImm(0)
.addImm(32 - Log2(MaxAlign))
.addImm(31);
- BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBFC8 : PPC::SUBFC),
- SPReg)
+ BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF),
+ FPReg)
.addReg(ScratchReg)
.addReg(SPReg);
- }
- // Probe residual part.
- if (NegResidualSize) {
- bool ResidualUseDForm = CanUseDForm(NegResidualSize);
- if (!ResidualUseDForm)
- MaterializeImm(PrologMBB, {MI}, NegResidualSize, ScratchReg);
- allocateAndProbe(PrologMBB, {MI}, NegResidualSize, ScratchReg,
- ResidualUseDForm);
- }
- bool UseDForm = CanUseDForm(NegProbeSize);
- // If number of blocks is small, just probe them directly.
- if (NumBlocks < 3) {
- if (!UseDForm)
- MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
- for (int i = 0; i < NumBlocks; ++i)
- allocateAndProbe(PrologMBB, {MI}, NegProbeSize, ScratchReg, UseDForm);
- if (needsCFI) {
- // Restore using SPReg to calculate CFA.
- buildDefCFAReg(PrologMBB, {MI}, SPReg);
- }
+ MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg);
+ BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
+ FPReg)
+ .addReg(ScratchReg)
+ .addReg(FPReg);
+ CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg);
+ if (needsCFI)
+ buildDefCFAReg(*CurrentMBB, {MI}, FPReg);
} else {
- // Since CTR is a volatile register and current shrinkwrap implementation
- // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
- // CTR loop to probe.
- // Calculate trip count and stores it in CTRReg.
- MaterializeImm(PrologMBB, {MI}, NumBlocks, ScratchReg);
- BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
- .addReg(ScratchReg, RegState::Kill);
- if (!UseDForm)
- MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
- // Create MBBs of the loop.
- MachineFunction::iterator MBBInsertPoint =
- std::next(PrologMBB.getIterator());
- MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
- MF.insert(MBBInsertPoint, LoopMBB);
- MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
- MF.insert(MBBInsertPoint, ExitMBB);
- // Synthesize the loop body.
- allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
- UseDForm);
- BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
- .addMBB(LoopMBB);
- LoopMBB->addSuccessor(ExitMBB);
- LoopMBB->addSuccessor(LoopMBB);
- // Synthesize the exit MBB.
- ExitMBB->splice(ExitMBB->end(), &PrologMBB,
- std::next(MachineBasicBlock::iterator(MI)),
- PrologMBB.end());
- ExitMBB->transferSuccessorsAndUpdatePHIs(&PrologMBB);
- PrologMBB.addSuccessor(LoopMBB);
- if (needsCFI) {
- // Restore using SPReg to calculate CFA.
- buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
+ // Initialize current frame pointer.
+ BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
+ // Use FPReg to calculate CFA.
+ if (needsCFI)
+ buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
+ // Probe residual part.
+ if (NegResidualSize) {
+ bool ResidualUseDForm = CanUseDForm(NegResidualSize);
+ if (!ResidualUseDForm)
+ MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
+ allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
+ ResidualUseDForm, FPReg);
+ }
+ bool UseDForm = CanUseDForm(NegProbeSize);
+ // If number of blocks is small, just probe them directly.
+ if (NumBlocks < 3) {
+ if (!UseDForm)
+ MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
+ for (int i = 0; i < NumBlocks; ++i)
+ allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
+ FPReg);
+ if (needsCFI) {
+ // Restore using SPReg to calculate CFA.
+ buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
+ }
+ } else {
+ // Since CTR is a volatile register and current shrinkwrap implementation
+ // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
+ // CTR loop to probe.
+ // Calculate the trip count and store it in CTRReg.
+ MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
+ BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
+ .addReg(ScratchReg, RegState::Kill);
+ if (!UseDForm)
+ MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
+ // Create MBBs of the loop.
+ MachineFunction::iterator MBBInsertPoint =
+ std::next(CurrentMBB->getIterator());
+ MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
+ MF.insert(MBBInsertPoint, LoopMBB);
+ MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
+ MF.insert(MBBInsertPoint, ExitMBB);
+ // Synthesize the loop body.
+ allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
+ UseDForm, FPReg);
+ BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
+ .addMBB(LoopMBB);
+ LoopMBB->addSuccessor(ExitMBB);
+ LoopMBB->addSuccessor(LoopMBB);
+ // Synthesize the exit MBB.
+ ExitMBB->splice(ExitMBB->end(), CurrentMBB,
+ std::next(MachineBasicBlock::iterator(MI)),
+ CurrentMBB->end());
+ ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
+ CurrentMBB->addSuccessor(LoopMBB);
+ if (needsCFI) {
+ // Restore using SPReg to calculate CFA.
+ buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
+ }
+ // Update liveins.
+ recomputeLiveIns(*LoopMBB);
+ recomputeLiveIns(*ExitMBB);
}
- // Update liveins.
- recomputeLiveIns(*LoopMBB);
- recomputeLiveIns(*ExitMBB);
}
++NumPrologProbed;
MI.eraseFromParent();
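To summarize the realigned-stack path above, here is a rough C++ model of the emitted probe loop (a sketch under the same assumptions as the pseudo code in the comments, not the code the compiler produces): sp only ever moves via st(w|d)u(x), so each step both stores the back-chain pointer and touches the newly allocated region.

#include <cstdint>

// SP and FinalSP grow downward; NegProbeSize is negative (e.g. -4096).
void probeRealignedFrameModel(char *&SP, char *FinalSP, int64_t NegProbeSize,
                              char *BackChain) {
  int64_t NegGap = FinalSP - SP;    // sub   scratch, finalsp, r1
  while (NegGap < NegProbeSize) {   // cmpdi scratch, negprobesize; bge exit
    SP += NegProbeSize;             // stdu  backchain, negprobesize(r1)
    *reinterpret_cast<char **>(SP) = BackChain;
    NegGap -= NegProbeSize;
  }
  SP += NegGap;                     // stdux backchain, r1, scratch
  *reinterpret_cast<char **>(SP) = BackChain;
}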
@@ -1551,8 +1511,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// Get processor type.
bool isPPC64 = Subtarget.isPPC64();
- // Get the ABI.
- bool isSVR4ABI = Subtarget.isSVR4ABI();
// Check if the link register (LR) has been saved.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
@@ -1600,24 +1558,16 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
SingleScratchReg = ScratchReg == TempReg;
if (HasFP) {
- if (isSVR4ABI) {
- int FPIndex = FI->getFramePointerSaveIndex();
- assert(FPIndex && "No Frame Pointer Save Slot!");
- FPOffset = MFI.getObjectOffset(FPIndex);
- } else {
- FPOffset = getFramePointerSaveOffset();
- }
+ int FPIndex = FI->getFramePointerSaveIndex();
+ assert(FPIndex && "No Frame Pointer Save Slot!");
+ FPOffset = MFI.getObjectOffset(FPIndex);
}
int BPOffset = 0;
if (HasBP) {
- if (isSVR4ABI) {
int BPIndex = FI->getBasePointerSaveIndex();
assert(BPIndex && "No Base Pointer Save Slot!");
BPOffset = MFI.getObjectOffset(BPIndex);
- } else {
- BPOffset = getBasePointerSaveOffset();
- }
}
int PBPOffset = 0;
@@ -1703,11 +1653,18 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
// zone add this offset back now.
+ // If the function has a base pointer, the stack pointer has been copied
+ // to it so we can restore it by copying in the other direction.
+ if (HasRedZone && HasBP) {
+ BuildMI(MBB, MBBI, dl, OrInst, RBReg).
+ addReg(BPReg).
+ addReg(BPReg);
+ }
// If this function contained a fastcc call and GuaranteedTailCallOpt is
// enabled (=> hasFastCall()==true) the fastcc call might contain a tail
// call which invalidates the stack pointer value in SP(0). So we use the
- // value of R31 in this case.
- if (FI->hasFastCall()) {
+ // value of R31 in this case. A similar situation exists with setjmp.
+ else if (FI->hasFastCall() || MF.exposesReturnsTwice()) {
assert(HasFP && "Expecting a valid frame pointer.");
if (!HasRedZone)
RBReg = FPReg;
@@ -2053,7 +2010,6 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
bool HasGPSaveArea = false;
bool HasG8SaveArea = false;
bool HasFPSaveArea = false;
- bool HasVRSAVESaveArea = false;
bool HasVRSaveArea = false;
SmallVector<CalleeSavedInfo, 18> GPRegs;
@@ -2093,8 +2049,6 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
} else if (PPC::CRBITRCRegClass.contains(Reg) ||
PPC::CRRCRegClass.contains(Reg)) {
; // do nothing, as we already know whether CRs are spilled
- } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
- HasVRSAVESaveArea = true;
} else if (PPC::VRRCRegClass.contains(Reg) ||
PPC::SPERCRegClass.contains(Reg)) {
// Altivec and SPE are mutually exclusive, but have the same stack
@@ -2217,23 +2171,6 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
LowerBound -= 4; // The CR save area is always 4 bytes long.
}
- if (HasVRSAVESaveArea) {
- // FIXME SVR4: Is it actually possible to have multiple elements in CSI
- // which have the VRSAVE register class?
- // Adjust the frame index of the VRSAVE spill slot.
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
-
- if (PPC::VRSAVERCRegClass.contains(Reg)) {
- int FI = CSI[i].getFrameIdx();
-
- MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
- }
- }
-
- LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
- }
-
// Both Altivec and SPE have the same alignment and padding requirements
// within the stack frame.
if (HasVRSaveArea) {
@@ -2273,8 +2210,8 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
// needed alignment padding.
unsigned StackSize = determineFrameLayout(MF, true);
MachineFrameInfo &MFI = MF.getFrameInfo();
- if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
- hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
+ if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
+ (hasSpills(MF) && !isInt<16>(StackSize))) {
const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
@@ -2288,7 +2225,7 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
// These kinds of spills might need two registers.
- if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
+ if (spillsCR(MF) || HasAlVars)
RS->addScavengingFrameIndex(
MFI.CreateStackObject(Size, Alignment, false));
}
@@ -2365,9 +2302,6 @@ bool PPCFrameLowering::spillCalleeSavedRegisters(
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
- if (Reg == PPC::VRSAVE)
- continue;
// CR2 through CR4 are the nonvolatile CR fields.
bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
@@ -2532,10 +2466,6 @@ bool PPCFrameLowering::restoreCalleeSavedRegisters(
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
- if (Reg == PPC::VRSAVE)
- continue;
-
if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
continue;