diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 586 |
1 files changed, 258 insertions, 328 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 2ee394e9259d..16536bf23deb 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -39,15 +39,6 @@ EnablePEVectorSpills("ppc-enable-pe-vector-spills", cl::desc("Enable spills in prologue to vector registers."), cl::init(false), cl::Hidden); -/// VRRegNo - Map from a numbered VR register to its enum value. -/// -static const MCPhysReg VRRegNo[] = { - PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , - PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, - PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, - PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31 -}; - static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) { if (STI.isAIXABI()) return STI.isPPC64() ? 16 : 8; @@ -227,19 +218,14 @@ const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( CALLEE_SAVED_VRS }; - static const SpillSlot AIXOffsets32[] = { - CALLEE_SAVED_FPRS, - CALLEE_SAVED_GPRS32, - // Add AIX's extra CSR. - {PPC::R13, -76}, - // TODO: Update when we add vector support for AIX. - }; + static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS, + CALLEE_SAVED_GPRS32, + // Add AIX's extra CSR. + {PPC::R13, -76}, + CALLEE_SAVED_VRS}; static const SpillSlot AIXOffsets64[] = { - CALLEE_SAVED_FPRS, - CALLEE_SAVED_GPRS64, - // TODO: Update when we add vector support for AIX. - }; + CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS}; if (Subtarget.is64BitELFABI()) { NumEntries = array_lengthof(ELFOffsets64); @@ -262,153 +248,11 @@ const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( return AIXOffsets32; } -/// RemoveVRSaveCode - We have found that this function does not need any code -/// to manipulate the VRSAVE register, even though it uses vector registers. -/// This can happen when the only registers used are known to be live in or out -/// of the function. Remove all of the VRSAVE related code from the function. -/// FIXME: The removal of the code results in a compile failure at -O0 when the -/// function contains a function call, as the GPR containing original VRSAVE -/// contents is spilled and reloaded around the call. Without the prolog code, -/// the spill instruction refers to an undefined register. This code needs -/// to account for all uses of that GPR. -static void RemoveVRSaveCode(MachineInstr &MI) { - MachineBasicBlock *Entry = MI.getParent(); - MachineFunction *MF = Entry->getParent(); - - // We know that the MTVRSAVE instruction immediately follows MI. Remove it. - MachineBasicBlock::iterator MBBI = MI; - ++MBBI; - assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE); - MBBI->eraseFromParent(); - - bool RemovedAllMTVRSAVEs = true; - // See if we can find and remove the MTVRSAVE instruction from all of the - // epilog blocks. - for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { - // If last instruction is a return instruction, add an epilogue - if (I->isReturnBlock()) { - bool FoundIt = false; - for (MBBI = I->end(); MBBI != I->begin(); ) { - --MBBI; - if (MBBI->getOpcode() == PPC::MTVRSAVE) { - MBBI->eraseFromParent(); // remove it. - FoundIt = true; - break; - } - } - RemovedAllMTVRSAVEs &= FoundIt; - } - } - - // If we found and removed all MTVRSAVE instructions, remove the read of - // VRSAVE as well. - if (RemovedAllMTVRSAVEs) { - MBBI = MI; - assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?"); - --MBBI; - assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?"); - MBBI->eraseFromParent(); - } - - // Finally, nuke the UPDATE_VRSAVE. - MI.eraseFromParent(); -} - -// HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the -// instruction selector. Based on the vector registers that have been used, -// transform this into the appropriate ORI instruction. -static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) { - MachineFunction *MF = MI.getParent()->getParent(); - const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - DebugLoc dl = MI.getDebugLoc(); - - const MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned UsedRegMask = 0; - for (unsigned i = 0; i != 32; ++i) - if (MRI.isPhysRegModified(VRRegNo[i])) - UsedRegMask |= 1 << (31-i); - - // Live in and live out values already must be in the mask, so don't bother - // marking them. - for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) { - unsigned RegNo = TRI->getEncodingValue(LI.first); - if (VRRegNo[RegNo] == LI.first) // If this really is a vector reg. - UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. - } - - // Live out registers appear as use operands on return instructions. - for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end(); - UsedRegMask != 0 && BI != BE; ++BI) { - const MachineBasicBlock &MBB = *BI; - if (!MBB.isReturnBlock()) - continue; - const MachineInstr &Ret = MBB.back(); - for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) { - const MachineOperand &MO = Ret.getOperand(I); - if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg())) - continue; - unsigned RegNo = TRI->getEncodingValue(MO.getReg()); - UsedRegMask &= ~(1 << (31-RegNo)); - } - } - - // If no registers are used, turn this into a copy. - if (UsedRegMask == 0) { - // Remove all VRSAVE code. - RemoveVRSaveCode(MI); - return; - } - - Register SrcReg = MI.getOperand(1).getReg(); - Register DstReg = MI.getOperand(0).getReg(); - - if ((UsedRegMask & 0xFFFF) == UsedRegMask) { - if (DstReg != SrcReg) - BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) - .addReg(SrcReg) - .addImm(UsedRegMask); - else - BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) - .addReg(SrcReg, RegState::Kill) - .addImm(UsedRegMask); - } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) { - if (DstReg != SrcReg) - BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) - .addReg(SrcReg) - .addImm(UsedRegMask >> 16); - else - BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) - .addReg(SrcReg, RegState::Kill) - .addImm(UsedRegMask >> 16); - } else { - if (DstReg != SrcReg) - BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) - .addReg(SrcReg) - .addImm(UsedRegMask >> 16); - else - BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) - .addReg(SrcReg, RegState::Kill) - .addImm(UsedRegMask >> 16); - - BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) - .addReg(DstReg, RegState::Kill) - .addImm(UsedRegMask & 0xFFFF); - } - - // Remove the old UPDATE_VRSAVE instruction. - MI.eraseFromParent(); -} - static bool spillsCR(const MachineFunction &MF) { const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); return FuncInfo->isCRSpilled(); } -static bool spillsVRSAVE(const MachineFunction &MF) { - const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); - return FuncInfo->isVRSAVESpilled(); -} - static bool hasSpills(const MachineFunction &MF) { const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); return FuncInfo->hasSpills(); @@ -474,7 +318,7 @@ PPCFrameLowering::determineFrameLayout(const MachineFunction &MF, !FI->mustSaveTOC() && // No need to save TOC. !RegInfo->hasBasePointer(MF); // No special alignment. - // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless + // Note: for PPC32 SVR4ABI, we can still generate stackless // code if all local vars are reg-allocated. bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize(); @@ -531,9 +375,10 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { return false; return MF.getTarget().Options.DisableFramePointerElim(MF) || - MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() || - (MF.getTarget().Options.GuaranteedTailCallOpt && - MF.getInfo<PPCFunctionInfo>()->hasFastCall()); + MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() || + MF.exposesReturnsTwice() || + (MF.getTarget().Options.GuaranteedTailCallOpt && + MF.getInfo<PPCFunctionInfo>()->hasFastCall()); } void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { @@ -681,6 +526,8 @@ PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, // register is available, we can adjust for that by not overlapping the spill // code. However, if we need to realign the stack (i.e. have a base pointer) // and the stack frame is large, we need two scratch registers. +// Also, stack probe requires two scratch registers, one for old sp, one for +// large frame and large probe size. bool PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const { const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); @@ -692,8 +539,10 @@ PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const { MachineFrameInfo &MFI = MF.getFrameInfo(); Align MaxAlign = MFI.getMaxAlign(); bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); + const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); - return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1; + return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) || + TLI.hasInlineStackProbe(MF); } bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { @@ -736,8 +585,8 @@ bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const { // Frame pointers and base pointers complicate matters so don't do anything // if we have them. For example having a frame pointer will sometimes require // a copy of r1 into r31 and that makes keeping track of updates to r1 more - // difficult. - if (hasFP(MF) || RegInfo->hasBasePointer(MF)) + // difficult. Similar situation exists with setjmp. + if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice()) return false; // Calls to fast_cc functions use different rules for passing parameters on @@ -771,24 +620,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, bool isPPC64 = Subtarget.isPPC64(); // Get the ABI. bool isSVR4ABI = Subtarget.isSVR4ABI(); - bool isAIXABI = Subtarget.isAIXABI(); bool isELFv2ABI = Subtarget.isELFv2ABI(); - assert((isSVR4ABI || isAIXABI) && "Unsupported PPC ABI."); - - // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it, - // process it. - if (!isSVR4ABI) - for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { - if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { - if (isAIXABI) - report_fatal_error("UPDATE_VRSAVE is unexpected on AIX."); - HandleVRSaveUpdate(*MBBI, TII); - break; - } - } - - // Move MBBI back to the beginning of the prologue block. - MBBI = MBB.begin(); + assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI."); // Work out frame sizes. unsigned FrameSize = determineFrameLayoutAndUpdate(MF); @@ -848,12 +681,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); // Using the same bool variable as below to suppress compiler warnings. - // Stack probe requires two scratch registers, one for old sp, one for large - // frame and large probe size. bool SingleScratchReg = findScratchRegister( - &MBB, false, - twoUniqueScratchRegsRequired(&MBB) || TLI.hasInlineStackProbe(MF), - &ScratchReg, &TempReg); + &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg); assert(SingleScratchReg && "Required number of registers not available in this block"); @@ -863,26 +692,18 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, int FPOffset = 0; if (HasFP) { - if (isSVR4ABI) { - MachineFrameInfo &MFI = MF.getFrameInfo(); - int FPIndex = FI->getFramePointerSaveIndex(); - assert(FPIndex && "No Frame Pointer Save Slot!"); - FPOffset = MFI.getObjectOffset(FPIndex); - } else { - FPOffset = getFramePointerSaveOffset(); - } + MachineFrameInfo &MFI = MF.getFrameInfo(); + int FPIndex = FI->getFramePointerSaveIndex(); + assert(FPIndex && "No Frame Pointer Save Slot!"); + FPOffset = MFI.getObjectOffset(FPIndex); } int BPOffset = 0; if (HasBP) { - if (isSVR4ABI) { - MachineFrameInfo &MFI = MF.getFrameInfo(); - int BPIndex = FI->getBasePointerSaveIndex(); - assert(BPIndex && "No Base Pointer Save Slot!"); - BPOffset = MFI.getObjectOffset(BPIndex); - } else { - BPOffset = getBasePointerSaveOffset(); - } + MachineFrameInfo &MFI = MF.getFrameInfo(); + int BPIndex = FI->getBasePointerSaveIndex(); + assert(BPIndex && "No Base Pointer Save Slot!"); + BPOffset = MFI.getObjectOffset(BPIndex); } int PBPOffset = 0; @@ -1038,15 +859,15 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64 : PPC::PROBED_STACKALLOC_32)) - .addDef(ScratchReg) - .addDef(TempReg) // TempReg stores the old sp. + .addDef(TempReg) + .addDef(ScratchReg) // ScratchReg stores the old sp. .addImm(NegFrameSize); // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we // update the ScratchReg to meet the assumption that ScratchReg contains // the NegFrameSize. This solution is rather tricky. if (!HasRedZone) { BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) - .addReg(TempReg) + .addReg(ScratchReg) .addReg(SPReg); HasSTUX = true; } @@ -1366,7 +1187,6 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologMBB) const { - // TODO: Generate CFI instructions. bool isPPC64 = Subtarget.isPPC64(); const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); @@ -1382,10 +1202,12 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, if (StackAllocMIPos == PrologMBB.end()) return; const BasicBlock *ProbedBB = PrologMBB.getBasicBlock(); + MachineBasicBlock *CurrentMBB = &PrologMBB; DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos); MachineInstr &MI = *StackAllocMIPos; int64_t NegFrameSize = MI.getOperand(2).getImm(); - int64_t NegProbeSize = -(int64_t)TLI.getStackProbeSize(MF); + unsigned ProbeSize = TLI.getStackProbeSize(MF); + int64_t NegProbeSize = -(int64_t)ProbeSize; assert(isInt<32>(NegProbeSize) && "Unhandled probe size"); int64_t NumBlocks = NegFrameSize / NegProbeSize; int64_t NegResidualSize = NegFrameSize % NegProbeSize; @@ -1394,10 +1216,10 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, Register FPReg = MI.getOperand(1).getReg(); const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); bool HasBP = RegInfo->hasBasePointer(MF); + Register BPReg = RegInfo->getBaseRegister(MF); Align MaxAlign = MFI.getMaxAlign(); - // Initialize current frame pointer. + bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR); - BuildMI(PrologMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg); // Subroutines to generate .cfi_* directives. auto buildDefCFAReg = [&](MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register Reg) { @@ -1437,96 +1259,234 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, // Subroutine to store frame pointer and decrease stack pointer by probe size. auto allocateAndProbe = [&](MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, int64_t NegSize, - Register NegSizeReg, bool UseDForm) { + Register NegSizeReg, bool UseDForm, + Register StoreReg) { if (UseDForm) BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg) - .addReg(FPReg) + .addReg(StoreReg) .addImm(NegSize) .addReg(SPReg); else BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg) - .addReg(FPReg) + .addReg(StoreReg) .addReg(SPReg) .addReg(NegSizeReg); }; - // Use FPReg to calculate CFA. - if (needsCFI) - buildDefCFA(PrologMBB, {MI}, FPReg, 0); - // For case HasBP && MaxAlign > 1, we have to align the SP by performing - // SP = SP - SP % MaxAlign. + // Used to probe stack when realignment is required. + // Note that, according to ABI's requirement, *sp must always equals the + // value of back-chain pointer, only st(w|d)u(x) can be used to update sp. + // Following is pseudo code: + // final_sp = (sp & align) + negframesize; + // neg_gap = final_sp - sp; + // while (neg_gap < negprobesize) { + // stdu fp, negprobesize(sp); + // neg_gap -= negprobesize; + // } + // stdux fp, sp, neg_gap + // + // When HasBP & HasRedzone, back-chain pointer is already saved in BPReg + // before probe code, we don't need to save it, so we get one additional reg + // that can be used to materialize the probeside if needed to use xform. + // Otherwise, we can NOT materialize probeside, so we can only use Dform for + // now. + // + // The allocations are: + // if (HasBP && HasRedzone) { + // r0: materialize the probesize if needed so that we can use xform. + // r12: `neg_gap` + // } else { + // r0: back-chain pointer + // r12: `neg_gap`. + // } + auto probeRealignedStack = [&](MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + Register ScratchReg, Register TempReg) { + assert(HasBP && "The function is supposed to have base pointer when its " + "stack is realigned."); + assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2"); + + // FIXME: We can eliminate this limitation if we get more infomation about + // which part of redzone are already used. Used redzone can be treated + // probed. But there might be `holes' in redzone probed, this could + // complicate the implementation. + assert(ProbeSize >= Subtarget.getRedZoneSize() && + "Probe size should be larger or equal to the size of red-zone so " + "that red-zone is not clobbered by probing."); + + Register &FinalStackPtr = TempReg; + // FIXME: We only support NegProbeSize materializable by DForm currently. + // When HasBP && HasRedzone, we can use xform if we have an additional idle + // register. + NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15)); + assert(isInt<16>(NegProbeSize) && + "NegProbeSize should be materializable by DForm"); + Register CRReg = PPC::CR0; + // Layout of output assembly kinda like: + // bb.0: + // ... + // sub $scratchreg, $finalsp, r1 + // cmpdi $scratchreg, <negprobesize> + // bge bb.2 + // bb.1: + // stdu <backchain>, <negprobesize>(r1) + // sub $scratchreg, $scratchreg, negprobesize + // cmpdi $scratchreg, <negprobesize> + // blt bb.1 + // bb.2: + // stdux <backchain>, r1, $scratchreg + MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator()); + MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB); + MF.insert(MBBInsertPoint, ProbeLoopBodyMBB); + MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB); + MF.insert(MBBInsertPoint, ProbeExitMBB); + // bb.2 + { + Register BackChainPointer = HasRedZone ? BPReg : TempReg; + allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false, + BackChainPointer); + if (HasRedZone) + // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg + // to TempReg to satisfy it. + BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg) + .addReg(BPReg) + .addReg(BPReg); + ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end()); + ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); + } + // bb.0 + { + BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg) + .addReg(SPReg) + .addReg(FinalStackPtr); + if (!HasRedZone) + BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg); + BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg) + .addReg(ScratchReg) + .addImm(NegProbeSize); + BuildMI(&MBB, DL, TII.get(PPC::BCC)) + .addImm(PPC::PRED_GE) + .addReg(CRReg) + .addMBB(ProbeExitMBB); + MBB.addSuccessor(ProbeLoopBodyMBB); + MBB.addSuccessor(ProbeExitMBB); + } + // bb.1 + { + Register BackChainPointer = HasRedZone ? BPReg : TempReg; + allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize, + 0, true /*UseDForm*/, BackChainPointer); + BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI), + ScratchReg) + .addReg(ScratchReg) + .addImm(-NegProbeSize); + BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), + CRReg) + .addReg(ScratchReg) + .addImm(NegProbeSize); + BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC)) + .addImm(PPC::PRED_LT) + .addReg(CRReg) + .addMBB(ProbeLoopBodyMBB); + ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB); + ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB); + } + // Update liveins. + recomputeLiveIns(*ProbeLoopBodyMBB); + recomputeLiveIns(*ProbeExitMBB); + return ProbeExitMBB; + }; + // For case HasBP && MaxAlign > 1, we have to realign the SP by performing + // SP = SP - SP % MaxAlign, thus make the probe more like dynamic probe since + // the offset subtracted from SP is determined by SP's runtime value. if (HasBP && MaxAlign > 1) { + // Calculate final stack pointer. if (isPPC64) - BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg) - .addReg(FPReg) + BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg) + .addReg(SPReg) .addImm(0) .addImm(64 - Log2(MaxAlign)); else - BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg) - .addReg(FPReg) + BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg) + .addReg(SPReg) .addImm(0) .addImm(32 - Log2(MaxAlign)) .addImm(31); - BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBFC8 : PPC::SUBFC), - SPReg) + BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), + FPReg) .addReg(ScratchReg) .addReg(SPReg); - } - // Probe residual part. - if (NegResidualSize) { - bool ResidualUseDForm = CanUseDForm(NegResidualSize); - if (!ResidualUseDForm) - MaterializeImm(PrologMBB, {MI}, NegResidualSize, ScratchReg); - allocateAndProbe(PrologMBB, {MI}, NegResidualSize, ScratchReg, - ResidualUseDForm); - } - bool UseDForm = CanUseDForm(NegProbeSize); - // If number of blocks is small, just probe them directly. - if (NumBlocks < 3) { - if (!UseDForm) - MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg); - for (int i = 0; i < NumBlocks; ++i) - allocateAndProbe(PrologMBB, {MI}, NegProbeSize, ScratchReg, UseDForm); - if (needsCFI) { - // Restore using SPReg to calculate CFA. - buildDefCFAReg(PrologMBB, {MI}, SPReg); - } + MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg); + BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4), + FPReg) + .addReg(ScratchReg) + .addReg(FPReg); + CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg); + if (needsCFI) + buildDefCFAReg(*CurrentMBB, {MI}, FPReg); } else { - // Since CTR is a volatile register and current shrinkwrap implementation - // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a - // CTR loop to probe. - // Calculate trip count and stores it in CTRReg. - MaterializeImm(PrologMBB, {MI}, NumBlocks, ScratchReg); - BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)) - .addReg(ScratchReg, RegState::Kill); - if (!UseDForm) - MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg); - // Create MBBs of the loop. - MachineFunction::iterator MBBInsertPoint = - std::next(PrologMBB.getIterator()); - MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB); - MF.insert(MBBInsertPoint, LoopMBB); - MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB); - MF.insert(MBBInsertPoint, ExitMBB); - // Synthesize the loop body. - allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg, - UseDForm); - BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)) - .addMBB(LoopMBB); - LoopMBB->addSuccessor(ExitMBB); - LoopMBB->addSuccessor(LoopMBB); - // Synthesize the exit MBB. - ExitMBB->splice(ExitMBB->end(), &PrologMBB, - std::next(MachineBasicBlock::iterator(MI)), - PrologMBB.end()); - ExitMBB->transferSuccessorsAndUpdatePHIs(&PrologMBB); - PrologMBB.addSuccessor(LoopMBB); - if (needsCFI) { - // Restore using SPReg to calculate CFA. - buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg); + // Initialize current frame pointer. + BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg); + // Use FPReg to calculate CFA. + if (needsCFI) + buildDefCFA(*CurrentMBB, {MI}, FPReg, 0); + // Probe residual part. + if (NegResidualSize) { + bool ResidualUseDForm = CanUseDForm(NegResidualSize); + if (!ResidualUseDForm) + MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg); + allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg, + ResidualUseDForm, FPReg); + } + bool UseDForm = CanUseDForm(NegProbeSize); + // If number of blocks is small, just probe them directly. + if (NumBlocks < 3) { + if (!UseDForm) + MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); + for (int i = 0; i < NumBlocks; ++i) + allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm, + FPReg); + if (needsCFI) { + // Restore using SPReg to calculate CFA. + buildDefCFAReg(*CurrentMBB, {MI}, SPReg); + } + } else { + // Since CTR is a volatile register and current shrinkwrap implementation + // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a + // CTR loop to probe. + // Calculate trip count and stores it in CTRReg. + MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg); + BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)) + .addReg(ScratchReg, RegState::Kill); + if (!UseDForm) + MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); + // Create MBBs of the loop. + MachineFunction::iterator MBBInsertPoint = + std::next(CurrentMBB->getIterator()); + MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB); + MF.insert(MBBInsertPoint, LoopMBB); + MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB); + MF.insert(MBBInsertPoint, ExitMBB); + // Synthesize the loop body. + allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg, + UseDForm, FPReg); + BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)) + .addMBB(LoopMBB); + LoopMBB->addSuccessor(ExitMBB); + LoopMBB->addSuccessor(LoopMBB); + // Synthesize the exit MBB. + ExitMBB->splice(ExitMBB->end(), CurrentMBB, + std::next(MachineBasicBlock::iterator(MI)), + CurrentMBB->end()); + ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB); + CurrentMBB->addSuccessor(LoopMBB); + if (needsCFI) { + // Restore using SPReg to calculate CFA. + buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg); + } + // Update liveins. + recomputeLiveIns(*LoopMBB); + recomputeLiveIns(*ExitMBB); } - // Update liveins. - recomputeLiveIns(*LoopMBB); - recomputeLiveIns(*ExitMBB); } ++NumPrologProbed; MI.eraseFromParent(); @@ -1551,8 +1511,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, // Get processor type. bool isPPC64 = Subtarget.isPPC64(); - // Get the ABI. - bool isSVR4ABI = Subtarget.isSVR4ABI(); // Check if the link register (LR) has been saved. PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); @@ -1600,24 +1558,16 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, SingleScratchReg = ScratchReg == TempReg; if (HasFP) { - if (isSVR4ABI) { - int FPIndex = FI->getFramePointerSaveIndex(); - assert(FPIndex && "No Frame Pointer Save Slot!"); - FPOffset = MFI.getObjectOffset(FPIndex); - } else { - FPOffset = getFramePointerSaveOffset(); - } + int FPIndex = FI->getFramePointerSaveIndex(); + assert(FPIndex && "No Frame Pointer Save Slot!"); + FPOffset = MFI.getObjectOffset(FPIndex); } int BPOffset = 0; if (HasBP) { - if (isSVR4ABI) { int BPIndex = FI->getBasePointerSaveIndex(); assert(BPIndex && "No Base Pointer Save Slot!"); BPOffset = MFI.getObjectOffset(BPIndex); - } else { - BPOffset = getBasePointerSaveOffset(); - } } int PBPOffset = 0; @@ -1703,11 +1653,18 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red // zone add this offset back now. + // If the function has a base pointer, the stack pointer has been copied + // to it so we can restore it by copying in the other direction. + if (HasRedZone && HasBP) { + BuildMI(MBB, MBBI, dl, OrInst, RBReg). + addReg(BPReg). + addReg(BPReg); + } // If this function contained a fastcc call and GuaranteedTailCallOpt is // enabled (=> hasFastCall()==true) the fastcc call might contain a tail // call which invalidates the stack pointer value in SP(0). So we use the - // value of R31 in this case. - if (FI->hasFastCall()) { + // value of R31 in this case. Similar situation exists with setjmp. + else if (FI->hasFastCall() || MF.exposesReturnsTwice()) { assert(HasFP && "Expecting a valid frame pointer."); if (!HasRedZone) RBReg = FPReg; @@ -2053,7 +2010,6 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, bool HasGPSaveArea = false; bool HasG8SaveArea = false; bool HasFPSaveArea = false; - bool HasVRSAVESaveArea = false; bool HasVRSaveArea = false; SmallVector<CalleeSavedInfo, 18> GPRegs; @@ -2093,8 +2049,6 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, } else if (PPC::CRBITRCRegClass.contains(Reg) || PPC::CRRCRegClass.contains(Reg)) { ; // do nothing, as we already know whether CRs are spilled - } else if (PPC::VRSAVERCRegClass.contains(Reg)) { - HasVRSAVESaveArea = true; } else if (PPC::VRRCRegClass.contains(Reg) || PPC::SPERCRegClass.contains(Reg)) { // Altivec and SPE are mutually exclusive, but have the same stack @@ -2217,23 +2171,6 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, LowerBound -= 4; // The CR save area is always 4 bytes long. } - if (HasVRSAVESaveArea) { - // FIXME SVR4: Is it actually possible to have multiple elements in CSI - // which have the VRSAVE register class? - // Adjust the frame index of the VRSAVE spill slot. - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - - if (PPC::VRSAVERCRegClass.contains(Reg)) { - int FI = CSI[i].getFrameIdx(); - - MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); - } - } - - LowerBound -= 4; // The VRSAVE save area is always 4 bytes long. - } - // Both Altivec and SPE have the same alignment and padding requirements // within the stack frame. if (HasVRSaveArea) { @@ -2273,8 +2210,8 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, // needed alignment padding. unsigned StackSize = determineFrameLayout(MF, true); MachineFrameInfo &MFI = MF.getFrameInfo(); - if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) || - hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) { + if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) || + (hasSpills(MF) && !isInt<16>(StackSize))) { const TargetRegisterClass &GPRC = PPC::GPRCRegClass; const TargetRegisterClass &G8RC = PPC::G8RCRegClass; const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC; @@ -2288,7 +2225,7 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign(); // These kinds of spills might need two registers. - if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars) + if (spillsCR(MF) || HasAlVars) RS->addScavengingFrameIndex( MFI.CreateStackObject(Size, Alignment, false)); } @@ -2365,9 +2302,6 @@ bool PPCFrameLowering::spillCalleeSavedRegisters( for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); - // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used. - if (Reg == PPC::VRSAVE) - continue; // CR2 through CR4 are the nonvolatile CR fields. bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; @@ -2532,10 +2466,6 @@ bool PPCFrameLowering::restoreCalleeSavedRegisters( for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); - // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used. - if (Reg == PPC::VRSAVE) - continue; - if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) continue; |