diff options
Diffstat (limited to 'contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 1975 |
1 files changed, 1081 insertions, 894 deletions
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index b91a3f6f8c6c..81850548bb6e 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -16,35 +16,19 @@ // prune the dependence. // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/DFAPacketizer.h" -#include "Hexagon.h" -#include "HexagonMachineFunctionInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "HexagonVLIWPacketizer.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/CodeGen/ScheduleDAGInstrs.h" -#include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/CodeGen/SchedulerRegistry.h" -#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" #include <map> #include <vector> @@ -52,9 +36,22 @@ using namespace llvm; #define DEBUG_TYPE "packets" +static cl::opt<bool> DisablePacketizer("disable-packetizer", cl::Hidden, + cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Hexagon packetizer pass")); + static cl::opt<bool> PacketizeVolatiles("hexagon-packetize-volatiles", - cl::ZeroOrMore, cl::Hidden, cl::init(true), - cl::desc("Allow non-solo packetization of volatile memory references")); + cl::ZeroOrMore, cl::Hidden, cl::init(true), + cl::desc("Allow non-solo packetization of volatile memory references")); + +static cl::opt<bool> EnableGenAllInsnClass("enable-gen-insn", cl::init(false), + cl::Hidden, cl::ZeroOrMore, cl::desc("Generate all instruction with TC")); + +static cl::opt<bool> DisableVecDblNVStores("disable-vecdbl-nv-stores", + cl::init(false), cl::Hidden, cl::ZeroOrMore, + cl::desc("Disable vector double new-value-stores")); + +extern cl::opt<bool> ScheduleInlineAsm; namespace llvm { FunctionPass *createHexagonPacketizer(); @@ -64,7 +61,6 @@ namespace llvm { namespace { class HexagonPacketizer : public MachineFunctionPass { - public: static char ID; HexagonPacketizer() : MachineFunctionPass(ID) { @@ -73,103 +69,25 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); - AU.addRequired<MachineDominatorTree>(); + AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<MachineBranchProbabilityInfo>(); - AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineDominatorTree>(); AU.addPreserved<MachineLoopInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } - const char *getPassName() const override { return "Hexagon Packetizer"; } - bool runOnMachineFunction(MachineFunction &Fn) override; - }; - char HexagonPacketizer::ID = 0; - - class HexagonPacketizerList : public VLIWPacketizerList { private: - - // Has the instruction been promoted to a dot-new instruction. - bool PromotedToDotNew; - - // Has the instruction been glued to allocframe. - bool GlueAllocframeStore; - - // Has the feeder instruction been glued to new value jump. - bool GlueToNewValueJump; - - // Check if there is a dependence between some instruction already in this - // packet and this instruction. - bool Dependence; - - // Only check for dependence if there are resources available to - // schedule this instruction. - bool FoundSequentialDependence; - - /// \brief A handle to the branch probability pass. - const MachineBranchProbabilityInfo *MBPI; - - // Track MIs with ignored dependece. - std::vector<MachineInstr*> IgnoreDepMIs; - - public: - // Ctor. - HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, - const MachineBranchProbabilityInfo *MBPI); - - // initPacketizerState - initialize some internal flags. - void initPacketizerState() override; - - // ignorePseudoInstruction - Ignore bundling of pseudo instructions. - bool ignorePseudoInstruction(MachineInstr *MI, - MachineBasicBlock *MBB) override; - - // isSoloInstruction - return true if instruction MI can not be packetized - // with any other instruction, which means that MI itself is a packet. - bool isSoloInstruction(MachineInstr *MI) override; - - // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ - // together. - bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override; - - // isLegalToPruneDependencies - Is it legal to prune dependece between SUI - // and SUJ. - bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override; - - MachineBasicBlock::iterator addToPacket(MachineInstr *MI) override; - private: - bool IsCallDependent(MachineInstr* MI, SDep::Kind DepType, unsigned DepReg); - bool PromoteToDotNew(MachineInstr* MI, SDep::Kind DepType, - MachineBasicBlock::iterator &MII, - const TargetRegisterClass* RC); - bool CanPromoteToDotNew(MachineInstr *MI, SUnit *PacketSU, unsigned DepReg, - const std::map<MachineInstr *, SUnit *> &MIToSUnit, - MachineBasicBlock::iterator &MII, - const TargetRegisterClass *RC); - bool - CanPromoteToNewValue(MachineInstr *MI, SUnit *PacketSU, unsigned DepReg, - const std::map<MachineInstr *, SUnit *> &MIToSUnit, - MachineBasicBlock::iterator &MII); - bool CanPromoteToNewValueStore( - MachineInstr *MI, MachineInstr *PacketMI, unsigned DepReg, - const std::map<MachineInstr *, SUnit *> &MIToSUnit); - bool DemoteToDotOld(MachineInstr *MI); - bool ArePredicatesComplements( - MachineInstr *MI1, MachineInstr *MI2, - const std::map<MachineInstr *, SUnit *> &MIToSUnit); - bool RestrictingDepExistInPacket(MachineInstr *, unsigned, - const std::map<MachineInstr *, SUnit *> &); - bool isNewifiable(MachineInstr* MI); - bool isCondInst(MachineInstr* MI); - bool tryAllocateResourcesForConstExt(MachineInstr* MI); - bool canReserveResourcesForConstExt(MachineInstr *MI); - void reserveResourcesForConstExt(MachineInstr* MI); - bool isNewValueInst(MachineInstr* MI); + const HexagonInstrInfo *HII; + const HexagonRegisterInfo *HRI; }; + + char HexagonPacketizer::ID = 0; } INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer", @@ -177,26 +95,93 @@ INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer", INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(HexagonPacketizer, "packets", "Hexagon Packetizer", false, false) -// HexagonPacketizerList Ctor. -HexagonPacketizerList::HexagonPacketizerList( - MachineFunction &MF, MachineLoopInfo &MLI, - const MachineBranchProbabilityInfo *MBPI) - : VLIWPacketizerList(MF, MLI, true) { - this->MBPI = MBPI; +HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF, + MachineLoopInfo &MLI, AliasAnalysis *AA, + const MachineBranchProbabilityInfo *MBPI) + : VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI) { + HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); + HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); } -bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) { - const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo(); - MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); - const MachineBranchProbabilityInfo *MBPI = - &getAnalysis<MachineBranchProbabilityInfo>(); +// Check if FirstI modifies a register that SecondI reads. +static bool hasWriteToReadDep(const MachineInstr *FirstI, + const MachineInstr *SecondI, const TargetRegisterInfo *TRI) { + for (auto &MO : FirstI->operands()) { + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned R = MO.getReg(); + if (SecondI->readsRegister(R, TRI)) + return true; + } + return false; +} + + +static MachineBasicBlock::iterator moveInstrOut(MachineInstr *MI, + MachineBasicBlock::iterator BundleIt, bool Before) { + MachineBasicBlock::instr_iterator InsertPt; + if (Before) + InsertPt = BundleIt.getInstrIterator(); + else + InsertPt = std::next(BundleIt).getInstrIterator(); + + MachineBasicBlock &B = *MI->getParent(); + // The instruction should at least be bundled with the preceding instruction + // (there will always be one, i.e. BUNDLE, if nothing else). + assert(MI->isBundledWithPred()); + if (MI->isBundledWithSucc()) { + MI->clearFlag(MachineInstr::BundledSucc); + MI->clearFlag(MachineInstr::BundledPred); + } else { + // If it's not bundled with the successor (i.e. it is the last one + // in the bundle), then we can simply unbundle it from the predecessor, + // which will take care of updating the predecessor's flag. + MI->unbundleFromPred(); + } + B.splice(InsertPt, &B, MI); + + // Get the size of the bundle without asserting. + MachineBasicBlock::const_instr_iterator I(BundleIt); + MachineBasicBlock::const_instr_iterator E = B.instr_end(); + unsigned Size = 0; + for (++I; I != E && I->isBundledWithPred(); ++I) + ++Size; + + // If there are still two or more instructions, then there is nothing + // else to be done. + if (Size > 1) + return BundleIt; + + // Otherwise, extract the single instruction out and delete the bundle. + MachineBasicBlock::iterator NextIt = std::next(BundleIt); + MachineInstr *SingleI = BundleIt->getNextNode(); + SingleI->unbundleFromPred(); + assert(!SingleI->isBundledWithSucc()); + BundleIt->eraseFromParent(); + return NextIt; +} + + +bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) { + if (DisablePacketizer) + return false; + + HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); + HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); + auto &MLI = getAnalysis<MachineLoopInfo>(); + auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + auto *MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); + + if (EnableGenAllInsnClass) + HII->genAllInsnTimingClasses(MF); + // Instantiate the packetizer. - HexagonPacketizerList Packetizer(Fn, MLI, MBPI); + HexagonPacketizerList Packetizer(MF, MLI, AA, MBPI); // DFA state table should not be empty. assert(Packetizer.getResourceTracker() && "Empty DFA table!"); @@ -211,162 +196,107 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) { // dependence between Insn 0 and Insn 2. This can lead to incorrect // packetization // - for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); - MBB != MBBe; ++MBB) { - MachineBasicBlock::iterator End = MBB->end(); - MachineBasicBlock::iterator MI = MBB->begin(); + for (auto &MB : MF) { + auto End = MB.end(); + auto MI = MB.begin(); while (MI != End) { + auto NextI = std::next(MI); if (MI->isKill()) { - MachineBasicBlock::iterator DeleteMI = MI; - ++MI; - MBB->erase(DeleteMI); - End = MBB->end(); - continue; + MB.erase(MI); + End = MB.end(); } - ++MI; + MI = NextI; } } // Loop over all of the basic blocks. - for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); - MBB != MBBe; ++MBB) { - // Find scheduling regions and schedule / packetize each region. - unsigned RemainingCount = MBB->size(); - for(MachineBasicBlock::iterator RegionEnd = MBB->end(); - RegionEnd != MBB->begin();) { - // The next region starts above the previous region. Look backward in the - // instruction stream until we find the nearest boundary. - MachineBasicBlock::iterator I = RegionEnd; - for(;I != MBB->begin(); --I, --RemainingCount) { - if (TII->isSchedulingBoundary(std::prev(I), MBB, Fn)) - break; - } - I = MBB->begin(); - - // Skip empty scheduling regions. - if (I == RegionEnd) { - RegionEnd = std::prev(RegionEnd); - --RemainingCount; - continue; - } - // Skip regions with one instruction. - if (I == std::prev(RegionEnd)) { - RegionEnd = std::prev(RegionEnd); - continue; - } - - Packetizer.PacketizeMIs(MBB, I, RegionEnd); - RegionEnd = I; + for (auto &MB : MF) { + auto Begin = MB.begin(), End = MB.end(); + while (Begin != End) { + // First the first non-boundary starting from the end of the last + // scheduling region. + MachineBasicBlock::iterator RB = Begin; + while (RB != End && HII->isSchedulingBoundary(RB, &MB, MF)) + ++RB; + // First the first boundary starting from the beginning of the new + // region. + MachineBasicBlock::iterator RE = RB; + while (RE != End && !HII->isSchedulingBoundary(RE, &MB, MF)) + ++RE; + // Add the scheduling boundary if it's not block end. + if (RE != End) + ++RE; + // If RB == End, then RE == End. + if (RB != End) + Packetizer.PacketizeMIs(&MB, RB, RE); + + Begin = RE; } } + Packetizer.unpacketizeSoloInstrs(MF); return true; } -static bool IsIndirectCall(MachineInstr* MI) { - return MI->getOpcode() == Hexagon::J2_callr; +// Reserve resources for a constant extender. Trigger an assertion if the +// reservation fails. +void HexagonPacketizerList::reserveResourcesForConstExt() { + if (!tryAllocateResourcesForConstExt(true)) + llvm_unreachable("Resources not available"); } -// Reserve resources for constant extender. Trigure an assertion if -// reservation fail. -void HexagonPacketizerList::reserveResourcesForConstExt(MachineInstr* MI) { - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - MachineInstr *PseudoMI = MF.CreateMachineInstr(QII->get(Hexagon::A4_ext), - MI->getDebugLoc()); - - if (ResourceTracker->canReserveResources(PseudoMI)) { - ResourceTracker->reserveResources(PseudoMI); - MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI); - } else { - MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI); - llvm_unreachable("can not reserve resources for constant extender."); - } - return; +bool HexagonPacketizerList::canReserveResourcesForConstExt() { + return tryAllocateResourcesForConstExt(false); } -bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) { - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - assert((QII->isExtended(MI) || QII->isConstExtended(MI)) && - "Should only be called for constant extended instructions"); - MachineInstr *PseudoMI = MF.CreateMachineInstr(QII->get(Hexagon::A4_ext), - MI->getDebugLoc()); - bool CanReserve = ResourceTracker->canReserveResources(PseudoMI); - MF.DeleteMachineInstr(PseudoMI); - return CanReserve; +// Allocate resources (i.e. 4 bytes) for constant extender. If succeeded, +// return true, otherwise, return false. +bool HexagonPacketizerList::tryAllocateResourcesForConstExt(bool Reserve) { + auto *ExtMI = MF.CreateMachineInstr(HII->get(Hexagon::A4_ext), DebugLoc()); + bool Avail = ResourceTracker->canReserveResources(ExtMI); + if (Reserve && Avail) + ResourceTracker->reserveResources(ExtMI); + MF.DeleteMachineInstr(ExtMI); + return Avail; } -// Allocate resources (i.e. 4 bytes) for constant extender. If succeed, return -// true, otherwise, return false. -bool HexagonPacketizerList::tryAllocateResourcesForConstExt(MachineInstr* MI) { - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - MachineInstr *PseudoMI = MF.CreateMachineInstr(QII->get(Hexagon::A4_ext), - MI->getDebugLoc()); - if (ResourceTracker->canReserveResources(PseudoMI)) { - ResourceTracker->reserveResources(PseudoMI); - MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI); +bool HexagonPacketizerList::isCallDependent(const MachineInstr* MI, + SDep::Kind DepType, unsigned DepReg) { + // Check for LR dependence. + if (DepReg == HRI->getRARegister()) return true; - } else { - MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI); - return false; - } -} - - -bool HexagonPacketizerList::IsCallDependent(MachineInstr* MI, - SDep::Kind DepType, - unsigned DepReg) { - - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - const HexagonRegisterInfo *QRI = - (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo(); - - // Check for lr dependence - if (DepReg == QRI->getRARegister()) { - return true; - } - if (QII->isDeallocRet(MI)) { - if (DepReg == QRI->getFrameRegister() || - DepReg == QRI->getStackRegister()) + if (HII->isDeallocRet(MI)) + if (DepReg == HRI->getFrameRegister() || DepReg == HRI->getStackRegister()) return true; - } - // Check if this is a predicate dependence - const TargetRegisterClass* RC = QRI->getMinimalPhysRegClass(DepReg); - if (RC == &Hexagon::PredRegsRegClass) { + // Check if this is a predicate dependence. + const TargetRegisterClass* RC = HRI->getMinimalPhysRegClass(DepReg); + if (RC == &Hexagon::PredRegsRegClass) return true; - } - // - // Lastly check for an operand used in an indirect call - // If we had an attribute for checking if an instruction is an indirect call, - // then we could have avoided this relatively brittle implementation of - // IsIndirectCall() - // - // Assumes that the first operand of the CALLr is the function address - // - if (IsIndirectCall(MI) && (DepType == SDep::Data)) { + // Assumes that the first operand of the CALLr is the function address. + if (HII->isIndirectCall(MI) && (DepType == SDep::Data)) { MachineOperand MO = MI->getOperand(0); - if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg)) { + if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg)) return true; - } } return false; } -static bool IsRegDependence(const SDep::Kind DepType) { - return (DepType == SDep::Data || DepType == SDep::Anti || - DepType == SDep::Output); +static bool isRegDependence(const SDep::Kind DepType) { + return DepType == SDep::Data || DepType == SDep::Anti || + DepType == SDep::Output; } -static bool IsDirectJump(MachineInstr* MI) { - return (MI->getOpcode() == Hexagon::J2_jump); +static bool isDirectJump(const MachineInstr* MI) { + return MI->getOpcode() == Hexagon::J2_jump; } -static bool IsSchedBarrier(MachineInstr* MI) { +static bool isSchedBarrier(const MachineInstr* MI) { switch (MI->getOpcode()) { case Hexagon::Y2_barrier: return true; @@ -374,76 +304,127 @@ static bool IsSchedBarrier(MachineInstr* MI) { return false; } -static bool IsControlFlow(MachineInstr* MI) { +static bool isControlFlow(const MachineInstr* MI) { return (MI->getDesc().isTerminator() || MI->getDesc().isCall()); } -static bool IsLoopN(MachineInstr *MI) { - return (MI->getOpcode() == Hexagon::J2_loop0i || - MI->getOpcode() == Hexagon::J2_loop0r); -} -/// DoesModifyCalleeSavedReg - Returns true if the instruction modifies a -/// callee-saved register. -static bool DoesModifyCalleeSavedReg(MachineInstr *MI, +/// Returns true if the instruction modifies a callee-saved register. +static bool doesModifyCalleeSavedReg(const MachineInstr *MI, const TargetRegisterInfo *TRI) { - for (const MCPhysReg *CSR = - TRI->getCalleeSavedRegs(MI->getParent()->getParent()); - *CSR; ++CSR) { - unsigned CalleeSavedReg = *CSR; - if (MI->modifiesRegister(CalleeSavedReg, TRI)) + const MachineFunction &MF = *MI->getParent()->getParent(); + for (auto *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR) + if (MI->modifiesRegister(*CSR, TRI)) return true; - } return false; } -// Returns true if an instruction can be promoted to .new predicate -// or new-value store. -bool HexagonPacketizerList::isNewifiable(MachineInstr* MI) { - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - return isCondInst(MI) || QII->mayBeNewStore(MI); +// TODO: MI->isIndirectBranch() and IsRegisterJump(MI) +// Returns true if an instruction can be promoted to .new predicate or +// new-value store. +bool HexagonPacketizerList::isNewifiable(const MachineInstr* MI) { + return HII->isCondInst(MI) || MI->isReturn() || HII->mayBeNewStore(MI); } -bool HexagonPacketizerList::isCondInst (MachineInstr* MI) { - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - const MCInstrDesc& TID = MI->getDesc(); - // bug 5670: until that is fixed, - // this portion is disabled. - if ( TID.isConditionalBranch() // && !IsRegisterJump(MI)) || - || QII->isConditionalTransfer(MI) - || QII->isConditionalALU32(MI) - || QII->isConditionalLoad(MI) - || QII->isConditionalStore(MI)) { - return true; +// Promote an instructiont to its .cur form. +// At this time, we have already made a call to canPromoteToDotCur and made +// sure that it can *indeed* be promoted. +bool HexagonPacketizerList::promoteToDotCur(MachineInstr* MI, + SDep::Kind DepType, MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC) { + assert(DepType == SDep::Data); + int CurOpcode = HII->getDotCurOp(MI); + MI->setDesc(HII->get(CurOpcode)); + return true; +} + +void HexagonPacketizerList::cleanUpDotCur() { + MachineInstr *MI = NULL; + for (auto BI : CurrentPacketMIs) { + DEBUG(dbgs() << "Cleanup packet has "; BI->dump();); + if (BI->getOpcode() == Hexagon::V6_vL32b_cur_ai) { + MI = BI; + continue; + } + if (MI) { + for (auto &MO : BI->operands()) + if (MO.isReg() && MO.getReg() == MI->getOperand(0).getReg()) + return; + } } - return false; + if (!MI) + return; + // We did not find a use of the CUR, so de-cur it. + MI->setDesc(HII->get(Hexagon::V6_vL32b_ai)); + DEBUG(dbgs() << "Demoted CUR "; MI->dump();); } +// Check to see if an instruction can be dot cur. +bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr *MI, + const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII, + const TargetRegisterClass *RC) { + if (!HII->isV60VectorInstruction(MI)) + return false; + if (!HII->isV60VectorInstruction(MII)) + return false; -// Promote an instructiont to its .new form. -// At this time, we have already made a call to CanPromoteToDotNew -// and made sure that it can *indeed* be promoted. -bool HexagonPacketizerList::PromoteToDotNew(MachineInstr* MI, - SDep::Kind DepType, MachineBasicBlock::iterator &MII, - const TargetRegisterClass* RC) { + // Already a dot new instruction. + if (HII->isDotCurInst(MI) && !HII->mayBeCurLoad(MI)) + return false; - assert (DepType == SDep::Data); - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; + if (!HII->mayBeCurLoad(MI)) + return false; + + // The "cur value" cannot come from inline asm. + if (PacketSU->getInstr()->isInlineAsm()) + return false; + + // Make sure candidate instruction uses cur. + DEBUG(dbgs() << "Can we DOT Cur Vector MI\n"; + MI->dump(); + dbgs() << "in packet\n";); + MachineInstr *MJ = MII; + DEBUG(dbgs() << "Checking CUR against "; MJ->dump();); + unsigned DestReg = MI->getOperand(0).getReg(); + bool FoundMatch = false; + for (auto &MO : MJ->operands()) + if (MO.isReg() && MO.getReg() == DestReg) + FoundMatch = true; + if (!FoundMatch) + return false; + + // Check for existing uses of a vector register within the packet which + // would be affected by converting a vector load into .cur formt. + for (auto BI : CurrentPacketMIs) { + DEBUG(dbgs() << "packet has "; BI->dump();); + if (BI->readsRegister(DepReg, MF.getSubtarget().getRegisterInfo())) + return false; + } + + DEBUG(dbgs() << "Can Dot CUR MI\n"; MI->dump();); + // We can convert the opcode into a .cur. + return true; +} +// Promote an instruction to its .new form. At this time, we have already +// made a call to canPromoteToDotNew and made sure that it can *indeed* be +// promoted. +bool HexagonPacketizerList::promoteToDotNew(MachineInstr* MI, + SDep::Kind DepType, MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC) { + assert (DepType == SDep::Data); int NewOpcode; if (RC == &Hexagon::PredRegsRegClass) - NewOpcode = QII->GetDotNewPredOp(MI, MBPI); + NewOpcode = HII->getDotNewPredOp(MI, MBPI); else - NewOpcode = QII->GetDotNewOp(MI); - MI->setDesc(QII->get(NewOpcode)); - + NewOpcode = HII->getDotNewOp(MI); + MI->setDesc(HII->get(NewOpcode)); return true; } -bool HexagonPacketizerList::DemoteToDotOld(MachineInstr* MI) { - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - int NewOpcode = QII->GetDotOldOp(MI->getOpcode()); - MI->setDesc(QII->get(NewOpcode)); +bool HexagonPacketizerList::demoteToDotOld(MachineInstr* MI) { + int NewOpcode = HII->getDotOldOp(MI->getOpcode()); + MI->setDesc(HII->get(NewOpcode)); return true; } @@ -455,175 +436,173 @@ enum PredicateKind { /// Returns true if an instruction is predicated on p0 and false if it's /// predicated on !p0. -static PredicateKind getPredicateSense(MachineInstr* MI, - const HexagonInstrInfo *QII) { - if (!QII->isPredicated(MI)) +static PredicateKind getPredicateSense(const MachineInstr *MI, + const HexagonInstrInfo *HII) { + if (!HII->isPredicated(MI)) return PK_Unknown; - - if (QII->isPredicatedTrue(MI)) + if (HII->isPredicatedTrue(MI)) return PK_True; - return PK_False; } -static MachineOperand& GetPostIncrementOperand(MachineInstr *MI, - const HexagonInstrInfo *QII) { - assert(QII->isPostIncrement(MI) && "Not a post increment operation."); +static const MachineOperand &getPostIncrementOperand(const MachineInstr *MI, + const HexagonInstrInfo *HII) { + assert(HII->isPostIncrement(MI) && "Not a post increment operation."); #ifndef NDEBUG // Post Increment means duplicates. Use dense map to find duplicates in the // list. Caution: Densemap initializes with the minimum of 64 buckets, // whereas there are at most 5 operands in the post increment. - DenseMap<unsigned, unsigned> DefRegsSet; - for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) - if (MI->getOperand(opNum).isReg() && - MI->getOperand(opNum).isDef()) { - DefRegsSet[MI->getOperand(opNum).getReg()] = 1; - } - - for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) - if (MI->getOperand(opNum).isReg() && - MI->getOperand(opNum).isUse()) { - if (DefRegsSet[MI->getOperand(opNum).getReg()]) { - return MI->getOperand(opNum); - } - } + DenseSet<unsigned> DefRegsSet; + for (auto &MO : MI->operands()) + if (MO.isReg() && MO.isDef()) + DefRegsSet.insert(MO.getReg()); + + for (auto &MO : MI->operands()) + if (MO.isReg() && MO.isUse() && DefRegsSet.count(MO.getReg())) + return MO; #else - if (MI->getDesc().mayLoad()) { + if (MI->mayLoad()) { + const MachineOperand &Op1 = MI->getOperand(1); // The 2nd operand is always the post increment operand in load. - assert(MI->getOperand(1).isReg() && - "Post increment operand has be to a register."); - return (MI->getOperand(1)); + assert(Op1.isReg() && "Post increment operand has be to a register."); + return Op1; } if (MI->getDesc().mayStore()) { + const MachineOperand &Op0 = MI->getOperand(0); // The 1st operand is always the post increment operand in store. - assert(MI->getOperand(0).isReg() && - "Post increment operand has be to a register."); - return (MI->getOperand(0)); + assert(Op0.isReg() && "Post increment operand has be to a register."); + return Op0; } #endif // we should never come here. llvm_unreachable("mayLoad or mayStore not set for Post Increment operation"); } -// get the value being stored -static MachineOperand& GetStoreValueOperand(MachineInstr *MI) { +// Get the value being stored. +static const MachineOperand& getStoreValueOperand(const MachineInstr *MI) { // value being stored is always the last operand. - return (MI->getOperand(MI->getNumOperands()-1)); + return MI->getOperand(MI->getNumOperands()-1); +} + +static bool isLoadAbsSet(const MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::L4_loadrd_ap: + case Hexagon::L4_loadrb_ap: + case Hexagon::L4_loadrh_ap: + case Hexagon::L4_loadrub_ap: + case Hexagon::L4_loadruh_ap: + case Hexagon::L4_loadri_ap: + return true; + } + return false; } -// can be new value store? +static const MachineOperand &getAbsSetOperand(const MachineInstr *MI) { + assert(isLoadAbsSet(MI)); + return MI->getOperand(1); +} + + +// Can be new value store? // Following restrictions are to be respected in convert a store into // a new value store. // 1. If an instruction uses auto-increment, its address register cannot // be a new-value register. Arch Spec 5.4.2.1 -// 2. If an instruction uses absolute-set addressing mode, -// its address register cannot be a new-value register. -// Arch Spec 5.4.2.1.TODO: This is not enabled as -// as absolute-set address mode patters are not implemented. +// 2. If an instruction uses absolute-set addressing mode, its address +// register cannot be a new-value register. Arch Spec 5.4.2.1. // 3. If an instruction produces a 64-bit result, its registers cannot be used // as new-value registers. Arch Spec 5.4.2.2. -// 4. If the instruction that sets a new-value register is conditional, then +// 4. If the instruction that sets the new-value register is conditional, then // the instruction that uses the new-value register must also be conditional, // and both must always have their predicates evaluate identically. // Arch Spec 5.4.2.3. -// 5. There is an implied restriction of a packet can not have another store, -// if there is a new value store in the packet. Corollary, if there is +// 5. There is an implied restriction that a packet cannot have another store, +// if there is a new value store in the packet. Corollary: if there is // already a store in a packet, there can not be a new value store. // Arch Spec: 3.4.4.2 -bool HexagonPacketizerList::CanPromoteToNewValueStore( - MachineInstr *MI, MachineInstr *PacketMI, unsigned DepReg, - const std::map<MachineInstr *, SUnit *> &MIToSUnit) { - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; +bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI, + const MachineInstr *PacketMI, unsigned DepReg) { // Make sure we are looking at the store, that can be promoted. - if (!QII->mayBeNewStore(MI)) + if (!HII->mayBeNewStore(MI)) return false; - // Make sure there is dependency and can be new value'ed - if (GetStoreValueOperand(MI).isReg() && - GetStoreValueOperand(MI).getReg() != DepReg) + // Make sure there is dependency and can be new value'd. + const MachineOperand &Val = getStoreValueOperand(MI); + if (Val.isReg() && Val.getReg() != DepReg) return false; - const HexagonRegisterInfo *QRI = - (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo(); const MCInstrDesc& MCID = PacketMI->getDesc(); - // first operand is always the result - - const TargetRegisterClass* PacketRC = QII->getRegClass(MCID, 0, QRI, MF); - - // if there is already an store in the packet, no can do new value store - // Arch Spec 3.4.4.2. - for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(), - VE = CurrentPacketMIs.end(); - (VI != VE); ++VI) { - SUnit *PacketSU = MIToSUnit.find(*VI)->second; - if (PacketSU->getInstr()->getDesc().mayStore() || - // if we have mayStore = 1 set on ALLOCFRAME and DEALLOCFRAME, - // then we don't need this - PacketSU->getInstr()->getOpcode() == Hexagon::S2_allocframe || - PacketSU->getInstr()->getOpcode() == Hexagon::L2_deallocframe) - return false; - } - if (PacketRC == &Hexagon::DoubleRegsRegClass) { - // new value store constraint: double regs can not feed into new value store - // arch spec section: 5.4.2.2 + // First operand is always the result. + const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0, HRI, MF); + // Double regs can not feed into new value store: PRM section: 5.4.2.2. + if (PacketRC == &Hexagon::DoubleRegsRegClass) return false; + + // New-value stores are of class NV (slot 0), dual stores require class ST + // in slot 0 (PRM 5.5). + for (auto I : CurrentPacketMIs) { + SUnit *PacketSU = MIToSUnit.find(I)->second; + if (PacketSU->getInstr()->mayStore()) + return false; } // Make sure it's NOT the post increment register that we are going to // new value. - if (QII->isPostIncrement(MI) && - MI->getDesc().mayStore() && - GetPostIncrementOperand(MI, QII).getReg() == DepReg) { + if (HII->isPostIncrement(MI) && + getPostIncrementOperand(MI, HII).getReg() == DepReg) { return false; } - if (QII->isPostIncrement(PacketMI) && - PacketMI->getDesc().mayLoad() && - GetPostIncrementOperand(PacketMI, QII).getReg() == DepReg) { - // if source is post_inc, or absolute-set addressing, - // it can not feed into new value store - // r3 = memw(r2++#4) - // memw(r30 + #-1404) = r2.new -> can not be new value store - // arch spec section: 5.4.2.1 + if (HII->isPostIncrement(PacketMI) && PacketMI->mayLoad() && + getPostIncrementOperand(PacketMI, HII).getReg() == DepReg) { + // If source is post_inc, or absolute-set addressing, it can not feed + // into new value store + // r3 = memw(r2++#4) + // memw(r30 + #-1404) = r2.new -> can not be new value store + // arch spec section: 5.4.2.1. return false; } + if (isLoadAbsSet(PacketMI) && getAbsSetOperand(PacketMI).getReg() == DepReg) + return false; + // If the source that feeds the store is predicated, new value store must // also be predicated. - if (QII->isPredicated(PacketMI)) { - if (!QII->isPredicated(MI)) + if (HII->isPredicated(PacketMI)) { + if (!HII->isPredicated(MI)) return false; // Check to make sure that they both will have their predicates - // evaluate identically + // evaluate identically. unsigned predRegNumSrc = 0; unsigned predRegNumDst = 0; const TargetRegisterClass* predRegClass = nullptr; - // Get predicate register used in the source instruction - for(unsigned opNum = 0; opNum < PacketMI->getNumOperands(); opNum++) { - if ( PacketMI->getOperand(opNum).isReg()) - predRegNumSrc = PacketMI->getOperand(opNum).getReg(); - predRegClass = QRI->getMinimalPhysRegClass(predRegNumSrc); - if (predRegClass == &Hexagon::PredRegsRegClass) { + // Get predicate register used in the source instruction. + for (auto &MO : PacketMI->operands()) { + if (!MO.isReg()) + continue; + predRegNumSrc = MO.getReg(); + predRegClass = HRI->getMinimalPhysRegClass(predRegNumSrc); + if (predRegClass == &Hexagon::PredRegsRegClass) break; - } } - assert ((predRegClass == &Hexagon::PredRegsRegClass ) && - ("predicate register not found in a predicated PacketMI instruction")); - - // Get predicate register used in new-value store instruction - for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) { - if ( MI->getOperand(opNum).isReg()) - predRegNumDst = MI->getOperand(opNum).getReg(); - predRegClass = QRI->getMinimalPhysRegClass(predRegNumDst); - if (predRegClass == &Hexagon::PredRegsRegClass) { + assert((predRegClass == &Hexagon::PredRegsRegClass) && + "predicate register not found in a predicated PacketMI instruction"); + + // Get predicate register used in new-value store instruction. + for (auto &MO : MI->operands()) { + if (!MO.isReg()) + continue; + predRegNumDst = MO.getReg(); + predRegClass = HRI->getMinimalPhysRegClass(predRegNumDst); + if (predRegClass == &Hexagon::PredRegsRegClass) break; - } } - assert ((predRegClass == &Hexagon::PredRegsRegClass ) && - ("predicate register not found in a predicated MI instruction")); + assert((predRegClass == &Hexagon::PredRegsRegClass) && + "predicate register not found in a predicated MI instruction"); // New-value register producer and user (store) need to satisfy these // constraints: @@ -632,13 +611,11 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore( // should also be .new predicated and if producer is not .new predicated // then store should not be .new predicated. // 3) Both new-value register producer and user should have same predicate - // sense, i.e, either both should be negated or both should be none negated. - - if (( predRegNumDst != predRegNumSrc) || - QII->isDotNewInst(PacketMI) != QII->isDotNewInst(MI) || - getPredicateSense(MI, QII) != getPredicateSense(PacketMI, QII)) { + // sense, i.e, either both should be negated or both should be non-negated. + if (predRegNumDst != predRegNumSrc || + HII->isDotNewInst(PacketMI) != HII->isDotNewInst(MI) || + getPredicateSense(MI, HII) != getPredicateSense(PacketMI, HII)) return false; - } } // Make sure that other than the new-value register no other store instruction @@ -649,81 +626,77 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore( // including PacketMI. Howerver, we need to perform the check for the // remaining instructions in the packet. - std::vector<MachineInstr*>::iterator VI; - std::vector<MachineInstr*>::iterator VE; unsigned StartCheck = 0; - for (VI=CurrentPacketMIs.begin(), VE = CurrentPacketMIs.end(); - (VI != VE); ++VI) { - SUnit *TempSU = MIToSUnit.find(*VI)->second; + for (auto I : CurrentPacketMIs) { + SUnit *TempSU = MIToSUnit.find(I)->second; MachineInstr* TempMI = TempSU->getInstr(); // Following condition is true for all the instructions until PacketMI is // reached (StartCheck is set to 0 before the for loop). // StartCheck flag is 1 for all the instructions after PacketMI. - if (TempMI != PacketMI && !StartCheck) // start processing only after - continue; // encountering PacketMI + if (TempMI != PacketMI && !StartCheck) // Start processing only after + continue; // encountering PacketMI. StartCheck = 1; - if (TempMI == PacketMI) // We don't want to check PacketMI for dependence + if (TempMI == PacketMI) // We don't want to check PacketMI for dependence. continue; - for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) { - if (MI->getOperand(opNum).isReg() && - TempSU->getInstr()->modifiesRegister(MI->getOperand(opNum).getReg(), - QRI)) + for (auto &MO : MI->operands()) + if (MO.isReg() && TempSU->getInstr()->modifiesRegister(MO.getReg(), HRI)) return false; - } } // Make sure that for non-POST_INC stores: // 1. The only use of reg is DepReg and no other registers. // This handles V4 base+index registers. // The following store can not be dot new. - // Eg. r0 = add(r0, #3)a + // Eg. r0 = add(r0, #3) // memw(r1+r0<<#2) = r0 - if (!QII->isPostIncrement(MI) && - GetStoreValueOperand(MI).isReg() && - GetStoreValueOperand(MI).getReg() == DepReg) { - for(unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) { - if (MI->getOperand(opNum).isReg() && - MI->getOperand(opNum).getReg() == DepReg) { - return false; - } - } - // 2. If data definition is because of implicit definition of the register, - // do not newify the store. Eg. - // %R9<def> = ZXTH %R12, %D6<imp-use>, %R12<imp-def> - // STrih_indexed %R8, 2, %R12<kill>; mem:ST2[%scevgep343] - for(unsigned opNum = 0; opNum < PacketMI->getNumOperands(); opNum++) { - if (PacketMI->getOperand(opNum).isReg() && - PacketMI->getOperand(opNum).getReg() == DepReg && - PacketMI->getOperand(opNum).isDef() && - PacketMI->getOperand(opNum).isImplicit()) { + if (!HII->isPostIncrement(MI)) { + for (unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) { + const MachineOperand &MO = MI->getOperand(opNum); + if (MO.isReg() && MO.getReg() == DepReg) return false; - } } } + // If data definition is because of implicit definition of the register, + // do not newify the store. Eg. + // %R9<def> = ZXTH %R12, %D6<imp-use>, %R12<imp-def> + // S2_storerh_io %R8, 2, %R12<kill>; mem:ST2[%scevgep343] + for (auto &MO : PacketMI->operands()) { + if (!MO.isReg() || !MO.isDef() || !MO.isImplicit()) + continue; + unsigned R = MO.getReg(); + if (R == DepReg || HRI->isSuperRegister(DepReg, R)) + return false; + } + + // Handle imp-use of super reg case. There is a target independent side + // change that should prevent this situation but I am handling it for + // just-in-case. For example, we cannot newify R2 in the following case: + // %R3<def> = A2_tfrsi 0; + // S2_storeri_io %R0<kill>, 0, %R2<kill>, %D1<imp-use,kill>; + for (auto &MO : MI->operands()) { + if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == DepReg) + return false; + } + // Can be dot new store. return true; } -// can this MI to promoted to either -// new value store or new value jump -bool HexagonPacketizerList::CanPromoteToNewValue( - MachineInstr *MI, SUnit *PacketSU, unsigned DepReg, - const std::map<MachineInstr *, SUnit *> &MIToSUnit, - MachineBasicBlock::iterator &MII) { - - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - if (!QII->mayBeNewStore(MI)) +// Can this MI to promoted to either new value store or new value jump. +bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr *MI, + const SUnit *PacketSU, unsigned DepReg, + MachineBasicBlock::iterator &MII) { + if (!HII->mayBeNewStore(MI)) return false; - MachineInstr *PacketMI = PacketSU->getInstr(); - // Check to see the store can be new value'ed. - if (CanPromoteToNewValueStore(MI, PacketMI, DepReg, MIToSUnit)) + MachineInstr *PacketMI = PacketSU->getInstr(); + if (canPromoteToNewValueStore(MI, PacketMI, DepReg)) return true; // Check to see the compare/jump can be new value'ed. @@ -731,93 +704,110 @@ bool HexagonPacketizerList::CanPromoteToNewValue( return false; } +static bool isImplicitDependency(const MachineInstr *I, unsigned DepReg) { + for (auto &MO : I->operands()) + if (MO.isReg() && MO.isDef() && (MO.getReg() == DepReg) && MO.isImplicit()) + return true; + return false; +} + // Check to see if an instruction can be dot new // There are three kinds. // 1. dot new on predicate - V2/V3/V4 // 2. dot new on stores NV/ST - V4 // 3. dot new on jump NV/J - V4 -- This is generated in a pass. -bool HexagonPacketizerList::CanPromoteToDotNew( - MachineInstr *MI, SUnit *PacketSU, unsigned DepReg, - const std::map<MachineInstr *, SUnit *> &MIToSUnit, - MachineBasicBlock::iterator &MII, const TargetRegisterClass *RC) { - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; +bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr *MI, + const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC) { // Already a dot new instruction. - if (QII->isDotNewInst(MI) && !QII->mayBeNewStore(MI)) + if (HII->isDotNewInst(MI) && !HII->mayBeNewStore(MI)) return false; if (!isNewifiable(MI)) return false; + const MachineInstr *PI = PacketSU->getInstr(); + + // The "new value" cannot come from inline asm. + if (PI->isInlineAsm()) + return false; + + // IMPLICIT_DEFs won't materialize as real instructions, so .new makes no + // sense. + if (PI->isImplicitDef()) + return false; + + // If dependency is trough an implicitly defined register, we should not + // newify the use. + if (isImplicitDependency(PI, DepReg)) + return false; + + const MCInstrDesc& MCID = PI->getDesc(); + const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0, HRI, MF); + if (DisableVecDblNVStores && VecRC == &Hexagon::VecDblRegsRegClass) + return false; + // predicate .new - if (RC == &Hexagon::PredRegsRegClass && isCondInst(MI)) - return true; - else if (RC != &Hexagon::PredRegsRegClass && - !QII->mayBeNewStore(MI)) // MI is not a new-value store + // bug 5670: until that is fixed + // TODO: MI->isIndirectBranch() and IsRegisterJump(MI) + if (RC == &Hexagon::PredRegsRegClass) + if (HII->isCondInst(MI) || MI->isReturn()) + return HII->predCanBeUsedAsDotNew(PI, DepReg); + + if (RC != &Hexagon::PredRegsRegClass && !HII->mayBeNewStore(MI)) + return false; + + // Create a dot new machine instruction to see if resources can be + // allocated. If not, bail out now. + int NewOpcode = HII->getDotNewOp(MI); + const MCInstrDesc &D = HII->get(NewOpcode); + MachineInstr *NewMI = MF.CreateMachineInstr(D, DebugLoc()); + bool ResourcesAvailable = ResourceTracker->canReserveResources(NewMI); + MF.DeleteMachineInstr(NewMI); + if (!ResourcesAvailable) + return false; + + // New Value Store only. New Value Jump generated as a separate pass. + if (!canPromoteToNewValue(MI, PacketSU, DepReg, MII)) return false; - else { - // Create a dot new machine instruction to see if resources can be - // allocated. If not, bail out now. - int NewOpcode = QII->GetDotNewOp(MI); - const MCInstrDesc &desc = QII->get(NewOpcode); - DebugLoc dl; - MachineInstr *NewMI = - MI->getParent()->getParent()->CreateMachineInstr(desc, dl); - bool ResourcesAvailable = ResourceTracker->canReserveResources(NewMI); - MI->getParent()->getParent()->DeleteMachineInstr(NewMI); - - if (!ResourcesAvailable) - return false; - // new value store only - // new new value jump generated as a passes - if (!CanPromoteToNewValue(MI, PacketSU, DepReg, MIToSUnit, MII)) { - return false; - } - } return true; } -// Go through the packet instructions and search for anti dependency -// between them and DepReg from MI -// Consider this case: +// Go through the packet instructions and search for an anti dependency between +// them and DepReg from MI. Consider this case: // Trying to add // a) %R1<def> = TFRI_cdNotPt %P3, 2 // to this packet: // { -// b) %P0<def> = OR_pp %P3<kill>, %P0<kill> -// c) %P3<def> = TFR_PdRs %R23 -// d) %R1<def> = TFRI_cdnPt %P3, 4 +// b) %P0<def> = C2_or %P3<kill>, %P0<kill> +// c) %P3<def> = C2_tfrrp %R23 +// d) %R1<def> = C2_cmovenewit %P3, 4 // } // The P3 from a) and d) will be complements after // a)'s P3 is converted to .new form -// Anti Dep between c) and b) is irrelevant for this case -bool HexagonPacketizerList::RestrictingDepExistInPacket( - MachineInstr *MI, unsigned DepReg, - const std::map<MachineInstr *, SUnit *> &MIToSUnit) { - - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; +// Anti-dep between c) and b) is irrelevant for this case +bool HexagonPacketizerList::restrictingDepExistInPacket(MachineInstr* MI, + unsigned DepReg) { SUnit *PacketSUDep = MIToSUnit.find(MI)->second; - for (std::vector<MachineInstr*>::iterator VIN = CurrentPacketMIs.begin(), - VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) { - + for (auto I : CurrentPacketMIs) { // We only care for dependencies to predicated instructions - if(!QII->isPredicated(*VIN)) continue; + if (!HII->isPredicated(I)) + continue; // Scheduling Unit for current insn in the packet - SUnit *PacketSU = MIToSUnit.find(*VIN)->second; + SUnit *PacketSU = MIToSUnit.find(I)->second; - // Look at dependencies between current members of the packet - // and predicate defining instruction MI. - // Make sure that dependency is on the exact register - // we care about. + // Look at dependencies between current members of the packet and + // predicate defining instruction MI. Make sure that dependency is + // on the exact register we care about. if (PacketSU->isSucc(PacketSUDep)) { for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) { - if ((PacketSU->Succs[i].getSUnit() == PacketSUDep) && - (PacketSU->Succs[i].getKind() == SDep::Anti) && - (PacketSU->Succs[i].getReg() == DepReg)) { + auto &Dep = PacketSU->Succs[i]; + if (Dep.getSUnit() == PacketSUDep && Dep.getKind() == SDep::Anti && + Dep.getReg() == DepReg) return true; - } } } } @@ -831,276 +821,362 @@ static unsigned getPredicatedRegister(MachineInstr *MI, const HexagonInstrInfo *QII) { /// We use the following rule: The first predicate register that is a use is /// the predicate register of a predicated instruction. - assert(QII->isPredicated(MI) && "Must be predicated instruction"); - for (MachineInstr::mop_iterator OI = MI->operands_begin(), - OE = MI->operands_end(); OI != OE; ++OI) { - MachineOperand &Op = *OI; + for (auto &Op : MI->operands()) { if (Op.isReg() && Op.getReg() && Op.isUse() && Hexagon::PredRegsRegClass.contains(Op.getReg())) return Op.getReg(); } llvm_unreachable("Unknown instruction operand layout"); - return 0; } // Given two predicated instructions, this function detects whether -// the predicates are complements -bool HexagonPacketizerList::ArePredicatesComplements( - MachineInstr *MI1, MachineInstr *MI2, - const std::map<MachineInstr *, SUnit *> &MIToSUnit) { - - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - +// the predicates are complements. +bool HexagonPacketizerList::arePredicatesComplements(MachineInstr *MI1, + MachineInstr *MI2) { // If we don't know the predicate sense of the instructions bail out early, we // need it later. - if (getPredicateSense(MI1, QII) == PK_Unknown || - getPredicateSense(MI2, QII) == PK_Unknown) + if (getPredicateSense(MI1, HII) == PK_Unknown || + getPredicateSense(MI2, HII) == PK_Unknown) return false; - // Scheduling unit for candidate - SUnit *SU = MIToSUnit.find(MI1)->second; + // Scheduling unit for candidate. + SUnit *SU = MIToSUnit[MI1]; // One corner case deals with the following scenario: // Trying to add - // a) %R24<def> = TFR_cPt %P0, %R25 + // a) %R24<def> = A2_tfrt %P0, %R25 // to this packet: - // // { - // b) %R25<def> = TFR_cNotPt %P0, %R24 - // c) %P0<def> = CMPEQri %R26, 1 + // b) %R25<def> = A2_tfrf %P0, %R24 + // c) %P0<def> = C2_cmpeqi %R26, 1 // } // - // On general check a) and b) are complements, but - // presence of c) will convert a) to .new form, and - // then it is not a complement - // We attempt to detect it by analyzing existing - // dependencies in the packet + // On general check a) and b) are complements, but presence of c) will + // convert a) to .new form, and then it is not a complement. + // We attempt to detect it by analyzing existing dependencies in the packet. // Analyze relationships between all existing members of the packet. - // Look for Anti dependecy on the same predicate reg - // as used in the candidate - for (std::vector<MachineInstr*>::iterator VIN = CurrentPacketMIs.begin(), - VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) { - - // Scheduling Unit for current insn in the packet - SUnit *PacketSU = MIToSUnit.find(*VIN)->second; + // Look for Anti dependecy on the same predicate reg as used in the + // candidate. + for (auto I : CurrentPacketMIs) { + // Scheduling Unit for current insn in the packet. + SUnit *PacketSU = MIToSUnit.find(I)->second; // If this instruction in the packet is succeeded by the candidate... if (PacketSU->isSucc(SU)) { for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) { - // The corner case exist when there is true data - // dependency between candidate and one of current - // packet members, this dep is on predicate reg, and - // there already exist anti dep on the same pred in + auto Dep = PacketSU->Succs[i]; + // The corner case exist when there is true data dependency between + // candidate and one of current packet members, this dep is on + // predicate reg, and there already exist anti dep on the same pred in // the packet. - if (PacketSU->Succs[i].getSUnit() == SU && - PacketSU->Succs[i].getKind() == SDep::Data && - Hexagon::PredRegsRegClass.contains( - PacketSU->Succs[i].getReg()) && - // Here I know that *VIN is predicate setting instruction - // with true data dep to candidate on the register - // we care about - c) in the above example. - // Now I need to see if there is an anti dependency - // from c) to any other instruction in the - // same packet on the pred reg of interest - RestrictingDepExistInPacket(*VIN,PacketSU->Succs[i].getReg(), - MIToSUnit)) { - return false; + if (Dep.getSUnit() == SU && Dep.getKind() == SDep::Data && + Hexagon::PredRegsRegClass.contains(Dep.getReg())) { + // Here I know that I is predicate setting instruction with true + // data dep to candidate on the register we care about - c) in the + // above example. Now I need to see if there is an anti dependency + // from c) to any other instruction in the same packet on the pred + // reg of interest. + if (restrictingDepExistInPacket(I, Dep.getReg())) + return false; } } } } - // If the above case does not apply, check regular - // complement condition. - // Check that the predicate register is the same and - // that the predicate sense is different - // We also need to differentiate .old vs. .new: - // !p0 is not complimentary to p0.new - unsigned PReg1 = getPredicatedRegister(MI1, QII); - unsigned PReg2 = getPredicatedRegister(MI2, QII); - return ((PReg1 == PReg2) && - Hexagon::PredRegsRegClass.contains(PReg1) && - Hexagon::PredRegsRegClass.contains(PReg2) && - (getPredicateSense(MI1, QII) != getPredicateSense(MI2, QII)) && - (QII->isDotNewInst(MI1) == QII->isDotNewInst(MI2))); + // If the above case does not apply, check regular complement condition. + // Check that the predicate register is the same and that the predicate + // sense is different We also need to differentiate .old vs. .new: !p0 + // is not complementary to p0.new. + unsigned PReg1 = getPredicatedRegister(MI1, HII); + unsigned PReg2 = getPredicatedRegister(MI2, HII); + return PReg1 == PReg2 && + Hexagon::PredRegsRegClass.contains(PReg1) && + Hexagon::PredRegsRegClass.contains(PReg2) && + getPredicateSense(MI1, HII) != getPredicateSense(MI2, HII) && + HII->isDotNewInst(MI1) == HII->isDotNewInst(MI2); } -// initPacketizerState - Initialize packetizer flags +// Initialize packetizer flags. void HexagonPacketizerList::initPacketizerState() { - Dependence = false; PromotedToDotNew = false; GlueToNewValueJump = false; GlueAllocframeStore = false; FoundSequentialDependence = false; - - return; } -// ignorePseudoInstruction - Ignore bundling of pseudo instructions. -bool HexagonPacketizerList::ignorePseudoInstruction(MachineInstr *MI, - MachineBasicBlock *MBB) { +// Ignore bundling of pseudo instructions. +bool HexagonPacketizerList::ignorePseudoInstruction(const MachineInstr *MI, + const MachineBasicBlock*) { if (MI->isDebugValue()) return true; if (MI->isCFIInstruction()) return false; - // We must print out inline assembly + // We must print out inline assembly. if (MI->isInlineAsm()) return false; - // We check if MI has any functional units mapped to it. - // If it doesn't, we ignore the instruction. + if (MI->isImplicitDef()) + return false; + + // We check if MI has any functional units mapped to it. If it doesn't, + // we ignore the instruction. const MCInstrDesc& TID = MI->getDesc(); - unsigned SchedClass = TID.getSchedClass(); - const InstrStage* IS = - ResourceTracker->getInstrItins()->beginStage(SchedClass); + auto *IS = ResourceTracker->getInstrItins()->beginStage(TID.getSchedClass()); unsigned FuncUnits = IS->getUnits(); return !FuncUnits; } -// isSoloInstruction: - Returns true for instructions that must be -// scheduled in their own packet. -bool HexagonPacketizerList::isSoloInstruction(MachineInstr *MI) { +bool HexagonPacketizerList::isSoloInstruction(const MachineInstr *MI) { if (MI->isEHLabel() || MI->isCFIInstruction()) return true; - if (MI->isInlineAsm()) + // Consider inline asm to not be a solo instruction by default. + // Inline asm will be put in a packet temporarily, but then it will be + // removed, and placed outside of the packet (before or after, depending + // on dependencies). This is to reduce the impact of inline asm as a + // "packet splitting" instruction. + if (MI->isInlineAsm() && !ScheduleInlineAsm) return true; // From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints: // trap, pause, barrier, icinva, isync, and syncht are solo instructions. // They must not be grouped with other instructions in a packet. - if (IsSchedBarrier(MI)) + if (isSchedBarrier(MI)) + return true; + + if (HII->isSolo(MI)) + return true; + + if (MI->getOpcode() == Hexagon::A2_nop) return true; return false; } -// isLegalToPacketizeTogether: -// SUI is the current instruction that is out side of the current packet. -// SUJ is the current instruction inside the current packet against which that -// SUI will be packetized. -bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { - MachineInstr *I = SUI->getInstr(); - MachineInstr *J = SUJ->getInstr(); - assert(I && J && "Unable to packetize null instruction!"); - const MCInstrDesc &MCIDI = I->getDesc(); - const MCInstrDesc &MCIDJ = J->getDesc(); +// Quick check if instructions MI and MJ cannot coexist in the same packet. +// Limit the tests to be "one-way", e.g. "if MI->isBranch and MJ->isInlineAsm", +// but not the symmetric case: "if MJ->isBranch and MI->isInlineAsm". +// For full test call this function twice: +// cannotCoexistAsymm(MI, MJ) || cannotCoexistAsymm(MJ, MI) +// Doing the test only one way saves the amount of code in this function, +// since every test would need to be repeated with the MI and MJ reversed. +static bool cannotCoexistAsymm(const MachineInstr *MI, const MachineInstr *MJ, + const HexagonInstrInfo &HII) { + const MachineFunction *MF = MI->getParent()->getParent(); + if (MF->getSubtarget<HexagonSubtarget>().hasV60TOpsOnly() && + HII.isHVXMemWithAIndirect(MI, MJ)) + return true; - MachineBasicBlock::iterator II = I; + // An inline asm cannot be together with a branch, because we may not be + // able to remove the asm out after packetizing (i.e. if the asm must be + // moved past the bundle). Similarly, two asms cannot be together to avoid + // complications when determining their relative order outside of a bundle. + if (MI->isInlineAsm()) + return MJ->isInlineAsm() || MJ->isBranch() || MJ->isBarrier() || + MJ->isCall() || MJ->isTerminator(); - const unsigned FrameSize = MF.getFrameInfo()->getStackSize(); - const HexagonRegisterInfo *QRI = - (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo(); - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; + // "False" really means that the quick check failed to determine if + // I and J cannot coexist. + return false; +} - // Inline asm cannot go in the packet. - if (I->getOpcode() == Hexagon::INLINEASM) - llvm_unreachable("Should not meet inline asm here!"); - if (isSoloInstruction(I)) - llvm_unreachable("Should not meet solo instr here!"); +// Full, symmetric check. +bool HexagonPacketizerList::cannotCoexist(const MachineInstr *MI, + const MachineInstr *MJ) { + return cannotCoexistAsymm(MI, MJ, *HII) || cannotCoexistAsymm(MJ, MI, *HII); +} - // A save callee-save register function call can only be in a packet - // with instructions that don't write to the callee-save registers. - if ((QII->isSaveCalleeSavedRegsCall(I) && - DoesModifyCalleeSavedReg(J, QRI)) || - (QII->isSaveCalleeSavedRegsCall(J) && - DoesModifyCalleeSavedReg(I, QRI))) { - Dependence = true; - return false; +void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) { + for (auto &B : MF) { + MachineBasicBlock::iterator BundleIt; + MachineBasicBlock::instr_iterator NextI; + for (auto I = B.instr_begin(), E = B.instr_end(); I != E; I = NextI) { + NextI = std::next(I); + MachineInstr *MI = &*I; + if (MI->isBundle()) + BundleIt = I; + if (!MI->isInsideBundle()) + continue; + + // Decide on where to insert the instruction that we are pulling out. + // Debug instructions always go before the bundle, but the placement of + // INLINE_ASM depends on potential dependencies. By default, try to + // put it before the bundle, but if the asm writes to a register that + // other instructions in the bundle read, then we need to place it + // after the bundle (to preserve the bundle semantics). + bool InsertBeforeBundle; + if (MI->isInlineAsm()) + InsertBeforeBundle = !hasWriteToReadDep(MI, BundleIt, HRI); + else if (MI->isDebugValue()) + InsertBeforeBundle = true; + else + continue; + + BundleIt = moveInstrOut(MI, BundleIt, InsertBeforeBundle); + } } +} - // Two control flow instructions cannot go in the same packet. - if (IsControlFlow(I) && IsControlFlow(J)) { - Dependence = true; - return false; +// Check if a given instruction is of class "system". +static bool isSystemInstr(const MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::Y2_barrier: + case Hexagon::Y2_dcfetchbo: + return true; } + return false; +} - // A LoopN instruction cannot appear in the same packet as a jump or call. - if (IsLoopN(I) && - (IsDirectJump(J) || MCIDJ.isCall() || QII->isDeallocRet(J))) { - Dependence = true; +bool HexagonPacketizerList::hasDeadDependence(const MachineInstr *I, + const MachineInstr *J) { + // The dependence graph may not include edges between dead definitions, + // so without extra checks, we could end up packetizing two instruction + // defining the same (dead) register. + if (I->isCall() || J->isCall()) return false; - } - if (IsLoopN(J) && - (IsDirectJump(I) || MCIDI.isCall() || QII->isDeallocRet(I))) { - Dependence = true; + if (HII->isPredicated(I) || HII->isPredicated(J)) return false; + + BitVector DeadDefs(Hexagon::NUM_TARGET_REGS); + for (auto &MO : I->operands()) { + if (!MO.isReg() || !MO.isDef() || !MO.isDead()) + continue; + DeadDefs[MO.getReg()] = true; } + for (auto &MO : J->operands()) { + if (!MO.isReg() || !MO.isDef() || !MO.isDead()) + continue; + unsigned R = MO.getReg(); + if (R != Hexagon::USR_OVF && DeadDefs[R]) + return true; + } + return false; +} + +bool HexagonPacketizerList::hasControlDependence(const MachineInstr *I, + const MachineInstr *J) { + // A save callee-save register function call can only be in a packet + // with instructions that don't write to the callee-save registers. + if ((HII->isSaveCalleeSavedRegsCall(I) && + doesModifyCalleeSavedReg(J, HRI)) || + (HII->isSaveCalleeSavedRegsCall(J) && + doesModifyCalleeSavedReg(I, HRI))) + return true; + + // Two control flow instructions cannot go in the same packet. + if (isControlFlow(I) && isControlFlow(J)) + return true; + + // \ref-manual (7.3.4) A loop setup packet in loopN or spNloop0 cannot + // contain a speculative indirect jump, + // a new-value compare jump or a dealloc_return. + auto isBadForLoopN = [this] (const MachineInstr *MI) -> bool { + if (MI->isCall() || HII->isDeallocRet(MI) || HII->isNewValueJump(MI)) + return true; + if (HII->isPredicated(MI) && HII->isPredicatedNew(MI) && HII->isJumpR(MI)) + return true; + return false; + }; + + if (HII->isLoopN(I) && isBadForLoopN(J)) + return true; + if (HII->isLoopN(J) && isBadForLoopN(I)) + return true; + // dealloc_return cannot appear in the same packet as a conditional or // unconditional jump. - if (QII->isDeallocRet(I) && - (MCIDJ.isBranch() || MCIDJ.isCall() || MCIDJ.isBarrier())) { - Dependence = true; - return false; + return HII->isDeallocRet(I) && + (J->isBranch() || J->isCall() || J->isBarrier()); +} + +bool HexagonPacketizerList::hasV4SpecificDependence(const MachineInstr *I, + const MachineInstr *J) { + bool SysI = isSystemInstr(I), SysJ = isSystemInstr(J); + bool StoreI = I->mayStore(), StoreJ = J->mayStore(); + if ((SysI && StoreJ) || (SysJ && StoreI)) + return true; + + if (StoreI && StoreJ) { + if (HII->isNewValueInst(J) || HII->isMemOp(J) || HII->isMemOp(I)) + return true; + } else { + // A memop cannot be in the same packet with another memop or a store. + // Two stores can be together, but here I and J cannot both be stores. + bool MopStI = HII->isMemOp(I) || StoreI; + bool MopStJ = HII->isMemOp(J) || StoreJ; + if (MopStI && MopStJ) + return true; } + return (StoreJ && HII->isDeallocRet(I)) || (StoreI && HII->isDeallocRet(J)); +} - // V4 allows dual store. But does not allow second store, if the - // first store is not in SLOT0. New value store, new value jump, - // dealloc_return and memop always take SLOT0. - // Arch spec 3.4.4.2 - if (MCIDI.mayStore() && MCIDJ.mayStore() && - (QII->isNewValueInst(J) || QII->isMemOp(J) || QII->isMemOp(I))) { - Dependence = true; +// SUI is the current instruction that is out side of the current packet. +// SUJ is the current instruction inside the current packet against which that +// SUI will be packetized. +bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { + MachineInstr *I = SUI->getInstr(); + MachineInstr *J = SUJ->getInstr(); + assert(I && J && "Unable to packetize null instruction!"); + + // Clear IgnoreDepMIs when Packet starts. + if (CurrentPacketMIs.size() == 1) + IgnoreDepMIs.clear(); + + MachineBasicBlock::iterator II = I; + const unsigned FrameSize = MF.getFrameInfo()->getStackSize(); + + // Solo instructions cannot go in the packet. + assert(!isSoloInstruction(I) && "Unexpected solo instr!"); + + if (cannotCoexist(I, J)) return false; - } - if ((QII->isMemOp(J) && MCIDI.mayStore()) - || (MCIDJ.mayStore() && QII->isMemOp(I)) - || (QII->isMemOp(J) && QII->isMemOp(I))) { - Dependence = true; + Dependence = hasDeadDependence(I, J) || hasControlDependence(I, J); + if (Dependence) return false; - } - //if dealloc_return - if (MCIDJ.mayStore() && QII->isDeallocRet(I)) { - Dependence = true; + // V4 allows dual stores. It does not allow second store, if the first + // store is not in SLOT0. New value store, new value jump, dealloc_return + // and memop always take SLOT0. Arch spec 3.4.4.2. + Dependence = hasV4SpecificDependence(I, J); + if (Dependence) return false; - } // If an instruction feeds new value jump, glue it. MachineBasicBlock::iterator NextMII = I; ++NextMII; - if (NextMII != I->getParent()->end() && QII->isNewValueJump(NextMII)) { + if (NextMII != I->getParent()->end() && HII->isNewValueJump(NextMII)) { MachineInstr *NextMI = NextMII; bool secondRegMatch = false; - bool maintainNewValueJump = false; + const MachineOperand &NOp0 = NextMI->getOperand(0); + const MachineOperand &NOp1 = NextMI->getOperand(1); - if (NextMI->getOperand(1).isReg() && - I->getOperand(0).getReg() == NextMI->getOperand(1).getReg()) { + if (NOp1.isReg() && I->getOperand(0).getReg() == NOp1.getReg()) secondRegMatch = true; - maintainNewValueJump = true; - } - - if (!secondRegMatch && - I->getOperand(0).getReg() == NextMI->getOperand(0).getReg()) { - maintainNewValueJump = true; - } - for (std::vector<MachineInstr*>::iterator - VI = CurrentPacketMIs.begin(), - VE = CurrentPacketMIs.end(); - (VI != VE && maintainNewValueJump); ++VI) { - SUnit *PacketSU = MIToSUnit.find(*VI)->second; - - // NVJ can not be part of the dual jump - Arch Spec: section 7.8 - if (PacketSU->getInstr()->getDesc().isCall()) { + for (auto I : CurrentPacketMIs) { + SUnit *PacketSU = MIToSUnit.find(I)->second; + MachineInstr *PI = PacketSU->getInstr(); + // NVJ can not be part of the dual jump - Arch Spec: section 7.8. + if (PI->isCall()) { Dependence = true; break; } - // Validate + // Validate: // 1. Packet does not have a store in it. // 2. If the first operand of the nvj is newified, and the second // operand is also a reg, it (second reg) is not defined in @@ -1108,302 +1184,413 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { // 3. If the second operand of the nvj is newified, (which means // first operand is also a reg), first reg is not defined in // the same packet. - if (PacketSU->getInstr()->getDesc().mayStore() || - PacketSU->getInstr()->getOpcode() == Hexagon::S2_allocframe || - // Check #2. - (!secondRegMatch && NextMI->getOperand(1).isReg() && - PacketSU->getInstr()->modifiesRegister( - NextMI->getOperand(1).getReg(), QRI)) || - // Check #3. - (secondRegMatch && - PacketSU->getInstr()->modifiesRegister( - NextMI->getOperand(0).getReg(), QRI))) { + if (PI->getOpcode() == Hexagon::S2_allocframe || PI->mayStore() || + HII->isLoopN(PI)) { + Dependence = true; + break; + } + // Check #2/#3. + const MachineOperand &OpR = secondRegMatch ? NOp0 : NOp1; + if (OpR.isReg() && PI->modifiesRegister(OpR.getReg(), HRI)) { Dependence = true; break; } } - if (!Dependence) - GlueToNewValueJump = true; - else + + if (Dependence) return false; + GlueToNewValueJump = true; } - if (SUJ->isSucc(SUI)) { - for (unsigned i = 0; - (i < SUJ->Succs.size()) && !FoundSequentialDependence; - ++i) { + // There no dependency between a prolog instruction and its successor. + if (!SUJ->isSucc(SUI)) + return true; - if (SUJ->Succs[i].getSUnit() != SUI) { - continue; - } + for (unsigned i = 0; i < SUJ->Succs.size(); ++i) { + if (FoundSequentialDependence) + break; - SDep::Kind DepType = SUJ->Succs[i].getKind(); + if (SUJ->Succs[i].getSUnit() != SUI) + continue; - // For direct calls: - // Ignore register dependences for call instructions for - // packetization purposes except for those due to r31 and - // predicate registers. - // - // For indirect calls: - // Same as direct calls + check for true dependences to the register - // used in the indirect call. - // - // We completely ignore Order dependences for call instructions - // - // For returns: - // Ignore register dependences for return instructions like jumpr, - // dealloc return unless we have dependencies on the explicit uses - // of the registers used by jumpr (like r31) or dealloc return - // (like r29 or r30). - // - // TODO: Currently, jumpr is handling only return of r31. So, the - // following logic (specificaly IsCallDependent) is working fine. - // We need to enable jumpr for register other than r31 and then, - // we need to rework the last part, where it handles indirect call - // of that (IsCallDependent) function. Bug 6216 is opened for this. - // - unsigned DepReg = 0; - const TargetRegisterClass* RC = nullptr; - if (DepType == SDep::Data) { - DepReg = SUJ->Succs[i].getReg(); - RC = QRI->getMinimalPhysRegClass(DepReg); - } - if ((MCIDI.isCall() || MCIDI.isReturn()) && - (!IsRegDependence(DepType) || - !IsCallDependent(I, DepType, SUJ->Succs[i].getReg()))) { - /* do nothing */ - } + SDep::Kind DepType = SUJ->Succs[i].getKind(); + // For direct calls: + // Ignore register dependences for call instructions for packetization + // purposes except for those due to r31 and predicate registers. + // + // For indirect calls: + // Same as direct calls + check for true dependences to the register + // used in the indirect call. + // + // We completely ignore Order dependences for call instructions. + // + // For returns: + // Ignore register dependences for return instructions like jumpr, + // dealloc return unless we have dependencies on the explicit uses + // of the registers used by jumpr (like r31) or dealloc return + // (like r29 or r30). + // + // TODO: Currently, jumpr is handling only return of r31. So, the + // following logic (specificaly isCallDependent) is working fine. + // We need to enable jumpr for register other than r31 and then, + // we need to rework the last part, where it handles indirect call + // of that (isCallDependent) function. Bug 6216 is opened for this. + unsigned DepReg = 0; + const TargetRegisterClass *RC = nullptr; + if (DepType == SDep::Data) { + DepReg = SUJ->Succs[i].getReg(); + RC = HRI->getMinimalPhysRegClass(DepReg); + } - // For instructions that can be promoted to dot-new, try to promote. - else if ((DepType == SDep::Data) && - CanPromoteToDotNew(I, SUJ, DepReg, MIToSUnit, II, RC) && - PromoteToDotNew(I, DepType, II, RC)) { - PromotedToDotNew = true; - /* do nothing */ - } + if (I->isCall() || I->isReturn()) { + if (!isRegDependence(DepType)) + continue; + if (!isCallDependent(I, DepType, SUJ->Succs[i].getReg())) + continue; + } - else if ((DepType == SDep::Data) && - (QII->isNewValueJump(I))) { - /* do nothing */ - } + if (DepType == SDep::Data) { + if (canPromoteToDotCur(J, SUJ, DepReg, II, RC)) + if (promoteToDotCur(J, DepType, II, RC)) + continue; + } - // For predicated instructions, if the predicates are complements - // then there can be no dependence. - else if (QII->isPredicated(I) && - QII->isPredicated(J) && - ArePredicatesComplements(I, J, MIToSUnit)) { - /* do nothing */ + // Data dpendence ok if we have load.cur. + if (DepType == SDep::Data && HII->isDotCurInst(J)) { + if (HII->isV60VectorInstruction(I)) + continue; + } + // For instructions that can be promoted to dot-new, try to promote. + if (DepType == SDep::Data) { + if (canPromoteToDotNew(I, SUJ, DepReg, II, RC)) { + if (promoteToDotNew(I, DepType, II, RC)) { + PromotedToDotNew = true; + continue; + } } - else if (IsDirectJump(I) && - !MCIDJ.isBranch() && - !MCIDJ.isCall() && - (DepType == SDep::Order)) { - // Ignore Order dependences between unconditional direct branches - // and non-control-flow instructions - /* do nothing */ - } - else if (MCIDI.isConditionalBranch() && (DepType != SDep::Data) && - (DepType != SDep::Output)) { - // Ignore all dependences for jumps except for true and output - // dependences - /* do nothing */ - } - - // Ignore output dependences due to superregs. We can - // write to two different subregisters of R1:0 for instance - // in the same cycle - // + if (HII->isNewValueJump(I)) + continue; + } + // For predicated instructions, if the predicates are complements then + // there can be no dependence. + if (HII->isPredicated(I) && HII->isPredicated(J) && + arePredicatesComplements(I, J)) { + // Not always safe to do this translation. + // DAG Builder attempts to reduce dependence edges using transitive + // nature of dependencies. Here is an example: // - // Let the - // If neither I nor J defines DepReg, then this is a - // superfluous output dependence. The dependence must be of the - // form: - // R0 = ... - // R1 = ... - // and there is an output dependence between the two instructions - // with - // DepReg = D0 - // We want to ignore these dependences. - // Ideally, the dependence constructor should annotate such - // dependences. We can then avoid this relatively expensive check. + // r0 = tfr_pt ... (1) + // r0 = tfr_pf ... (2) + // r0 = tfr_pt ... (3) // - else if (DepType == SDep::Output) { - // DepReg is the register that's responsible for the dependence. - unsigned DepReg = SUJ->Succs[i].getReg(); + // There will be an output dependence between (1)->(2) and (2)->(3). + // However, there is no dependence edge between (1)->(3). This results + // in all 3 instructions going in the same packet. We ignore dependce + // only once to avoid this situation. + auto Itr = std::find(IgnoreDepMIs.begin(), IgnoreDepMIs.end(), J); + if (Itr != IgnoreDepMIs.end()) { + Dependence = true; + return false; + } + IgnoreDepMIs.push_back(I); + continue; + } + + // Ignore Order dependences between unconditional direct branches + // and non-control-flow instructions. + if (isDirectJump(I) && !J->isBranch() && !J->isCall() && + DepType == SDep::Order) + continue; + + // Ignore all dependences for jumps except for true and output + // dependences. + if (I->isConditionalBranch() && DepType != SDep::Data && + DepType != SDep::Output) + continue; + + // Ignore output dependences due to superregs. We can write to two + // different subregisters of R1:0 for instance in the same cycle. + + // If neither I nor J defines DepReg, then this is a superfluous output + // dependence. The dependence must be of the form: + // R0 = ... + // R1 = ... + // and there is an output dependence between the two instructions with + // DepReg = D0. + // We want to ignore these dependences. Ideally, the dependence + // constructor should annotate such dependences. We can then avoid this + // relatively expensive check. + // + if (DepType == SDep::Output) { + // DepReg is the register that's responsible for the dependence. + unsigned DepReg = SUJ->Succs[i].getReg(); + + // Check if I and J really defines DepReg. + if (!I->definesRegister(DepReg) && !J->definesRegister(DepReg)) + continue; + FoundSequentialDependence = true; + break; + } - // Check if I and J really defines DepReg. - if (I->definesRegister(DepReg) || - J->definesRegister(DepReg)) { + // For Order dependences: + // 1. On V4 or later, volatile loads/stores can be packetized together, + // unless other rules prevent is. + // 2. Store followed by a load is not allowed. + // 3. Store followed by a store is only valid on V4 or later. + // 4. Load followed by any memory operation is allowed. + if (DepType == SDep::Order) { + if (!PacketizeVolatiles) { + bool OrdRefs = I->hasOrderedMemoryRef() || J->hasOrderedMemoryRef(); + if (OrdRefs) { FoundSequentialDependence = true; break; } } - - // We ignore Order dependences for - // 1. Two loads unless they are volatile. - // 2. Two stores in V4 unless they are volatile. - else if ((DepType == SDep::Order) && - !I->hasOrderedMemoryRef() && - !J->hasOrderedMemoryRef()) { - if (MCIDI.mayStore() && MCIDJ.mayStore()) { - /* do nothing */ - } - // store followed by store-- not OK on V2 - // store followed by load -- not OK on all (OK if addresses - // are not aliased) - // load followed by store -- OK on all - // load followed by load -- OK on all - else if ( !MCIDJ.mayStore()) { - /* do nothing */ - } - else { + // J is first, I is second. + bool LoadJ = J->mayLoad(), StoreJ = J->mayStore(); + bool LoadI = I->mayLoad(), StoreI = I->mayStore(); + if (StoreJ) { + // Two stores are only allowed on V4+. Load following store is never + // allowed. + if (LoadI) { FoundSequentialDependence = true; break; } - } - - // For V4, special case ALLOCFRAME. Even though there is dependency - // between ALLOCFRAME and subsequent store, allow it to be - // packetized in a same packet. This implies that the store is using - // caller's SP. Hence, offset needs to be updated accordingly. - else if (DepType == SDep::Data - && J->getOpcode() == Hexagon::S2_allocframe - && (I->getOpcode() == Hexagon::S2_storerd_io - || I->getOpcode() == Hexagon::S2_storeri_io - || I->getOpcode() == Hexagon::S2_storerb_io) - && I->getOperand(0).getReg() == QRI->getStackRegister() - && QII->isValidOffset(I->getOpcode(), - I->getOperand(1).getImm() - - (FrameSize + HEXAGON_LRFP_SIZE))) - { - GlueAllocframeStore = true; - // Since this store is to be glued with allocframe in the same - // packet, it will use SP of the previous stack frame, i.e - // caller's SP. Therefore, we need to recalculate offset according - // to this change. - I->getOperand(1).setImm(I->getOperand(1).getImm() - - (FrameSize + HEXAGON_LRFP_SIZE)); - } - - // - // Skip over anti-dependences. Two instructions that are - // anti-dependent can share a packet - // - else if (DepType != SDep::Anti) { + } else if (!LoadJ || (!LoadI && !StoreI)) { + // If J is neither load nor store, assume a dependency. + // If J is a load, but I is neither, also assume a dependency. FoundSequentialDependence = true; break; } + // Store followed by store: not OK on V2. + // Store followed by load: not OK on all. + // Load followed by store: OK on all. + // Load followed by load: OK on all. + continue; } - if (FoundSequentialDependence) { - Dependence = true; - return false; + // For V4, special case ALLOCFRAME. Even though there is dependency + // between ALLOCFRAME and subsequent store, allow it to be packetized + // in a same packet. This implies that the store is using the caller's + // SP. Hence, offset needs to be updated accordingly. + if (DepType == SDep::Data && J->getOpcode() == Hexagon::S2_allocframe) { + unsigned Opc = I->getOpcode(); + switch (Opc) { + case Hexagon::S2_storerd_io: + case Hexagon::S2_storeri_io: + case Hexagon::S2_storerh_io: + case Hexagon::S2_storerb_io: + if (I->getOperand(0).getReg() == HRI->getStackRegister()) { + int64_t Imm = I->getOperand(1).getImm(); + int64_t NewOff = Imm - (FrameSize + HEXAGON_LRFP_SIZE); + if (HII->isValidOffset(Opc, NewOff)) { + GlueAllocframeStore = true; + // Since this store is to be glued with allocframe in the same + // packet, it will use SP of the previous stack frame, i.e. + // caller's SP. Therefore, we need to recalculate offset + // according to this change. + I->getOperand(1).setImm(NewOff); + continue; + } + } + default: + break; + } + } + + // Skip over anti-dependences. Two instructions that are anti-dependent + // can share a packet. + if (DepType != SDep::Anti) { + FoundSequentialDependence = true; + break; } } + if (FoundSequentialDependence) { + Dependence = true; + return false; + } + return true; } -// isLegalToPruneDependencies bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) { MachineInstr *I = SUI->getInstr(); - assert(I && SUJ->getInstr() && "Unable to packetize null instruction!"); - - const unsigned FrameSize = MF.getFrameInfo()->getStackSize(); - - if (Dependence) { + MachineInstr *J = SUJ->getInstr(); + assert(I && J && "Unable to packetize null instruction!"); - // Check if the instruction was promoted to a dot-new. If so, demote it - // back into a dot-old. - if (PromotedToDotNew) { - DemoteToDotOld(I); - } + if (cannotCoexist(I, J)) + return false; - // Check if the instruction (must be a store) was glued with an Allocframe - // instruction. If so, restore its offset to its original value, i.e. use - // curent SP instead of caller's SP. - if (GlueAllocframeStore) { - I->getOperand(1).setImm(I->getOperand(1).getImm() + - FrameSize + HEXAGON_LRFP_SIZE); - } + if (!Dependence) + return true; - return false; + // Check if the instruction was promoted to a dot-new. If so, demote it + // back into a dot-old. + if (PromotedToDotNew) + demoteToDotOld(I); + + cleanUpDotCur(); + // Check if the instruction (must be a store) was glued with an allocframe + // instruction. If so, restore its offset to its original value, i.e. use + // current SP instead of caller's SP. + if (GlueAllocframeStore) { + unsigned FrameSize = MF.getFrameInfo()->getStackSize(); + MachineOperand &MOff = I->getOperand(1); + MOff.setImm(MOff.getImm() + FrameSize + HEXAGON_LRFP_SIZE); } - return true; + return false; } + MachineBasicBlock::iterator HexagonPacketizerList::addToPacket(MachineInstr *MI) { + MachineBasicBlock::iterator MII = MI; + MachineBasicBlock *MBB = MI->getParent(); + if (MI->isImplicitDef()) { + unsigned R = MI->getOperand(0).getReg(); + if (Hexagon::IntRegsRegClass.contains(R)) { + MCSuperRegIterator S(R, HRI, false); + MI->addOperand(MachineOperand::CreateReg(*S, true, true)); + } + return MII; + } + assert(ResourceTracker->canReserveResources(MI)); + + bool ExtMI = HII->isExtended(MI) || HII->isConstExtended(MI); + bool Good = true; + + if (GlueToNewValueJump) { + MachineInstr *NvjMI = ++MII; + // We need to put both instructions in the same packet: MI and NvjMI. + // Either of them can require a constant extender. Try to add both to + // the current packet, and if that fails, end the packet and start a + // new one. + ResourceTracker->reserveResources(MI); + if (ExtMI) + Good = tryAllocateResourcesForConstExt(true); + + bool ExtNvjMI = HII->isExtended(NvjMI) || HII->isConstExtended(NvjMI); + if (Good) { + if (ResourceTracker->canReserveResources(NvjMI)) + ResourceTracker->reserveResources(NvjMI); + else + Good = false; + } + if (Good && ExtNvjMI) + Good = tryAllocateResourcesForConstExt(true); - MachineBasicBlock::iterator MII = MI; - MachineBasicBlock *MBB = MI->getParent(); - - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - - if (GlueToNewValueJump) { - - ++MII; - MachineInstr *nvjMI = MII; + if (!Good) { + endPacket(MBB, MI); assert(ResourceTracker->canReserveResources(MI)); ResourceTracker->reserveResources(MI); - if ((QII->isExtended(MI) || QII->isConstExtended(MI)) && - !tryAllocateResourcesForConstExt(MI)) { - endPacket(MBB, MI); - ResourceTracker->reserveResources(MI); - assert(canReserveResourcesForConstExt(MI) && - "Ensure that there is a slot"); - reserveResourcesForConstExt(MI); - // Reserve resources for new value jump constant extender. - assert(canReserveResourcesForConstExt(MI) && - "Ensure that there is a slot"); - reserveResourcesForConstExt(nvjMI); - assert(ResourceTracker->canReserveResources(nvjMI) && - "Ensure that there is a slot"); - - } else if ( // Extended instruction takes two slots in the packet. - // Try reserve and allocate 4-byte in the current packet first. - (QII->isExtended(nvjMI) - && (!tryAllocateResourcesForConstExt(nvjMI) - || !ResourceTracker->canReserveResources(nvjMI))) - || // For non-extended instruction, no need to allocate extra 4 bytes. - (!QII->isExtended(nvjMI) && - !ResourceTracker->canReserveResources(nvjMI))) - { - endPacket(MBB, MI); - // A new and empty packet starts. - // We are sure that the resources requirements can be satisfied. - // Therefore, do not need to call "canReserveResources" anymore. - ResourceTracker->reserveResources(MI); - if (QII->isExtended(nvjMI)) - reserveResourcesForConstExt(nvjMI); + if (ExtMI) { + assert(canReserveResourcesForConstExt()); + tryAllocateResourcesForConstExt(true); } - // Here, we are sure that "reserveResources" would succeed. - ResourceTracker->reserveResources(nvjMI); - CurrentPacketMIs.push_back(MI); - CurrentPacketMIs.push_back(nvjMI); - } else { - if ( (QII->isExtended(MI) || QII->isConstExtended(MI)) - && ( !tryAllocateResourcesForConstExt(MI) - || !ResourceTracker->canReserveResources(MI))) - { - endPacket(MBB, MI); - // Check if the instruction was promoted to a dot-new. If so, demote it - // back into a dot-old - if (PromotedToDotNew) { - DemoteToDotOld(MI); - } - reserveResourcesForConstExt(MI); + assert(ResourceTracker->canReserveResources(NvjMI)); + ResourceTracker->reserveResources(NvjMI); + if (ExtNvjMI) { + assert(canReserveResourcesForConstExt()); + reserveResourcesForConstExt(); } - // In case that "MI" is not an extended insn, - // the resource availability has already been checked. - ResourceTracker->reserveResources(MI); - CurrentPacketMIs.push_back(MI); } + CurrentPacketMIs.push_back(MI); + CurrentPacketMIs.push_back(NvjMI); return MII; + } + + ResourceTracker->reserveResources(MI); + if (ExtMI && !tryAllocateResourcesForConstExt(true)) { + endPacket(MBB, MI); + if (PromotedToDotNew) + demoteToDotOld(MI); + ResourceTracker->reserveResources(MI); + reserveResourcesForConstExt(); + } + + CurrentPacketMIs.push_back(MI); + return MII; +} + +void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB, + MachineInstr *MI) { + OldPacketMIs = CurrentPacketMIs; + VLIWPacketizerList::endPacket(MBB, MI); +} + +bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr *MI) { + return !producesStall(MI); +} + + +// Return true when ConsMI uses a register defined by ProdMI. +static bool isDependent(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) { + if (!ProdMI->getOperand(0).isReg()) + return false; + unsigned DstReg = ProdMI->getOperand(0).getReg(); + + for (auto &Op : ConsMI->operands()) + if (Op.isReg() && Op.isUse() && Op.getReg() == DstReg) + // The MIs depend on each other. + return true; + + return false; } +// V60 forward scheduling. +bool HexagonPacketizerList::producesStall(const MachineInstr *I) { + // Check whether the previous packet is in a different loop. If this is the + // case, there is little point in trying to avoid a stall because that would + // favor the rare case (loop entry) over the common case (loop iteration). + // + // TODO: We should really be able to check all the incoming edges if this is + // the first packet in a basic block, so we can avoid stalls from the loop + // backedge. + if (!OldPacketMIs.empty()) { + auto *OldBB = OldPacketMIs.front()->getParent(); + auto *ThisBB = I->getParent(); + if (MLI->getLoopFor(OldBB) != MLI->getLoopFor(ThisBB)) + return false; + } + + // Check for stall between two vector instructions. + if (HII->isV60VectorInstruction(I)) { + for (auto J : OldPacketMIs) { + if (!HII->isV60VectorInstruction(J)) + continue; + if (isDependent(J, I) && !HII->isVecUsableNextPacket(J, I)) + return true; + } + return false; + } + + // Check for stall between two scalar instructions. First, check that + // there is no definition of a use in the current packet, because it + // may be a candidate for .new. + for (auto J : CurrentPacketMIs) + if (!HII->isV60VectorInstruction(J) && isDependent(J, I)) + return false; + + // Check for stall between I and instructions in the previous packet. + if (MF.getSubtarget<HexagonSubtarget>().useBSBScheduling()) { + for (auto J : OldPacketMIs) { + if (HII->isV60VectorInstruction(J)) + continue; + if (!HII->isLateInstrFeedsEarlyInstr(J, I)) + continue; + if (isDependent(J, I) && !HII->canExecuteInBundle(J, I)) + return true; + } + } + + return false; +} + + //===----------------------------------------------------------------------===// // Public Constructor Functions //===----------------------------------------------------------------------===// |