diff options
Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCMIPeephole.cpp')
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCMIPeephole.cpp | 390 |
1 files changed, 276 insertions, 114 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp index ac8ac060f460..74192cb20cd0 100644 --- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -18,6 +18,8 @@ // //===---------------------------------------------------------------------===// +#include "MCTargetDesc/PPCMCTargetDesc.h" +#include "MCTargetDesc/PPCPredicates.h" #include "PPC.h" #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" @@ -26,12 +28,12 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" -#include "MCTargetDesc/PPCPredicates.h" using namespace llvm; @@ -160,33 +162,33 @@ static MachineInstr *getVRegDefOrNull(MachineOperand *Op, static unsigned getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) { unsigned Opcode = MI->getOpcode(); - if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICLo || - Opcode == PPC::RLDCL || Opcode == PPC::RLDCLo) + if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec || + Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec) return MI->getOperand(3).getImm(); - if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDICo) && - MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm()) + if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) && + MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm()) return MI->getOperand(3).getImm(); - if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo || - Opcode == PPC::RLWNM || Opcode == PPC::RLWNMo || + if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec || + Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec || Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) && - MI->getOperand(3).getImm() <= MI->getOperand(4).getImm()) + MI->getOperand(3).getImm() <= MI->getOperand(4).getImm()) return 32 + MI->getOperand(3).getImm(); - if (Opcode == PPC::ANDIo) { + if (Opcode == PPC::ANDI_rec) { uint16_t Imm = MI->getOperand(2).getImm(); return 48 + countLeadingZeros(Imm); } - if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZWo || - Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZWo || + if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZW_rec || + Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZW_rec || Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8) // The result ranges from 0 to 32. return 58; - if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZDo || - Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZDo) + if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZD_rec || + Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZD_rec) // The result ranges from 0 to 64. return 57; @@ -331,108 +333,121 @@ bool PPCMIPeephole::simplifyCode(void) { // is identified by an immediate value of 0 or 3. int Immed = MI.getOperand(3).getImm(); - if (Immed != 1) { - - // For each of these simplifications, we need the two source - // regs to match. Unfortunately, MachineCSE ignores COPY and - // SUBREG_TO_REG, so for example we can see - // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed. - // We have to look through chains of COPY and SUBREG_TO_REG - // to find the real source values for comparison. - unsigned TrueReg1 = - TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI); - unsigned TrueReg2 = - TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI); - - if (TrueReg1 == TrueReg2 && Register::isVirtualRegister(TrueReg1)) { - MachineInstr *DefMI = MRI->getVRegDef(TrueReg1); - unsigned DefOpc = DefMI ? DefMI->getOpcode() : 0; - - // If this is a splat fed by a splatting load, the splat is - // redundant. Replace with a copy. This doesn't happen directly due - // to code in PPCDAGToDAGISel.cpp, but it can happen when converting - // a load of a double to a vector of 64-bit integers. - auto isConversionOfLoadAndSplat = [=]() -> bool { - if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS) - return false; - unsigned DefReg = - TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI); - if (Register::isVirtualRegister(DefReg)) { - MachineInstr *LoadMI = MRI->getVRegDef(DefReg); - if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX) - return true; - } - return false; - }; - if (DefMI && (Immed == 0 || Immed == 3)) { - if (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat()) { - LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat " - "to load-and-splat/copy: "); - LLVM_DEBUG(MI.dump()); - BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), - MI.getOperand(0).getReg()) - .add(MI.getOperand(1)); - ToErase = &MI; - Simplified = true; - } - } + if (Immed == 1) + break; - // If this is a splat or a swap fed by another splat, we - // can replace it with a copy. - if (DefOpc == PPC::XXPERMDI) { - unsigned FeedImmed = DefMI->getOperand(3).getImm(); - unsigned FeedReg1 = - TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI); - unsigned FeedReg2 = - TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI); - - if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) { - LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat " - "to splat/copy: "); - LLVM_DEBUG(MI.dump()); - BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), - MI.getOperand(0).getReg()) - .add(MI.getOperand(1)); - ToErase = &MI; - Simplified = true; - } - - // If this is a splat fed by a swap, we can simplify modify - // the splat to splat the other value from the swap's input - // parameter. - else if ((Immed == 0 || Immed == 3) - && FeedImmed == 2 && FeedReg1 == FeedReg2) { - LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: "); - LLVM_DEBUG(MI.dump()); - MI.getOperand(1).setReg(DefMI->getOperand(1).getReg()); - MI.getOperand(2).setReg(DefMI->getOperand(2).getReg()); - MI.getOperand(3).setImm(3 - Immed); - Simplified = true; - } - - // If this is a swap fed by a swap, we can replace it - // with a copy from the first swap's input. - else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) { - LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: "); - LLVM_DEBUG(MI.dump()); - BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), - MI.getOperand(0).getReg()) - .add(DefMI->getOperand(1)); - ToErase = &MI; - Simplified = true; - } - } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs && - (DefMI->getOperand(2).getImm() == 0 || - DefMI->getOperand(2).getImm() == 3)) { - // Splat fed by another splat - switch the output of the first - // and remove the second. - DefMI->getOperand(0).setReg(MI.getOperand(0).getReg()); - ToErase = &MI; - Simplified = true; - LLVM_DEBUG(dbgs() << "Removing redundant splat: "); - LLVM_DEBUG(MI.dump()); - } + // For each of these simplifications, we need the two source + // regs to match. Unfortunately, MachineCSE ignores COPY and + // SUBREG_TO_REG, so for example we can see + // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed. + // We have to look through chains of COPY and SUBREG_TO_REG + // to find the real source values for comparison. + unsigned TrueReg1 = + TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI); + unsigned TrueReg2 = + TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI); + + if (!(TrueReg1 == TrueReg2 && Register::isVirtualRegister(TrueReg1))) + break; + + MachineInstr *DefMI = MRI->getVRegDef(TrueReg1); + + if (!DefMI) + break; + + unsigned DefOpc = DefMI->getOpcode(); + + // If this is a splat fed by a splatting load, the splat is + // redundant. Replace with a copy. This doesn't happen directly due + // to code in PPCDAGToDAGISel.cpp, but it can happen when converting + // a load of a double to a vector of 64-bit integers. + auto isConversionOfLoadAndSplat = [=]() -> bool { + if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS) + return false; + unsigned FeedReg1 = + TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI); + if (Register::isVirtualRegister(FeedReg1)) { + MachineInstr *LoadMI = MRI->getVRegDef(FeedReg1); + if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX) + return true; + } + return false; + }; + if ((Immed == 0 || Immed == 3) && + (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat())) { + LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat " + "to load-and-splat/copy: "); + LLVM_DEBUG(MI.dump()); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), + MI.getOperand(0).getReg()) + .add(MI.getOperand(1)); + ToErase = &MI; + Simplified = true; + } + + // If this is a splat or a swap fed by another splat, we + // can replace it with a copy. + if (DefOpc == PPC::XXPERMDI) { + unsigned DefReg1 = DefMI->getOperand(1).getReg(); + unsigned DefReg2 = DefMI->getOperand(2).getReg(); + unsigned DefImmed = DefMI->getOperand(3).getImm(); + + // If the two inputs are not the same register, check to see if + // they originate from the same virtual register after only + // copy-like instructions. + if (DefReg1 != DefReg2) { + unsigned FeedReg1 = TRI->lookThruCopyLike(DefReg1, MRI); + unsigned FeedReg2 = TRI->lookThruCopyLike(DefReg2, MRI); + + if (!(FeedReg1 == FeedReg2 && + Register::isVirtualRegister(FeedReg1))) + break; + } + + if (DefImmed == 0 || DefImmed == 3) { + LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat " + "to splat/copy: "); + LLVM_DEBUG(MI.dump()); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), + MI.getOperand(0).getReg()) + .add(MI.getOperand(1)); + ToErase = &MI; + Simplified = true; } + + // If this is a splat fed by a swap, we can simplify modify + // the splat to splat the other value from the swap's input + // parameter. + else if ((Immed == 0 || Immed == 3) && DefImmed == 2) { + LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: "); + LLVM_DEBUG(MI.dump()); + MI.getOperand(1).setReg(DefReg1); + MI.getOperand(2).setReg(DefReg2); + MI.getOperand(3).setImm(3 - Immed); + Simplified = true; + } + + // If this is a swap fed by a swap, we can replace it + // with a copy from the first swap's input. + else if (Immed == 2 && DefImmed == 2) { + LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: "); + LLVM_DEBUG(MI.dump()); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), + MI.getOperand(0).getReg()) + .add(DefMI->getOperand(1)); + ToErase = &MI; + Simplified = true; + } + } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs && + (DefMI->getOperand(2).getImm() == 0 || + DefMI->getOperand(2).getImm() == 3)) { + // Splat fed by another splat - switch the output of the first + // and remove the second. + DefMI->getOperand(0).setReg(MI.getOperand(0).getReg()); + ToErase = &MI; + Simplified = true; + LLVM_DEBUG(dbgs() << "Removing redundant splat: "); + LLVM_DEBUG(MI.dump()); } break; } @@ -805,6 +820,153 @@ bool PPCMIPeephole::simplifyCode(void) { combineSEXTAndSHL(MI, ToErase); break; } + case PPC::RLWINM: + case PPC::RLWINM_rec: + case PPC::RLWINM8: + case PPC::RLWINM8_rec: { + unsigned FoldingReg = MI.getOperand(1).getReg(); + if (!Register::isVirtualRegister(FoldingReg)) + break; + + MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg); + if (SrcMI->getOpcode() != PPC::RLWINM && + SrcMI->getOpcode() != PPC::RLWINM_rec && + SrcMI->getOpcode() != PPC::RLWINM8 && + SrcMI->getOpcode() != PPC::RLWINM8_rec) + break; + assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() && + MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() && + SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) && + "Invalid PPC::RLWINM Instruction!"); + uint64_t SHSrc = SrcMI->getOperand(2).getImm(); + uint64_t SHMI = MI.getOperand(2).getImm(); + uint64_t MBSrc = SrcMI->getOperand(3).getImm(); + uint64_t MBMI = MI.getOperand(3).getImm(); + uint64_t MESrc = SrcMI->getOperand(4).getImm(); + uint64_t MEMI = MI.getOperand(4).getImm(); + + assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) && + "Invalid PPC::RLWINM Instruction!"); + + // If MBMI is bigger than MEMI, we always can not get run of ones. + // RotatedSrcMask non-wrap: + // 0........31|32........63 + // RotatedSrcMask: B---E B---E + // MaskMI: -----------|--E B------ + // Result: ----- --- (Bad candidate) + // + // RotatedSrcMask wrap: + // 0........31|32........63 + // RotatedSrcMask: --E B----|--E B---- + // MaskMI: -----------|--E B------ + // Result: --- -----|--- ----- (Bad candidate) + // + // One special case is RotatedSrcMask is a full set mask. + // RotatedSrcMask full: + // 0........31|32........63 + // RotatedSrcMask: ------EB---|-------EB--- + // MaskMI: -----------|--E B------ + // Result: -----------|--- ------- (Good candidate) + + // Mark special case. + bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31); + + // For other MBMI > MEMI cases, just return. + if ((MBMI > MEMI) && !SrcMaskFull) + break; + + // Handle MBMI <= MEMI cases. + APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI); + // In MI, we only need low 32 bits of SrcMI, just consider about low 32 + // bit of SrcMI mask. Note that in APInt, lowerest bit is at index 0, + // while in PowerPC ISA, lowerest bit is at index 63. + APInt MaskSrc = + APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc); + // Current APInt::getBitsSetWithWrap sets all bits to 0 if loBit is + // equal to highBit. + // If MBSrc - MESrc == 1, we expect a full set mask instead of Null. + if (SrcMaskFull && (MBSrc - MESrc == 1)) + MaskSrc.setAllBits(); + + APInt RotatedSrcMask = MaskSrc.rotl(SHMI); + APInt FinalMask = RotatedSrcMask & MaskMI; + uint32_t NewMB, NewME; + + // If final mask is 0, MI result should be 0 too. + if (FinalMask.isNullValue()) { + bool Is64Bit = (MI.getOpcode() == PPC::RLWINM8 || + MI.getOpcode() == PPC::RLWINM8_rec); + + Simplified = true; + + LLVM_DEBUG(dbgs() << "Replace Instr: "); + LLVM_DEBUG(MI.dump()); + + if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) { + // Replace MI with "LI 0" + MI.RemoveOperand(4); + MI.RemoveOperand(3); + MI.RemoveOperand(2); + MI.getOperand(1).ChangeToImmediate(0); + MI.setDesc(TII->get(Is64Bit ? PPC::LI8 : PPC::LI)); + } else { + // Replace MI with "ANDI_rec reg, 0" + MI.RemoveOperand(4); + MI.RemoveOperand(3); + MI.getOperand(2).setImm(0); + MI.setDesc(TII->get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec)); + MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg()); + if (SrcMI->getOperand(1).isKill()) { + MI.getOperand(1).setIsKill(true); + SrcMI->getOperand(1).setIsKill(false); + } else + // About to replace MI.getOperand(1), clear its kill flag. + MI.getOperand(1).setIsKill(false); + } + + LLVM_DEBUG(dbgs() << "With: "); + LLVM_DEBUG(MI.dump()); + } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB, + NewME) && NewMB <= NewME)|| SrcMaskFull) { + // Here we only handle MBMI <= MEMI case, so NewMB must be no bigger + // than NewME. Otherwise we get a 64 bit value after folding, but MI + // return a 32 bit value. + + Simplified = true; + LLVM_DEBUG(dbgs() << "Converting Instr: "); + LLVM_DEBUG(MI.dump()); + + uint16_t NewSH = (SHSrc + SHMI) % 32; + MI.getOperand(2).setImm(NewSH); + // If SrcMI mask is full, no need to update MBMI and MEMI. + if (!SrcMaskFull) { + MI.getOperand(3).setImm(NewMB); + MI.getOperand(4).setImm(NewME); + } + MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg()); + if (SrcMI->getOperand(1).isKill()) { + MI.getOperand(1).setIsKill(true); + SrcMI->getOperand(1).setIsKill(false); + } else + // About to replace MI.getOperand(1), clear its kill flag. + MI.getOperand(1).setIsKill(false); + + LLVM_DEBUG(dbgs() << "To: "); + LLVM_DEBUG(MI.dump()); + } + if (Simplified) { + // If FoldingReg has no non-debug use and it has no implicit def (it + // is not RLWINMO or RLWINM8o), it's safe to delete its def SrcMI. + // Otherwise keep it. + ++NumRotatesCollapsed; + if (MRI->use_nodbg_empty(FoldingReg) && !SrcMI->hasImplicitDef()) { + ToErase = SrcMI; + LLVM_DEBUG(dbgs() << "Delete dead instruction: "); + LLVM_DEBUG(SrcMI->dump()); + } + } + break; + } } } |