aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCMIPeephole.cpp')
-rw-r--r--llvm/lib/Target/PowerPC/PPCMIPeephole.cpp390
1 files changed, 276 insertions, 114 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index ac8ac060f460..74192cb20cd0 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -18,6 +18,8 @@
//
//===---------------------------------------------------------------------===//
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
@@ -26,12 +28,12 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
-#include "MCTargetDesc/PPCPredicates.h"
using namespace llvm;
@@ -160,33 +162,33 @@ static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
static unsigned
getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) {
unsigned Opcode = MI->getOpcode();
- if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICLo ||
- Opcode == PPC::RLDCL || Opcode == PPC::RLDCLo)
+ if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
+ Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec)
return MI->getOperand(3).getImm();
- if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDICo) &&
- MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
+ if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
+ MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
return MI->getOperand(3).getImm();
- if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo ||
- Opcode == PPC::RLWNM || Opcode == PPC::RLWNMo ||
+ if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
+ Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
- MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
+ MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
return 32 + MI->getOperand(3).getImm();
- if (Opcode == PPC::ANDIo) {
+ if (Opcode == PPC::ANDI_rec) {
uint16_t Imm = MI->getOperand(2).getImm();
return 48 + countLeadingZeros(Imm);
}
- if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZWo ||
- Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZWo ||
+ if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZW_rec ||
+ Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZW_rec ||
Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
// The result ranges from 0 to 32.
return 58;
- if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZDo ||
- Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZDo)
+ if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZD_rec ||
+ Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZD_rec)
// The result ranges from 0 to 64.
return 57;
@@ -331,108 +333,121 @@ bool PPCMIPeephole::simplifyCode(void) {
// is identified by an immediate value of 0 or 3.
int Immed = MI.getOperand(3).getImm();
- if (Immed != 1) {
-
- // For each of these simplifications, we need the two source
- // regs to match. Unfortunately, MachineCSE ignores COPY and
- // SUBREG_TO_REG, so for example we can see
- // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
- // We have to look through chains of COPY and SUBREG_TO_REG
- // to find the real source values for comparison.
- unsigned TrueReg1 =
- TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
- unsigned TrueReg2 =
- TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
-
- if (TrueReg1 == TrueReg2 && Register::isVirtualRegister(TrueReg1)) {
- MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
- unsigned DefOpc = DefMI ? DefMI->getOpcode() : 0;
-
- // If this is a splat fed by a splatting load, the splat is
- // redundant. Replace with a copy. This doesn't happen directly due
- // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
- // a load of a double to a vector of 64-bit integers.
- auto isConversionOfLoadAndSplat = [=]() -> bool {
- if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
- return false;
- unsigned DefReg =
- TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
- if (Register::isVirtualRegister(DefReg)) {
- MachineInstr *LoadMI = MRI->getVRegDef(DefReg);
- if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
- return true;
- }
- return false;
- };
- if (DefMI && (Immed == 0 || Immed == 3)) {
- if (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat()) {
- LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat "
- "to load-and-splat/copy: ");
- LLVM_DEBUG(MI.dump());
- BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
- MI.getOperand(0).getReg())
- .add(MI.getOperand(1));
- ToErase = &MI;
- Simplified = true;
- }
- }
+ if (Immed == 1)
+ break;
- // If this is a splat or a swap fed by another splat, we
- // can replace it with a copy.
- if (DefOpc == PPC::XXPERMDI) {
- unsigned FeedImmed = DefMI->getOperand(3).getImm();
- unsigned FeedReg1 =
- TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
- unsigned FeedReg2 =
- TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
-
- if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
- LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat "
- "to splat/copy: ");
- LLVM_DEBUG(MI.dump());
- BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
- MI.getOperand(0).getReg())
- .add(MI.getOperand(1));
- ToErase = &MI;
- Simplified = true;
- }
-
- // If this is a splat fed by a swap, we can simplify modify
- // the splat to splat the other value from the swap's input
- // parameter.
- else if ((Immed == 0 || Immed == 3)
- && FeedImmed == 2 && FeedReg1 == FeedReg2) {
- LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
- LLVM_DEBUG(MI.dump());
- MI.getOperand(1).setReg(DefMI->getOperand(1).getReg());
- MI.getOperand(2).setReg(DefMI->getOperand(2).getReg());
- MI.getOperand(3).setImm(3 - Immed);
- Simplified = true;
- }
-
- // If this is a swap fed by a swap, we can replace it
- // with a copy from the first swap's input.
- else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
- LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
- LLVM_DEBUG(MI.dump());
- BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
- MI.getOperand(0).getReg())
- .add(DefMI->getOperand(1));
- ToErase = &MI;
- Simplified = true;
- }
- } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs &&
- (DefMI->getOperand(2).getImm() == 0 ||
- DefMI->getOperand(2).getImm() == 3)) {
- // Splat fed by another splat - switch the output of the first
- // and remove the second.
- DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
- ToErase = &MI;
- Simplified = true;
- LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
- LLVM_DEBUG(MI.dump());
- }
+ // For each of these simplifications, we need the two source
+ // regs to match. Unfortunately, MachineCSE ignores COPY and
+ // SUBREG_TO_REG, so for example we can see
+ // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
+ // We have to look through chains of COPY and SUBREG_TO_REG
+ // to find the real source values for comparison.
+ unsigned TrueReg1 =
+ TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
+ unsigned TrueReg2 =
+ TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
+
+ if (!(TrueReg1 == TrueReg2 && Register::isVirtualRegister(TrueReg1)))
+ break;
+
+ MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
+
+ if (!DefMI)
+ break;
+
+ unsigned DefOpc = DefMI->getOpcode();
+
+ // If this is a splat fed by a splatting load, the splat is
+ // redundant. Replace with a copy. This doesn't happen directly due
+ // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
+ // a load of a double to a vector of 64-bit integers.
+ auto isConversionOfLoadAndSplat = [=]() -> bool {
+ if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
+ return false;
+ unsigned FeedReg1 =
+ TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
+ if (Register::isVirtualRegister(FeedReg1)) {
+ MachineInstr *LoadMI = MRI->getVRegDef(FeedReg1);
+ if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
+ return true;
+ }
+ return false;
+ };
+ if ((Immed == 0 || Immed == 3) &&
+ (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat())) {
+ LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat "
+ "to load-and-splat/copy: ");
+ LLVM_DEBUG(MI.dump());
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
+ MI.getOperand(0).getReg())
+ .add(MI.getOperand(1));
+ ToErase = &MI;
+ Simplified = true;
+ }
+
+ // If this is a splat or a swap fed by another splat, we
+ // can replace it with a copy.
+ if (DefOpc == PPC::XXPERMDI) {
+ unsigned DefReg1 = DefMI->getOperand(1).getReg();
+ unsigned DefReg2 = DefMI->getOperand(2).getReg();
+ unsigned DefImmed = DefMI->getOperand(3).getImm();
+
+ // If the two inputs are not the same register, check to see if
+ // they originate from the same virtual register after only
+ // copy-like instructions.
+ if (DefReg1 != DefReg2) {
+ unsigned FeedReg1 = TRI->lookThruCopyLike(DefReg1, MRI);
+ unsigned FeedReg2 = TRI->lookThruCopyLike(DefReg2, MRI);
+
+ if (!(FeedReg1 == FeedReg2 &&
+ Register::isVirtualRegister(FeedReg1)))
+ break;
+ }
+
+ if (DefImmed == 0 || DefImmed == 3) {
+ LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat "
+ "to splat/copy: ");
+ LLVM_DEBUG(MI.dump());
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
+ MI.getOperand(0).getReg())
+ .add(MI.getOperand(1));
+ ToErase = &MI;
+ Simplified = true;
}
+
+ // If this is a splat fed by a swap, we can simplify modify
+ // the splat to splat the other value from the swap's input
+ // parameter.
+ else if ((Immed == 0 || Immed == 3) && DefImmed == 2) {
+ LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
+ LLVM_DEBUG(MI.dump());
+ MI.getOperand(1).setReg(DefReg1);
+ MI.getOperand(2).setReg(DefReg2);
+ MI.getOperand(3).setImm(3 - Immed);
+ Simplified = true;
+ }
+
+ // If this is a swap fed by a swap, we can replace it
+ // with a copy from the first swap's input.
+ else if (Immed == 2 && DefImmed == 2) {
+ LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
+ LLVM_DEBUG(MI.dump());
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
+ MI.getOperand(0).getReg())
+ .add(DefMI->getOperand(1));
+ ToErase = &MI;
+ Simplified = true;
+ }
+ } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs &&
+ (DefMI->getOperand(2).getImm() == 0 ||
+ DefMI->getOperand(2).getImm() == 3)) {
+ // Splat fed by another splat - switch the output of the first
+ // and remove the second.
+ DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
+ ToErase = &MI;
+ Simplified = true;
+ LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
+ LLVM_DEBUG(MI.dump());
}
break;
}
@@ -805,6 +820,153 @@ bool PPCMIPeephole::simplifyCode(void) {
combineSEXTAndSHL(MI, ToErase);
break;
}
+ case PPC::RLWINM:
+ case PPC::RLWINM_rec:
+ case PPC::RLWINM8:
+ case PPC::RLWINM8_rec: {
+ unsigned FoldingReg = MI.getOperand(1).getReg();
+ if (!Register::isVirtualRegister(FoldingReg))
+ break;
+
+ MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg);
+ if (SrcMI->getOpcode() != PPC::RLWINM &&
+ SrcMI->getOpcode() != PPC::RLWINM_rec &&
+ SrcMI->getOpcode() != PPC::RLWINM8 &&
+ SrcMI->getOpcode() != PPC::RLWINM8_rec)
+ break;
+ assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
+ MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&
+ SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&
+ "Invalid PPC::RLWINM Instruction!");
+ uint64_t SHSrc = SrcMI->getOperand(2).getImm();
+ uint64_t SHMI = MI.getOperand(2).getImm();
+ uint64_t MBSrc = SrcMI->getOperand(3).getImm();
+ uint64_t MBMI = MI.getOperand(3).getImm();
+ uint64_t MESrc = SrcMI->getOperand(4).getImm();
+ uint64_t MEMI = MI.getOperand(4).getImm();
+
+ assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) &&
+ "Invalid PPC::RLWINM Instruction!");
+
+ // If MBMI is bigger than MEMI, we always can not get run of ones.
+ // RotatedSrcMask non-wrap:
+ // 0........31|32........63
+ // RotatedSrcMask: B---E B---E
+ // MaskMI: -----------|--E B------
+ // Result: ----- --- (Bad candidate)
+ //
+ // RotatedSrcMask wrap:
+ // 0........31|32........63
+ // RotatedSrcMask: --E B----|--E B----
+ // MaskMI: -----------|--E B------
+ // Result: --- -----|--- ----- (Bad candidate)
+ //
+ // One special case is RotatedSrcMask is a full set mask.
+ // RotatedSrcMask full:
+ // 0........31|32........63
+ // RotatedSrcMask: ------EB---|-------EB---
+ // MaskMI: -----------|--E B------
+ // Result: -----------|--- ------- (Good candidate)
+
+ // Mark special case.
+ bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);
+
+ // For other MBMI > MEMI cases, just return.
+ if ((MBMI > MEMI) && !SrcMaskFull)
+ break;
+
+ // Handle MBMI <= MEMI cases.
+ APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);
+ // In MI, we only need low 32 bits of SrcMI, just consider about low 32
+ // bit of SrcMI mask. Note that in APInt, lowerest bit is at index 0,
+ // while in PowerPC ISA, lowerest bit is at index 63.
+ APInt MaskSrc =
+ APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);
+ // Current APInt::getBitsSetWithWrap sets all bits to 0 if loBit is
+ // equal to highBit.
+ // If MBSrc - MESrc == 1, we expect a full set mask instead of Null.
+ if (SrcMaskFull && (MBSrc - MESrc == 1))
+ MaskSrc.setAllBits();
+
+ APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
+ APInt FinalMask = RotatedSrcMask & MaskMI;
+ uint32_t NewMB, NewME;
+
+ // If final mask is 0, MI result should be 0 too.
+ if (FinalMask.isNullValue()) {
+ bool Is64Bit = (MI.getOpcode() == PPC::RLWINM8 ||
+ MI.getOpcode() == PPC::RLWINM8_rec);
+
+ Simplified = true;
+
+ LLVM_DEBUG(dbgs() << "Replace Instr: ");
+ LLVM_DEBUG(MI.dump());
+
+ if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) {
+ // Replace MI with "LI 0"
+ MI.RemoveOperand(4);
+ MI.RemoveOperand(3);
+ MI.RemoveOperand(2);
+ MI.getOperand(1).ChangeToImmediate(0);
+ MI.setDesc(TII->get(Is64Bit ? PPC::LI8 : PPC::LI));
+ } else {
+ // Replace MI with "ANDI_rec reg, 0"
+ MI.RemoveOperand(4);
+ MI.RemoveOperand(3);
+ MI.getOperand(2).setImm(0);
+ MI.setDesc(TII->get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
+ MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
+ if (SrcMI->getOperand(1).isKill()) {
+ MI.getOperand(1).setIsKill(true);
+ SrcMI->getOperand(1).setIsKill(false);
+ } else
+ // About to replace MI.getOperand(1), clear its kill flag.
+ MI.getOperand(1).setIsKill(false);
+ }
+
+ LLVM_DEBUG(dbgs() << "With: ");
+ LLVM_DEBUG(MI.dump());
+ } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB,
+ NewME) && NewMB <= NewME)|| SrcMaskFull) {
+ // Here we only handle MBMI <= MEMI case, so NewMB must be no bigger
+ // than NewME. Otherwise we get a 64 bit value after folding, but MI
+ // return a 32 bit value.
+
+ Simplified = true;
+ LLVM_DEBUG(dbgs() << "Converting Instr: ");
+ LLVM_DEBUG(MI.dump());
+
+ uint16_t NewSH = (SHSrc + SHMI) % 32;
+ MI.getOperand(2).setImm(NewSH);
+ // If SrcMI mask is full, no need to update MBMI and MEMI.
+ if (!SrcMaskFull) {
+ MI.getOperand(3).setImm(NewMB);
+ MI.getOperand(4).setImm(NewME);
+ }
+ MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
+ if (SrcMI->getOperand(1).isKill()) {
+ MI.getOperand(1).setIsKill(true);
+ SrcMI->getOperand(1).setIsKill(false);
+ } else
+ // About to replace MI.getOperand(1), clear its kill flag.
+ MI.getOperand(1).setIsKill(false);
+
+ LLVM_DEBUG(dbgs() << "To: ");
+ LLVM_DEBUG(MI.dump());
+ }
+ if (Simplified) {
+ // If FoldingReg has no non-debug use and it has no implicit def (it
+ // is not RLWINMO or RLWINM8o), it's safe to delete its def SrcMI.
+ // Otherwise keep it.
+ ++NumRotatesCollapsed;
+ if (MRI->use_nodbg_empty(FoldingReg) && !SrcMI->hasImplicitDef()) {
+ ToErase = SrcMI;
+ LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
+ LLVM_DEBUG(SrcMI->dump());
+ }
+ }
+ break;
+ }
}
}