aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/PowerPC/PPCISelDAGToDAG.cpp')
-rw-r--r-- lib/Target/PowerPC/PPCISelDAGToDAG.cpp 140
1 files changed, 106 insertions, 34 deletions
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 932226842bb7..1eaa8118ba0a 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -16,6 +16,8 @@
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCTargetMachine.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -52,6 +54,11 @@ static cl::opt<bool> BPermRewriterNoMasking(
"bit permutations"),
cl::Hidden);
+// Hidden command-line switch, defaulting to true, that gates the calls to
+// getBranchHint() made while selecting conditional branches.
+static cl::opt<bool>
+    EnableBranchHint("ppc-use-branch-hint", cl::Hidden, cl::init(true),
+                     cl::desc("Enable static hinting of branches on ppc"));
+
namespace llvm {
void initializePPCDAGToDAGISelPass(PassRegistry&);
}
@@ -286,7 +293,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
// Find all return blocks, outputting a restore in each epilog.
for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
- if (!BB->empty() && BB->back().isReturn()) {
+ if (BB->isReturnBlock()) {
IP = BB->end(); --IP;
// Skip over all terminator instructions, which are part of the return
@@ -393,6 +400,55 @@ static bool isInt32Immediate(SDValue N, unsigned &Imm) {
return isInt32Immediate(N.getNode(), Imm);
}
+/// Pick a static prediction hint for a conditional branch whose target is
+/// \p DestMBB, based on the IR-level branch probabilities recorded in
+/// \p FuncInfo.
+///
+/// \param PCC       predicate code of the branch (not consulted here).
+/// \param FuncInfo  lowering state; supplies the current block and the
+///                  BranchProbabilityInfo (which may be absent).
+/// \param DestMBB   must wrap a BasicBlockSDNode naming the branch target.
+/// \returns PPC::BR_TAKEN_HINT or PPC::BR_NONTAKEN_HINT when the two outgoing
+///          edges are lopsided enough, PPC::BR_NO_HINT otherwise.
+static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,
+                              const SDValue &DestMBB) {
+  assert(isa<BasicBlockSDNode>(DestMBB));
+
+  // Without probability information there is nothing to base a hint on.
+  auto *ProbInfo = FuncInfo->BPI;
+  if (!ProbInfo)
+    return PPC::BR_NO_HINT;
+
+  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
+  const TerminatorInst *BBTerm = BB->getTerminator();
+
+  // Only two-way terminators are considered.
+  if (BBTerm->getNumSuccessors() != 2)
+    return PPC::BR_NO_HINT;
+
+  const BasicBlock *TBB = BBTerm->getSuccessor(0);
+  const BasicBlock *FBB = BBTerm->getSuccessor(1);
+
+  auto TProb = ProbInfo->getEdgeProbability(BB, TBB);
+  auto FProb = ProbInfo->getEdgeProbability(BB, FBB);
+
+  // Hints are emitted only for branches that are trivially predictable at
+  // compile time, such as the path to a C++ throw or to a call that never
+  // returns (e.g. stdlib exit()). The threshold below rejects everything
+  // else.
+  //
+  // LLVM branch weight table; only cases 1 and 2 are meant to pass:
+  //
+  // Case                  Taken:Nontaken  Example
+  // 1. Unreachable        1048575:1       C++ throw, stdlib exit(),
+  // 2. Invoke-terminating 1:1048575
+  // 3. Coldblock          4:64            __builtin_expect
+  // 4. Loop Branch        124:4           For loop
+  // 5. PH/ZH/FPH          20:12
+  const uint32_t Threshold = 10000;
+
+  const auto LikelyProb = std::max(TProb, FProb);
+  const auto UnlikelyProb = std::min(TProb, FProb);
+  if (LikelyProb / Threshold < UnlikelyProb)
+    return PPC::BR_NO_HINT;
+
+  DEBUG(dbgs() << "Use branch hint for '" << FuncInfo->Fn->getName() << "::"
+               << BB->getName() << "'\n"
+               << " -> " << TBB->getName() << ": " << TProb << "\n"
+               << " -> " << FBB->getName() << ": " << FProb << "\n");
+
+  // Express the comparison relative to the branch destination: if the
+  // destination is the first successor its probability is TProb, otherwise
+  // the destination is treated as the second successor and FProb is used.
+  const MachineBasicBlock *DestBlock =
+      cast<BasicBlockSDNode>(DestMBB)->getBasicBlock();
+  const bool DestIsFirstSucc = DestBlock->getBasicBlock() == TBB;
+
+  auto DestProb = DestIsFirstSucc ? TProb : FProb;
+  auto OtherProb = DestIsFirstSucc ? FProb : TProb;
+
+  if (DestProb > OtherProb)
+    return PPC::BR_TAKEN_HINT;
+  return PPC::BR_NONTAKEN_HINT;
+}
// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has a immediate integer right operand.
@@ -564,7 +620,6 @@ static unsigned SelectInt64CountDirect(int64_t Imm) {
// Handle first 32 bits.
unsigned Lo = Imm & 0xFFFF;
- unsigned Hi = (Imm >> 16) & 0xFFFF;
// Simple value.
if (isInt<16>(Imm)) {
@@ -586,9 +641,9 @@ static unsigned SelectInt64CountDirect(int64_t Imm) {
++Result;
// Add in the last bits as required.
- if ((Hi = (Remainder >> 16) & 0xFFFF))
+ if ((Remainder >> 16) & 0xFFFF)
++Result;
- if ((Lo = Remainder & 0xFFFF))
+ if (Remainder & 0xFFFF)
++Result;
return Result;
@@ -1028,7 +1083,7 @@ class BitPermutationSelector {
BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
- DEBUG(dbgs() << "\tcombining final bit group with inital one\n");
+ DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
BitGroups.erase(BitGroups.begin());
}
@@ -1557,10 +1612,7 @@ class BitPermutationSelector {
return false;
}
- if (VRI.RLAmt != EffRLAmt)
- return false;
-
- return true;
+ return VRI.RLAmt == EffRLAmt;
};
for (auto &BG : BitGroups) {
@@ -2781,7 +2833,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
N->getValueType(0) == MVT::v2i64)) {
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
-
+
SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
unsigned DM[2];
@@ -2798,7 +2850,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
SDValue Base, Offset;
- if (LD->isUnindexed() &&
+ if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
(LD->getMemoryVT() == MVT::f64 ||
LD->getMemoryVT() == MVT::i64) &&
SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
@@ -2841,8 +2893,11 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
// Op #3 is the Dest MBB
// Op #4 is the Flag.
// Prevent PPC::PRED_* from being selected into LI.
- SDValue Pred =
- getI32Imm(cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(), dl);
+ unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ if (EnableBranchHint)
+ PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(3));
+
+ SDValue Pred = getI32Imm(PCC, dl);
SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
N->getOperand(0), N->getOperand(4) };
return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
@@ -2871,6 +2926,9 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
BitComp, N->getOperand(4), N->getOperand(0));
}
+ if (EnableBranchHint)
+ PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(4));
+
SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
N->getOperand(4), N->getOperand(0) };
@@ -2903,9 +2961,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
break;
// The first source operand is a TargetGlobalAddress or a TargetJumpTable.
- // If it is an externally defined symbol, a symbol with common linkage,
- // a non-local function address, or a jump table address, or if we are
- // generating code for large code model, we generate:
+ // If it must be toc-referenced according to PPCSubTarget, we generate:
// LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>))
// Otherwise we generate:
// ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>)
@@ -2920,13 +2976,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
MVT::i64, GA, SDValue(Tmp, 0)));
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
- const GlobalValue *GValue = G->getGlobal();
- if ((GValue->getType()->getElementType()->isFunctionTy() &&
- !GValue->isStrongDefinitionForLinker()) ||
- GValue->isDeclaration() || GValue->hasCommonLinkage() ||
- GValue->hasAvailableExternallyLinkage())
+ const GlobalValue *GV = G->getGlobal();
+ unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);
+ if (GVFlags & PPCII::MO_NLP_FLAG) {
return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl,
MVT::i64, GA, SDValue(Tmp, 0)));
+ }
}
return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
@@ -3110,7 +3165,7 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
if (!CurDAG->MaskedValueIsZero(Op0,
APInt::getHighBitsSet(Bits, Bits - (b+1)*8)))
return false;
-
+
LHS = Op0.getOperand(0);
RHS = Op0.getOperand(1);
return true;
@@ -3305,7 +3360,7 @@ void PPCDAGToDAGISel::PreprocessISelDAG() {
bool MadeChange = false;
while (Position != CurDAG->allnodes_begin()) {
- SDNode *N = --Position;
+ SDNode *N = &*--Position;
if (N->use_empty())
continue;
@@ -3989,7 +4044,7 @@ void PPCDAGToDAGISel::PeepholePPC64ZExt() {
bool MadeChange = false;
while (Position != CurDAG->allnodes_begin()) {
- SDNode *N = --Position;
+ SDNode *N = &*--Position;
// Skip dead nodes and any non-machine opcodes.
if (N->use_empty() || !N->isMachineOpcode())
continue;
@@ -4145,7 +4200,7 @@ void PPCDAGToDAGISel::PeepholePPC64() {
++Position;
while (Position != CurDAG->allnodes_begin()) {
- SDNode *N = --Position;
+ SDNode *N = &*--Position;
// Skip dead nodes and any non-machine opcodes.
if (N->use_empty() || !N->isMachineOpcode())
continue;
@@ -4184,16 +4239,24 @@ void PPCDAGToDAGISel::PeepholePPC64() {
break;
}
- // If this is a load or store with a zero offset, we may be able to
- // fold an add-immediate into the memory operation.
- if (!isa<ConstantSDNode>(N->getOperand(FirstOp)) ||
- N->getConstantOperandVal(FirstOp) != 0)
+ // If this is a load or store with a zero offset, or within the alignment,
+ // we may be able to fold an add-immediate into the memory operation.
+ // The check against alignment is below, as it can't occur until we check
+ // the arguments to N
+ if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
continue;
SDValue Base = N->getOperand(FirstOp + 1);
if (!Base.isMachineOpcode())
continue;
+ // On targets with fusion, we don't want this to fire and remove a fusion
+ // opportunity, unless a) it results in another fusion opportunity or
+ // b) optimizing for size.
+ if (PPCSubTarget->hasFusion() &&
+ (!MF->getFunction()->optForSize() && !Base.hasOneUse()))
+ continue;
+
unsigned Flags = 0;
bool ReplaceFlags = true;
@@ -4237,6 +4300,17 @@ void PPCDAGToDAGISel::PeepholePPC64() {
break;
}
+ SDValue ImmOpnd = Base.getOperand(1);
+ int MaxDisplacement = 0;
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
+ const GlobalValue *GV = GA->getGlobal();
+ MaxDisplacement = GV->getAlignment() - 1;
+ }
+
+ int Offset = N->getConstantOperandVal(FirstOp);
+ if (Offset < 0 || Offset > MaxDisplacement)
+ continue;
+
// We found an opportunity. Reverse the operands from the add
// immediate and substitute them into the load or store. If
// needed, update the target flags for the immediate operand to
@@ -4247,8 +4321,6 @@ void PPCDAGToDAGISel::PeepholePPC64() {
DEBUG(N->dump(CurDAG));
DEBUG(dbgs() << "\n");
- SDValue ImmOpnd = Base.getOperand(1);
-
// If the relocation information isn't already present on the
// immediate operand, add it now.
if (ReplaceFlags) {
@@ -4259,17 +4331,17 @@ void PPCDAGToDAGISel::PeepholePPC64() {
// is insufficient for the instruction encoding.
if (GV->getAlignment() < 4 &&
(StorageOpcode == PPC::LD || StorageOpcode == PPC::STD ||
- StorageOpcode == PPC::LWA)) {
+ StorageOpcode == PPC::LWA || (Offset % 4) != 0)) {
DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
continue;
}
- ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags);
+ ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
} else if (ConstantPoolSDNode *CP =
dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
const Constant *C = CP->getConstVal();
ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64,
CP->getAlignment(),
- 0, Flags);
+ Offset, Flags);
}
}