aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/Mips/MipsSEISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/Mips/MipsSEISelLowering.cpp')
-rw-r--r--lib/Target/Mips/MipsSEISelLowering.cpp79
1 files changed, 76 insertions, 3 deletions
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index f7d7e2af85e4..eee5b23117f6 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -701,6 +701,77 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
+ SelectionDAG &DAG,
+ const MipsSubtarget &Subtarget) {
+ // Estimate the number of operations the below transform will turn a
+ // constant multiply into. The number is approximately how many powers
+ // of two summed together that the constant can be broken down into.
+
+ SmallVector<APInt, 16> WorkStack(1, C);
+ unsigned Steps = 0;
+ unsigned BitWidth = C.getBitWidth();
+
+ while (!WorkStack.empty()) {
+ APInt Val = WorkStack.pop_back_val();
+
+ if (Val == 0 || Val == 1)
+ continue;
+
+ if (Val.isPowerOf2()) {
+ ++Steps;
+ continue;
+ }
+
+ APInt Floor = APInt(BitWidth, 1) << Val.logBase2();
+ APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0)
+ : APInt(BitWidth, 1) << C.ceilLogBase2();
+
+ if ((Val - Floor).ule(Ceil - Val)) {
+ WorkStack.push_back(Floor);
+ WorkStack.push_back(Val - Floor);
+ ++Steps;
+ continue;
+ }
+
+ WorkStack.push_back(Ceil);
+ WorkStack.push_back(Ceil - Val);
+ ++Steps;
+
+ // If we have taken more than 12[1] / 8[2] steps to attempt the
+ // optimization for a native sized value, it is more than likely that this
+ // optimization will make things worse.
+ //
+ // [1] MIPS64 requires 6 instructions at most to materialize any constant,
+ // multiplication requires at least 4 cycles, but another cycle (or two)
+ // to retrieve the result from the HI/LO registers.
+ //
+ // [2] For MIPS32, more than 8 steps is expensive as the constant could be
+ // materialized in 2 instructions, multiplication requires at least 4
+ // cycles, but another cycle (or two) to retrieve the result from the
+ // HI/LO registers.
+
+ if (Steps > 12 && (Subtarget.isABI_N32() || Subtarget.isABI_N64()))
+ return false;
+
+ if (Steps > 8 && Subtarget.isABI_O32())
+ return false;
+ }
+
+ // If the value being multiplied is not supported natively, we have to pay
+ // an additional legalization cost, conservatively assume an increase in the
+ // cost of 3 instructions per step. This values for this heuristic were
+ // determined experimentally.
+ unsigned RegisterSize = DAG.getTargetLoweringInfo()
+ .getRegisterType(*DAG.getContext(), VT)
+ .getSizeInBits();
+ Steps *= (VT.getSizeInBits() != RegisterSize) * 3;
+ if (Steps > 27)
+ return false;
+
+ return true;
+}
+
static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT,
EVT ShiftTy, SelectionDAG &DAG) {
// Return 0.
@@ -739,11 +810,13 @@ static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT,
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
const TargetLowering::DAGCombinerInfo &DCI,
- const MipsSETargetLowering *TL) {
+ const MipsSETargetLowering *TL,
+ const MipsSubtarget &Subtarget) {
EVT VT = N->getValueType(0);
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
- if (!VT.isVector())
+ if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs(
+ C->getAPIntValue(), VT, DAG, Subtarget))
return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT,
TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT),
DAG);
@@ -983,7 +1056,7 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
Val = performORCombine(N, DAG, DCI, Subtarget);
break;
case ISD::MUL:
- return performMULCombine(N, DAG, DCI, this);
+ return performMULCombine(N, DAG, DCI, this, Subtarget);
case ISD::SHL:
Val = performSHLCombine(N, DAG, DCI, Subtarget);
break;