diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 510 |
1 files changed, 314 insertions, 196 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 776ec52e2604..8ffd89ef5ccd 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -139,6 +139,7 @@ namespace { class PPCDAGToDAGISel : public SelectionDAGISel { const PPCTargetMachine &TM; const PPCSubtarget *PPCSubTarget = nullptr; + const PPCSubtarget *Subtarget = nullptr; const PPCTargetLowering *PPCLowering = nullptr; unsigned GlobalBaseReg = 0; @@ -150,10 +151,11 @@ namespace { // Make sure we re-emit a set of the global base reg if necessary GlobalBaseReg = 0; PPCSubTarget = &MF.getSubtarget<PPCSubtarget>(); - PPCLowering = PPCSubTarget->getTargetLowering(); + Subtarget = &MF.getSubtarget<PPCSubtarget>(); + PPCLowering = Subtarget->getTargetLowering(); SelectionDAGISel::runOnMachineFunction(MF); - if (!PPCSubTarget->isSVR4ABI()) + if (!Subtarget->isSVR4ABI()) InsertVRSaveCode(MF); return true; @@ -204,7 +206,6 @@ namespace { bool tryBitfieldInsert(SDNode *N); bool tryBitPermutation(SDNode *N); bool tryIntCompareInGPR(SDNode *N); - bool tryAndWithMask(SDNode *N); // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into // an X-Form load instruction with the offset being a relocation coming from @@ -239,7 +240,7 @@ namespace { /// bit signed displacement. /// Returns false if it can be represented by [r+imm], which are preferred. bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) { - return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 0); + return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, None); } /// SelectAddrIdx4 - Given the specified address, check to see if it can be @@ -249,7 +250,8 @@ namespace { /// displacement must be a multiple of 4. /// Returns false if it can be represented by [r+imm], which are preferred. bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) { - return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 4); + return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, + Align(4)); } /// SelectAddrIdx16 - Given the specified address, check to see if it can be @@ -259,7 +261,8 @@ namespace { /// displacement must be a multiple of 16. /// Returns false if it can be represented by [r+imm], which are preferred. bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) { - return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 16); + return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, + Align(16)); } /// SelectAddrIdxOnly - Given the specified address, force it to be @@ -267,28 +270,29 @@ namespace { bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) { return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG); } - + /// SelectAddrImm - Returns true if the address N can be represented by /// a base register plus a signed 16-bit displacement [r+imm]. /// The last parameter \p 0 means D form has no requirment for 16 bit signed /// displacement. bool SelectAddrImm(SDValue N, SDValue &Disp, SDValue &Base) { - return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0); + return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, None); } /// SelectAddrImmX4 - Returns true if the address N can be represented by /// a base register plus a signed 16-bit displacement that is a multiple of /// 4 (last parameter). Suitable for use by STD and friends. bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) { - return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4); + return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, Align(4)); } /// SelectAddrImmX16 - Returns true if the address N can be represented by /// a base register plus a signed 16-bit displacement that is a multiple of /// 16(last parameter). Suitable for use by STXV and friends. bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) { - return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16); + return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, + Align(16)); } // Select an address into a single register. @@ -297,6 +301,10 @@ namespace { return true; } + bool SelectAddrPCRel(SDValue N, SDValue &Base) { + return PPCLowering->SelectAddressPCRel(N, Base); + } + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. It is always correct to compute the value into /// a register. The case of adding a (possibly relocatable) constant to a @@ -317,7 +325,7 @@ namespace { case InlineAsm::Constraint_Zy: // We need to make sure that this one operand does not end up in r0 // (because we might end up lowering this as 0(%op)). - const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo(); + const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1); SDLoc dl(Op); SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32); @@ -343,6 +351,13 @@ namespace { private: bool trySETCC(SDNode *N); + bool tryAsSingleRLDICL(SDNode *N); + bool tryAsSingleRLDICR(SDNode *N); + bool tryAsSingleRLWINM(SDNode *N); + bool tryAsSingleRLWINM8(SDNode *N); + bool tryAsSingleRLWIMI(SDNode *N); + bool tryAsPairOfRLDICL(SDNode *N); + bool tryAsSingleRLDIMI(SDNode *N); void PeepholePPC64(); void PeepholePPC64ZExt(); @@ -394,7 +409,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) { Register InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); Register UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); - const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo(); + const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); MachineBasicBlock &EntryBB = *Fn.begin(); DebugLoc dl; // Emit the following code into the entry block: @@ -429,7 +444,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) { /// SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { if (!GlobalBaseReg) { - const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo(); + const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); // Insert the set of GlobalBaseReg into the first MBB of the function MachineBasicBlock &FirstMBB = MF->front(); MachineBasicBlock::iterator MBBI = FirstMBB.begin(); @@ -437,9 +452,9 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { DebugLoc dl; if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) { - if (PPCSubTarget->isTargetELF()) { + if (Subtarget->isTargetELF()) { GlobalBaseReg = PPC::R30; - if (!PPCSubTarget->isSecurePlt() && + if (!Subtarget->isSecurePlt() && M->getPICLevel() == PICLevel::SmallPIC) { BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR)); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); @@ -3788,7 +3803,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, Opc = PPC::CMPD; } } else if (LHS.getValueType() == MVT::f32) { - if (PPCSubTarget->hasSPE()) { + if (Subtarget->hasSPE()) { switch (CC) { default: case ISD::SETEQ: @@ -3815,7 +3830,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, } else Opc = PPC::FCMPUS; } else if (LHS.getValueType() == MVT::f64) { - if (PPCSubTarget->hasSPE()) { + if (Subtarget->hasSPE()) { switch (CC) { default: case ISD::SETEQ: @@ -3840,10 +3855,10 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, break; } } else - Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD; + Opc = Subtarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD; } else { assert(LHS.getValueType() == MVT::f128 && "Unknown vt!"); - assert(PPCSubTarget->hasVSX() && "__float128 requires VSX"); + assert(Subtarget->hasVSX() && "__float128 requires VSX"); Opc = PPC::XSCMPUQP; } return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0); @@ -3872,10 +3887,10 @@ static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT, return UseSPE ? PPC::PRED_GT : PPC::PRED_LT; case ISD::SETULE: case ISD::SETLE: - return UseSPE ? PPC::PRED_LE : PPC::PRED_LE; + return PPC::PRED_LE; case ISD::SETOGT: case ISD::SETGT: - return UseSPE ? PPC::PRED_GT : PPC::PRED_GT; + return PPC::PRED_GT; case ISD::SETUGE: case ISD::SETGE: return UseSPE ? PPC::PRED_LE : PPC::PRED_GE; @@ -4038,8 +4053,7 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) { CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout()); bool isPPC64 = (PtrVT == MVT::i64); - if (!PPCSubTarget->useCRBits() && - isInt32Immediate(N->getOperand(1), Imm)) { + if (!Subtarget->useCRBits() && isInt32Immediate(N->getOperand(1), Imm)) { // We can codegen setcc op, imm very efficiently compared to a brcond. // Check for those cases here. // setcc op, 0 @@ -4128,20 +4142,20 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) { // Altivec Vector compare instructions do not set any CR register by default and // vector compare operations return the same type as the operands. if (LHS.getValueType().isVector()) { - if (PPCSubTarget->hasQPX() || PPCSubTarget->hasSPE()) + if (Subtarget->hasQPX() || Subtarget->hasSPE()) return false; EVT VecVT = LHS.getValueType(); bool Swap, Negate; - unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC, - PPCSubTarget->hasVSX(), Swap, Negate); + unsigned int VCmpInst = + getVCmpInst(VecVT.getSimpleVT(), CC, Subtarget->hasVSX(), Swap, Negate); if (Swap) std::swap(LHS, RHS); EVT ResVT = VecVT.changeVectorElementTypeToInteger(); if (Negate) { SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0); - CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR, + CurDAG->SelectNodeTo(N, Subtarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR, ResVT, VCmp, VCmp); return true; } @@ -4150,7 +4164,7 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) { return true; } - if (PPCSubTarget->useCRBits()) + if (Subtarget->useCRBits()) return false; bool Inv; @@ -4160,7 +4174,7 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) { // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that // The correct compare instruction is already set by SelectCC() - if (PPCSubTarget->hasSPE() && LHS.getValueType().isFloatingPoint()) { + if (Subtarget->hasSPE() && LHS.getValueType().isFloatingPoint()) { Idx = 1; } @@ -4209,7 +4223,7 @@ bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const { // because it is translated to r31 or r1 + slot + offset. We won't know the // slot number until the stack frame is finalized. const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo(); - unsigned SlotAlign = MFI.getObjectAlignment(FI->getIndex()); + unsigned SlotAlign = MFI.getObjectAlign(FI->getIndex()).value(); if ((SlotAlign % Val) != 0) return false; @@ -4241,13 +4255,10 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, SDValue TrueRes = N->getOperand(2); SDValue FalseRes = N->getOperand(3); ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes); - if (!TrueConst) + if (!TrueConst || (N->getSimpleValueType(0) != MVT::i64 && + N->getSimpleValueType(0) != MVT::i32)) return false; - assert((N->getSimpleValueType(0) == MVT::i64 || - N->getSimpleValueType(0) == MVT::i32) && - "Expecting either i64 or i32 here."); - // We are looking for any of: // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1) // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1) @@ -4371,142 +4382,251 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, return true; } -bool PPCDAGToDAGISel::tryAndWithMask(SDNode *N) { - if (N->getOpcode() != ISD::AND) +bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) { + assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); + unsigned Imm; + if (!isInt32Immediate(N->getOperand(1), Imm)) return false; SDLoc dl(N); SDValue Val = N->getOperand(0); - unsigned Imm, Imm2, SH, MB, ME; - uint64_t Imm64; - + unsigned SH, MB, ME; // If this is an and of a value rotated between 0 and 31 bits and then and'd // with a mask, emit rlwinm - if (isInt32Immediate(N->getOperand(1), Imm) && - isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) { - SDValue Val = N->getOperand(0).getOperand(0); - SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl), - getI32Imm(ME, dl) }; + if (isRotateAndMask(Val.getNode(), Imm, false, SH, MB, ME)) { + Val = Val.getOperand(0); + SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl), + getI32Imm(ME, dl)}; CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); return true; } // If this is just a masked value where the input is not handled, and // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm - if (isInt32Immediate(N->getOperand(1), Imm)) { - if (isRunOfOnes(Imm, MB, ME) && - N->getOperand(0).getOpcode() != ISD::ROTL) { - SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl), - getI32Imm(ME, dl) }; - CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); - return true; - } - // AND X, 0 -> 0, not "rlwinm 32". - if (Imm == 0) { - ReplaceUses(SDValue(N, 0), N->getOperand(1)); - return true; - } + if (isRunOfOnes(Imm, MB, ME) && Val.getOpcode() != ISD::ROTL) { + SDValue Ops[] = {Val, getI32Imm(0, dl), getI32Imm(MB, dl), + getI32Imm(ME, dl)}; + CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); + return true; + } - // ISD::OR doesn't get all the bitfield insertion fun. - // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a - // bitfield insert. - if (N->getOperand(0).getOpcode() == ISD::OR && - isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) { - // The idea here is to check whether this is equivalent to: - // (c1 & m) | (x & ~m) - // where m is a run-of-ones mask. The logic here is that, for each bit in - // c1 and c2: - // - if both are 1, then the output will be 1. - // - if both are 0, then the output will be 0. - // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will - // come from x. - // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will - // be 0. - // If that last condition is never the case, then we can form m from the - // bits that are the same between c1 and c2. - unsigned MB, ME; - if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) { - SDValue Ops[] = { N->getOperand(0).getOperand(0), - N->getOperand(0).getOperand(1), - getI32Imm(0, dl), getI32Imm(MB, dl), - getI32Imm(ME, dl) }; - ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops)); - return true; - } - } - } else if (isInt64Immediate(N->getOperand(1).getNode(), Imm64)) { - // If this is a 64-bit zero-extension mask, emit rldicl. - if (isMask_64(Imm64)) { - MB = 64 - countTrailingOnes(Imm64); - SH = 0; - - if (Val.getOpcode() == ISD::ANY_EXTEND) { - auto Op0 = Val.getOperand(0); - if ( Op0.getOpcode() == ISD::SRL && - isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) { - - auto ResultType = Val.getNode()->getValueType(0); - auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, - ResultType); - SDValue IDVal (ImDef, 0); - - Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, - ResultType, IDVal, Op0.getOperand(0), - getI32Imm(1, dl)), 0); - SH = 64 - Imm; - } - } + // AND X, 0 -> 0, not "rlwinm 32". + if (Imm == 0) { + ReplaceUses(SDValue(N, 0), N->getOperand(1)); + return true; + } - // If the operand is a logical right shift, we can fold it into this - // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb) - // for n <= mb. The right shift is really a left rotate followed by a - // mask, and this mask is a more-restrictive sub-mask of the mask implied - // by the shift. - if (Val.getOpcode() == ISD::SRL && - isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) { - assert(Imm < 64 && "Illegal shift amount"); - Val = Val.getOperand(0); - SH = 64 - Imm; - } + return false; +} - SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) }; - CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); - return true; - } else if (isMask_64(~Imm64)) { - // If this is a negated 64-bit zero-extension mask, - // i.e. the immediate is a sequence of ones from most significant side - // and all zero for reminder, we should use rldicr. - MB = 63 - countTrailingOnes(~Imm64); - SH = 0; - SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) }; - CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops); - return true; - } +bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) { + assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); + uint64_t Imm64; + if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64)) + return false; - // It is not 16-bit imm that means we need two instructions at least if - // using "and" instruction. Try to exploit it with rotate mask instructions. - if (isRunOfOnes64(Imm64, MB, ME)) { - if (MB >= 32 && MB <= ME) { - // MB ME - // +----------------------+ - // |xxxxxxxxxxx00011111000| - // +----------------------+ - // 0 32 64 - // We can only do it if the MB is larger than 32 and MB <= ME - // as RLWINM will replace the content of [0 - 32) with [32 - 64) even - // we didn't rotate it. - SDValue Ops[] = { Val, getI64Imm(0, dl), getI64Imm(MB - 32, dl), - getI64Imm(ME - 32, dl) }; - CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops); - return true; - } - // TODO - handle it with rldicl + rldicl - } + unsigned MB, ME; + if (isRunOfOnes64(Imm64, MB, ME) && MB >= 32 && MB <= ME) { + // MB ME + // +----------------------+ + // |xxxxxxxxxxx00011111000| + // +----------------------+ + // 0 32 64 + // We can only do it if the MB is larger than 32 and MB <= ME + // as RLWINM will replace the contents of [0 - 32) with [32 - 64) even + // we didn't rotate it. + SDLoc dl(N); + SDValue Ops[] = {N->getOperand(0), getI64Imm(0, dl), getI64Imm(MB - 32, dl), + getI64Imm(ME - 32, dl)}; + CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops); + return true; + } + + return false; +} + +bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) { + assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); + uint64_t Imm64; + if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64)) + return false; + + // Do nothing if it is 16-bit imm as the pattern in the .td file handle + // it well with "andi.". + if (isUInt<16>(Imm64)) + return false; + + SDLoc Loc(N); + SDValue Val = N->getOperand(0); + + // Optimized with two rldicl's as follows: + // Add missing bits on left to the mask and check that the mask is a + // wrapped run of ones, i.e. + // Change pattern |0001111100000011111111| + // to |1111111100000011111111|. + unsigned NumOfLeadingZeros = countLeadingZeros(Imm64); + if (NumOfLeadingZeros != 0) + Imm64 |= maskLeadingOnes<uint64_t>(NumOfLeadingZeros); + + unsigned MB, ME; + if (!isRunOfOnes64(Imm64, MB, ME)) + return false; + + // ME MB MB-ME+63 + // +----------------------+ +----------------------+ + // |1111111100000011111111| -> |0000001111111111111111| + // +----------------------+ +----------------------+ + // 0 63 0 63 + // There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between. + unsigned OnesOnLeft = ME + 1; + unsigned ZerosInBetween = (MB - ME + 63) & 63; + // Rotate left by OnesOnLeft (so leading ones are now trailing ones) and clear + // on the left the bits that are already zeros in the mask. + Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val, + getI64Imm(OnesOnLeft, Loc), + getI64Imm(ZerosInBetween, Loc)), + 0); + // MB-ME+63 ME MB + // +----------------------+ +----------------------+ + // |0000001111111111111111| -> |0001111100000011111111| + // +----------------------+ +----------------------+ + // 0 63 0 63 + // Rotate back by 64 - OnesOnLeft to undo previous rotate. Then clear on the + // left the number of ones we previously added. + SDValue Ops[] = {Val, getI64Imm(64 - OnesOnLeft, Loc), + getI64Imm(NumOfLeadingZeros, Loc)}; + CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); + return true; +} + +bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) { + assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); + unsigned Imm; + if (!isInt32Immediate(N->getOperand(1), Imm)) + return false; + + SDValue Val = N->getOperand(0); + unsigned Imm2; + // ISD::OR doesn't get all the bitfield insertion fun. + // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a + // bitfield insert. + if (Val.getOpcode() != ISD::OR || !isInt32Immediate(Val.getOperand(1), Imm2)) + return false; + + // The idea here is to check whether this is equivalent to: + // (c1 & m) | (x & ~m) + // where m is a run-of-ones mask. The logic here is that, for each bit in + // c1 and c2: + // - if both are 1, then the output will be 1. + // - if both are 0, then the output will be 0. + // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will + // come from x. + // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will + // be 0. + // If that last condition is never the case, then we can form m from the + // bits that are the same between c1 and c2. + unsigned MB, ME; + if (isRunOfOnes(~(Imm ^ Imm2), MB, ME) && !(~Imm & Imm2)) { + SDLoc dl(N); + SDValue Ops[] = {Val.getOperand(0), Val.getOperand(1), getI32Imm(0, dl), + getI32Imm(MB, dl), getI32Imm(ME, dl)}; + ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops)); + return true; } return false; } +bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) { + assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); + uint64_t Imm64; + if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64)) + return false; + + // If this is a 64-bit zero-extension mask, emit rldicl. + unsigned MB = 64 - countTrailingOnes(Imm64); + unsigned SH = 0; + unsigned Imm; + SDValue Val = N->getOperand(0); + SDLoc dl(N); + + if (Val.getOpcode() == ISD::ANY_EXTEND) { + auto Op0 = Val.getOperand(0); + if (Op0.getOpcode() == ISD::SRL && + isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) { + + auto ResultType = Val.getNode()->getValueType(0); + auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, ResultType); + SDValue IDVal(ImDef, 0); + + Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, ResultType, + IDVal, Op0.getOperand(0), + getI32Imm(1, dl)), + 0); + SH = 64 - Imm; + } + } + + // If the operand is a logical right shift, we can fold it into this + // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb) + // for n <= mb. The right shift is really a left rotate followed by a + // mask, and this mask is a more-restrictive sub-mask of the mask implied + // by the shift. + if (Val.getOpcode() == ISD::SRL && + isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) { + assert(Imm < 64 && "Illegal shift amount"); + Val = Val.getOperand(0); + SH = 64 - Imm; + } + + SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl)}; + CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); + return true; +} + +bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) { + assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); + uint64_t Imm64; + if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || + !isMask_64(~Imm64)) + return false; + + // If this is a negated 64-bit zero-extension mask, + // i.e. the immediate is a sequence of ones from most significant side + // and all zero for reminder, we should use rldicr. + unsigned MB = 63 - countTrailingOnes(~Imm64); + unsigned SH = 0; + SDLoc dl(N); + SDValue Ops[] = {N->getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl)}; + CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops); + return true; +} + +bool PPCDAGToDAGISel::tryAsSingleRLDIMI(SDNode *N) { + assert(N->getOpcode() == ISD::OR && "ISD::OR SDNode expected"); + uint64_t Imm64; + unsigned MB, ME; + SDValue N0 = N->getOperand(0); + + // We won't get fewer instructions if the imm is 32-bit integer. + // rldimi requires the imm to have consecutive ones with both sides zero. + // Also, make sure the first Op has only one use, otherwise this may increase + // register pressure since rldimi is destructive. + if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || + isUInt<32>(Imm64) || !isRunOfOnes64(Imm64, MB, ME) || !N0.hasOneUse()) + return false; + + unsigned SH = 63 - ME; + SDLoc Dl(N); + // Use select64Imm for making LI instr instead of directly putting Imm64 + SDValue Ops[] = { + N->getOperand(0), + SDValue(selectI64Imm(CurDAG, getI64Imm(-1, Dl).getNode()), 0), + getI32Imm(SH, Dl), getI32Imm(MB, Dl)}; + CurDAG->SelectNodeTo(N, PPC::RLDIMI, MVT::i64, Ops); + return true; +} + // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. void PPCDAGToDAGISel::Select(SDNode *N) { @@ -4551,7 +4671,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { case PPCISD::ADDI_TLSGD_L_ADDR: { const Module *Mod = MF->getFunction().getParent(); if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 || - !PPCSubTarget->isSecurePlt() || !PPCSubTarget->isTargetELF() || + !Subtarget->isSecurePlt() || !Subtarget->isTargetELF() || Mod->getPICLevel() == PICLevel::SmallPIC) break; // Attach global base pointer on GETtlsADDR32 node in order to @@ -4560,8 +4680,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) { } break; case PPCISD::CALL: { if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 || - !TM.isPositionIndependent() || !PPCSubTarget->isSecurePlt() || - !PPCSubTarget->isTargetELF()) + !TM.isPositionIndependent() || !Subtarget->isSecurePlt() || + !Subtarget->isTargetELF()) break; SDValue Op = N->getOperand(1); @@ -4625,7 +4745,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { case ISD::STORE: { // Change TLS initial-exec D-form stores to X-form stores. StoreSDNode *ST = cast<StoreSDNode>(N); - if (EnableTLSOpt && PPCSubTarget->isELFv2ABI() && + if (EnableTLSOpt && Subtarget->isELFv2ABI() && ST->getAddressingMode() != ISD::PRE_INC) if (tryTLSXFormStore(ST)) return; @@ -4639,7 +4759,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { // Normal loads are handled by code generated from the .td file. if (LD->getAddressingMode() != ISD::PRE_INC) { // Change TLS initial-exec D-form loads to X-form loads. - if (EnableTLSOpt && PPCSubTarget->isELFv2ABI()) + if (EnableTLSOpt && Subtarget->isELFv2ABI()) if (tryTLSXFormLoad(LD)) return; break; @@ -4730,7 +4850,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) { case ISD::AND: // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr - if (tryAndWithMask(N)) + if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) || + tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N)) return; // Other cases are autogenerated. @@ -4753,10 +4874,15 @@ void PPCDAGToDAGISel::Select(SDNode *N) { } } + // If this is 'or' against an imm with consecutive ones and both sides zero, + // try to emit rldimi + if (tryAsSingleRLDIMI(N)) + return; + // OR with a 32-bit immediate can be handled by ori + oris // without creating an immediate in a GPR. uint64_t Imm64 = 0; - bool IsPPC64 = PPCSubTarget->isPPC64(); + bool IsPPC64 = Subtarget->isPPC64(); if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) && (Imm64 & ~0xFFFFFFFFuLL) == 0) { // If ImmHi (ImmHi) is zero, only one ori (oris) is generated later. @@ -4779,7 +4905,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { // XOR with a 32-bit immediate can be handled by xori + xoris // without creating an immediate in a GPR. uint64_t Imm64 = 0; - bool IsPPC64 = PPCSubTarget->isPPC64(); + bool IsPPC64 = Subtarget->isPPC64(); if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) && (Imm64 & ~0xFFFFFFFFuLL) == 0) { // If ImmHi (ImmHi) is zero, only one xori (xoris) is generated later. @@ -4866,11 +4992,10 @@ void PPCDAGToDAGISel::Select(SDNode *N) { bool isPPC64 = (PtrVT == MVT::i64); // If this is a select of i1 operands, we'll pattern match it. - if (PPCSubTarget->useCRBits() && - N->getOperand(0).getValueType() == MVT::i1) + if (Subtarget->useCRBits() && N->getOperand(0).getValueType() == MVT::i1) break; - if (PPCSubTarget->isISA3_0() && PPCSubTarget->isPPC64()) { + if (Subtarget->isISA3_0() && Subtarget->isPPC64()) { bool NeedSwapOps = false; bool IsUnCmp = false; if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) { @@ -4945,7 +5070,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { } unsigned BROpc = - getPredicateForSetCC(CC, N->getOperand(0).getValueType(), PPCSubTarget); + getPredicateForSetCC(CC, N->getOperand(0).getValueType(), Subtarget); unsigned SelectCCOp; if (N->getValueType(0) == MVT::i32) @@ -4953,28 +5078,28 @@ void PPCDAGToDAGISel::Select(SDNode *N) { else if (N->getValueType(0) == MVT::i64) SelectCCOp = PPC::SELECT_CC_I8; else if (N->getValueType(0) == MVT::f32) { - if (PPCSubTarget->hasP8Vector()) + if (Subtarget->hasP8Vector()) SelectCCOp = PPC::SELECT_CC_VSSRC; - else if (PPCSubTarget->hasSPE()) + else if (Subtarget->hasSPE()) SelectCCOp = PPC::SELECT_CC_SPE4; else SelectCCOp = PPC::SELECT_CC_F4; } else if (N->getValueType(0) == MVT::f64) { - if (PPCSubTarget->hasVSX()) + if (Subtarget->hasVSX()) SelectCCOp = PPC::SELECT_CC_VSFRC; - else if (PPCSubTarget->hasSPE()) + else if (Subtarget->hasSPE()) SelectCCOp = PPC::SELECT_CC_SPE; else SelectCCOp = PPC::SELECT_CC_F8; } else if (N->getValueType(0) == MVT::f128) SelectCCOp = PPC::SELECT_CC_F16; - else if (PPCSubTarget->hasSPE()) + else if (Subtarget->hasSPE()) SelectCCOp = PPC::SELECT_CC_SPE; - else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64) + else if (Subtarget->hasQPX() && N->getValueType(0) == MVT::v4f64) SelectCCOp = PPC::SELECT_CC_QFRC; - else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32) + else if (Subtarget->hasQPX() && N->getValueType(0) == MVT::v4f32) SelectCCOp = PPC::SELECT_CC_QSRC; - else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1) + else if (Subtarget->hasQPX() && N->getValueType(0) == MVT::v4i1) SelectCCOp = PPC::SELECT_CC_QBRC; else if (N->getValueType(0) == MVT::v2f64 || N->getValueType(0) == MVT::v2i64) @@ -4988,8 +5113,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) { return; } case ISD::VECTOR_SHUFFLE: - if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 || - N->getValueType(0) == MVT::v2i64)) { + if (Subtarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 || + N->getValueType(0) == MVT::v2i64)) { ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1), @@ -5024,7 +5149,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { // For little endian, we must swap the input operands and adjust // the mask elements (reverse and invert them). - if (PPCSubTarget->isLittleEndian()) { + if (Subtarget->isLittleEndian()) { std::swap(Op1, Op2); unsigned tmp = DM[0]; DM[0] = 1 - DM[1]; @@ -5041,7 +5166,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { break; case PPCISD::BDNZ: case PPCISD::BDZ: { - bool IsPPC64 = PPCSubTarget->isPPC64(); + bool IsPPC64 = Subtarget->isPPC64(); SDValue Ops[] = { N->getOperand(1), N->getOperand(0) }; CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ) @@ -5069,7 +5194,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { case ISD::BR_CC: { ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); unsigned PCC = - getPredicateForSetCC(CC, N->getOperand(2).getValueType(), PPCSubTarget); + getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget); if (N->getOperand(2).getValueType() == MVT::i1) { unsigned Opc; @@ -5122,11 +5247,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) { return; } case PPCISD::TOC_ENTRY: { - const bool isPPC64 = PPCSubTarget->isPPC64(); - const bool isELFABI = PPCSubTarget->isSVR4ABI(); - const bool isAIXABI = PPCSubTarget->isAIXABI(); - - assert(!PPCSubTarget->isDarwin() && "TOC is an ELF/XCOFF construct"); + const bool isPPC64 = Subtarget->isPPC64(); + const bool isELFABI = Subtarget->isSVR4ABI(); + const bool isAIXABI = Subtarget->isAIXABI(); // PowerPC only support small, medium and large code model. const CodeModel::Model CModel = TM.getCodeModel(); @@ -5177,7 +5300,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { // or 64-bit medium (ELF-only) or large (ELF and AIX) code model code. We // generate two instructions as described below. The first source operand // is a symbol reference. If it must be toc-referenced according to - // PPCSubTarget, we generate: + // Subtarget, we generate: // [32-bit AIX] // LWZtocL(@sym, ADDIStocHA(%r2, @sym)) // [64-bit ELF/AIX] @@ -5209,7 +5332,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { } case PPCISD::PPC32_PICGOT: // Generate a PIC-safe GOT reference. - assert(PPCSubTarget->is32BitELFABI() && + assert(Subtarget->is32BitELFABI() && "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4"); CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(CurDAG->getDataLayout()), @@ -5306,7 +5429,7 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) { "Only OR nodes are supported for CMPB"); SDValue Res; - if (!PPCSubTarget->hasCMPB()) + if (!Subtarget->hasCMPB()) return Res; if (N->getValueType(0) != MVT::i32 && @@ -5517,7 +5640,7 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) { // only one instruction (like a zero or one), then we should fold in those // operations with the select. void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) { - if (!PPCSubTarget->useCRBits()) + if (!Subtarget->useCRBits()) return; if (N->getOpcode() != ISD::ZERO_EXTEND && @@ -5549,8 +5672,7 @@ void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) { SDValue O1 = UserO1.getNode() == N ? Val : UserO1; return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl, - User->getValueType(0), - O0.getNode(), O1.getNode()); + User->getValueType(0), {O0, O1}); }; // FIXME: When the semantics of the interaction between select and undef @@ -6259,7 +6381,7 @@ static bool PeepholePPC64ZExtGather(SDValue Op32, } void PPCDAGToDAGISel::PeepholePPC64ZExt() { - if (!PPCSubTarget->isPPC64()) + if (!Subtarget->isPPC64()) return; // When we zero-extend from i32 to i64, we use a pattern like this: @@ -6428,10 +6550,6 @@ void PPCDAGToDAGISel::PeepholePPC64ZExt() { } void PPCDAGToDAGISel::PeepholePPC64() { - // These optimizations are currently supported only for 64-bit SVR4. - if (PPCSubTarget->isDarwin() || !PPCSubTarget->isPPC64()) - return; - SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); while (Position != CurDAG->allnodes_begin()) { @@ -6544,7 +6662,8 @@ void PPCDAGToDAGISel::PeepholePPC64() { int MaxDisplacement = 7; if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) { const GlobalValue *GV = GA->getGlobal(); - MaxDisplacement = std::min((int) GV->getAlignment() - 1, MaxDisplacement); + Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout()); + MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement); } bool UpdateHBase = false; @@ -6610,10 +6729,10 @@ void PPCDAGToDAGISel::PeepholePPC64() { if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) { SDLoc dl(GA); const GlobalValue *GV = GA->getGlobal(); + Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout()); // We can't perform this optimization for data whose alignment // is insufficient for the instruction encoding. - if (GV->getAlignment() < 4 && - (RequiresMod4Offset || (Offset % 4) != 0)) { + if (Alignment < 4 && (RequiresMod4Offset || (Offset % 4) != 0)) { LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n"); continue; } @@ -6621,8 +6740,7 @@ void PPCDAGToDAGISel::PeepholePPC64() { } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(ImmOpnd)) { const Constant *C = CP->getConstVal(); - ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, - CP->getAlignment(), + ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(), Offset, Flags); } } |