diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 193 |
1 files changed, 138 insertions, 55 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 61c1fd25031d..12af6087cb47 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -594,7 +594,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // Half type will be promoted by default. setOperationAction(ISD::FABS, MVT::f16, Promote); setOperationAction(ISD::FNEG, MVT::f16, Promote); - setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); + setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand); setOperationAction(ISD::FADD, MVT::f16, Promote); setOperationAction(ISD::FSUB, MVT::f16, Promote); setOperationAction(ISD::FMUL, MVT::f16, Promote); @@ -629,6 +629,34 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_ROUND, MVT::f16, LibCall); setOperationAction(ISD::FP_EXTEND, MVT::f32, LibCall); setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); + + setOperationAction(ISD::STRICT_FADD, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FSUB, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FMUL, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FDIV, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FMA, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FMINNUM, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FMAXNUM, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FMINIMUM, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FMAXIMUM, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FSQRT, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FPOW, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FLOG, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FLOG2, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FLOG10, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FEXP, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FEXP2, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FCEIL, MVT::f16, Promote); + 
setOperationAction(ISD::STRICT_FFLOOR, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FRINT, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FROUND, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FTRUNC, MVT::f16, Promote); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, LibCall); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, LibCall); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom); setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2"); @@ -2817,6 +2845,21 @@ Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const { AddressSpace = X86AS::FS; else if (GuardReg == "gs") AddressSpace = X86AS::GS; + + // Use the guard symbol if the user specified one. + StringRef GuardSymb = M->getStackProtectorGuardSymbol(); + if (!GuardSymb.empty()) { + GlobalVariable *GV = M->getGlobalVariable(GuardSymb); + if (!GV) { + Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext()) + : Type::getInt32Ty(M->getContext()); + GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage, + nullptr, GuardSymb, nullptr, + GlobalValue::NotThreadLocal, AddressSpace); + } + return GV; + } + return SegmentOffset(IRB, Offset, AddressSpace); } } @@ -11757,15 +11800,17 @@ static bool isShuffleEquivalent(ArrayRef<int> Mask, ArrayRef<int> ExpectedMask, /// value in ExpectedMask is always accepted. Otherwise the indices must match. /// /// SM_SentinelZero is accepted as a valid negative index but must match in -/// both. +/// both, or via a known bits test. 
static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask, ArrayRef<int> ExpectedMask, + const SelectionDAG &DAG, SDValue V1 = SDValue(), SDValue V2 = SDValue()) { int Size = Mask.size(); if (Size != (int)ExpectedMask.size()) return false; - assert(isUndefOrZeroOrInRange(ExpectedMask, 0, 2 * Size) && + assert(llvm::all_of(ExpectedMask, + [Size](int M) { return isInRange(M, 0, 2 * Size); }) && "Illegal target shuffle mask"); // Check for out-of-range target shuffle mask indices. @@ -11778,12 +11823,28 @@ static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask, if (V2 && V2.getValueSizeInBits() != VT.getSizeInBits()) V2 = SDValue(); + APInt ZeroV1 = APInt::getNullValue(Size); + APInt ZeroV2 = APInt::getNullValue(Size); + for (int i = 0; i < Size; ++i) { int MaskIdx = Mask[i]; int ExpectedIdx = ExpectedMask[i]; if (MaskIdx == SM_SentinelUndef || MaskIdx == ExpectedIdx) continue; - if (0 <= MaskIdx && 0 <= ExpectedIdx) { + if (MaskIdx == SM_SentinelZero) { + // If we need this expected index to be a zero element, then update the + // relevant zero mask and perform the known bits test at the end to minimize + // repeated computation. + SDValue ExpectedV = ExpectedIdx < Size ? V1 : V2; + if (ExpectedV && + Size == (int)ExpectedV.getValueType().getVectorNumElements()) { + int BitIdx = ExpectedIdx < Size ? ExpectedIdx : (ExpectedIdx - Size); + APInt &ZeroMask = ExpectedIdx < Size ? ZeroV1 : ZeroV2; + ZeroMask.setBit(BitIdx); + continue; + } + } + if (MaskIdx >= 0) { SDValue MaskV = MaskIdx < Size ? V1 : V2; SDValue ExpectedV = ExpectedIdx < Size ? V1 : V2; MaskIdx = MaskIdx < Size ? MaskIdx : (MaskIdx - Size); @@ -11791,15 +11852,16 @@ static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask, if (IsElementEquivalent(Size, MaskV, ExpectedV, MaskIdx, ExpectedIdx)) continue; } - // TODO - handle SM_Sentinel equivalences. 
return false; } - return true; + return (ZeroV1.isNullValue() || DAG.MaskedVectorIsZero(V1, ZeroV1)) && + (ZeroV2.isNullValue() || DAG.MaskedVectorIsZero(V2, ZeroV2)); } // Check if the shuffle mask is suitable for the AVX vpunpcklwd or vpunpckhwd // instructions. -static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT) { +static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT, + const SelectionDAG &DAG) { if (VT != MVT::v8i32 && VT != MVT::v8f32) return false; @@ -11809,12 +11871,13 @@ static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT) { SmallVector<int, 8> Unpckhwd; createUnpackShuffleMask(MVT::v8i16, Unpckhwd, /* Lo = */ false, /* Unary = */ false); - bool IsUnpackwdMask = (isTargetShuffleEquivalent(VT, Mask, Unpcklwd) || - isTargetShuffleEquivalent(VT, Mask, Unpckhwd)); + bool IsUnpackwdMask = (isTargetShuffleEquivalent(VT, Mask, Unpcklwd, DAG) || + isTargetShuffleEquivalent(VT, Mask, Unpckhwd, DAG)); return IsUnpackwdMask; } -static bool is128BitUnpackShuffleMask(ArrayRef<int> Mask) { +static bool is128BitUnpackShuffleMask(ArrayRef<int> Mask, + const SelectionDAG &DAG) { // Create 128-bit vector type based on mask size. MVT EltVT = MVT::getIntegerVT(128 / Mask.size()); MVT VT = MVT::getVectorVT(EltVT, Mask.size()); @@ -11827,8 +11890,8 @@ static bool is128BitUnpackShuffleMask(ArrayRef<int> Mask) { for (unsigned i = 0; i != 4; ++i) { SmallVector<int, 16> UnpackMask; createUnpackShuffleMask(VT, UnpackMask, (i >> 1) % 2, i % 2); - if (isTargetShuffleEquivalent(VT, Mask, UnpackMask) || - isTargetShuffleEquivalent(VT, CommutedMask, UnpackMask)) + if (isTargetShuffleEquivalent(VT, Mask, UnpackMask, DAG) || + isTargetShuffleEquivalent(VT, CommutedMask, UnpackMask, DAG)) return true; } return false; @@ -12021,7 +12084,7 @@ static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2, // Attempt to match the target mask against the unpack lo/hi mask patterns. 
SmallVector<int, 64> Unpckl, Unpckh; createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, IsUnary); - if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl, V1, + if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl, DAG, V1, (IsUnary ? V1 : V2))) { UnpackOpcode = X86ISD::UNPCKL; V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2)); @@ -12030,7 +12093,7 @@ static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2, } createUnpackShuffleMask(VT, Unpckh, /* Lo = */ false, IsUnary); - if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh, V1, + if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh, DAG, V1, (IsUnary ? V1 : V2))) { UnpackOpcode = X86ISD::UNPCKH; V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2)); @@ -12069,14 +12132,14 @@ static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2, // If a binary shuffle, commute and try again. if (!IsUnary) { ShuffleVectorSDNode::commuteMask(Unpckl); - if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl)) { + if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl, DAG)) { UnpackOpcode = X86ISD::UNPCKL; std::swap(V1, V2); return true; } ShuffleVectorSDNode::commuteMask(Unpckh); - if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh)) { + if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh, DAG)) { UnpackOpcode = X86ISD::UNPCKH; std::swap(V1, V2); return true; @@ -12464,14 +12527,14 @@ static bool matchShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1, SDValue &V2, // Try binary shuffle. SmallVector<int, 32> BinaryMask; createPackShuffleMask(VT, BinaryMask, false, NumStages); - if (isTargetShuffleEquivalent(VT, TargetMask, BinaryMask, V1, V2)) + if (isTargetShuffleEquivalent(VT, TargetMask, BinaryMask, DAG, V1, V2)) if (MatchPACK(V1, V2, PackVT)) return true; // Try unary shuffle. 
SmallVector<int, 32> UnaryMask; createPackShuffleMask(VT, UnaryMask, true, NumStages); - if (isTargetShuffleEquivalent(VT, TargetMask, UnaryMask, V1)) + if (isTargetShuffleEquivalent(VT, TargetMask, UnaryMask, DAG, V1)) if (MatchPACK(V1, V1, PackVT)) return true; } @@ -14283,7 +14346,7 @@ static SDValue lowerShuffleOfExtractsAsVperm(const SDLoc &DL, SDValue N0, // and a simple narrow shuffle. Prefer extract+unpack(h/l)ps to vpermps // because that avoids a constant load from memory. if (NumElts == 4 && - (isSingleSHUFPSMask(NewMask) || is128BitUnpackShuffleMask(NewMask))) + (isSingleSHUFPSMask(NewMask) || is128BitUnpackShuffleMask(NewMask, DAG))) return SDValue(); // Extend the shuffle mask with undef elements. @@ -17230,7 +17293,7 @@ static SDValue lowerShuffleWithUndefHalf(const SDLoc &DL, MVT VT, SDValue V1, if (Subtarget.hasAVX2()) { // extract128 + vunpckhps/vshufps, is better than vblend + vpermps. if (EltWidth == 32 && NumLowerHalves && HalfVT.is128BitVector() && - !is128BitUnpackShuffleMask(HalfMask) && + !is128BitUnpackShuffleMask(HalfMask, DAG) && (!isSingleSHUFPSMask(HalfMask) || Subtarget.hasFastVariableCrossLaneShuffle())) return SDValue(); @@ -17892,7 +17955,7 @@ static SDValue lowerV8F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask, // For non-AVX512 if the Mask is of 16bit elements in lane then try to split // since after split we get a more efficient code using vpunpcklwd and // vpunpckhwd instrs than vblend. - if (!Subtarget.hasAVX512() && isUnpackWdShuffleMask(Mask, MVT::v8f32)) + if (!Subtarget.hasAVX512() && isUnpackWdShuffleMask(Mask, MVT::v8f32, DAG)) return lowerShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG); @@ -17930,7 +17993,7 @@ static SDValue lowerV8I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask, // For non-AVX512 if the Mask is of 16bit elements in lane then try to split // since after split we get a more efficient code than vblend by using // vpunpcklwd and vpunpckhwd instrs. 
- if (isUnpackWdShuffleMask(Mask, MVT::v8i32) && !V2.isUndef() && + if (isUnpackWdShuffleMask(Mask, MVT::v8i32, DAG) && !V2.isUndef() && !Subtarget.hasAVX512()) return lowerShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG); @@ -27887,11 +27950,14 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, } // Read Performance Monitoring Counters. case RDPMC: + // Read Processor Register. + case RDPRU: // GetExtended Control Register. case XGETBV: { SmallVector<SDValue, 2> Results; // RDPMC uses ECX to select the index of the performance counter to read. + // RDPRU uses ECX to select the processor register to read. // XGETBV uses ECX to select the index of the XCR register to return. // The result is stored into registers EDX:EAX. expandIntrinsicWChainHelper(Op.getNode(), dl, DAG, IntrData->Opc0, X86::ECX, @@ -29902,14 +29968,12 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, SDValue Amt01 = DAG.getBitcast(MVT::v8i16, Amt); SDValue Amt23 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt01, Amt01, {4, 5, 6, 7, -1, -1, -1, -1}); - Amt0 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt01, Amt01, - {0, 1, 1, 1, -1, -1, -1, -1}); - Amt1 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt01, Amt01, - {2, 3, 3, 3, -1, -1, -1, -1}); - Amt2 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt23, Amt23, - {0, 1, 1, 1, -1, -1, -1, -1}); - Amt3 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt23, Amt23, - {2, 3, 3, 3, -1, -1, -1, -1}); + SDValue Msk02 = getV4X86ShuffleImm8ForMask({0, 1, 1, 1}, dl, DAG); + SDValue Msk13 = getV4X86ShuffleImm8ForMask({2, 3, 3, 3}, dl, DAG); + Amt0 = DAG.getNode(X86ISD::PSHUFLW, dl, MVT::v8i16, Amt01, Msk02); + Amt1 = DAG.getNode(X86ISD::PSHUFLW, dl, MVT::v8i16, Amt01, Msk13); + Amt2 = DAG.getNode(X86ISD::PSHUFLW, dl, MVT::v8i16, Amt23, Msk02); + Amt3 = DAG.getNode(X86ISD::PSHUFLW, dl, MVT::v8i16, Amt23, Msk13); } } @@ -30797,6 +30861,8 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { case 
AtomicRMWInst::UMin: case AtomicRMWInst::FAdd: case AtomicRMWInst::FSub: + case AtomicRMWInst::FMax: + case AtomicRMWInst::FMin: // These always require a non-trivial set of data operations on x86. We must // use a cmpxchg loop. return AtomicExpansionKind::CmpXChg; @@ -32894,6 +32960,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, expandIntrinsicWChainHelper(N, dl, DAG, X86::RDPMC, X86::ECX, Subtarget, Results); return; + case Intrinsic::x86_rdpru: + expandIntrinsicWChainHelper(N, dl, DAG, X86::RDPRU, X86::ECX, Subtarget, + Results); + return; case Intrinsic::x86_xgetbv: expandIntrinsicWChainHelper(N, dl, DAG, X86::XGETBV, X86::ECX, Subtarget, Results); @@ -36985,8 +37055,9 @@ static SDValue narrowLoadToVZLoad(LoadSDNode *LN, MVT MemVT, MVT VT, // TODO: Investigate sharing more of this with shuffle lowering. static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask, bool AllowFloatDomain, bool AllowIntDomain, - SDValue V1, const X86Subtarget &Subtarget, - unsigned &Shuffle, MVT &SrcVT, MVT &DstVT) { + SDValue V1, const SelectionDAG &DAG, + const X86Subtarget &Subtarget, unsigned &Shuffle, + MVT &SrcVT, MVT &DstVT) { unsigned NumMaskElts = Mask.size(); unsigned MaskEltSize = MaskVT.getScalarSizeInBits(); @@ -37057,17 +37128,17 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask, // instructions are no slower than UNPCKLPD but has the option to // fold the input operand into even an unaligned memory load. 
if (MaskVT.is128BitVector() && Subtarget.hasSSE3() && AllowFloatDomain) { - if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}, V1)) { + if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}, DAG, V1)) { Shuffle = X86ISD::MOVDDUP; SrcVT = DstVT = MVT::v2f64; return true; } - if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, V1)) { + if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, DAG, V1)) { Shuffle = X86ISD::MOVSLDUP; SrcVT = DstVT = MVT::v4f32; return true; } - if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3}, V1)) { + if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3}, DAG, V1)) { Shuffle = X86ISD::MOVSHDUP; SrcVT = DstVT = MVT::v4f32; return true; @@ -37076,17 +37147,19 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask, if (MaskVT.is256BitVector() && AllowFloatDomain) { assert(Subtarget.hasAVX() && "AVX required for 256-bit vector shuffles"); - if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, V1)) { + if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, DAG, V1)) { Shuffle = X86ISD::MOVDDUP; SrcVT = DstVT = MVT::v4f64; return true; } - if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1)) { + if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, DAG, + V1)) { Shuffle = X86ISD::MOVSLDUP; SrcVT = DstVT = MVT::v8f32; return true; } - if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3, 5, 5, 7, 7}, V1)) { + if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3, 5, 5, 7, 7}, DAG, + V1)) { Shuffle = X86ISD::MOVSHDUP; SrcVT = DstVT = MVT::v8f32; return true; @@ -37096,21 +37169,22 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask, if (MaskVT.is512BitVector() && AllowFloatDomain) { assert(Subtarget.hasAVX512() && "AVX512 required for 512-bit vector shuffles"); - if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1)) { + if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, DAG, + V1)) { Shuffle = X86ISD::MOVDDUP; SrcVT = 
DstVT = MVT::v8f64; return true; } if (isTargetShuffleEquivalent( MaskVT, Mask, - {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14}, V1)) { + {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14}, DAG, V1)) { Shuffle = X86ISD::MOVSLDUP; SrcVT = DstVT = MVT::v16f32; return true; } if (isTargetShuffleEquivalent( MaskVT, Mask, - {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}, V1)) { + {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}, DAG, V1)) { Shuffle = X86ISD::MOVSHDUP; SrcVT = DstVT = MVT::v16f32; return true; @@ -37126,6 +37200,7 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask, static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef<int> Mask, const APInt &Zeroable, bool AllowFloatDomain, bool AllowIntDomain, + const SelectionDAG &DAG, const X86Subtarget &Subtarget, unsigned &Shuffle, MVT &ShuffleVT, unsigned &PermuteImm) { @@ -37269,33 +37344,36 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask, unsigned EltSizeInBits = MaskVT.getScalarSizeInBits(); if (MaskVT.is128BitVector()) { - if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}) && AllowFloatDomain) { + if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}, DAG) && + AllowFloatDomain) { V2 = V1; V1 = (SM_SentinelUndef == Mask[0] ? DAG.getUNDEF(MVT::v4f32) : V1); Shuffle = Subtarget.hasSSE2() ? X86ISD::UNPCKL : X86ISD::MOVLHPS; SrcVT = DstVT = Subtarget.hasSSE2() ? MVT::v2f64 : MVT::v4f32; return true; } - if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1}) && AllowFloatDomain) { + if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1}, DAG) && + AllowFloatDomain) { V2 = V1; Shuffle = Subtarget.hasSSE2() ? X86ISD::UNPCKH : X86ISD::MOVHLPS; SrcVT = DstVT = Subtarget.hasSSE2() ? 
MVT::v2f64 : MVT::v4f32; return true; } - if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 3}) && + if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 3}, DAG) && Subtarget.hasSSE2() && (AllowFloatDomain || !Subtarget.hasSSE41())) { std::swap(V1, V2); Shuffle = X86ISD::MOVSD; SrcVT = DstVT = MVT::v2f64; return true; } - if (isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3}) && + if (isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3}, DAG) && (AllowFloatDomain || !Subtarget.hasSSE41())) { Shuffle = X86ISD::MOVSS; SrcVT = DstVT = MVT::v4f32; return true; } - if (isTargetShuffleEquivalent(MaskVT, Mask, {8, 1, 2, 3, 4, 5, 6, 7}) && + if (isTargetShuffleEquivalent(MaskVT, Mask, {8, 1, 2, 3, 4, 5, 6, 7}, + DAG) && Subtarget.hasFP16()) { Shuffle = X86ISD::MOVSH; SrcVT = DstVT = MVT::v8f16; @@ -37678,7 +37756,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, scaleShuffleElements(Mask, NumElts, ScaledMask)) { for (unsigned i = 0; i != NumElts; ++i) IdentityMask.push_back(i); - if (isTargetShuffleEquivalent(RootVT, ScaledMask, IdentityMask, V1, V2)) + if (isTargetShuffleEquivalent(RootVT, ScaledMask, IdentityMask, DAG, V1, + V2)) return CanonicalizeShuffleInput(RootVT, V1); } } @@ -37902,7 +37981,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, } if (matchUnaryShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, V1, - Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT) && + DAG, Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT) && (!IsMaskedShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { if (Depth == 0 && Root.getOpcode() == Shuffle) @@ -37913,7 +37992,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, } if (matchUnaryPermuteShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain, - AllowIntDomain, Subtarget, Shuffle, ShuffleVT, + AllowIntDomain, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) && (!IsMaskedShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { @@ 
-37931,7 +38010,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, // TODO: Handle other insertions here as well? if (!UnaryShuffle && AllowFloatDomain && RootSizeInBits == 128 && Subtarget.hasSSE41() && - !isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3})) { + !isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3}, DAG)) { if (MaskEltSizeInBits == 32) { SDValue SrcV1 = V1, SrcV2 = V2; if (matchShuffleAsInsertPS(SrcV1, SrcV2, PermuteImm, Zeroable, Mask, @@ -37947,12 +38026,12 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, } } if (MaskEltSizeInBits == 64 && - isTargetShuffleEquivalent(MaskVT, Mask, {0, 2}) && + isTargetShuffleEquivalent(MaskVT, Mask, {0, 2}, DAG) && V2.getOpcode() == ISD::SCALAR_TO_VECTOR && V2.getScalarValueSizeInBits() <= 32) { if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTPS) return SDValue(); // Nothing to do! - PermuteImm = (/*DstIdx*/2 << 4) | (/*SrcIdx*/0 << 0); + PermuteImm = (/*DstIdx*/ 2 << 4) | (/*SrcIdx*/ 0 << 0); Res = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, CanonicalizeShuffleInput(MVT::v4f32, V1), CanonicalizeShuffleInput(MVT::v4f32, V2), @@ -51654,9 +51733,13 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG, // Use XOR (plus OR) and PTEST after SSE4.1 for 128/256-bit operands. // Use PCMPNEQ (plus OR) and KORTEST for 512-bit operands. // Otherwise use PCMPEQ (plus AND) and mask testing. 
- if ((OpSize == 128 && Subtarget.hasSSE2()) || - (OpSize == 256 && Subtarget.hasAVX()) || - (OpSize == 512 && Subtarget.useAVX512Regs())) { + bool NoImplicitFloatOps = + DAG.getMachineFunction().getFunction().hasFnAttribute( + Attribute::NoImplicitFloat); + if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps && + ((OpSize == 128 && Subtarget.hasSSE2()) || + (OpSize == 256 && Subtarget.hasAVX()) || + (OpSize == 512 && Subtarget.useAVX512Regs()))) { bool HasPT = Subtarget.hasSSE41(); // PTEST and MOVMSK are slow on Knights Landing and Knights Mill and widened |