diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2020-07-26 19:36:28 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2020-07-26 19:36:28 +0000 |
commit | cfca06d7963fa0909f90483b42a6d7d194d01e08 (patch) | |
tree | 209fb2a2d68f8f277793fc8df46c753d31bc853b /llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | |
parent | 706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff) | |
download | src-cfca06d7963fa0909f90483b42a6d7d194d01e08.tar.gz src-cfca06d7963fa0909f90483b42a6d7d194d01e08.zip |
Vendor import of llvm-project master 2e10b7a39b9, the last commit before [tags: vendor/llvm-project/llvmorg-11-init-20887-g2e10b7a39b9, vendor/llvm-project/master]
the llvmorg-12-init tag, from which release/11.x was branched.
Notes
Notes:
svn path=/vendor/llvm-project/master/; revision=363578
svn path=/vendor/llvm-project/llvmorg-11-init-20887-g2e10b7a39b9/; revision=363579; tag=vendor/llvm-project/llvmorg-11-init-20887-g2e10b7a39b9
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 312 |
1 file changed, 199 insertions, 113 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 47ce83974c8d..7cfe4c8b5892 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -87,7 +87,10 @@ bool InstCombiner::SimplifyDemandedBits(Instruction *I, unsigned OpNo, Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, Known, Depth, I); if (!NewVal) return false; - U = NewVal; + if (Instruction* OpInst = dyn_cast<Instruction>(U)) + salvageDebugInfo(*OpInst); + + replaceUse(U, NewVal); return true; } @@ -173,15 +176,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?"); assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?"); - // Output known-0 are known to be clear if zero in either the LHS | RHS. - APInt IKnownZero = RHSKnown.Zero | LHSKnown.Zero; - // Output known-1 bits are only known if set in both the LHS & RHS. - APInt IKnownOne = RHSKnown.One & LHSKnown.One; + Known = LHSKnown & RHSKnown; // If the client is only demanding bits that we know, return the known // constant. - if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) - return Constant::getIntegerValue(VTy, IKnownOne); + if (DemandedMask.isSubsetOf(Known.Zero | Known.One)) + return Constant::getIntegerValue(VTy, Known.One); // If all of the demanded bits are known 1 on one side, return the other. // These bits cannot contribute to the result of the 'and'. 
@@ -194,8 +194,6 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnown.Zero)) return I; - Known.Zero = std::move(IKnownZero); - Known.One = std::move(IKnownOne); break; } case Instruction::Or: { @@ -207,15 +205,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?"); assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?"); - // Output known-0 bits are only known if clear in both the LHS & RHS. - APInt IKnownZero = RHSKnown.Zero & LHSKnown.Zero; - // Output known-1 are known. to be set if s.et in either the LHS | RHS. - APInt IKnownOne = RHSKnown.One | LHSKnown.One; + Known = LHSKnown | RHSKnown; // If the client is only demanding bits that we know, return the known // constant. - if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) - return Constant::getIntegerValue(VTy, IKnownOne); + if (DemandedMask.isSubsetOf(Known.Zero | Known.One)) + return Constant::getIntegerValue(VTy, Known.One); // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'or'. @@ -228,8 +223,6 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (ShrinkDemandedConstant(I, 1, DemandedMask)) return I; - Known.Zero = std::move(IKnownZero); - Known.One = std::move(IKnownOne); break; } case Instruction::Xor: { @@ -239,17 +232,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?"); assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?"); - // Output known-0 bits are known if clear or set in both the LHS & RHS. - APInt IKnownZero = (RHSKnown.Zero & LHSKnown.Zero) | - (RHSKnown.One & LHSKnown.One); - // Output known-1 are known to be set if set in only one of the LHS, RHS. 
- APInt IKnownOne = (RHSKnown.Zero & LHSKnown.One) | - (RHSKnown.One & LHSKnown.Zero); + Known = LHSKnown ^ RHSKnown; // If the client is only demanding bits that we know, return the known // constant. - if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) - return Constant::getIntegerValue(VTy, IKnownOne); + if (DemandedMask.isSubsetOf(Known.Zero | Known.One)) + return Constant::getIntegerValue(VTy, Known.One); // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'xor'. @@ -309,10 +297,6 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return InsertNewInstWith(NewXor, *I); } - // Output known-0 bits are known if clear or set in both the LHS & RHS. - Known.Zero = std::move(IKnownZero); - // Output known-1 are known to be set if set in only one of the LHS, RHS. - Known.One = std::move(IKnownOne); break; } case Instruction::Select: { @@ -396,8 +380,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (SimplifyDemandedBits(I, 0, InputDemandedMask, InputKnown, Depth + 1)) return I; assert(InputKnown.getBitWidth() == SrcBitWidth && "Src width changed?"); - Known = InputKnown.zextOrTrunc(BitWidth, - true /* ExtendedBitsAreKnownZero */); + Known = InputKnown.zextOrTrunc(BitWidth); assert(!Known.hasConflict() && "Bits known to be one AND zero?"); break; } @@ -453,6 +436,43 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, break; } case Instruction::Add: + if ((DemandedMask & 1) == 0) { + // If we do not need the low bit, try to convert bool math to logic: + // add iN (zext i1 X), (sext i1 Y) --> sext (~X & Y) to iN + Value *X, *Y; + if (match(I, m_c_Add(m_OneUse(m_ZExt(m_Value(X))), + m_OneUse(m_SExt(m_Value(Y))))) && + X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType()) { + // Truth table for inputs and output signbits: + // X:0 | X:1 + // ---------- + // Y:0 | 0 | 0 | + // Y:1 | -1 | 0 | + // 
---------- + IRBuilderBase::InsertPointGuard Guard(Builder); + Builder.SetInsertPoint(I); + Value *AndNot = Builder.CreateAnd(Builder.CreateNot(X), Y); + return Builder.CreateSExt(AndNot, VTy); + } + + // add iN (sext i1 X), (sext i1 Y) --> sext (X | Y) to iN + // TODO: Relax the one-use checks because we are removing an instruction? + if (match(I, m_Add(m_OneUse(m_SExt(m_Value(X))), + m_OneUse(m_SExt(m_Value(Y))))) && + X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType()) { + // Truth table for inputs and output signbits: + // X:0 | X:1 + // ----------- + // Y:0 | -1 | -1 | + // Y:1 | -1 | 0 | + // ----------- + IRBuilderBase::InsertPointGuard Guard(Builder); + Builder.SetInsertPoint(I); + Value *Or = Builder.CreateOr(X, Y); + return Builder.CreateSExt(Or, VTy); + } + } + LLVM_FALLTHROUGH; case Instruction::Sub: { /// If the high-bits of an ADD/SUB are not demanded, then we do not care /// about the high bits of the operands. @@ -515,11 +535,27 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1)) return I; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + + bool SignBitZero = Known.Zero.isSignBitSet(); + bool SignBitOne = Known.One.isSignBitSet(); Known.Zero <<= ShiftAmt; Known.One <<= ShiftAmt; // low bits known zero. if (ShiftAmt) Known.Zero.setLowBits(ShiftAmt); + + // If this shift has "nsw" keyword, then the result is either a poison + // value or has the same sign bit as the first operand. 
+ if (IOp->hasNoSignedWrap()) { + if (SignBitZero) + Known.Zero.setSignBit(); + else if (SignBitOne) + Known.One.setSignBit(); + if (Known.hasConflict()) + return UndefValue::get(I->getType()); + } + } else { + computeKnownBits(I, Known, Depth, CxtI); } break; } @@ -543,6 +579,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Known.One.lshrInPlace(ShiftAmt); if (ShiftAmt) Known.Zero.setHighBits(ShiftAmt); // high bits known zero. + } else { + computeKnownBits(I, Known, Depth, CxtI); } break; } @@ -603,6 +641,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } else if (Known.One[BitWidth-ShiftAmt-1]) { // New bits are known one. Known.One |= HighBits; } + } else { + computeKnownBits(I, Known, Depth, CxtI); } break; } @@ -624,6 +664,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // Propagate zero bits from the input. Known.Zero.setHighBits(std::min( BitWidth, LHSKnown.Zero.countLeadingOnes() + RHSTrailingZeros)); + } else { + computeKnownBits(I, Known, Depth, CxtI); } break; } @@ -682,7 +724,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Known.Zero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask; break; } - case Instruction::Call: + case Instruction::Call: { + bool KnownBitsComputed = false; if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { switch (II->getIntrinsicID()) { default: break; @@ -714,8 +757,6 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, NewVal->takeName(I); return InsertNewInstWith(NewVal, *I); } - - // TODO: Could compute known zero/one bits based on the input. 
break; } case Intrinsic::fshr: @@ -740,6 +781,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, RHSKnown.Zero.lshr(BitWidth - ShiftAmt); Known.One = LHSKnown.One.shl(ShiftAmt) | RHSKnown.One.lshr(BitWidth - ShiftAmt); + KnownBitsComputed = true; break; } case Intrinsic::x86_mmx_pmovmskb: @@ -768,16 +810,21 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // We know that the upper bits are set to zero. Known.Zero.setBitsFrom(ArgWidth); - return nullptr; + KnownBitsComputed = true; + break; } case Intrinsic::x86_sse42_crc32_64_64: Known.Zero.setBitsFrom(32); - return nullptr; + KnownBitsComputed = true; + break; } } - computeKnownBits(V, Known, Depth, CxtI); + + if (!KnownBitsComputed) + computeKnownBits(V, Known, Depth, CxtI); break; } + } // If the client is only demanding bits that we know, return the known // constant. @@ -811,15 +858,12 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I, computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); - // Output known-0 are known to be clear if zero in either the LHS | RHS. - APInt IKnownZero = RHSKnown.Zero | LHSKnown.Zero; - // Output known-1 bits are only known if set in both the LHS & RHS. - APInt IKnownOne = RHSKnown.One & LHSKnown.One; + Known = LHSKnown & RHSKnown; // If the client is only demanding bits that we know, return the known // constant. - if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) - return Constant::getIntegerValue(ITy, IKnownOne); + if (DemandedMask.isSubsetOf(Known.Zero | Known.One)) + return Constant::getIntegerValue(ITy, Known.One); // If all of the demanded bits are known 1 on one side, return the other. 
// These bits cannot contribute to the result of the 'and' in this @@ -829,8 +873,6 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I, if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One)) return I->getOperand(1); - Known.Zero = std::move(IKnownZero); - Known.One = std::move(IKnownOne); break; } case Instruction::Or: { @@ -842,15 +884,12 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I, computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); - // Output known-0 bits are only known if clear in both the LHS & RHS. - APInt IKnownZero = RHSKnown.Zero & LHSKnown.Zero; - // Output known-1 are known to be set if set in either the LHS | RHS. - APInt IKnownOne = RHSKnown.One | LHSKnown.One; + Known = LHSKnown | RHSKnown; // If the client is only demanding bits that we know, return the known // constant. - if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) - return Constant::getIntegerValue(ITy, IKnownOne); + if (DemandedMask.isSubsetOf(Known.Zero | Known.One)) + return Constant::getIntegerValue(ITy, Known.One); // If all of the demanded bits are known zero on one side, return the // other. These bits cannot contribute to the result of the 'or' in this @@ -860,8 +899,6 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I, if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero)) return I->getOperand(1); - Known.Zero = std::move(IKnownZero); - Known.One = std::move(IKnownOne); break; } case Instruction::Xor: { @@ -872,17 +909,12 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I, computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); - // Output known-0 bits are known if clear or set in both the LHS & RHS. - APInt IKnownZero = (RHSKnown.Zero & LHSKnown.Zero) | - (RHSKnown.One & LHSKnown.One); - // Output known-1 are known to be set if set in only one of the LHS, RHS. 
- APInt IKnownOne = (RHSKnown.Zero & LHSKnown.One) | - (RHSKnown.One & LHSKnown.Zero); + Known = LHSKnown ^ RHSKnown; // If the client is only demanding bits that we know, return the known // constant. - if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) - return Constant::getIntegerValue(ITy, IKnownOne); + if (DemandedMask.isSubsetOf(Known.Zero | Known.One)) + return Constant::getIntegerValue(ITy, Known.One); // If all of the demanded bits are known zero on one side, return the // other. @@ -891,10 +923,6 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I, if (DemandedMask.isSubsetOf(LHSKnown.Zero)) return I->getOperand(1); - // Output known-0 bits are known if clear or set in both the LHS & RHS. - Known.Zero = std::move(IKnownZero); - // Output known-1 are known to be set if set in only one of the LHS, RHS. - Known.One = std::move(IKnownOne); break; } default: @@ -1008,17 +1036,69 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II, DemandedElts.getActiveBits() == 3) return nullptr; - unsigned VWidth = II->getType()->getVectorNumElements(); + auto *IIVTy = cast<VectorType>(II->getType()); + unsigned VWidth = IIVTy->getNumElements(); if (VWidth == 1) return nullptr; - ConstantInt *NewDMask = nullptr; + IRBuilderBase::InsertPointGuard Guard(Builder); + Builder.SetInsertPoint(II); + + // Assume the arguments are unchanged and later override them, if needed. + SmallVector<Value *, 16> Args(II->arg_begin(), II->arg_end()); if (DMaskIdx < 0) { - // Pretend that a prefix of elements is demanded to simplify the code - // below. - DemandedElts = (1 << DemandedElts.getActiveBits()) - 1; + // Buffer case. + + const unsigned ActiveBits = DemandedElts.getActiveBits(); + const unsigned UnusedComponentsAtFront = DemandedElts.countTrailingZeros(); + + // Start assuming the prefix of elements is demanded, but possibly clear + // some other bits if there are trailing zeros (unused components at front) + // and update offset. 
+ DemandedElts = (1 << ActiveBits) - 1; + + if (UnusedComponentsAtFront > 0) { + static const unsigned InvalidOffsetIdx = 0xf; + + unsigned OffsetIdx; + switch (II->getIntrinsicID()) { + case Intrinsic::amdgcn_raw_buffer_load: + OffsetIdx = 1; + break; + case Intrinsic::amdgcn_s_buffer_load: + // If resulting type is vec3, there is no point in trimming the + // load with updated offset, as the vec3 would most likely be widened to + // vec4 anyway during lowering. + if (ActiveBits == 4 && UnusedComponentsAtFront == 1) + OffsetIdx = InvalidOffsetIdx; + else + OffsetIdx = 1; + break; + case Intrinsic::amdgcn_struct_buffer_load: + OffsetIdx = 2; + break; + default: + // TODO: handle tbuffer* intrinsics. + OffsetIdx = InvalidOffsetIdx; + break; + } + + if (OffsetIdx != InvalidOffsetIdx) { + // Clear demanded bits and update the offset. + DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1); + auto *Offset = II->getArgOperand(OffsetIdx); + unsigned SingleComponentSizeInBits = + getDataLayout().getTypeSizeInBits(II->getType()->getScalarType()); + unsigned OffsetAdd = + UnusedComponentsAtFront * SingleComponentSizeInBits / 8; + auto *OffsetAddVal = ConstantInt::get(Offset->getType(), OffsetAdd); + Args[OffsetIdx] = Builder.CreateAdd(Offset, OffsetAddVal); + } + } } else { + // Image case. 
+ ConstantInt *DMask = cast<ConstantInt>(II->getArgOperand(DMaskIdx)); unsigned DMaskVal = DMask->getZExtValue() & 0xf; @@ -1037,7 +1117,7 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II, } if (DMaskVal != NewDMaskVal) - NewDMask = ConstantInt::get(DMask->getType(), NewDMaskVal); + Args[DMaskIdx] = ConstantInt::get(DMask->getType(), NewDMaskVal); } unsigned NewNumElts = DemandedElts.countPopulation(); @@ -1045,39 +1125,25 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II, return UndefValue::get(II->getType()); if (NewNumElts >= VWidth && DemandedElts.isMask()) { - if (NewDMask) - II->setArgOperand(DMaskIdx, NewDMask); + if (DMaskIdx >= 0) + II->setArgOperand(DMaskIdx, Args[DMaskIdx]); return nullptr; } - // Determine the overload types of the original intrinsic. - auto IID = II->getIntrinsicID(); - SmallVector<Intrinsic::IITDescriptor, 16> Table; - getIntrinsicInfoTableEntries(IID, Table); - ArrayRef<Intrinsic::IITDescriptor> TableRef = Table; - // Validate function argument and return types, extracting overloaded types // along the way. - FunctionType *FTy = II->getCalledFunction()->getFunctionType(); SmallVector<Type *, 6> OverloadTys; - Intrinsic::matchIntrinsicSignature(FTy, TableRef, OverloadTys); + if (!Intrinsic::getIntrinsicSignature(II->getCalledFunction(), OverloadTys)) + return nullptr; Module *M = II->getParent()->getParent()->getParent(); - Type *EltTy = II->getType()->getVectorElementType(); - Type *NewTy = (NewNumElts == 1) ? EltTy : VectorType::get(EltTy, NewNumElts); + Type *EltTy = IIVTy->getElementType(); + Type *NewTy = + (NewNumElts == 1) ? 
EltTy : FixedVectorType::get(EltTy, NewNumElts); OverloadTys[0] = NewTy; - Function *NewIntrin = Intrinsic::getDeclaration(M, IID, OverloadTys); - - SmallVector<Value *, 16> Args; - for (unsigned I = 0, E = II->getNumArgOperands(); I != E; ++I) - Args.push_back(II->getArgOperand(I)); - - if (NewDMask) - Args[DMaskIdx] = NewDMask; - - IRBuilderBase::InsertPointGuard Guard(Builder); - Builder.SetInsertPoint(II); + Function *NewIntrin = + Intrinsic::getDeclaration(M, II->getIntrinsicID(), OverloadTys); CallInst *NewCall = Builder.CreateCall(NewIntrin, Args); NewCall->takeName(II); @@ -1088,7 +1154,7 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II, DemandedElts.countTrailingZeros()); } - SmallVector<uint32_t, 8> EltMask; + SmallVector<int, 8> EltMask; unsigned NewLoadIdx = 0; for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) { if (!!DemandedElts[OrigLoadIdx]) @@ -1120,7 +1186,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &UndefElts, unsigned Depth, bool AllowMultipleUsers) { - unsigned VWidth = V->getType()->getVectorNumElements(); + // Cannot analyze scalable type. The number of vector elements is not a + // compile-time constant. + if (isa<ScalableVectorType>(V->getType())) + return nullptr; + + unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements(); APInt EltMask(APInt::getAllOnesValue(VWidth)); assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!"); @@ -1199,10 +1270,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, auto *II = dyn_cast<IntrinsicInst>(Inst); Value *Op = II ? 
II->getArgOperand(OpNum) : Inst->getOperand(OpNum); if (Value *V = SimplifyDemandedVectorElts(Op, Demanded, Undef, Depth + 1)) { - if (II) - II->setArgOperand(OpNum, V); - else - Inst->setOperand(OpNum, V); + replaceOperand(*Inst, OpNum, V); MadeChange = true; } }; @@ -1268,7 +1336,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // If this is inserting an element that isn't demanded, remove this // insertelement. if (IdxNo >= VWidth || !DemandedElts[IdxNo]) { - Worklist.Add(I); + Worklist.push(I); return I->getOperand(0); } @@ -1282,7 +1350,25 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, Shuffle->getOperand(1)->getType() && "Expected shuffle operands to have same type"); unsigned OpWidth = - Shuffle->getOperand(0)->getType()->getVectorNumElements(); + cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements(); + // Handle trivial case of a splat. Only check the first element of LHS + // operand. + if (all_of(Shuffle->getShuffleMask(), [](int Elt) { return Elt == 0; }) && + DemandedElts.isAllOnesValue()) { + if (!isa<UndefValue>(I->getOperand(1))) { + I->setOperand(1, UndefValue::get(I->getOperand(1)->getType())); + MadeChange = true; + } + APInt LeftDemanded(OpWidth, 1); + APInt LHSUndefElts(OpWidth, 0); + simplifyAndSetOp(I, 0, LeftDemanded, LHSUndefElts); + if (LHSUndefElts[0]) + UndefElts = EltMask; + else + UndefElts.clearAllBits(); + break; + } + APInt LeftDemanded(OpWidth, 0), RightDemanded(OpWidth, 0); for (unsigned i = 0; i < VWidth; i++) { if (DemandedElts[i]) { @@ -1396,15 +1482,14 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, } if (NewUndefElts) { // Add additional discovered undefs. 
- SmallVector<Constant*, 16> Elts; + SmallVector<int, 16> Elts; for (unsigned i = 0; i < VWidth; ++i) { if (UndefElts[i]) - Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext()))); + Elts.push_back(UndefMaskElem); else - Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()), - Shuffle->getMaskValue(i))); + Elts.push_back(Shuffle->getMaskValue(i)); } - I->setOperand(2, ConstantVector::get(Elts)); + Shuffle->setShuffleMask(Elts); MadeChange = true; } break; @@ -1549,7 +1634,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // use Arg0 if DemandedElts[0] is clear like we do for other intrinsics. // Instead we should return a zero vector. if (!DemandedElts[0]) { - Worklist.Add(II); + Worklist.push(II); return ConstantAggregateZero::get(II->getType()); } @@ -1568,7 +1653,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // If lowest element of a scalar op isn't used then use Arg0. if (!DemandedElts[0]) { - Worklist.Add(II); + Worklist.push(II); return II->getArgOperand(0); } // TODO: If only low elt lower SQRT to FSQRT (with rounding/exceptions @@ -1588,7 +1673,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // If lowest element of a scalar op isn't used then use Arg0. if (!DemandedElts[0]) { - Worklist.Add(II); + Worklist.push(II); return II->getArgOperand(0); } @@ -1615,7 +1700,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // If lowest element of a scalar op isn't used then use Arg0. if (!DemandedElts[0]) { - Worklist.Add(II); + Worklist.push(II); return II->getArgOperand(0); } @@ -1649,7 +1734,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // If lowest element of a scalar op isn't used then use Arg0. 
if (!DemandedElts[0]) { - Worklist.Add(II); + Worklist.push(II); return II->getArgOperand(0); } @@ -1678,7 +1763,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, case Intrinsic::x86_avx512_packusdw_512: case Intrinsic::x86_avx512_packuswb_512: { auto *Ty0 = II->getArgOperand(0)->getType(); - unsigned InnerVWidth = Ty0->getVectorNumElements(); + unsigned InnerVWidth = cast<VectorType>(Ty0)->getNumElements(); assert(VWidth == (InnerVWidth * 2) && "Unexpected input size"); unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128; @@ -1747,6 +1832,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, case Intrinsic::amdgcn_raw_buffer_load: case Intrinsic::amdgcn_raw_buffer_load_format: case Intrinsic::amdgcn_raw_tbuffer_load: + case Intrinsic::amdgcn_s_buffer_load: case Intrinsic::amdgcn_struct_buffer_load: case Intrinsic::amdgcn_struct_buffer_load_format: case Intrinsic::amdgcn_struct_tbuffer_load: |