path: root/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
author     Dimitry Andric <dim@FreeBSD.org>  2020-07-26 19:36:28 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2020-07-26 19:36:28 +0000
commit     cfca06d7963fa0909f90483b42a6d7d194d01e08 (patch)
tree       209fb2a2d68f8f277793fc8df46c753d31bc853b /llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
parent     706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff)
Vendor import of llvm-project master 2e10b7a39b9, the last commit before
the llvmorg-12-init tag, from which release/11.x was branched.
Notes:
svn path=/vendor/llvm-project/master/; revision=363578
svn path=/vendor/llvm-project/llvmorg-11-init-20887-g2e10b7a39b9/; revision=363579; tag=vendor/llvm-project/llvmorg-11-init-20887-g2e10b7a39b9
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp')
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 312
1 file changed, 199 insertions(+), 113 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 47ce83974c8d..7cfe4c8b5892 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -87,7 +87,10 @@ bool InstCombiner::SimplifyDemandedBits(Instruction *I, unsigned OpNo,
Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, Known,
Depth, I);
if (!NewVal) return false;
- U = NewVal;
+ if (Instruction* OpInst = dyn_cast<Instruction>(U))
+ salvageDebugInfo(*OpInst);
+
+ replaceUse(U, NewVal);
return true;
}
@@ -173,15 +176,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
- // Output known-0 are known to be clear if zero in either the LHS | RHS.
- APInt IKnownZero = RHSKnown.Zero | LHSKnown.Zero;
- // Output known-1 bits are only known if set in both the LHS & RHS.
- APInt IKnownOne = RHSKnown.One & LHSKnown.One;
+ Known = LHSKnown & RHSKnown;
// If the client is only demanding bits that we know, return the known
// constant.
- if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
- return Constant::getIntegerValue(VTy, IKnownOne);
+ if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
+ return Constant::getIntegerValue(VTy, Known.One);
// If all of the demanded bits are known 1 on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
@@ -194,8 +194,6 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnown.Zero))
return I;
- Known.Zero = std::move(IKnownZero);
- Known.One = std::move(IKnownOne);
break;
}
case Instruction::Or: {
@@ -207,15 +205,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
- // Output known-0 bits are only known if clear in both the LHS & RHS.
- APInt IKnownZero = RHSKnown.Zero & LHSKnown.Zero;
- // Output known-1 are known to be set if set in either the LHS | RHS.
- APInt IKnownOne = RHSKnown.One | LHSKnown.One;
+ Known = LHSKnown | RHSKnown;
// If the client is only demanding bits that we know, return the known
// constant.
- if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
- return Constant::getIntegerValue(VTy, IKnownOne);
+ if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
+ return Constant::getIntegerValue(VTy, Known.One);
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'or'.
@@ -228,8 +223,6 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (ShrinkDemandedConstant(I, 1, DemandedMask))
return I;
- Known.Zero = std::move(IKnownZero);
- Known.One = std::move(IKnownOne);
break;
}
case Instruction::Xor: {
@@ -239,17 +232,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
- // Output known-0 bits are known if clear or set in both the LHS & RHS.
- APInt IKnownZero = (RHSKnown.Zero & LHSKnown.Zero) |
- (RHSKnown.One & LHSKnown.One);
- // Output known-1 are known to be set if set in only one of the LHS, RHS.
- APInt IKnownOne = (RHSKnown.Zero & LHSKnown.One) |
- (RHSKnown.One & LHSKnown.Zero);
+ Known = LHSKnown ^ RHSKnown;
// If the client is only demanding bits that we know, return the known
// constant.
- if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
- return Constant::getIntegerValue(VTy, IKnownOne);
+ if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
+ return Constant::getIntegerValue(VTy, Known.One);
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'xor'.
@@ -309,10 +297,6 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return InsertNewInstWith(NewXor, *I);
}
- // Output known-0 bits are known if clear or set in both the LHS & RHS.
- Known.Zero = std::move(IKnownZero);
- // Output known-1 are known to be set if set in only one of the LHS, RHS.
- Known.One = std::move(IKnownOne);
break;
}
case Instruction::Select: {
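
The three hunks above replace hand-rolled IKnownZero/IKnownOne arithmetic with the KnownBits operator overloads. A minimal self-contained sketch of the rules those operators encode (plain C++ at 8 bits with illustrative names, not LLVM's API):

    #include <cassert>
    #include <cstdint>

    struct Known {
      uint8_t Zero; // bits known to be 0
      uint8_t One;  // bits known to be 1
    };

    // AND: known 0 if known 0 on either side; known 1 only if known 1 on both.
    Known knownAnd(Known L, Known R) {
      return {uint8_t(L.Zero | R.Zero), uint8_t(L.One & R.One)};
    }
    // OR: known 0 only if known 0 on both sides; known 1 if known 1 on either.
    Known knownOr(Known L, Known R) {
      return {uint8_t(L.Zero & R.Zero), uint8_t(L.One | R.One)};
    }
    // XOR: known 0 where the inputs are known equal; known 1 where known unequal.
    Known knownXor(Known L, Known R) {
      return {uint8_t((L.Zero & R.Zero) | (L.One & R.One)),
              uint8_t((L.Zero & R.One) | (L.One & R.Zero))};
    }

    int main() {
      Known L{0xF0, 0x01}; // high nibble known 0, bit 0 known 1
      Known R{0x0D, 0x02}; // bits 0, 2, 3 known 0, bit 1 known 1
      assert(knownAnd(L, R).Zero == 0xFD && knownAnd(L, R).One == 0x00);
      assert(knownOr(L, R).One == 0x03);
      assert(knownXor(L, R).One == 0x01); // bit 0: known 1 on left, known 0 on right
      return 0;
    }
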
@@ -396,8 +380,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (SimplifyDemandedBits(I, 0, InputDemandedMask, InputKnown, Depth + 1))
return I;
assert(InputKnown.getBitWidth() == SrcBitWidth && "Src width changed?");
- Known = InputKnown.zextOrTrunc(BitWidth,
- true /* ExtendedBitsAreKnownZero */);
+ Known = InputKnown.zextOrTrunc(BitWidth);
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
break;
}
@@ -453,6 +436,43 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
}
case Instruction::Add:
+ if ((DemandedMask & 1) == 0) {
+ // If we do not need the low bit, try to convert bool math to logic:
+ // add iN (zext i1 X), (sext i1 Y) --> sext (~X & Y) to iN
+ Value *X, *Y;
+ if (match(I, m_c_Add(m_OneUse(m_ZExt(m_Value(X))),
+ m_OneUse(m_SExt(m_Value(Y))))) &&
+ X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType()) {
+ // Truth table for inputs and output signbits:
+ // X:0 | X:1
+ // ----------
+ // Y:0 | 0 | 0 |
+ // Y:1 | -1 | 0 |
+ // ----------
+ IRBuilderBase::InsertPointGuard Guard(Builder);
+ Builder.SetInsertPoint(I);
+ Value *AndNot = Builder.CreateAnd(Builder.CreateNot(X), Y);
+ return Builder.CreateSExt(AndNot, VTy);
+ }
+
+ // add iN (sext i1 X), (sext i1 Y) --> sext (X | Y) to iN
+ // TODO: Relax the one-use checks because we are removing an instruction?
+ if (match(I, m_Add(m_OneUse(m_SExt(m_Value(X))),
+ m_OneUse(m_SExt(m_Value(Y))))) &&
+ X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType()) {
+ // Truth table for inputs and output signbits:
+ // X:0 | X:1
+ // -----------
+ // Y:0 | 0 | -1 |
+ // Y:1 | -1 | -1 |
+ // -----------
+ IRBuilderBase::InsertPointGuard Guard(Builder);
+ Builder.SetInsertPoint(I);
+ Value *Or = Builder.CreateOr(X, Y);
+ return Builder.CreateSExt(Or, VTy);
+ }
+ }
+ LLVM_FALLTHROUGH;
case Instruction::Sub: {
/// If the high-bits of an ADD/SUB are not demanded, then we do not care
/// about the high bits of the operands.
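
The two bool-math folds added in this hunk can be checked exhaustively. A standalone sketch (plain C++, modeling i1 -> i8 zext/sext; since the folds only fire when the low bit is not demanded, the checks mask it off):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int X = 0; X <= 1; ++X) {
        for (int Y = 0; Y <= 1; ++Y) {
          int8_t ZExtX = int8_t(X);          // zext i1 X to i8
          int8_t SExtX = int8_t(X ? -1 : 0); // sext i1 X to i8
          int8_t SExtY = int8_t(Y ? -1 : 0); // sext i1 Y to i8
          // add (zext X), (sext Y) --> sext (~X & Y), up to the undemanded low bit.
          assert(int8_t((ZExtX + SExtY) & ~1) ==
                 int8_t(int8_t((!X && Y) ? -1 : 0) & ~1));
          // add (sext X), (sext Y) --> sext (X | Y), again up to the low bit.
          assert(int8_t((SExtX + SExtY) & ~1) ==
                 int8_t(int8_t((X || Y) ? -1 : 0) & ~1));
        }
      }
      return 0;
    }
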
@@ -515,11 +535,27 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
return I;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+
+ bool SignBitZero = Known.Zero.isSignBitSet();
+ bool SignBitOne = Known.One.isSignBitSet();
Known.Zero <<= ShiftAmt;
Known.One <<= ShiftAmt;
// low bits known zero.
if (ShiftAmt)
Known.Zero.setLowBits(ShiftAmt);
+
+ // If this shift has "nsw" keyword, then the result is either a poison
+ // value or has the same sign bit as the first operand.
+ if (IOp->hasNoSignedWrap()) {
+ if (SignBitZero)
+ Known.Zero.setSignBit();
+ else if (SignBitOne)
+ Known.One.setSignBit();
+ if (Known.hasConflict())
+ return UndefValue::get(I->getType());
+ }
+ } else {
+ computeKnownBits(I, Known, Depth, CxtI);
}
break;
}
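
A plain-C++ illustration of the nsw reasoning in the hunk above: when a signed left shift does not overflow, the result keeps the operand's sign, so a known sign bit on the operand transfers to the result (the shift is computed here via multiplication to avoid shifting negative values):

    #include <cassert>

    int main() {
      for (int X = -128; X <= 127; ++X) {   // all i8 values
        for (int K = 0; K < 8; ++K) {       // all shift amounts
          int Wide = X * (1 << K);          // shl computed without truncation
          bool NoSignedWrap = Wide >= -128 && Wide <= 127;
          if (NoSignedWrap)
            assert((Wide < 0) == (X < 0));  // sign bit is preserved
          // Otherwise the nsw shl is poison; the patch returns undef when the
          // preserved sign bit contradicts the shifted-in known bits.
        }
      }
      return 0;
    }
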
@@ -543,6 +579,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Known.One.lshrInPlace(ShiftAmt);
if (ShiftAmt)
Known.Zero.setHighBits(ShiftAmt); // high bits known zero.
+ } else {
+ computeKnownBits(I, Known, Depth, CxtI);
}
break;
}
@@ -603,6 +641,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
} else if (Known.One[BitWidth-ShiftAmt-1]) { // New bits are known one.
Known.One |= HighBits;
}
+ } else {
+ computeKnownBits(I, Known, Depth, CxtI);
}
break;
}
@@ -624,6 +664,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// Propagate zero bits from the input.
Known.Zero.setHighBits(std::min(
BitWidth, LHSKnown.Zero.countLeadingOnes() + RHSTrailingZeros));
+ } else {
+ computeKnownBits(I, Known, Depth, CxtI);
}
break;
}
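
A worked, exhaustive check of the udiv rule above at 8 bits: the quotient has at least as many leading zero bits as the dividend's leading zeros plus the divisor's trailing zeros (a sketch of the math, not LLVM code):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    static unsigned leadingZeros(uint8_t V) {
      unsigned N = 0;
      for (int B = 7; B >= 0 && !((V >> B) & 1); --B) ++N;
      return N;
    }
    static unsigned trailingZeros(uint8_t V) {
      unsigned N = 0;
      for (int B = 0; B < 8 && !((V >> B) & 1); ++B) ++N;
      return N;
    }

    int main() {
      for (unsigned L = 0; L < 256; ++L)
        for (unsigned R = 1; R < 256; ++R)
          assert(leadingZeros(uint8_t(L / R)) >=
                 std::min(8u, leadingZeros(uint8_t(L)) + trailingZeros(uint8_t(R))));
      return 0;
    }
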
@@ -682,7 +724,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Known.Zero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
break;
}
- case Instruction::Call:
+ case Instruction::Call: {
+ bool KnownBitsComputed = false;
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
default: break;
@@ -714,8 +757,6 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
NewVal->takeName(I);
return InsertNewInstWith(NewVal, *I);
}
-
- // TODO: Could compute known zero/one bits based on the input.
break;
}
case Intrinsic::fshr:
@@ -740,6 +781,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
RHSKnown.Zero.lshr(BitWidth - ShiftAmt);
Known.One = LHSKnown.One.shl(ShiftAmt) |
RHSKnown.One.lshr(BitWidth - ShiftAmt);
+ KnownBitsComputed = true;
break;
}
case Intrinsic::x86_mmx_pmovmskb:
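
The funnel-shift known bits above combine with a plain OR because, for a constant amount, the funnel shift concatenates its operands and each result bit comes from exactly one of them. A small sketch at 8 bits (illustrative helper, not the intrinsic itself):

    #include <cassert>
    #include <cstdint>

    // fshl(L, R, A) == high 8 bits of ((L:R) << A), i.e. (L << A) | (R >> (8 - A)).
    static uint8_t fshl8(uint8_t L, uint8_t R, unsigned A) {
      A &= 7; // the shift amount is taken modulo the bit width
      if (A == 0)
        return L;
      return uint8_t((L << A) | (R >> (8 - A)));
    }

    int main() {
      // Known bits flow through the same OR: high bits from L, low bits from R.
      assert(fshl8(0xB0 /*10110000*/, 0xE0 /*11100000*/, 3) == 0x87 /*10000111*/);
      return 0;
    }
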
@@ -768,16 +810,21 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// We know that the upper bits are set to zero.
Known.Zero.setBitsFrom(ArgWidth);
- return nullptr;
+ KnownBitsComputed = true;
+ break;
}
case Intrinsic::x86_sse42_crc32_64_64:
Known.Zero.setBitsFrom(32);
- return nullptr;
+ KnownBitsComputed = true;
+ break;
}
}
- computeKnownBits(V, Known, Depth, CxtI);
+
+ if (!KnownBitsComputed)
+ computeKnownBits(V, Known, Depth, CxtI);
break;
}
+ }
// If the client is only demanding bits that we know, return the known
// constant.
@@ -811,15 +858,12 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1,
CxtI);
- // Output known-0 are known to be clear if zero in either the LHS | RHS.
- APInt IKnownZero = RHSKnown.Zero | LHSKnown.Zero;
- // Output known-1 bits are only known if set in both the LHS & RHS.
- APInt IKnownOne = RHSKnown.One & LHSKnown.One;
+ Known = LHSKnown & RHSKnown;
// If the client is only demanding bits that we know, return the known
// constant.
- if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
- return Constant::getIntegerValue(ITy, IKnownOne);
+ if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
+ return Constant::getIntegerValue(ITy, Known.One);
// If all of the demanded bits are known 1 on one side, return the other.
// These bits cannot contribute to the result of the 'and' in this
@@ -829,8 +873,6 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
return I->getOperand(1);
- Known.Zero = std::move(IKnownZero);
- Known.One = std::move(IKnownOne);
break;
}
case Instruction::Or: {
@@ -842,15 +884,12 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1,
CxtI);
- // Output known-0 bits are only known if clear in both the LHS & RHS.
- APInt IKnownZero = RHSKnown.Zero & LHSKnown.Zero;
- // Output known-1 are known to be set if set in either the LHS | RHS.
- APInt IKnownOne = RHSKnown.One | LHSKnown.One;
+ Known = LHSKnown | RHSKnown;
// If the client is only demanding bits that we know, return the known
// constant.
- if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
- return Constant::getIntegerValue(ITy, IKnownOne);
+ if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
+ return Constant::getIntegerValue(ITy, Known.One);
// If all of the demanded bits are known zero on one side, return the
// other. These bits cannot contribute to the result of the 'or' in this
@@ -860,8 +899,6 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
return I->getOperand(1);
- Known.Zero = std::move(IKnownZero);
- Known.One = std::move(IKnownOne);
break;
}
case Instruction::Xor: {
@@ -872,17 +909,12 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1,
CxtI);
- // Output known-0 bits are known if clear or set in both the LHS & RHS.
- APInt IKnownZero = (RHSKnown.Zero & LHSKnown.Zero) |
- (RHSKnown.One & LHSKnown.One);
- // Output known-1 are known to be set if set in only one of the LHS, RHS.
- APInt IKnownOne = (RHSKnown.Zero & LHSKnown.One) |
- (RHSKnown.One & LHSKnown.Zero);
+ Known = LHSKnown ^ RHSKnown;
// If the client is only demanding bits that we know, return the known
// constant.
- if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne))
- return Constant::getIntegerValue(ITy, IKnownOne);
+ if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
+ return Constant::getIntegerValue(ITy, Known.One);
// If all of the demanded bits are known zero on one side, return the
// other.
@@ -891,10 +923,6 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
if (DemandedMask.isSubsetOf(LHSKnown.Zero))
return I->getOperand(1);
- // Output known-0 bits are known if clear or set in both the LHS & RHS.
- Known.Zero = std::move(IKnownZero);
- // Output known-1 are known to be set if set in only one of the LHS, RHS.
- Known.One = std::move(IKnownOne);
break;
}
default:
@@ -1008,17 +1036,69 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
DemandedElts.getActiveBits() == 3)
return nullptr;
- unsigned VWidth = II->getType()->getVectorNumElements();
+ auto *IIVTy = cast<VectorType>(II->getType());
+ unsigned VWidth = IIVTy->getNumElements();
if (VWidth == 1)
return nullptr;
- ConstantInt *NewDMask = nullptr;
+ IRBuilderBase::InsertPointGuard Guard(Builder);
+ Builder.SetInsertPoint(II);
+
+ // Assume the arguments are unchanged and later override them, if needed.
+ SmallVector<Value *, 16> Args(II->arg_begin(), II->arg_end());
if (DMaskIdx < 0) {
- // Pretend that a prefix of elements is demanded to simplify the code
- // below.
- DemandedElts = (1 << DemandedElts.getActiveBits()) - 1;
+ // Buffer case.
+
+ const unsigned ActiveBits = DemandedElts.getActiveBits();
+ const unsigned UnusedComponentsAtFront = DemandedElts.countTrailingZeros();
+
+ // Start assuming the prefix of elements is demanded, but possibly clear
+ // some other bits if there are trailing zeros (unused components at front)
+ // and update offset.
+ DemandedElts = (1 << ActiveBits) - 1;
+
+ if (UnusedComponentsAtFront > 0) {
+ static const unsigned InvalidOffsetIdx = 0xf;
+
+ unsigned OffsetIdx;
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::amdgcn_raw_buffer_load:
+ OffsetIdx = 1;
+ break;
+ case Intrinsic::amdgcn_s_buffer_load:
+ // If resulting type is vec3, there is no point in trimming the
+ // load with updated offset, as the vec3 would most likely be widened to
+ // vec4 anyway during lowering.
+ if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
+ OffsetIdx = InvalidOffsetIdx;
+ else
+ OffsetIdx = 1;
+ break;
+ case Intrinsic::amdgcn_struct_buffer_load:
+ OffsetIdx = 2;
+ break;
+ default:
+ // TODO: handle tbuffer* intrinsics.
+ OffsetIdx = InvalidOffsetIdx;
+ break;
+ }
+
+ if (OffsetIdx != InvalidOffsetIdx) {
+ // Clear demanded bits and update the offset.
+ DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
+ auto *Offset = II->getArgOperand(OffsetIdx);
+ unsigned SingleComponentSizeInBits =
+ getDataLayout().getTypeSizeInBits(II->getType()->getScalarType());
+ unsigned OffsetAdd =
+ UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
+ auto *OffsetAddVal = ConstantInt::get(Offset->getType(), OffsetAdd);
+ Args[OffsetIdx] = Builder.CreateAdd(Offset, OffsetAddVal);
+ }
+ }
} else {
+ // Image case.
+
ConstantInt *DMask = cast<ConstantInt>(II->getArgOperand(DMaskIdx));
unsigned DMaskVal = DMask->getZExtValue() & 0xf;
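
The offset adjustment in the buffer case above is plain byte arithmetic. A hedged sketch for a hypothetical <4 x float> raw buffer load where only elements 2 and 3 are demanded (values here are illustrative, not taken from a real run):

    #include <cassert>

    int main() {
      unsigned UnusedComponentsAtFront = 2;    // elements 0 and 1 are never read
      unsigned SingleComponentSizeInBits = 32; // float component
      unsigned OffsetAdd = UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
      // The rewritten intrinsic loads <2 x float> at the old offset plus 8 bytes.
      assert(OffsetAdd == 8);
      return 0;
    }
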
@@ -1037,7 +1117,7 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
}
if (DMaskVal != NewDMaskVal)
- NewDMask = ConstantInt::get(DMask->getType(), NewDMaskVal);
+ Args[DMaskIdx] = ConstantInt::get(DMask->getType(), NewDMaskVal);
}
unsigned NewNumElts = DemandedElts.countPopulation();
@@ -1045,39 +1125,25 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
return UndefValue::get(II->getType());
if (NewNumElts >= VWidth && DemandedElts.isMask()) {
- if (NewDMask)
- II->setArgOperand(DMaskIdx, NewDMask);
+ if (DMaskIdx >= 0)
+ II->setArgOperand(DMaskIdx, Args[DMaskIdx]);
return nullptr;
}
- // Determine the overload types of the original intrinsic.
- auto IID = II->getIntrinsicID();
- SmallVector<Intrinsic::IITDescriptor, 16> Table;
- getIntrinsicInfoTableEntries(IID, Table);
- ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
-
// Validate function argument and return types, extracting overloaded types
// along the way.
- FunctionType *FTy = II->getCalledFunction()->getFunctionType();
SmallVector<Type *, 6> OverloadTys;
- Intrinsic::matchIntrinsicSignature(FTy, TableRef, OverloadTys);
+ if (!Intrinsic::getIntrinsicSignature(II->getCalledFunction(), OverloadTys))
+ return nullptr;
Module *M = II->getParent()->getParent()->getParent();
- Type *EltTy = II->getType()->getVectorElementType();
- Type *NewTy = (NewNumElts == 1) ? EltTy : VectorType::get(EltTy, NewNumElts);
+ Type *EltTy = IIVTy->getElementType();
+ Type *NewTy =
+ (NewNumElts == 1) ? EltTy : FixedVectorType::get(EltTy, NewNumElts);
OverloadTys[0] = NewTy;
- Function *NewIntrin = Intrinsic::getDeclaration(M, IID, OverloadTys);
-
- SmallVector<Value *, 16> Args;
- for (unsigned I = 0, E = II->getNumArgOperands(); I != E; ++I)
- Args.push_back(II->getArgOperand(I));
-
- if (NewDMask)
- Args[DMaskIdx] = NewDMask;
-
- IRBuilderBase::InsertPointGuard Guard(Builder);
- Builder.SetInsertPoint(II);
+ Function *NewIntrin =
+ Intrinsic::getDeclaration(M, II->getIntrinsicID(), OverloadTys);
CallInst *NewCall = Builder.CreateCall(NewIntrin, Args);
NewCall->takeName(II);
@@ -1088,7 +1154,7 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
DemandedElts.countTrailingZeros());
}
- SmallVector<uint32_t, 8> EltMask;
+ SmallVector<int, 8> EltMask;
unsigned NewLoadIdx = 0;
for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
if (!!DemandedElts[OrigLoadIdx])
@@ -1120,7 +1186,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt &UndefElts,
unsigned Depth,
bool AllowMultipleUsers) {
- unsigned VWidth = V->getType()->getVectorNumElements();
+ // Cannot analyze scalable type. The number of vector elements is not a
+ // compile-time constant.
+ if (isa<ScalableVectorType>(V->getType()))
+ return nullptr;
+
+ unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
APInt EltMask(APInt::getAllOnesValue(VWidth));
assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
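
The scalable-vector bail-out above exists because only fixed vectors have a compile-time element count. A small sketch of the distinction using the LLVM types this patch already relies on (assumes the headers in this tree; compiles as a translation unit rather than a standalone program):

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/Support/Casting.h"

    // Returns the element count when it is a compile-time constant, 0 otherwise.
    static unsigned fixedNumEltsOrZero(llvm::Type *Ty) {
      if (auto *FVT = llvm::dyn_cast<llvm::FixedVectorType>(Ty))
        return FVT->getNumElements(); // fixed: <n x T>
      return 0; // scalable <vscale x n x T> or non-vector: count unknown here
    }
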
@@ -1199,10 +1270,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
auto *II = dyn_cast<IntrinsicInst>(Inst);
Value *Op = II ? II->getArgOperand(OpNum) : Inst->getOperand(OpNum);
if (Value *V = SimplifyDemandedVectorElts(Op, Demanded, Undef, Depth + 1)) {
- if (II)
- II->setArgOperand(OpNum, V);
- else
- Inst->setOperand(OpNum, V);
+ replaceOperand(*Inst, OpNum, V);
MadeChange = true;
}
};
@@ -1268,7 +1336,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// If this is inserting an element that isn't demanded, remove this
// insertelement.
if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
- Worklist.Add(I);
+ Worklist.push(I);
return I->getOperand(0);
}
@@ -1282,7 +1350,25 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
Shuffle->getOperand(1)->getType() &&
"Expected shuffle operands to have same type");
unsigned OpWidth =
- Shuffle->getOperand(0)->getType()->getVectorNumElements();
+ cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements();
+ // Handle trivial case of a splat. Only check the first element of LHS
+ // operand.
+ if (all_of(Shuffle->getShuffleMask(), [](int Elt) { return Elt == 0; }) &&
+ DemandedElts.isAllOnesValue()) {
+ if (!isa<UndefValue>(I->getOperand(1))) {
+ I->setOperand(1, UndefValue::get(I->getOperand(1)->getType()));
+ MadeChange = true;
+ }
+ APInt LeftDemanded(OpWidth, 1);
+ APInt LHSUndefElts(OpWidth, 0);
+ simplifyAndSetOp(I, 0, LeftDemanded, LHSUndefElts);
+ if (LHSUndefElts[0])
+ UndefElts = EltMask;
+ else
+ UndefElts.clearAllBits();
+ break;
+ }
+
APInt LeftDemanded(OpWidth, 0), RightDemanded(OpWidth, 0);
for (unsigned i = 0; i < VWidth; i++) {
if (DemandedElts[i]) {
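
The splat special case in the hunk above relies on an all-zeros shuffle mask broadcasting element 0 of the first operand, so only that element is demanded no matter which result lanes are used. A tiny self-contained model:

    #include <array>
    #include <cassert>

    int main() {
      std::array<int, 4> LHS = {42, 7, 8, 9};
      std::array<int, 4> Mask = {0, 0, 0, 0}; // splat mask: every lane reads LHS[0]
      std::array<int, 4> Out{};
      for (unsigned I = 0; I != 4; ++I)
        Out[I] = LHS[Mask[I]];
      for (int V : Out)
        assert(V == 42); // only LHS[0] was demanded
      return 0;
    }
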
@@ -1396,15 +1482,14 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
}
if (NewUndefElts) {
// Add additional discovered undefs.
- SmallVector<Constant*, 16> Elts;
+ SmallVector<int, 16> Elts;
for (unsigned i = 0; i < VWidth; ++i) {
if (UndefElts[i])
- Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext())));
+ Elts.push_back(UndefMaskElem);
else
- Elts.push_back(ConstantInt::get(Type::getInt32Ty(I->getContext()),
- Shuffle->getMaskValue(i)));
+ Elts.push_back(Shuffle->getMaskValue(i));
}
- I->setOperand(2, ConstantVector::get(Elts));
+ Shuffle->setShuffleMask(Elts);
MadeChange = true;
}
break;
@@ -1549,7 +1634,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// use Arg0 if DemandedElts[0] is clear like we do for other intrinsics.
// Instead we should return a zero vector.
if (!DemandedElts[0]) {
- Worklist.Add(II);
+ Worklist.push(II);
return ConstantAggregateZero::get(II->getType());
}
@@ -1568,7 +1653,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// If lowest element of a scalar op isn't used then use Arg0.
if (!DemandedElts[0]) {
- Worklist.Add(II);
+ Worklist.push(II);
return II->getArgOperand(0);
}
// TODO: If only low elt lower SQRT to FSQRT (with rounding/exceptions
@@ -1588,7 +1673,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// If lowest element of a scalar op isn't used then use Arg0.
if (!DemandedElts[0]) {
- Worklist.Add(II);
+ Worklist.push(II);
return II->getArgOperand(0);
}
@@ -1615,7 +1700,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// If lowest element of a scalar op isn't used then use Arg0.
if (!DemandedElts[0]) {
- Worklist.Add(II);
+ Worklist.push(II);
return II->getArgOperand(0);
}
@@ -1649,7 +1734,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// If lowest element of a scalar op isn't used then use Arg0.
if (!DemandedElts[0]) {
- Worklist.Add(II);
+ Worklist.push(II);
return II->getArgOperand(0);
}
@@ -1678,7 +1763,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
case Intrinsic::x86_avx512_packusdw_512:
case Intrinsic::x86_avx512_packuswb_512: {
auto *Ty0 = II->getArgOperand(0)->getType();
- unsigned InnerVWidth = Ty0->getVectorNumElements();
+ unsigned InnerVWidth = cast<VectorType>(Ty0)->getNumElements();
assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");
unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
@@ -1747,6 +1832,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
case Intrinsic::amdgcn_raw_buffer_load:
case Intrinsic::amdgcn_raw_buffer_load_format:
case Intrinsic::amdgcn_raw_tbuffer_load:
+ case Intrinsic::amdgcn_s_buffer_load:
case Intrinsic::amdgcn_struct_buffer_load:
case Intrinsic::amdgcn_struct_buffer_load_format:
case Intrinsic::amdgcn_struct_tbuffer_load: