Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp')
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 777
1 file changed, 589 insertions(+), 188 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 5482b944e347..726bb545be12 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
@@ -39,6 +40,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
@@ -67,6 +69,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
+#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include <algorithm>
@@ -89,6 +92,12 @@ static cl::opt<unsigned> GuardWideningWindow(
cl::desc("How wide an instruction window to bypass looking for "
"another guard"));
+namespace llvm {
+/// Enable preservation of attributes in assumes like:
+/// call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
+extern cl::opt<bool> EnableKnowledgeRetention;
+} // namespace llvm
+
/// Return the specified type promoted as it would be to pass through a va_arg
/// area.
static Type *getPromotedType(Type *Ty) {
@@ -283,16 +292,20 @@ Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
// If the mask is all ones or undefs, this is a plain vector load of the 1st
// argument.
- if (maskIsAllOneOrUndef(II.getArgOperand(2)))
- return Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
- "unmaskedload");
+ if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
+ LoadInst *L = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
+ "unmaskedload");
+ L->copyMetadata(II);
+ return L;
+ }
// If we can unconditionally load from this address, replace with a
// load/select idiom. TODO: use DT for context sensitive query
if (isDereferenceablePointer(LoadPtr, II.getType(),
II.getModule()->getDataLayout(), &II, nullptr)) {
- Value *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
- "unmaskedload");
+ LoadInst *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
+ "unmaskedload");
+ LI->copyMetadata(II);
return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3));
}
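As a sketch of the fold above (hypothetical IR, value names invented), an all-ones mask now becomes a plain load that keeps the intrinsic's metadata (e.g. !nontemporal, if present):
    %v = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %p, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  -->
    %v = load <4 x i32>, <4 x i32>* %p, align 4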
@@ -315,7 +328,10 @@ Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
if (ConstMask->isAllOnesValue()) {
Value *StorePtr = II.getArgOperand(1);
Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
- return new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
+ StoreInst *S =
+ new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
+ S->copyMetadata(II);
+ return S;
}
if (isa<ScalableVectorType>(ConstMask->getType()))
@@ -385,7 +401,13 @@ static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
InstCombinerImpl &IC) {
auto *Arg = II.getArgOperand(0);
auto *StrippedArg = Arg->stripPointerCasts();
- auto *StrippedInvariantGroupsArg = Arg->stripPointerCastsAndInvariantGroups();
+ auto *StrippedInvariantGroupsArg = StrippedArg;
+ while (auto *Intr = dyn_cast<IntrinsicInst>(StrippedInvariantGroupsArg)) {
+ if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group &&
+ Intr->getIntrinsicID() != Intrinsic::strip_invariant_group)
+ break;
+ StrippedInvariantGroupsArg = Intr->getArgOperand(0)->stripPointerCasts();
+ }
if (StrippedArg == StrippedInvariantGroupsArg)
return nullptr; // No launders/strips to remove.
@@ -413,6 +435,7 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
"Expected cttz or ctlz intrinsic");
bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
Value *Op0 = II.getArgOperand(0);
+ Value *Op1 = II.getArgOperand(1);
Value *X;
// ctlz(bitreverse(x)) -> cttz(x)
// cttz(bitreverse(x)) -> ctlz(x)
@@ -422,11 +445,43 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
return CallInst::Create(F, {X, II.getArgOperand(1)});
}
+ if (II.getType()->isIntOrIntVectorTy(1)) {
+ // ctlz/cttz i1 Op0 --> not Op0
+ if (match(Op1, m_Zero()))
+ return BinaryOperator::CreateNot(Op0);
+ // If zero is undef, then the input can be assumed to be "true", so the
+ // instruction simplifies to "false".
+ assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1");
+ return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
+ }
+
+ // If the operand is a select with constant arm(s), try to hoist ctlz/cttz.
+ if (auto *Sel = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = IC.FoldOpIntoSelect(II, Sel))
+ return R;
+
if (IsTZ) {
// cttz(-x) -> cttz(x)
if (match(Op0, m_Neg(m_Value(X))))
return IC.replaceOperand(II, 0, X);
+ // cttz(sext(x)) -> cttz(zext(x))
+ if (match(Op0, m_OneUse(m_SExt(m_Value(X))))) {
+ auto *Zext = IC.Builder.CreateZExt(X, II.getType());
+ auto *CttzZext =
+ IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, Zext, Op1);
+ return IC.replaceInstUsesWith(II, CttzZext);
+ }
+
+ // Zext doesn't change the number of trailing zeros, so narrow:
+ // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsUndef' parameter is 'true'.
+ if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && match(Op1, m_One())) {
+ auto *Cttz = IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, X,
+ IC.Builder.getTrue());
+ auto *ZextCttz = IC.Builder.CreateZExt(Cttz, II.getType());
+ return IC.replaceInstUsesWith(II, ZextCttz);
+ }
+
// cttz(abs(x)) -> cttz(x)
// cttz(nabs(x)) -> cttz(x)
Value *Y;
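A sketch of the zext narrowing above, assuming the 'zero is undef' flag is set (invented value names):
    %z = zext i8 %x to i32
    %r = call i32 @llvm.cttz.i32(i32 %z, i1 true)
  -->
    %c = call i8 @llvm.cttz.i8(i8 %x, i1 true)
    %r = zext i8 %c to i32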
@@ -486,13 +541,19 @@ static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
Type *Ty = II.getType();
unsigned BitWidth = Ty->getScalarSizeInBits();
Value *Op0 = II.getArgOperand(0);
- Value *X;
+ Value *X, *Y;
// ctpop(bitreverse(x)) -> ctpop(x)
// ctpop(bswap(x)) -> ctpop(x)
if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X))))
return IC.replaceOperand(II, 0, X);
+ // ctpop(rot(x)) -> ctpop(x)
+ if ((match(Op0, m_FShl(m_Value(X), m_Value(Y), m_Value())) ||
+ match(Op0, m_FShr(m_Value(X), m_Value(Y), m_Value()))) &&
+ X == Y)
+ return IC.replaceOperand(II, 0, X);
+
// ctpop(x | -x) -> bitwidth - cttz(x, false)
if (Op0->hasOneUse() &&
match(Op0, m_c_Or(m_Value(X), m_Neg(m_Deferred(X))))) {
@@ -511,18 +572,36 @@ static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
return CallInst::Create(F, {X, IC.Builder.getFalse()});
}
+ // Zext doesn't change the number of set bits, so narrow:
+ // ctpop (zext X) --> zext (ctpop X)
+ if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) {
+ Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, X);
+ return CastInst::Create(Instruction::ZExt, NarrowPop, Ty);
+ }
+
+ // If the operand is a select with constant arm(s), try to hoist ctpop.
+ if (auto *Sel = dyn_cast<SelectInst>(Op0))
+ if (Instruction *R = IC.FoldOpIntoSelect(II, Sel))
+ return R;
+
+ KnownBits Known(BitWidth);
+ IC.computeKnownBits(Op0, Known, 0, &II);
+
+ // If all bits are zero except for exactly one fixed bit, then the result
+ // must be 0 or 1, and we can get that answer by shifting to LSB:
+ // ctpop (X & 32) --> (X & 32) >> 5
+ if ((~Known.Zero).isPowerOf2())
+ return BinaryOperator::CreateLShr(
+ Op0, ConstantInt::get(Ty, (~Known.Zero).exactLogBase2()));
+
// FIXME: Try to simplify vectors of integers.
auto *IT = dyn_cast<IntegerType>(Ty);
if (!IT)
return nullptr;
- KnownBits Known(BitWidth);
- IC.computeKnownBits(Op0, Known, 0, &II);
-
+ // Add range metadata since known bits can't completely reflect what we know.
unsigned MinCount = Known.countMinPopulation();
unsigned MaxCount = Known.countMaxPopulation();
-
- // Add range metadata since known bits can't completely reflect what we know.
if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
Metadata *LowAndHigh[] = {
ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)),
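For example (a sketch, not from the patch), the single-fixed-bit case from the comment above reduces a population count to a shift:
    %m = and i32 %x, 32
    %r = call i32 @llvm.ctpop.i32(i32 %m)
  -->
    %m = and i32 %x, 32
    %r = lshr i32 %m, 5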
@@ -675,6 +754,47 @@ static Optional<bool> getKnownSign(Value *Op, Instruction *CxtI,
ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL);
}
+/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
+/// can only be one of two possible constant values -- turn that into a select
+/// of constants.
+static Instruction *foldClampRangeOfTwo(IntrinsicInst *II,
+ InstCombiner::BuilderTy &Builder) {
+ Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
+ Value *X;
+ const APInt *C0, *C1;
+ if (!match(I1, m_APInt(C1)) || !I0->hasOneUse())
+ return nullptr;
+
+ CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::smax:
+ if (match(I0, m_SMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
+ Pred = ICmpInst::ICMP_SGT;
+ break;
+ case Intrinsic::smin:
+ if (match(I0, m_SMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
+ Pred = ICmpInst::ICMP_SLT;
+ break;
+ case Intrinsic::umax:
+ if (match(I0, m_UMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
+ Pred = ICmpInst::ICMP_UGT;
+ break;
+ case Intrinsic::umin:
+ if (match(I0, m_UMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
+ Pred = ICmpInst::ICMP_ULT;
+ break;
+ default:
+ llvm_unreachable("Expected min/max intrinsic");
+ }
+ if (Pred == CmpInst::BAD_ICMP_PREDICATE)
+ return nullptr;
+
+ // max (min X, 42), 41 --> X > 41 ? 42 : 41
+ // min (max X, 42), 43 --> X < 43 ? 42 : 43
+ Value *Cmp = Builder.CreateICmp(Pred, X, I1);
+ return SelectInst::Create(Cmp, ConstantInt::get(II->getType(), *C0), I1);
+}
+
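A hypothetical instance of the clamp-to-two-values fold, transcribing the comment's example into IR (names invented):
    %lo = call i32 @llvm.smin.i32(i32 %x, i32 42)
    %r  = call i32 @llvm.smax.i32(i32 %lo, i32 41)
  -->
    %cmp = icmp sgt i32 %x, 41
    %r   = select i1 %cmp, i32 42, i32 41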
/// CallInst simplification. This mostly only handles folding of intrinsic
/// instructions. For normal calls, it allows visitCallBase to do the heavy
/// lifting.
@@ -828,17 +948,41 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
return CastInst::Create(Instruction::ZExt, NarrowAbs, II->getType());
}
+ // Match a complicated way to check if a number is odd/even:
+ // abs (srem X, 2) --> and X, 1
+ const APInt *C;
+ if (match(IIOperand, m_SRem(m_Value(X), m_APInt(C))) && *C == 2)
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(II->getType(), 1));
+
break;
}
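Sketched in IR (invented names), the odd/even check above becomes:
    %s = srem i32 %x, 2
    %r = call i32 @llvm.abs.i32(i32 %s, i1 false)
  -->
    %r = and i32 %x, 1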
- case Intrinsic::umax:
case Intrinsic::umin: {
Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
+ // umin(x, 1) == zext(x != 0)
+ if (match(I1, m_One())) {
+ Value *Zero = Constant::getNullValue(I0->getType());
+ Value *Cmp = Builder.CreateICmpNE(I0, Zero);
+ return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
+ }
+ LLVM_FALLTHROUGH;
+ }
+ case Intrinsic::umax: {
+ Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
Value *X, *Y;
if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_ZExt(m_Value(Y))) &&
(I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
}
+ Constant *C;
+ if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_Constant(C)) &&
+ I0->hasOneUse()) {
+ Constant *NarrowC = ConstantExpr::getTrunc(C, X->getType());
+ if (ConstantExpr::getZExt(NarrowC, II->getType()) == C) {
+ Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
+ return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
+ }
+ }
// If both operands of unsigned min/max are sign-extended, it is still ok
// to narrow the operation.
LLVM_FALLTHROUGH;
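The umin(x, 1) fold above, as a sketch in IR (hypothetical names):
    %r = call i32 @llvm.umin.i32(i32 %x, i32 1)
  -->
    %ne = icmp ne i32 %x, 0
    %r  = zext i1 %ne to i32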
@@ -852,6 +996,66 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
}
+
+ Constant *C;
+ if (match(I0, m_SExt(m_Value(X))) && match(I1, m_Constant(C)) &&
+ I0->hasOneUse()) {
+ Constant *NarrowC = ConstantExpr::getTrunc(C, X->getType());
+ if (ConstantExpr::getSExt(NarrowC, II->getType()) == C) {
+ Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
+ return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
+ }
+ }
+
+ if (match(I0, m_Not(m_Value(X)))) {
+ // max (not X), (not Y) --> not (min X, Y)
+ Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
+ if (match(I1, m_Not(m_Value(Y))) &&
+ (I0->hasOneUse() || I1->hasOneUse())) {
+ Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
+ return BinaryOperator::CreateNot(InvMaxMin);
+ }
+ // max (not X), C --> not(min X, ~C)
+ if (match(I1, m_Constant(C)) && I0->hasOneUse()) {
+ Constant *NotC = ConstantExpr::getNot(C);
+ Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, NotC);
+ return BinaryOperator::CreateNot(InvMaxMin);
+ }
+ }
+
+ // smax(X, -X) --> abs(X)
+ // smin(X, -X) --> -abs(X)
+ // umax(X, -X) --> -abs(X)
+ // umin(X, -X) --> abs(X)
+ if (isKnownNegation(I0, I1)) {
+ // We can choose either operand as the input to abs(), but if we can
+ // eliminate the only use of a value, that's better for subsequent
+ // transforms/analysis.
+ if (I0->hasOneUse() && !I1->hasOneUse())
+ std::swap(I0, I1);
+
+ // This is some variant of abs(). See if we can propagate 'nsw' to the abs
+ // operation and potentially its negation.
+ bool IntMinIsPoison = isKnownNegation(I0, I1, /* NeedNSW */ true);
+ Value *Abs = Builder.CreateBinaryIntrinsic(
+ Intrinsic::abs, I0,
+ ConstantInt::getBool(II->getContext(), IntMinIsPoison));
+
+ // We don't have a "nabs" intrinsic, so negate if needed based on the
+ // max/min operation.
+ if (IID == Intrinsic::smin || IID == Intrinsic::umax)
+ Abs = Builder.CreateNeg(Abs, "nabs", /* NUW */ false, IntMinIsPoison);
+ return replaceInstUsesWith(CI, Abs);
+ }
+
+ if (Instruction *Sel = foldClampRangeOfTwo(II, Builder))
+ return Sel;
+
+ if (match(I1, m_ImmConstant()))
+ if (auto *Sel = dyn_cast<SelectInst>(I0))
+ if (Instruction *R = FoldOpIntoSelect(*II, Sel))
+ return R;
+
break;
}
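A sketch of the abs recognition (invented names; with an nsw negation, the i1 'int-min is poison' flag would be true instead):
    %n = sub i32 0, %x
    %r = call i32 @llvm.smax.i32(i32 %x, i32 %n)
  -->
    %r = call i32 @llvm.abs.i32(i32 %x, i1 false)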
case Intrinsic::bswap: {
@@ -1178,22 +1382,28 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
}
- Value *ExtSrc0;
- Value *ExtSrc1;
-
- // minnum (fpext x), (fpext y) -> minnum x, y
- // maxnum (fpext x), (fpext y) -> maxnum x, y
- if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc0)))) &&
- match(II->getArgOperand(1), m_OneUse(m_FPExt(m_Value(ExtSrc1)))) &&
- ExtSrc0->getType() == ExtSrc1->getType()) {
- Function *F = Intrinsic::getDeclaration(
- II->getModule(), II->getIntrinsicID(), {ExtSrc0->getType()});
- CallInst *NewCall = Builder.CreateCall(F, { ExtSrc0, ExtSrc1 });
- NewCall->copyFastMathFlags(II);
- NewCall->takeName(II);
+ // m((fpext X), (fpext Y)) -> fpext (m(X, Y))
+ if (match(Arg0, m_OneUse(m_FPExt(m_Value(X)))) &&
+ match(Arg1, m_OneUse(m_FPExt(m_Value(Y)))) &&
+ X->getType() == Y->getType()) {
+ Value *NewCall =
+ Builder.CreateBinaryIntrinsic(IID, X, Y, II, II->getName());
return new FPExtInst(NewCall, II->getType());
}
+ // max X, -X --> fabs X
+ // min X, -X --> -(fabs X)
+ // TODO: Remove one-use limitation? That is obviously better for max.
+ // It would be an extra instruction for min (fnabs), but that is
+ // still likely better for analysis and codegen.
+ if ((match(Arg0, m_OneUse(m_FNeg(m_Value(X)))) && Arg1 == X) ||
+ (match(Arg1, m_OneUse(m_FNeg(m_Value(X)))) && Arg0 == X)) {
+ Value *R = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, II);
+ if (IID == Intrinsic::minimum || IID == Intrinsic::minnum)
+ R = Builder.CreateFNegFMF(R, II);
+ return replaceInstUsesWith(*II, R);
+ }
+
break;
}
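Illustrative sketch of the fabs fold (hypothetical values; per the code above, minnum/minimum would additionally negate the result):
    %n = fneg float %x
    %r = call float @llvm.maxnum.f32(float %x, float %n)
  -->
    %r = call float @llvm.fabs.f32(float %x)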
case Intrinsic::fmuladd: {
@@ -1493,15 +1703,23 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
Value *IIOperand = II->getArgOperand(0);
SmallVector<OperandBundleDef, 4> OpBundles;
II->getOperandBundlesAsDefs(OpBundles);
- bool HasOpBundles = !OpBundles.empty();
+
+ /// This will remove the boolean Condition from the assume given as an
+ /// argument and remove the assume if it becomes useless.
+ /// It always returns nullptr, for use as a return value.
+ auto RemoveConditionFromAssume = [&](Instruction *Assume) -> Instruction * {
+ assert(isa<AssumeInst>(Assume));
+ if (isAssumeWithEmptyBundle(*cast<AssumeInst>(II)))
+ return eraseInstFromFunction(CI);
+ replaceUse(II->getOperandUse(0), ConstantInt::getTrue(II->getContext()));
+ return nullptr;
+ };
// Remove an assume if it is followed by an identical assume.
// TODO: Do we need this? Unless there are conflicting assumptions, the
// computeKnownBits(IIOperand) below here eliminates redundant assumes.
Instruction *Next = II->getNextNonDebugInstruction();
- if (HasOpBundles &&
- match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))) &&
- !cast<IntrinsicInst>(Next)->hasOperandBundles())
- return eraseInstFromFunction(CI);
+ if (match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))))
+ return RemoveConditionFromAssume(Next);
// Canonicalize assume(a && b) -> assume(a); assume(b);
// Note: New assumption intrinsics created here are registered by
@@ -1534,121 +1752,108 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
isValidAssumeForContext(II, LHS, &DT)) {
MDNode *MD = MDNode::get(II->getContext(), None);
LHS->setMetadata(LLVMContext::MD_nonnull, MD);
- if (!HasOpBundles)
- return eraseInstFromFunction(*II);
+ return RemoveConditionFromAssume(II);
// TODO: apply nonnull return attributes to calls and invokes
// TODO: apply range metadata for range check patterns?
}
- // If there is a dominating assume with the same condition as this one,
- // then this one is redundant, and should be removed.
- KnownBits Known(1);
- computeKnownBits(IIOperand, Known, 0, II);
- if (Known.isAllOnes() && isAssumeWithEmptyBundle(*II))
- return eraseInstFromFunction(*II);
-
- // Update the cache of affected values for this assumption (we might be
- // here because we just simplified the condition).
- AC.updateAffectedValues(II);
- break;
- }
- case Intrinsic::experimental_gc_statepoint: {
- GCStatepointInst &GCSP = *cast<GCStatepointInst>(II);
- SmallPtrSet<Value *, 32> LiveGcValues;
- for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
- GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
-
- // Remove the relocation if unused.
- if (GCR.use_empty()) {
- eraseInstFromFunction(GCR);
- continue;
+ // Convert nonnull assume like:
+ // %A = icmp ne i32* %PTR, null
+ // call void @llvm.assume(i1 %A)
+ // into
+ // call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
+ if (EnableKnowledgeRetention &&
+ match(IIOperand, m_Cmp(Pred, m_Value(A), m_Zero())) &&
+ Pred == CmpInst::ICMP_NE && A->getType()->isPointerTy()) {
+ if (auto *Replacement = buildAssumeFromKnowledge(
+ {RetainedKnowledge{Attribute::NonNull, 0, A}}, Next, &AC, &DT)) {
+
+ Replacement->insertBefore(Next);
+ AC.registerAssumption(Replacement);
+ return RemoveConditionFromAssume(II);
}
+ }
- Value *DerivedPtr = GCR.getDerivedPtr();
- Value *BasePtr = GCR.getBasePtr();
-
- // Undef is undef, even after relocation.
- if (isa<UndefValue>(DerivedPtr) || isa<UndefValue>(BasePtr)) {
- replaceInstUsesWith(GCR, UndefValue::get(GCR.getType()));
- eraseInstFromFunction(GCR);
- continue;
+ // Convert alignment assume like:
+ // %B = ptrtoint i32* %A to i64
+ // %C = and i64 %B, Constant
+ // %D = icmp eq i64 %C, 0
+ // call void @llvm.assume(i1 %D)
+ // into
+ // call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)]
+ uint64_t AlignMask;
+ if (EnableKnowledgeRetention &&
+ match(IIOperand,
+ m_Cmp(Pred, m_And(m_Value(A), m_ConstantInt(AlignMask)),
+ m_Zero())) &&
+ Pred == CmpInst::ICMP_EQ) {
+ if (isPowerOf2_64(AlignMask + 1)) {
+ uint64_t Offset = 0;
+ match(A, m_Add(m_Value(A), m_ConstantInt(Offset)));
+ if (match(A, m_PtrToInt(m_Value(A)))) {
+ /// Note: this doesn't preserve the offset information but merges
+ /// offset and alignment.
+ /// TODO: we can generate a GEP instead of merging the alignment with
+ /// the offset.
+ RetainedKnowledge RK{Attribute::Alignment,
+ (unsigned)MinAlign(Offset, AlignMask + 1), A};
+ if (auto *Replacement =
+ buildAssumeFromKnowledge(RK, Next, &AC, &DT)) {
+
+ Replacement->insertAfter(II);
+ AC.registerAssumption(Replacement);
+ }
+ return RemoveConditionFromAssume(II);
+ }
}
+ }
- if (auto *PT = dyn_cast<PointerType>(GCR.getType())) {
- // The relocation of null will be null for most any collector.
- // TODO: provide a hook for this in GCStrategy. There might be some
- // weird collector this property does not hold for.
- if (isa<ConstantPointerNull>(DerivedPtr)) {
- // Use null-pointer of gc_relocate's type to replace it.
- replaceInstUsesWith(GCR, ConstantPointerNull::get(PT));
- eraseInstFromFunction(GCR);
+ /// Canonicalize Knowledge in operand bundles.
+ if (EnableKnowledgeRetention && II->hasOperandBundles()) {
+ for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
+ auto &BOI = II->bundle_op_info_begin()[Idx];
+ RetainedKnowledge RK =
+ llvm::getKnowledgeFromBundle(cast<AssumeInst>(*II), BOI);
+ if (BOI.End - BOI.Begin > 2)
+ continue; // Prevent reducing knowledge in an align with offset since
+ // extracting a RetainedKnowledge from them loses offset
+ // information
+ RetainedKnowledge CanonRK =
+ llvm::simplifyRetainedKnowledge(cast<AssumeInst>(II), RK,
+ &getAssumptionCache(),
+ &getDominatorTree());
+ if (CanonRK == RK)
+ continue;
+ if (!CanonRK) {
+ if (BOI.End - BOI.Begin > 0) {
+ Worklist.pushValue(II->op_begin()[BOI.Begin]);
+ Value::dropDroppableUse(II->op_begin()[BOI.Begin]);
+ }
continue;
}
-
- // isKnownNonNull -> nonnull attribute
- if (!GCR.hasRetAttr(Attribute::NonNull) &&
- isKnownNonZero(DerivedPtr, DL, 0, &AC, II, &DT)) {
- GCR.addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
- // We discovered new fact, re-check users.
- Worklist.pushUsersToWorkList(GCR);
- }
- }
-
- // If we have two copies of the same pointer in the statepoint argument
- // list, canonicalize to one. This may let us common gc.relocates.
- if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
- GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
- auto *OpIntTy = GCR.getOperand(2)->getType();
- GCR.setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex()));
+ assert(RK.AttrKind == CanonRK.AttrKind);
+ if (BOI.End - BOI.Begin > 0)
+ II->op_begin()[BOI.Begin].set(CanonRK.WasOn);
+ if (BOI.End - BOI.Begin > 1)
+ II->op_begin()[BOI.Begin + 1].set(ConstantInt::get(
+ Type::getInt64Ty(II->getContext()), CanonRK.ArgValue));
+ if (RK.WasOn)
+ Worklist.pushValue(RK.WasOn);
+ return II;
}
+ }
- // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
- // Canonicalize on the type from the uses to the defs
+ // If there is a dominating assume with the same condition as this one,
+ // then this one is redundant, and should be removed.
+ KnownBits Known(1);
+ computeKnownBits(IIOperand, Known, 0, II);
+ if (Known.isAllOnes() && isAssumeWithEmptyBundle(cast<AssumeInst>(*II)))
+ return eraseInstFromFunction(*II);
- // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
- LiveGcValues.insert(BasePtr);
- LiveGcValues.insert(DerivedPtr);
- }
- Optional<OperandBundleUse> Bundle =
- GCSP.getOperandBundle(LLVMContext::OB_gc_live);
- unsigned NumOfGCLives = LiveGcValues.size();
- if (!Bundle.hasValue() || NumOfGCLives == Bundle->Inputs.size())
- break;
- // We can reduce the size of gc live bundle.
- DenseMap<Value *, unsigned> Val2Idx;
- std::vector<Value *> NewLiveGc;
- for (unsigned I = 0, E = Bundle->Inputs.size(); I < E; ++I) {
- Value *V = Bundle->Inputs[I];
- if (Val2Idx.count(V))
- continue;
- if (LiveGcValues.count(V)) {
- Val2Idx[V] = NewLiveGc.size();
- NewLiveGc.push_back(V);
- } else
- Val2Idx[V] = NumOfGCLives;
- }
- // Update all gc.relocates
- for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
- GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
- Value *BasePtr = GCR.getBasePtr();
- assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives &&
- "Missed live gc for base pointer");
- auto *OpIntTy1 = GCR.getOperand(1)->getType();
- GCR.setOperand(1, ConstantInt::get(OpIntTy1, Val2Idx[BasePtr]));
- Value *DerivedPtr = GCR.getDerivedPtr();
- assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives &&
- "Missed live gc for derived pointer");
- auto *OpIntTy2 = GCR.getOperand(2)->getType();
- GCR.setOperand(2, ConstantInt::get(OpIntTy2, Val2Idx[DerivedPtr]));
- }
- // Create new statepoint instruction.
- OperandBundleDef NewBundle("gc-live", NewLiveGc);
- if (isa<CallInst>(II))
- return CallInst::CreateWithReplacedBundle(cast<CallInst>(II), NewBundle);
- else
- return InvokeInst::CreateWithReplacedBundle(cast<InvokeInst>(II),
- NewBundle);
+ // Update the cache of affected values for this assumption (we might be
+ // here because we just simplified the condition).
+ AC.updateAffectedValues(cast<AssumeInst>(II));
break;
}
case Intrinsic::experimental_guard: {
@@ -1699,18 +1904,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
unsigned SubVecNumElts = SubVecTy->getNumElements();
unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
- // The result of this call is undefined if IdxN is not a constant multiple
- // of the SubVec's minimum vector length OR the insertion overruns Vec.
- if (IdxN % SubVecNumElts != 0 || IdxN + SubVecNumElts > VecNumElts) {
- replaceInstUsesWith(CI, UndefValue::get(CI.getType()));
- return eraseInstFromFunction(CI);
- }
-
// An insert that entirely overwrites Vec with SubVec is a nop.
- if (VecNumElts == SubVecNumElts) {
- replaceInstUsesWith(CI, SubVec);
- return eraseInstFromFunction(CI);
- }
+ if (VecNumElts == SubVecNumElts)
+ return replaceInstUsesWith(CI, SubVec);
// Widen SubVec into a vector of the same width as Vec, since
// shufflevector requires the two input vectors to be the same width.
@@ -1734,8 +1930,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
Mask.push_back(i);
Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
- replaceInstUsesWith(CI, Shuffle);
- return eraseInstFromFunction(CI);
+ return replaceInstUsesWith(CI, Shuffle);
}
break;
}
@@ -1753,14 +1948,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
unsigned VecNumElts = VecTy->getNumElements();
unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
- // The result of this call is undefined if IdxN is not a constant multiple
- // of the result type's minimum vector length OR the extraction overruns
- // Vec.
- if (IdxN % DstNumElts != 0 || IdxN + DstNumElts > VecNumElts) {
- replaceInstUsesWith(CI, UndefValue::get(CI.getType()));
- return eraseInstFromFunction(CI);
- }
-
// Extracting the entirety of Vec is a nop.
if (VecNumElts == DstNumElts) {
replaceInstUsesWith(CI, Vec);
@@ -1771,10 +1958,101 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
for (unsigned i = 0; i != DstNumElts; ++i)
Mask.push_back(IdxN + i);
- Value *Shuffle =
- Builder.CreateShuffleVector(Vec, UndefValue::get(VecTy), Mask);
- replaceInstUsesWith(CI, Shuffle);
- return eraseInstFromFunction(CI);
+ Value *Shuffle = Builder.CreateShuffleVector(Vec, Mask);
+ return replaceInstUsesWith(CI, Shuffle);
+ }
+ break;
+ }
+ case Intrinsic::vector_reduce_or:
+ case Intrinsic::vector_reduce_and: {
+ // Canonicalize logical or/and reductions:
+ // Or reduction for i1 is represented as:
+ // %val = bitcast <ReduxWidth x i1> to iReduxWidth
+ // %res = cmp ne iReduxWidth %val, 0
+ // And reduction for i1 is represented as:
+ // %val = bitcast <ReduxWidth x i1> to iReduxWidth
+ // %res = cmp eq iReduxWidth %val, -1 (all bits set)
+ Value *Arg = II->getArgOperand(0);
+ Type *RetTy = II->getType();
+ if (RetTy == Builder.getInt1Ty())
+ if (auto *FVTy = dyn_cast<FixedVectorType>(Arg->getType())) {
+ Value *Res = Builder.CreateBitCast(
+ Arg, Builder.getIntNTy(FVTy->getNumElements()));
+ if (IID == Intrinsic::vector_reduce_and) {
+ Res = Builder.CreateICmpEQ(
+ Res, ConstantInt::getAllOnesValue(Res->getType()));
+ } else {
+ assert(IID == Intrinsic::vector_reduce_or &&
+ "Expected or reduction.");
+ Res = Builder.CreateIsNotNull(Res);
+ }
+ return replaceInstUsesWith(CI, Res);
+ }
+ LLVM_FALLTHROUGH;
+ }
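Concretely (a sketch with invented names), an i1 or-reduction becomes a bitcast and compare:
    %r = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %v)
  -->
    %b = bitcast <4 x i1> %v to i4
    %r = icmp ne i4 %b, 0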
+ case Intrinsic::vector_reduce_add: {
+ if (IID == Intrinsic::vector_reduce_add) {
+ // Convert vector_reduce_add(ZExt(<n x i1>)) to
+ // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
+ // Convert vector_reduce_add(SExt(<n x i1>)) to
+ // -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
+ // Convert vector_reduce_add(<n x i1>) to
+ // Trunc(ctpop(bitcast <n x i1> to in)).
+ Value *Arg = II->getArgOperand(0);
+ Value *Vect;
+ if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
+ if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
+ if (FTy->getElementType() == Builder.getInt1Ty()) {
+ Value *V = Builder.CreateBitCast(
+ Vect, Builder.getIntNTy(FTy->getNumElements()));
+ Value *Res = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, V);
+ if (Res->getType() != II->getType())
+ Res = Builder.CreateZExtOrTrunc(Res, II->getType());
+ if (Arg != Vect &&
+ cast<Instruction>(Arg)->getOpcode() == Instruction::SExt)
+ Res = Builder.CreateNeg(Res);
+ return replaceInstUsesWith(CI, Res);
+ }
+ }
+ }
+ LLVM_FALLTHROUGH;
+ }
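A sketch of the zext flavor from the comment above (hypothetical names):
    %e = zext <8 x i1> %v to <8 x i32>
    %r = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %e)
  -->
    %b = bitcast <8 x i1> %v to i8
    %c = call i8 @llvm.ctpop.i8(i8 %b)
    %r = zext i8 %c to i32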
+ case Intrinsic::vector_reduce_mul:
+ case Intrinsic::vector_reduce_xor:
+ case Intrinsic::vector_reduce_umax:
+ case Intrinsic::vector_reduce_umin:
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_fmax:
+ case Intrinsic::vector_reduce_fmin:
+ case Intrinsic::vector_reduce_fadd:
+ case Intrinsic::vector_reduce_fmul: {
+ bool CanBeReassociated = (IID != Intrinsic::vector_reduce_fadd &&
+ IID != Intrinsic::vector_reduce_fmul) ||
+ II->hasAllowReassoc();
+ const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd ||
+ IID == Intrinsic::vector_reduce_fmul)
+ ? 1
+ : 0;
+ Value *Arg = II->getArgOperand(ArgIdx);
+ Value *V;
+ ArrayRef<int> Mask;
+ if (!isa<FixedVectorType>(Arg->getType()) || !CanBeReassociated ||
+ !match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) ||
+ !cast<ShuffleVectorInst>(Arg)->isSingleSource())
+ break;
+ int Sz = Mask.size();
+ SmallBitVector UsedIndices(Sz);
+ for (int Idx : Mask) {
+ if (Idx == UndefMaskElem || UsedIndices.test(Idx))
+ break;
+ UsedIndices.set(Idx);
+ }
+ // Can remove shuffle iff just shuffled elements, no repeats, undefs, or
+ // other changes.
+ if (UsedIndices.all()) {
+ replaceUse(II->getOperandUse(ArgIdx), V);
+ return nullptr;
}
break;
}
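For instance (sketch, invented names), a pure permutation feeding a reduction is dropped:
    %s = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
  -->
    %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v)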
@@ -1786,6 +2064,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
break;
}
}
+ // Some intrinsics (like experimental_gc_statepoint) can also appear in an
+ // invoke context; they are handled in visitCallBase, so defer to it here.
return visitCallBase(*II);
}
@@ -1827,20 +2107,27 @@ static bool isSafeToEliminateVarargsCast(const CallBase &Call,
isa<GCResultInst>(Call))
return false;
+ // Opaque pointers are compatible with any byval types.
+ PointerType *SrcTy = cast<PointerType>(CI->getOperand(0)->getType());
+ if (SrcTy->isOpaque())
+ return true;
+
// The size of ByVal or InAlloca arguments is derived from the type, so we
// can't change to a type with a different size. If the size were
// passed explicitly we could avoid this check.
if (!Call.isPassPointeeByValueArgument(ix))
return true;
- Type* SrcTy =
- cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
- Type *DstTy = Call.isByValArgument(ix)
- ? Call.getParamByValType(ix)
- : cast<PointerType>(CI->getType())->getElementType();
- if (!SrcTy->isSized() || !DstTy->isSized())
+ // The transform currently only handles type replacement for byval, not other
+ // type-carrying attributes.
+ if (!Call.isByValArgument(ix))
+ return false;
+
+ Type *SrcElemTy = SrcTy->getElementType();
+ Type *DstElemTy = Call.getParamByValType(ix);
+ if (!SrcElemTy->isSized() || !DstElemTy->isSized())
return false;
- if (DL.getTypeAllocSize(SrcTy) != DL.getTypeAllocSize(DstTy))
+ if (DL.getTypeAllocSize(SrcElemTy) != DL.getTypeAllocSize(DstElemTy))
return false;
return true;
}
@@ -1940,7 +2227,7 @@ static IntrinsicInst *findInitTrampoline(Value *Callee) {
return nullptr;
}
-static void annotateAnyAllocSite(CallBase &Call, const TargetLibraryInfo *TLI) {
+void InstCombinerImpl::annotateAnyAllocSite(CallBase &Call, const TargetLibraryInfo *TLI) {
unsigned NumArgs = Call.getNumArgOperands();
ConstantInt *Op0C = dyn_cast<ConstantInt>(Call.getOperand(0));
ConstantInt *Op1C =
@@ -1959,17 +2246,21 @@ static void annotateAnyAllocSite(CallBase &Call, const TargetLibraryInfo *TLI) {
Call.addAttribute(AttributeList::ReturnIndex,
Attribute::getWithDereferenceableOrNullBytes(
Call.getContext(), Op0C->getZExtValue()));
- } else if (isAlignedAllocLikeFn(&Call, TLI) && Op1C) {
- Call.addAttribute(AttributeList::ReturnIndex,
- Attribute::getWithDereferenceableOrNullBytes(
- Call.getContext(), Op1C->getZExtValue()));
+ } else if (isAlignedAllocLikeFn(&Call, TLI)) {
+ if (Op1C)
+ Call.addAttribute(AttributeList::ReturnIndex,
+ Attribute::getWithDereferenceableOrNullBytes(
+ Call.getContext(), Op1C->getZExtValue()));
// Add alignment attribute if alignment is a power of two constant.
- if (Op0C && Op0C->getValue().ult(llvm::Value::MaximumAlignment)) {
+ if (Op0C && Op0C->getValue().ult(llvm::Value::MaximumAlignment) &&
+ isKnownNonZero(Call.getOperand(1), DL, 0, &AC, &Call, &DT)) {
uint64_t AlignmentVal = Op0C->getZExtValue();
- if (llvm::isPowerOf2_64(AlignmentVal))
+ if (llvm::isPowerOf2_64(AlignmentVal)) {
+ Call.removeAttribute(AttributeList::ReturnIndex, Attribute::Alignment);
Call.addAttribute(AttributeList::ReturnIndex,
Attribute::getWithAlignment(Call.getContext(),
Align(AlignmentVal)));
+ }
}
} else if (isReallocLikeFn(&Call, TLI) && Op1C) {
Call.addAttribute(AttributeList::ReturnIndex,
@@ -2049,19 +2340,24 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
return &Call;
}
- // If the call and callee calling conventions don't match, this call must
- // be unreachable, as the call is undefined.
- if (CalleeF->getCallingConv() != Call.getCallingConv() &&
+ // If the call and callee calling conventions don't match, and neither one
+ // of the calling conventions is compatible with the C calling convention,
+ // this call must be unreachable, as the call is undefined.
+ if ((CalleeF->getCallingConv() != Call.getCallingConv() &&
+ !(CalleeF->getCallingConv() == llvm::CallingConv::C &&
+ TargetLibraryInfoImpl::isCallingConvCCompatible(&Call)) &&
+ !(Call.getCallingConv() == llvm::CallingConv::C &&
+ TargetLibraryInfoImpl::isCallingConvCCompatible(CalleeF))) &&
// Only do this for calls to a function with a body. A prototype may
// not actually end up matching the implementation's calling conv for a
// variety of reasons (e.g. it may be written in assembly).
!CalleeF->isDeclaration()) {
Instruction *OldCall = &Call;
CreateNonTerminatorUnreachable(OldCall);
- // If OldCall does not return void then replaceInstUsesWith undef.
+ // If OldCall does not return void then replaceInstUsesWith poison.
// This allows ValueHandlers and custom metadata to adjust itself.
if (!OldCall->getType()->isVoidTy())
- replaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
+ replaceInstUsesWith(*OldCall, PoisonValue::get(OldCall->getType()));
if (isa<CallInst>(OldCall))
return eraseInstFromFunction(*OldCall);
@@ -2074,13 +2370,15 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
}
}
+ // Calling a null function pointer is undefined if a null address isn't
+ // dereferenceable.
if ((isa<ConstantPointerNull>(Callee) &&
!NullPointerIsDefined(Call.getFunction())) ||
isa<UndefValue>(Callee)) {
- // If Call does not return void then replaceInstUsesWith undef.
+ // If Call does not return void then replaceInstUsesWith poison.
// This allows ValueHandlers and custom metadata to adjust itself.
if (!Call.getType()->isVoidTy())
- replaceInstUsesWith(Call, UndefValue::get(Call.getType()));
+ replaceInstUsesWith(Call, PoisonValue::get(Call.getType()));
if (Call.isTerminator()) {
// Can't remove an invoke or callbr because we cannot change the CFG.
@@ -2095,8 +2393,8 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
if (IntrinsicInst *II = findInitTrampoline(Callee))
return transformCallThroughTrampoline(Call, *II);
- PointerType *PTy = cast<PointerType>(Callee->getType());
- FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ // TODO: Drop this transform once opaque pointer transition is done.
+ FunctionType *FTy = Call.getFunctionType();
if (FTy->isVarArg()) {
int ix = FTy->getNumParams();
// See if we can optimize any arguments passed through the varargs area of
@@ -2107,13 +2405,14 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
if (CI && isSafeToEliminateVarargsCast(Call, DL, CI, ix)) {
replaceUse(*I, CI->getOperand(0));
- // Update the byval type to match the argument type.
- if (Call.isByValArgument(ix)) {
+ // Update the byval type to match the pointer type.
+ // Not necessary for opaque pointers.
+ PointerType *NewTy = cast<PointerType>(CI->getOperand(0)->getType());
+ if (!NewTy->isOpaque() && Call.isByValArgument(ix)) {
Call.removeParamAttr(ix, Attribute::ByVal);
Call.addParamAttr(
ix, Attribute::getWithByValType(
- Call.getContext(),
- CI->getOperand(0)->getType()->getPointerElementType()));
+ Call.getContext(), NewTy->getElementType()));
}
Changed = true;
}
@@ -2121,9 +2420,13 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
}
if (isa<InlineAsm>(Callee) && !Call.doesNotThrow()) {
- // Inline asm calls cannot throw - mark them 'nounwind'.
- Call.setDoesNotThrow();
- Changed = true;
+ InlineAsm *IA = cast<InlineAsm>(Callee);
+ if (!IA->canThrow()) {
+ // Normal inline asm calls cannot throw - mark them
+ // 'nounwind'.
+ Call.setDoesNotThrow();
+ Changed = true;
+ }
}
// Try to optimize the call if possible, we require DataLayout for most of
@@ -2148,6 +2451,104 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
if (isAllocLikeFn(&Call, &TLI))
return visitAllocSite(Call);
+ // Handle intrinsics which can be used in both call and invoke context.
+ switch (Call.getIntrinsicID()) {
+ case Intrinsic::experimental_gc_statepoint: {
+ GCStatepointInst &GCSP = *cast<GCStatepointInst>(&Call);
+ SmallPtrSet<Value *, 32> LiveGcValues;
+ for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
+ GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
+
+ // Remove the relocation if unused.
+ if (GCR.use_empty()) {
+ eraseInstFromFunction(GCR);
+ continue;
+ }
+
+ Value *DerivedPtr = GCR.getDerivedPtr();
+ Value *BasePtr = GCR.getBasePtr();
+
+ // Undef is undef, even after relocation.
+ if (isa<UndefValue>(DerivedPtr) || isa<UndefValue>(BasePtr)) {
+ replaceInstUsesWith(GCR, UndefValue::get(GCR.getType()));
+ eraseInstFromFunction(GCR);
+ continue;
+ }
+
+ if (auto *PT = dyn_cast<PointerType>(GCR.getType())) {
+ // The relocation of null will be null for most any collector.
+ // TODO: provide a hook for this in GCStrategy. There might be some
+ // weird collector this property does not hold for.
+ if (isa<ConstantPointerNull>(DerivedPtr)) {
+ // Use null-pointer of gc_relocate's type to replace it.
+ replaceInstUsesWith(GCR, ConstantPointerNull::get(PT));
+ eraseInstFromFunction(GCR);
+ continue;
+ }
+
+ // isKnownNonNull -> nonnull attribute
+ if (!GCR.hasRetAttr(Attribute::NonNull) &&
+ isKnownNonZero(DerivedPtr, DL, 0, &AC, &Call, &DT)) {
+ GCR.addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
+ // We discovered a new fact; re-check users.
+ Worklist.pushUsersToWorkList(GCR);
+ }
+ }
+
+ // If we have two copies of the same pointer in the statepoint argument
+ // list, canonicalize to one. This may let us common gc.relocates.
+ if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
+ GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
+ auto *OpIntTy = GCR.getOperand(2)->getType();
+ GCR.setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex()));
+ }
+
+ // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
+ // Canonicalize on the type from the uses to the defs
+
+ // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
+ LiveGcValues.insert(BasePtr);
+ LiveGcValues.insert(DerivedPtr);
+ }
+ Optional<OperandBundleUse> Bundle =
+ GCSP.getOperandBundle(LLVMContext::OB_gc_live);
+ unsigned NumOfGCLives = LiveGcValues.size();
+ if (!Bundle.hasValue() || NumOfGCLives == Bundle->Inputs.size())
+ break;
+ // We can reduce the size of gc live bundle.
+ DenseMap<Value *, unsigned> Val2Idx;
+ std::vector<Value *> NewLiveGc;
+ for (unsigned I = 0, E = Bundle->Inputs.size(); I < E; ++I) {
+ Value *V = Bundle->Inputs[I];
+ if (Val2Idx.count(V))
+ continue;
+ if (LiveGcValues.count(V)) {
+ Val2Idx[V] = NewLiveGc.size();
+ NewLiveGc.push_back(V);
+ } else
+ Val2Idx[V] = NumOfGCLives;
+ }
+ // Update all gc.relocates
+ for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
+ GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
+ Value *BasePtr = GCR.getBasePtr();
+ assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives &&
+ "Missed live gc for base pointer");
+ auto *OpIntTy1 = GCR.getOperand(1)->getType();
+ GCR.setOperand(1, ConstantInt::get(OpIntTy1, Val2Idx[BasePtr]));
+ Value *DerivedPtr = GCR.getDerivedPtr();
+ assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives &&
+ "Missed live gc for derived pointer");
+ auto *OpIntTy2 = GCR.getOperand(2)->getType();
+ GCR.setOperand(2, ConstantInt::get(OpIntTy2, Val2Idx[DerivedPtr]));
+ }
+ // Create new statepoint instruction.
+ OperandBundleDef NewBundle("gc-live", NewLiveGc);
+ return CallBase::Create(&Call, NewBundle);
+ }
+ default: { break; }
+ }
+
return Changed ? &Call : nullptr;
}