Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp')
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp  430
1 file changed, 292 insertions, 138 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 809be499ee0f..f748f78524e0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -104,7 +104,7 @@ static Type *getPromotedType(Type *Ty) {
/// requires a deeper change to allow either unread or unwritten objects.
static bool hasUndefSource(AnyMemTransferInst *MI) {
auto *Src = MI->getRawSource();
- while (isa<GetElementPtrInst>(Src) || isa<BitCastInst>(Src)) {
+ while (isa<GetElementPtrInst>(Src)) {
if (!Src->hasOneUse())
return false;
Src = cast<Instruction>(Src)->getOperand(0);
@@ -171,7 +171,7 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
// If the memcpy has metadata describing the members, see if we can get the
- // TBAA tag describing our copy.
+ // TBAA, scope and noalias tags describing our copy.
AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(Size);
Value *Src = MI->getArgOperand(1);
@@ -260,13 +260,11 @@ Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
// memset(s,c,n) -> store s, c (for n=1,2,4,8)
if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
- Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
-
Value *Dest = MI->getDest();
// Extract the fill value and store.
- const uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
- Constant *FillVal = ConstantInt::get(ITy, Fill);
+ Constant *FillVal = ConstantInt::get(
+ MI->getContext(), APInt::getSplat(Len * 8, FillC->getValue()));
StoreInst *S = Builder.CreateStore(FillVal, Dest, MI->isVolatile());
S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
auto replaceOpForAssignmentMarkers = [FillC, FillVal](auto *DbgAssign) {
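A minimal IR sketch of the memset fold above (not part of the patch; value names illustrative): the fill constant is now built by splatting the byte with APInt::getSplat rather than multiplying by 0x0101010101010101ULL, e.g. for a 4-byte fill of 0xAB:

  call void @llvm.memset.p0.i64(ptr %p, i8 171, i64 4, i1 false)
  ; becomes a single splatted store (0xABABABAB, printed signed):
  store i32 -1414812757, ptr %p, align 1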
@@ -490,7 +488,8 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
// cttz(bitreverse(x)) -> ctlz(x)
if (match(Op0, m_BitReverse(m_Value(X)))) {
Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
- Function *F = Intrinsic::getDeclaration(II.getModule(), ID, II.getType());
+ Function *F =
+ Intrinsic::getOrInsertDeclaration(II.getModule(), ID, II.getType());
return CallInst::Create(F, {X, II.getArgOperand(1)});
}
@@ -506,8 +505,10 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
// If ctlz/cttz is only used as a shift amount, set is_zero_poison to true.
if (II.hasOneUse() && match(Op1, m_Zero()) &&
- match(II.user_back(), m_Shift(m_Value(), m_Specific(&II))))
+ match(II.user_back(), m_Shift(m_Value(), m_Specific(&II)))) {
+ II.dropUBImplyingAttrsAndMetadata();
return IC.replaceOperand(II, 1, IC.Builder.getTrue());
+ }
Constant *C;
@@ -587,6 +588,19 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
}
}
+ // cttz(Pow2) -> Log2(Pow2)
+ // ctlz(Pow2) -> BitWidth - 1 - Log2(Pow2)
+ if (auto *R = IC.tryGetLog2(Op0, match(Op1, m_One()))) {
+ if (IsTZ)
+ return IC.replaceInstUsesWith(II, R);
+ BinaryOperator *BO = BinaryOperator::CreateSub(
+ ConstantInt::get(R->getType(), R->getType()->getScalarSizeInBits() - 1),
+ R);
+ BO->setHasNoSignedWrap();
+ BO->setHasNoUnsignedWrap();
+ return BO;
+ }
+
KnownBits Known = IC.computeKnownBits(Op0, 0, &II);
// Create a mask for bits above (ctlz) or below (cttz) the first known one.
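A minimal IR sketch of the new power-of-two fold (not part of the patch; names illustrative), assuming tryGetLog2 recognizes a nuw shift of one:

  %pow2 = shl nuw i32 1, %n
  %tz = call i32 @llvm.cttz.i32(i32 %pow2, i1 true)   ; --> %n
  %lz = call i32 @llvm.ctlz.i32(i32 %pow2, i1 true)   ; --> sub nuw nsw i32 31, %n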
@@ -648,9 +662,8 @@ static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
// ctpop(x | -x) -> bitwidth - cttz(x, false)
if (Op0->hasOneUse() &&
match(Op0, m_c_Or(m_Value(X), m_Neg(m_Deferred(X))))) {
- Function *F =
- Intrinsic::getDeclaration(II.getModule(), Intrinsic::cttz, Ty);
- auto *Cttz = IC.Builder.CreateCall(F, {X, IC.Builder.getFalse()});
+ auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
+ {X, IC.Builder.getFalse()});
auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
}
@@ -659,7 +672,7 @@ static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
if (match(Op0,
m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) {
Function *F =
- Intrinsic::getDeclaration(II.getModule(), Intrinsic::cttz, Ty);
+ Intrinsic::getOrInsertDeclaration(II.getModule(), Intrinsic::cttz, Ty);
return CallInst::Create(F, {X, IC.Builder.getFalse()});
}
@@ -692,12 +705,24 @@ static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
Ty);
// Add range attribute since known bits can't completely reflect what we know.
- if (BitWidth != 1 && !II.hasRetAttr(Attribute::Range) &&
- !II.getMetadata(LLVMContext::MD_range)) {
- ConstantRange Range(APInt(BitWidth, Known.countMinPopulation()),
- APInt(BitWidth, Known.countMaxPopulation() + 1));
- II.addRangeRetAttr(Range);
- return &II;
+ if (BitWidth != 1) {
+ ConstantRange OldRange =
+ II.getRange().value_or(ConstantRange::getFull(BitWidth));
+
+ unsigned Lower = Known.countMinPopulation();
+ unsigned Upper = Known.countMaxPopulation() + 1;
+
+ if (Lower == 0 && OldRange.contains(APInt::getZero(BitWidth)) &&
+ isKnownNonZero(Op0, IC.getSimplifyQuery().getWithInstruction(&II)))
+ Lower = 1;
+
+ ConstantRange Range(APInt(BitWidth, Lower), APInt(BitWidth, Upper));
+ Range = Range.intersectWith(OldRange, ConstantRange::Unsigned);
+
+ if (Range != OldRange) {
+ II.addRangeRetAttr(Range);
+ return &II;
+ }
}
return nullptr;
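Sketch of the tightened ctpop range (not part of the patch; values illustrative): when Op0 is known nonzero the lower bound becomes 1, and the result is intersected with any pre-existing range before the attribute is updated:

  %nz = icmp ne i32 %x, 0
  call void @llvm.assume(i1 %nz)
  %c = call i32 @llvm.ctpop.i32(i32 %x)
  ; the call is annotated rather than replaced:
  %c = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)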
@@ -827,6 +852,35 @@ InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
WO->getRHS(), *WO, OperationResult, OverflowResult))
return createOverflowTuple(WO, OperationResult, OverflowResult);
+
+ // See whether we can optimize the overflow check with assumption information.
+ for (User *U : WO->users()) {
+ if (!match(U, m_ExtractValue<1>(m_Value())))
+ continue;
+
+ for (auto &AssumeVH : AC.assumptionsFor(U)) {
+ if (!AssumeVH)
+ continue;
+ CallInst *I = cast<CallInst>(AssumeVH);
+ if (!match(I->getArgOperand(0), m_Not(m_Specific(U))))
+ continue;
+ if (!isValidAssumeForContext(I, II, /*DT=*/nullptr,
+ /*AllowEphemerals=*/true))
+ continue;
+ Value *Result =
+ Builder.CreateBinOp(WO->getBinaryOp(), WO->getLHS(), WO->getRHS());
+ Result->takeName(WO);
+ if (auto *Inst = dyn_cast<Instruction>(Result)) {
+ if (WO->isSigned())
+ Inst->setHasNoSignedWrap();
+ else
+ Inst->setHasNoUnsignedWrap();
+ }
+ return createOverflowTuple(WO, Result,
+ ConstantInt::getFalse(U->getType()));
+ }
+ }
+
return nullptr;
}
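Sketch of the new assumption-driven overflow fold (not part of the patch; names illustrative): when the overflow bit is assumed to be false, the checked operation collapses to a plain binop carrying the matching wrap flag, and the overflow result folds to false:

  %wo = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
  %ov = extractvalue { i32, i1 } %wo, 1
  %not = xor i1 %ov, true
  call void @llvm.assume(i1 %not)
  %sum = extractvalue { i32, i1 } %wo, 0
  ; becomes:
  %sum = add nsw i32 %a, %b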
@@ -1171,10 +1225,9 @@ Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
return nullptr;
// Finally create and return the sat intrinsic, truncated to the new type
- Function *F = Intrinsic::getDeclaration(MinMax1.getModule(), IntrinsicID, NewTy);
Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);
Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy);
- Value *Sat = Builder.CreateCall(F, {AT, BT});
+ Value *Sat = Builder.CreateIntrinsic(IntrinsicID, NewTy, {AT, BT});
return CastInst::Create(Instruction::SExt, Sat, Ty);
}
@@ -1276,8 +1329,8 @@ reassociateMinMaxWithConstantInOperand(IntrinsicInst *II,
return nullptr;
// max (max X, C), Y --> max (max X, Y), C
- Function *MinMax =
- Intrinsic::getDeclaration(II->getModule(), MinMaxID, II->getType());
+ Function *MinMax = Intrinsic::getOrInsertDeclaration(II->getModule(),
+ MinMaxID, II->getType());
Value *NewInner = Builder.CreateBinaryIntrinsic(MinMaxID, X, Y);
NewInner->takeName(Inner);
return CallInst::Create(MinMax, {NewInner, C});
@@ -1336,7 +1389,8 @@ static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
return nullptr;
Module *Mod = II->getModule();
- Function *MinMax = Intrinsic::getDeclaration(Mod, MinMaxID, II->getType());
+ Function *MinMax =
+ Intrinsic::getOrInsertDeclaration(Mod, MinMaxID, II->getType());
return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
}
@@ -1493,6 +1547,76 @@ foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1,
ConstantInt::getTrue(ZeroUndef->getType()));
}
+/// Return whether "X LOp (Y ROp Z)" is always equal to
+/// "(X LOp Y) ROp (X LOp Z)".
+static bool leftDistributesOverRight(Instruction::BinaryOps LOp, bool HasNUW,
+ bool HasNSW, Intrinsic::ID ROp) {
+ switch (ROp) {
+ case Intrinsic::umax:
+ case Intrinsic::umin:
+ return HasNUW && LOp == Instruction::Add;
+ case Intrinsic::smax:
+ case Intrinsic::smin:
+ return HasNSW && LOp == Instruction::Add;
+ default:
+ return false;
+ }
+}
+
+// Attempts to factorise a common term
+// in an instruction that has the form "(A op' B) op (C op' D)",
+// where op is an intrinsic and op' is a binop.
+static Value *
+foldIntrinsicUsingDistributiveLaws(IntrinsicInst *II,
+ InstCombiner::BuilderTy &Builder) {
+ Value *LHS = II->getOperand(0), *RHS = II->getOperand(1);
+ Intrinsic::ID TopLevelOpcode = II->getIntrinsicID();
+
+ OverflowingBinaryOperator *Op0 = dyn_cast<OverflowingBinaryOperator>(LHS);
+ OverflowingBinaryOperator *Op1 = dyn_cast<OverflowingBinaryOperator>(RHS);
+
+ if (!Op0 || !Op1)
+ return nullptr;
+
+ if (Op0->getOpcode() != Op1->getOpcode())
+ return nullptr;
+
+ if (!Op0->hasOneUse() || !Op1->hasOneUse())
+ return nullptr;
+
+ Instruction::BinaryOps InnerOpcode =
+ static_cast<Instruction::BinaryOps>(Op0->getOpcode());
+ bool HasNUW = Op0->hasNoUnsignedWrap() && Op1->hasNoUnsignedWrap();
+ bool HasNSW = Op0->hasNoSignedWrap() && Op1->hasNoSignedWrap();
+
+ if (!leftDistributesOverRight(InnerOpcode, HasNUW, HasNSW, TopLevelOpcode))
+ return nullptr;
+
+ assert(II->isCommutative() && Op0->isCommutative() &&
+ "Only inner and outer commutative op codes are supported.");
+
+ Value *A = Op0->getOperand(0);
+ Value *B = Op0->getOperand(1);
+ Value *C = Op1->getOperand(0);
+ Value *D = Op1->getOperand(1);
+
+ // Attempts to swap variables such that A always equals C
+ if (A != C && A != D)
+ std::swap(A, B);
+ if (A == C || A == D) {
+ if (A != C)
+ std::swap(C, D);
+ Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, B, D);
+ BinaryOperator *NewBinop =
+ cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, NewIntrinsic, A));
+ NewBinop->setHasNoSignedWrap(HasNSW);
+ NewBinop->setHasNoUnsignedWrap(HasNUW);
+ return NewBinop;
+ }
+
+ return nullptr;
+}
+
/// CallInst simplification. This mostly only handles folding of intrinsic
/// instructions. For normal calls, it allows visitCallBase to do the heavy
/// lifting.
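A minimal IR sketch of foldIntrinsicUsingDistributiveLaws (not part of the patch; names illustrative): a common addend is factored out of a min/max when both adds carry the matching wrap flag:

  %l = add nuw i32 %x, %a
  %r = add nuw i32 %x, %b
  %m = call i32 @llvm.umin.i32(i32 %l, i32 %r)
  ; becomes:
  %t = call i32 @llvm.umin.i32(i32 %a, i32 %b)
  %m = add nuw i32 %t, %x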
@@ -1500,10 +1624,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// Don't try to simplify calls without uses. It will not do anything useful,
// but will result in the following folds being skipped.
if (!CI.use_empty()) {
- SmallVector<Value *, 4> Args;
- Args.reserve(CI.arg_size());
- for (Value *Op : CI.args())
- Args.push_back(Op);
+ SmallVector<Value *, 8> Args(CI.args());
if (Value *V = simplifyCall(&CI, CI.getCalledOperand(), Args,
SQ.getWithInstruction(&CI)))
return replaceInstUsesWith(CI, V);
@@ -1564,7 +1685,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
Type *Tys[3] = { CI.getArgOperand(0)->getType(),
CI.getArgOperand(1)->getType(),
CI.getArgOperand(2)->getType() };
- CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
+ CI.setCalledFunction(
+ Intrinsic::getOrInsertDeclaration(M, MemCpyID, Tys));
Changed = true;
}
}
@@ -1575,16 +1697,30 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
return eraseInstFromFunction(CI);
}
+ auto IsPointerUndefined = [MI](Value *Ptr) {
+ return isa<ConstantPointerNull>(Ptr) &&
+ !NullPointerIsDefined(
+ MI->getFunction(),
+ cast<PointerType>(Ptr->getType())->getAddressSpace());
+ };
+ bool SrcIsUndefined = false;
// If we can determine a pointer alignment that is bigger than currently
// set, update the alignment.
if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
if (Instruction *I = SimplifyAnyMemTransfer(MTI))
return I;
+ SrcIsUndefined = IsPointerUndefined(MTI->getRawSource());
} else if (auto *MSI = dyn_cast<AnyMemSetInst>(MI)) {
if (Instruction *I = SimplifyAnyMemSet(MSI))
return I;
}
+ // If src/dest is null, this memory intrinsic must be a noop.
+ if (SrcIsUndefined || IsPointerUndefined(MI->getRawDest())) {
+ Builder.CreateAssumption(Builder.CreateIsNull(MI->getLength()));
+ return eraseInstFromFunction(CI);
+ }
+
if (Changed) return II;
}
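Sketch of the new null-pointer handling (not part of the patch; names illustrative): if the source or destination is null in an address space where null is not defined, the intrinsic is erased and only the fact that the length must have been zero is retained as an assumption:

  call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr null, i64 %len, i1 false)
  ; becomes:
  %iszero = icmp eq i64 %len, 0
  call void @llvm.assume(i1 %iszero)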
@@ -1642,9 +1778,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
Value *X;
if (match(IIOperand, m_Neg(m_Value(X))))
return replaceOperand(*II, 0, X);
- if (match(IIOperand, m_Select(m_Value(), m_Value(X), m_Neg(m_Deferred(X)))))
- return replaceOperand(*II, 0, X);
- if (match(IIOperand, m_Select(m_Value(), m_Neg(m_Value(X)), m_Deferred(X))))
+ if (match(IIOperand, m_c_Select(m_Neg(m_Value(X)), m_Deferred(X))))
return replaceOperand(*II, 0, X);
Value *Y;
@@ -1726,6 +1860,33 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
}
}
+ // If C is not 0:
+ // umax(nuw_shl(x, C), x + 1) -> x == 0 ? 1 : nuw_shl(x, C)
+ // If C is not 0 or 1:
+ // umax(nuw_mul(x, C), x + 1) -> x == 0 ? 1 : nuw_mul(x, C)
+ auto foldMaxMulShift = [&](Value *A, Value *B) -> Instruction * {
+ const APInt *C;
+ Value *X;
+ if (!match(A, m_NUWShl(m_Value(X), m_APInt(C))) &&
+ !(match(A, m_NUWMul(m_Value(X), m_APInt(C))) && !C->isOne()))
+ return nullptr;
+ if (C->isZero())
+ return nullptr;
+ if (!match(B, m_OneUse(m_Add(m_Specific(X), m_One()))))
+ return nullptr;
+
+ Value *Cmp = Builder.CreateICmpEQ(X, ConstantInt::get(X->getType(), 0));
+ Value *NewSelect =
+ Builder.CreateSelect(Cmp, ConstantInt::get(X->getType(), 1), A);
+ return replaceInstUsesWith(*II, NewSelect);
+ };
+
+ if (IID == Intrinsic::umax) {
+ if (Instruction *I = foldMaxMulShift(I0, I1))
+ return I;
+ if (Instruction *I = foldMaxMulShift(I1, I0))
+ return I;
+ }
// If both operands of unsigned min/max are sign-extended, it is still ok
// to narrow the operation.
[[fallthrough]];
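A minimal IR sketch of the new umax fold, shown for the shift form with C = 3 (not part of the patch; names illustrative):

  %s = shl nuw i32 %x, 3
  %a = add i32 %x, 1
  %m = call i32 @llvm.umax.i32(i32 %s, i32 %a)
  ; becomes:
  %z = icmp eq i32 %x, 0
  %m = select i1 %z, i32 1, i32 %s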
@@ -1906,6 +2067,18 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
}
+ if (Value *V = foldIntrinsicUsingDistributiveLaws(II, Builder))
+ return replaceInstUsesWith(*II, V);
+
+ break;
+ }
+ case Intrinsic::scmp: {
+ Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
+ Value *LHS, *RHS;
+ if (match(I0, m_NSWSub(m_Value(LHS), m_Value(RHS))) && match(I1, m_Zero()))
+ return replaceInstUsesWith(
+ CI,
+ Builder.CreateIntrinsic(II->getType(), Intrinsic::scmp, {LHS, RHS}));
break;
}
case Intrinsic::bitreverse: {
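Sketch of the scmp fold added above (not part of the patch; names illustrative): comparing an nsw difference against zero is the same as comparing the original operands:

  %d = sub nsw i32 %a, %b
  %c = call i8 @llvm.scmp.i8.i32(i32 %d, i32 0)
  ; becomes:
  %c = call i8 @llvm.scmp.i8.i32(i32 %a, i32 %b)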
@@ -2065,7 +2238,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
Constant *LeftShiftC = ConstantExpr::getSub(WidthC, ShAmtC);
Module *Mod = II->getModule();
- Function *Fshl = Intrinsic::getDeclaration(Mod, Intrinsic::fshl, Ty);
+ Function *Fshl =
+ Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::fshl, Ty);
return CallInst::Create(Fshl, { Op0, Op1, LeftShiftC });
}
assert(IID == Intrinsic::fshl &&
@@ -2085,7 +2259,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
if (Op0 == Op1 && BitWidth == 16 && match(ShAmtC, m_SpecificInt(8))) {
Module *Mod = II->getModule();
- Function *Bswap = Intrinsic::getDeclaration(Mod, Intrinsic::bswap, Ty);
+ Function *Bswap =
+ Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::bswap, Ty);
return CallInst::Create(Bswap, { Op0 });
}
if (Instruction *BitOp =
@@ -2094,6 +2269,15 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
return BitOp;
}
+ // fshl(X, 0, Y) --> shl(X, and(Y, BitWidth - 1)) if bitwidth is a
+ // power-of-2
+ if (IID == Intrinsic::fshl && isPowerOf2_32(BitWidth) &&
+ match(Op1, m_ZeroInt())) {
+ Value *Op2 = II->getArgOperand(2);
+ Value *And = Builder.CreateAnd(Op2, ConstantInt::get(Ty, BitWidth - 1));
+ return BinaryOperator::CreateShl(Op0, And);
+ }
+
// Left or right might be masked.
if (SimplifyDemandedInstructionBits(*II))
return &CI;
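A minimal IR sketch of the new funnel-shift fold for i32 (not part of the patch): with a power-of-two bit width, the implicit modulo of the shift amount becomes a mask:

  %r = call i32 @llvm.fshl.i32(i32 %x, i32 0, i32 %y)
  ; becomes:
  %amt = and i32 %y, 31
  %r = shl i32 %x, %amt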
@@ -2365,13 +2549,12 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
default:
llvm_unreachable("unexpected intrinsic ID");
}
- Value *V = Builder.CreateBinaryIntrinsic(
- IID, X, ConstantFP::get(Arg0->getType(), Res), II);
// TODO: Conservatively intersecting FMF. If Res == C2, the transform
// was a simplification (so Arg0 and its original flags could
// propagate?)
- if (auto *CI = dyn_cast<CallInst>(V))
- CI->andIRFlags(M);
+ Value *V = Builder.CreateBinaryIntrinsic(
+ IID, X, ConstantFP::get(Arg0->getType(), Res),
+ FMFSource::intersect(II, M));
return replaceInstUsesWith(*II, V);
}
}
@@ -2466,13 +2649,11 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
case Intrinsic::fmuladd: {
// Try to simplify the underlying FMul.
- if (Value *V = simplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
- II->getFastMathFlags(),
- SQ.getWithInstruction(II))) {
- auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
- FAdd->copyFastMathFlags(II);
- return FAdd;
- }
+ if (Value *V =
+ simplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
+ II->getFastMathFlags(), SQ.getWithInstruction(II)))
+ return BinaryOperator::CreateFAddFMF(V, II->getArgOperand(2),
+ II->getFastMathFlags());
[[fallthrough]];
}
@@ -2480,6 +2661,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// fma fneg(x), fneg(y), z -> fma x, y, z
Value *Src0 = II->getArgOperand(0);
Value *Src1 = II->getArgOperand(1);
+ Value *Src2 = II->getArgOperand(2);
Value *X, *Y;
if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) {
replaceOperand(*II, 0, X);
@@ -2497,22 +2679,21 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// Try to simplify the underlying FMul. We can only apply simplifications
// that do not require rounding.
- if (Value *V = simplifyFMAFMul(II->getArgOperand(0), II->getArgOperand(1),
- II->getFastMathFlags(),
- SQ.getWithInstruction(II))) {
- auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
- FAdd->copyFastMathFlags(II);
- return FAdd;
- }
+ if (Value *V = simplifyFMAFMul(Src0, Src1, II->getFastMathFlags(),
+ SQ.getWithInstruction(II)))
+ return BinaryOperator::CreateFAddFMF(V, Src2, II->getFastMathFlags());
// fma x, y, 0 -> fmul x, y
// This is always valid for -0.0, but requires nsz for +0.0 as
// -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
- if (match(II->getArgOperand(2), m_NegZeroFP()) ||
- (match(II->getArgOperand(2), m_PosZeroFP()) &&
- II->getFastMathFlags().noSignedZeros()))
+ if (match(Src2, m_NegZeroFP()) ||
+ (match(Src2, m_PosZeroFP()) && II->getFastMathFlags().noSignedZeros()))
return BinaryOperator::CreateFMulFMF(Src0, Src1, II);
+ // fma x, -1.0, y -> fsub y, x
+ if (match(Src1, m_SpecificFP(-1.0)))
+ return BinaryOperator::CreateFSubFMF(Src2, Src0, II);
+
break;
}
case Intrinsic::copysign: {
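Sketch of the new fma fold with a -1.0 multiplier (not part of the patch): multiplying by -1.0 is exact, so the fused operation is just a subtraction, with fast-math flags copied from the fma:

  %r = call float @llvm.fma.f32(float %x, float -1.0, float %y)
  ; becomes:
  %r = fsub float %y, %x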
@@ -2535,8 +2716,11 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// Propagate sign argument through nested calls:
// copysign Mag, (copysign ?, X) --> copysign Mag, X
Value *X;
- if (match(Sign, m_Intrinsic<Intrinsic::copysign>(m_Value(), m_Value(X))))
- return replaceOperand(*II, 1, X);
+ if (match(Sign, m_Intrinsic<Intrinsic::copysign>(m_Value(), m_Value(X)))) {
+ Value *CopySign =
+ Builder.CreateCopySign(Mag, X, FMFSource::intersect(II, Sign));
+ return replaceInstUsesWith(*II, CopySign);
+ }
// Clear sign-bit of constant magnitude:
// copysign -MagC, X --> copysign MagC, X
@@ -2591,8 +2775,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
m_CopySign(m_Value(Magnitude), m_Value(Sign)))) {
// fabs (copysign x, y) -> (fabs x)
CallInst *AbsSign =
- Builder.CreateCall(II->getCalledFunction(), {Magnitude});
- AbsSign->copyFastMathFlags(II);
+ Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Magnitude, II);
return replaceInstUsesWith(*II, AbsSign);
}
@@ -2699,16 +2882,15 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
Value *NewLdexp = nullptr;
Value *Select = nullptr;
if (match(SelectRHS, m_ZeroInt())) {
- NewLdexp = Builder.CreateLdexp(Src, SelectLHS);
+ NewLdexp = Builder.CreateLdexp(Src, SelectLHS, II);
Select = Builder.CreateSelect(SelectCond, NewLdexp, Src);
} else if (match(SelectLHS, m_ZeroInt())) {
- NewLdexp = Builder.CreateLdexp(Src, SelectRHS);
+ NewLdexp = Builder.CreateLdexp(Src, SelectRHS, II);
Select = Builder.CreateSelect(SelectCond, Src, NewLdexp);
}
if (NewLdexp) {
Select->takeName(II);
- cast<Instruction>(NewLdexp)->copyFastMathFlags(II);
return replaceInstUsesWith(*II, Select);
}
}
@@ -2791,7 +2973,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
CallArgs.push_back(II->getArgOperand(4));
}
- Function *NewFn = Intrinsic::getDeclaration(II->getModule(), NewIntrin);
+ Function *NewFn =
+ Intrinsic::getOrInsertDeclaration(II->getModule(), NewIntrin);
return CallInst::Create(NewFn, CallArgs);
}
case Intrinsic::arm_neon_vtbl1:
@@ -3028,13 +3211,13 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// assume( (load addr) != null ) -> add 'nonnull' metadata to load
// (if assume is valid at the load)
- CmpInst::Predicate Pred;
Instruction *LHS;
- if (match(IIOperand, m_ICmp(Pred, m_Instruction(LHS), m_Zero())) &&
- Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load &&
+ if (match(IIOperand, m_SpecificICmp(ICmpInst::ICMP_NE, m_Instruction(LHS),
+ m_Zero())) &&
+ LHS->getOpcode() == Instruction::Load &&
LHS->getType()->isPointerTy() &&
isValidAssumeForContext(II, LHS, &DT)) {
- MDNode *MD = MDNode::get(II->getContext(), std::nullopt);
+ MDNode *MD = MDNode::get(II->getContext(), {});
LHS->setMetadata(LLVMContext::MD_nonnull, MD);
LHS->setMetadata(LLVMContext::MD_noundef, MD);
return RemoveConditionFromAssume(II);
@@ -3070,12 +3253,13 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// into
// call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
if (EnableKnowledgeRetention &&
- match(IIOperand, m_Cmp(Pred, m_Value(A), m_Zero())) &&
- Pred == CmpInst::ICMP_NE && A->getType()->isPointerTy()) {
+ match(IIOperand,
+ m_SpecificICmp(ICmpInst::ICMP_NE, m_Value(A), m_Zero())) &&
+ A->getType()->isPointerTy()) {
if (auto *Replacement = buildAssumeFromKnowledge(
{RetainedKnowledge{Attribute::NonNull, 0, A}}, Next, &AC, &DT)) {
- Replacement->insertBefore(Next);
+ Replacement->insertBefore(Next->getIterator());
AC.registerAssumption(Replacement);
return RemoveConditionFromAssume(II);
}
@@ -3088,12 +3272,13 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// call void @llvm.assume(i1 %D)
// into
// call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)]
- uint64_t AlignMask;
+ uint64_t AlignMask = 1;
if (EnableKnowledgeRetention &&
- match(IIOperand,
- m_Cmp(Pred, m_And(m_Value(A), m_ConstantInt(AlignMask)),
- m_Zero())) &&
- Pred == CmpInst::ICMP_EQ) {
+ (match(IIOperand, m_Not(m_Trunc(m_Value(A)))) ||
+ match(IIOperand,
+ m_SpecificICmp(ICmpInst::ICMP_EQ,
+ m_And(m_Value(A), m_ConstantInt(AlignMask)),
+ m_Zero())))) {
if (isPowerOf2_64(AlignMask + 1)) {
uint64_t Offset = 0;
match(A, m_Add(m_Value(A), m_ConstantInt(Offset)));
@@ -3107,7 +3292,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (auto *Replacement =
buildAssumeFromKnowledge(RK, Next, &AC, &DT)) {
- Replacement->insertAfter(II);
+ Replacement->insertAfter(II->getIterator());
AC.registerAssumption(Replacement);
}
return RemoveConditionFromAssume(II);
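Sketch of the new m_Not(m_Trunc(...)) alignment pattern (not part of the patch; assumes -enable-knowledge-retention and that the bundle builder looks through the ptrtoint): an assumed-clear low bit is treated as AlignMask = 1, i.e. an alignment of 2:

  %ai = ptrtoint ptr %p to i64
  %bit = trunc i64 %ai to i1
  %ok = xor i1 %bit, true
  call void @llvm.assume(i1 %ok)
  ; is expected to become something like:
  call void @llvm.assume(i1 true) [ "align"(ptr %p, i64 2) ]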
@@ -3191,7 +3376,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
while (MoveI != NextInst) {
auto *Temp = MoveI;
MoveI = MoveI->getNextNonDebugInstruction();
- Temp->moveBefore(II);
+ Temp->moveBefore(II->getIterator());
}
replaceOperand(*II, 0, Builder.CreateAnd(CurrCond, NextCond));
}
@@ -3595,26 +3780,12 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// * The intrinsic is speculatable.
// * The select condition is not a vector, or the intrinsic does not
// perform cross-lane operations.
- switch (IID) {
- case Intrinsic::ctlz:
- case Intrinsic::cttz:
- case Intrinsic::ctpop:
- case Intrinsic::umin:
- case Intrinsic::umax:
- case Intrinsic::smin:
- case Intrinsic::smax:
- case Intrinsic::usub_sat:
- case Intrinsic::uadd_sat:
- case Intrinsic::ssub_sat:
- case Intrinsic::sadd_sat:
+ if (isSafeToSpeculativelyExecuteWithVariableReplaced(&CI) &&
+ isNotCrossLaneOperation(II))
for (Value *Op : II->args())
if (auto *Sel = dyn_cast<SelectInst>(Op))
if (Instruction *R = FoldOpIntoSelect(*II, Sel))
return R;
- [[fallthrough]];
- default:
- break;
- }
if (Instruction *Shuf = foldShuffledIntrinsicOperands(II, Builder))
return Shuf;
@@ -3667,7 +3838,7 @@ Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
// Skip optimizing notail and musttail calls so
// LibCallSimplifier::optimizeCall doesn't have to preserve those invariants.
- // LibCallSimplifier::optimizeCall should try to preseve tail calls though.
+ // LibCallSimplifier::optimizeCall should try to preserve tail calls though.
if (CI->isMustTailCall() || CI->isNoTailCall())
return nullptr;
@@ -3677,8 +3848,8 @@ Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
auto InstCombineErase = [this](Instruction *I) {
eraseInstFromFunction(*I);
};
- LibCallSimplifier Simplifier(DL, &TLI, &AC, ORE, BFI, PSI, InstCombineRAUW,
- InstCombineErase);
+ LibCallSimplifier Simplifier(DL, &TLI, &DT, &DC, &AC, ORE, BFI, PSI,
+ InstCombineRAUW, InstCombineErase);
if (Value *With = Simplifier.optimizeCall(CI, Builder)) {
++NumSimplified;
return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
@@ -4035,13 +4206,14 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
DenseMap<Value *, unsigned> Val2Idx;
std::vector<Value *> NewLiveGc;
for (Value *V : Bundle->Inputs) {
- if (Val2Idx.count(V))
+ auto [It, Inserted] = Val2Idx.try_emplace(V);
+ if (!Inserted)
continue;
if (LiveGcValues.count(V)) {
- Val2Idx[V] = NewLiveGc.size();
+ It->second = NewLiveGc.size();
NewLiveGc.push_back(V);
} else
- Val2Idx[V] = NumOfGCLives;
+ It->second = NumOfGCLives;
}
// Update all gc.relocates
for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
@@ -4079,6 +4251,12 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
assert(!isa<CallBrInst>(Call) &&
"CallBr's don't have a single point after a def to insert at");
+ // Don't perform the transform for declarations, which may not be fully
+ // accurate. For example, void @foo() is commonly used as a placeholder for
+ // unknown prototypes.
+ if (Callee->isDeclaration())
+ return false;
+
// If this is a call to a thunk function, don't remove the cast. Thunks are
// used to transparently forward all incoming parameters and outgoing return
// values, so it's important to leave the cast in place.
@@ -4115,18 +4293,14 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
return false; // TODO: Handle multiple return values.
if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
- if (Callee->isDeclaration())
- return false; // Cannot transform this return value.
-
- if (!Caller->use_empty() &&
- // void -> non-void is handled specially
- !NewRetTy->isVoidTy())
+ if (!Caller->use_empty())
return false; // Cannot transform this return value.
}
if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
- if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy)))
+ if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(
+ NewRetTy, CallerPAL.getRetAttrs())))
return false; // Attribute not compatible with transformed value.
}
@@ -4172,7 +4346,8 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
// Check if there are any incompatible attributes we cannot drop safely.
if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(i))
.overlaps(AttributeFuncs::typeIncompatible(
- ParamTy, AttributeFuncs::ASK_UNSAFE_TO_DROP)))
+ ParamTy, CallerPAL.getParamAttrs(i),
+ AttributeFuncs::ASK_UNSAFE_TO_DROP)))
return false; // Attribute not compatible with transformed value.
if (Call.isInAllocaArgument(i) ||
@@ -4187,25 +4362,6 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
return false; // Cannot transform to or from byval.
}
- if (Callee->isDeclaration()) {
- // Do not delete arguments unless we have a function body.
- if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
- return false;
-
- // If the callee is just a declaration, don't change the varargsness of the
- // call. We don't want to introduce a varargs call where one doesn't
- // already exist.
- if (FT->isVarArg() != Call.getFunctionType()->isVarArg())
- return false;
-
- // If both the callee and the cast type are varargs, we still have to make
- // sure the number of fixed parameters are the same or we have the same
- // ABI issues as if we introduce a varargs call.
- if (FT->isVarArg() && Call.getFunctionType()->isVarArg() &&
- FT->getNumParams() != Call.getFunctionType()->getNumParams())
- return false;
- }
-
if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
!CallerPAL.isEmpty()) {
// In this case we have more arguments than the new function type, but we
@@ -4229,7 +4385,8 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
// If the return value is not being used, the type may not be compatible
// with the existing attributes. Wipe out any problematic attributes.
- RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));
+ RAttrs.remove(
+ AttributeFuncs::typeIncompatible(NewRetTy, CallerPAL.getRetAttrs()));
LLVMContext &Ctx = Call.getContext();
AI = Call.arg_begin();
@@ -4244,7 +4401,7 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
// Add any parameter attributes except the ones incompatible with the new
// type. Note that we made sure all incompatible ones are safe to drop.
AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
- ParamTy, AttributeFuncs::ASK_SAFE_TO_DROP);
+ ParamTy, CallerPAL.getParamAttrs(i), AttributeFuncs::ASK_SAFE_TO_DROP);
ArgAttrs.push_back(
CallerPAL.getParamAttrs(i).removeAttributes(Ctx, IncompatibleAttrs));
}
@@ -4311,17 +4468,14 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
Instruction *NC = NewCall;
Value *NV = NC;
if (OldRetTy != NV->getType() && !Caller->use_empty()) {
- if (!NV->getType()->isVoidTy()) {
- NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
- NC->setDebugLoc(Caller->getDebugLoc());
-
- auto OptInsertPt = NewCall->getInsertionPointAfterDef();
- assert(OptInsertPt && "No place to insert cast");
- InsertNewInstBefore(NC, *OptInsertPt);
- Worklist.pushUsersToWorkList(*Caller);
- } else {
- NV = PoisonValue::get(Caller->getType());
- }
+ assert(!NV->getType()->isVoidTy());
+ NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
+ NC->setDebugLoc(Caller->getDebugLoc());
+
+ auto OptInsertPt = NewCall->getInsertionPointAfterDef();
+ assert(OptInsertPt && "No place to insert cast");
+ InsertNewInstBefore(NC, *OptInsertPt);
+ Worklist.pushUsersToWorkList(*Caller);
}
if (!Caller->use_empty())