Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp')
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp  430
1 file changed, 292 insertions(+), 138 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 809be499ee0f..f748f78524e0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -104,7 +104,7 @@ static Type *getPromotedType(Type *Ty) {
 /// requires a deeper change to allow either unread or unwritten objects.
 static bool hasUndefSource(AnyMemTransferInst *MI) {
   auto *Src = MI->getRawSource();
-  while (isa<GetElementPtrInst>(Src) || isa<BitCastInst>(Src)) {
+  while (isa<GetElementPtrInst>(Src)) {
     if (!Src->hasOneUse())
       return false;
     Src = cast<Instruction>(Src)->getOperand(0);
@@ -171,7 +171,7 @@ Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
   IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
 
   // If the memcpy has metadata describing the members, see if we can get the
-  // TBAA tag describing our copy.
+  // TBAA, scope and noalias tags describing our copy.
   AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(Size);
 
   Value *Src = MI->getArgOperand(1);
@@ -260,13 +260,11 @@ Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
   // memset(s,c,n) -> store s, c (for n=1,2,4,8)
   if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
-    Type *ITy = IntegerType::get(MI->getContext(), Len*8);  // n=1 -> i8.
-
     Value *Dest = MI->getDest();
 
     // Extract the fill value and store.
-    const uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
-    Constant *FillVal = ConstantInt::get(ITy, Fill);
+    Constant *FillVal = ConstantInt::get(
+        MI->getContext(), APInt::getSplat(Len * 8, FillC->getValue()));
     StoreInst *S = Builder.CreateStore(FillVal, Dest, MI->isVolatile());
     S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
     auto replaceOpForAssignmentMarkers = [FillC, FillVal](auto *DbgAssign) {
@@ -490,7 +488,8 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
   // cttz(bitreverse(x)) -> ctlz(x)
   if (match(Op0, m_BitReverse(m_Value(X)))) {
     Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
-    Function *F = Intrinsic::getDeclaration(II.getModule(), ID, II.getType());
+    Function *F =
+        Intrinsic::getOrInsertDeclaration(II.getModule(), ID, II.getType());
     return CallInst::Create(F, {X, II.getArgOperand(1)});
   }
@@ -506,8 +505,10 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
 
   // If ctlz/cttz is only used as a shift amount, set is_zero_poison to true.
   if (II.hasOneUse() && match(Op1, m_Zero()) &&
-      match(II.user_back(), m_Shift(m_Value(), m_Specific(&II))))
+      match(II.user_back(), m_Shift(m_Value(), m_Specific(&II)))) {
+    II.dropUBImplyingAttrsAndMetadata();
     return IC.replaceOperand(II, 1, IC.Builder.getTrue());
+  }
 
   Constant *C;
@@ -587,6 +588,19 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
     }
   }
 
+  // cttz(Pow2) -> Log2(Pow2)
+  // ctlz(Pow2) -> BitWidth - 1 - Log2(Pow2)
+  if (auto *R = IC.tryGetLog2(Op0, match(Op1, m_One()))) {
+    if (IsTZ)
+      return IC.replaceInstUsesWith(II, R);
+    BinaryOperator *BO = BinaryOperator::CreateSub(
+        ConstantInt::get(R->getType(), R->getType()->getScalarSizeInBits() - 1),
+        R);
+    BO->setHasNoSignedWrap();
+    BO->setHasNoUnsignedWrap();
+    return BO;
+  }
+
   KnownBits Known = IC.computeKnownBits(Op0, 0, &II);
 
   // Create a mask for bits above (ctlz) or below (cttz) the first known one.
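As an illustration (not taken from the commit itself), the tryGetLog2-based fold added above rewrites a cttz of a value that is provably a power of two into that value's log2. A minimal IR sketch, assuming tryGetLog2 recognizes 1 << %n as a power of two; the function name is hypothetical:

  declare i32 @llvm.cttz.i32(i32, i1)

  define i32 @cttz_pow2(i32 %n) {
    %p = shl nuw i32 1, %n                          ; always a power of two
    %r = call i32 @llvm.cttz.i32(i32 %p, i1 true)   ; trailing zeros = log2 = %n
    ret i32 %r
  }
  ; after the fold this is just: ret i32 %n
  ; the ctlz form instead becomes: sub nuw nsw i32 31, %n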
@@ -648,9 +662,8 @@ static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
   // ctpop(x | -x) -> bitwidth - cttz(x, false)
   if (Op0->hasOneUse() &&
       match(Op0, m_c_Or(m_Value(X), m_Neg(m_Deferred(X))))) {
-    Function *F =
-        Intrinsic::getDeclaration(II.getModule(), Intrinsic::cttz, Ty);
-    auto *Cttz = IC.Builder.CreateCall(F, {X, IC.Builder.getFalse()});
+    auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
+                                            {X, IC.Builder.getFalse()});
     auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
     return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
   }
@@ -659,7 +672,7 @@ static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
   if (match(Op0,
             m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) {
     Function *F =
-        Intrinsic::getDeclaration(II.getModule(), Intrinsic::cttz, Ty);
+        Intrinsic::getOrInsertDeclaration(II.getModule(), Intrinsic::cttz, Ty);
     return CallInst::Create(F, {X, IC.Builder.getFalse()});
   }
@@ -692,12 +705,24 @@ static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
                                                 Ty);
 
   // Add range attribute since known bits can't completely reflect what we know.
-  if (BitWidth != 1 && !II.hasRetAttr(Attribute::Range) &&
-      !II.getMetadata(LLVMContext::MD_range)) {
-    ConstantRange Range(APInt(BitWidth, Known.countMinPopulation()),
-                        APInt(BitWidth, Known.countMaxPopulation() + 1));
-    II.addRangeRetAttr(Range);
-    return &II;
+  if (BitWidth != 1) {
+    ConstantRange OldRange =
+        II.getRange().value_or(ConstantRange::getFull(BitWidth));
+
+    unsigned Lower = Known.countMinPopulation();
+    unsigned Upper = Known.countMaxPopulation() + 1;
+
+    if (Lower == 0 && OldRange.contains(APInt::getZero(BitWidth)) &&
+        isKnownNonZero(Op0, IC.getSimplifyQuery().getWithInstruction(&II)))
+      Lower = 1;
+
+    ConstantRange Range(APInt(BitWidth, Lower), APInt(BitWidth, Upper));
+    Range = Range.intersectWith(OldRange, ConstantRange::Unsigned);
+
+    if (Range != OldRange) {
+      II.addRangeRetAttr(Range);
+      return &II;
+    }
   }
 
   return nullptr;
@@ -827,6 +852,35 @@ InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
   if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
                             WO->getRHS(), *WO, OperationResult, OverflowResult))
     return createOverflowTuple(WO, OperationResult, OverflowResult);
+
+  // See whether we can optimize the overflow check with assumption information.
+  for (User *U : WO->users()) {
+    if (!match(U, m_ExtractValue<1>(m_Value())))
+      continue;
+
+    for (auto &AssumeVH : AC.assumptionsFor(U)) {
+      if (!AssumeVH)
+        continue;
+      CallInst *I = cast<CallInst>(AssumeVH);
+      if (!match(I->getArgOperand(0), m_Not(m_Specific(U))))
+        continue;
+      if (!isValidAssumeForContext(I, II, /*DT=*/nullptr,
+                                   /*AllowEphemerals=*/true))
+        continue;
+      Value *Result =
+          Builder.CreateBinOp(WO->getBinaryOp(), WO->getLHS(), WO->getRHS());
+      Result->takeName(WO);
+      if (auto *Inst = dyn_cast<Instruction>(Result)) {
+        if (WO->isSigned())
+          Inst->setHasNoSignedWrap();
+        else
+          Inst->setHasNoUnsignedWrap();
+      }
+      return createOverflowTuple(WO, Result,
+                                 ConstantInt::getFalse(U->getType()));
+    }
+  }
+
   return nullptr;
 }
@@ -1171,10 +1225,9 @@ Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
     return nullptr;
 
   // Finally create and return the sat intrinsic, truncated to the new type
-  Function *F = Intrinsic::getDeclaration(MinMax1.getModule(), IntrinsicID, NewTy);
   Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);
   Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy);
-  Value *Sat = Builder.CreateCall(F, {AT, BT});
+  Value *Sat = Builder.CreateIntrinsic(IntrinsicID, NewTy, {AT, BT});
   return CastInst::Create(Instruction::SExt, Sat, Ty);
 }
@@ -1276,8 +1329,8 @@ reassociateMinMaxWithConstantInOperand(IntrinsicInst *II,
     return nullptr;
 
   // max (max X, C), Y --> max (max X, Y), C
-  Function *MinMax =
-      Intrinsic::getDeclaration(II->getModule(), MinMaxID, II->getType());
+  Function *MinMax = Intrinsic::getOrInsertDeclaration(II->getModule(),
+                                                       MinMaxID, II->getType());
   Value *NewInner = Builder.CreateBinaryIntrinsic(MinMaxID, X, Y);
   NewInner->takeName(Inner);
   return CallInst::Create(MinMax, {NewInner, C});
@@ -1336,7 +1389,8 @@ static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
     return nullptr;
 
   Module *Mod = II->getModule();
-  Function *MinMax = Intrinsic::getDeclaration(Mod, MinMaxID, II->getType());
+  Function *MinMax =
+      Intrinsic::getOrInsertDeclaration(Mod, MinMaxID, II->getType());
   return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
 }
@@ -1493,6 +1547,76 @@ foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1,
                                  ConstantInt::getTrue(ZeroUndef->getType()));
 }
 
+/// Return whether "X LOp (Y ROp Z)" is always equal to
+/// "(X LOp Y) ROp (X LOp Z)".
+static bool leftDistributesOverRight(Instruction::BinaryOps LOp, bool HasNUW,
+                                     bool HasNSW, Intrinsic::ID ROp) {
+  switch (ROp) {
+  case Intrinsic::umax:
+  case Intrinsic::umin:
+    return HasNUW && LOp == Instruction::Add;
+  case Intrinsic::smax:
+  case Intrinsic::smin:
+    return HasNSW && LOp == Instruction::Add;
+  default:
+    return false;
+  }
+}
+
+// Attempts to factorise a common term
+// in an instruction that has the form "(A op' B) op (C op' D)
+// where op is an intrinsic and op' is a binop
+static Value *
+foldIntrinsicUsingDistributiveLaws(IntrinsicInst *II,
+                                   InstCombiner::BuilderTy &Builder) {
+  Value *LHS = II->getOperand(0), *RHS = II->getOperand(1);
+  Intrinsic::ID TopLevelOpcode = II->getIntrinsicID();
+
+  OverflowingBinaryOperator *Op0 = dyn_cast<OverflowingBinaryOperator>(LHS);
+  OverflowingBinaryOperator *Op1 = dyn_cast<OverflowingBinaryOperator>(RHS);
+
+  if (!Op0 || !Op1)
+    return nullptr;
+
+  if (Op0->getOpcode() != Op1->getOpcode())
+    return nullptr;
+
+  if (!Op0->hasOneUse() || !Op1->hasOneUse())
+    return nullptr;
+
+  Instruction::BinaryOps InnerOpcode =
+      static_cast<Instruction::BinaryOps>(Op0->getOpcode());
+  bool HasNUW = Op0->hasNoUnsignedWrap() && Op1->hasNoUnsignedWrap();
+  bool HasNSW = Op0->hasNoSignedWrap() && Op1->hasNoSignedWrap();
+
+  if (!leftDistributesOverRight(InnerOpcode, HasNUW, HasNSW, TopLevelOpcode))
+    return nullptr;
+
+  assert(II->isCommutative() && Op0->isCommutative() &&
+         "Only inner and outer commutative op codes are supported.");
+
+  Value *A = Op0->getOperand(0);
+  Value *B = Op0->getOperand(1);
+  Value *C = Op1->getOperand(0);
+  Value *D = Op1->getOperand(1);
+
+  // Attempts to swap variables such that A always equals C
+  if (A != C && A != D)
+    std::swap(A, B);
+  if (A == C || A == D) {
+    if (A != C)
+      std::swap(C, D);
+    Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, B, D);
+    BinaryOperator *NewBinop =
+        cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, NewIntrinsic, A));
+    NewBinop->setHasNoSignedWrap(HasNSW);
+    NewBinop->setHasNoUnsignedWrap(HasNUW);
+    return NewBinop;
+  }
+
+  return nullptr;
+}
+
 /// CallInst simplification. This mostly only handles folding of intrinsic
 /// instructions. For normal calls, it allows visitCallBase to do the heavy
 /// lifting.
@@ -1500,10 +1624,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
   // Don't try to simplify calls without uses. It will not do anything useful,
   // but will result in the following folds being skipped.
   if (!CI.use_empty()) {
-    SmallVector<Value *, 4> Args;
-    Args.reserve(CI.arg_size());
-    for (Value *Op : CI.args())
-      Args.push_back(Op);
+    SmallVector<Value *, 8> Args(CI.args());
     if (Value *V = simplifyCall(&CI, CI.getCalledOperand(), Args,
                                 SQ.getWithInstruction(&CI)))
       return replaceInstUsesWith(CI, V);
@@ -1564,7 +1685,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
         Type *Tys[3] = { CI.getArgOperand(0)->getType(),
                          CI.getArgOperand(1)->getType(),
                          CI.getArgOperand(2)->getType() };
-        CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
+        CI.setCalledFunction(
+            Intrinsic::getOrInsertDeclaration(M, MemCpyID, Tys));
         Changed = true;
       }
     }
@@ -1575,16 +1697,30 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
       return eraseInstFromFunction(CI);
     }
 
+    auto IsPointerUndefined = [MI](Value *Ptr) {
+      return isa<ConstantPointerNull>(Ptr) &&
+             !NullPointerIsDefined(
+                 MI->getFunction(),
+                 cast<PointerType>(Ptr->getType())->getAddressSpace());
+    };
+    bool SrcIsUndefined = false;
     // If we can determine a pointer alignment that is bigger than currently
    // set, update the alignment.
     if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
       if (Instruction *I = SimplifyAnyMemTransfer(MTI))
         return I;
+      SrcIsUndefined = IsPointerUndefined(MTI->getRawSource());
     } else if (auto *MSI = dyn_cast<AnyMemSetInst>(MI)) {
       if (Instruction *I = SimplifyAnyMemSet(MSI))
         return I;
     }
 
+    // If src/dest is null, this memory intrinsic must be a noop.
+    if (SrcIsUndefined || IsPointerUndefined(MI->getRawDest())) {
+      Builder.CreateAssumption(Builder.CreateIsNull(MI->getLength()));
+      return eraseInstFromFunction(CI);
+    }
+
     if (Changed) return II;
   }
@@ -1642,9 +1778,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     Value *X;
     if (match(IIOperand, m_Neg(m_Value(X))))
       return replaceOperand(*II, 0, X);
-    if (match(IIOperand, m_Select(m_Value(), m_Value(X), m_Neg(m_Deferred(X)))))
-      return replaceOperand(*II, 0, X);
-    if (match(IIOperand, m_Select(m_Value(), m_Neg(m_Value(X)), m_Deferred(X))))
+    if (match(IIOperand, m_c_Select(m_Neg(m_Value(X)), m_Deferred(X))))
       return replaceOperand(*II, 0, X);
 
     Value *Y;
@@ -1726,6 +1860,33 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
         return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
       }
     }
+    // If C is not 0:
+    //    umax(nuw_shl(x, C), x + 1) -> x == 0 ? 1 : nuw_shl(x, C)
+    // If C is not 0 or 1:
+    //    umax(nuw_mul(x, C), x + 1) -> x == 0 ? 1 : nuw_mul(x, C)
+    auto foldMaxMulShift = [&](Value *A, Value *B) -> Instruction * {
+      const APInt *C;
+      Value *X;
+      if (!match(A, m_NUWShl(m_Value(X), m_APInt(C))) &&
+          !(match(A, m_NUWMul(m_Value(X), m_APInt(C))) && !C->isOne()))
+        return nullptr;
+      if (C->isZero())
+        return nullptr;
+      if (!match(B, m_OneUse(m_Add(m_Specific(X), m_One()))))
+        return nullptr;
+
+      Value *Cmp = Builder.CreateICmpEQ(X, ConstantInt::get(X->getType(), 0));
+      Value *NewSelect =
+          Builder.CreateSelect(Cmp, ConstantInt::get(X->getType(), 1), A);
+      return replaceInstUsesWith(*II, NewSelect);
+    };
+
+    if (IID == Intrinsic::umax) {
+      if (Instruction *I = foldMaxMulShift(I0, I1))
+        return I;
+      if (Instruction *I = foldMaxMulShift(I1, I0))
+        return I;
+    }
     // If both operands of unsigned min/max are sign-extended, it is still ok
     // to narrow the operation.
     [[fallthrough]];
@@ -1906,6 +2067,18 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
       }
     }
 
+    if (Value *V = foldIntrinsicUsingDistributiveLaws(II, Builder))
+      return replaceInstUsesWith(*II, V);
+
+    break;
+  }
+  case Intrinsic::scmp: {
+    Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
+    Value *LHS, *RHS;
+    if (match(I0, m_NSWSub(m_Value(LHS), m_Value(RHS))) && match(I1, m_Zero()))
+      return replaceInstUsesWith(
+          CI,
+          Builder.CreateIntrinsic(II->getType(), Intrinsic::scmp, {LHS, RHS}));
     break;
   }
   case Intrinsic::bitreverse: {
@@ -2065,7 +2238,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
       Constant *LeftShiftC = ConstantExpr::getSub(WidthC, ShAmtC);
       Module *Mod = II->getModule();
-      Function *Fshl = Intrinsic::getDeclaration(Mod, Intrinsic::fshl, Ty);
+      Function *Fshl =
+          Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::fshl, Ty);
       return CallInst::Create(Fshl, { Op0, Op1, LeftShiftC });
     }
     assert(IID == Intrinsic::fshl &&
@@ -2085,7 +2259,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
     if (Op0 == Op1 && BitWidth == 16 && match(ShAmtC, m_SpecificInt(8))) {
       Module *Mod = II->getModule();
-      Function *Bswap = Intrinsic::getDeclaration(Mod, Intrinsic::bswap, Ty);
+      Function *Bswap =
+          Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::bswap, Ty);
       return CallInst::Create(Bswap, { Op0 });
     }
     if (Instruction *BitOp =
@@ -2094,6 +2269,15 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
       return BitOp;
   }
 
+    // fshl(X, 0, Y) --> shl(X, and(Y, BitWidth - 1)) if bitwidth is a
+    // power-of-2
+    if (IID == Intrinsic::fshl && isPowerOf2_32(BitWidth) &&
+        match(Op1, m_ZeroInt())) {
+      Value *Op2 = II->getArgOperand(2);
+      Value *And = Builder.CreateAnd(Op2, ConstantInt::get(Ty, BitWidth - 1));
+      return BinaryOperator::CreateShl(Op0, And);
+    }
+
     // Left or right might be masked.
     if (SimplifyDemandedInstructionBits(*II))
       return &CI;
@@ -2365,13 +2549,12 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     default:
       llvm_unreachable("unexpected intrinsic ID");
     }
-    Value *V = Builder.CreateBinaryIntrinsic(
-        IID, X, ConstantFP::get(Arg0->getType(), Res), II);
     // TODO: Conservatively intersecting FMF. If Res == C2, the transform
     //       was a simplification (so Arg0 and its original flags could
     //       propagate?)
-    if (auto *CI = dyn_cast<CallInst>(V))
-      CI->andIRFlags(M);
+    Value *V = Builder.CreateBinaryIntrinsic(
+        IID, X, ConstantFP::get(Arg0->getType(), Res),
+        FMFSource::intersect(II, M));
     return replaceInstUsesWith(*II, V);
   }
 }
@@ -2466,13 +2649,11 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
   }
   case Intrinsic::fmuladd: {
     // Try to simplify the underlying FMul.
-    if (Value *V = simplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
-                                    II->getFastMathFlags(),
-                                    SQ.getWithInstruction(II))) {
-      auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
-      FAdd->copyFastMathFlags(II);
-      return FAdd;
-    }
+    if (Value *V =
+            simplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
+                             II->getFastMathFlags(), SQ.getWithInstruction(II)))
+      return BinaryOperator::CreateFAddFMF(V, II->getArgOperand(2),
+                                           II->getFastMathFlags());
 
     [[fallthrough]];
   }
@@ -2480,6 +2661,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     // fma fneg(x), fneg(y), z -> fma x, y, z
     Value *Src0 = II->getArgOperand(0);
     Value *Src1 = II->getArgOperand(1);
+    Value *Src2 = II->getArgOperand(2);
     Value *X, *Y;
     if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) {
       replaceOperand(*II, 0, X);
@@ -2497,22 +2679,21 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
 
     // Try to simplify the underlying FMul. We can only apply simplifications
     // that do not require rounding.
-    if (Value *V = simplifyFMAFMul(II->getArgOperand(0), II->getArgOperand(1),
-                                   II->getFastMathFlags(),
-                                   SQ.getWithInstruction(II))) {
-      auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
-      FAdd->copyFastMathFlags(II);
-      return FAdd;
-    }
+    if (Value *V = simplifyFMAFMul(Src0, Src1, II->getFastMathFlags(),
+                                   SQ.getWithInstruction(II)))
+      return BinaryOperator::CreateFAddFMF(V, Src2, II->getFastMathFlags());
 
     // fma x, y, 0 -> fmul x, y
     // This is always valid for -0.0, but requires nsz for +0.0 as
     // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
-    if (match(II->getArgOperand(2), m_NegZeroFP()) ||
-        (match(II->getArgOperand(2), m_PosZeroFP()) &&
-         II->getFastMathFlags().noSignedZeros()))
+    if (match(Src2, m_NegZeroFP()) ||
+        (match(Src2, m_PosZeroFP()) && II->getFastMathFlags().noSignedZeros()))
       return BinaryOperator::CreateFMulFMF(Src0, Src1, II);
 
+    // fma x, -1.0, y -> fsub y, x
+    if (match(Src1, m_SpecificFP(-1.0)))
+      return BinaryOperator::CreateFSubFMF(Src2, Src0, II);
+
     break;
   }
   case Intrinsic::copysign: {
@@ -2535,8 +2716,11 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     // Propagate sign argument through nested calls:
     // copysign Mag, (copysign ?, X) --> copysign Mag, X
     Value *X;
-    if (match(Sign, m_Intrinsic<Intrinsic::copysign>(m_Value(), m_Value(X))))
-      return replaceOperand(*II, 1, X);
+    if (match(Sign, m_Intrinsic<Intrinsic::copysign>(m_Value(), m_Value(X)))) {
+      Value *CopySign =
+          Builder.CreateCopySign(Mag, X, FMFSource::intersect(II, Sign));
+      return replaceInstUsesWith(*II, CopySign);
+    }
 
     // Clear sign-bit of constant magnitude:
     // copysign -MagC, X --> copysign MagC, X
@@ -2591,8 +2775,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
                m_CopySign(m_Value(Magnitude), m_Value(Sign)))) {
       // fabs (copysign x, y) -> (fabs x)
       CallInst *AbsSign =
-          Builder.CreateCall(II->getCalledFunction(), {Magnitude});
-      AbsSign->copyFastMathFlags(II);
+          Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Magnitude, II);
       return replaceInstUsesWith(*II, AbsSign);
     }
@@ -2699,16 +2882,15 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
       Value *NewLdexp = nullptr;
       Value *Select = nullptr;
       if (match(SelectRHS, m_ZeroInt())) {
-        NewLdexp = Builder.CreateLdexp(Src, SelectLHS);
+        NewLdexp = Builder.CreateLdexp(Src, SelectLHS, II);
         Select = Builder.CreateSelect(SelectCond, NewLdexp, Src);
       } else if (match(SelectLHS, m_ZeroInt())) {
-        NewLdexp = Builder.CreateLdexp(Src, SelectRHS);
+        NewLdexp = Builder.CreateLdexp(Src, SelectRHS, II);
         Select = Builder.CreateSelect(SelectCond, Src, NewLdexp);
       }
 
       if (NewLdexp) {
         Select->takeName(II);
-        cast<Instruction>(NewLdexp)->copyFastMathFlags(II);
         return replaceInstUsesWith(*II, Select);
       }
@@ -2791,7 +2973,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
       CallArgs.push_back(II->getArgOperand(4));
     }
 
-    Function *NewFn = Intrinsic::getDeclaration(II->getModule(), NewIntrin);
+    Function *NewFn =
+        Intrinsic::getOrInsertDeclaration(II->getModule(), NewIntrin);
     return CallInst::Create(NewFn, CallArgs);
   }
   case Intrinsic::arm_neon_vtbl1:
@@ -3028,13 +3211,13 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
 
     // assume( (load addr) != null ) -> add 'nonnull' metadata to load
     // (if assume is valid at the load)
-    CmpInst::Predicate Pred;
     Instruction *LHS;
-    if (match(IIOperand, m_ICmp(Pred, m_Instruction(LHS), m_Zero())) &&
-        Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load &&
+    if (match(IIOperand, m_SpecificICmp(ICmpInst::ICMP_NE, m_Instruction(LHS),
+                                        m_Zero())) &&
+        LHS->getOpcode() == Instruction::Load &&
         LHS->getType()->isPointerTy() &&
         isValidAssumeForContext(II, LHS, &DT)) {
-      MDNode *MD = MDNode::get(II->getContext(), std::nullopt);
+      MDNode *MD = MDNode::get(II->getContext(), {});
       LHS->setMetadata(LLVMContext::MD_nonnull, MD);
       LHS->setMetadata(LLVMContext::MD_noundef, MD);
       return RemoveConditionFromAssume(II);
@@ -3070,12 +3253,13 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     // into
    // call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
     if (EnableKnowledgeRetention &&
-        match(IIOperand, m_Cmp(Pred, m_Value(A), m_Zero())) &&
-        Pred == CmpInst::ICMP_NE && A->getType()->isPointerTy()) {
+        match(IIOperand,
+              m_SpecificICmp(ICmpInst::ICMP_NE, m_Value(A), m_Zero())) &&
+        A->getType()->isPointerTy()) {
       if (auto *Replacement = buildAssumeFromKnowledge(
              {RetainedKnowledge{Attribute::NonNull, 0, A}}, Next, &AC, &DT)) {
 
-        Replacement->insertBefore(Next);
+        Replacement->insertBefore(Next->getIterator());
         AC.registerAssumption(Replacement);
         return RemoveConditionFromAssume(II);
       }
@@ -3088,12 +3272,13 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     //   call void @llvm.assume(i1 %D)
     // into
     //   call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)]
-    uint64_t AlignMask;
+    uint64_t AlignMask = 1;
     if (EnableKnowledgeRetention &&
-        match(IIOperand,
-              m_Cmp(Pred, m_And(m_Value(A), m_ConstantInt(AlignMask)),
-                    m_Zero())) &&
-        Pred == CmpInst::ICMP_EQ) {
+        (match(IIOperand, m_Not(m_Trunc(m_Value(A)))) ||
+         match(IIOperand,
+               m_SpecificICmp(ICmpInst::ICMP_EQ,
+                              m_And(m_Value(A), m_ConstantInt(AlignMask)),
+                              m_Zero())))) {
       if (isPowerOf2_64(AlignMask + 1)) {
         uint64_t Offset = 0;
         match(A, m_Add(m_Value(A), m_ConstantInt(Offset)));
@@ -3107,7 +3292,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
           if (auto *Replacement =
                   buildAssumeFromKnowledge(RK, Next, &AC, &DT)) {
 
-            Replacement->insertAfter(II);
+            Replacement->insertAfter(II->getIterator());
             AC.registerAssumption(Replacement);
           }
           return RemoveConditionFromAssume(II);
@@ -3191,7 +3376,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
         while (MoveI != NextInst) {
          auto *Temp = MoveI;
          MoveI = MoveI->getNextNonDebugInstruction();
-          Temp->moveBefore(II);
+          Temp->moveBefore(II->getIterator());
         }
        replaceOperand(*II, 0, Builder.CreateAnd(CurrCond, NextCond));
       }
@@ -3595,26 +3780,12 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
   //   * The intrinsic is speculatable.
   //   * The select condition is not a vector, or the intrinsic does not
   //     perform cross-lane operations.
-  switch (IID) {
-  case Intrinsic::ctlz:
-  case Intrinsic::cttz:
-  case Intrinsic::ctpop:
-  case Intrinsic::umin:
-  case Intrinsic::umax:
-  case Intrinsic::smin:
-  case Intrinsic::smax:
-  case Intrinsic::usub_sat:
-  case Intrinsic::uadd_sat:
-  case Intrinsic::ssub_sat:
-  case Intrinsic::sadd_sat:
+  if (isSafeToSpeculativelyExecuteWithVariableReplaced(&CI) &&
+      isNotCrossLaneOperation(II))
     for (Value *Op : II->args())
      if (auto *Sel = dyn_cast<SelectInst>(Op))
        if (Instruction *R = FoldOpIntoSelect(*II, Sel))
          return R;
-    [[fallthrough]];
-  default:
-    break;
-  }
 
   if (Instruction *Shuf = foldShuffledIntrinsicOperands(II, Builder))
     return Shuf;
@@ -3667,7 +3838,7 @@ Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
 
   // Skip optimizing notail and musttail calls so
   // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants.
-  // LibCallSimplifier::optimizeCall should try to preseve tail calls though.
+  // LibCallSimplifier::optimizeCall should try to preserve tail calls though.
   if (CI->isMustTailCall() || CI->isNoTailCall())
     return nullptr;
@@ -3677,8 +3848,8 @@ Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
   auto InstCombineErase = [this](Instruction *I) {
     eraseInstFromFunction(*I);
   };
-  LibCallSimplifier Simplifier(DL, &TLI, &AC, ORE, BFI, PSI, InstCombineRAUW,
-                               InstCombineErase);
+  LibCallSimplifier Simplifier(DL, &TLI, &DT, &DC, &AC, ORE, BFI, PSI,
+                               InstCombineRAUW, InstCombineErase);
   if (Value *With = Simplifier.optimizeCall(CI, Builder)) {
     ++NumSimplified;
     return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
@@ -4035,13 +4206,14 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
     DenseMap<Value *, unsigned> Val2Idx;
     std::vector<Value *> NewLiveGc;
     for (Value *V : Bundle->Inputs) {
-      if (Val2Idx.count(V))
+      auto [It, Inserted] = Val2Idx.try_emplace(V);
+      if (!Inserted)
         continue;
       if (LiveGcValues.count(V)) {
-        Val2Idx[V] = NewLiveGc.size();
+        It->second = NewLiveGc.size();
         NewLiveGc.push_back(V);
       } else
-        Val2Idx[V] = NumOfGCLives;
+        It->second = NumOfGCLives;
     }
     // Update all gc.relocates
@@ -4079,6 +4251,12 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
   assert(!isa<CallBrInst>(Call) &&
          "CallBr's don't have a single point after a def to insert at");
 
+  // Don't perform the transform for declarations, which may not be fully
+  // accurate. For example, void @foo() is commonly used as a placeholder for
+  // unknown prototypes.
+  if (Callee->isDeclaration())
+    return false;
+
   // If this is a call to a thunk function, don't remove the cast. Thunks are
   // used to transparently forward all incoming parameters and outgoing return
   // values, so it's important to leave the cast in place.
@@ -4115,18 +4293,14 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
      return false;   // TODO: Handle multiple return values.
 
    if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
-      if (Callee->isDeclaration())
-        return false;   // Cannot transform this return value.
-
-      if (!Caller->use_empty() &&
-          // void -> non-void is handled specially
-          !NewRetTy->isVoidTy())
+      if (!Caller->use_empty())
        return false;   // Cannot transform this return value.
    }
 
    if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
      AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
-      if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy)))
+      if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(
+              NewRetTy, CallerPAL.getRetAttrs())))
        return false;   // Attribute not compatible with transformed value.
    }
@@ -4172,7 +4346,8 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
     // Check if there are any incompatible attributes we cannot drop safely.
     if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(i))
             .overlaps(AttributeFuncs::typeIncompatible(
-                ParamTy, AttributeFuncs::ASK_UNSAFE_TO_DROP)))
+                ParamTy, CallerPAL.getParamAttrs(i),
+                AttributeFuncs::ASK_UNSAFE_TO_DROP)))
      return false;   // Attribute not compatible with transformed value.
 
    if (Call.isInAllocaArgument(i) ||
@@ -4187,25 +4362,6 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
      return false;   // Cannot transform to or from byval.
  }
 
-  if (Callee->isDeclaration()) {
-    // Do not delete arguments unless we have a function body.
-    if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
-      return false;
-
-    // If the callee is just a declaration, don't change the varargsness of the
-    // call.  We don't want to introduce a varargs call where one doesn't
-    // already exist.
-    if (FT->isVarArg() != Call.getFunctionType()->isVarArg())
-      return false;
-
-    // If both the callee and the cast type are varargs, we still have to make
-    // sure the number of fixed parameters are the same or we have the same
-    // ABI issues as if we introduce a varargs call.
-    if (FT->isVarArg() && Call.getFunctionType()->isVarArg() &&
-        FT->getNumParams() != Call.getFunctionType()->getNumParams())
-      return false;
-  }
-
  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
      !CallerPAL.isEmpty()) {
    // In this case we have more arguments than the new function type, but we
@@ -4229,7 +4385,8 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
 
  // If the return value is not being used, the type may not be compatible
  // with the existing attributes.  Wipe out any problematic attributes.
-  RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));
+  RAttrs.remove(
+      AttributeFuncs::typeIncompatible(NewRetTy, CallerPAL.getRetAttrs()));
 
  LLVMContext &Ctx = Call.getContext();
  AI = Call.arg_begin();
@@ -4244,7 +4401,7 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
    // Add any parameter attributes except the ones incompatible with the new
    // type. Note that we made sure all incompatible ones are safe to drop.
    AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
-        ParamTy, AttributeFuncs::ASK_SAFE_TO_DROP);
+        ParamTy, CallerPAL.getParamAttrs(i), AttributeFuncs::ASK_SAFE_TO_DROP);
    ArgAttrs.push_back(
        CallerPAL.getParamAttrs(i).removeAttributes(Ctx, IncompatibleAttrs));
  }
@@ -4311,17 +4468,14 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
  Instruction *NC = NewCall;
  Value *NV = NC;
  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
-    if (!NV->getType()->isVoidTy()) {
-      NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
-      NC->setDebugLoc(Caller->getDebugLoc());
-
-      auto OptInsertPt = NewCall->getInsertionPointAfterDef();
-      assert(OptInsertPt && "No place to insert cast");
-      InsertNewInstBefore(NC, *OptInsertPt);
-      Worklist.pushUsersToWorkList(*Caller);
-    } else {
-      NV = PoisonValue::get(Caller->getType());
-    }
+    assert(!NV->getType()->isVoidTy());
+    NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
+    NC->setDebugLoc(Caller->getDebugLoc());
+
+    auto OptInsertPt = NewCall->getInsertionPointAfterDef();
+    assert(OptInsertPt && "No place to insert cast");
+    InsertNewInstBefore(NC, *OptInsertPt);
+    Worklist.pushUsersToWorkList(*Caller);
  }
 
  if (!Caller->use_empty())
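For reference (illustrative only, not taken from this commit's tests), the fshl(X, 0, Y) fold added in the diff turns a funnel shift whose low half is zero into a plain shift, since the shift amount of a power-of-two-width funnel shift is implicitly taken modulo the bit width. The function name below is hypothetical:

  declare i32 @llvm.fshl.i32(i32, i32, i32)

  define i32 @fshl_low_zero(i32 %x, i32 %y) {
    %r = call i32 @llvm.fshl.i32(i32 %x, i32 0, i32 %y)  ; low half is zero
    ret i32 %r
  }
  ; after the fold (bit width 32 is a power of two):
  ;   %m = and i32 %y, 31
  ;   %r = shl i32 %x, %m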
