diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2021-02-16 20:13:02 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2021-02-16 20:13:02 +0000 |
commit | b60736ec1405bb0a8dd40989f67ef4c93da068ab (patch) | |
tree | 5c43fbb7c9fc45f0f87e0e6795a86267dbd12f9d /llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp | |
parent | cfca06d7963fa0909f90483b42a6d7d194d01e08 (diff) | |
download | src-b60736ec1405bb0a8dd40989f67ef4c93da068ab.tar.gz src-b60736ec1405bb0a8dd40989f67ef4c93da068ab.zip |
Vendor import of llvm-project main 8e464dd76bef, the last commit beforevendor/llvm-project/llvmorg-12-init-17869-g8e464dd76bef
the upstream release/12.x branch was created.
Diffstat (limited to 'llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp')
-rw-r--r-- | llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp | 339 |
1 files changed, 224 insertions, 115 deletions
diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index cd2f4ca36f3b..b671d68031a8 100644 --- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -58,8 +58,11 @@ STATISTIC(NumMemAccess, "Number of memory access targets propagated"); STATISTIC(NumCmps, "Number of comparisons propagated"); STATISTIC(NumReturns, "Number of return values propagated"); STATISTIC(NumDeadCases, "Number of switch cases removed"); +STATISTIC(NumSDivSRemsNarrowed, + "Number of sdivs/srems whose width was decreased"); STATISTIC(NumSDivs, "Number of sdiv converted to udiv"); -STATISTIC(NumUDivs, "Number of udivs whose width was decreased"); +STATISTIC(NumUDivURemsNarrowed, + "Number of udivs/urems whose width was decreased"); STATISTIC(NumAShrs, "Number of ashr converted to lshr"); STATISTIC(NumSRems, "Number of srem converted to urem"); STATISTIC(NumSExt, "Number of sext converted to zext"); @@ -126,7 +129,7 @@ static bool processSelect(SelectInst *S, LazyValueInfo *LVI) { if (S->getType()->isVectorTy()) return false; if (isa<Constant>(S->getCondition())) return false; - Constant *C = LVI->getConstant(S->getCondition(), S->getParent(), S); + Constant *C = LVI->getConstant(S->getCondition(), S); if (!C) return false; ConstantInt *CI = dyn_cast<ConstantInt>(C); @@ -283,7 +286,7 @@ static bool processMemAccess(Instruction *I, LazyValueInfo *LVI) { if (isa<Constant>(Pointer)) return false; - Constant *C = LVI->getConstant(Pointer, I->getParent(), I); + Constant *C = LVI->getConstant(Pointer, I); if (!C) return false; ++NumMemAccess; @@ -301,18 +304,9 @@ static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) { if (!C) return false; - // As a policy choice, we choose not to waste compile time on anything where - // the comparison is testing local values. While LVI can sometimes reason - // about such cases, it's not its primary purpose. We do make sure to do - // the block local query for uses from terminator instructions, but that's - // handled in the code for each terminator. As an exception, we allow phi - // nodes, for which LVI can thread the condition into predecessors. - auto *I = dyn_cast<Instruction>(Op0); - if (I && I->getParent() == Cmp->getParent() && !isa<PHINode>(I)) - return false; - LazyValueInfo::Tristate Result = - LVI->getPredicateAt(Cmp->getPredicate(), Op0, C, Cmp); + LVI->getPredicateAt(Cmp->getPredicate(), Op0, C, Cmp, + /*UseBlockValue=*/true); if (Result == LazyValueInfo::Unknown) return false; @@ -336,15 +330,6 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI, Value *Cond = I->getCondition(); BasicBlock *BB = I->getParent(); - // If the condition was defined in same block as the switch then LazyValueInfo - // currently won't say anything useful about it, though in theory it could. - if (isa<Instruction>(Cond) && cast<Instruction>(Cond)->getParent() == BB) - return false; - - // If the switch is unreachable then trying to improve it is a waste of time. - pred_iterator PB = pred_begin(BB), PE = pred_end(BB); - if (PB == PE) return false; - // Analyse each switch case in turn. bool Changed = false; DenseMap<BasicBlock*, int> SuccessorsCount; @@ -357,35 +342,9 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI, for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) { ConstantInt *Case = CI->getCaseValue(); - - // Check to see if the switch condition is equal to/not equal to the case - // value on every incoming edge, equal/not equal being the same each time. - LazyValueInfo::Tristate State = LazyValueInfo::Unknown; - for (pred_iterator PI = PB; PI != PE; ++PI) { - // Is the switch condition equal to the case value? - LazyValueInfo::Tristate Value = LVI->getPredicateOnEdge(CmpInst::ICMP_EQ, - Cond, Case, *PI, - BB, SI); - // Give up on this case if nothing is known. - if (Value == LazyValueInfo::Unknown) { - State = LazyValueInfo::Unknown; - break; - } - - // If this was the first edge to be visited, record that all other edges - // need to give the same result. - if (PI == PB) { - State = Value; - continue; - } - - // If this case is known to fire for some edges and known not to fire for - // others then there is nothing we can do - give up. - if (Value != State) { - State = LazyValueInfo::Unknown; - break; - } - } + LazyValueInfo::Tristate State = + LVI->getPredicateAt(CmpInst::ICMP_EQ, Cond, Case, I, + /* UseBlockValue */ true); if (State == LazyValueInfo::False) { // This case never fires - remove it. @@ -429,10 +388,8 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI, // See if we can prove that the given binary op intrinsic will not overflow. static bool willNotOverflow(BinaryOpIntrinsic *BO, LazyValueInfo *LVI) { - ConstantRange LRange = LVI->getConstantRange( - BO->getLHS(), BO->getParent(), BO); - ConstantRange RRange = LVI->getConstantRange( - BO->getRHS(), BO->getParent(), BO); + ConstantRange LRange = LVI->getConstantRange(BO->getLHS(), BO); + ConstantRange RRange = LVI->getConstantRange(BO->getRHS(), BO); ConstantRange NWRegion = ConstantRange::makeGuaranteedNoWrapRegion( BO->getBinaryOp(), RRange, BO->getNoWrapKind()); return NWRegion.contains(LRange); @@ -532,8 +489,6 @@ static void processSaturatingInst(SaturatingInst *SI, LazyValueInfo *LVI) { /// Infer nonnull attributes for the arguments at the specified callsite. static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) { - SmallVector<unsigned, 4> ArgNos; - unsigned ArgNo = 0; if (auto *WO = dyn_cast<WithOverflowInst>(&CB)) { if (WO->getLHS()->getType()->isIntegerTy() && willNotOverflow(WO, LVI)) { @@ -549,6 +504,8 @@ static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) { } } + bool Changed = false; + // Deopt bundle operands are intended to capture state with minimal // perturbance of the code otherwise. If we can find a constant value for // any such operand and remove a use of the original value, that's @@ -557,22 +514,22 @@ static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) { // idiomatically, appear along rare conditional paths, it's reasonable likely // we may have a conditional fact with which LVI can fold. if (auto DeoptBundle = CB.getOperandBundle(LLVMContext::OB_deopt)) { - bool Progress = false; for (const Use &ConstU : DeoptBundle->Inputs) { Use &U = const_cast<Use&>(ConstU); Value *V = U.get(); if (V->getType()->isVectorTy()) continue; if (isa<Constant>(V)) continue; - Constant *C = LVI->getConstant(V, CB.getParent(), &CB); + Constant *C = LVI->getConstant(V, &CB); if (!C) continue; U.set(C); - Progress = true; + Changed = true; } - if (Progress) - return true; } + SmallVector<unsigned, 4> ArgNos; + unsigned ArgNo = 0; + for (Value *V : CB.args()) { PointerType *Type = dyn_cast<PointerType>(V->getType()); // Try to mark pointer typed parameters as non-null. We skip the @@ -590,7 +547,7 @@ static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) { assert(ArgNo == CB.arg_size() && "sanity check"); if (ArgNos.empty()) - return false; + return Changed; AttributeList AS = CB.getAttributes(); LLVMContext &Ctx = CB.getContext(); @@ -601,13 +558,79 @@ static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) { return true; } -static bool hasPositiveOperands(BinaryOperator *SDI, LazyValueInfo *LVI) { - Constant *Zero = ConstantInt::get(SDI->getType(), 0); - for (Value *O : SDI->operands()) { - auto Result = LVI->getPredicateAt(ICmpInst::ICMP_SGE, O, Zero, SDI); - if (Result != LazyValueInfo::True) - return false; +static bool isNonNegative(Value *V, LazyValueInfo *LVI, Instruction *CxtI) { + Constant *Zero = ConstantInt::get(V->getType(), 0); + auto Result = LVI->getPredicateAt(ICmpInst::ICMP_SGE, V, Zero, CxtI); + return Result == LazyValueInfo::True; +} + +static bool isNonPositive(Value *V, LazyValueInfo *LVI, Instruction *CxtI) { + Constant *Zero = ConstantInt::get(V->getType(), 0); + auto Result = LVI->getPredicateAt(ICmpInst::ICMP_SLE, V, Zero, CxtI); + return Result == LazyValueInfo::True; +} + +enum class Domain { NonNegative, NonPositive, Unknown }; + +Domain getDomain(Value *V, LazyValueInfo *LVI, Instruction *CxtI) { + if (isNonNegative(V, LVI, CxtI)) + return Domain::NonNegative; + if (isNonPositive(V, LVI, CxtI)) + return Domain::NonPositive; + return Domain::Unknown; +} + +/// Try to shrink a sdiv/srem's width down to the smallest power of two that's +/// sufficient to contain its operands. +static bool narrowSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) { + assert(Instr->getOpcode() == Instruction::SDiv || + Instr->getOpcode() == Instruction::SRem); + if (Instr->getType()->isVectorTy()) + return false; + + // Find the smallest power of two bitwidth that's sufficient to hold Instr's + // operands. + unsigned OrigWidth = Instr->getType()->getIntegerBitWidth(); + + // What is the smallest bit width that can accomodate the entire value ranges + // of both of the operands? + std::array<Optional<ConstantRange>, 2> CRs; + unsigned MinSignedBits = 0; + for (auto I : zip(Instr->operands(), CRs)) { + std::get<1>(I) = LVI->getConstantRange(std::get<0>(I), Instr); + MinSignedBits = std::max(std::get<1>(I)->getMinSignedBits(), MinSignedBits); } + + // sdiv/srem is UB if divisor is -1 and divident is INT_MIN, so unless we can + // prove that such a combination is impossible, we need to bump the bitwidth. + if (CRs[1]->contains(APInt::getAllOnesValue(OrigWidth)) && + CRs[0]->contains( + APInt::getSignedMinValue(MinSignedBits).sextOrSelf(OrigWidth))) + ++MinSignedBits; + + // Don't shrink below 8 bits wide. + unsigned NewWidth = std::max<unsigned>(PowerOf2Ceil(MinSignedBits), 8); + + // NewWidth might be greater than OrigWidth if OrigWidth is not a power of + // two. + if (NewWidth >= OrigWidth) + return false; + + ++NumSDivSRemsNarrowed; + IRBuilder<> B{Instr}; + auto *TruncTy = Type::getIntNTy(Instr->getContext(), NewWidth); + auto *LHS = B.CreateTruncOrBitCast(Instr->getOperand(0), TruncTy, + Instr->getName() + ".lhs.trunc"); + auto *RHS = B.CreateTruncOrBitCast(Instr->getOperand(1), TruncTy, + Instr->getName() + ".rhs.trunc"); + auto *BO = B.CreateBinOp(Instr->getOpcode(), LHS, RHS, Instr->getName()); + auto *Sext = B.CreateSExt(BO, Instr->getType(), Instr->getName() + ".sext"); + if (auto *BinOp = dyn_cast<BinaryOperator>(BO)) + if (BinOp->getOpcode() == Instruction::SDiv) + BinOp->setIsExact(Instr->isExact()); + + Instr->replaceAllUsesWith(Sext); + Instr->eraseFromParent(); return true; } @@ -621,21 +644,23 @@ static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) { // Find the smallest power of two bitwidth that's sufficient to hold Instr's // operands. - auto OrigWidth = Instr->getType()->getIntegerBitWidth(); - ConstantRange OperandRange(OrigWidth, /*isFullSet=*/false); + + // What is the smallest bit width that can accomodate the entire value ranges + // of both of the operands? + unsigned MaxActiveBits = 0; for (Value *Operand : Instr->operands()) { - OperandRange = OperandRange.unionWith( - LVI->getConstantRange(Operand, Instr->getParent())); + ConstantRange CR = LVI->getConstantRange(Operand, Instr); + MaxActiveBits = std::max(CR.getActiveBits(), MaxActiveBits); } // Don't shrink below 8 bits wide. - unsigned NewWidth = std::max<unsigned>( - PowerOf2Ceil(OperandRange.getUnsignedMax().getActiveBits()), 8); + unsigned NewWidth = std::max<unsigned>(PowerOf2Ceil(MaxActiveBits), 8); + // NewWidth might be greater than OrigWidth if OrigWidth is not a power of // two. - if (NewWidth >= OrigWidth) + if (NewWidth >= Instr->getType()->getIntegerBitWidth()) return false; - ++NumUDivs; + ++NumUDivURemsNarrowed; IRBuilder<> B{Instr}; auto *TruncTy = Type::getIntNTy(Instr->getContext(), NewWidth); auto *LHS = B.CreateTruncOrBitCast(Instr->getOperand(0), TruncTy, @@ -654,52 +679,135 @@ static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) { } static bool processSRem(BinaryOperator *SDI, LazyValueInfo *LVI) { - if (SDI->getType()->isVectorTy() || !hasPositiveOperands(SDI, LVI)) + assert(SDI->getOpcode() == Instruction::SRem); + if (SDI->getType()->isVectorTy()) return false; + struct Operand { + Value *V; + Domain D; + }; + std::array<Operand, 2> Ops; + + for (const auto I : zip(Ops, SDI->operands())) { + Operand &Op = std::get<0>(I); + Op.V = std::get<1>(I); + Op.D = getDomain(Op.V, LVI, SDI); + if (Op.D == Domain::Unknown) + return false; + } + + // We know domains of both of the operands! ++NumSRems; - auto *BO = BinaryOperator::CreateURem(SDI->getOperand(0), SDI->getOperand(1), - SDI->getName(), SDI); - BO->setDebugLoc(SDI->getDebugLoc()); - SDI->replaceAllUsesWith(BO); + + // We need operands to be non-negative, so negate each one that isn't. + for (Operand &Op : Ops) { + if (Op.D == Domain::NonNegative) + continue; + auto *BO = + BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg", SDI); + BO->setDebugLoc(SDI->getDebugLoc()); + Op.V = BO; + } + + auto *URem = + BinaryOperator::CreateURem(Ops[0].V, Ops[1].V, SDI->getName(), SDI); + URem->setDebugLoc(SDI->getDebugLoc()); + + Value *Res = URem; + + // If the divident was non-positive, we need to negate the result. + if (Ops[0].D == Domain::NonPositive) + Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", SDI); + + SDI->replaceAllUsesWith(Res); SDI->eraseFromParent(); - // Try to process our new urem. - processUDivOrURem(BO, LVI); + // Try to simplify our new urem. + processUDivOrURem(URem, LVI); return true; } /// See if LazyValueInfo's ability to exploit edge conditions or range -/// information is sufficient to prove the both operands of this SDiv are -/// positive. If this is the case, replace the SDiv with a UDiv. Even for local +/// information is sufficient to prove the signs of both operands of this SDiv. +/// If this is the case, replace the SDiv with a UDiv. Even for local /// conditions, this can sometimes prove conditions instcombine can't by /// exploiting range information. static bool processSDiv(BinaryOperator *SDI, LazyValueInfo *LVI) { - if (SDI->getType()->isVectorTy() || !hasPositiveOperands(SDI, LVI)) + assert(SDI->getOpcode() == Instruction::SDiv); + if (SDI->getType()->isVectorTy()) return false; + struct Operand { + Value *V; + Domain D; + }; + std::array<Operand, 2> Ops; + + for (const auto I : zip(Ops, SDI->operands())) { + Operand &Op = std::get<0>(I); + Op.V = std::get<1>(I); + Op.D = getDomain(Op.V, LVI, SDI); + if (Op.D == Domain::Unknown) + return false; + } + + // We know domains of both of the operands! ++NumSDivs; - auto *BO = BinaryOperator::CreateUDiv(SDI->getOperand(0), SDI->getOperand(1), - SDI->getName(), SDI); - BO->setDebugLoc(SDI->getDebugLoc()); - BO->setIsExact(SDI->isExact()); - SDI->replaceAllUsesWith(BO); + + // We need operands to be non-negative, so negate each one that isn't. + for (Operand &Op : Ops) { + if (Op.D == Domain::NonNegative) + continue; + auto *BO = + BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg", SDI); + BO->setDebugLoc(SDI->getDebugLoc()); + Op.V = BO; + } + + auto *UDiv = + BinaryOperator::CreateUDiv(Ops[0].V, Ops[1].V, SDI->getName(), SDI); + UDiv->setDebugLoc(SDI->getDebugLoc()); + UDiv->setIsExact(SDI->isExact()); + + Value *Res = UDiv; + + // If the operands had two different domains, we need to negate the result. + if (Ops[0].D != Ops[1].D) + Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", SDI); + + SDI->replaceAllUsesWith(Res); SDI->eraseFromParent(); // Try to simplify our new udiv. - processUDivOrURem(BO, LVI); + processUDivOrURem(UDiv, LVI); return true; } +static bool processSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) { + assert(Instr->getOpcode() == Instruction::SDiv || + Instr->getOpcode() == Instruction::SRem); + if (Instr->getType()->isVectorTy()) + return false; + + if (Instr->getOpcode() == Instruction::SDiv) + if (processSDiv(Instr, LVI)) + return true; + + if (Instr->getOpcode() == Instruction::SRem) + if (processSRem(Instr, LVI)) + return true; + + return narrowSDivOrSRem(Instr, LVI); +} + static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) { if (SDI->getType()->isVectorTy()) return false; - Constant *Zero = ConstantInt::get(SDI->getType(), 0); - if (LVI->getPredicateAt(ICmpInst::ICMP_SGE, SDI->getOperand(0), Zero, SDI) != - LazyValueInfo::True) + if (!isNonNegative(SDI->getOperand(0), LVI, SDI)) return false; ++NumAShrs; @@ -719,9 +827,7 @@ static bool processSExt(SExtInst *SDI, LazyValueInfo *LVI) { Value *Base = SDI->getOperand(0); - Constant *Zero = ConstantInt::get(Base->getType(), 0); - if (LVI->getPredicateAt(ICmpInst::ICMP_SGE, Base, Zero, SDI) != - LazyValueInfo::True) + if (!isNonNegative(Base, LVI, SDI)) return false; ++NumSExt; @@ -748,14 +854,12 @@ static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) { if (NSW && NUW) return false; - BasicBlock *BB = BinOp->getParent(); - Instruction::BinaryOps Opcode = BinOp->getOpcode(); Value *LHS = BinOp->getOperand(0); Value *RHS = BinOp->getOperand(1); - ConstantRange LRange = LVI->getConstantRange(LHS, BB, BinOp); - ConstantRange RRange = LVI->getConstantRange(RHS, BB, BinOp); + ConstantRange LRange = LVI->getConstantRange(LHS, BinOp); + ConstantRange RRange = LVI->getConstantRange(RHS, BinOp); bool Changed = false; bool NewNUW = false, NewNSW = false; @@ -783,7 +887,6 @@ static bool processAnd(BinaryOperator *BinOp, LazyValueInfo *LVI) { // Pattern match (and lhs, C) where C includes a superset of bits which might // be set in lhs. This is a common truncation idiom created by instcombine. - BasicBlock *BB = BinOp->getParent(); Value *LHS = BinOp->getOperand(0); ConstantInt *RHS = dyn_cast<ConstantInt>(BinOp->getOperand(1)); if (!RHS || !RHS->getValue().isMask()) @@ -792,7 +895,7 @@ static bool processAnd(BinaryOperator *BinOp, LazyValueInfo *LVI) { // We can only replace the AND with LHS based on range info if the range does // not include undef. ConstantRange LRange = - LVI->getConstantRange(LHS, BB, BinOp, /*UndefAllowed=*/false); + LVI->getConstantRange(LHS, BinOp, /*UndefAllowed=*/false); if (!LRange.getUnsignedMax().ule(RHS->getValue())) return false; @@ -804,7 +907,7 @@ static bool processAnd(BinaryOperator *BinOp, LazyValueInfo *LVI) { static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) { - if (Constant *C = LVI->getConstant(V, At->getParent(), At)) + if (Constant *C = LVI->getConstant(V, At)) return C; // TODO: The following really should be sunk inside LVI's core algorithm, or @@ -858,10 +961,8 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT, BBChanged |= processCallSite(cast<CallBase>(*II), LVI); break; case Instruction::SRem: - BBChanged |= processSRem(cast<BinaryOperator>(II), LVI); - break; case Instruction::SDiv: - BBChanged |= processSDiv(cast<BinaryOperator>(II), LVI); + BBChanged |= processSDivOrSRem(cast<BinaryOperator>(II), LVI); break; case Instruction::UDiv: case Instruction::URem: @@ -929,11 +1030,19 @@ CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) { bool Changed = runImpl(F, LVI, DT, getBestSimplifyQuery(AM, F)); - if (!Changed) - return PreservedAnalyses::all(); PreservedAnalyses PA; - PA.preserve<GlobalsAA>(); - PA.preserve<DominatorTreeAnalysis>(); - PA.preserve<LazyValueAnalysis>(); + if (!Changed) { + PA = PreservedAnalyses::all(); + } else { + PA.preserve<GlobalsAA>(); + PA.preserve<DominatorTreeAnalysis>(); + PA.preserve<LazyValueAnalysis>(); + } + + // Keeping LVI alive is expensive, both because it uses a lot of memory, and + // because invalidating values in LVI is expensive. While CVP does preserve + // LVI, we know that passes after JumpThreading+CVP will not need the result + // of this analysis, so we forcefully discard it early. + PA.abandon<LazyValueAnalysis>(); return PA; } |