diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 273 |
1 files changed, 102 insertions, 171 deletions
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 318c4c06f0f7..3721564890dd 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -24,12 +24,6 @@ // memcmp, strlen, etc. // Future floating point idioms to recognize in -ffast-math mode: // fpowi -// Future integer operation idioms to recognize: -// ctpop -// -// Beware that isel's default lowering for ctpop is highly inefficient for -// i64 and larger types when i64 is legal and the value has few bits set. It -// would be good to enhance isel to emit a loop for ctpop in this case. // // This could recognize common matrix multiplies and dot product idioms and // replace them with calls to BLAS (if linked in??). @@ -61,7 +55,6 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" @@ -85,14 +78,11 @@ #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/InstructionCost.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -255,62 +245,8 @@ private: /// @} }; - -class LoopIdiomRecognizeLegacyPass : public LoopPass { -public: - static char ID; - - explicit LoopIdiomRecognizeLegacyPass() : LoopPass(ID) { - initializeLoopIdiomRecognizeLegacyPassPass( - *PassRegistry::getPassRegistry()); - } - - bool runOnLoop(Loop *L, 
LPPassManager &LPM) override { - if (DisableLIRP::All) - return false; - - if (skipLoop(L)) - return false; - - AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); - DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); - TargetLibraryInfo *TLI = - &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI( - *L->getHeader()->getParent()); - const TargetTransformInfo *TTI = - &getAnalysis<TargetTransformInfoWrapperPass>().getTTI( - *L->getHeader()->getParent()); - const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout(); - auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>(); - MemorySSA *MSSA = nullptr; - if (MSSAAnalysis) - MSSA = &MSSAAnalysis->getMSSA(); - - // For the old PM, we can't use OptimizationRemarkEmitter as an analysis - // pass. Function analyses need to be preserved across loop transformations - // but ORE cannot be preserved (see comment before the pass definition). - OptimizationRemarkEmitter ORE(L->getHeader()->getParent()); - - LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, MSSA, DL, ORE); - return LIR.runOnLoop(L); - } - - /// This transformation requires natural loop information & requires that - /// loop preheaders be inserted into the CFG. 
- void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<TargetLibraryInfoWrapperPass>(); - AU.addRequired<TargetTransformInfoWrapperPass>(); - AU.addPreserved<MemorySSAWrapperPass>(); - getLoopAnalysisUsage(AU); - } -}; - } // end anonymous namespace -char LoopIdiomRecognizeLegacyPass::ID = 0; - PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &) { @@ -335,18 +271,8 @@ PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM, return PA; } -INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom", - "Recognize loop idioms", false, false) -INITIALIZE_PASS_DEPENDENCY(LoopPass) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_END(LoopIdiomRecognizeLegacyPass, "loop-idiom", - "Recognize loop idioms", false, false) - -Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognizeLegacyPass(); } - static void deleteDeadInstruction(Instruction *I) { - I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->replaceAllUsesWith(PoisonValue::get(I->getType())); I->eraseFromParent(); } @@ -442,7 +368,7 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) { // array. We could theoretically do a store to an alloca or something, but // that doesn't seem worthwhile. Constant *C = dyn_cast<Constant>(V); - if (!C) + if (!C || isa<ConstantExpr>(C)) return nullptr; // Only handle simple values that are a power of two bytes in size. @@ -497,8 +423,8 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) { // When storing out scalable vectors we bail out for now, since the code // below currently only works for constant strides. 
TypeSize SizeInBits = DL->getTypeSizeInBits(StoredVal->getType()); - if (SizeInBits.isScalable() || (SizeInBits.getFixedSize() & 7) || - (SizeInBits.getFixedSize() >> 32) != 0) + if (SizeInBits.isScalable() || (SizeInBits.getFixedValue() & 7) || + (SizeInBits.getFixedValue() >> 32) != 0) return LegalStoreKind::None; // See if the pointer expression is an AddRec like {base,+,1} on the current @@ -798,7 +724,7 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL, } /// processLoopMemIntrinsic - Template function for calling different processor -/// functions based on mem instrinsic type. +/// functions based on mem intrinsic type. template <typename MemInst> bool LoopIdiomRecognize::processLoopMemIntrinsic( BasicBlock *BB, @@ -995,9 +921,8 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI, SmallPtrSet<Instruction *, 1> MSIs; MSIs.insert(MSI); return processLoopStridedStore(Pointer, SE->getSCEV(MSI->getLength()), - MaybeAlign(MSI->getDestAlignment()), - SplatValue, MSI, MSIs, Ev, BECount, - IsNegStride, /*IsLoopMemset=*/true); + MSI->getDestAlign(), SplatValue, MSI, MSIs, Ev, + BECount, IsNegStride, /*IsLoopMemset=*/true); } /// mayLoopAccessLocation - Return true if the specified loop might access the @@ -1017,9 +942,13 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, // to be exactly the size of the memset, which is (BECount+1)*StoreSize const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount); const SCEVConstant *ConstSize = dyn_cast<SCEVConstant>(StoreSizeSCEV); - if (BECst && ConstSize) - AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) * - ConstSize->getValue()->getZExtValue()); + if (BECst && ConstSize) { + std::optional<uint64_t> BEInt = BECst->getAPInt().tryZExtValue(); + std::optional<uint64_t> SizeInt = ConstSize->getAPInt().tryZExtValue(); + // FIXME: Should this check for overflow? 
+ if (BEInt && SizeInt) + AccessSize = LocationSize::precise((*BEInt + 1) * *SizeInt); + } // TODO: For this to be really effective, we have to dive into the pointer // operand in the store. Store to &A[i] of 100 will always return may alias @@ -1030,8 +959,7 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, for (BasicBlock *B : L->blocks()) for (Instruction &I : *B) if (!IgnoredInsts.contains(&I) && - isModOrRefSet( - intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access))) + isModOrRefSet(AA.getModRefInfo(&I, StoreLoc) & Access)) return true; return false; } @@ -1053,33 +981,6 @@ static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount, return SE->getMinusSCEV(Start, Index); } -/// Compute trip count from the backedge taken count. -static const SCEV *getTripCount(const SCEV *BECount, Type *IntPtr, - Loop *CurLoop, const DataLayout *DL, - ScalarEvolution *SE) { - const SCEV *TripCountS = nullptr; - // The # stored bytes is (BECount+1). Expand the trip count out to - // pointer size if it isn't already. - // - // If we're going to need to zero extend the BE count, check if we can add - // one to it prior to zero extending without overflow. Provided this is safe, - // it allows better simplification of the +1. - if (DL->getTypeSizeInBits(BECount->getType()) < - DL->getTypeSizeInBits(IntPtr) && - SE->isLoopEntryGuardedByCond( - CurLoop, ICmpInst::ICMP_NE, BECount, - SE->getNegativeSCEV(SE->getOne(BECount->getType())))) { - TripCountS = SE->getZeroExtendExpr( - SE->getAddExpr(BECount, SE->getOne(BECount->getType()), SCEV::FlagNUW), - IntPtr); - } else { - TripCountS = SE->getAddExpr(SE->getTruncateOrZeroExtend(BECount, IntPtr), - SE->getOne(IntPtr), SCEV::FlagNUW); - } - - return TripCountS; -} - /// Compute the number of bytes as a SCEV from the backedge taken count. 
/// /// This also maps the SCEV into the provided type and tries to handle the @@ -1087,8 +988,8 @@ static const SCEV *getTripCount(const SCEV *BECount, Type *IntPtr, static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr, const SCEV *StoreSizeSCEV, Loop *CurLoop, const DataLayout *DL, ScalarEvolution *SE) { - const SCEV *TripCountSCEV = getTripCount(BECount, IntPtr, CurLoop, DL, SE); - + const SCEV *TripCountSCEV = + SE->getTripCountFromExitCount(BECount, IntPtr, CurLoop); return SE->getMulExpr(TripCountSCEV, SE->getTruncateOrZeroExtend(StoreSizeSCEV, IntPtr), SCEV::FlagNUW); @@ -1101,6 +1002,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( Value *StoredVal, Instruction *TheStore, SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev, const SCEV *BECount, bool IsNegStride, bool IsLoopMemset) { + Module *M = TheStore->getModule(); Value *SplatValue = isBytewiseValue(StoredVal, *DL); Constant *PatternValue = nullptr; @@ -1119,7 +1021,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( SCEVExpander Expander(*SE, *DL, "loop-idiom"); SCEVExpanderCleaner ExpCleaner(Expander); - Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS); + Type *DestInt8PtrTy = Builder.getPtrTy(DestAS); Type *IntIdxTy = DL->getIndexType(DestPtr->getType()); bool Changed = false; @@ -1130,7 +1032,7 @@ bool LoopIdiomRecognize::processLoopStridedStore( // TODO: ideally we should still be able to generate memset if SCEV expander // is taught to generate the dependencies at the latest point. - if (!isSafeToExpand(Start, *SE)) + if (!Expander.isSafeToExpand(Start)) return Changed; // Okay, we have a strided store "p[i]" of a splattable value. We can turn @@ -1164,32 +1066,37 @@ bool LoopIdiomRecognize::processLoopStridedStore( // TODO: ideally we should still be able to generate memset if SCEV expander // is taught to generate the dependencies at the latest point. 
- if (!isSafeToExpand(NumBytesS, *SE)) + if (!Expander.isSafeToExpand(NumBytesS)) return Changed; Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator()); + if (!SplatValue && !isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16)) + return Changed; + + AAMDNodes AATags = TheStore->getAAMetadata(); + for (Instruction *Store : Stores) + AATags = AATags.merge(Store->getAAMetadata()); + if (auto CI = dyn_cast<ConstantInt>(NumBytes)) + AATags = AATags.extendTo(CI->getZExtValue()); + else + AATags = AATags.extendTo(-1); + CallInst *NewCall; if (SplatValue) { - AAMDNodes AATags = TheStore->getAAMetadata(); - if (auto CI = dyn_cast<ConstantInt>(NumBytes)) - AATags = AATags.extendTo(CI->getZExtValue()); - else - AATags = AATags.extendTo(-1); - NewCall = Builder.CreateMemSet( BasePtr, SplatValue, NumBytes, MaybeAlign(StoreAlignment), /*isVolatile=*/false, AATags.TBAA, AATags.Scope, AATags.NoAlias); } else { + assert (isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16)); // Everything is emitted in default address space Type *Int8PtrTy = DestInt8PtrTy; - Module *M = TheStore->getModule(); StringRef FuncName = "memset_pattern16"; - FunctionCallee MSP = M->getOrInsertFunction(FuncName, Builder.getVoidTy(), - Int8PtrTy, Int8PtrTy, IntIdxTy); - inferLibFuncAttributes(M, FuncName, *TLI); + FunctionCallee MSP = getOrInsertLibFunc(M, *TLI, LibFunc_memset_pattern16, + Builder.getVoidTy(), Int8PtrTy, Int8PtrTy, IntIdxTy); + inferNonMandatoryLibFuncAttrs(M, FuncName, *TLI); // Otherwise we should form a memset_pattern16. PatternValue is known to be // an constant array of 16-bytes. Plop the value into a mergable global. @@ -1198,9 +1105,20 @@ bool LoopIdiomRecognize::processLoopStridedStore( PatternValue, ".memset_pattern"); GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Ok to merge these. 
GV->setAlignment(Align(16)); - Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy); + Value *PatternPtr = GV; NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes}); - } + + // Set the TBAA info if present. + if (AATags.TBAA) + NewCall->setMetadata(LLVMContext::MD_tbaa, AATags.TBAA); + + if (AATags.Scope) + NewCall->setMetadata(LLVMContext::MD_alias_scope, AATags.Scope); + + if (AATags.NoAlias) + NewCall->setMetadata(LLVMContext::MD_noalias, AATags.NoAlias); + } + NewCall->setDebugLoc(TheStore->getDebugLoc()); if (MSSAU) { @@ -1271,13 +1189,13 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, StoreEv, LoadEv, BECount); } +namespace { class MemmoveVerifier { public: explicit MemmoveVerifier(const Value &LoadBasePtr, const Value &StoreBasePtr, const DataLayout &DL) - : DL(DL), LoadOff(0), StoreOff(0), - BP1(llvm::GetPointerBaseWithConstantOffset( - LoadBasePtr.stripPointerCasts(), LoadOff, DL)), + : DL(DL), BP1(llvm::GetPointerBaseWithConstantOffset( + LoadBasePtr.stripPointerCasts(), LoadOff, DL)), BP2(llvm::GetPointerBaseWithConstantOffset( StoreBasePtr.stripPointerCasts(), StoreOff, DL)), IsSameObject(BP1 == BP2) {} @@ -1295,7 +1213,7 @@ public: // Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr // for negative stride. LoadBasePtr shouldn't overlap with StoreBasePtr. 
int64_t LoadSize = - DL.getTypeSizeInBits(TheLoad.getType()).getFixedSize() / 8; + DL.getTypeSizeInBits(TheLoad.getType()).getFixedValue() / 8; if (BP1 != BP2 || LoadSize != int64_t(StoreSize)) return false; if ((!IsNegStride && LoadOff < StoreOff + int64_t(StoreSize)) || @@ -1307,14 +1225,15 @@ public: private: const DataLayout &DL; - int64_t LoadOff; - int64_t StoreOff; + int64_t LoadOff = 0; + int64_t StoreOff = 0; const Value *BP1; const Value *BP2; public: const bool IsSameObject; }; +} // namespace bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( Value *DestPtr, Value *SourcePtr, const SCEV *StoreSizeSCEV, @@ -1363,7 +1282,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( // feeds the stores. Check for an alias by generating the base address and // checking everything. Value *StoreBasePtr = Expander.expandCodeFor( - StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator()); + StrStart, Builder.getPtrTy(StrAS), Preheader->getTerminator()); // From here on out, conservatively report to the pass manager that we've // changed the IR, even if we later clean up these added instructions. There @@ -1415,31 +1334,24 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( // For a memcpy, we have to make sure that the input array is not being // mutated by the loop. - Value *LoadBasePtr = Expander.expandCodeFor( - LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator()); + Value *LoadBasePtr = Expander.expandCodeFor(LdStart, Builder.getPtrTy(LdAS), + Preheader->getTerminator()); // If the store is a memcpy instruction, we must check if it will write to // the load memory locations. So remove it from the ignored stores. 
- if (IsMemCpy) - IgnoredInsts.erase(TheStore); MemmoveVerifier Verifier(*LoadBasePtr, *StoreBasePtr, *DL); + if (IsMemCpy && !Verifier.IsSameObject) + IgnoredInsts.erase(TheStore); if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount, StoreSizeSCEV, *AA, IgnoredInsts)) { - if (!IsMemCpy) { - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad", - TheLoad) - << ore::NV("Inst", InstRemark) << " in " - << ore::NV("Function", TheStore->getFunction()) - << " function will not be hoisted: " - << ore::NV("Reason", "The loop may access load location"); - }); - return Changed; - } - // At this point loop may access load only for memcpy in same underlying - // object. If that's not the case bail out. - if (!Verifier.IsSameObject) - return Changed; + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad", TheLoad) + << ore::NV("Inst", InstRemark) << " in " + << ore::NV("Function", TheStore->getFunction()) + << " function will not be hoisted: " + << ore::NV("Reason", "The loop may access load location"); + }); + return Changed; } bool UseMemMove = IsMemCpy ? Verifier.IsSameObject : LoopAccessStore; @@ -1487,9 +1399,9 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( return Changed; // We cannot allow unaligned ops for unordered load/store, so reject // anything where the alignment isn't at least the element size. - assert((StoreAlign.hasValue() && LoadAlign.hasValue()) && + assert((StoreAlign && LoadAlign) && "Expect unordered load/store to have align."); - if (StoreAlign.getValue() < StoreSize || LoadAlign.getValue() < StoreSize) + if (*StoreAlign < StoreSize || *LoadAlign < StoreSize) return Changed; // If the element.atomic memcpy is not lowered into explicit @@ -1503,9 +1415,8 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( // Note that unordered atomic loads/stores are *required* by the spec to // have an alignment but non-atomic loads/stores may not. 
NewCall = Builder.CreateElementUnorderedAtomicMemCpy( - StoreBasePtr, StoreAlign.getValue(), LoadBasePtr, LoadAlign.getValue(), - NumBytes, StoreSize, AATags.TBAA, AATags.TBAAStruct, AATags.Scope, - AATags.NoAlias); + StoreBasePtr, *StoreAlign, LoadBasePtr, *LoadAlign, NumBytes, StoreSize, + AATags.TBAA, AATags.TBAAStruct, AATags.Scope, AATags.NoAlias); } NewCall->setDebugLoc(TheStore->getDebugLoc()); @@ -2113,7 +2024,8 @@ void LoopIdiomRecognize::transformLoopToCountable( auto *LbBr = cast<BranchInst>(Body->getTerminator()); ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition()); - PHINode *TcPhi = PHINode::Create(CountTy, 2, "tcphi", &Body->front()); + PHINode *TcPhi = PHINode::Create(CountTy, 2, "tcphi"); + TcPhi->insertBefore(Body->begin()); Builder.SetInsertPoint(LbCond); Instruction *TcDec = cast<Instruction>(Builder.CreateSub( @@ -2219,7 +2131,8 @@ void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB, ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition()); Type *Ty = TripCnt->getType(); - PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front()); + PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi"); + TcPhi->insertBefore(Body->begin()); Builder.SetInsertPoint(LbCond); Instruction *TcDec = cast<Instruction>( @@ -2477,7 +2390,7 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() { // intrinsic/shift we'll use are not cheap. Note that we are okay with *just* // making the loop countable, even if nothing else changes. IntrinsicCostAttributes Attrs( - IntrID, Ty, {UndefValue::get(Ty), /*is_zero_undef=*/Builder.getTrue()}); + IntrID, Ty, {PoisonValue::get(Ty), /*is_zero_poison=*/Builder.getTrue()}); InstructionCost Cost = TTI->getIntrinsicInstrCost(Attrs, CostKind); if (Cost > TargetTransformInfo::TCC_Basic) { LLVM_DEBUG(dbgs() << DEBUG_TYPE @@ -2493,6 +2406,24 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() { // Ok, transform appears worthwhile. 
MadeChange = true; + if (!isGuaranteedNotToBeUndefOrPoison(BitPos)) { + // BitMask may be computed from BitPos, Freeze BitPos so we can increase + // its use count. + Instruction *InsertPt = nullptr; + if (auto *BitPosI = dyn_cast<Instruction>(BitPos)) + InsertPt = &**BitPosI->getInsertionPointAfterDef(); + else + InsertPt = &*DT->getRoot()->getFirstNonPHIOrDbgOrAlloca(); + if (!InsertPt) + return false; + FreezeInst *BitPosFrozen = + new FreezeInst(BitPos, BitPos->getName() + ".fr", InsertPt); + BitPos->replaceUsesWithIf(BitPosFrozen, [BitPosFrozen](Use &U) { + return U.getUser() != BitPosFrozen; + }); + BitPos = BitPosFrozen; + } + // Step 1: Compute the loop trip count. Value *LowBitMask = Builder.CreateAdd(BitMask, Constant::getAllOnesValue(Ty), @@ -2501,7 +2432,7 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() { Builder.CreateOr(LowBitMask, BitMask, BitPos->getName() + ".mask"); Value *XMasked = Builder.CreateAnd(X, Mask, X->getName() + ".masked"); CallInst *XMaskedNumLeadingZeros = Builder.CreateIntrinsic( - IntrID, Ty, {XMasked, /*is_zero_undef=*/Builder.getTrue()}, + IntrID, Ty, {XMasked, /*is_zero_poison=*/Builder.getTrue()}, /*FMFSource=*/nullptr, XMasked->getName() + ".numleadingzeros"); Value *XMaskedNumActiveBits = Builder.CreateSub( ConstantInt::get(Ty, Ty->getScalarSizeInBits()), XMaskedNumLeadingZeros, @@ -2562,7 +2493,7 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() { // Step 4: Rewrite the loop into a countable form, with canonical IV. // The new canonical induction variable. - Builder.SetInsertPoint(&LoopHeaderBB->front()); + Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->begin()); auto *IV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv"); // The induction itself. @@ -2831,7 +2762,7 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() { // intrinsic we'll use are not cheap. Note that we are okay with *just* // making the loop countable, even if nothing else changes. 
IntrinsicCostAttributes Attrs( - IntrID, Ty, {UndefValue::get(Ty), /*is_zero_undef=*/Builder.getFalse()}); + IntrID, Ty, {PoisonValue::get(Ty), /*is_zero_poison=*/Builder.getFalse()}); InstructionCost Cost = TTI->getIntrinsicInstrCost(Attrs, CostKind); if (Cost > TargetTransformInfo::TCC_Basic) { LLVM_DEBUG(dbgs() << DEBUG_TYPE @@ -2849,7 +2780,7 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() { // Step 1: Compute the loop's final IV value / trip count. CallInst *ValNumLeadingZeros = Builder.CreateIntrinsic( - IntrID, Ty, {Val, /*is_zero_undef=*/Builder.getFalse()}, + IntrID, Ty, {Val, /*is_zero_poison=*/Builder.getFalse()}, /*FMFSource=*/nullptr, Val->getName() + ".numleadingzeros"); Value *ValNumActiveBits = Builder.CreateSub( ConstantInt::get(Ty, Ty->getScalarSizeInBits()), ValNumLeadingZeros, @@ -2886,11 +2817,11 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() { // Step 3: Rewrite the loop into a countable form, with canonical IV. // The new canonical induction variable. - Builder.SetInsertPoint(&LoopHeaderBB->front()); + Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->begin()); auto *CIV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv"); // The induction itself. - Builder.SetInsertPoint(LoopHeaderBB->getFirstNonPHI()); + Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->getFirstNonPHIIt()); auto *CIVNext = Builder.CreateAdd(CIV, ConstantInt::get(Ty, 1), CIV->getName() + ".next", /*HasNUW=*/true, /*HasNSW=*/Bitwidth != 2); |