Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstructionCombining.cpp')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 651
1 file changed, 395 insertions, 256 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index eb5eadba194d..029be5257694 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1027,13 +1027,11 @@ static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO,
   if (!ConstIsRHS)
     std::swap(Op0, Op1);
 
-  auto *BO = cast<BinaryOperator>(&I);
-  Value *RI = Builder.CreateBinOp(BO->getOpcode(), Op0, Op1,
-                                  SO->getName() + ".op");
-  auto *FPInst = dyn_cast<Instruction>(RI);
-  if (FPInst && isa<FPMathOperator>(FPInst))
-    FPInst->copyFastMathFlags(BO);
-  return RI;
+  Value *NewBO = Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), Op0,
+                                     Op1, SO->getName() + ".op");
+  if (auto *NewBOI = dyn_cast<Instruction>(NewBO))
+    NewBOI->copyIRFlags(&I);
+  return NewBO;
 }
 
 Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op,
@@ -1289,6 +1287,70 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
   return replaceInstUsesWith(I, NewPN);
 }
 
+Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
+  // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
+  // we are guarding against replicating the binop in >1 predecessor.
+  // This could miss matching a phi with 2 constant incoming values.
+  auto *Phi0 = dyn_cast<PHINode>(BO.getOperand(0));
+  auto *Phi1 = dyn_cast<PHINode>(BO.getOperand(1));
+  if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() ||
+      Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
+    return nullptr;
+
+  // TODO: Remove the restriction for binop being in the same block as the phis.
+  if (BO.getParent() != Phi0->getParent() ||
+      BO.getParent() != Phi1->getParent())
+    return nullptr;
+
+  // Match a pair of incoming constants for one of the predecessor blocks.
+  BasicBlock *ConstBB, *OtherBB;
+  Constant *C0, *C1;
+  if (match(Phi0->getIncomingValue(0), m_ImmConstant(C0))) {
+    ConstBB = Phi0->getIncomingBlock(0);
+    OtherBB = Phi0->getIncomingBlock(1);
+  } else if (match(Phi0->getIncomingValue(1), m_ImmConstant(C0))) {
+    ConstBB = Phi0->getIncomingBlock(1);
+    OtherBB = Phi0->getIncomingBlock(0);
+  } else {
+    return nullptr;
+  }
+  if (!match(Phi1->getIncomingValueForBlock(ConstBB), m_ImmConstant(C1)))
+    return nullptr;
+
+  // The block that we are hoisting to must reach here unconditionally.
+  // Otherwise, we could be speculatively executing an expensive or
+  // non-speculative op.
+  auto *PredBlockBranch = dyn_cast<BranchInst>(OtherBB->getTerminator());
+  if (!PredBlockBranch || PredBlockBranch->isConditional() ||
+      !DT.isReachableFromEntry(OtherBB))
+    return nullptr;
+
+  // TODO: This check could be tightened to only apply to binops (div/rem) that
+  // are not safe to speculatively execute. But that could allow hoisting
+  // potentially expensive instructions (fdiv for example).
+  for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter)
+    if (!isGuaranteedToTransferExecutionToSuccessor(&*BBIter))
+      return nullptr;
+
+  // Make a new binop in the predecessor block with the non-constant incoming
+  // values.
+  Builder.SetInsertPoint(PredBlockBranch);
+  Value *NewBO = Builder.CreateBinOp(BO.getOpcode(),
+                                     Phi0->getIncomingValueForBlock(OtherBB),
+                                     Phi1->getIncomingValueForBlock(OtherBB));
+  if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(NewBO))
+    NotFoldedNewBO->copyIRFlags(&BO);
+
+  // Fold constants for the predecessor block with constant incoming values.
+  Constant *NewC = ConstantExpr::get(BO.getOpcode(), C0, C1);
+
+  // Replace the binop with a phi of the new values. The old phis are dead.
+  PHINode *NewPhi = PHINode::Create(BO.getType(), 2);
+  NewPhi->addIncoming(NewBO, OtherBB);
+  NewPhi->addIncoming(NewC, ConstBB);
+  return NewPhi;
+}
+
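Illustration (editorial note, not part of the patch): the new foldBinopWithPhiOperands() rewrites IR shaped roughly like the following, where block and value names are hypothetical and %x, %y are assumed available in %then:

    then:                                        ; unconditional predecessor
      br label %end
    end:
      %p0 = phi i32 [ 17, %entry ], [ %x, %then ]
      %p1 = phi i32 [ 42, %entry ], [ %y, %then ]
      %r = add i32 %p0, %p1

into a single phi, with the binop hoisted into %then and the constant pair folded (17 + 42):

    then:
      %r.new = add i32 %x, %y
      br label %end
    end:
      %r = phi i32 [ %r.new, %then ], [ 59, %entry ]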
 Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
   if (!isa<Constant>(I.getOperand(1)))
     return nullptr;
@@ -1307,10 +1369,11 @@ Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
 /// is a sequence of GEP indices into the pointed type that will land us at the
 /// specified offset. If so, fill them into NewIndices and return the resultant
 /// element type, otherwise return null.
-Type *
-InstCombinerImpl::FindElementAtOffset(PointerType *PtrTy, int64_t IntOffset,
-                                      SmallVectorImpl<Value *> &NewIndices) {
-  Type *Ty = PtrTy->getElementType();
+static Type *findElementAtOffset(PointerType *PtrTy, int64_t IntOffset,
+                                 SmallVectorImpl<Value *> &NewIndices,
+                                 const DataLayout &DL) {
+  // Only used by visitGEPOfBitcast(), which is skipped for opaque pointers.
+  Type *Ty = PtrTy->getNonOpaquePointerElementType();
   if (!Ty->isSized())
     return nullptr;
 
@@ -1320,7 +1383,7 @@ InstCombinerImpl::FindElementAtOffset(PointerType *PtrTy, int64_t IntOffset,
     return nullptr;
 
   for (const APInt &Index : Indices)
-    NewIndices.push_back(Builder.getInt(Index));
+    NewIndices.push_back(ConstantInt::get(PtrTy->getContext(), Index));
 
   return Ty;
 }
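Illustration (editorial note): for a pointer to a struct such as %t = type { i32, [4 x i16] } and byte offset 10, the renamed findElementAtOffset() would, under a typical data layout, produce indices equivalent to:

    %f = getelementptr %t, %t* %p, i64 0, i32 1, i64 3   ; 4 + 3*2 == 10 bytes

The exact index types are an assumption here; the offset arithmetic is the point.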
@@ -1884,12 +1947,254 @@ static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
   return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel);
 }
 
+Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
+                                             GEPOperator *Src) {
+  // Combine Indices - If the source pointer to this getelementptr instruction
+  // is a getelementptr instruction with matching element type, combine the
+  // indices of the two getelementptr instructions into a single instruction.
+  if (Src->getResultElementType() != GEP.getSourceElementType())
+    return nullptr;
+
+  if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
+    return nullptr;
+
+  if (Src->getNumOperands() == 2 && GEP.getNumOperands() == 2 &&
+      Src->hasOneUse()) {
+    Value *GO1 = GEP.getOperand(1);
+    Value *SO1 = Src->getOperand(1);
+
+    if (LI) {
+      // Try to reassociate loop invariant GEP chains to enable LICM.
+      if (Loop *L = LI->getLoopFor(GEP.getParent())) {
+        // Reassociate the two GEPs if SO1 is variant in the loop and GO1 is
+        // invariant: this breaks the dependence between GEPs and allows LICM
+        // to hoist the invariant part out of the loop.
+        if (L->isLoopInvariant(GO1) && !L->isLoopInvariant(SO1)) {
+          // We have to be careful here.
+          // We have something like:
+          //   %src = getelementptr <ty>, <ty>* %base, <ty> %idx
+          //   %gep = getelementptr <ty>, <ty>* %src, <ty> %idx2
+          // If we just swap idx & idx2 then we could inadvertently
+          // change %src from a vector to a scalar, or vice versa.
+          // Cases:
+          //  1) %base a scalar & idx a scalar & idx2 a vector
+          //     => Swapping idx & idx2 turns %src into a vector type.
+          //  2) %base a scalar & idx a vector & idx2 a scalar
+          //     => Swapping idx & idx2 turns %src into a scalar type
+          //  3) %base, %idx, and %idx2 are scalars
+          //     => %src & %gep are scalars
+          //     => swapping idx & idx2 is safe
+          //  4) %base a vector
+          //     => %src is a vector
+          //     => swapping idx & idx2 is safe.
+          auto *SO0 = Src->getOperand(0);
+          auto *SO0Ty = SO0->getType();
+          if (!isa<VectorType>(GEP.getType()) ||  // case 3
+              isa<VectorType>(SO0Ty)) {           // case 4
+            Src->setOperand(1, GO1);
+            GEP.setOperand(1, SO1);
+            return &GEP;
+          } else {
+            // Case 1 or 2
+            // -- have to recreate %src & %gep
+            // put NewSrc at same location as %src
+            Builder.SetInsertPoint(cast<Instruction>(Src));
+            Value *NewSrc = Builder.CreateGEP(
+                GEP.getSourceElementType(), SO0, GO1, Src->getName());
+            // Propagate 'inbounds' if the new source was not constant-folded.
+            if (auto *NewSrcGEPI = dyn_cast<GetElementPtrInst>(NewSrc))
+              NewSrcGEPI->setIsInBounds(Src->isInBounds());
+            GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
+                GEP.getSourceElementType(), NewSrc, {SO1});
+            NewGEP->setIsInBounds(GEP.isInBounds());
+            return NewGEP;
+          }
+        }
+      }
+    }
+  }
+
+  // Note that if our source is a gep chain itself then we wait for that
+  // chain to be resolved before we perform this transformation.  This
+  // avoids us creating a TON of code in some cases.
+  if (auto *SrcGEP = dyn_cast<GEPOperator>(Src->getOperand(0)))
+    if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
+      return nullptr;   // Wait until our source is folded to completion.
+
+  SmallVector<Value*, 8> Indices;
+
+  // Find out whether the last index in the source GEP is a sequential idx.
+  bool EndsWithSequential = false;
+  for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
+       I != E; ++I)
+    EndsWithSequential = I.isSequential();
+
+  // Can we combine the two pointer arithmetics offsets?
+  if (EndsWithSequential) {
+    // Replace: gep (gep %P, long B), long A, ...
+    // With:    T = long A+B; gep %P, T, ...
+    Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
+    Value *GO1 = GEP.getOperand(1);
+
+    // If they aren't the same type, then the input hasn't been processed
+    // by the loop above yet (which canonicalizes sequential index types to
+    // intptr_t).  Just avoid transforming this until the input has been
+    // normalized.
+    if (SO1->getType() != GO1->getType())
+      return nullptr;
+
+    Value *Sum =
+        SimplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
+    // Only do the combine when we are sure the cost after the
+    // merge is never more than that before the merge.
+    if (Sum == nullptr)
+      return nullptr;
+
+    // Update the GEP in place if possible.
+    if (Src->getNumOperands() == 2) {
+      GEP.setIsInBounds(isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP)));
+      replaceOperand(GEP, 0, Src->getOperand(0));
+      replaceOperand(GEP, 1, Sum);
+      return &GEP;
+    }
+    Indices.append(Src->op_begin()+1, Src->op_end()-1);
+    Indices.push_back(Sum);
+    Indices.append(GEP.op_begin()+2, GEP.op_end());
+  } else if (isa<Constant>(*GEP.idx_begin()) &&
+             cast<Constant>(*GEP.idx_begin())->isNullValue() &&
+             Src->getNumOperands() != 1) {
+    // Otherwise we can do the fold if the first index of the GEP is a zero
+    Indices.append(Src->op_begin()+1, Src->op_end());
+    Indices.append(GEP.idx_begin()+1, GEP.idx_end());
+  }
+
+  if (!Indices.empty())
+    return isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))
+               ? GetElementPtrInst::CreateInBounds(
+                     Src->getSourceElementType(), Src->getOperand(0), Indices,
+                     GEP.getName())
+               : GetElementPtrInst::Create(Src->getSourceElementType(),
+                                           Src->getOperand(0), Indices,
+                                           GEP.getName());
+
+  return nullptr;
+}
+
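Illustration (editorial note): the simplest case visitGEPOfGEP() handles is merging constant sequential indices, e.g. (names hypothetical):

    %s = getelementptr i32, i32* %p, i64 4
    %g = getelementptr i32, i32* %s, i64 8

becomes, because SimplifyAddInst can fold 8 + 4 without creating a new instruction:

    %g = getelementptr i32, i32* %p, i64 12

The fold only fires when the index sum simplifies for free and the pair passes shouldMergeGEPs(); otherwise the code bails as above.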
+// Note that we may have also stripped an address space cast in between.
+Instruction *InstCombinerImpl::visitGEPOfBitcast(BitCastInst *BCI,
+                                                 GetElementPtrInst &GEP) {
+  // With opaque pointers, there is no pointer element type we can use to
+  // adjust the GEP type.
+  PointerType *SrcType = cast<PointerType>(BCI->getSrcTy());
+  if (SrcType->isOpaque())
+    return nullptr;
+
+  Type *GEPEltType = GEP.getSourceElementType();
+  Type *SrcEltType = SrcType->getNonOpaquePointerElementType();
+  Value *SrcOp = BCI->getOperand(0);
+
+  // GEP directly using the source operand if this GEP is accessing an element
+  // of a bitcasted pointer to vector or array of the same dimensions:
+  // gep (bitcast <c x ty>* X to [c x ty]*), Y, Z --> gep X, Y, Z
+  // gep (bitcast [c x ty]* X to <c x ty>*), Y, Z --> gep X, Y, Z
+  auto areMatchingArrayAndVecTypes = [](Type *ArrTy, Type *VecTy,
+                                        const DataLayout &DL) {
+    auto *VecVTy = cast<FixedVectorType>(VecTy);
+    return ArrTy->getArrayElementType() == VecVTy->getElementType() &&
+           ArrTy->getArrayNumElements() == VecVTy->getNumElements() &&
+           DL.getTypeAllocSize(ArrTy) == DL.getTypeAllocSize(VecTy);
+  };
+  if (GEP.getNumOperands() == 3 &&
+      ((GEPEltType->isArrayTy() && isa<FixedVectorType>(SrcEltType) &&
+        areMatchingArrayAndVecTypes(GEPEltType, SrcEltType, DL)) ||
+       (isa<FixedVectorType>(GEPEltType) && SrcEltType->isArrayTy() &&
+        areMatchingArrayAndVecTypes(SrcEltType, GEPEltType, DL)))) {
+
+    // Create a new GEP here, as using `setOperand()` followed by
+    // `setSourceElementType()` won't actually update the type of the
+    // existing GEP Value, causing issues if this Value is accessed when
+    // constructing an AddrSpaceCastInst.
+    SmallVector<Value *, 8> Indices(GEP.indices());
+    Value *NGEP = GEP.isInBounds()
+                      ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, Indices)
+                      : Builder.CreateGEP(SrcEltType, SrcOp, Indices);
+    NGEP->takeName(&GEP);
+
+    // Preserve GEP address space to satisfy users
+    if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
+      return new AddrSpaceCastInst(NGEP, GEP.getType());
+
+    return replaceInstUsesWith(GEP, NGEP);
+  }
+
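Illustration (editorial note): the matching array/vector case folds, e.g. (hypothetical values):

    %c = bitcast <4 x i32>* %v to [4 x i32]*
    %e = getelementptr [4 x i32], [4 x i32]* %c, i64 0, i64 2

into a GEP on the original vector pointer (same element type, element count, and alloc size):

    %e = getelementptr <4 x i32>, <4 x i32>* %v, i64 0, i64 2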
+  // See if we can simplify:
+  //   X = bitcast A* to B*
+  //   Y = gep X, <...constant indices...>
+  // into a gep of the original struct. This is important for SROA and alias
+  // analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
+  unsigned OffsetBits = DL.getIndexTypeSizeInBits(GEP.getType());
+  APInt Offset(OffsetBits, 0);
+
+  // If the bitcast argument is an allocation, the bitcast is for conversion
+  // to the actual type of the allocation. Removing such bitcasts results in
+  // GEPs with i8* base and pure byte offsets. That means GEP is not aware of
+  // struct or array hierarchy.
+  // By avoiding such GEPs, phi translation and MemoryDependencyAnalysis have
+  // a better chance to succeed.
+  if (!isa<BitCastInst>(SrcOp) && GEP.accumulateConstantOffset(DL, Offset) &&
+      !isAllocationFn(SrcOp, &TLI)) {
+    // If this GEP instruction doesn't move the pointer, just replace the GEP
+    // with a bitcast of the real input to the dest type.
+    if (!Offset) {
+      // If the bitcast is of an allocation, and the allocation will be
+      // converted to match the type of the cast, don't touch this.
+      if (isa<AllocaInst>(SrcOp)) {
+        // See if the bitcast simplifies, if so, don't nuke this GEP yet.
+        if (Instruction *I = visitBitCast(*BCI)) {
+          if (I != BCI) {
+            I->takeName(BCI);
+            BCI->getParent()->getInstList().insert(BCI->getIterator(), I);
+            replaceInstUsesWith(*BCI, I);
+          }
+          return &GEP;
+        }
+      }
+
+      if (SrcType->getPointerAddressSpace() != GEP.getAddressSpace())
+        return new AddrSpaceCastInst(SrcOp, GEP.getType());
+      return new BitCastInst(SrcOp, GEP.getType());
+    }
+
+    // Otherwise, if the offset is non-zero, we need to find out if there is a
+    // field at Offset in 'A's type.  If so, we can pull the cast through the
+    // GEP.
+    SmallVector<Value*, 8> NewIndices;
+    if (findElementAtOffset(SrcType, Offset.getSExtValue(), NewIndices, DL)) {
+      Value *NGEP =
+          GEP.isInBounds()
+              ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, NewIndices)
+              : Builder.CreateGEP(SrcEltType, SrcOp, NewIndices);
+
+      if (NGEP->getType() == GEP.getType())
+        return replaceInstUsesWith(GEP, NGEP);
+      NGEP->takeName(&GEP);
+
+      if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
+        return new AddrSpaceCastInst(NGEP, GEP.getType());
+      return new BitCastInst(NGEP, GEP.getType());
+    }
+  }
+
+  return nullptr;
+}
+
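Illustration (editorial note): the constant-offset path pulls a bitcast through a GEP when the offset lands on a field of the original type, e.g. for a hypothetical %pair = type { i32, i32 }:

    %x = bitcast %pair* %p to i8*
    %y = getelementptr i8, i8* %x, i64 4

becomes a field access plus a cast of the result (and just a cast when the offset is zero):

    %f = getelementptr %pair, %pair* %p, i64 0, i32 1
    %y = bitcast i32* %f to i8*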
 Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
-  SmallVector<Value *, 8> Ops(GEP.operands());
+  Value *PtrOp = GEP.getOperand(0);
+  SmallVector<Value *, 8> Indices(GEP.indices());
   Type *GEPType = GEP.getType();
   Type *GEPEltType = GEP.getSourceElementType();
   bool IsGEPSrcEleScalable = isa<ScalableVectorType>(GEPEltType);
-  if (Value *V = SimplifyGEPInst(GEPEltType, Ops, GEP.isInBounds(),
+  if (Value *V = SimplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.isInBounds(),
                                  SQ.getWithInstruction(&GEP)))
     return replaceInstUsesWith(GEP, V);
 
@@ -1912,8 +2217,6 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
     // undef elements to decrease demanded bits
   }
 
-  Value *PtrOp = GEP.getOperand(0);
-
   // Eliminate unneeded casts for indices, and replace indices which displace
   // by multiples of a zero size type with zero.
   bool MadeChange = false;
@@ -2063,132 +2366,9 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
     PtrOp = NewGEP;
   }
 
-  // Combine Indices - If the source pointer to this getelementptr instruction
-  // is a getelementptr instruction, combine the indices of the two
-  // getelementptr instructions into a single instruction.
-  if (auto *Src = dyn_cast<GEPOperator>(PtrOp)) {
-    if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
-      return nullptr;
-
-    if (Src->getNumOperands() == 2 && GEP.getNumOperands() == 2 &&
-        Src->hasOneUse()) {
-      Value *GO1 = GEP.getOperand(1);
-      Value *SO1 = Src->getOperand(1);
-
-      if (LI) {
-        // Try to reassociate loop invariant GEP chains to enable LICM.
-        if (Loop *L = LI->getLoopFor(GEP.getParent())) {
-          // Reassociate the two GEPs if SO1 is variant in the loop and GO1 is
-          // invariant: this breaks the dependence between GEPs and allows LICM
-          // to hoist the invariant part out of the loop.
-          if (L->isLoopInvariant(GO1) && !L->isLoopInvariant(SO1)) {
-            // We have to be careful here.
-            // We have something like:
-            //   %src = getelementptr <ty>, <ty>* %base, <ty> %idx
-            //   %gep = getelementptr <ty>, <ty>* %src, <ty> %idx2
-            // If we just swap idx & idx2 then we could inadvertently
-            // change %src from a vector to a scalar, or vice versa.
-            // Cases:
-            //  1) %base a scalar & idx a scalar & idx2 a vector
-            //     => Swapping idx & idx2 turns %src into a vector type.
-            //  2) %base a scalar & idx a vector & idx2 a scalar
-            //     => Swapping idx & idx2 turns %src into a scalar type
-            //  3) %base, %idx, and %idx2 are scalars
-            //     => %src & %gep are scalars
-            //     => swapping idx & idx2 is safe
-            //  4) %base a vector
-            //     => %src is a vector
-            //     => swapping idx & idx2 is safe.
-            auto *SO0 = Src->getOperand(0);
-            auto *SO0Ty = SO0->getType();
-            if (!isa<VectorType>(GEPType) ||  // case 3
-                isa<VectorType>(SO0Ty)) {     // case 4
-              Src->setOperand(1, GO1);
-              GEP.setOperand(1, SO1);
-              return &GEP;
-            } else {
-              // Case 1 or 2
-              // -- have to recreate %src & %gep
-              // put NewSrc at same location as %src
-              Builder.SetInsertPoint(cast<Instruction>(PtrOp));
-              Value *NewSrc =
-                  Builder.CreateGEP(GEPEltType, SO0, GO1, Src->getName());
-              // Propagate 'inbounds' if the new source was not constant-folded.
-              if (auto *NewSrcGEPI = dyn_cast<GetElementPtrInst>(NewSrc))
-                NewSrcGEPI->setIsInBounds(Src->isInBounds());
-              GetElementPtrInst *NewGEP =
-                  GetElementPtrInst::Create(GEPEltType, NewSrc, {SO1});
-              NewGEP->setIsInBounds(GEP.isInBounds());
-              return NewGEP;
-            }
-          }
-        }
-      }
-    }
-
-    // Note that if our source is a gep chain itself then we wait for that
-    // chain to be resolved before we perform this transformation.  This
-    // avoids us creating a TON of code in some cases.
-    if (auto *SrcGEP = dyn_cast<GEPOperator>(Src->getOperand(0)))
-      if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
-        return nullptr;   // Wait until our source is folded to completion.
-
-    SmallVector<Value*, 8> Indices;
-
-    // Find out whether the last index in the source GEP is a sequential idx.
-    bool EndsWithSequential = false;
-    for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
-         I != E; ++I)
-      EndsWithSequential = I.isSequential();
-
-    // Can we combine the two pointer arithmetics offsets?
-    if (EndsWithSequential) {
-      // Replace: gep (gep %P, long B), long A, ...
-      // With:    T = long A+B; gep %P, T, ...
-      Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
-      Value *GO1 = GEP.getOperand(1);
-
-      // If they aren't the same type, then the input hasn't been processed
-      // by the loop above yet (which canonicalizes sequential index types to
-      // intptr_t).  Just avoid transforming this until the input has been
-      // normalized.
-      if (SO1->getType() != GO1->getType())
-        return nullptr;
-
-      Value *Sum =
-          SimplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
-      // Only do the combine when we are sure the cost after the
-      // merge is never more than that before the merge.
-      if (Sum == nullptr)
-        return nullptr;
-
-      // Update the GEP in place if possible.
-      if (Src->getNumOperands() == 2) {
-        GEP.setIsInBounds(isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP)));
-        replaceOperand(GEP, 0, Src->getOperand(0));
-        replaceOperand(GEP, 1, Sum);
-        return &GEP;
-      }
-      Indices.append(Src->op_begin()+1, Src->op_end()-1);
-      Indices.push_back(Sum);
-      Indices.append(GEP.op_begin()+2, GEP.op_end());
-    } else if (isa<Constant>(*GEP.idx_begin()) &&
-               cast<Constant>(*GEP.idx_begin())->isNullValue() &&
-               Src->getNumOperands() != 1) {
-      // Otherwise we can do the fold if the first index of the GEP is a zero
-      Indices.append(Src->op_begin()+1, Src->op_end());
-      Indices.append(GEP.idx_begin()+1, GEP.idx_end());
-    }
-
-    if (!Indices.empty())
-      return isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))
-                 ? GetElementPtrInst::CreateInBounds(
-                       Src->getSourceElementType(), Src->getOperand(0), Indices,
-                       GEP.getName())
-                 : GetElementPtrInst::Create(Src->getSourceElementType(),
-                                             Src->getOperand(0), Indices,
-                                             GEP.getName());
-  }
+  if (auto *Src = dyn_cast<GEPOperator>(PtrOp))
+    if (Instruction *I = visitGEPOfGEP(GEP, Src))
+      return I;
 
   // Skip if GEP source element type is scalable. The type alloc size is unknown
   // at compile-time.
@@ -2234,9 +2414,13 @@
   Value *StrippedPtr = PtrOp->stripPointerCasts();
   PointerType *StrippedPtrTy = cast<PointerType>(StrippedPtr->getType());
 
-  if (StrippedPtr != PtrOp) {
+  // TODO: The basic approach of these folds is not compatible with opaque
+  // pointers, because we can't use bitcasts as a hint for a desirable GEP
+  // type. Instead, we should perform canonicalization directly on the GEP
+  // type. For now, skip these.
+  if (StrippedPtr != PtrOp && !StrippedPtrTy->isOpaque()) {
     bool HasZeroPointerIndex = false;
-    Type *StrippedPtrEltTy = StrippedPtrTy->getElementType();
+    Type *StrippedPtrEltTy = StrippedPtrTy->getNonOpaquePointerElementType();
 
     if (auto *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
       HasZeroPointerIndex = C->isZero();
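Illustration (editorial note): the StrippedPtr folds use the element type behind a pointer cast as a hint for the GEP type, e.g. (hypothetical values):

    %0 = bitcast [10 x i32]* %arr to i32*
    %g = getelementptr i32, i32* %0, i64 4

can be rewritten to index the array directly:

    %g = getelementptr [10 x i32], [10 x i32]* %arr, i64 0, i64 4

With an opaque ptr there is no source element type to recover, hence the new isOpaque() bail-out above.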
@@ -2420,103 +2604,9 @@
     ASCStrippedPtrOp = BC;
   }
 
-  if (auto *BCI = dyn_cast<BitCastInst>(ASCStrippedPtrOp)) {
-    Value *SrcOp = BCI->getOperand(0);
-    PointerType *SrcType = cast<PointerType>(BCI->getSrcTy());
-    Type *SrcEltType = SrcType->getElementType();
-
-    // GEP directly using the source operand if this GEP is accessing an element
-    // of a bitcasted pointer to vector or array of the same dimensions:
-    // gep (bitcast <c x ty>* X to [c x ty]*), Y, Z --> gep X, Y, Z
-    // gep (bitcast [c x ty]* X to <c x ty>*), Y, Z --> gep X, Y, Z
-    auto areMatchingArrayAndVecTypes = [](Type *ArrTy, Type *VecTy,
-                                          const DataLayout &DL) {
-      auto *VecVTy = cast<FixedVectorType>(VecTy);
-      return ArrTy->getArrayElementType() == VecVTy->getElementType() &&
-             ArrTy->getArrayNumElements() == VecVTy->getNumElements() &&
-             DL.getTypeAllocSize(ArrTy) == DL.getTypeAllocSize(VecTy);
-    };
-    if (GEP.getNumOperands() == 3 &&
-        ((GEPEltType->isArrayTy() && isa<FixedVectorType>(SrcEltType) &&
-          areMatchingArrayAndVecTypes(GEPEltType, SrcEltType, DL)) ||
-         (isa<FixedVectorType>(GEPEltType) && SrcEltType->isArrayTy() &&
-          areMatchingArrayAndVecTypes(SrcEltType, GEPEltType, DL)))) {
-
-      // Create a new GEP here, as using `setOperand()` followed by
-      // `setSourceElementType()` won't actually update the type of the
-      // existing GEP Value, causing issues if this Value is accessed when
-      // constructing an AddrSpaceCastInst.
-      Value *NGEP =
-          GEP.isInBounds()
-              ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, {Ops[1], Ops[2]})
-              : Builder.CreateGEP(SrcEltType, SrcOp, {Ops[1], Ops[2]});
-      NGEP->takeName(&GEP);
-
-      // Preserve GEP address space to satisfy users
-      if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
-        return new AddrSpaceCastInst(NGEP, GEPType);
-
-      return replaceInstUsesWith(GEP, NGEP);
-    }
-
-    // See if we can simplify:
-    //   X = bitcast A* to B*
-    //   Y = gep X, <...constant indices...>
-    // into a gep of the original struct. This is important for SROA and alias
-    // analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
-    unsigned OffsetBits = DL.getIndexTypeSizeInBits(GEPType);
-    APInt Offset(OffsetBits, 0);
-
-    // If the bitcast argument is an allocation, the bitcast is for conversion
-    // to the actual type of the allocation. Removing such bitcasts results in
-    // GEPs with i8* base and pure byte offsets. That means GEP is not aware of
-    // struct or array hierarchy.
-    // By avoiding such GEPs, phi translation and MemoryDependencyAnalysis have
-    // a better chance to succeed.
-    if (!isa<BitCastInst>(SrcOp) && GEP.accumulateConstantOffset(DL, Offset) &&
-        !isAllocationFn(SrcOp, &TLI)) {
-      // If this GEP instruction doesn't move the pointer, just replace the GEP
-      // with a bitcast of the real input to the dest type.
-      if (!Offset) {
-        // If the bitcast is of an allocation, and the allocation will be
-        // converted to match the type of the cast, don't touch this.
-        if (isa<AllocaInst>(SrcOp)) {
-          // See if the bitcast simplifies, if so, don't nuke this GEP yet.
-          if (Instruction *I = visitBitCast(*BCI)) {
-            if (I != BCI) {
-              I->takeName(BCI);
-              BCI->getParent()->getInstList().insert(BCI->getIterator(), I);
-              replaceInstUsesWith(*BCI, I);
-            }
-            return &GEP;
-          }
-        }
-
-        if (SrcType->getPointerAddressSpace() != GEP.getAddressSpace())
-          return new AddrSpaceCastInst(SrcOp, GEPType);
-        return new BitCastInst(SrcOp, GEPType);
-      }
-
-      // Otherwise, if the offset is non-zero, we need to find out if there is a
-      // field at Offset in 'A's type.  If so, we can pull the cast through the
-      // GEP.
-      SmallVector<Value*, 8> NewIndices;
-      if (FindElementAtOffset(SrcType, Offset.getSExtValue(), NewIndices)) {
-        Value *NGEP =
-            GEP.isInBounds()
-                ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, NewIndices)
-                : Builder.CreateGEP(SrcEltType, SrcOp, NewIndices);
-
-        if (NGEP->getType() == GEPType)
-          return replaceInstUsesWith(GEP, NGEP);
-        NGEP->takeName(&GEP);
-
-        if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
-          return new AddrSpaceCastInst(NGEP, GEPType);
-        return new BitCastInst(NGEP, GEPType);
-      }
-    }
-  }
+  if (auto *BCI = dyn_cast<BitCastInst>(ASCStrippedPtrOp))
+    if (Instruction *I = visitGEPOfBitcast(BCI, GEP))
+      return I;
 
   if (!GEP.isInBounds()) {
     unsigned IdxWidth =
@@ -2533,8 +2623,7 @@
           DL.getTypeAllocSize(AI->getAllocatedType()).getKnownMinSize());
       if (BasePtrOffset.ule(AllocSize)) {
         return GetElementPtrInst::CreateInBounds(
-            GEP.getSourceElementType(), PtrOp, makeArrayRef(Ops).slice(1),
-            GEP.getName());
+            GEP.getSourceElementType(), PtrOp, Indices, GEP.getName());
       }
     }
   }
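Illustration (editorial note): the fold updated just above marks a GEP inbounds when every offset stays within an alloca's allocation, e.g. (hypothetical):

    %a = alloca [10 x i32]
    %g = getelementptr [10 x i32], [10 x i32]* %a, i64 0, i64 4

is 16 bytes into a 40-byte allocation, so %g can become 'getelementptr inbounds' with the same operands (now passed as Indices rather than a slice of Ops).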
@@ -2553,10 +2642,6 @@ static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI,
   if (auto *LI = dyn_cast<LoadInst>(V))
     return isa<GlobalVariable>(LI->getPointerOperand());
   // Two distinct allocations will never be equal.
-  // We rely on LookThroughBitCast in isAllocLikeFn being false, since looking
-  // through bitcasts of V can cause
-  // the result statement below to be true, even when AI and V (ex:
-  // i8* ->i32* ->i8* of AI) are the same allocations.
   return isAllocLikeFn(V, &TLI) && V != AI;
 }
 
@@ -2659,7 +2744,7 @@ static bool isAllocSiteRemovable(Instruction *AI,
         continue;
       }
 
-      if (isReallocLikeFn(I, &TLI, true)) {
+      if (isReallocLikeFn(I, &TLI)) {
        Users.emplace_back(I);
         Worklist.push_back(I);
         continue;
       }
@@ -2682,6 +2767,8 @@
 }
 
 Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
+  assert(isa<AllocaInst>(MI) || isAllocRemovable(&cast<CallBase>(MI), &TLI));
+
   // If we have a malloc call which is only used in any amount of comparisons to
   // null and free calls, delete the calls and replace the comparisons with true
   // or false as appropriate.
@@ -2900,7 +2987,7 @@ Instruction *InstCombinerImpl::visitFree(CallInst &FI) {
   // If we had free(realloc(...)) with no intervening uses, then eliminate the
   // realloc() entirely.
   if (CallInst *CI = dyn_cast<CallInst>(Op)) {
-    if (CI->hasOneUse() && isReallocLikeFn(CI, &TLI, true)) {
+    if (CI->hasOneUse() && isReallocLikeFn(CI, &TLI)) {
       return eraseInstFromFunction(
           *replaceInstUsesWith(*CI, CI->getOperand(0)));
     }
@@ -3709,16 +3796,61 @@ Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
   return nullptr;
 }
 
+/// Check for case where the call writes to an otherwise dead alloca.  This
+/// shows up for unused out-params in idiomatic C/C++ code.  Note that this
+/// helper *only* analyzes the write; doesn't check any other legality aspect.
+static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) {
+  auto *CB = dyn_cast<CallBase>(I);
+  if (!CB)
+    // TODO: handle e.g. store to alloca here - only worth doing if we extend
+    // to allow reload along used path as described below.  Otherwise, this
+    // is simply a store to a dead allocation which will be removed.
+    return false;
+  Optional<MemoryLocation> Dest = MemoryLocation::getForDest(CB, TLI);
+  if (!Dest)
+    return false;
+  auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(Dest->Ptr));
+  if (!AI)
+    // TODO: allow malloc?
+    return false;
+  // TODO: allow memory access dominated by move point?  Note that since AI
+  // could have a reference to itself captured by the call, we would need to
+  // account for cycles in doing so.
+  SmallVector<const User *> AllocaUsers;
+  SmallPtrSet<const User *, 4> Visited;
+  auto pushUsers = [&](const Instruction &I) {
+    for (const User *U : I.users()) {
+      if (Visited.insert(U).second)
+        AllocaUsers.push_back(U);
+    }
+  };
+  pushUsers(*AI);
+  while (!AllocaUsers.empty()) {
+    auto *UserI = cast<Instruction>(AllocaUsers.pop_back_val());
+    if (isa<BitCastInst>(UserI) || isa<GetElementPtrInst>(UserI) ||
+        isa<AddrSpaceCastInst>(UserI)) {
+      pushUsers(*UserI);
+      continue;
+    }
+    if (UserI == CB)
+      continue;
+    // TODO: support lifetime.start/end here
+    return false;
+  }
+  return true;
+}
+
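Illustration (editorial note): the pattern SoleWriteToDeadLocal() recognizes, with a hypothetical callee assumed to write only through its out-param (so that MemoryLocation::getForDest can identify the write destination):

    %tmp = alloca i32
    %r = call i32 @compute(i32* writeonly %tmp, i32 %k)
    ; %tmp has no users besides the call, so the call's only memory write
    ; lands in a dead local and cannot be observed on any other path.

This is what lets the updated TryToSinkInstruction() below accept such calls despite mayWriteToMemory().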
 /// Try to move the specified instruction from its current block into the
 /// beginning of DestBlock, which can only happen if it's safe to move the
 /// instruction past all of the instructions between it and the end of its
 /// block.
-static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
+static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock,
+                                 TargetLibraryInfo &TLI) {
   assert(I->getUniqueUndroppableUser() && "Invariants didn't hold!");
   BasicBlock *SrcBlock = I->getParent();
 
   // Cannot move control-flow-involving, volatile loads, vaarg, etc.
-  if (isa<PHINode>(I) || I->isEHPad() || I->mayHaveSideEffects() ||
+  if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
       I->isTerminator())
     return false;
 
@@ -3738,6 +3870,14 @@
     if (CI->isConvergent())
       return false;
   }
+
+  // Unless we can prove that the memory write isn't visible except on the
+  // path we're sinking to, we must bail.
+  if (I->mayWriteToMemory()) {
+    if (!SoleWriteToDeadLocal(I, TLI))
+      return false;
+  }
+
   // We can only sink load instructions if there is nothing between the load and
   // the end of block that could change the value.
   if (I->mayReadFromMemory()) {
@@ -3746,7 +3886,7 @@
     // successor block.
     if (DestBlock->getUniquePredecessor() != I->getParent())
       return false;
-    for (BasicBlock::iterator Scan = I->getIterator(),
+    for (BasicBlock::iterator Scan = std::next(I->getIterator()),
                               E = I->getParent()->end();
         Scan != E; ++Scan)
       if (Scan->mayWriteToMemory())
@@ -3906,12 +4046,11 @@ bool InstCombinerImpl::run() {
       // predecessor, so that we don't have to split the critical edge.
       // Another option where we can sink is a block that ends with a
       // terminator that does not pass control to other block (such as
-      // return or unreachable). In this case:
+      // return or unreachable or resume). In this case:
       //   - I dominates the User (by SSA form);
       //   - the User will be executed at most once.
      // So sinking I down to User is always profitable or neutral.
-      if (UserParent->getUniquePredecessor() == BB ||
-          (isa<ReturnInst>(Term) || isa<UnreachableInst>(Term))) {
+      if (UserParent->getUniquePredecessor() == BB || succ_empty(Term)) {
         assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
         return UserParent;
       }
@@ -3922,7 +4061,7 @@ bool InstCombinerImpl::run() {
     if (OptBB) {
       auto *UserParent = *OptBB;
       // Okay, the CFG is simple enough, try to sink this instruction.
-      if (TryToSinkInstruction(I, UserParent)) {
+      if (TryToSinkInstruction(I, UserParent, TLI)) {
         LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
         MadeIRChange = true;
         // We'll add uses of the sunk instruction below, but since
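Illustration (editorial note): with succ_empty(Term), sinking now applies to any successor-free terminator (return, unreachable, resume), e.g. (hypothetical):

    bb:
      %v = mul i32 %x, %y
      br i1 %c, label %exit, label %other
    exit:
      ret i32 %v        ; sole user of %v; 'ret' has no successors

%v may be sunk into %exit: it dominates its single user there, and after sinking it executes at most once.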