Diffstat (limited to 'lib/Transforms/InstCombine')
-rw-r--r--   lib/Transforms/InstCombine/CMakeLists.txt                  |   2
-rw-r--r--   lib/Transforms/InstCombine/InstCombine.h                   |   4
-rw-r--r--   lib/Transforms/InstCombine/InstCombineAddSub.cpp           |  88
-rw-r--r--   lib/Transforms/InstCombine/InstCombineAndOrXor.cpp         |  17
-rw-r--r--   lib/Transforms/InstCombine/InstCombineCalls.cpp            | 164
-rw-r--r--   lib/Transforms/InstCombine/InstCombineCasts.cpp            |  22
-rw-r--r--   lib/Transforms/InstCombine/InstCombineCompares.cpp         |  21
-rw-r--r--   lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp  | 121
-rw-r--r--   lib/Transforms/InstCombine/InstCombineMulDivRem.cpp        |   5
-rw-r--r--   lib/Transforms/InstCombine/InstCombineSelect.cpp           |  23
-rw-r--r--   lib/Transforms/InstCombine/InstCombineShifts.cpp           |  76
-rw-r--r--   lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp |  29
-rw-r--r--   lib/Transforms/InstCombine/InstructionCombining.cpp        | 250
13 files changed, 420 insertions, 402 deletions
diff --git a/lib/Transforms/InstCombine/CMakeLists.txt b/lib/Transforms/InstCombine/CMakeLists.txt index d070ccc0d63f..72cfe2c985bc 100644 --- a/lib/Transforms/InstCombine/CMakeLists.txt +++ b/lib/Transforms/InstCombine/CMakeLists.txt @@ -13,3 +13,5 @@ add_llvm_library(LLVMInstCombine InstCombineSimplifyDemanded.cpp InstCombineVectorOps.cpp ) + +add_dependencies(LLVMInstCombine intrinsics_gen) diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index 199df519ce07..0d5ef904ee47 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -11,11 +11,11 @@ #define INSTCOMBINE_INSTCOMBINE_H #include "InstCombineWorklist.h" +#include "llvm/IRBuilder.h" #include "llvm/IntrinsicInst.h" #include "llvm/Operator.h" #include "llvm/Pass.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/InstVisitor.h" #include "llvm/Support/TargetFolder.h" @@ -187,7 +187,7 @@ public: Instruction *visitPHINode(PHINode &PN); Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP); Instruction *visitAllocaInst(AllocaInst &AI); - Instruction *visitMalloc(Instruction &FI); + Instruction *visitAllocSite(Instruction &FI); Instruction *visitFree(CallInst &FI); Instruction *visitLoadInst(LoadInst &LI); Instruction *visitStoreInst(StoreInst &SI); diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 05e702fa43b5..99b62f8d05a7 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -170,10 +170,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // -A + B --> B - A // -A + -B --> -(A + B) if (Value *LHSV = dyn_castNegVal(LHS)) { - if (Value *RHSV = dyn_castNegVal(RHS)) { - Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); - return BinaryOperator::CreateNeg(NewAdd); - } + if (!isa<Constant>(RHS)) + if (Value *RHSV = dyn_castNegVal(RHS)) { + Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); + return BinaryOperator::CreateNeg(NewAdd); + } return BinaryOperator::CreateSub(RHS, LHSV); } @@ -329,6 +330,20 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { } } + // Check for (x & y) + (x ^ y) + { + Value *A = 0, *B = 0; + if (match(RHS, m_Xor(m_Value(A), m_Value(B))) && + (match(LHS, m_And(m_Specific(A), m_Specific(B))) || + match(LHS, m_And(m_Specific(B), m_Specific(A))))) + return BinaryOperator::CreateOr(A, B); + + if (match(LHS, m_Xor(m_Value(A), m_Value(B))) && + (match(RHS, m_And(m_Specific(A), m_Specific(B))) || + match(RHS, m_And(m_Specific(B), m_Specific(A))))) + return BinaryOperator::CreateOr(A, B); + } + return Changed ? &I : 0; } @@ -406,66 +421,6 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { } -/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the -/// code necessary to compute the offset from the base pointer (without adding -/// in the base pointer). Return the result as a signed integer of intptr size. -Value *InstCombiner::EmitGEPOffset(User *GEP) { - TargetData &TD = *getTargetData(); - gep_type_iterator GTI = gep_type_begin(GEP); - Type *IntPtrTy = TD.getIntPtrType(GEP->getContext()); - Value *Result = Constant::getNullValue(IntPtrTy); - - // If the GEP is inbounds, we know that none of the addressing operations will - // overflow in an unsigned sense. - bool isInBounds = cast<GEPOperator>(GEP)->isInBounds(); - - // Build a mask for high order bits. 
- unsigned IntPtrWidth = TD.getPointerSizeInBits(); - uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); - - for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e; - ++i, ++GTI) { - Value *Op = *i; - uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask; - if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) { - if (OpC->isZero()) continue; - - // Handle a struct index, which adds its field offset to the pointer. - if (StructType *STy = dyn_cast<StructType>(*GTI)) { - Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); - - if (Size) - Result = Builder->CreateAdd(Result, ConstantInt::get(IntPtrTy, Size), - GEP->getName()+".offs"); - continue; - } - - Constant *Scale = ConstantInt::get(IntPtrTy, Size); - Constant *OC = - ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/); - Scale = ConstantExpr::getMul(OC, Scale, isInBounds/*NUW*/); - // Emit an add instruction. - Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs"); - continue; - } - // Convert to correct type. - if (Op->getType() != IntPtrTy) - Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c"); - if (Size != 1) { - // We'll let instcombine(mul) convert this to a shl if possible. - Op = Builder->CreateMul(Op, ConstantInt::get(IntPtrTy, Size), - GEP->getName()+".idx", isInBounds /*NUW*/); - } - - // Emit an add instruction. - Result = Builder->CreateAdd(Op, Result, GEP->getName()+".offs"); - } - return Result; -} - - - - /// Optimize pointer differences into the same array into a size. Consider: /// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer /// operands to the ptrtoint instructions for the LHS/RHS of the subtract. @@ -589,11 +544,6 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { if (Instruction *R = FoldOpIntoSelect(I, SI)) return R; - // C - zext(bool) -> bool ? 
C - 1 : C - if (ZExtInst *ZI = dyn_cast<ZExtInst>(Op1)) - if (ZI->getSrcTy()->isIntegerTy(1)) - return SelectInst::Create(ZI->getOperand(0), SubOne(C), C); - // C-(X+C2) --> (C-C2)-X ConstantInt *C2; if (match(Op1, m_Add(m_Value(X), m_ConstantInt(C2)))) diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 0dbe11d2f01f..7d0af0d80226 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -986,19 +986,23 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { bool Op1Ordered; unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); + // uno && ord -> false + if (Op0Pred == 0 && Op1Pred == 0 && Op0Ordered != Op1Ordered) + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); if (Op1Pred == 0) { std::swap(LHS, RHS); std::swap(Op0Pred, Op1Pred); std::swap(Op0Ordered, Op1Ordered); } if (Op0Pred == 0) { - // uno && ueq -> uno && (uno || eq) -> ueq + // uno && ueq -> uno && (uno || eq) -> uno // ord && olt -> ord && (ord && lt) -> olt - if (Op0Ordered == Op1Ordered) + if (!Op0Ordered && (Op0Ordered == Op1Ordered)) + return LHS; + if (Op0Ordered && (Op0Ordered == Op1Ordered)) return RHS; // uno && oeq -> uno && (ord && eq) -> false - // uno && ord -> false if (!Op0Ordered) return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); // ord && ueq -> ord && (uno || eq) -> oeq @@ -1932,10 +1936,15 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // A | ( A ^ B) -> A | B // A | (~A ^ B) -> A | ~B + // (A & B) | (A ^ B) if (match(Op1, m_Xor(m_Value(A), m_Value(B)))) { if (Op0 == A || Op0 == B) return BinaryOperator::CreateOr(A, B); + if (match(Op0, m_And(m_Specific(A), m_Specific(B))) || + match(Op0, m_And(m_Specific(B), m_Specific(A)))) + return BinaryOperator::CreateOr(A, B); + if (Op1->hasOneUse() && match(A, m_Not(m_Specific(Op0)))) { Value *Not = Builder->CreateNot(B, B->getName()+".not"); return BinaryOperator::CreateOr(Not, Op0); @@ -2212,7 +2221,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (Op0I && Op1I && Op0I->isShift() && Op0I->getOpcode() == Op1I->getOpcode() && Op0I->getOperand(1) == Op1I->getOperand(1) && - (Op1I->hasOneUse() || Op1I->hasOneUse())) { + (Op0I->hasOneUse() || Op1I->hasOneUse())) { Value *NewOp = Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0), Op0I->getName()); diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 77e47271008c..d34fab103fa3 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -172,8 +172,6 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (isFreeCall(&CI)) return visitFree(CI); - if (isMalloc(&CI)) - return visitMalloc(CI); // If the caller function is nounwind, mark the call as nounwind, even if the // callee isn't. @@ -246,78 +244,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { switch (II->getIntrinsicID()) { default: break; case Intrinsic::objectsize: { - // We need target data for just about everything so depend on it. - if (!TD) break; - - Type *ReturnTy = CI.getType(); - uint64_t DontKnow = II->getArgOperand(1) == Builder->getTrue() ? 0 : -1ULL; - - // Get to the real allocated thing and offset as fast as possible. 
- Value *Op1 = II->getArgOperand(0)->stripPointerCasts(); - - uint64_t Offset = 0; - uint64_t Size = -1ULL; - - // Try to look through constant GEPs. - if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1)) { - if (!GEP->hasAllConstantIndices()) break; - - // Get the current byte offset into the thing. Use the original - // operand in case we're looking through a bitcast. - SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end()); - if (!GEP->getPointerOperandType()->isPointerTy()) - return 0; - Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops); - - Op1 = GEP->getPointerOperand()->stripPointerCasts(); - - // Make sure we're not a constant offset from an external - // global. - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1)) - if (!GV->hasDefinitiveInitializer()) break; - } - - // If we've stripped down to a single global variable that we - // can know the size of then just return that. - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1)) { - if (GV->hasDefinitiveInitializer()) { - Constant *C = GV->getInitializer(); - Size = TD->getTypeAllocSize(C->getType()); - } else { - // Can't determine size of the GV. - Constant *RetVal = ConstantInt::get(ReturnTy, DontKnow); - return ReplaceInstUsesWith(CI, RetVal); - } - } else if (AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) { - // Get alloca size. - if (AI->getAllocatedType()->isSized()) { - Size = TD->getTypeAllocSize(AI->getAllocatedType()); - if (AI->isArrayAllocation()) { - const ConstantInt *C = dyn_cast<ConstantInt>(AI->getArraySize()); - if (!C) break; - Size *= C->getZExtValue(); - } - } - } else if (CallInst *MI = extractMallocCall(Op1)) { - // Get allocation size. - Type* MallocType = getMallocAllocatedType(MI); - if (MallocType && MallocType->isSized()) - if (Value *NElems = getMallocArraySize(MI, TD, true)) - if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems)) - Size = NElements->getZExtValue() * TD->getTypeAllocSize(MallocType); - } - - // Do not return "I don't know" here. Later optimization passes could - // make it possible to evaluate objectsize to a constant. - if (Size == -1ULL) - break; - - if (Size < Offset) { - // Out of bound reference? Negative index normalized to large - // index? Just return "I don't know". - return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, DontKnow)); - } - return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, Size-Offset)); + uint64_t Size; + if (getObjectSize(II->getArgOperand(0), Size, TD)) + return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size)); + return 0; } case Intrinsic::bswap: // bswap(bswap(x)) -> x @@ -694,6 +624,57 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + case Intrinsic::arm_neon_vmulls: + case Intrinsic::arm_neon_vmullu: { + Value *Arg0 = II->getArgOperand(0); + Value *Arg1 = II->getArgOperand(1); + + // Handle mul by zero first: + if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) { + return ReplaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType())); + } + + // Check for constant LHS & RHS - in this case we just simplify. 
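+    // (e.g. a vmull of two constant <4 x i16> vectors folds directly to a constant <4 x i32> vector)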
+ bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu); + VectorType *NewVT = cast<VectorType>(II->getType()); + unsigned NewWidth = NewVT->getElementType()->getIntegerBitWidth(); + if (ConstantDataVector *CV0 = dyn_cast<ConstantDataVector>(Arg0)) { + if (ConstantDataVector *CV1 = dyn_cast<ConstantDataVector>(Arg1)) { + VectorType* VT = cast<VectorType>(CV0->getType()); + SmallVector<Constant*, 4> NewElems; + for (unsigned i = 0; i < VT->getNumElements(); ++i) { + APInt CV0E = + (cast<ConstantInt>(CV0->getAggregateElement(i)))->getValue(); + CV0E = Zext ? CV0E.zext(NewWidth) : CV0E.sext(NewWidth); + APInt CV1E = + (cast<ConstantInt>(CV1->getAggregateElement(i)))->getValue(); + CV1E = Zext ? CV1E.zext(NewWidth) : CV1E.sext(NewWidth); + NewElems.push_back( + ConstantInt::get(NewVT->getElementType(), CV0E * CV1E)); + } + return ReplaceInstUsesWith(CI, ConstantVector::get(NewElems)); + } + + // Couldn't simplify - cannonicalize constant to the RHS. + std::swap(Arg0, Arg1); + } + + // Handle mul by one: + if (ConstantDataVector *CV1 = dyn_cast<ConstantDataVector>(Arg1)) { + if (ConstantInt *Splat = + dyn_cast_or_null<ConstantInt>(CV1->getSplatValue())) { + if (Splat->isOne()) { + if (Zext) + return CastInst::CreateZExtOrBitCast(Arg0, II->getType()); + // else + return CastInst::CreateSExtOrBitCast(Arg0, II->getType()); + } + } + } + + break; + } + case Intrinsic::stackrestore: { // If the save is right next to the restore, remove the restore. This can // happen when variable allocas are DCE'd. @@ -711,7 +692,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { TerminatorInst *TI = II->getParent()->getTerminator(); bool CannotRemove = false; for (++BI; &*BI != TI; ++BI) { - if (isa<AllocaInst>(BI) || isMalloc(BI)) { + if (isa<AllocaInst>(BI)) { CannotRemove = true; break; } @@ -814,7 +795,7 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) { if (CI->getCalledFunction() == 0) return 0; InstCombineFortifiedLibCalls Simplifier(this); - Simplifier.fold(CI, TD); + Simplifier.fold(CI, TD, TLI); return Simplifier.NewInstruction; } @@ -898,6 +879,9 @@ static IntrinsicInst *FindInitTrampoline(Value *Callee) { // visitCallSite - Improvements for call and invoke instructions. // Instruction *InstCombiner::visitCallSite(CallSite CS) { + if (isAllocLikeFn(CS.getInstruction())) + return visitAllocSite(*CS.getInstruction()); + bool Changed = false; // If the callee is a pointer to a function, attempt to move any casts to the @@ -933,24 +917,24 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { } if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) { - // This instruction is not reachable, just remove it. We insert a store to - // undef so that we know that this code is not reachable, despite the fact - // that we can't modify the CFG here. - new StoreInst(ConstantInt::getTrue(Callee->getContext()), - UndefValue::get(Type::getInt1PtrTy(Callee->getContext())), - CS.getInstruction()); - // If CS does not return void then replaceAllUsesWith undef. // This allows ValueHandlers and custom metadata to adjust itself. if (!CS.getInstruction()->getType()->isVoidTy()) ReplaceInstUsesWith(*CS.getInstruction(), UndefValue::get(CS.getInstruction()->getType())); - if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) { - // Don't break the CFG, insert a dummy cond branch. 
- BranchInst::Create(II->getNormalDest(), II->getUnwindDest(), - ConstantInt::getTrue(Callee->getContext()), II); + if (isa<InvokeInst>(CS.getInstruction())) { + // Can't remove an invoke because we cannot change the CFG. + return 0; } + + // This instruction is not reachable, just remove it. We insert a store to + // undef so that we know that this code is not reachable, despite the fact + // that we can't modify the CFG here. + new StoreInst(ConstantInt::getTrue(Callee->getContext()), + UndefValue::get(Type::getInt1PtrTy(Callee->getContext())), + CS.getInstruction()); + return EraseInstFromFunction(*CS.getInstruction()); } @@ -1194,8 +1178,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { if (NewRetTy->isVoidTy()) Caller->setName(""); // Void type should not have a name. - const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(), - attrVec.end()); + const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec); Instruction *NC; if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { @@ -1367,8 +1350,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, NestF->getType() == PointerType::getUnqual(NewFTy) ? NestF : ConstantExpr::getBitCast(NestF, PointerType::getUnqual(NewFTy)); - const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(), - NewAttrs.end()); + const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs); Instruction *NewCaller; if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 39279f437205..555b4428d2e8 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -34,7 +34,7 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) { // Cannot look past anything that might overflow. OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val); - if (OBI && !OBI->hasNoUnsignedWrap()) { + if (OBI && !OBI->hasNoUnsignedWrap() && !OBI->hasNoSignedWrap()) { Scale = 1; Offset = 0; return Val; @@ -648,10 +648,8 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { if (!I) return false; // If the input is a truncate from the destination type, we can trivially - // eliminate it, even if it has multiple uses. - // FIXME: This is currently disabled until codegen can handle this without - // pessimizing code, PR5997. - if (0 && isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty) + // eliminate it. + if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty) return true; // We can't extend or shrink something that has multiple uses: doing so would @@ -992,11 +990,8 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) { Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; - // If this is a truncate from the dest type, we can trivially eliminate it, - // even if it has multiple uses. - // FIXME: This is currently disabled until codegen can handle this without - // pessimizing code, PR5997. - if (0 && isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty) + // If this is a truncate from the dest type, we can trivially eliminate it. + if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty) return true; // We can't extend or shrink something that has multiple uses: doing so would @@ -1341,10 +1336,9 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { // non-type-safe code. 
if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0)) && GEP->hasAllConstantIndices()) { - // We are guaranteed to get a constant from EmitGEPOffset. - ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(GEP)); - int64_t Offset = OffsetV->getSExtValue(); - + SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end()); + int64_t Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops); + // Get the base pointer input of the bitcast, and the type it points to. Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0); Type *GEPIdxTy = diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index ab2987ff24cd..bdd310e97f6c 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1035,7 +1035,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) { // Pull in the high bits from known-ones set. APInt NewRHS = RHS->getValue().zext(SrcBits); - NewRHS |= KnownOne; + NewRHS |= KnownOne & APInt::getHighBitsSet(SrcBits, SrcBits-DstBits); return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), ConstantInt::get(ICI.getContext(), NewRHS)); } @@ -2580,10 +2580,25 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } } + // Transform (zext A) == (B & (1<<X)-1) --> A == (trunc B) + // and (B & (1<<X)-1) == (zext A) --> A == (trunc B) + ConstantInt *Cst1; + if ((Op0->hasOneUse() && + match(Op0, m_ZExt(m_Value(A))) && + match(Op1, m_And(m_Value(B), m_ConstantInt(Cst1)))) || + (Op1->hasOneUse() && + match(Op0, m_And(m_Value(B), m_ConstantInt(Cst1))) && + match(Op1, m_ZExt(m_Value(A))))) { + APInt Pow2 = Cst1->getValue() + 1; + if (Pow2.isPowerOf2() && isa<IntegerType>(A->getType()) && + Pow2.logBase2() == cast<IntegerType>(A->getType())->getBitWidth()) + return new ICmpInst(I.getPredicate(), A, + Builder->CreateTrunc(B, A->getType())); + } + // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to // "icmp (and X, mask), cst" uint64_t ShAmt = 0; - ConstantInt *Cst1; if (Op0->hasOneUse() && match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A), m_ConstantInt(ShAmt))))) && @@ -2809,7 +2824,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, case ICmpInst::ICMP_UGE: // (float)int >= -4.4 --> true // (float)int >= 4.4 --> int > 4 - if (!RHS.isNegative()) + if (RHS.isNegative()) return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); Pred = ICmpInst::ICMP_UGT; break; diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index b2f2e248e417..c485844aaeb4 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -22,72 +22,6 @@ using namespace llvm; STATISTIC(NumDeadStore, "Number of dead stores eliminated"); -// Try to kill dead allocas by walking through its uses until we see some use -// that could escape. This is a conservative analysis which tries to handle -// GEPs, bitcasts, stores, and no-op intrinsics. These tend to be the things -// left after inlining and SROA finish chewing on an alloca. 
-static Instruction *removeDeadAlloca(InstCombiner &IC, AllocaInst &AI) { - SmallVector<Instruction *, 4> Worklist, DeadStores; - Worklist.push_back(&AI); - do { - Instruction *PI = Worklist.pop_back_val(); - for (Value::use_iterator UI = PI->use_begin(), UE = PI->use_end(); - UI != UE; ++UI) { - Instruction *I = cast<Instruction>(*UI); - switch (I->getOpcode()) { - default: - // Give up the moment we see something we can't handle. - return 0; - - case Instruction::GetElementPtr: - case Instruction::BitCast: - Worklist.push_back(I); - continue; - - case Instruction::Call: - // We can handle a limited subset of calls to no-op intrinsics. - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { - switch (II->getIntrinsicID()) { - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - case Intrinsic::invariant_start: - case Intrinsic::invariant_end: - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - continue; - default: - return 0; - } - } - // Reject everything else. - return 0; - - case Instruction::Store: { - // Stores into the alloca are only live if the alloca is live. - StoreInst *SI = cast<StoreInst>(I); - // We can eliminate atomic stores, but not volatile. - if (SI->isVolatile()) - return 0; - // The store is only trivially safe if the poniter is the destination - // as opposed to the value. We're conservative here and don't check for - // the case where we store the address of a dead alloca into a dead - // alloca. - if (SI->getPointerOperand() != PI) - return 0; - DeadStores.push_back(I); - continue; - } - } - } - } while (!Worklist.empty()); - - // The alloca is dead. Kill off all the stores to it, and then replace it - // with undef. - while (!DeadStores.empty()) - IC.EraseInstFromFunction(*DeadStores.pop_back_val()); - return IC.ReplaceInstUsesWith(AI, UndefValue::get(AI.getType())); -} - Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // Ensure that the alloca array size argument has type intptr_t, so that // any casting is exposed early. @@ -106,7 +40,6 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); - assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!"); AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName()); New->setAlignment(AI.getAlignment()); @@ -135,22 +68,54 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { } } - if (TD && isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized()) { - // If alloca'ing a zero byte object, replace the alloca with a null pointer. - // Note that we only do this for alloca's, because malloc should allocate - // and return a unique pointer, even for a zero byte allocation. - if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0) - return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); - + if (TD && AI.getAllocatedType()->isSized()) { // If the alignment is 0 (unspecified), assign it the preferred alignment. if (AI.getAlignment() == 0) AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType())); + + // Move all alloca's of zero byte objects to the entry block and merge them + // together. Note that we only do this for alloca's, because malloc should + // allocate and return a unique pointer, even for a zero byte allocation. + if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0) { + // For a zero sized alloca there is no point in doing an array allocation. 
+ // This is helpful if the array size is a complicated expression not used + // elsewhere. + if (AI.isArrayAllocation()) { + AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1)); + return &AI; + } + + // Get the first instruction in the entry block. + BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock(); + Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg(); + if (FirstInst != &AI) { + // If the entry block doesn't start with a zero-size alloca then move + // this one to the start of the entry block. There is no problem with + // dominance as the array size was forced to a constant earlier already. + AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst); + if (!EntryAI || !EntryAI->getAllocatedType()->isSized() || + TD->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) { + AI.moveBefore(FirstInst); + return &AI; + } + + // Replace this zero-sized alloca with the one at the start of the entry + // block after ensuring that the address will be aligned enough for both + // types. + unsigned MaxAlign = + std::max(TD->getPrefTypeAlignment(EntryAI->getAllocatedType()), + TD->getPrefTypeAlignment(AI.getAllocatedType())); + EntryAI->setAlignment(MaxAlign); + if (AI.getType() != EntryAI->getType()) + return new BitCastInst(EntryAI, AI.getType()); + return ReplaceInstUsesWith(AI, EntryAI); + } + } } - // Try to aggressively remove allocas which are only used for GEPs, lifetime - // markers, and stores. This happens when SROA iteratively promotes stores - // out of the alloca, and we need to cleanup after it. - return removeDeadAlloca(*this, AI); + // At last, use the generic allocation site handler to aggressively remove + // unused allocas. + return visitAllocSite(AI); } diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 5168e2a113ca..35a0bbb76146 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -464,9 +464,12 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { // X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2) { const APInt *CI; Value *N; - if (match(Op1, m_Shl(m_Power2(CI), m_Value(N)))) { + if (match(Op1, m_Shl(m_Power2(CI), m_Value(N))) || + match(Op1, m_ZExt(m_Shl(m_Power2(CI), m_Value(N))))) { if (*CI != 1) N = Builder->CreateAdd(N, ConstantInt::get(I.getType(),CI->logBase2())); + if (ZExtInst *Z = dyn_cast<ZExtInst>(Op1)) + N = Builder->CreateZExt(N, Z->getDestTy()); if (I.isExact()) return BinaryOperator::CreateExactLShr(Op0, N); return BinaryOperator::CreateLShr(Op0, N); diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index e727b2c592db..291e80019e8d 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -129,6 +129,12 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, if (TI->isCast()) { if (TI->getOperand(0)->getType() != FI->getOperand(0)->getType()) return 0; + // The select condition may be a vector. We may only change the operand + // type if the vector width remains the same (and matches the condition). + Type *CondTy = SI.getCondition()->getType(); + if (CondTy->isVectorTy() && CondTy->getVectorNumElements() != + FI->getOperand(0)->getType()->getVectorNumElements()) + return 0; } else { return 0; // unknown unary op. 
} @@ -498,7 +504,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, // NOTE: if we wanted to, this is where to detect integer MIN/MAX - if (isa<Constant>(CmpRHS)) { + if (CmpRHS != CmpLHS && isa<Constant>(CmpRHS)) { if (CmpLHS == TrueVal && Pred == ICmpInst::ICMP_EQ) { // Transform (X == C) ? X : Y -> (X == C) ? C : Y SI.setOperand(1, CmpRHS); @@ -875,12 +881,16 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) { if (TrueSI->getCondition() == CondVal) { + if (SI.getTrueValue() == TrueSI->getTrueValue()) + return 0; SI.setOperand(1, TrueSI->getTrueValue()); return &SI; } } if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) { if (FalseSI->getCondition() == CondVal) { + if (SI.getFalseValue() == FalseSI->getFalseValue()) + return 0; SI.setOperand(2, FalseSI->getFalseValue()); return &SI; } @@ -893,5 +903,16 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return &SI; } + if (VectorType* VecTy = dyn_cast<VectorType>(SI.getType())) { + unsigned VWidth = VecTy->getNumElements(); + APInt UndefElts(VWidth, 0); + APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); + if (Value *V = SimplifyDemandedVectorElts(&SI, AllOnesEltMask, UndefElts)) { + if (V != &SI) + return ReplaceInstUsesWith(SI, V); + return &SI; + } + } + return 0; } diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index b31049e59f18..4bb2403299ce 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -151,7 +151,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, // We can always turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but it isn't // profitable unless we know the and'd out bits are already zero. - if (CI->getZExtValue() > NumBits) { + if (CI->getValue().ult(TypeWidth) && CI->getZExtValue() > NumBits) { unsigned LowBits = CI->getZExtValue() - NumBits; if (MaskedValueIsZero(I->getOperand(0), APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits)) @@ -529,6 +529,19 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, ShiftOp = 0; if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) { + + // This is a constant shift of a constant shift. Be careful about hiding + // shl instructions behind bit masks. They are used to represent multiplies + // by a constant, and it is important that simple arithmetic expressions + // are still recognizable by scalar evolution. + // + // The transforms applied to shl are very similar to the transforms applied + // to mul by constant. We can be more aggressive about optimizing right + // shifts. + // + // Combinations of right and left shifts will still be optimized in + // DAGCombine where scalar evolution no longer applies. + ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1)); uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits); uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits); @@ -554,13 +567,6 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, } if (ShiftAmt1 == ShiftAmt2) { - // If we have ((X >>? C) << C), turn this into X & (-1 << C). - if (I.getOpcode() == Instruction::Shl && - ShiftOp->getOpcode() != Instruction::Shl) { - APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1)); - return BinaryOperator::CreateAnd(X, - ConstantInt::get(I.getContext(),Mask)); - } // If we have ((X << C) >>u C), turn this into X & (-1 >>u C). 
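// e.g. for i8 and C == 3: ((X << 3) >>u 3) becomes X & 0x1F, since -1 >>u 3 is 0x1F.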
if (I.getOpcode() == Instruction::LShr && ShiftOp->getOpcode() == Instruction::Shl) { @@ -570,28 +576,23 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, } } else if (ShiftAmt1 < ShiftAmt2) { uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1; - - // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2) + + // (X >>?,exact C1) << C2 --> X << (C2-C1) + // The inexact version is deferred to DAGCombine so we don't hide shl + // behind a bit mask. if (I.getOpcode() == Instruction::Shl && - ShiftOp->getOpcode() != Instruction::Shl) { + ShiftOp->getOpcode() != Instruction::Shl && + ShiftOp->isExact()) { assert(ShiftOp->getOpcode() == Instruction::LShr || ShiftOp->getOpcode() == Instruction::AShr); ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff); - if (ShiftOp->isExact()) { - // (X >>?,exact C1) << C2 --> X << (C2-C1) - BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl, - X, ShiftDiffCst); - NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); - NewShl->setHasNoSignedWrap(I.hasNoSignedWrap()); - return NewShl; - } - Value *Shift = Builder->CreateShl(X, ShiftDiffCst); - - APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); - return BinaryOperator::CreateAnd(Shift, - ConstantInt::get(I.getContext(),Mask)); + BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl, + X, ShiftDiffCst); + NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); + NewShl->setHasNoSignedWrap(I.hasNoSignedWrap()); + return NewShl; } - + // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2) if (I.getOpcode() == Instruction::LShr && ShiftOp->getOpcode() == Instruction::Shl) { @@ -627,24 +628,19 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, assert(ShiftAmt2 < ShiftAmt1); uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2; - // (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2) + // (X >>?exact C1) << C2 --> X >>?exact (C1-C2) + // The inexact version is deferred to DAGCombine so we don't hide shl + // behind a bit mask. 
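+      // e.g. (X >>u,exact 3) << 1 becomes X >>u,exact 2.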
if (I.getOpcode() == Instruction::Shl && - ShiftOp->getOpcode() != Instruction::Shl) { + ShiftOp->getOpcode() != Instruction::Shl && + ShiftOp->isExact()) { ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff); - if (ShiftOp->isExact()) { - // (X >>?exact C1) << C2 --> X >>?exact (C1-C2) - BinaryOperator *NewShr = BinaryOperator::Create(ShiftOp->getOpcode(), - X, ShiftDiffCst); - NewShr->setIsExact(true); - return NewShr; - } - Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), - X, ShiftDiffCst); - APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); - return BinaryOperator::CreateAnd(Shift, - ConstantInt::get(I.getContext(),Mask)); + BinaryOperator *NewShr = BinaryOperator::Create(ShiftOp->getOpcode(), + X, ShiftDiffCst); + NewShr->setIsExact(true); + return NewShr; } - + // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2) if (I.getOpcode() == Instruction::LShr && ShiftOp->getOpcode() == Instruction::Shl) { diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 125c74a89a11..54be8ed3fa90 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -989,6 +989,29 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, } break; } + case Instruction::Select: { + APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts); + if (ConstantVector* CV = dyn_cast<ConstantVector>(I->getOperand(0))) { + for (unsigned i = 0; i < VWidth; i++) { + if (CV->getAggregateElement(i)->isNullValue()) + LeftDemanded.clearBit(i); + else + RightDemanded.clearBit(i); + } + } + + TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, + UndefElts, Depth+1); + if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; } + + TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded, + UndefElts2, Depth+1); + if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; } + + // Output elements are undefined if both are undefined. + UndefElts &= UndefElts2; + break; + } case Instruction::BitCast: { // Vector->vector casts only. VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType()); @@ -1074,6 +1097,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // like undef&0. The result is known zero, not undef. UndefElts &= UndefElts2; break; + case Instruction::FPTrunc: + case Instruction::FPExt: + TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, + UndefElts, Depth+1); + if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } + break; case Instruction::Call: { IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 066b2ec89c3e..68ecd516049d 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -87,30 +87,34 @@ void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const { } +Value *InstCombiner::EmitGEPOffset(User *GEP) { + return llvm::EmitGEPOffset(Builder, *getTargetData(), GEP); +} + /// ShouldChangeType - Return true if it is desirable to convert a computation /// from 'From' to 'To'. We don't want to convert from a legal to an illegal /// type for example, or from a smaller to a larger illegal type. 
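/// For example, with legal i32 and i64: i64 -> i32 and i160 -> i64 are desirable, but i32 -> i17 and i64 -> i160 are not.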
bool InstCombiner::ShouldChangeType(Type *From, Type *To) const { assert(From->isIntegerTy() && To->isIntegerTy()); - + // If we don't have TD, we don't know if the source/dest are legal. if (!TD) return false; - + unsigned FromWidth = From->getPrimitiveSizeInBits(); unsigned ToWidth = To->getPrimitiveSizeInBits(); bool FromLegal = TD->isLegalInteger(FromWidth); bool ToLegal = TD->isLegalInteger(ToWidth); - + // If this is a legal integer from type, and the result would be an illegal // type, don't do the transformation. if (FromLegal && !ToLegal) return false; - + // Otherwise, if both are illegal, do not increase the size of the result. We // do allow things like i160 -> i64, but not i64 -> i160. if (!FromLegal && !ToLegal && ToWidth > FromWidth) return false; - + return true; } @@ -127,7 +131,7 @@ static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) { // We reason about Add and Sub Only. Instruction::BinaryOps Opcode = I.getOpcode(); - if (Opcode != Instruction::Add && + if (Opcode != Instruction::Add && Opcode != Instruction::Sub) { return false; } @@ -203,7 +207,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. if (MaintainNoSignedWrap(I, B, C) && - (!Op0 || (isa<BinaryOperator>(Op0) && Op0->hasNoSignedWrap()))) { + (!Op0 || (isa<BinaryOperator>(Op0) && Op0->hasNoSignedWrap()))) { // Note: this is only valid because SimplifyBinOp doesn't look at // the operands to Op0. I.clearSubclassOptionalData(); @@ -211,7 +215,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { } else { I.clearSubclassOptionalData(); } - + Changed = true; ++NumReassoc; continue; @@ -540,7 +544,7 @@ static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO, Value *Op0 = SO, *Op1 = ConstOperand; if (!ConstIsRHS) std::swap(Op0, Op1); - + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I)) return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1, SO->getName()+".op"); @@ -579,7 +583,7 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { if (SrcTy && SrcTy->getNumElements() != DestTy->getNumElements()) return 0; } - + Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this); Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this); @@ -599,7 +603,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { unsigned NumPHIValues = PN->getNumIncomingValues(); if (NumPHIValues == 0) return 0; - + // We normally only transform phis with a single use. However, if a PHI has // multiple uses and they are all the same operation, we can fold *all* of the // uses into the PHI. @@ -613,7 +617,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { } // Otherwise, we can replace *all* users with the new PHI we form. } - + // Check to see if all of the operands of the PHI are simple constants // (constantint/constantfp/undef). If there is one non-constant value, // remember the BB it is in. If there is more than one or if *it* is a PHI, @@ -627,7 +631,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { if (isa<PHINode>(InVal)) return 0; // Itself a phi. if (NonConstBB) return 0; // More than one non-const value. 
- + NonConstBB = PN->getIncomingBlock(i); // If the InVal is an invoke at the end of the pred block, then we can't @@ -635,14 +639,14 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { if (InvokeInst *II = dyn_cast<InvokeInst>(InVal)) if (II->getParent() == NonConstBB) return 0; - + // If the incoming non-constant value is in I's block, we will remove one // instruction, but insert another equivalent one, leading to infinite // instcombine. if (NonConstBB == I.getParent()) return 0; } - + // If there is exactly one non-constant value, we can insert a copy of the // operation in that block. However, if this is a critical edge, we would be // inserting the computation one some other paths (e.g. inside a loop). Only @@ -656,12 +660,12 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues()); InsertNewInstBefore(NewPN, *PN); NewPN->takeName(PN); - + // If we are going to have to insert a new computation, do so right before the // predecessors terminator. if (NonConstBB) Builder->SetInsertPoint(NonConstBB->getTerminator()); - + // Next, add all of the operands to the PHI. if (SelectInst *SI = dyn_cast<SelectInst>(&I)) { // We only currently try to fold the condition of a select when it is a phi, @@ -706,20 +710,20 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { PN->getIncomingValue(i), C, "phitmp"); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } - } else { + } else { CastInst *CI = cast<CastInst>(&I); Type *RetTy = CI->getType(); for (unsigned i = 0; i != NumPHIValues; ++i) { Value *InV; if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy); - else + else InV = Builder->CreateCast(CI->getOpcode(), PN->getIncomingValue(i), I.getType(), "phitmp"); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } } - + for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) { Instruction *User = cast<Instruction>(*UI++); @@ -734,11 +738,11 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { /// or not there is a sequence of GEP indices into the type that will land us at /// the specified offset. If so, fill them into NewIndices and return the /// resultant element type, otherwise return null. -Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset, +Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset, SmallVectorImpl<Value*> &NewIndices) { if (!TD) return 0; if (!Ty->isSized()) return 0; - + // Start with the index over the outer type. Note that the type size // might be zero (even if the offset isn't zero) if the indexed type // is something like [0 x {int, int}] @@ -747,7 +751,7 @@ Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset, if (int64_t TySize = TD->getTypeAllocSize(Ty)) { FirstIdx = Offset/TySize; Offset -= FirstIdx*TySize; - + // Handle hosts where % returns negative instead of values [0..TySize). if (Offset < 0) { --FirstIdx; @@ -756,24 +760,24 @@ Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset, } assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset"); } - + NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx)); - + // Index into the types. If we fail, set OrigBase to null. while (Offset) { // Indexing into tail padding between struct/array elements. 
if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty)) return 0; - + if (StructType *STy = dyn_cast<StructType>(Ty)) { const StructLayout *SL = TD->getStructLayout(STy); assert(Offset < (int64_t)SL->getSizeInBytes() && "Offset must stay within the indexed type"); - + unsigned Elt = SL->getElementContainingOffset(Offset); NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()), Elt)); - + Offset -= SL->getElementOffset(Elt); Ty = STy->getElementType(Elt); } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) { @@ -787,7 +791,7 @@ Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset, return 0; } } - + return Ty; } @@ -948,7 +952,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { Res->setIsInBounds(GEP.isInBounds()); return Res; } - + if (ArrayType *XATy = dyn_cast<ArrayType>(StrippedPtrTy->getElementType())){ // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ? @@ -981,16 +985,16 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // V and GEP are both pointer types --> BitCast return new BitCastInst(NewGEP, GEP.getType()); } - + // Transform things like: // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp // (where tmp = 8*tmp2) into: // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast - + if (TD && SrcElTy->isArrayTy() && ResElTy->isIntegerTy(8)) { uint64_t ArrayEltSize = TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()); - + // Check to see if "tmp" is a scale by a multiple of ArrayEltSize. We // allow either a mul, shift, or constant here. Value *NewIdx = 0; @@ -1015,7 +1019,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { NewIdx = Inst->getOperand(0); } } - + // If the index will be to exactly the right offset with the scale taken // out, perform the transformation. Note, we don't know whether Scale is // signed or not. We'll use unsigned version of division/modulo @@ -1054,10 +1058,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices() && StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) { - // Determine how much the GEP moves the pointer. We are guaranteed to get - // a constant back from EmitGEPOffset. - ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP)); - int64_t Offset = OffsetV->getSExtValue(); + // Determine how much the GEP moves the pointer. + SmallVector<Value*, 8> Ops(GEP.idx_begin(), GEP.idx_end()); + int64_t Offset = TD->getIndexedOffset(GEP.getPointerOperandType(), Ops); // If this GEP instruction doesn't move the pointer, just replace the GEP // with a bitcast of the real input to the dest type. @@ -1065,7 +1068,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // If the bitcast is of an allocation, and the allocation will be // converted to match the type of the cast, don't touch this. if (isa<AllocaInst>(BCI->getOperand(0)) || - isMalloc(BCI->getOperand(0))) { + isAllocationFn(BCI->getOperand(0))) { // See if the bitcast simplifies, if so, don't nuke this GEP yet. if (Instruction *I = visitBitCast(*BCI)) { if (I != BCI) { @@ -1078,7 +1081,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } return new BitCastInst(BCI->getOperand(0), GEP.getType()); } - + // Otherwise, if the offset is non-zero, we need to find out if there is a // field at Offset in 'A's type. If so, we can pull the cast through the // GEP. 
@@ -1089,68 +1092,103 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { Value *NGEP = GEP.isInBounds() ? Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices) : Builder->CreateGEP(BCI->getOperand(0), NewIndices); - + if (NGEP->getType() == GEP.getType()) return ReplaceInstUsesWith(GEP, NGEP); NGEP->takeName(&GEP); return new BitCastInst(NGEP, GEP.getType()); } } - } - + } + return 0; } -static bool IsOnlyNullComparedAndFreed(Value *V, SmallVectorImpl<WeakVH> &Users, - int Depth = 0) { - if (Depth == 8) - return false; +static bool +isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users) { + SmallVector<Instruction*, 4> Worklist; + Worklist.push_back(AI); - for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); - UI != UE; ++UI) { - User *U = *UI; - if (isFreeCall(U)) { - Users.push_back(U); - continue; - } - if (ICmpInst *ICI = dyn_cast<ICmpInst>(U)) { - if (ICI->isEquality() && isa<ConstantPointerNull>(ICI->getOperand(1))) { - Users.push_back(ICI); + do { + Instruction *PI = Worklist.pop_back_val(); + for (Value::use_iterator UI = PI->use_begin(), UE = PI->use_end(); UI != UE; + ++UI) { + Instruction *I = cast<Instruction>(*UI); + switch (I->getOpcode()) { + default: + // Give up the moment we see something we can't handle. + return false; + + case Instruction::BitCast: + case Instruction::GetElementPtr: + Users.push_back(I); + Worklist.push_back(I); continue; - } - } - if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { - if (IsOnlyNullComparedAndFreed(BCI, Users, Depth+1)) { - Users.push_back(BCI); + + case Instruction::ICmp: { + ICmpInst *ICI = cast<ICmpInst>(I); + // We can fold eq/ne comparisons with null to false/true, respectively. + if (!ICI->isEquality() || !isa<ConstantPointerNull>(ICI->getOperand(1))) + return false; + Users.push_back(I); continue; } - } - if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { - if (IsOnlyNullComparedAndFreed(GEPI, Users, Depth+1)) { - Users.push_back(GEPI); + + case Instruction::Call: + // Ignore no-op and store intrinsics. + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: + return false; + + case Intrinsic::memmove: + case Intrinsic::memcpy: + case Intrinsic::memset: { + MemIntrinsic *MI = cast<MemIntrinsic>(II); + if (MI->isVolatile() || MI->getRawDest() != PI) + return false; + } + // fall through + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::objectsize: + Users.push_back(I); + continue; + } + } + + if (isFreeCall(I)) { + Users.push_back(I); + continue; + } + return false; + + case Instruction::Store: { + StoreInst *SI = cast<StoreInst>(I); + if (SI->isVolatile() || SI->getPointerOperand() != PI) + return false; + Users.push_back(I); continue; } - } - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { - if (II->getIntrinsicID() == Intrinsic::lifetime_start || - II->getIntrinsicID() == Intrinsic::lifetime_end) { - Users.push_back(II); - continue; } + llvm_unreachable("missing a return?"); } - return false; - } + } while (!Worklist.empty()); return true; } -Instruction *InstCombiner::visitMalloc(Instruction &MI) { +Instruction *InstCombiner::visitAllocSite(Instruction &MI) { // If we have a malloc call which is only used in any amount of comparisons // to null and free calls, delete the calls and replace the comparisons with // true or false as appropriate. 
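// e.g. 'p = malloc(4); if (p != 0) free(p);' can be removed entirely.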
SmallVector<WeakVH, 64> Users; - if (IsOnlyNullComparedAndFreed(&MI, Users)) { + if (isAllocSiteRemovable(&MI, Users)) { for (unsigned i = 0, e = Users.size(); i != e; ++i) { Instruction *I = cast_or_null<Instruction>(&*Users[i]); if (!I) continue; @@ -1161,9 +1199,23 @@ Instruction *InstCombiner::visitMalloc(Instruction &MI) { C->isFalseWhenEqual())); } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) { ReplaceInstUsesWith(*I, UndefValue::get(I->getType())); + } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + if (II->getIntrinsicID() == Intrinsic::objectsize) { + ConstantInt *CI = cast<ConstantInt>(II->getArgOperand(1)); + uint64_t DontKnow = CI->isZero() ? -1ULL : 0; + ReplaceInstUsesWith(*I, ConstantInt::get(I->getType(), DontKnow)); + } } EraseInstFromFunction(*I); } + + if (InvokeInst *II = dyn_cast<InvokeInst>(&MI)) { + // Replace invoke with a NOP intrinsic to maintain the original CFG + Module *M = II->getParent()->getParent()->getParent(); + Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing); + InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(), + ArrayRef<Value *>(), "", II->getParent()); + } return EraseInstFromFunction(MI); } return 0; @@ -1181,7 +1233,7 @@ Instruction *InstCombiner::visitFree(CallInst &FI) { UndefValue::get(Type::getInt1PtrTy(FI.getContext()))); return EraseInstFromFunction(FI); } - + // If we have 'free null' delete the instruction. This can happen in stl code // when lots of inlining happens. if (isa<ConstantPointerNull>(Op)) @@ -1207,14 +1259,14 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { // Cannonicalize fcmp_one -> fcmp_oeq FCmpInst::Predicate FPred; Value *Y; - if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)), + if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)), TrueDest, FalseDest)) && BI.getCondition()->hasOneUse()) if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE || FPred == FCmpInst::FCMP_OGE) { FCmpInst *Cond = cast<FCmpInst>(BI.getCondition()); Cond->setPredicate(FCmpInst::getInversePredicate(FPred)); - + // Swap Destinations and condition. BI.swapSuccessors(); Worklist.Add(Cond); @@ -1280,7 +1332,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { } return 0; // Can't handle other constants } - + if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) { // We're extracting from an insertvalue instruction, compare the indices const unsigned *exti, *exte, *insi, *inse; @@ -1329,7 +1381,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // %E = extractvalue { i32, { i32 } } %I, 1, 0 // with // %E extractvalue { i32 } { i32 42 }, 0 - return ExtractValueInst::Create(IV->getInsertedValueOperand(), + return ExtractValueInst::Create(IV->getInsertedValueOperand(), makeArrayRef(exti, exte)); } if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) { @@ -1349,7 +1401,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { EraseInstFromFunction(*II); return BinaryOperator::CreateAdd(LHS, RHS); } - + // If the normal result of the add is dead, and the RHS is a constant, // we can transform this into a range comparison. // overflow = uadd a, -4 --> overflow = icmp ugt a, 3 @@ -1798,7 +1850,7 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { /// many instructions are dead or constant). Additionally, if we find a branch /// whose condition is a known constant, we only visit the reachable successors. 
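/// e.g. for 'br i1 true, label %T, label %F' only the %T successor is added.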
/// -static bool AddReachableCodeToWorklist(BasicBlock *BB, +static bool AddReachableCodeToWorklist(BasicBlock *BB, SmallPtrSet<BasicBlock*, 64> &Visited, InstCombiner &IC, const TargetData *TD, @@ -1812,13 +1864,13 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, do { BB = Worklist.pop_back_val(); - + // We have now visited this block! If we've already been here, ignore it. if (!Visited.insert(BB)) continue; for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { Instruction *Inst = BBI++; - + // DCE instruction if trivially dead. if (isInstructionTriviallyDead(Inst)) { ++NumDeadInst; @@ -1826,7 +1878,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, Inst->eraseFromParent(); continue; } - + // ConstantProp instruction if trivially constant. if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0))) if (Constant *C = ConstantFoldInstruction(Inst, TD, TLI)) { @@ -1837,7 +1889,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, Inst->eraseFromParent(); continue; } - + if (TD) { // See if we can constant fold its operands. for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end(); @@ -1881,17 +1933,17 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, Worklist.push_back(ReachableBB); continue; } - + // Otherwise it is the default destination. Worklist.push_back(SI->getDefaultDest()); continue; } } - + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) Worklist.push_back(TI->getSuccessor(i)); } while (!Worklist.empty()); - + // Once we've found all of the instructions to add to instcombine's worklist, // add them in reverse order. This way instcombine will visit from the top // of the function down. This jives well with the way that it adds all uses @@ -1899,13 +1951,13 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, // some N^2 behavior in pathological cases. IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0], InstrsForInstCombineWorklist.size()); - + return MadeIRChange; } bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { MadeIRChange = false; - + DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " << F.getName() << "\n"); @@ -1976,13 +2028,13 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { BasicBlock *BB = I->getParent(); Instruction *UserInst = cast<Instruction>(I->use_back()); BasicBlock *UserParent; - + // Get the block the use occurs in. if (PHINode *PN = dyn_cast<PHINode>(UserInst)) UserParent = PN->getIncomingBlock(I->use_begin().getUse()); else UserParent = UserInst->getParent(); - + if (UserParent != BB) { bool UserIsSuccessor = false; // See if the user is one of our successors. @@ -2004,7 +2056,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { // Now that we have an instruction, try combining it to simplify it. Builder->SetInsertPoint(I->getParent(), I); Builder->SetCurrentDebugLocation(I->getDebugLoc()); - + #ifndef NDEBUG std::string OrigI; #endif @@ -2069,14 +2121,14 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { bool InstCombiner::runOnFunction(Function &F) { TD = getAnalysisIfAvailable<TargetData>(); TLI = &getAnalysis<TargetLibraryInfo>(); - + /// Builder - This is an IRBuilder that automatically inserts new /// instructions into the worklist when they are created. 
- IRBuilder<true, TargetFolder, InstCombineIRInserter> + IRBuilder<true, TargetFolder, InstCombineIRInserter> TheBuilder(F.getContext(), TargetFolder(TD), InstCombineIRInserter(Worklist)); Builder = &TheBuilder; - + bool EverMadeChange = false; // Lower dbg.declare intrinsics otherwise their value may be clobbered @@ -2087,7 +2139,7 @@ bool InstCombiner::runOnFunction(Function &F) { unsigned Iteration = 0; while (DoOneIteration(F, Iteration++)) EverMadeChange = true; - + Builder = 0; return EverMadeChange; } |
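As a sanity check on the (x & y) + (x ^ y) --> x | y fold that this patch adds to visitAdd, here is a minimal standalone C++ program (illustrative only, not part of the patch) verifying the underlying bit identity:

#include <cassert>
#include <cstdint>

int main() {
  // x & y and x ^ y share no set bits, so their sum never carries;
  // the sum therefore equals their bitwise OR, which is x | y.
  for (uint32_t x = 0; x < 256; ++x)
    for (uint32_t y = 0; y < 256; ++y)
      assert(((x & y) + (x ^ y)) == (x | y));
  return 0;
}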