diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-12-28 21:22:49 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-12-28 21:22:49 +0000 |
commit | b2b7c066a48f61ec67332fb797a20bb04901c83d (patch) | |
tree | b3de3914f41bb160a795f7dcd767566c62bdf3e8 | |
parent | fd4675b5a029cce616a1b0ad339344c5df800ea6 (diff) |
Vendor import of llvm trunk r321530:vendor/llvm/llvm-trunk-r321530
Notes
Notes:
svn path=/vendor/llvm/dist/; revision=327300
svn path=/vendor/llvm/llvm-trunk-r321530/; revision=327301; tag=vendor/llvm/llvm-trunk-r321530
116 files changed, 2261 insertions, 1219 deletions
diff --git a/bindings/go/llvm/ir_test.go b/bindings/go/llvm/ir_test.go index 325ee4890f4c..fb39955ec10f 100644 --- a/bindings/go/llvm/ir_test.go +++ b/bindings/go/llvm/ir_test.go @@ -142,7 +142,7 @@ func TestSubtypes(t *testing.T) { int_pointer := PointerType(cont.Int32Type(), 0) int_inner := int_pointer.Subtypes() if len(int_inner) != 1 { - t.Errorf("Got size %d, though wanted 1") + t.Errorf("Got size %d, though wanted 1", len(int_inner)) } if int_inner[0] != cont.Int32Type() { t.Errorf("Expected int32 type") @@ -151,7 +151,7 @@ func TestSubtypes(t *testing.T) { st_pointer := cont.StructType([]Type{cont.Int32Type(), cont.Int8Type()}, false) st_inner := st_pointer.Subtypes() if len(st_inner) != 2 { - t.Errorf("Got size %d, though wanted 2") + t.Errorf("Got size %d, though wanted 2", len(int_inner)) } if st_inner[0] != cont.Int32Type() { t.Errorf("Expected first struct field to be int32") diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index 42597c871eae..c6be957b0e45 100755 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -17,7 +17,7 @@ include(HandleLLVMStdlib) if( UNIX AND NOT (BEOS OR HAIKU) ) # Used by check_symbol_exists: - set(CMAKE_REQUIRED_LIBRARIES m) + list(APPEND CMAKE_REQUIRED_LIBRARIES "m") endif() # x86_64 FreeBSD 9.2 requires libcxxrt to be specified explicitly. 
if( CMAKE_SYSTEM MATCHES "FreeBSD-9.2-RELEASE" AND diff --git a/cmake/modules/CheckAtomic.cmake b/cmake/modules/CheckAtomic.cmake index 11f0366bc851..9a4cdf12a622 100644 --- a/cmake/modules/CheckAtomic.cmake +++ b/cmake/modules/CheckAtomic.cmake @@ -8,7 +8,7 @@ INCLUDE(CheckLibraryExists) function(check_working_cxx_atomics varname) set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) - set(CMAKE_REQUIRED_FLAGS "-std=c++11") + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -std=c++11") CHECK_CXX_SOURCE_COMPILES(" #include <atomic> std::atomic<int> x; diff --git a/cmake/modules/CheckCompilerVersion.cmake b/cmake/modules/CheckCompilerVersion.cmake index 2e8f5445781c..adf500ad53a7 100644 --- a/cmake/modules/CheckCompilerVersion.cmake +++ b/cmake/modules/CheckCompilerVersion.cmake @@ -28,7 +28,7 @@ if(NOT DEFINED LLVM_COMPILER_CHECKED) # bug in libstdc++4.6 that is fixed in libstdc++4.7. set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) set(OLD_CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES}) - set(CMAKE_REQUIRED_FLAGS "-std=c++0x") + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -std=c++0x") check_cxx_source_compiles(" #include <atomic> std::atomic<float> x(0.0f); diff --git a/docs/ProgrammersManual.rst b/docs/ProgrammersManual.rst index 719d3997594e..07048a52319e 100644 --- a/docs/ProgrammersManual.rst +++ b/docs/ProgrammersManual.rst @@ -1040,7 +1040,7 @@ line argument: .. code-block:: c++ - DEBUG(errs() << "I am here!\n"); + DEBUG(dbgs() << "I am here!\n"); Then you can run your pass like this: @@ -1076,10 +1076,10 @@ follows: .. code-block:: c++ #define DEBUG_TYPE "foo" - DEBUG(errs() << "'foo' debug type\n"); + DEBUG(dbgs() << "'foo' debug type\n"); #undef DEBUG_TYPE #define DEBUG_TYPE "bar" - DEBUG(errs() << "'bar' debug type\n")); + DEBUG(dbgs() << "'bar' debug type\n"); #undef DEBUG_TYPE Then you can run your pass like this: @@ -1120,8 +1120,8 @@ preceding example could be written as: .. 
code-block:: c++ - DEBUG_WITH_TYPE("foo", errs() << "'foo' debug type\n"); - DEBUG_WITH_TYPE("bar", errs() << "'bar' debug type\n")); + DEBUG_WITH_TYPE("foo", dbgs() << "'foo' debug type\n"); + DEBUG_WITH_TYPE("bar", dbgs() << "'bar' debug type\n"); .. _Statistic: diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h index 3932a2ec2498..4f896bddff87 100644 --- a/include/llvm/Analysis/InstructionSimplify.h +++ b/include/llvm/Analysis/InstructionSimplify.h @@ -197,6 +197,9 @@ Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q); +/// Given a callsite, fold the result or return null. +Value *SimplifyCall(ImmutableCallSite CS, const SimplifyQuery &Q); + /// Given a function and iterators over arguments, fold the result or return /// null. Value *SimplifyCall(ImmutableCallSite CS, Value *V, User::op_iterator ArgBegin, diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h index 391a333594e9..c2974525a6ff 100644 --- a/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -407,12 +407,6 @@ public: void getNonLocalPointerDependency(Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result); - /// Perform a dependency query specifically for QueryInst's access to Loc. - /// The other comments for getNonLocalPointerDependency apply here as well. - void getNonLocalPointerDependencyFrom(Instruction *QueryInst, - const MemoryLocation &Loc, bool isLoad, - SmallVectorImpl<NonLocalDepResult> &Result); - /// Removes an instruction from the dependence analysis, updating the /// dependence of instructions that previously depended on it. 
void removeInstruction(Instruction *InstToRemove); diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index cecd8958e9d9..c20f20cfbe4d 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -646,9 +646,6 @@ public: /// \brief Additional properties of an operand's values. enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 }; - /// \return True if target can execute instructions out of order. - bool isOutOfOrder() const; - /// \return The number of scalar or vector registers that the target has. /// If 'Vectors' is true, it returns the number of vector registers. If it is /// set to false, it returns the number of scalar registers. @@ -1021,7 +1018,6 @@ public: Type *Ty) = 0; virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty) = 0; - virtual bool isOutOfOrder() const = 0; virtual unsigned getNumberOfRegisters(bool Vector) = 0; virtual unsigned getRegisterBitWidth(bool Vector) const = 0; virtual unsigned getMinVectorRegisterBitWidth() = 0; @@ -1299,9 +1295,6 @@ public: Type *Ty) override { return Impl.getIntImmCost(IID, Idx, Imm, Ty); } - bool isOutOfOrder() const override { - return Impl.isOutOfOrder(); - } unsigned getNumberOfRegisters(bool Vector) override { return Impl.getNumberOfRegisters(Vector); } diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h index 3625675d53de..4c37402278ef 100644 --- a/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -337,8 +337,6 @@ public: return TTI::TCC_Free; } - bool isOutOfOrder() const { return false; } - unsigned getNumberOfRegisters(bool Vector) { return 8; } unsigned getRegisterBitWidth(bool Vector) const { return 32; } diff --git a/include/llvm/BinaryFormat/COFF.h b/include/llvm/BinaryFormat/COFF.h index e6bb50cadb12..a55c544dfe90 100644 --- 
a/include/llvm/BinaryFormat/COFF.h +++ b/include/llvm/BinaryFormat/COFF.h @@ -95,7 +95,7 @@ enum MachineTypes : unsigned { MT_Invalid = 0xffff, IMAGE_FILE_MACHINE_UNKNOWN = 0x0, - IMAGE_FILE_MACHINE_AM33 = 0x13, + IMAGE_FILE_MACHINE_AM33 = 0x1D3, IMAGE_FILE_MACHINE_AMD64 = 0x8664, IMAGE_FILE_MACHINE_ARM = 0x1C0, IMAGE_FILE_MACHINE_ARMNT = 0x1C4, diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h index f1f9275b0786..526ddb1b9706 100644 --- a/include/llvm/CodeGen/BasicTTIImpl.h +++ b/include/llvm/CodeGen/BasicTTIImpl.h @@ -402,10 +402,6 @@ public: return BaseT::getInstructionLatency(I); } - bool isOutOfOrder() const { - return getST()->getSchedModel().isOutOfOrder(); - } - /// @} /// \name Vector TTI Implementations diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td index d7999cd33231..cc08fe683272 100644 --- a/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/include/llvm/IR/IntrinsicsAMDGPU.td @@ -288,7 +288,8 @@ class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty], llvm_i32_ty, // ordering llvm_i32_ty, // scope llvm_i1_ty], // isVolatile - [IntrArgMemOnly, NoCapture<0>] + [IntrArgMemOnly, NoCapture<0>], "", + [SDNPMemOperand] >; def int_amdgcn_atomic_inc : AMDGPUAtomicIncIntrin; diff --git a/include/llvm/ProfileData/SampleProf.h b/include/llvm/ProfileData/SampleProf.h index 9eccafc65f3a..641631cc4ec9 100644 --- a/include/llvm/ProfileData/SampleProf.h +++ b/include/llvm/ProfileData/SampleProf.h @@ -226,8 +226,8 @@ public: sampleprof_error addCalledTargetSamples(uint32_t LineOffset, uint32_t Discriminator, - const std::string &FName, - uint64_t Num, uint64_t Weight = 1) { + StringRef FName, uint64_t Num, + uint64_t Weight = 1) { return BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget( FName, Num, Weight); } diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp index 7276f2524fed..de7d21f9f133 100644 --- a/lib/Analysis/DemandedBits.cpp +++ 
b/lib/Analysis/DemandedBits.cpp @@ -385,8 +385,8 @@ bool DemandedBits::isInstructionDead(Instruction *I) { void DemandedBits::print(raw_ostream &OS) { performAnalysis(); for (auto &KV : AliveBits) { - OS << "DemandedBits: 0x" << utohexstr(KV.second.getLimitedValue()) << " for " - << *KV.first << "\n"; + OS << "DemandedBits: 0x" << Twine::utohexstr(KV.second.getLimitedValue()) + << " for " << *KV.first << '\n'; } } diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 3ce1281743c3..93fb1143e505 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -3897,8 +3897,9 @@ static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQ // If extracting a specified index from the vector, see if we can recursively // find a previously computed scalar that was inserted into the vector. if (auto *IdxC = dyn_cast<ConstantInt>(Idx)) - if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue())) - return Elt; + if (IdxC->getValue().ule(Vec->getType()->getVectorNumElements())) + if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue())) + return Elt; // An undef extract index can be arbitrarily chosen to be an out-of-range // index value, which would result in the instruction being undef. 
@@ -4494,6 +4495,22 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, return *ArgBegin; return nullptr; } + case Intrinsic::bswap: { + Value *IIOperand = *ArgBegin; + Value *X = nullptr; + // bswap(bswap(x)) -> x + if (match(IIOperand, m_BSwap(m_Value(X)))) + return X; + return nullptr; + } + case Intrinsic::bitreverse: { + Value *IIOperand = *ArgBegin; + Value *X = nullptr; + // bitreverse(bitreverse(x)) -> x + if (match(IIOperand, m_BitReverse(m_Value(X)))) + return X; + return nullptr; + } default: return nullptr; } @@ -4548,6 +4565,16 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, return SimplifyRelativeLoad(C0, C1, Q.DL); return nullptr; } + case Intrinsic::powi: + if (ConstantInt *Power = dyn_cast<ConstantInt>(RHS)) { + // powi(x, 0) -> 1.0 + if (Power->isZero()) + return ConstantFP::get(LHS->getType(), 1.0); + // powi(x, 1) -> x + if (Power->isOne()) + return LHS; + } + return nullptr; default: return nullptr; } @@ -4616,6 +4643,12 @@ Value *llvm::SimplifyCall(ImmutableCallSite CS, Value *V, return ::SimplifyCall(CS, V, Args.begin(), Args.end(), Q, RecursionLimit); } +Value *llvm::SimplifyCall(ImmutableCallSite ICS, const SimplifyQuery &Q) { + CallSite CS(const_cast<Instruction*>(ICS.getInstruction())); + return ::SimplifyCall(CS, CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), + Q, RecursionLimit); +} + /// See if we can compute a simplified version of this instruction. /// If not, this returns null. 
@@ -4750,8 +4783,7 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ, break; case Instruction::Call: { CallSite CS(cast<CallInst>(I)); - Result = SimplifyCall(CS, CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), - Q); + Result = SimplifyCall(CS, Q); break; } #define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc: diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index 7b792ed0a2e2..0e3f498cb14c 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -265,13 +265,21 @@ void Lint::visitCallSite(CallSite CS) { // Check that noalias arguments don't alias other arguments. This is // not fully precise because we don't know the sizes of the dereferenced // memory regions. - if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) - for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) + if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) { + AttributeList PAL = CS.getAttributes(); + unsigned ArgNo = 0; + for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) { + // Skip ByVal arguments since they will be memcpy'd to the callee's + // stack so we're not really passing the pointer anyway. + if (PAL.hasParamAttribute(ArgNo++, Attribute::ByVal)) + continue; if (AI != BI && (*BI)->getType()->isPointerTy()) { AliasResult Result = AA->alias(*AI, *BI); Assert(Result != MustAlias && Result != PartialAlias, "Unusual: noalias argument aliases another argument", &I); } + } + } // Check that an sret argument points to valid memory. 
if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) { diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index bb7bf967994c..bf83f52ccf2e 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -920,14 +920,6 @@ void MemoryDependenceResults::getNonLocalPointerDependency( Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) { const MemoryLocation Loc = MemoryLocation::get(QueryInst); bool isLoad = isa<LoadInst>(QueryInst); - return getNonLocalPointerDependencyFrom(QueryInst, Loc, isLoad, Result); -} - -void MemoryDependenceResults::getNonLocalPointerDependencyFrom( - Instruction *QueryInst, - const MemoryLocation &Loc, - bool isLoad, - SmallVectorImpl<NonLocalDepResult> &Result) { BasicBlock *FromBB = QueryInst->getParent(); assert(FromBB); @@ -1127,15 +1119,21 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB( // If we already have a cache entry for this CacheKey, we may need to do some // work to reconcile the cache entry and the current query. if (!Pair.second) { - if (CacheInfo->Size != Loc.Size) { - // The query's Size differs from the cached one. Throw out the - // cached data and proceed with the query at the new size. + if (CacheInfo->Size < Loc.Size) { + // The query's Size is greater than the cached one. Throw out the + // cached data and proceed with the query at the greater size. CacheInfo->Pair = BBSkipFirstBlockPair(); CacheInfo->Size = Loc.Size; for (auto &Entry : CacheInfo->NonLocalDeps) if (Instruction *Inst = Entry.getResult().getInst()) RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey); CacheInfo->NonLocalDeps.clear(); + } else if (CacheInfo->Size > Loc.Size) { + // This query's Size is less than the cached one. Conservatively restart + // the query using the greater size. 
+ return getNonLocalPointerDepFromBB( + QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad, + StartBB, Result, Visited, SkipFirstBlock); } // If the query's AATags are inconsistent with the cached one, diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp index 10badd89a4a8..efa5bd564ad0 100644 --- a/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -306,7 +306,9 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, NonRenamableLocal || HasInlineAsmMaybeReferencingInternal || // Inliner doesn't handle variadic functions. // FIXME: refactor this to use the same code that inliner is using. - F.isVarArg(); + F.isVarArg() || + // Don't try to import functions with noinline attribute. + F.getAttributes().hasFnAttribute(Attribute::NoInline); GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport, /* Live = */ false, F.isDSOLocal()); FunctionSummary::FFlags FunFlags{ diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 2a8088dc4452..f34549ae52b4 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -1268,7 +1268,11 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, } if (!hasTrunc) return getAddExpr(Operands); - UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL. + // In spite we checked in the beginning that ID is not in the cache, + // it is possible that during recursion and different modification + // ID came to cache, so if we found it, just return it. + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) + return S; } // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can @@ -1284,7 +1288,11 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, } if (!hasTrunc) return getMulExpr(Operands); - UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL. 
+ // In spite we checked in the beginning that ID is not in the cache, + // it is possible that during recursion and different modification + // ID came to cache, so if we found it, just return it. + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) + return S; } // If the input value is a chrec scev, truncate the chrec's operands. diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 86f714b930d0..3ceda677ba61 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -187,8 +187,21 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, // generated code. if (isa<DbgInfoIntrinsic>(IP)) ScanLimit++; + + // Conservatively, do not use any instruction which has any of wrap/exact + // flags installed. + // TODO: Instead of simply disable poison instructions we can be clever + // here and match SCEV to this instruction. + auto canGeneratePoison = [](Instruction *I) { + if (isa<OverflowingBinaryOperator>(I) && + (I->hasNoSignedWrap() || I->hasNoUnsignedWrap())) + return true; + if (isa<PossiblyExactOperator>(I) && I->isExact()) + return true; + return false; + }; if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS && - IP->getOperand(1) == RHS) + IP->getOperand(1) == RHS && !canGeneratePoison(&*IP)) return &*IP; if (IP == BlockBegin) break; } diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index c9e9c6d1a419..b744cae51ed7 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -314,10 +314,6 @@ int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx, return Cost; } -bool TargetTransformInfo::isOutOfOrder() const { - return TTIImpl->isOutOfOrder(); -} - unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const { return TTIImpl->getNumberOfRegisters(Vector); } diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp 
index 2730daefa625..cd4cee631568 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -4238,14 +4238,14 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred, LHS = CmpLHS; RHS = CmpRHS; - // If the predicate is an "or-equal" (FP) predicate, then signed zeroes may - // return inconsistent results between implementations. - // (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0 - // minNum(0.0, -0.0) // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1) - // Therefore we behave conservatively and only proceed if at least one of the - // operands is known to not be zero, or if we don't care about signed zeroes. + // Signed zero may return inconsistent results between implementations. + // (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0 + // minNum(0.0, -0.0) // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1) + // Therefore, we behave conservatively and only proceed if at least one of the + // operands is known to not be zero or if we don't care about signed zero. switch (Pred) { default: break; + // FIXME: Include OGT/OLT/UGT/ULT. case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE: case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE: if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) && @@ -4493,14 +4493,24 @@ SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, // Deal with type mismatches. if (CastOp && CmpLHS->getType() != TrueVal->getType()) { - if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp)) + if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp)) { + // If this is a potential fmin/fmax with a cast to integer, then ignore + // -0.0 because there is no corresponding integer value. 
+ if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI) + FMF.setNoSignedZeros(); return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, cast<CastInst>(TrueVal)->getOperand(0), C, LHS, RHS); - if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp)) + } + if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp)) { + // If this is a potential fmin/fmax with a cast to integer, then ignore + // -0.0 because there is no corresponding integer value. + if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI) + FMF.setNoSignedZeros(); return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, C, cast<CastInst>(FalseVal)->getOperand(0), LHS, RHS); + } } return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS); diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 388663eb1db7..3218dce8f575 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1922,14 +1922,16 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { EVT VT = Sel.getValueType(); SDLoc DL(Sel); SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1); - assert((NewCT.isUndef() || isConstantOrConstantVector(NewCT) || - isConstantFPBuildVectorOrConstantFP(NewCT)) && - "Failed to constant fold a binop with constant operands"); + if (!NewCT.isUndef() && + !isConstantOrConstantVector(NewCT, true) && + !isConstantFPBuildVectorOrConstantFP(NewCT)) + return SDValue(); SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1); - assert((NewCF.isUndef() || isConstantOrConstantVector(NewCF) || - isConstantFPBuildVectorOrConstantFP(NewCF)) && - "Failed to constant fold a binop with constant operands"); + if (!NewCF.isUndef() && + !isConstantOrConstantVector(NewCF, true) && + !isConstantFPBuildVectorOrConstantFP(NewCF)) + return SDValue(); return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF); } @@ -3577,7 +3579,8 @@ SDValue 
DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, // TODO: What is the 'or' equivalent of this fold? // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2) - if (IsAnd && LL == RL && CC0 == CC1 && IsInteger && CC0 == ISD::SETNE && + if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 && + IsInteger && CC0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) || (isAllOnesConstant(LR) && isNullConstant(RR)))) { SDValue One = DAG.getConstant(1, DL, OpVT); @@ -3641,15 +3644,18 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) { if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && VT.getSizeInBits() <= 64) { if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - APInt ADDC = ADDI->getAPIntValue(); - if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { + if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal // immediate for an add, but it is legal if its top c2 bits are set, // transform the ADD so the immediate doesn't need to be materialized // in a register. - if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { + APInt ADDC = ADDI->getAPIntValue(); + APInt SRLC = SRLI->getAPIntValue(); + if (ADDC.getMinSignedBits() <= 64 && + SRLC.ult(VT.getSizeInBits()) && + !TLI.isLegalAddImmediate(ADDC.getSExtValue())) { APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), - SRLI->getZExtValue()); + SRLC.getZExtValue()); if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { ADDC |= Mask; if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { diff --git a/lib/IR/SafepointIRVerifier.cpp b/lib/IR/SafepointIRVerifier.cpp index 68e0ce39a54e..04deb434cec2 100644 --- a/lib/IR/SafepointIRVerifier.cpp +++ b/lib/IR/SafepointIRVerifier.cpp @@ -237,6 +237,59 @@ class InstructionVerifier; /// Builds BasicBlockState for each BB of the function. 
/// It can traverse function for verification and provides all required /// information. +/// +/// GC pointer may be in one of three states: relocated, unrelocated and +/// poisoned. +/// Relocated pointer may be used without any restrictions. +/// Unrelocated pointer cannot be dereferenced, passed as argument to any call +/// or returned. Unrelocated pointer may be safely compared against another +/// unrelocated pointer or against a pointer exclusively derived from null. +/// Poisoned pointers are produced when we somehow derive pointer from relocated +/// and unrelocated pointers (e.g. phi, select). This pointers may be safely +/// used in a very limited number of situations. Currently the only way to use +/// it is comparison against constant exclusively derived from null. All +/// limitations arise due to their undefined state: this pointers should be +/// treated as relocated and unrelocated simultaneously. +/// Rules of deriving: +/// R + U = P - that's where the poisoned pointers come from +/// P + X = P +/// U + U = U +/// R + R = R +/// X + C = X +/// Where "+" - any operation that somehow derive pointer, U - unrelocated, +/// R - relocated and P - poisoned, C - constant, X - U or R or P or C or +/// nothing (in case when "+" is unary operation). +/// Deriving of pointers by itself is always safe. +/// NOTE: when we are making decision on the status of instruction's result: +/// a) for phi we need to check status of each input *at the end of +/// corresponding predecessor BB*. +/// b) for other instructions we need to check status of each input *at the +/// current point*. 
+/// +/// FIXME: This works fairly well except one case +/// bb1: +/// p = *some GC-ptr def* +/// p1 = gep p, offset +/// / | +/// / | +/// bb2: | +/// safepoint | +/// \ | +/// \ | +/// bb3: +/// p2 = phi [p, bb2] [p1, bb1] +/// p3 = phi [p, bb2] [p, bb1] +/// here p and p1 is unrelocated +/// p2 and p3 is poisoned (though they shouldn't be) +/// +/// This leads to some weird results: +/// cmp eq p, p2 - illegal instruction (false-positive) +/// cmp eq p1, p2 - illegal instruction (false-positive) +/// cmp eq p, p3 - illegal instruction (false-positive) +/// cmp eq p, p1 - ok +/// To fix this we need to introduce conception of generations and be able to +/// check if two values belong to one generation or not. This way p2 will be +/// considered to be unrelocated and no false alarm will happen. class GCPtrTracker { const Function &F; SpecificBumpPtrAllocator<BasicBlockState> BSAllocator; @@ -244,6 +297,9 @@ class GCPtrTracker { // This set contains defs of unrelocated pointers that are proved to be legal // and don't need verification. DenseSet<const Instruction *> ValidUnrelocatedDefs; + // This set contains poisoned defs. They can be safely ignored during + // verification too. + DenseSet<const Value *> PoisonedDefs; public: GCPtrTracker(const Function &F, const DominatorTree &DT); @@ -251,6 +307,8 @@ public: BasicBlockState *getBasicBlockState(const BasicBlock *BB); const BasicBlockState *getBasicBlockState(const BasicBlock *BB) const; + bool isValuePoisoned(const Value *V) const { return PoisonedDefs.count(V); } + /// Traverse each BB of the function and call /// InstructionVerifier::verifyInstruction for each possibly invalid /// instruction. @@ -349,7 +407,9 @@ const BasicBlockState *GCPtrTracker::getBasicBlockState( } bool GCPtrTracker::instructionMayBeSkipped(const Instruction *I) const { - return ValidUnrelocatedDefs.count(I); + // Poisoned defs are skipped since they are always safe by itself by + // definition (for details see comment to this class). 
+ return ValidUnrelocatedDefs.count(I) || PoisonedDefs.count(I); } void GCPtrTracker::verifyFunction(GCPtrTracker &&Tracker, @@ -418,31 +478,78 @@ bool GCPtrTracker::removeValidUnrelocatedDefs(const BasicBlock *BB, "Passed Contribution should be from the passed BasicBlockState!"); AvailableValueSet AvailableSet = BBS->AvailableIn; bool ContributionChanged = false; + // For explanation why instructions are processed this way see + // "Rules of deriving" in the comment to this class. for (const Instruction &I : *BB) { - bool ProducesUnrelocatedPointer = false; - if ((isa<GetElementPtrInst>(I) || isa<BitCastInst>(I)) && - containsGCPtrType(I.getType())) { - // GEP/bitcast of unrelocated pointer is legal by itself but this - // def shouldn't appear in any AvailableSet. + bool ValidUnrelocatedPointerDef = false; + bool PoisonedPointerDef = false; + // TODO: `select` instructions should be handled here too. + if (const PHINode *PN = dyn_cast<PHINode>(&I)) { + if (containsGCPtrType(PN->getType())) { + // If both is true, output is poisoned. + bool HasRelocatedInputs = false; + bool HasUnrelocatedInputs = false; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + const BasicBlock *InBB = PN->getIncomingBlock(i); + const Value *InValue = PN->getIncomingValue(i); + + if (isNotExclusivelyConstantDerived(InValue)) { + if (isValuePoisoned(InValue)) { + // If any of inputs is poisoned, output is always poisoned too. + HasRelocatedInputs = true; + HasUnrelocatedInputs = true; + break; + } + if (BlockMap[InBB]->AvailableOut.count(InValue)) + HasRelocatedInputs = true; + else + HasUnrelocatedInputs = true; + } + } + if (HasUnrelocatedInputs) { + if (HasRelocatedInputs) + PoisonedPointerDef = true; + else + ValidUnrelocatedPointerDef = true; + } + } + } else if ((isa<GetElementPtrInst>(I) || isa<BitCastInst>(I)) && + containsGCPtrType(I.getType())) { + // GEP/bitcast of unrelocated pointer is legal by itself but this def + // shouldn't appear in any AvailableSet. 
for (const Value *V : I.operands()) if (containsGCPtrType(V->getType()) && isNotExclusivelyConstantDerived(V) && !AvailableSet.count(V)) { - ProducesUnrelocatedPointer = true; + if (isValuePoisoned(V)) + PoisonedPointerDef = true; + else + ValidUnrelocatedPointerDef = true; break; } } - if (!ProducesUnrelocatedPointer) { - bool Cleared = false; - transferInstruction(I, Cleared, AvailableSet); - (void)Cleared; - } else { - // Remove def of unrelocated pointer from Contribution of this BB - // and trigger update of all its successors. + assert(!(ValidUnrelocatedPointerDef && PoisonedPointerDef) && + "Value cannot be both unrelocated and poisoned!"); + if (ValidUnrelocatedPointerDef) { + // Remove def of unrelocated pointer from Contribution of this BB and + // trigger update of all its successors. Contribution.erase(&I); + PoisonedDefs.erase(&I); ValidUnrelocatedDefs.insert(&I); - DEBUG(dbgs() << "Removing " << I << " from Contribution of " + DEBUG(dbgs() << "Removing urelocated " << I << " from Contribution of " << BB->getName() << "\n"); ContributionChanged = true; + } else if (PoisonedPointerDef) { + // Mark pointer as poisoned, remove its def from Contribution and trigger + // update of all successors. + Contribution.erase(&I); + PoisonedDefs.insert(&I); + DEBUG(dbgs() << "Removing poisoned " << I << " from Contribution of " + << BB->getName() << "\n"); + ContributionChanged = true; + } else { + bool Cleared = false; + transferInstruction(I, Cleared, AvailableSet); + (void)Cleared; } } return ContributionChanged; @@ -524,8 +631,8 @@ void InstructionVerifier::verifyInstruction( // Returns true if LHS and RHS are unrelocated pointers and they are // valid unrelocated uses. - auto hasValidUnrelocatedUse = [&AvailableSet, baseTyLHS, baseTyRHS, &LHS, - &RHS] () { + auto hasValidUnrelocatedUse = [&AvailableSet, Tracker, baseTyLHS, baseTyRHS, + &LHS, &RHS] () { // A cmp instruction has valid unrelocated pointer operands only if // both operands are unrelocated pointers. 
// In the comparison between two pointers, if one is an unrelocated @@ -545,12 +652,23 @@ void InstructionVerifier::verifyInstruction( (baseTyLHS == BaseType::NonConstant && baseTyRHS == BaseType::ExclusivelySomeConstant)) return false; + + // If one of pointers is poisoned and other is not exclusively derived + // from null it is an invalid expression: it produces poisoned result + // and unless we want to track all defs (not only gc pointers) the only + // option is to prohibit such instructions. + if ((Tracker->isValuePoisoned(LHS) && baseTyRHS != ExclusivelyNull) || + (Tracker->isValuePoisoned(RHS) && baseTyLHS != ExclusivelyNull)) + return false; + // All other cases are valid cases enumerated below: - // 1. Comparison between an exlusively derived null pointer and a + // 1. Comparison between an exclusively derived null pointer and a // constant base pointer. - // 2. Comparison between an exlusively derived null pointer and a + // 2. Comparison between an exclusively derived null pointer and a // non-constant unrelocated base pointer. // 3. Comparison between 2 unrelocated pointers. + // 4. Comparison between a pointer exclusively derived from null and a + // non-constant poisoned pointer. return true; }; if (!hasValidUnrelocatedUse()) { diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp index c8b3892375f6..abcd8905ad35 100644 --- a/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/lib/LTO/ThinLTOCodeGenerator.cpp @@ -76,7 +76,7 @@ static void saveTempBitcode(const Module &TheModule, StringRef TempDir, if (TempDir.empty()) return; // User asked to save temps, let dump the bitcode file after import. 
- std::string SaveTempPath = (TempDir + llvm::utostr(count) + Suffix).str(); + std::string SaveTempPath = (TempDir + llvm::Twine(count) + Suffix).str(); std::error_code EC; raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None); if (EC) diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp index f94e9d3c4785..3e2150a451e0 100644 --- a/lib/MC/MCParser/ELFAsmParser.cpp +++ b/lib/MC/MCParser/ELFAsmParser.cpp @@ -427,13 +427,13 @@ bool ELFAsmParser::parseGroup(StringRef &GroupName) { GroupName = getTok().getString(); Lex(); } else if (getParser().parseIdentifier(GroupName)) { - return true; + return TokError("invalid group name"); } if (L.is(AsmToken::Comma)) { Lex(); StringRef Linkage; if (getParser().parseIdentifier(Linkage)) - return true; + return TokError("invalid linkage"); if (Linkage != "comdat") return TokError("Linkage must be 'comdat'"); } diff --git a/lib/ProfileData/Coverage/CoverageMapping.cpp b/lib/ProfileData/Coverage/CoverageMapping.cpp index ccc8cc56eb0a..8dbd58632f0e 100644 --- a/lib/ProfileData/Coverage/CoverageMapping.cpp +++ b/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -628,7 +628,7 @@ CoverageMapping::getInstantiationGroups(StringRef Filename) const { } std::vector<InstantiationGroup> Result; - for (const auto &InstantiationSet : InstantiationSetCollector) { + for (auto &InstantiationSet : InstantiationSetCollector) { InstantiationGroup IG{InstantiationSet.first.first, InstantiationSet.first.second, std::move(InstantiationSet.second)}; diff --git a/lib/Support/ARMAttributeParser.cpp b/lib/Support/ARMAttributeParser.cpp index 3d800eb7a96c..e39bddc4e8f2 100644 --- a/lib/Support/ARMAttributeParser.cpp +++ b/lib/Support/ARMAttributeParser.cpp @@ -666,7 +666,7 @@ void ARMAttributeParser::ParseSubsection(const uint8_t *Data, uint32_t Length) { ParseIndexList(Data, Offset, Indicies); break; default: - errs() << "unrecognised tag: 0x" << utohexstr(Tag) << '\n'; + errs() << "unrecognised tag: 0x" << Twine::utohexstr(Tag) << 
'\n'; return; } diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index b547a0932709..4caf4a4fdce0 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -688,7 +688,9 @@ static bool EatsUnboundedNumberOfValues(const Option *O) { O->getNumOccurrencesFlag() == cl::OneOrMore; } -static bool isWhitespace(char C) { return strchr(" \t\n\r\f\v", C); } +static bool isWhitespace(char C) { + return C == ' ' || C == '\t' || C == '\r' || C == '\n'; +} static bool isQuote(char C) { return C == '\"' || C == '\''; } @@ -709,17 +711,19 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver, break; } + char C = Src[I]; + // Backslash escapes the next character. - if (I + 1 < E && Src[I] == '\\') { + if (I + 1 < E && C == '\\') { ++I; // Skip the escape. Token.push_back(Src[I]); continue; } // Consume a quoted string. - if (isQuote(Src[I])) { - char Quote = Src[I++]; - while (I != E && Src[I] != Quote) { + if (isQuote(C)) { + ++I; + while (I != E && Src[I] != C) { // Backslash escapes the next character. if (Src[I] == '\\' && I + 1 != E) ++I; @@ -732,7 +736,7 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver, } // End the token if this is whitespace. - if (isWhitespace(Src[I])) { + if (isWhitespace(C)) { if (!Token.empty()) NewArgv.push_back(Saver.save(StringRef(Token)).data()); Token.clear(); @@ -740,7 +744,7 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver, } // This is a normal character. Append it. - Token.push_back(Src[I]); + Token.push_back(C); } // Append the last token after hitting EOF with no whitespace. @@ -798,25 +802,27 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver, // end of the source string. enum { INIT, UNQUOTED, QUOTED } State = INIT; for (size_t I = 0, E = Src.size(); I != E; ++I) { + char C = Src[I]; + // INIT state indicates that the current input index is at the start of // the string or between tokens. 
if (State == INIT) { - if (isWhitespace(Src[I])) { + if (isWhitespace(C)) { // Mark the end of lines in response files - if (MarkEOLs && Src[I] == '\n') + if (MarkEOLs && C == '\n') NewArgv.push_back(nullptr); continue; } - if (Src[I] == '"') { + if (C == '"') { State = QUOTED; continue; } - if (Src[I] == '\\') { + if (C == '\\') { I = parseBackslash(Src, I, Token); State = UNQUOTED; continue; } - Token.push_back(Src[I]); + Token.push_back(C); State = UNQUOTED; continue; } @@ -825,38 +831,38 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver, // quotes. if (State == UNQUOTED) { // Whitespace means the end of the token. - if (isWhitespace(Src[I])) { + if (isWhitespace(C)) { NewArgv.push_back(Saver.save(StringRef(Token)).data()); Token.clear(); State = INIT; // Mark the end of lines in response files - if (MarkEOLs && Src[I] == '\n') + if (MarkEOLs && C == '\n') NewArgv.push_back(nullptr); continue; } - if (Src[I] == '"') { + if (C == '"') { State = QUOTED; continue; } - if (Src[I] == '\\') { + if (C == '\\') { I = parseBackslash(Src, I, Token); continue; } - Token.push_back(Src[I]); + Token.push_back(C); continue; } // QUOTED state means that it's reading a token quoted by double quotes. if (State == QUOTED) { - if (Src[I] == '"') { + if (C == '"') { State = UNQUOTED; continue; } - if (Src[I] == '\\') { + if (C == '\\') { I = parseBackslash(Src, I, Token); continue; } - Token.push_back(Src[I]); + Token.push_back(C); } } // Append the last token after hitting EOF with no whitespace. diff --git a/lib/TableGen/Main.cpp b/lib/TableGen/Main.cpp index fc9d0cc08885..be35f894cccd 100644 --- a/lib/TableGen/Main.cpp +++ b/lib/TableGen/Main.cpp @@ -110,7 +110,7 @@ int llvm::TableGenMain(char *argv0, TableGenMainFn *MainFn) { return 1; if (ErrorsPrinted > 0) - return reportError(argv0, utostr(ErrorsPrinted) + " errors.\n"); + return reportError(argv0, Twine(ErrorsPrinted) + " errors.\n"); // Declare success. 
Out.keep(); diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index abbba7d1d5a9..40836b00b9e6 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -3673,15 +3673,6 @@ static bool getFMAPatterns(MachineInstr &Root, } break; case AArch64::FSUBv2f32: - if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2i32_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1); - Found = true; - } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2f32)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1); - Found = true; - } if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULv2i32_indexed)) { Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2); @@ -3691,17 +3682,17 @@ static bool getFMAPatterns(MachineInstr &Root, Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2); Found = true; } - break; - case AArch64::FSUBv2f64: if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2i64_indexed)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1); + AArch64::FMULv2i32_indexed)) { + Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1); Found = true; } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv2f64)) { - Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1); + AArch64::FMULv2f32)) { + Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1); Found = true; } + break; + case AArch64::FSUBv2f64: if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULv2i64_indexed)) { Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2); @@ -3711,17 +3702,17 @@ static bool getFMAPatterns(MachineInstr &Root, Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2); Found = true; } - break; - case AArch64::FSUBv4f32: if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv4i32_indexed)) { - 
Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1); + AArch64::FMULv2i64_indexed)) { + Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1); Found = true; } else if (canCombineWithFMUL(MBB, Root.getOperand(1), - AArch64::FMULv4f32)) { - Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1); + AArch64::FMULv2f64)) { + Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1); Found = true; } + break; + case AArch64::FSUBv4f32: if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULv4i32_indexed)) { Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2); @@ -3731,6 +3722,15 @@ static bool getFMAPatterns(MachineInstr &Root, Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2); Found = true; } + if (canCombineWithFMUL(MBB, Root.getOperand(1), + AArch64::FMULv4i32_indexed)) { + Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1); + Found = true; + } else if (canCombineWithFMUL(MBB, Root.getOperand(1), + AArch64::FMULv4f32)) { + Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1); + Found = true; + } break; } return Found; @@ -5062,4 +5062,4 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall( It = MBB.insert(It, LDRXpost); return It; -}
\ No newline at end of file +} diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 2aa395642c40..753cfff4cdae 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -1797,11 +1797,7 @@ void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) { llvm_unreachable("unsupported fp type"); APInt API = APF.bitcastToAPInt(); - std::string hexstr(utohexstr(API.getZExtValue())); - O << lead; - if (hexstr.length() < numHex) - O << std::string(numHex - hexstr.length(), '0'); - O << utohexstr(API.getZExtValue()); + O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true); } void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) { diff --git a/lib/Target/NVPTX/NVPTXMCExpr.cpp b/lib/Target/NVPTX/NVPTXMCExpr.cpp index 86a28f7d0700..a754a6a36dab 100644 --- a/lib/Target/NVPTX/NVPTXMCExpr.cpp +++ b/lib/Target/NVPTX/NVPTXMCExpr.cpp @@ -11,6 +11,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" +#include "llvm/Support/Format.h" using namespace llvm; #define DEBUG_TYPE "nvptx-mcexpr" @@ -47,10 +48,7 @@ void NVPTXFloatMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { } APInt API = APF.bitcastToAPInt(); - std::string HexStr(utohexstr(API.getZExtValue())); - if (HexStr.length() < NumHex) - OS << std::string(NumHex - HexStr.length(), '0'); - OS << utohexstr(API.getZExtValue()); + OS << format_hex_no_prefix(API.getZExtValue(), NumHex, /*Upper=*/true); } const NVPTXGenericMCSymbolRefExpr* diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index d19463ccb51f..204d97cbdd44 100644 --- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -190,7 +190,7 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (isVerbose()) { 
OutStreamer->AddComment("fallthrough-return: $pop" + - utostr(MFI->getWARegStackId( + Twine(MFI->getWARegStackId( MFI->getWAReg(MI->getOperand(0).getReg())))); OutStreamer->AddBlankLine(); } diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 87c65347e334..f1ce430f3323 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -2377,10 +2377,11 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, Flags |= Prefix; Name = Parser.getTok().getString(); Parser.Lex(); // eat the prefix - // Hack: we could have something like + // Hack: we could have something like "rep # some comment" or // "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl" while (Name.startswith(";") || Name.startswith("\n") || - Name.startswith("\t") || Name.startswith("/")) { + Name.startswith("#") || Name.startswith("\t") || + Name.startswith("/")) { Name = Parser.getTok().getString(); Parser.Lex(); // go to next prefix or instr } diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 7e7c35569093..ba998467b799 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -739,7 +739,8 @@ def ICLFeatures : ProcessorFeatures<CNLFeatures.Value, [ FeatureVNNI, FeatureVPCLMULQDQ, FeatureVPOPCNTDQ, - FeatureGFNI + FeatureGFNI, + FeatureCLWB ]>; class IcelakeProc<string Name> : ProcModel<Name, SkylakeServerModel, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 2c1faa157ddb..ba3b02e25a9d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1310,8 +1310,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal); - - setOperationAction(ISD::MUL, MVT::v8i64, Legal); } if (Subtarget.hasCDI()) { @@ -1388,8 
+1386,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UINT_TO_FP, VT, Legal); setOperationAction(ISD::FP_TO_SINT, VT, Legal); setOperationAction(ISD::FP_TO_UINT, VT, Legal); - - setOperationAction(ISD::MUL, VT, Legal); } } @@ -7108,8 +7104,8 @@ static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) { return DAG.getConstant(Immediate, dl, VT); } // Lower BUILD_VECTOR operation for v8i1 and v16i1 types. -SDValue -X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { MVT VT = Op.getSimpleValueType(); assert((VT.getVectorElementType() == MVT::i1) && @@ -7131,8 +7127,8 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { DAG.getBuildVector(MVT::v32i1, dl, Op.getNode()->ops().slice(32, 32)); // We have to manually lower both halves so getNode doesn't try to // reassemble the build_vector. - Lower = LowerBUILD_VECTORvXi1(Lower, DAG); - Upper = LowerBUILD_VECTORvXi1(Upper, DAG); + Lower = LowerBUILD_VECTORvXi1(Lower, DAG, Subtarget); + Upper = LowerBUILD_VECTORvXi1(Upper, DAG, Subtarget); return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lower, Upper); } SDValue Imm = ConvertI1VectorToInteger(Op, DAG); @@ -7881,7 +7877,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // Generate vectors for predicate vectors. if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) - return LowerBUILD_VECTORvXi1(Op, DAG); + return LowerBUILD_VECTORvXi1(Op, DAG, Subtarget); if (SDValue VectorConstant = materializeVectorConstant(Op, DAG, Subtarget)) return VectorConstant; @@ -15658,8 +15654,8 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, } /// 64-bit unsigned integer to double expansion. 
-SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, - SelectionDAG &DAG) const { +static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { // This algorithm is not obvious. Here it is what we're trying to output: /* movq %rax, %xmm0 @@ -15679,7 +15675,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, // Build some magic constants. static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 }; Constant *C0 = ConstantDataVector::get(*Context, CV0); - auto PtrVT = getPointerTy(DAG.getDataLayout()); + auto PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); SDValue CPIdx0 = DAG.getConstantPool(C0, PtrVT, 16); SmallVector<Constant*,2> CV1; @@ -15726,8 +15722,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, } /// 32-bit unsigned integer to float expansion. -SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, - SelectionDAG &DAG) const { +static SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { SDLoc dl(Op); // FP constant to bias correct the final result. SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl, @@ -15760,16 +15756,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias); // Handle final rounding. - MVT DestVT = Op.getSimpleValueType(); - - if (DestVT.bitsLT(MVT::f64)) - return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub, - DAG.getIntPtrConstant(0, dl)); - if (DestVT.bitsGT(MVT::f64)) - return DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub); - - // Handle final rounding. 
- return Sub; + return DAG.getFPExtendOrRound(Sub, dl, Op.getSimpleValueType()); } static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG, @@ -15901,8 +15888,8 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh); } -SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op, - SelectionDAG &DAG) const { +static SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { SDValue N0 = Op.getOperand(0); MVT SrcVT = N0.getSimpleValueType(); SDLoc dl(Op); @@ -15941,7 +15928,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, auto PtrVT = getPointerTy(DAG.getDataLayout()); if (Op.getSimpleValueType().isVector()) - return lowerUINT_TO_FP_vec(Op, DAG); + return lowerUINT_TO_FP_vec(Op, DAG, Subtarget); MVT SrcVT = N0.getSimpleValueType(); MVT DstVT = Op.getSimpleValueType(); @@ -15954,9 +15941,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, } if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64) - return LowerUINT_TO_FP_i64(Op, DAG); + return LowerUINT_TO_FP_i64(Op, DAG, Subtarget); if (SrcVT == MVT::i32 && X86ScalarSSEf64) - return LowerUINT_TO_FP_i32(Op, DAG); + return LowerUINT_TO_FP_i32(Op, DAG, Subtarget); if (Subtarget.is64Bit() && SrcVT == MVT::i64 && DstVT == MVT::f32) return SDValue(); @@ -22097,7 +22084,14 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget, // Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle. if (VT == MVT::v4i32) { assert(Subtarget.hasSSE2() && !Subtarget.hasSSE41() && - "Should not custom lower when pmuldq is available!"); + "Should not custom lower when pmulld is available!"); + + // If the upper 17 bits of each element are zero then we can use PMADD. 
+ APInt Mask17 = APInt::getHighBitsSet(32, 17); + if (DAG.MaskedValueIsZero(A, Mask17) && DAG.MaskedValueIsZero(B, Mask17)) + return DAG.getNode(X86ISD::VPMADDWD, dl, VT, + DAG.getBitcast(MVT::v8i16, A), + DAG.getBitcast(MVT::v8i16, B)); // Extract the odd parts. static const int UnpackMask[] = { 1, -1, 3, -1 }; @@ -22149,6 +22143,11 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget, bool AHiIsZero = DAG.MaskedValueIsZero(A, UpperBitsMask); bool BHiIsZero = DAG.MaskedValueIsZero(B, UpperBitsMask); + // If DQI is supported we can use MULLQ, but MULUDQ is still better if the + // the high bits are known to be zero. + if (Subtarget.hasDQI() && (!AHiIsZero || !BHiIsZero)) + return Op; + // Bit cast to 32-bit vectors for MULUDQ. SDValue Alo = DAG.getBitcast(MulVT, A); SDValue Blo = DAG.getBitcast(MulVT, B); @@ -31012,8 +31011,8 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), Vals[IdxVal]); } - // The replacement was made in place; don't return anything. - return SDValue(); + // The replacement was made in place; return N so it won't be revisited. + return SDValue(N, 0); } /// If a vector select has an operand that is -1 or 0, try to simplify the @@ -32267,6 +32266,13 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG, if ((NumElts % 2) != 0) return SDValue(); + // If the upper 17 bits of each element are zero then we can use PMADD. 
+ APInt Mask17 = APInt::getHighBitsSet(32, 17); + if (VT == MVT::v4i32 && DAG.MaskedValueIsZero(N0, Mask17) && + DAG.MaskedValueIsZero(N1, Mask17)) + return DAG.getNode(X86ISD::VPMADDWD, DL, VT, DAG.getBitcast(MVT::v8i16, N0), + DAG.getBitcast(MVT::v8i16, N1)); + unsigned RegSize = 128; MVT OpsVT = MVT::getVectorVT(MVT::i16, RegSize / 16); EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts); @@ -34882,7 +34888,7 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG, // X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - its // better to truncate if we have the chance. if (SrcVT.getScalarType() == MVT::i64 && TLI.isOperationLegal(Opcode, VT) && - !TLI.isOperationLegal(Opcode, SrcVT)) + !Subtarget.hasDQI()) return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1)); LLVM_FALLTHROUGH; case ISD::ADD: { diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 8464081b1b08..7708f577ba70 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -1167,7 +1167,6 @@ namespace llvm { bool isReplace) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; @@ -1183,9 +1182,6 @@ namespace llvm { SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const; SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG 
&DAG) const; diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 2a2286e42405..46c19f18f8d3 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -4420,12 +4420,12 @@ defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus, defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus, SSE_INTALU_ITINS_P, HasBWI, 0>; defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul, - SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; + SSE_INTMUL_ITINS_P, HasAVX512, 1>, T8PD; defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul, - SSE_INTALU_ITINS_P, HasBWI, 1>; + SSE_INTMUL_ITINS_P, HasBWI, 1>; defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul, - SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD; -defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTALU_ITINS_P, + SSE_INTMUL_ITINS_P, HasDQI, 1>, T8PD; +defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTMUL_ITINS_P, HasBWI, 1>; defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SSE_INTMUL_ITINS_P, HasBWI, 1>; @@ -4454,7 +4454,7 @@ multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins, } } -defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTALU_ITINS_P, +defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTMUL_ITINS_P, avx512vl_i32_info, avx512vl_i64_info, X86pmuldq, HasAVX512, 1>,T8PD; defm VPMULUDQ : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P, diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index b48fa1841979..cb84f9aecf79 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3734,7 +3734,7 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>, + [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), 
RC:$src2)))], itins.rr>, Sched<[itins.Sched]>; def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), @@ -3742,8 +3742,8 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), - (bitconvert (memop_frag addr:$src2)))))]>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + (bitconvert (memop_frag addr:$src2)))))], + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } } // ExeDomain = SSEPackedInt @@ -6313,7 +6313,7 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, Sched<[itins.Sched]>; def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), @@ -6321,8 +6321,8 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, - (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))]>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))], + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// SS48I_binop_rm2 - Simple SSE41 binary operator with different src and dst @@ -6338,7 +6338,7 @@ multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>, + [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))], itins.rr>, 
Sched<[itins.Sched]>; def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), @@ -6346,8 +6346,8 @@ multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), - (bitconvert (memop_frag addr:$src2)))))]>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + (bitconvert (memop_frag addr:$src2)))))], + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } let Predicates = [HasAVX, NoVLX] in { @@ -6924,14 +6924,15 @@ multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, Sched<[itins.Sched]>; + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, + Sched<[itins.Sched]>; def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, - (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>, + (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp index 7086c2eb52c4..a69c009e1a54 100644 --- a/lib/Transforms/IPO/SampleProfile.cpp +++ b/lib/Transforms/IPO/SampleProfile.cpp @@ -181,8 +181,9 @@ public: StringRef Name, bool IsThinLTOPreLink, std::function<AssumptionCache &(Function &)> GetAssumptionCache, std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo) - : GetAC(GetAssumptionCache), GetTTI(GetTargetTransformInfo), - Filename(Name), IsThinLTOPreLink(IsThinLTOPreLink) {} + : GetAC(std::move(GetAssumptionCache)), + 
GetTTI(std::move(GetTargetTransformInfo)), Filename(Name), + IsThinLTOPreLink(IsThinLTOPreLink) {} bool doInitialization(Module &M); bool runOnModule(Module &M, ModuleAnalysisManager *AM); @@ -1547,14 +1548,14 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM) { // Populate the symbol map. for (const auto &N_F : M.getValueSymbolTable()) { - std::string OrigName = N_F.getKey(); + StringRef OrigName = N_F.getKey(); Function *F = dyn_cast<Function>(N_F.getValue()); if (F == nullptr) continue; SymbolMap[OrigName] = F; auto pos = OrigName.find('.'); - if (pos != std::string::npos) { - std::string NewName = OrigName.substr(0, pos); + if (pos != StringRef::npos) { + StringRef NewName = OrigName.substr(0, pos); auto r = SymbolMap.insert(std::make_pair(NewName, F)); // Failiing to insert means there is already an entry in SymbolMap, // thus there are multiple functions that are mapped to the same diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index 945133074059..caffc03339c4 100644 --- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -90,8 +90,7 @@ void promoteTypeIds(Module &M, StringRef ModuleId) { if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) { Metadata *&GlobalMD = LocalToGlobal[MD]; if (!GlobalMD) { - std::string NewName = - (to_string(LocalToGlobal.size()) + ModuleId).str(); + std::string NewName = (Twine(LocalToGlobal.size()) + ModuleId).str(); GlobalMD = MDString::get(M.getContext(), NewName); } diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index a088d447337f..40e52ee755e5 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1802,9 +1802,7 @@ Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) { /// instructions. For normal calls, it allows visitCallSite to do the heavy /// lifting. 
Instruction *InstCombiner::visitCallInst(CallInst &CI) { - auto Args = CI.arg_operands(); - if (Value *V = SimplifyCall(&CI, CI.getCalledValue(), Args.begin(), - Args.end(), SQ.getWithInstruction(&CI))) + if (Value *V = SimplifyCall(&CI, SQ.getWithInstruction(&CI))) return replaceInstUsesWith(CI, V); if (isFreeCall(&CI, &TLI)) @@ -1903,16 +1901,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false)) return replaceInstUsesWith(CI, N); return nullptr; - case Intrinsic::bswap: { Value *IIOperand = II->getArgOperand(0); Value *X = nullptr; - // TODO should this be in InstSimplify? - // bswap(bswap(x)) -> x - if (match(IIOperand, m_BSwap(m_Value(X)))) - return replaceInstUsesWith(CI, X); - // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) { unsigned C = X->getType()->getPrimitiveSizeInBits() - @@ -1923,18 +1915,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } break; } - - case Intrinsic::bitreverse: { - Value *IIOperand = II->getArgOperand(0); - Value *X = nullptr; - - // TODO should this be in InstSimplify? 
- // bitreverse(bitreverse(x)) -> x - if (match(IIOperand, m_BitReverse(m_Value(X)))) - return replaceInstUsesWith(CI, X); - break; - } - case Intrinsic::masked_load: if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, Builder)) return replaceInstUsesWith(CI, SimplifiedMaskedOp); @@ -1948,16 +1928,16 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::powi: if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) { - // powi(x, 0) -> 1.0 - if (Power->isZero()) - return replaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0)); - // powi(x, 1) -> x - if (Power->isOne()) - return replaceInstUsesWith(CI, II->getArgOperand(0)); + // 0 and 1 are handled in instsimplify + // powi(x, -1) -> 1/x if (Power->isMinusOne()) return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0), II->getArgOperand(0)); + // powi(x, 2) -> x*x + if (Power->equalsInt(2)) + return BinaryOperator::CreateFMul(II->getArgOperand(0), + II->getArgOperand(0)); } break; @@ -2396,7 +2376,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // The compare intrinsic uses the above assumptions and therefore // doesn't require additional flags. 
if ((match(Arg0, m_OneUse(m_FSub(m_Value(A), m_Value(B)))) && - match(Arg1, m_Zero()) && + match(Arg1, m_Zero()) && isa<Instruction>(Arg0) && cast<Instruction>(Arg0)->getFastMathFlags().noInfs())) { if (Arg0IsZero) std::swap(A, B); diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 87666360c1a0..541dde6c47d2 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -1631,9 +1631,5 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) { SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); - // Handle cases involving: rem X, (select Cond, Y, Z) - if (simplifyDivRemOfSelectWithZeroOp(I)) - return &I; - return nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 65a96b965227..aeac8910af6b 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -181,11 +181,13 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // If extracting a specified index from the vector, see if we can recursively // find a previously computed scalar that was inserted into the vector. if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) { - unsigned IndexVal = IdxC->getZExtValue(); unsigned VectorWidth = EI.getVectorOperandType()->getNumElements(); - // InstSimplify handles cases where the index is invalid. - assert(IndexVal < VectorWidth); + // InstSimplify should handle cases where the index is invalid. + if (!IdxC->getValue().ule(VectorWidth)) + return nullptr; + + unsigned IndexVal = IdxC->getZExtValue(); // This instruction only demands the single element from the input vector. 
// If the input vector has a single use, simplify it based on this use diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 8328d4031941..8e39f24d819c 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -2702,9 +2702,10 @@ void FunctionStackPoisoner::copyArgsPassedByValToAllocas() { unsigned Align = Arg.getParamAlignment(); if (Align == 0) Align = DL.getABITypeAlignment(Ty); - const std::string &Name = Arg.hasName() ? Arg.getName().str() : - "Arg" + llvm::to_string(Arg.getArgNo()); - AllocaInst *AI = IRB.CreateAlloca(Ty, nullptr, Twine(Name) + ".byval"); + AllocaInst *AI = IRB.CreateAlloca( + Ty, nullptr, + (Arg.hasName() ? Arg.getName() : "Arg" + Twine(Arg.getArgNo())) + + ".byval"); AI->setAlignment(Align); Arg.replaceAllUsesWith(AI); diff --git a/lib/Transforms/Scalar/GVNSink.cpp b/lib/Transforms/Scalar/GVNSink.cpp index 814a62cd7d65..bf92e43c4715 100644 --- a/lib/Transforms/Scalar/GVNSink.cpp +++ b/lib/Transforms/Scalar/GVNSink.cpp @@ -641,7 +641,7 @@ Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking( DenseMap<uint32_t, unsigned> VNums; for (auto *I : Insts) { uint32_t N = VN.lookupOrAdd(I); - DEBUG(dbgs() << " VN=" << utohexstr(N) << " for" << *I << "\n"); + DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n"); if (N == ~0U) return None; VNums[N]++; diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 6af3fef963dc..9c870b42a747 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -476,33 +476,22 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, Alignment = DL.getABITypeAlignment(EltType); } - // Remember the debug location. 
- DebugLoc Loc; - if (!Range.TheStores.empty()) - Loc = Range.TheStores[0]->getDebugLoc(); + AMemSet = + Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment); DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI : Range.TheStores) - dbgs() << *SI << '\n'); + dbgs() << *SI << '\n'; + dbgs() << "With: " << *AMemSet << '\n'); + + if (!Range.TheStores.empty()) + AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc()); // Zap all the stores. for (Instruction *SI : Range.TheStores) { MD->removeInstruction(SI); SI->eraseFromParent(); } - - // Create the memset after removing the stores, so that if there any cached - // non-local dependencies on the removed instructions in - // MemoryDependenceAnalysis, the cache entries are updated to "dirty" - // entries pointing below the memset, so subsequent queries include the - // memset. - AMemSet = - Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment); - if (!Range.TheStores.empty()) - AMemSet->setDebugLoc(Loc); - - DEBUG(dbgs() << "With: " << *AMemSet << '\n'); - ++NumMemSetInfer; } @@ -1042,22 +1031,9 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M, // // NOTE: This is conservative, it will stop on any read from the source loc, // not just the defining memcpy. 
- MemoryLocation SourceLoc = MemoryLocation::getForSource(MDep); - MemDepResult SourceDep = MD->getPointerDependencyFrom(SourceLoc, false, - M->getIterator(), M->getParent()); - - if (SourceDep.isNonLocal()) { - SmallVector<NonLocalDepResult, 2> NonLocalDepResults; - MD->getNonLocalPointerDependencyFrom(M, SourceLoc, /*isLoad=*/false, - NonLocalDepResults); - if (NonLocalDepResults.size() == 1) { - SourceDep = NonLocalDepResults[0].getResult(); - assert((!SourceDep.getInst() || - LookupDomTree().dominates(SourceDep.getInst(), M)) && - "when memdep returns exactly one result, it should dominate"); - } - } - + MemDepResult SourceDep = + MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false, + M->getIterator(), M->getParent()); if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) return false; @@ -1259,18 +1235,6 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) { MemDepResult SrcDepInfo = MD->getPointerDependencyFrom( SrcLoc, true, M->getIterator(), M->getParent()); - if (SrcDepInfo.isNonLocal()) { - SmallVector<NonLocalDepResult, 2> NonLocalDepResults; - MD->getNonLocalPointerDependencyFrom(M, SrcLoc, /*isLoad=*/true, - NonLocalDepResults); - if (NonLocalDepResults.size() == 1) { - SrcDepInfo = NonLocalDepResults[0].getResult(); - assert((!SrcDepInfo.getInst() || - LookupDomTree().dominates(SrcDepInfo.getInst(), M)) && - "when memdep returns exactly one result, it should dominate"); - } - } - if (SrcDepInfo.isClobber()) { if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst())) return processMemCpyMemCpyDependence(M, MDep); diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 3b45cfa482e6..c44edbed8ed9 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -2796,17 +2796,12 @@ static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData, StatepointLiveSetTy Updated; 
findLiveSetAtInst(Inst, RevisedLivenessData, Updated); -#ifndef NDEBUG - DenseSet<Value *> Bases; - for (auto KVPair : Info.PointerToBase) - Bases.insert(KVPair.second); -#endif - // We may have base pointers which are now live that weren't before. We need // to update the PointerToBase structure to reflect this. for (auto V : Updated) if (Info.PointerToBase.insert({V, V}).second) { - assert(Bases.count(V) && "Can't find base for unexpected live value!"); + assert(isKnownBaseResult(V) && + "Can't find base for unexpected live value!"); continue; } diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index efff06f79cb7..e00541d3c812 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -648,8 +648,13 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit)); NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", DT, LI, PreserveLCSSA); + // NewExit gets its DebugLoc from LatchExit, which is not part of the + // original Loop. + // Fix this by setting Loop's DebugLoc to NewExit. + auto *NewExitTerminator = NewExit->getTerminator(); + NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc()); // Split NewExit to insert epilog remainder loop. 
- EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI); + EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI); EpilogPreHeader->setName(Header->getName() + ".epil.preheader"); } else { // If prolog remainder diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index b3c80424c8b9..e7358dbcb624 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -127,16 +127,6 @@ static cl::opt<unsigned> MaxSpeculationDepth( cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions")); -static cl::opt<unsigned> DependenceChainLatency( - "dependence-chain-latency", cl::Hidden, cl::init(8), - cl::desc("Limit the maximum latency of dependence chain containing cmp " - "for if conversion")); - -static cl::opt<unsigned> SmallBBSize( - "small-bb-size", cl::Hidden, cl::init(40), - cl::desc("Check dependence chain latency only in basic block smaller than " - "this number")); - STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping"); @@ -405,166 +395,6 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, return true; } -/// Estimate the code size of the specified BB. -static unsigned CountBBCodeSize(BasicBlock *BB, - const TargetTransformInfo &TTI) { - unsigned Size = 0; - for (auto II = BB->begin(); !isa<TerminatorInst>(II); ++II) - Size += TTI.getInstructionCost(&(*II), TargetTransformInfo::TCK_CodeSize); - return Size; -} - -/// Find out the latency of the longest dependence chain in the BB if -/// LongestChain is true, or the dependence chain containing the compare -/// instruction feeding the block's conditional branch. 
-static unsigned FindDependenceChainLatency(BasicBlock *BB, - DenseMap<Instruction *, unsigned> &Instructions, - const TargetTransformInfo &TTI, - bool LongestChain) { - unsigned MaxLatency = 0; - - BasicBlock::iterator II; - for (II = BB->begin(); !isa<TerminatorInst>(II); ++II) { - unsigned Latency = 0; - for (unsigned O = 0, E = II->getNumOperands(); O != E; ++O) { - Instruction *Op = dyn_cast<Instruction>(II->getOperand(O)); - if (Op && Instructions.count(Op)) { - auto OpLatency = Instructions[Op]; - if (OpLatency > Latency) - Latency = OpLatency; - } - } - Latency += TTI.getInstructionCost(&(*II), TargetTransformInfo::TCK_Latency); - Instructions[&(*II)] = Latency; - - if (Latency > MaxLatency) - MaxLatency = Latency; - } - - if (LongestChain) - return MaxLatency; - - // The length of the dependence chain containing the compare instruction is - // wanted, so the terminator must be a BranchInst. - assert(isa<BranchInst>(II)); - BranchInst* Br = cast<BranchInst>(II); - Instruction *Cmp = dyn_cast<Instruction>(Br->getCondition()); - if (Cmp && Instructions.count(Cmp)) - return Instructions[Cmp]; - else - return 0; -} - -/// Instructions in BB2 may depend on instructions in BB1, and instructions -/// in BB1 may have users in BB2. If the last (in terms of latency) such kind -/// of instruction in BB1 is I, then the instructions after I can be executed -/// in parallel with instructions in BB2. -/// This function returns the latency of I. -static unsigned LatencyAdjustment(BasicBlock *BB1, BasicBlock *BB2, - BasicBlock *IfBlock1, BasicBlock *IfBlock2, - DenseMap<Instruction *, unsigned> &BB1Instructions) { - unsigned LastLatency = 0; - SmallVector<Instruction *, 16> Worklist; - BasicBlock::iterator II; - for (II = BB2->begin(); !isa<TerminatorInst>(II); ++II) { - if (PHINode *PN = dyn_cast<PHINode>(II)) { - // Look for users in BB2. 
- bool InBBUser = false; - for (User *U : PN->users()) { - if (cast<Instruction>(U)->getParent() == BB2) { - InBBUser = true; - break; - } - } - // No such user, we don't care about this instruction and its operands. - if (!InBBUser) - break; - } - Worklist.push_back(&(*II)); - } - - while (!Worklist.empty()) { - Instruction *I = Worklist.pop_back_val(); - for (unsigned O = 0, E = I->getNumOperands(); O != E; ++O) { - if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(O))) { - if (Op->getParent() == IfBlock1 || Op->getParent() == IfBlock2) - Worklist.push_back(Op); - else if (Op->getParent() == BB1 && BB1Instructions.count(Op)) { - if (BB1Instructions[Op] > LastLatency) - LastLatency = BB1Instructions[Op]; - } - } - } - } - - return LastLatency; -} - -/// If after if conversion, most of the instructions in this new BB construct a -/// long and slow dependence chain, it may be slower than cmp/branch, even -/// if the branch has a high miss rate, because the control dependence is -/// transformed into data dependence, and control dependence can be speculated, -/// and thus, the second part can execute in parallel with the first part on -/// modern OOO processor. -/// -/// To check this condition, this function finds the length of the dependence -/// chain in BB1 (only the part that can be executed in parallel with code after -/// branch in BB2) containing cmp, and if the length is longer than a threshold, -/// don't perform if conversion. -/// -/// BB1, BB2, IfBlock1 and IfBlock2 are candidate BBs for if conversion. -/// SpeculationSize contains the code size of IfBlock1 and IfBlock2. -static bool FindLongDependenceChain(BasicBlock *BB1, BasicBlock *BB2, - BasicBlock *IfBlock1, BasicBlock *IfBlock2, - unsigned SpeculationSize, - const TargetTransformInfo &TTI) { - // Accumulated latency of each instruction in their BBs. 
- DenseMap<Instruction *, unsigned> BB1Instructions; - DenseMap<Instruction *, unsigned> BB2Instructions; - - if (!TTI.isOutOfOrder()) - return false; - - unsigned NewBBSize = CountBBCodeSize(BB1, TTI) + CountBBCodeSize(BB2, TTI) - + SpeculationSize; - - // We check small BB only since it is more difficult to find unrelated - // instructions to fill functional units in a small BB. - if (NewBBSize > SmallBBSize) - return false; - - auto BB1Chain = - FindDependenceChainLatency(BB1, BB1Instructions, TTI, false); - auto BB2Chain = - FindDependenceChainLatency(BB2, BB2Instructions, TTI, true); - - // If there are many unrelated instructions in the new BB, there will be - // other instructions for the processor to issue regardless of the length - // of this new dependence chain. - // Modern processors can issue 3 or more instructions in each cycle. But in - // real world applications, an IPC of 2 is already very good for non-loop - // code with small basic blocks. Higher IPC is usually found in programs with - // small kernel. So IPC of 2 is more reasonable for most applications. - if ((BB1Chain + BB2Chain) * 2 <= NewBBSize) - return false; - - // We only care about part of the dependence chain in BB1 that can be - // executed in parallel with BB2, so adjust the latency. - BB1Chain -= - LatencyAdjustment(BB1, BB2, IfBlock1, IfBlock2, BB1Instructions); - - // Correctly predicted branch instruction can skip the dependence chain in - // BB1, but misprediction has a penalty, so only when the dependence chain is - // longer than DependenceChainLatency, then branch is better than select. - // Besides misprediction penalty, the threshold value DependenceChainLatency - // also depends on branch misprediction rate, taken branch latency and cmov - // latency. - if (BB1Chain >= DependenceChainLatency) - return true; - - return false; -} - /// Extract ConstantInt from value, looking through IntToPtr /// and PointerNullValue. Return NULL if value is not a constant int. 
static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) { @@ -2214,11 +2044,6 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue)) return false; - // Don't do if conversion for long dependence chain. - if (FindLongDependenceChain(BB, EndBB, ThenBB, nullptr, - CountBBCodeSize(ThenBB, TTI), TTI)) - return false; - // If we get here, we can hoist the instruction and if-convert. DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";); @@ -2526,10 +2351,6 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, } } - if (FindLongDependenceChain(DomBlock, BB, IfBlock1, IfBlock2, - AggressiveInsts.size(), TTI)) - return false; - DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: " << IfTrue->getName() << " F: " << IfFalse->getName() << "\n"); diff --git a/test/Analysis/DemandedBits/basic.ll b/test/Analysis/DemandedBits/basic.ll index 5b8652396b3a..6f44465315e6 100644 --- a/test/Analysis/DemandedBits/basic.ll +++ b/test/Analysis/DemandedBits/basic.ll @@ -1,9 +1,9 @@ ; RUN: opt -S -demanded-bits -analyze < %s | FileCheck %s ; RUN: opt -S -disable-output -passes="print<demanded-bits>" < %s 2>&1 | FileCheck %s -; CHECK-DAG: DemandedBits: 0xFF for %1 = add nsw i32 %a, 5 -; CHECK-DAG: DemandedBits: 0xFF for %3 = trunc i32 %2 to i8 -; CHECK-DAG: DemandedBits: 0xFF for %2 = mul nsw i32 %1, %b +; CHECK-DAG: DemandedBits: 0xff for %1 = add nsw i32 %a, 5 +; CHECK-DAG: DemandedBits: 0xff for %3 = trunc i32 %2 to i8 +; CHECK-DAG: DemandedBits: 0xff for %2 = mul nsw i32 %1, %b define i8 @test_mul(i32 %a, i32 %b) { %1 = add nsw i32 %a, 5 %2 = mul nsw i32 %1, %b diff --git a/test/Analysis/DemandedBits/intrinsics.ll b/test/Analysis/DemandedBits/intrinsics.ll index 5a6d17284a72..48f6d4624422 100644 --- a/test/Analysis/DemandedBits/intrinsics.ll +++ b/test/Analysis/DemandedBits/intrinsics.ll @@ -1,9 +1,9 @@ ; RUN: opt -S -demanded-bits -analyze < %s 
| FileCheck %s ; RUN: opt -S -disable-output -passes="print<demanded-bits>" < %s 2>&1 | FileCheck %s -; CHECK-DAG: DemandedBits: 0xFF000000 for %1 = or i32 %x, 1 -; CHECK-DAG: DemandedBits: 0xFF for %2 = call i32 @llvm.bitreverse.i32(i32 %1) -; CHECK-DAG: DemandedBits: 0xFF for %3 = trunc i32 %2 to i8 +; CHECK-DAG: DemandedBits: 0xff000000 for %1 = or i32 %x, 1 +; CHECK-DAG: DemandedBits: 0xff for %2 = call i32 @llvm.bitreverse.i32(i32 %1) +; CHECK-DAG: DemandedBits: 0xff for %3 = trunc i32 %2 to i8 define i8 @test_bswap(i32 %x) { %1 = or i32 %x, 1 %2 = call i32 @llvm.bswap.i32(i32 %1) @@ -12,9 +12,9 @@ define i8 @test_bswap(i32 %x) { } declare i32 @llvm.bswap.i32(i32) -; CHECK-DAG: DemandedBits: 0xFF000000 for %1 = or i32 %x, 1 -; CHECK-DAG: DemandedBits: 0xFF for %2 = call i32 @llvm.bswap.i32(i32 %1) -; CHECK-DAG: DemandedBits: 0xFF for %3 = trunc i32 %2 to i8 +; CHECK-DAG: DemandedBits: 0xff000000 for %1 = or i32 %x, 1 +; CHECK-DAG: DemandedBits: 0xff for %2 = call i32 @llvm.bswap.i32(i32 %1) +; CHECK-DAG: DemandedBits: 0xff for %3 = trunc i32 %2 to i8 define i8 @test_bitreverse(i32 %x) { %1 = or i32 %x, 1 %2 = call i32 @llvm.bitreverse.i32(i32 %1) diff --git a/test/Analysis/Lint/noalias-byval.ll b/test/Analysis/Lint/noalias-byval.ll new file mode 100644 index 000000000000..5b36c6d15df3 --- /dev/null +++ b/test/Analysis/Lint/noalias-byval.ll @@ -0,0 +1,48 @@ +; RUN: opt < %s -lint -disable-output 2>&1 | FileCheck %s + +%s = type { i8 } + +; Function Attrs: argmemonly nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32, i1) #0 + +; Function Attrs: argmemonly nounwind +declare void @llvm.memset.p0i8.i8.i32(i8* nocapture writeonly, i8, i32, i32, i1) #0 + +declare void @f1(%s* noalias nocapture sret, %s* nocapture readnone) + +define void @f2() { +entry: + %c = alloca %s + %tmp = alloca %s + %0 = bitcast %s* %c to i8* + %1 = bitcast %s* %tmp to i8* + call void @llvm.memset.p0i8.i8.i32(i8* %0, i8 0, i32 1, i32 
1, i1 false) + call void @f1(%s* sret %c, %s* %c) + ret void +} + +; Lint should complain about us passing %c to both arguments since one of them +; is noalias. +; CHECK: Unusual: noalias argument aliases another argument +; CHECK-NEXT: call void @f1(%s* sret %c, %s* %c) + +declare void @f3(%s* noalias nocapture sret, %s* byval nocapture readnone) + +define void @f4() { +entry: + %c = alloca %s + %tmp = alloca %s + %0 = bitcast %s* %c to i8* + %1 = bitcast %s* %tmp to i8* + call void @llvm.memset.p0i8.i8.i32(i8* %0, i8 0, i32 1, i32 1, i1 false) + call void @f3(%s* sret %c, %s* byval %c) + ret void +} + +; Lint should not complain about passing %c to both arguments even if one is +; noalias, since the other one is byval, effectively copying the data to the +; stack instead of passing the pointer itself. +; CHECK-NOT: Unusual: noalias argument aliases another argument +; CHECK-NOT: call void @f3(%s* sret %c, %s* byval %c) + +attributes #0 = { argmemonly nounwind } diff --git a/test/Analysis/ScalarEvolution/truncate.ll b/test/Analysis/ScalarEvolution/truncate.ll new file mode 100644 index 000000000000..e9bd39d7a268 --- /dev/null +++ b/test/Analysis/ScalarEvolution/truncate.ll @@ -0,0 +1,72 @@ +; RUN: opt < %s -analyze -scalar-evolution +; RUN: opt < %s -passes='print<scalar-evolution>' +; Regression test for assert ScalarEvolution::getTruncateExpr. 
+ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1" +target triple = "x86_64-unknown-linux-gnu" + +define void @snork(i8* %arg, i8 %arg1, i64 %arg2) { +bb: + br label %bb12 + +bb3: ; preds = %bb34 + br i1 true, label %bb4, label %bb12 + +bb4: ; preds = %bb3 + br label %bb6 + +bb5: ; preds = %bb6 + ret void + +bb6: ; preds = %bb6, %bb4 + %tmp = phi i64 [ %tmp28, %bb4 ], [ %tmp10, %bb6 ] + %tmp7 = phi i32 [ 3, %bb4 ], [ %tmp11, %bb6 ] + %tmp8 = trunc i64 %tmp to i32 + %tmp9 = sdiv i32 %tmp8, %tmp7 + %tmp10 = add i64 %tmp, -1 + %tmp11 = add i32 %tmp9, %tmp7 + br i1 true, label %bb5, label %bb6 + +bb12: ; preds = %bb3, %bb + br label %bb13 + +bb13: ; preds = %bb34, %bb12 + %tmp14 = phi i64 [ %arg2, %bb12 ], [ %tmp28, %bb34 ] + %tmp15 = phi i8 [ %arg1, %bb12 ], [ %tmp26, %bb34 ] + %tmp16 = phi i32 [ 1, %bb12 ], [ %tmp35, %bb34 ] + %tmp17 = add i8 %tmp15, -1 + %tmp18 = sext i8 %tmp17 to i64 + %tmp19 = sub i64 1, %tmp14 + %tmp20 = add i64 %tmp19, %tmp18 + %tmp21 = trunc i64 %tmp20 to i32 + %tmp22 = icmp eq i32 %tmp21, 0 + br i1 %tmp22, label %bb32, label %bb23 + +bb23: ; preds = %bb13 + br i1 true, label %bb25, label %bb24 + +bb24: ; preds = %bb23 + br label %bb25 + +bb25: ; preds = %bb24, %bb23 + %tmp26 = add i8 %tmp15, -2 + %tmp27 = sext i8 %tmp26 to i64 + %tmp28 = sub i64 %tmp27, %tmp20 + %tmp29 = trunc i64 %tmp28 to i32 + %tmp30 = icmp eq i32 %tmp29, 0 + br i1 %tmp30, label %bb31, label %bb34 + +bb31: ; preds = %bb25 + br label %bb33 + +bb32: ; preds = %bb13 + br label %bb33 + +bb33: ; preds = %bb32, %bb31 + unreachable + +bb34: ; preds = %bb25 + %tmp35 = add nuw nsw i32 %tmp16, 2 + %tmp36 = icmp ugt i32 %tmp16, 52 + br i1 %tmp36, label %bb3, label %bb13 +} diff --git a/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir b/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir index 630b34028162..c9ff2cd0d514 100644 --- a/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir +++ b/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir @@ -1,7 +1,7 @@ -# RUN: llc 
-run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=cortex-a57 -enable-unsafe-fp-math %s | FileCheck --check-prefix=UNPROFITABLE %s -# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=falkor -enable-unsafe-fp-math %s | FileCheck --check-prefix=PROFITABLE %s -# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=exynosm1 -enable-unsafe-fp-math %s | FileCheck --check-prefix=PROFITABLE %s -# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx2t99 -enable-unsafe-fp-math %s | FileCheck --check-prefix=PROFITABLE %s +# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=cortex-a57 -enable-unsafe-fp-math %s | FileCheck --check-prefixes=UNPROFITABLE,ALL %s +# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=falkor -enable-unsafe-fp-math %s | FileCheck --check-prefixes=PROFITABLE,ALL %s +# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=exynosm1 -enable-unsafe-fp-math %s | FileCheck --check-prefixes=PROFITABLE,ALL %s +# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx2t99 -enable-unsafe-fp-math %s | FileCheck --check-prefixes=PROFITABLE,ALL %s # name: f1_2s registers: @@ -80,3 +80,82 @@ body: | # PROFITABLE-LABEL: name: f1_2d # PROFITABLE: %5:fpr128 = FNEGv2f64 %2 # PROFITABLE-NEXT: FMLAv2f64 killed %5, %0, %1 +--- +name: f1_both_fmul_2s +registers: + - { id: 0, class: fpr64 } + - { id: 1, class: fpr64 } + - { id: 2, class: fpr64 } + - { id: 3, class: fpr64 } + - { id: 4, class: fpr64 } + - { id: 5, class: fpr64 } + - { id: 6, class: fpr64 } +body: | + bb.0.entry: + %3:fpr64 = COPY %q3 + %2:fpr64 = COPY %q2 + %1:fpr64 = COPY %q1 + %0:fpr64 = COPY %q0 + %4:fpr64 = FMULv2f32 %0, %1 + %5:fpr64 = FMULv2f32 %2, %3 + %6:fpr64 = FSUBv2f32 killed %4, %5 + %q0 = COPY %6 + RET_ReallyLR implicit %q0 + +... 
+# ALL-LABEL: name: f1_both_fmul_2s +# ALL: %4:fpr64 = FMULv2f32 %0, %1 +# ALL-NEXT: FMLSv2f32 killed %4, %2, %3 +--- +name: f1_both_fmul_4s +registers: + - { id: 0, class: fpr128 } + - { id: 1, class: fpr128 } + - { id: 2, class: fpr128 } + - { id: 3, class: fpr128 } + - { id: 4, class: fpr128 } + - { id: 5, class: fpr128 } + - { id: 6, class: fpr128 } +body: | + bb.0.entry: + %3:fpr128 = COPY %q3 + %2:fpr128 = COPY %q2 + %1:fpr128 = COPY %q1 + %0:fpr128 = COPY %q0 + %4:fpr128 = FMULv4f32 %0, %1 + %5:fpr128 = FMULv4f32 %2, %3 + %6:fpr128 = FSUBv4f32 killed %4, %5 + %q0 = COPY %6 + RET_ReallyLR implicit %q0 + +... +# ALL-LABEL: name: f1_both_fmul_4s +# ALL: %4:fpr128 = FMULv4f32 %0, %1 +# ALL-NEXT: FMLSv4f32 killed %4, %2, %3 +--- +name: f1_both_fmul_2d +registers: + - { id: 0, class: fpr128 } + - { id: 1, class: fpr128 } + - { id: 2, class: fpr128 } + - { id: 3, class: fpr128 } + - { id: 4, class: fpr128 } + - { id: 5, class: fpr128 } + - { id: 6, class: fpr128 } +body: | + bb.0.entry: + %3:fpr128 = COPY %q3 + %2:fpr128 = COPY %q2 + %1:fpr128 = COPY %q1 + %0:fpr128 = COPY %q0 + %4:fpr128 = FMULv2f64 %0, %1 + %5:fpr128 = FMULv2f64 %2, %3 + %6:fpr128 = FSUBv2f64 killed %4, %5 + %q0 = COPY %6 + RET_ReallyLR implicit %q0 + +... 
+# ALL-LABEL: name: f1_both_fmul_2d +# ALL: %4:fpr128 = FMULv2f64 %0, %1 +# ALL-NEXT: FMLSv2f64 killed %4, %2, %3 + diff --git a/test/CodeGen/AArch64/combine-and-like.ll b/test/CodeGen/AArch64/combine-and-like.ll new file mode 100644 index 000000000000..15770c2e02ff --- /dev/null +++ b/test/CodeGen/AArch64/combine-and-like.ll @@ -0,0 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s + +define i32 @f(i32 %a0) { +; CHECK-LABEL: f: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret + %1 = lshr i32 %a0, 2147483647 + %2 = add i32 %1, 2147483647 + %3 = and i32 %2, %1 + ret i32 %3 +} diff --git a/test/CodeGen/X86/avx512-schedule.ll b/test/CodeGen/X86/avx512-schedule.ll index 306b95f0f3ae..af99b86ca5d1 100755 --- a/test/CodeGen/X86/avx512-schedule.ll +++ b/test/CodeGen/X86/avx512-schedule.ll @@ -129,7 +129,7 @@ entry: define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) { ; GENERIC-LABEL: imulq512: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmullq %zmm0, %zmm1, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vpmullq %zmm0, %zmm1, %zmm0 # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: imulq512: @@ -143,7 +143,7 @@ define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) { define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) { ; GENERIC-LABEL: imulq256: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmullq %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vpmullq %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: imulq256: @@ -157,7 +157,7 @@ define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) { define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) { ; GENERIC-LABEL: imulq128: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmullq %xmm0, %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: vpmullq %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: imulq128: @@ -550,7 +550,7 @@ 
define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone { define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) { ; GENERIC-LABEL: vpmulld_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpmulld_test: diff --git a/test/CodeGen/X86/branch_instruction_and_target_split_perf_nops.mir b/test/CodeGen/X86/branch_instruction_and_target_split_perf_nops.mir index 965014162073..bbefc4f920a1 100644 --- a/test/CodeGen/X86/branch_instruction_and_target_split_perf_nops.mir +++ b/test/CodeGen/X86/branch_instruction_and_target_split_perf_nops.mir @@ -57,7 +57,7 @@ # return w; # } # -# CHECK: 129: eb 13 jmp 19 <ifElse+0x7E> +# CHECK: 129: eb 13 jmp 19 <ifElse+0x7e> # CHECK: 12e: eb a0 jmp -96 <ifElse+0x10> # CHECK: 132: eb 9c jmp -100 <ifElse+0x10> # CHECK: 137: eb 97 jmp -105 <ifElse+0x10> diff --git a/test/CodeGen/X86/combine-pmuldq.ll b/test/CodeGen/X86/combine-pmuldq.ll index 53ab87a386b3..ebfe0d56358e 100644 --- a/test/CodeGen/X86/combine-pmuldq.ll +++ b/test/CodeGen/X86/combine-pmuldq.ll @@ -1,6 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=AVX --check-prefix=AVX512VL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=AVX --check-prefix=AVX512DQVL ; TODO - shuffle+sext are superfluous define <2 x i64> 
@combine_shuffle_sext_pmuldq(<4 x i32> %a0, <4 x i32> %a1) { @@ -66,13 +69,29 @@ define <2 x i64> @combine_shuffle_zero_pmuludq(<4 x i32> %a0, <4 x i32> %a1) { ; SSE-NEXT: pmuludq %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: combine_shuffle_zero_pmuludq: -; AVX: # %bb.0: -; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] -; AVX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] -; AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX2-LABEL: combine_shuffle_zero_pmuludq: +; AVX2: # %bb.0: +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] +; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] +; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; AVX512VL-LABEL: combine_shuffle_zero_pmuludq: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] +; AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] +; AVX512VL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: retq +; +; AVX512DQVL-LABEL: combine_shuffle_zero_pmuludq: +; AVX512DQVL: # %bb.0: +; AVX512DQVL-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512DQVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] +; AVX512DQVL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] +; AVX512DQVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 +; AVX512DQVL-NEXT: retq %1 = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 2, i32 7> %2 = shufflevector <4 x i32> %a1, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 2, i32 7> %3 = bitcast <4 x i32> %1 to <2 x i64> @@ -94,13 +113,29 @@ define <4 x i64> @combine_shuffle_zero_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) ; SSE-NEXT: pmuludq %xmm2, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: combine_shuffle_zero_pmuludq_256: -; AVX: # %bb.0: -; AVX-NEXT: vpxor %xmm2, %xmm2, 
%xmm2 -; AVX-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7] -; AVX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7] -; AVX-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 -; AVX-NEXT: retq +; AVX2-LABEL: combine_shuffle_zero_pmuludq_256: +; AVX2: # %bb.0: +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7] +; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7] +; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq +; +; AVX512VL-LABEL: combine_shuffle_zero_pmuludq_256: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7] +; AVX512VL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7] +; AVX512VL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: retq +; +; AVX512DQVL-LABEL: combine_shuffle_zero_pmuludq_256: +; AVX512DQVL: # %bb.0: +; AVX512DQVL-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512DQVL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7] +; AVX512DQVL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7] +; AVX512DQVL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 +; AVX512DQVL-NEXT: retq %1 = shufflevector <8 x i32> %a0, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15> %2 = shufflevector <8 x i32> %a1, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15> %3 = bitcast <8 x i32> %1 to <4 x i64> @@ -108,3 +143,46 @@ define <4 x i64> @combine_shuffle_zero_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) %5 = mul <4 x i64> %3, %4 ret <4 x i64> %5 } + +define <8 x i64> @combine_zext_pmuludq_256(<8 x i32> %a) { +; SSE-LABEL: 
combine_zext_pmuludq_256: +; SSE: # %bb.0: +; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1] +; SSE-NEXT: pmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero +; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] +; SSE-NEXT: pmovzxdq {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero +; SSE-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero +; SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; SSE-NEXT: movdqa {{.*#+}} xmm1 = [715827883,715827883] +; SSE-NEXT: pmuludq %xmm1, %xmm0 +; SSE-NEXT: pmuludq %xmm1, %xmm2 +; SSE-NEXT: pmuludq %xmm1, %xmm4 +; SSE-NEXT: pmuludq %xmm1, %xmm3 +; SSE-NEXT: movdqa %xmm4, %xmm1 +; SSE-NEXT: retq +; +; AVX2-LABEL: combine_zext_pmuludq_256: +; AVX2: # %bb.0: +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero +; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [715827883,715827883,715827883,715827883] +; AVX2-NEXT: vpmuludq %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpmuludq %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: retq +; +; AVX512VL-LABEL: combine_zext_pmuludq_256: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero +; AVX512VL-NEXT: vpmuludq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512VL-NEXT: retq +; +; AVX512DQVL-LABEL: combine_zext_pmuludq_256: +; AVX512DQVL: # %bb.0: +; AVX512DQVL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero +; AVX512DQVL-NEXT: vpmuludq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512DQVL-NEXT: retq + %1 = zext <8 x i32> %a to <8 x i64> + %2 = mul nuw nsw <8 x i64> %1, <i64 715827883, i64 715827883, i64 715827883, i64 715827883, i64 715827883, i64 715827883, i64 715827883, i64 715827883> + ret <8 x i64> %2 +} diff --git a/test/CodeGen/X86/fdiv-combine.ll 
b/test/CodeGen/X86/fdiv-combine.ll index 912110e75d27..62e86e3ad2cc 100644 --- a/test/CodeGen/X86/fdiv-combine.ll +++ b/test/CodeGen/X86/fdiv-combine.ll @@ -95,6 +95,41 @@ define double @div3_arcp(double %x, double %y, double %z) { ret double %ret } +define float @div_select_constant_fold(i1 zeroext %arg) { +; CHECK-LABEL: div_select_constant_fold: +; CHECK: # %bb.0: +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jne .LBB6_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB6_1: +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: retq + %tmp = select i1 %arg, float 5.000000e+00, float 6.000000e+00 + %B2 = fdiv float %tmp, 1.000000e+00 + ret float %B2 +} + +define float @div_select_constant_fold_zero(i1 zeroext %arg) { +; CHECK-LABEL: div_select_constant_fold_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jne .LBB7_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: jmp .LBB7_3 +; CHECK-NEXT: .LBB7_1: +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: .LBB7_3: +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: divss %xmm1, %xmm0 +; CHECK-NEXT: retq + %tmp = select i1 %arg, float 5.000000e+00, float 6.000000e+00 + %B2 = fdiv float %tmp, 0.000000e+00 + ret float %B2 +} + define void @PR24141() { ; CHECK-LABEL: PR24141: ; CHECK: callq diff --git a/test/CodeGen/X86/gather-addresses.ll b/test/CodeGen/X86/gather-addresses.ll index e09ad3e4e0b8..6431847064f0 100644 --- a/test/CodeGen/X86/gather-addresses.ll +++ b/test/CodeGen/X86/gather-addresses.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=x86_64-linux -mcpu=nehalem < %s | FileCheck %s --check-prefix=LIN ; RUN: llc -mtriple=x86_64-win32 -mcpu=nehalem < %s | FileCheck %s --check-prefix=WIN ; RUN: llc -mtriple=i686-win32 -mcpu=nehalem < %s | FileCheck %s 
--check-prefix=LIN32 @@ -7,34 +8,59 @@ ; use an efficient mov/shift sequence rather than shuffling each individual ; element out of the index vector. -; CHECK-LABEL: foo: -; LIN: movdqa (%rsi), %xmm0 -; LIN: pand (%rdx), %xmm0 -; LIN: pextrq $1, %xmm0, %r[[REG4:.+]] -; LIN: movq %xmm0, %r[[REG2:.+]] -; LIN: movslq %e[[REG2]], %r[[REG1:.+]] -; LIN: sarq $32, %r[[REG2]] -; LIN: movslq %e[[REG4]], %r[[REG3:.+]] -; LIN: sarq $32, %r[[REG4]] -; LIN: movsd (%rdi,%r[[REG3]],8), %xmm1 -; LIN: movhpd (%rdi,%r[[REG4]],8), %xmm1 -; LIN: movq %rdi, %xmm1 -; LIN: movq %r[[REG3]], %xmm0 - -; WIN: movdqa (%rdx), %xmm0 -; WIN: pand (%r8), %xmm0 -; WIN: pextrq $1, %xmm0, %r[[REG4:.+]] -; WIN: movq %xmm0, %r[[REG2:.+]] -; WIN: movslq %e[[REG2]], %r[[REG1:.+]] -; WIN: sarq $32, %r[[REG2]] -; WIN: movslq %e[[REG4]], %r[[REG3:.+]] -; WIN: sarq $32, %r[[REG4]] -; WIN: movsd (%rcx,%r[[REG3]],8), %xmm1 -; WIN: movhpd (%rcx,%r[[REG4]],8), %xmm1 -; WIN: movdqa (%r[[REG2]]), %xmm0 -; WIN: movq %r[[REG2]], %xmm1 - define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind { +; LIN-LABEL: foo: +; LIN: # %bb.0: +; LIN-NEXT: movdqa (%rsi), %xmm0 +; LIN-NEXT: pand (%rdx), %xmm0 +; LIN-NEXT: pextrq $1, %xmm0, %rax +; LIN-NEXT: movq %xmm0, %rcx +; LIN-NEXT: movslq %ecx, %rdx +; LIN-NEXT: sarq $32, %rcx +; LIN-NEXT: movslq %eax, %rsi +; LIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; LIN-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; LIN-NEXT: sarq $32, %rax +; LIN-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; LIN-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] +; LIN-NEXT: retq +; +; WIN-LABEL: foo: +; WIN: # %bb.0: +; WIN-NEXT: movdqa (%rdx), %xmm0 +; WIN-NEXT: pand (%r8), %xmm0 +; WIN-NEXT: pextrq $1, %xmm0, %rax +; WIN-NEXT: movq %xmm0, %rdx +; WIN-NEXT: movslq %edx, %r8 +; WIN-NEXT: sarq $32, %rdx +; WIN-NEXT: movslq %eax, %r9 +; WIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; WIN-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; WIN-NEXT: sarq $32, %rax +; WIN-NEXT: movsd {{.*#+}} xmm1 = 
mem[0],zero +; WIN-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] +; WIN-NEXT: retq +; +; LIN32-LABEL: foo: +; LIN32: # %bb.0: +; LIN32-NEXT: pushl %edi +; LIN32-NEXT: pushl %esi +; LIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; LIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; LIN32-NEXT: movl {{[0-9]+}}(%esp), %edx +; LIN32-NEXT: movdqa (%edx), %xmm0 +; LIN32-NEXT: pand (%ecx), %xmm0 +; LIN32-NEXT: pextrd $1, %xmm0, %ecx +; LIN32-NEXT: pextrd $2, %xmm0, %edx +; LIN32-NEXT: pextrd $3, %xmm0, %esi +; LIN32-NEXT: movd %xmm0, %edi +; LIN32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; LIN32-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; LIN32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; LIN32-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] +; LIN32-NEXT: popl %esi +; LIN32-NEXT: popl %edi +; LIN32-NEXT: retl %a = load <4 x i32>, <4 x i32>* %i %b = load <4 x i32>, <4 x i32>* %h %j = and <4 x i32> %a, %b @@ -60,13 +86,81 @@ define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind { ; Check that the sequence previously used above, which bounces the vector off the ; cache works for x86-32. Note that in this case it will not be used for index ; calculation, since indexes are 32-bit, not 64. 
-; CHECK-LABEL: old: -; LIN32: movaps %xmm0, (%esp) -; LIN32-DAG: {{(mov|and)}}l (%esp), -; LIN32-DAG: {{(mov|and)}}l 4(%esp), -; LIN32-DAG: {{(mov|and)}}l 8(%esp), -; LIN32-DAG: {{(mov|and)}}l 12(%esp), define <4 x i64> @old(double* %p, <4 x i32>* %i, <4 x i32>* %h, i64 %f) nounwind { +; LIN-LABEL: old: +; LIN: # %bb.0: +; LIN-NEXT: movdqa (%rsi), %xmm0 +; LIN-NEXT: pand (%rdx), %xmm0 +; LIN-NEXT: pextrq $1, %xmm0, %rax +; LIN-NEXT: movq %rax, %rdx +; LIN-NEXT: shrq $32, %rdx +; LIN-NEXT: movq %xmm0, %rsi +; LIN-NEXT: movq %rsi, %rdi +; LIN-NEXT: shrq $32, %rdi +; LIN-NEXT: andl %ecx, %esi +; LIN-NEXT: andl %ecx, %eax +; LIN-NEXT: andq %rcx, %rdi +; LIN-NEXT: andq %rcx, %rdx +; LIN-NEXT: movq %rdi, %xmm1 +; LIN-NEXT: movq %rsi, %xmm0 +; LIN-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; LIN-NEXT: movq %rdx, %xmm2 +; LIN-NEXT: movq %rax, %xmm1 +; LIN-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; LIN-NEXT: retq +; +; WIN-LABEL: old: +; WIN: # %bb.0: +; WIN-NEXT: movdqa (%rdx), %xmm0 +; WIN-NEXT: pand (%r8), %xmm0 +; WIN-NEXT: pextrq $1, %xmm0, %r8 +; WIN-NEXT: movq %r8, %rcx +; WIN-NEXT: shrq $32, %rcx +; WIN-NEXT: movq %xmm0, %rax +; WIN-NEXT: movq %rax, %rdx +; WIN-NEXT: shrq $32, %rdx +; WIN-NEXT: andl %r9d, %eax +; WIN-NEXT: andl %r9d, %r8d +; WIN-NEXT: andq %r9, %rdx +; WIN-NEXT: andq %r9, %rcx +; WIN-NEXT: movq %rdx, %xmm1 +; WIN-NEXT: movq %rax, %xmm0 +; WIN-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; WIN-NEXT: movq %rcx, %xmm2 +; WIN-NEXT: movq %r8, %xmm1 +; WIN-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; WIN-NEXT: retq +; +; LIN32-LABEL: old: +; LIN32: # %bb.0: +; LIN32-NEXT: pushl %ebp +; LIN32-NEXT: movl %esp, %ebp +; LIN32-NEXT: pushl %esi +; LIN32-NEXT: andl $-16, %esp +; LIN32-NEXT: subl $32, %esp +; LIN32-NEXT: movl 20(%ebp), %eax +; LIN32-NEXT: movl 16(%ebp), %ecx +; LIN32-NEXT: movl 12(%ebp), %edx +; LIN32-NEXT: movaps (%edx), %xmm0 +; LIN32-NEXT: andps (%ecx), %xmm0 +; LIN32-NEXT: movaps %xmm0, (%esp) +; LIN32-NEXT: movl 
(%esp), %ecx +; LIN32-NEXT: andl %eax, %ecx +; LIN32-NEXT: movl {{[0-9]+}}(%esp), %edx +; LIN32-NEXT: andl %eax, %edx +; LIN32-NEXT: movl {{[0-9]+}}(%esp), %esi +; LIN32-NEXT: andl %eax, %esi +; LIN32-NEXT: andl {{[0-9]+}}(%esp), %eax +; LIN32-NEXT: movd %edx, %xmm1 +; LIN32-NEXT: movd %ecx, %xmm0 +; LIN32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; LIN32-NEXT: movd %eax, %xmm2 +; LIN32-NEXT: movd %esi, %xmm1 +; LIN32-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; LIN32-NEXT: leal -4(%ebp), %esp +; LIN32-NEXT: popl %esi +; LIN32-NEXT: popl %ebp +; LIN32-NEXT: retl %a = load <4 x i32>, <4 x i32>* %i %b = load <4 x i32>, <4 x i32>* %h %j = and <4 x i32> %a, %b @@ -77,7 +171,7 @@ define <4 x i64> @old(double* %p, <4 x i32>* %i, <4 x i32>* %h, i64 %f) nounwind %q0 = zext i32 %d0 to i64 %q1 = zext i32 %d1 to i64 %q2 = zext i32 %d2 to i64 - %q3 = zext i32 %d3 to i64 + %q3 = zext i32 %d3 to i64 %r0 = and i64 %q0, %f %r1 = and i64 %q1, %f %r2 = and i64 %q2, %f diff --git a/test/CodeGen/X86/masked_gather_scatter.ll b/test/CodeGen/X86/masked_gather_scatter.ll index d318dde34434..d3521ca9f1e3 100644 --- a/test/CodeGen/X86/masked_gather_scatter.ll +++ b/test/CodeGen/X86/masked_gather_scatter.ll @@ -497,7 +497,7 @@ define <8 x i32> @test9(%struct.ST* %base, <8 x i64> %ind1, <8 x i32>%ind5) { ; SKX_SMALL-NEXT: vpbroadcastq %rdi, %zmm2 ; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; SKX_SMALL-NEXT: vpmovsxdq %ymm1, %zmm1 -; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1 +; SKX_SMALL-NEXT: vpmuldq {{.*}}(%rip){1to8}, %zmm1, %zmm1 ; SKX_SMALL-NEXT: vpaddq %zmm1, %zmm0, %zmm0 ; SKX_SMALL-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ; SKX_SMALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1 @@ -510,7 +510,7 @@ define <8 x i32> @test9(%struct.ST* %base, <8 x i64> %ind1, <8 x i32>%ind5) { ; SKX_LARGE-NEXT: vpbroadcastq %rdi, %zmm2 ; SKX_LARGE-NEXT: vpmovsxdq %ymm1, %zmm1 ; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax -; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm1, 
%zmm1 +; SKX_LARGE-NEXT: vpmuldq (%rax){1to8}, %zmm1, %zmm1 ; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax ; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm0, %zmm0 ; SKX_LARGE-NEXT: vpaddq %zmm1, %zmm0, %zmm0 @@ -582,7 +582,7 @@ define <8 x i32> @test10(%struct.ST* %base, <8 x i64> %i1, <8 x i32>%ind5) { ; SKX_SMALL-NEXT: vpbroadcastq %rdi, %zmm2 ; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; SKX_SMALL-NEXT: vpmovsxdq %ymm1, %zmm1 -; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1 +; SKX_SMALL-NEXT: vpmuldq {{.*}}(%rip){1to8}, %zmm1, %zmm1 ; SKX_SMALL-NEXT: vpaddq %zmm1, %zmm0, %zmm0 ; SKX_SMALL-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ; SKX_SMALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1 @@ -595,7 +595,7 @@ define <8 x i32> @test10(%struct.ST* %base, <8 x i64> %i1, <8 x i32>%ind5) { ; SKX_LARGE-NEXT: vpbroadcastq %rdi, %zmm2 ; SKX_LARGE-NEXT: vpmovsxdq %ymm1, %zmm1 ; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax -; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm1, %zmm1 +; SKX_LARGE-NEXT: vpmuldq (%rax){1to8}, %zmm1, %zmm1 ; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax ; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm0, %zmm0 ; SKX_LARGE-NEXT: vpaddq %zmm1, %zmm0, %zmm0 diff --git a/test/CodeGen/X86/setcc-combine.ll b/test/CodeGen/X86/setcc-combine.ll index a4a8e67d742c..56cff4ab6f2f 100644 --- a/test/CodeGen/X86/setcc-combine.ll +++ b/test/CodeGen/X86/setcc-combine.ll @@ -183,3 +183,27 @@ define i32 @test_gt_2(<4 x i32> %A, <4 x i32> %B) { ret i32 %t1 } +; (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2) +; Don't combine with i1 - out of range constant +define void @test_i1_uge(i1 *%A2) { +; CHECK-LABEL: test_i1_uge: +; CHECK: # %bb.0: +; CHECK-NEXT: movb (%rdi), %al +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: xorb $1, %cl +; CHECK-NEXT: andb %cl, %al +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: negq %rax +; CHECK-NEXT: andb $1, %cl +; CHECK-NEXT: movb %cl, (%rdi,%rax) +; CHECK-NEXT: retq + %L5 = load i1, i1* %A2 + 
%C3 = icmp ne i1 %L5, true + %C8 = icmp eq i1 %L5, false + %C9 = icmp ugt i1 %C3, %C8 + %G3 = getelementptr i1, i1* %A2, i1 %C9 + store i1 %C3, i1* %G3 + ret void +} + diff --git a/test/CodeGen/X86/shrink_vmul.ll b/test/CodeGen/X86/shrink_vmul.ll index a516c709517d..ced3a40e4a46 100644 --- a/test/CodeGen/X86/shrink_vmul.ll +++ b/test/CodeGen/X86/shrink_vmul.ll @@ -112,13 +112,14 @@ define void @mul_4xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SSE-NEXT: movl c, %esi ; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X86-SSE-NEXT: pxor %xmm2, %xmm2 -; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] -; X86-SSE-NEXT: pmullw %xmm0, %xmm1 -; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] -; X86-SSE-NEXT: movdqu %xmm1, (%esi,%ecx,4) +; X86-SSE-NEXT: pxor %xmm1, %xmm1 +; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; X86-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; X86-SSE-NEXT: pmaddwd %xmm0, %xmm2 +; X86-SSE-NEXT: movdqu %xmm2, (%esi,%ecx,4) ; X86-SSE-NEXT: popl %esi ; X86-SSE-NEXT: 
retl ; @@ -142,13 +143,14 @@ define void @mul_4xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 ; X64-SSE: # %bb.0: # %entry ; X64-SSE-NEXT: movq {{.*}}(%rip), %rax ; X64-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X64-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X64-SSE-NEXT: pxor %xmm2, %xmm2 -; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] -; X64-SSE-NEXT: pmullw %xmm0, %xmm1 -; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] -; X64-SSE-NEXT: movdqu %xmm1, (%rax,%rdx,4) +; X64-SSE-NEXT: pxor %xmm1, %xmm1 +; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; X64-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; X64-SSE-NEXT: pmaddwd %xmm0, %xmm2 +; X64-SSE-NEXT: movdqu %xmm2, (%rax,%rdx,4) ; X64-SSE-NEXT: retq ; ; X64-AVX-LABEL: mul_4xi8: @@ -2215,13 +2217,7 @@ define void @PR34947() { ; X86-SSE-NEXT: xorl %edx, %edx ; X86-SSE-NEXT: divl (%eax) ; X86-SSE-NEXT: movd %edx, %xmm0 -; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [8199,8199,8199,8199] -; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3] -; X86-SSE-NEXT: pmuludq %xmm2, %xmm1 -; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] -; 
X86-SSE-NEXT: pmuludq %xmm2, %xmm3 -; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3] -; X86-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; X86-SSE-NEXT: pmaddwd {{\.LCPI.*}}, %xmm1 ; X86-SSE-NEXT: movl $8199, %eax # imm = 0x2007 ; X86-SSE-NEXT: movd %eax, %xmm2 ; X86-SSE-NEXT: pmuludq %xmm0, %xmm2 @@ -2415,13 +2411,7 @@ define void @PR34947() { ; X64-SSE-NEXT: xorl %edx, %edx ; X64-SSE-NEXT: divl (%rax) ; X64-SSE-NEXT: movd %edx, %xmm0 -; X64-SSE-NEXT: movdqa {{.*#+}} xmm2 = [8199,8199,8199,8199] -; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3] -; X64-SSE-NEXT: pmuludq %xmm2, %xmm1 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] -; X64-SSE-NEXT: pmuludq %xmm2, %xmm3 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3] -; X64-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; X64-SSE-NEXT: pmaddwd {{.*}}(%rip), %xmm1 ; X64-SSE-NEXT: movl $8199, %eax # imm = 0x2007 ; X64-SSE-NEXT: movd %eax, %xmm2 ; X64-SSE-NEXT: pmuludq %xmm0, %xmm2 diff --git a/test/CodeGen/X86/slow-pmulld.ll b/test/CodeGen/X86/slow-pmulld.ll index 4d73b11349f5..325e6ee4085a 100644 --- a/test/CodeGen/X86/slow-pmulld.ll +++ b/test/CodeGen/X86/slow-pmulld.ll @@ -10,22 +10,14 @@ define <4 x i32> @foo(<4 x i8> %A) { ; CHECK32-LABEL: foo: ; CHECK32: # %bb.0: -; CHECK32-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,xmm0[4],zero,xmm0[8],zero,xmm0[12],zero,xmm0[u,u,u,u,u,u,u,u] -; CHECK32-NEXT: movdqa {{.*#+}} xmm1 = <18778,18778,18778,18778,u,u,u,u> -; CHECK32-NEXT: movdqa %xmm0, %xmm2 -; CHECK32-NEXT: pmullw %xmm1, %xmm0 -; CHECK32-NEXT: pmulhw %xmm1, %xmm2 -; CHECK32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; CHECK32-NEXT: pand {{\.LCPI.*}}, %xmm0 +; CHECK32-NEXT: pmaddwd {{\.LCPI.*}}, %xmm0 ; CHECK32-NEXT: retl ; ; CHECK64-LABEL: foo: ; CHECK64: # %bb.0: -; CHECK64-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,xmm0[4],zero,xmm0[8],zero,xmm0[12],zero,xmm0[u,u,u,u,u,u,u,u] -; CHECK64-NEXT: movdqa 
{{.*#+}} xmm1 = <18778,18778,18778,18778,u,u,u,u> -; CHECK64-NEXT: movdqa %xmm0, %xmm2 -; CHECK64-NEXT: pmullw %xmm1, %xmm0 -; CHECK64-NEXT: pmulhw %xmm1, %xmm2 -; CHECK64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; CHECK64-NEXT: pand {{.*}}(%rip), %xmm0 +; CHECK64-NEXT: pmaddwd {{.*}}(%rip), %xmm0 ; CHECK64-NEXT: retq ; ; SSE4-32-LABEL: foo: diff --git a/test/CodeGen/X86/sse2-schedule.ll b/test/CodeGen/X86/sse2-schedule.ll index ad2edfe0959e..a789b861b7aa 100644 --- a/test/CodeGen/X86/sse2-schedule.ll +++ b/test/CodeGen/X86/sse2-schedule.ll @@ -5624,16 +5624,8 @@ define <4 x i32> @test_pmaddwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; ATOM-LABEL: test_pmaddwd: ; ATOM: # %bb.0: -; ATOM-NEXT: pmaddwd %xmm1, %xmm0 -; ATOM-NEXT: pmaddwd (%rdi), %xmm0 -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] +; ATOM-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:5.00] +; ATOM-NEXT: pmaddwd (%rdi), %xmm0 # sched: [5:5.00] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_pmaddwd: @@ -6241,16 +6233,8 @@ define <2 x i64> @test_pmuludq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; ATOM-LABEL: test_pmuludq: ; ATOM: # %bb.0: -; ATOM-NEXT: pmuludq %xmm1, %xmm0 -; ATOM-NEXT: pmuludq (%rdi), %xmm0 -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] +; ATOM-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:5.00] +; ATOM-NEXT: pmuludq (%rdi), %xmm0 # sched: [5:5.00] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_pmuludq: @@ -6394,12 +6378,8 @@ define <2 
x i64> @test_psadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; ; ATOM-LABEL: test_psadbw: ; ATOM: # %bb.0: -; ATOM-NEXT: psadbw %xmm1, %xmm0 -; ATOM-NEXT: psadbw (%rdi), %xmm0 -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] +; ATOM-NEXT: psadbw %xmm1, %xmm0 # sched: [1:0.50] +; ATOM-NEXT: psadbw (%rdi), %xmm0 # sched: [1:1.00] ; ATOM-NEXT: nop # sched: [1:0.50] ; ATOM-NEXT: nop # sched: [1:0.50] ; ATOM-NEXT: nop # sched: [1:0.50] diff --git a/test/MC/ARM/branch-disassemble.s b/test/MC/ARM/branch-disassemble.s index 4df40e05e8c9..5380a1d0b9e4 100644 --- a/test/MC/ARM/branch-disassemble.s +++ b/test/MC/ARM/branch-disassemble.s @@ -7,8 +7,8 @@ @ RUN: | FileCheck %s -check-prefix CHECK-THUMB b.w .Lbranch -@ CHECK-ARM: b #4 <$a.0+0xC> -@ CHECK-THUMB: b.w #8 <$t.0+0xC> +@ CHECK-ARM: b #4 <$a.0+0xc> +@ CHECK-THUMB: b.w #8 <$t.0+0xc> adds r0, r1, #42 adds r1, r2, #42 .Lbranch: diff --git a/test/MC/ELF/comdat-declaration-errors.s b/test/MC/ELF/comdat-declaration-errors.s new file mode 100644 index 000000000000..fade8cfe41dd --- /dev/null +++ b/test/MC/ELF/comdat-declaration-errors.s @@ -0,0 +1,14 @@ +// RUN: not llvm-mc -triple x86_64-pc-linux-gnu %s \ +// RUN: -filetype=obj -o %t.o 2>&1 | FileCheck %s + +// Check we error out on incorrect COMDAT declarations +// and not just silently ignore them. 
+ +// CHECK: error: invalid group name +// CHECK-NEXT: .section .foo,"G",@progbits,-abc,comdat + +// CHECK: error: invalid linkage +// CHECK-NEXT: .section .bar,"G",@progbits,abc,-comdat + +.section .foo,"G",@progbits,-abc,comdat +.section .bar,"G",@progbits,abc,-comdat diff --git a/test/MC/X86/PREFETCH-32.s b/test/MC/X86/PREFETCH-32.s new file mode 100644 index 000000000000..caec44ea386c --- /dev/null +++ b/test/MC/X86/PREFETCH-32.s @@ -0,0 +1,169 @@ +// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: prefetch -485498096(%edx,%eax,4) +// CHECK: encoding: [0x0f,0x0d,0x84,0x82,0x10,0xe3,0x0f,0xe3] +prefetch -485498096(%edx,%eax,4) + +// CHECK: prefetch 485498096(%edx,%eax,4) +// CHECK: encoding: [0x0f,0x0d,0x84,0x82,0xf0,0x1c,0xf0,0x1c] +prefetch 485498096(%edx,%eax,4) + +// CHECK: prefetch 485498096(%edx) +// CHECK: encoding: [0x0f,0x0d,0x82,0xf0,0x1c,0xf0,0x1c] +prefetch 485498096(%edx) + +// CHECK: prefetch 485498096 +// CHECK: encoding: [0x0f,0x0d,0x05,0xf0,0x1c,0xf0,0x1c] +prefetch 485498096 + +// CHECK: prefetch 64(%edx,%eax) +// CHECK: encoding: [0x0f,0x0d,0x44,0x02,0x40] +prefetch 64(%edx,%eax) + +// CHECK: prefetch (%edx) +// CHECK: encoding: [0x0f,0x0d,0x02] +prefetch (%edx) + +// CHECK: prefetchnta -485498096(%edx,%eax,4) +// CHECK: encoding: [0x0f,0x18,0x84,0x82,0x10,0xe3,0x0f,0xe3] +prefetchnta -485498096(%edx,%eax,4) + +// CHECK: prefetchnta 485498096(%edx,%eax,4) +// CHECK: encoding: [0x0f,0x18,0x84,0x82,0xf0,0x1c,0xf0,0x1c] +prefetchnta 485498096(%edx,%eax,4) + +// CHECK: prefetchnta 485498096(%edx) +// CHECK: encoding: [0x0f,0x18,0x82,0xf0,0x1c,0xf0,0x1c] +prefetchnta 485498096(%edx) + +// CHECK: prefetchnta 485498096 +// CHECK: encoding: [0x0f,0x18,0x05,0xf0,0x1c,0xf0,0x1c] +prefetchnta 485498096 + +// CHECK: prefetchnta 64(%edx,%eax) +// CHECK: encoding: [0x0f,0x18,0x44,0x02,0x40] +prefetchnta 64(%edx,%eax) + +// CHECK: prefetchnta (%edx) +// CHECK: encoding: [0x0f,0x18,0x02] +prefetchnta (%edx) + +// 
CHECK: prefetcht0 -485498096(%edx,%eax,4) +// CHECK: encoding: [0x0f,0x18,0x8c,0x82,0x10,0xe3,0x0f,0xe3] +prefetcht0 -485498096(%edx,%eax,4) + +// CHECK: prefetcht0 485498096(%edx,%eax,4) +// CHECK: encoding: [0x0f,0x18,0x8c,0x82,0xf0,0x1c,0xf0,0x1c] +prefetcht0 485498096(%edx,%eax,4) + +// CHECK: prefetcht0 485498096(%edx) +// CHECK: encoding: [0x0f,0x18,0x8a,0xf0,0x1c,0xf0,0x1c] +prefetcht0 485498096(%edx) + +// CHECK: prefetcht0 485498096 +// CHECK: encoding: [0x0f,0x18,0x0d,0xf0,0x1c,0xf0,0x1c] +prefetcht0 485498096 + +// CHECK: prefetcht0 64(%edx,%eax) +// CHECK: encoding: [0x0f,0x18,0x4c,0x02,0x40] +prefetcht0 64(%edx,%eax) + +// CHECK: prefetcht0 (%edx) +// CHECK: encoding: [0x0f,0x18,0x0a] +prefetcht0 (%edx) + +// CHECK: prefetcht1 -485498096(%edx,%eax,4) +// CHECK: encoding: [0x0f,0x18,0x94,0x82,0x10,0xe3,0x0f,0xe3] +prefetcht1 -485498096(%edx,%eax,4) + +// CHECK: prefetcht1 485498096(%edx,%eax,4) +// CHECK: encoding: [0x0f,0x18,0x94,0x82,0xf0,0x1c,0xf0,0x1c] +prefetcht1 485498096(%edx,%eax,4) + +// CHECK: prefetcht1 485498096(%edx) +// CHECK: encoding: [0x0f,0x18,0x92,0xf0,0x1c,0xf0,0x1c] +prefetcht1 485498096(%edx) + +// CHECK: prefetcht1 485498096 +// CHECK: encoding: [0x0f,0x18,0x15,0xf0,0x1c,0xf0,0x1c] +prefetcht1 485498096 + +// CHECK: prefetcht1 64(%edx,%eax) +// CHECK: encoding: [0x0f,0x18,0x54,0x02,0x40] +prefetcht1 64(%edx,%eax) + +// CHECK: prefetcht1 (%edx) +// CHECK: encoding: [0x0f,0x18,0x12] +prefetcht1 (%edx) + +// CHECK: prefetcht2 -485498096(%edx,%eax,4) +// CHECK: encoding: [0x0f,0x18,0x9c,0x82,0x10,0xe3,0x0f,0xe3] +prefetcht2 -485498096(%edx,%eax,4) + +// CHECK: prefetcht2 485498096(%edx,%eax,4) +// CHECK: encoding: [0x0f,0x18,0x9c,0x82,0xf0,0x1c,0xf0,0x1c] +prefetcht2 485498096(%edx,%eax,4) + +// CHECK: prefetcht2 485498096(%edx) +// CHECK: encoding: [0x0f,0x18,0x9a,0xf0,0x1c,0xf0,0x1c] +prefetcht2 485498096(%edx) + +// CHECK: prefetcht2 485498096 +// CHECK: encoding: [0x0f,0x18,0x1d,0xf0,0x1c,0xf0,0x1c] +prefetcht2 485498096 + +// 
CHECK: prefetcht2 64(%edx,%eax) +// CHECK: encoding: [0x0f,0x18,0x5c,0x02,0x40] +prefetcht2 64(%edx,%eax) + +// CHECK: prefetcht2 (%edx) +// CHECK: encoding: [0x0f,0x18,0x1a] +prefetcht2 (%edx) + +// CHECK: prefetchw -485498096(%edx,%eax,4) +// CHECK: encoding: [0x0f,0x0d,0x8c,0x82,0x10,0xe3,0x0f,0xe3] +prefetchw -485498096(%edx,%eax,4) + +// CHECK: prefetchw 485498096(%edx,%eax,4) +// CHECK: encoding: [0x0f,0x0d,0x8c,0x82,0xf0,0x1c,0xf0,0x1c] +prefetchw 485498096(%edx,%eax,4) + +// CHECK: prefetchw 485498096(%edx) +// CHECK: encoding: [0x0f,0x0d,0x8a,0xf0,0x1c,0xf0,0x1c] +prefetchw 485498096(%edx) + +// CHECK: prefetchw 485498096 +// CHECK: encoding: [0x0f,0x0d,0x0d,0xf0,0x1c,0xf0,0x1c] +prefetchw 485498096 + +// CHECK: prefetchw 64(%edx,%eax) +// CHECK: encoding: [0x0f,0x0d,0x4c,0x02,0x40] +prefetchw 64(%edx,%eax) + +// CHECK: prefetchw (%edx) +// CHECK: encoding: [0x0f,0x0d,0x0a] +prefetchw (%edx) + +// CHECK: prefetchwt1 -485498096(%edx,%eax,4) +// CHECK: encoding: [0x0f,0x0d,0x94,0x82,0x10,0xe3,0x0f,0xe3] +prefetchwt1 -485498096(%edx,%eax,4) + +// CHECK: prefetchwt1 485498096(%edx,%eax,4) +// CHECK: encoding: [0x0f,0x0d,0x94,0x82,0xf0,0x1c,0xf0,0x1c] +prefetchwt1 485498096(%edx,%eax,4) + +// CHECK: prefetchwt1 485498096(%edx) +// CHECK: encoding: [0x0f,0x0d,0x92,0xf0,0x1c,0xf0,0x1c] +prefetchwt1 485498096(%edx) + +// CHECK: prefetchwt1 485498096 +// CHECK: encoding: [0x0f,0x0d,0x15,0xf0,0x1c,0xf0,0x1c] +prefetchwt1 485498096 + +// CHECK: prefetchwt1 64(%edx,%eax) +// CHECK: encoding: [0x0f,0x0d,0x54,0x02,0x40 +prefetchwt1 64(%edx,%eax) + +// CHECK: prefetchwt1 (%edx) +// CHECK: encoding: [0x0f,0x0d,0x12] +prefetchwt1 (%edx) diff --git a/test/MC/X86/PREFETCH-64.s b/test/MC/X86/PREFETCH-64.s new file mode 100644 index 000000000000..0c4a126a2a20 --- /dev/null +++ b/test/MC/X86/PREFETCH-64.s @@ -0,0 +1,170 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: prefetch 485498096 +// CHECK: encoding: 
[0x0f,0x0d,0x04,0x25,0xf0,0x1c,0xf0,0x1c] +prefetch 485498096 + +// CHECK: prefetch 64(%rdx) +// CHECK: encoding: [0x0f,0x0d,0x42,0x40] +prefetch 64(%rdx) + +// CHECK: prefetch 64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x0d,0x44,0x82,0x40] +prefetch 64(%rdx,%rax,4) + +// CHECK: prefetch -64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x0d,0x44,0x82,0xc0] +prefetch -64(%rdx,%rax,4) + +// CHECK: prefetch 64(%rdx,%rax) +// CHECK: encoding: [0x0f,0x0d,0x44,0x02,0x40] +prefetch 64(%rdx,%rax) + +// CHECK: prefetchnta 485498096 +// CHECK: encoding: [0x0f,0x18,0x04,0x25,0xf0,0x1c,0xf0,0x1c] +prefetchnta 485498096 + +// CHECK: prefetchnta 64(%rdx) +// CHECK: encoding: [0x0f,0x18,0x42,0x40] +prefetchnta 64(%rdx) + +// CHECK: prefetchnta 64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x18,0x44,0x82,0x40] +prefetchnta 64(%rdx,%rax,4) + +// CHECK: prefetchnta -64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x18,0x44,0x82,0xc0] +prefetchnta -64(%rdx,%rax,4) + +// CHECK: prefetchnta 64(%rdx,%rax) +// CHECK: encoding: [0x0f,0x18,0x44,0x02,0x40] +prefetchnta 64(%rdx,%rax) + +// CHECK: prefetchnta (%rdx) +// CHECK: encoding: [0x0f,0x18,0x02] +prefetchnta (%rdx) + +// CHECK: prefetch (%rdx) +// CHECK: encoding: [0x0f,0x0d,0x02] +prefetch (%rdx) + +// CHECK: prefetcht0 485498096 +// CHECK: encoding: [0x0f,0x18,0x0c,0x25,0xf0,0x1c,0xf0,0x1c] +prefetcht0 485498096 + +// CHECK: prefetcht0 64(%rdx) +// CHECK: encoding: [0x0f,0x18,0x4a,0x40] +prefetcht0 64(%rdx) + +// CHECK: prefetcht0 64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x18,0x4c,0x82,0x40] +prefetcht0 64(%rdx,%rax,4) + +// CHECK: prefetcht0 -64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x18,0x4c,0x82,0xc0] +prefetcht0 -64(%rdx,%rax,4) + +// CHECK: prefetcht0 64(%rdx,%rax) +// CHECK: encoding: [0x0f,0x18,0x4c,0x02,0x40] +prefetcht0 64(%rdx,%rax) + +// CHECK: prefetcht0 (%rdx) +// CHECK: encoding: [0x0f,0x18,0x0a] +prefetcht0 (%rdx) + +// CHECK: prefetcht1 485498096 +// CHECK: encoding: [0x0f,0x18,0x14,0x25,0xf0,0x1c,0xf0,0x1c] +prefetcht1 
485498096 + +// CHECK: prefetcht1 64(%rdx) +// CHECK: encoding: [0x0f,0x18,0x52,0x40] +prefetcht1 64(%rdx) + +// CHECK: prefetcht1 64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x18,0x54,0x82,0x40] +prefetcht1 64(%rdx,%rax,4) + +// CHECK: prefetcht1 -64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x18,0x54,0x82,0xc0] +prefetcht1 -64(%rdx,%rax,4) + +// CHECK: prefetcht1 64(%rdx,%rax) +// CHECK: encoding: [0x0f,0x18,0x54,0x02,0x40] +prefetcht1 64(%rdx,%rax) + +// CHECK: prefetcht1 (%rdx) +// CHECK: encoding: [0x0f,0x18,0x12] +prefetcht1 (%rdx) + +// CHECK: prefetcht2 485498096 +// CHECK: encoding: [0x0f,0x18,0x1c,0x25,0xf0,0x1c,0xf0,0x1c] +prefetcht2 485498096 + +// CHECK: prefetcht2 64(%rdx) +// CHECK: encoding: [0x0f,0x18,0x5a,0x40] +prefetcht2 64(%rdx) + +// CHECK: prefetcht2 64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x18,0x5c,0x82,0x40] +prefetcht2 64(%rdx,%rax,4) + +// CHECK: prefetcht2 -64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x18,0x5c,0x82,0xc0] +prefetcht2 -64(%rdx,%rax,4) + +// CHECK: prefetcht2 64(%rdx,%rax) +// CHECK: encoding: [0x0f,0x18,0x5c,0x02,0x40] +prefetcht2 64(%rdx,%rax) + +// CHECK: prefetcht2 (%rdx) +// CHECK: encoding: [0x0f,0x18,0x1a] +prefetcht2 (%rdx) + +// CHECK: prefetchw 485498096 +// CHECK: encoding: [0x0f,0x0d,0x0c,0x25,0xf0,0x1c,0xf0,0x1c] +prefetchw 485498096 + +// CHECK: prefetchw 64(%rdx) +// CHECK: encoding: [0x0f,0x0d,0x4a,0x40] +prefetchw 64(%rdx) + +// CHECK: prefetchw 64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x0d,0x4c,0x82,0x40] +prefetchw 64(%rdx,%rax,4) + +// CHECK: prefetchw -64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x0d,0x4c,0x82,0xc0] +prefetchw -64(%rdx,%rax,4) + +// CHECK: prefetchw 64(%rdx,%rax) +// CHECK: encoding: [0x0f,0x0d,0x4c,0x02,0x40] +prefetchw 64(%rdx,%rax) + +// CHECK: prefetchw (%rdx) +// CHECK: encoding: [0x0f,0x0d,0x0a] +prefetchw (%rdx) + +// CHECK: prefetchwt1 485498096 +// CHECK: encoding: [0x0f,0x0d,0x14,0x25,0xf0,0x1c,0xf0,0x1c] +prefetchwt1 485498096 + +// CHECK: prefetchwt1 64(%rdx) +// CHECK: 
encoding: [0x0f,0x0d,0x52,0x40] +prefetchwt1 64(%rdx) + +// CHECK: prefetchwt1 64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x0d,0x54,0x82,0x40] +prefetchwt1 64(%rdx,%rax,4) + +// CHECK: prefetchwt1 -64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x0d,0x54,0x82,0xc0] +prefetchwt1 -64(%rdx,%rax,4) + +// CHECK: prefetchwt1 64(%rdx,%rax) +// CHECK: encoding: [0x0f,0x0d,0x54,0x02,0x40] +prefetchwt1 64(%rdx,%rax) + +// CHECK: prefetchwt1 (%rdx) +// CHECK: encoding: [0x0f,0x0d,0x12] +prefetchwt1 (%rdx) + diff --git a/test/MC/X86/RDPMC-32.s b/test/MC/X86/RDPMC-32.s new file mode 100644 index 000000000000..5168af3a62c1 --- /dev/null +++ b/test/MC/X86/RDPMC-32.s @@ -0,0 +1,6 @@ +// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: rdpmc +// CHECK: encoding: [0x0f,0x33] +rdpmc + diff --git a/test/MC/X86/RDPMC-64.s b/test/MC/X86/RDPMC-64.s new file mode 100644 index 000000000000..56fa3d9fa828 --- /dev/null +++ b/test/MC/X86/RDPMC-64.s @@ -0,0 +1,6 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: rdpmc +// CHECK: encoding: [0x0f,0x33] +rdpmc + diff --git a/test/MC/X86/RDRAND-32.s b/test/MC/X86/RDRAND-32.s new file mode 100644 index 000000000000..5461ca74eabb --- /dev/null +++ b/test/MC/X86/RDRAND-32.s @@ -0,0 +1,6 @@ +// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: rdrandl %eax +// CHECK: encoding: [0x0f,0xc7,0xf0] +rdrandl %eax + diff --git a/test/MC/X86/RDRAND-64.s b/test/MC/X86/RDRAND-64.s new file mode 100644 index 000000000000..3482c0ecd5c3 --- /dev/null +++ b/test/MC/X86/RDRAND-64.s @@ -0,0 +1,14 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: rdrandl %r13d +// CHECK: encoding: [0x41,0x0f,0xc7,0xf5] +rdrandl %r13d + +// CHECK: rdrandq %r13 +// CHECK: encoding: [0x49,0x0f,0xc7,0xf5] +rdrandq %r13 + +// CHECK: rdrandw %r13w +// CHECK: encoding: [0x66,0x41,0x0f,0xc7,0xf5] +rdrandw %r13w + diff --git 
a/test/MC/X86/RDSEED-32.s b/test/MC/X86/RDSEED-32.s new file mode 100644 index 000000000000..87be0d502a66 --- /dev/null +++ b/test/MC/X86/RDSEED-32.s @@ -0,0 +1,6 @@ +// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: rdseedl %eax +// CHECK: encoding: [0x0f,0xc7,0xf8] +rdseedl %eax + diff --git a/test/MC/X86/RDSEED-64.s b/test/MC/X86/RDSEED-64.s new file mode 100644 index 000000000000..0d710ceaa5bb --- /dev/null +++ b/test/MC/X86/RDSEED-64.s @@ -0,0 +1,14 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: rdseedl %r13d +// CHECK: encoding: [0x41,0x0f,0xc7,0xfd] +rdseedl %r13d + +// CHECK: rdseedq %r13 +// CHECK: encoding: [0x49,0x0f,0xc7,0xfd] +rdseedq %r13 + +// CHECK: rdseedw %r13w +// CHECK: encoding: [0x66,0x41,0x0f,0xc7,0xfd] +rdseedw %r13w + diff --git a/test/MC/X86/RDTSCP-32.s b/test/MC/X86/RDTSCP-32.s new file mode 100644 index 000000000000..48232edf7d52 --- /dev/null +++ b/test/MC/X86/RDTSCP-32.s @@ -0,0 +1,6 @@ +// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: rdtscp +// CHECK: encoding: [0x0f,0x01,0xf9] +rdtscp + diff --git a/test/MC/X86/RDTSCP-64.s b/test/MC/X86/RDTSCP-64.s new file mode 100644 index 000000000000..045fd49a2738 --- /dev/null +++ b/test/MC/X86/RDTSCP-64.s @@ -0,0 +1,6 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: rdtscp +// CHECK: encoding: [0x0f,0x01,0xf9] +rdtscp + diff --git a/test/MC/X86/RDWRFSGS-64.s b/test/MC/X86/RDWRFSGS-64.s new file mode 100644 index 000000000000..47314caf867d --- /dev/null +++ b/test/MC/X86/RDWRFSGS-64.s @@ -0,0 +1,34 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: rdfsbasel %r13d +// CHECK: encoding: [0xf3,0x41,0x0f,0xae,0xc5] +rdfsbasel %r13d + +// CHECK: rdfsbaseq %r13 +// CHECK: encoding: [0xf3,0x49,0x0f,0xae,0xc5] +rdfsbaseq %r13 + +// CHECK: rdgsbasel %r13d +// CHECK: encoding: 
[0xf3,0x41,0x0f,0xae,0xcd] +rdgsbasel %r13d + +// CHECK: rdgsbaseq %r13 +// CHECK: encoding: [0xf3,0x49,0x0f,0xae,0xcd] +rdgsbaseq %r13 + +// CHECK: wrfsbasel %r13d +// CHECK: encoding: [0xf3,0x41,0x0f,0xae,0xd5] +wrfsbasel %r13d + +// CHECK: wrfsbaseq %r13 +// CHECK: encoding: [0xf3,0x49,0x0f,0xae,0xd5] +wrfsbaseq %r13 + +// CHECK: wrgsbasel %r13d +// CHECK: encoding: [0xf3,0x41,0x0f,0xae,0xdd] +wrgsbasel %r13d + +// CHECK: wrgsbaseq %r13 +// CHECK: encoding: [0xf3,0x49,0x0f,0xae,0xdd] +wrgsbaseq %r13 + diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s index b1e89cde9796..23846d921a8c 100644 --- a/test/MC/X86/x86-64.s +++ b/test/MC/X86/x86-64.s @@ -99,6 +99,10 @@ // CHECK: shll $2, %eax sall $2, %eax +// CHECK: rep movsb +rep # comment +movsb + // CHECK: rep // CHECK: insb rep;insb diff --git a/test/MC/X86/x86_64-asm-match.s b/test/MC/X86/x86_64-asm-match.s new file mode 100644 index 000000000000..3208e4f4e0f0 --- /dev/null +++ b/test/MC/X86/x86_64-asm-match.s @@ -0,0 +1,52 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown -debug-only=asm-matcher %s 2>&1 | FileCheck %s +// REQUIRES: asserts + +// CHECK: AsmMatcher: found 4 encodings with mnemonic 'pshufb' +// CHECK:Trying to match opcode MMX_PSHUFBrr64 +// CHECK: Matching formal operand class MCK_VR64 against actual operand at index 1 (): Opcode result: multiple operand mismatches, ignoring this opcode +// CHECK:Trying to match opcode PSHUFBrr +// CHECK: Matching formal operand class MCK_FR32 against actual operand at index 1 (): Opcode result: multiple operand mismatches, ignoring this opcode +// CHECK:Trying to match opcode PSHUFBrm +// CHECK: Matching formal operand class MCK_Mem128 against actual operand at index 1 (): match success using generic matcher +// CHECK: Matching formal operand class MCK_FR32 against actual operand at index 2 (): match success using generic matcher +// CHECK: Matching formal operand class InvalidMatchClass against actual operand at index 3: actual operand index out of range 
Opcode result: complete match, selecting this opcode +// CHECK:AsmMatcher: found 2 encodings with mnemonic 'sha1rnds4' +// CHECK:Trying to match opcode SHA1RNDS4rri +// CHECK: Matching formal operand class MCK_ImmUnsignedi8 against actual operand at index 1 (): match success using generic matcher +// CHECK: Matching formal operand class MCK_FR32 against actual operand at index 2 (): match success using generic matcher +// CHECK: Matching formal operand class MCK_FR32 against actual operand at index 3 (): match success using generic matcher +// CHECK: Matching formal operand class InvalidMatchClass against actual operand at index 4: actual operand index out of range Opcode result: complete match, selecting this opcode +// CHECK:AsmMatcher: found 4 encodings with mnemonic 'pinsrw' +// CHECK:Trying to match opcode MMX_PINSRWirri +// CHECK: Matching formal operand class MCK_ImmUnsignedi8 against actual operand at index 1 (): match success using generic matcher +// CHECK: Matching formal operand class MCK_GR32orGR64 against actual operand at index 2 (): match success using generic matcher +// CHECK: Matching formal operand class MCK_VR64 against actual operand at index 3 (): Opcode result: multiple operand mismatches, ignoring this opcode +// CHECK:Trying to match opcode PINSRWrri +// CHECK: Matching formal operand class MCK_ImmUnsignedi8 against actual operand at index 1 (): match success using generic matcher +// CHECK: Matching formal operand class MCK_GR32orGR64 against actual operand at index 2 (): match success using generic matcher +// CHECK: Matching formal operand class MCK_FR32 against actual operand at index 3 (): match success using generic matcher +// CHECK: Matching formal operand class InvalidMatchClass against actual operand at index 4: actual operand index out of range Opcode result: complete match, selecting this opcode +// CHECK:AsmMatcher: found 2 encodings with mnemonic 'crc32l' +// CHECK:Trying to match opcode CRC32r32r32 +// CHECK: Matching formal 
operand class MCK_GR32 against actual operand at index 1 (): Opcode result: multiple operand mismatches, ignoring this opcode +// CHECK:Trying to match opcode CRC32r32m32 +// CHECK: Matching formal operand class MCK_Mem32 against actual operand at index 1 (): match success using generic matcher +// CHECK: Matching formal operand class MCK_GR32 against actual operand at index 2 (): match success using generic matcher +// CHECK: Matching formal operand class InvalidMatchClass against actual operand at index 3: actual operand index out of range Opcode result: complete match, selecting this opcode +// CHECK:AsmMatcher: found 4 encodings with mnemonic 'punpcklbw' +// CHECK:Trying to match opcode MMX_PUNPCKLBWirr +// CHECK: Matching formal operand class MCK_VR64 against actual operand at index 1 (): match success using generic matcher +// CHECK: Matching formal operand class MCK_VR64 against actual operand at index 2 (): Opcode result: multiple operand mismatches, ignoring this opcode +// CHECK:Trying to match opcode MMX_PUNPCKLBWirm +// CHECK: Matching formal operand class MCK_VR64 against actual operand at index 1 (): match success using generic matcher +// CHECK: Matching formal operand class MCK_Mem64 against actual operand at index 2 (): match success using generic matcher +// CHECK: Matching formal operand class InvalidMatchClass against actual operand at index 3: actual operand index out of range Opcode result: complete match, selecting this opcode + + +pshufb CPI1_0(%rip), %xmm1 +sha1rnds4 $1, %xmm1, %xmm2 +pinsrw $3, %ecx, %xmm5 +crc32l %gs:0xdeadbeef(%rbx,%rcx,8),%ecx + +.intel_syntax +punpcklbw mm0, qword ptr [rsp] diff --git a/test/SafepointIRVerifier/from-same-relocation-in-phi-nodes.ll b/test/SafepointIRVerifier/from-same-relocation-in-phi-nodes.ll new file mode 100644 index 000000000000..4df19b2d7262 --- /dev/null +++ b/test/SafepointIRVerifier/from-same-relocation-in-phi-nodes.ll @@ -0,0 +1,26 @@ +; XFAIL: * +; RUN: opt -safepoint-ir-verifier-print-only 
-verify-safepoint-ir -S %s 2>&1 | FileCheck %s + +; In %merge %val.unrelocated, %ptr and %arg should be unrelocated. +; FIXME: if this test fails it is a false-positive alarm. IR is correct. +define void @test.unrelocated-phi.ok(i8 addrspace(1)* %arg) gc "statepoint-example" { +; CHECK-LABEL: Verifying gc pointers in function: test.unrelocated-phi.ok + bci_0: + %ptr = getelementptr i8, i8 addrspace(1)* %arg, i64 4 + br i1 undef, label %left, label %right + + left: + %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0) + br label %merge + + right: + br label %merge + + merge: +; CHECK: No illegal uses found by SafepointIRVerifier in: test.unrelocated-phi.ok + %val.unrelocated = phi i8 addrspace(1)* [ %arg, %left ], [ %ptr, %right ] + %c = icmp eq i8 addrspace(1)* %val.unrelocated, %arg + ret void +} + +declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) diff --git a/test/SafepointIRVerifier/unrecorded-live-at-sp.ll b/test/SafepointIRVerifier/unrecorded-live-at-sp.ll index e3f21c3e7133..5cd4aa741454 100644 --- a/test/SafepointIRVerifier/unrecorded-live-at-sp.ll +++ b/test/SafepointIRVerifier/unrecorded-live-at-sp.ll @@ -1,8 +1,9 @@ ; RUN: opt %s -safepoint-ir-verifier-print-only -verify-safepoint-ir -S 2>&1 | FileCheck %s ; CHECK: Illegal use of unrelocated value found! 
-; CHECK-NEXT: Def: %base_phi3 = phi %jObject addrspace(1)* [ %obj609.relocated, %not_zero146 ], [ %base_phi2, %bci_37-aload ], !is_base_value !0 -; CHECK-NEXT: Use: %base_phi2 = phi %jObject addrspace(1)* [ %base_phi3, %not_zero179 ], [ %cast5, %bci_0 ], !is_base_value !0 +; CHECK-NEXT: Def: %base_phi4 = phi %jObject addrspace(1)* addrspace(1)* [ %addr98.relocated, %not_zero146 ], [ %cast6, %bci_37-aload ], !is_base_value !0 +; CHECK-NEXT: Use: %safepoint_token = tail call token (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 0, i32 0, i32 ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, %jObject addrspace(1)* %base_phi1, %jObject addrspace(1)* addrspace(1)* %base_phi4, %jObject addrspace(1)* addrspace(1)* %relocated4, %jObject addrspace(1)* %relocated7) + %jObject = type { [8 x i8] } diff --git a/test/SafepointIRVerifier/uses-in-phi-nodes.ll b/test/SafepointIRVerifier/uses-in-phi-nodes.ll index d06eb6e0d9a7..bbf98577230d 100644 --- a/test/SafepointIRVerifier/uses-in-phi-nodes.ll +++ b/test/SafepointIRVerifier/uses-in-phi-nodes.ll @@ -14,9 +14,9 @@ define i8 addrspace(1)* @test.not.ok.0(i8 addrspace(1)* %arg) gc "statepoint-exa merge: ; CHECK: Illegal use of unrelocated value found! -; CHECK-NEXT: Def: i8 addrspace(1)* %arg -; CHECK-NEXT: Use: %val = phi i8 addrspace(1)* [ %arg, %left ], [ %arg, %right ] - %val = phi i8 addrspace(1)* [ %arg, %left ], [ %arg, %right] +; CHECK-NEXT: Def: %val = phi i8 addrspace(1)* [ %arg, %left ], [ %arg, %right ] +; CHECK-NEXT: Use: ret i8 addrspace(1)* %val + %val = phi i8 addrspace(1)* [ %arg, %left ], [ %arg, %right ] ret i8 addrspace(1)* %val } @@ -34,9 +34,9 @@ define i8 addrspace(1)* @test.not.ok.1(i8 addrspace(1)* %arg) gc "statepoint-exa merge: ; CHECK: Illegal use of unrelocated value found! 
-; CHECK-NEXT: Def: i8 addrspace(1)* %arg -; CHECK-NEXT: Use: %val = phi i8 addrspace(1)* [ %arg, %left ], [ null, %right ] - %val = phi i8 addrspace(1)* [ %arg, %left ], [ null, %right] +; CHECK-NEXT: Def: %val = phi i8 addrspace(1)* [ %arg, %left ], [ null, %right ] +; CHECK-NEXT: Use: ret i8 addrspace(1)* %val + %val = phi i8 addrspace(1)* [ %arg, %left ], [ null, %right ] ret i8 addrspace(1)* %val } @@ -74,5 +74,99 @@ define i8 addrspace(1)* @test.ok.1(i8 addrspace(1)* %arg) gc "statepoint-example ret i8 addrspace(1)* %val } +; It should be allowed to compare poisoned ptr with null. +define void @test.poisoned.cmp.ok(i8 addrspace(1)* %arg) gc "statepoint-example" { +; CHECK-LABEL: Verifying gc pointers in function: test.poisoned.cmp.ok + bci_0: + br i1 undef, label %left, label %right + + left: + %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %arg , i32 -1, i32 0, i32 0, i32 0) + %arg.relocated = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token, i32 7, i32 7) ; arg, arg + br label %merge + + right: + %safepoint_token2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %arg , i32 -1, i32 0, i32 0, i32 0) + br label %merge + + merge: +; CHECK: No illegal uses found by SafepointIRVerifier in: test.poisoned.cmp.ok + %val.poisoned = phi i8 addrspace(1)* [ %arg.relocated, %left ], [ %arg, %right ] + %c = icmp eq i8 addrspace(1)* %val.poisoned, null + ret void +} + +; It is illegal to compare poisoned ptr and relocated. 
+define void @test.poisoned.cmp.fail.0(i8 addrspace(1)* %arg) gc "statepoint-example" { +; CHECK-LABEL: Verifying gc pointers in function: test.poisoned.cmp.fail.0 + bci_0: + br i1 undef, label %left, label %right + + left: + %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %arg , i32 -1, i32 0, i32 0, i32 0) + %arg.relocated = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token, i32 7, i32 7) ; arg, arg + br label %merge + + right: + %safepoint_token2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %arg , i32 -1, i32 0, i32 0, i32 0) + %arg.relocated2 = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token2, i32 7, i32 7) ; arg, arg + br label %merge + + merge: +; CHECK: Illegal use of unrelocated value found! +; CHECK-NEXT: Def: %val.poisoned = phi i8 addrspace(1)* [ %arg.relocated, %left ], [ %arg, %right ] +; CHECK-NEXT: Use: %c = icmp eq i8 addrspace(1)* %val.poisoned, %val + %val.poisoned = phi i8 addrspace(1)* [ %arg.relocated, %left ], [ %arg, %right ] + %val = phi i8 addrspace(1)* [ %arg.relocated, %left ], [ %arg.relocated2, %right ] + %c = icmp eq i8 addrspace(1)* %val.poisoned, %val + ret void +} + +; It is illegal to compare poisoned ptr and unrelocated. +define void @test.poisoned.cmp.fail.1(i8 addrspace(1)* %arg) gc "statepoint-example" { +; CHECK-LABEL: Verifying gc pointers in function: test.poisoned.cmp.fail.1 + bci_0: + br i1 undef, label %left, label %right + + left: + %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %arg , i32 -1, i32 0, i32 0, i32 0) + %arg.relocated = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token, i32 7, i32 7) ; arg, arg + br label %merge + + right: + %safepoint_token2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %arg , i32 -1, i32 0, i32 0, i32 0) + %arg.relocated2 = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token2, i32 7, i32 7) ; arg, arg + br label %merge + + merge: +; CHECK: Illegal use of unrelocated value found! +; CHECK-NEXT: Def: %val.poisoned = phi i8 addrspace(1)* [ %arg.relocated, %left ], [ %arg, %right ] +; CHECK-NEXT: Use: %c = icmp eq i8 addrspace(1)* %val.poisoned, %arg + %val.poisoned = phi i8 addrspace(1)* [ %arg.relocated, %left ], [ %arg, %right ] + %c = icmp eq i8 addrspace(1)* %val.poisoned, %arg + ret void +} + +; It should be allowed to compare unrelocated phi with unrelocated value. +define void @test.unrelocated-phi.cmp.ok(i8 addrspace(1)* %arg) gc "statepoint-example" { +; CHECK-LABEL: Verifying gc pointers in function: test.unrelocated-phi.cmp.ok + bci_0: + br i1 undef, label %left, label %right + + left: + %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0) + br label %merge + + right: + br label %merge + + merge: +; CHECK: No illegal uses found by SafepointIRVerifier in: test.unrelocated-phi.cmp.ok + %val.unrelocated = phi i8 addrspace(1)* [ %arg, %left ], [ null, %right ] + %c = icmp eq i8 addrspace(1)* %val.unrelocated, %arg + ret void +} + declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) 
+declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32) declare void @not_statepoint() diff --git a/test/ThinLTO/X86/Inputs/noinline.ll b/test/ThinLTO/X86/Inputs/noinline.ll new file mode 100644 index 000000000000..73db2912cabc --- /dev/null +++ b/test/ThinLTO/X86/Inputs/noinline.ll @@ -0,0 +1,8 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +define i32 @foo(i32) local_unnamed_addr #0 { + ret i32 10 +} + +attributes #0 = { noinline } diff --git a/test/ThinLTO/X86/noinline.ll b/test/ThinLTO/X86/noinline.ll new file mode 100644 index 000000000000..27f59ab90967 --- /dev/null +++ b/test/ThinLTO/X86/noinline.ll @@ -0,0 +1,26 @@ +; This test checks that ThinLTO doesn't try to import a noinline function; +; such an import, when it takes place, causes promotion of its callee. +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %p/Inputs/noinline.ll -o %t2.bc +; RUN: llvm-lto2 run %t1.bc %t2.bc -o %t3.o \ +; RUN: -save-temps \ +; RUN: -r=%t1.bc,main,px \ +; RUN: -r=%t1.bc,foo, \ +; RUN: -r=%t2.bc,foo,p + +; RUN: llvm-dis %t3.o.1.3.import.bc -o - | FileCheck %s + +; CHECK-NOT: define available_externally i32 @foo + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +; Function Attrs: nounwind ssp uwtable +define i32 @main(i32, i8** nocapture readnone) local_unnamed_addr #0 { + %3 = tail call i32 @foo(i32 %0) #0 + ret i32 %3 +} + +declare i32 @foo(i32) local_unnamed_addr + +attributes #0 = { nounwind } diff --git a/test/Transforms/InstCombine/X86/X86FsubCmpCombine.ll b/test/Transforms/InstCombine/X86/X86FsubCmpCombine.ll index fde0692d00a2..b05b27f533bb 100644 --- a/test/Transforms/InstCombine/X86/X86FsubCmpCombine.ll +++ b/test/Transforms/InstCombine/X86/X86FsubCmpCombine.ll @@ -122,6 +122,19 @@ entry: } +define i8 @sub_compare_folding_swapPD256_undef(<4 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @sub_compare_folding_swapPD256_undef( +;
CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> fsub (<4 x double> undef, <4 x double> undef), <4 x double> zeroinitializer, i32 5, i8 -1) +; CHECK-NEXT: ret i8 [[TMP]] +; +entry: + %sub.i1 = fsub ninf <4 x double> undef, undef + %tmp = tail call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %sub.i1, <4 x double> zeroinitializer, i32 5, i8 -1) + ret i8 %tmp +} + + define i8 @sub_compare_folding_swapPD512(<8 x double> %a, <8 x double> %b){ ; CHECK-LABEL: @sub_compare_folding_swapPD512( ; CHECK-NEXT: entry: diff --git a/test/Transforms/InstCombine/extractelement.ll b/test/Transforms/InstCombine/extractelement.ll new file mode 100644 index 000000000000..66fbd25947dc --- /dev/null +++ b/test/Transforms/InstCombine/extractelement.ll @@ -0,0 +1,11 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +define i32 @extractelement_out_of_range(<2 x i32> %x) { +; CHECK-LABEL: @extractelement_out_of_range( +; CHECK-NEXT: [[E1:%.*]] = extractelement <2 x i32> [[X:%.*]], i8 16 +; CHECK-NEXT: ret i32 [[E1]] +; + %E1 = extractelement <2 x i32> %x, i8 16 + ret i32 %E1 +} diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll index c6f88fb9cf05..e0698f8b3b77 100644 --- a/test/Transforms/InstCombine/intrinsics.ll +++ b/test/Transforms/InstCombine/intrinsics.ll @@ -267,12 +267,17 @@ define void @powi(double %V, double *%P) { %C = tail call double @llvm.powi.f64(double %V, i32 1) nounwind store volatile double %C, double* %P + + %D = tail call double @llvm.powi.f64(double %V, i32 2) nounwind + store volatile double %D, double* %P ret void ; CHECK-LABEL: @powi( ; CHECK: %A = fdiv double 1.0{{.*}}, %V ; CHECK: store volatile double %A, ; CHECK: store volatile double 1.0 ; CHECK: store volatile double %V +; CHECK: %D = fmul double %V, %V +; CHECK: store volatile double %D } define i32 @cttz(i32 %a) { diff --git 
a/test/Transforms/InstCombine/minmax-fold.ll b/test/Transforms/InstCombine/minmax-fold.ll index 6004a55f0f8e..933aac7e23f2 100644 --- a/test/Transforms/InstCombine/minmax-fold.ll +++ b/test/Transforms/InstCombine/minmax-fold.ll @@ -744,3 +744,158 @@ define <2 x i8> @min_through_cast_vec2(<2 x i32> %x) { %res = select <2 x i1> %cmp, <2 x i8> %x_trunc, <2 x i8> <i8 255, i8 255> ret <2 x i8> %res } + +; Remove a min/max op in a sequence with a common operand. +; PR35717: https://bugs.llvm.org/show_bug.cgi?id=35717 + +; min(min(a, b), min(b, c)) --> min(min(a, b), c) + +define i32 @common_factor_smin(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @common_factor_smin( +; CHECK-NEXT: [[CMP_AB:%.*]] = icmp slt i32 %a, %b +; CHECK-NEXT: [[MIN_AB:%.*]] = select i1 [[CMP_AB]], i32 %a, i32 %b +; CHECK-NEXT: [[CMP_BC:%.*]] = icmp slt i32 %b, %c +; CHECK-NEXT: [[MIN_BC:%.*]] = select i1 [[CMP_BC]], i32 %b, i32 %c +; CHECK-NEXT: [[CMP_AB_BC:%.*]] = icmp slt i32 [[MIN_AB]], [[MIN_BC]] +; CHECK-NEXT: [[MIN_ABC:%.*]] = select i1 [[CMP_AB_BC]], i32 [[MIN_AB]], i32 [[MIN_BC]] +; CHECK-NEXT: ret i32 [[MIN_ABC]] +; + %cmp_ab = icmp slt i32 %a, %b + %min_ab = select i1 %cmp_ab, i32 %a, i32 %b + %cmp_bc = icmp slt i32 %b, %c + %min_bc = select i1 %cmp_bc, i32 %b, i32 %c + %cmp_ab_bc = icmp slt i32 %min_ab, %min_bc + %min_abc = select i1 %cmp_ab_bc, i32 %min_ab, i32 %min_bc + ret i32 %min_abc +} + +; max(max(a, b), max(c, b)) --> max(max(a, b), c) + +define <2 x i32> @common_factor_smax(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) { +; CHECK-LABEL: @common_factor_smax( +; CHECK-NEXT: [[CMP_AB:%.*]] = icmp sgt <2 x i32> %a, %b +; CHECK-NEXT: [[MAX_AB:%.*]] = select <2 x i1> [[CMP_AB]], <2 x i32> %a, <2 x i32> %b +; CHECK-NEXT: [[CMP_CB:%.*]] = icmp sgt <2 x i32> %c, %b +; CHECK-NEXT: [[MAX_CB:%.*]] = select <2 x i1> [[CMP_CB]], <2 x i32> %c, <2 x i32> %b +; CHECK-NEXT: [[CMP_AB_CB:%.*]] = icmp sgt <2 x i32> [[MAX_AB]], [[MAX_CB]] +; CHECK-NEXT: [[MAX_ABC:%.*]] = select <2 x i1> [[CMP_AB_CB]], <2 x 
i32> [[MAX_AB]], <2 x i32> [[MAX_CB]] +; CHECK-NEXT: ret <2 x i32> [[MAX_ABC]] +; + %cmp_ab = icmp sgt <2 x i32> %a, %b + %max_ab = select <2 x i1> %cmp_ab, <2 x i32> %a, <2 x i32> %b + %cmp_cb = icmp sgt <2 x i32> %c, %b + %max_cb = select <2 x i1> %cmp_cb, <2 x i32> %c, <2 x i32> %b + %cmp_ab_cb = icmp sgt <2 x i32> %max_ab, %max_cb + %max_abc = select <2 x i1> %cmp_ab_cb, <2 x i32> %max_ab, <2 x i32> %max_cb + ret <2 x i32> %max_abc +} + +; min(min(b, c), min(a, b)) --> min(min(b, c), a) + +define <2 x i32> @common_factor_umin(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) { +; CHECK-LABEL: @common_factor_umin( +; CHECK-NEXT: [[CMP_BC:%.*]] = icmp ult <2 x i32> %b, %c +; CHECK-NEXT: [[MIN_BC:%.*]] = select <2 x i1> [[CMP_BC]], <2 x i32> %b, <2 x i32> %c +; CHECK-NEXT: [[CMP_AB:%.*]] = icmp ult <2 x i32> %a, %b +; CHECK-NEXT: [[MIN_AB:%.*]] = select <2 x i1> [[CMP_AB]], <2 x i32> %a, <2 x i32> %b +; CHECK-NEXT: [[CMP_BC_AB:%.*]] = icmp ult <2 x i32> [[MIN_BC]], [[MIN_AB]] +; CHECK-NEXT: [[MIN_ABC:%.*]] = select <2 x i1> [[CMP_BC_AB]], <2 x i32> [[MIN_BC]], <2 x i32> [[MIN_AB]] +; CHECK-NEXT: ret <2 x i32> [[MIN_ABC]] +; + %cmp_bc = icmp ult <2 x i32> %b, %c + %min_bc = select <2 x i1> %cmp_bc, <2 x i32> %b, <2 x i32> %c + %cmp_ab = icmp ult <2 x i32> %a, %b + %min_ab = select <2 x i1> %cmp_ab, <2 x i32> %a, <2 x i32> %b + %cmp_bc_ab = icmp ult <2 x i32> %min_bc, %min_ab + %min_abc = select <2 x i1> %cmp_bc_ab, <2 x i32> %min_bc, <2 x i32> %min_ab + ret <2 x i32> %min_abc +} + +; max(max(b, c), max(b, a)) --> max(max(b, c), a) + +define i32 @common_factor_umax(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @common_factor_umax( +; CHECK-NEXT: [[CMP_BC:%.*]] = icmp ugt i32 %b, %c +; CHECK-NEXT: [[MAX_BC:%.*]] = select i1 [[CMP_BC]], i32 %b, i32 %c +; CHECK-NEXT: [[CMP_BA:%.*]] = icmp ugt i32 %b, %a +; CHECK-NEXT: [[MAX_BA:%.*]] = select i1 [[CMP_BA]], i32 %b, i32 %a +; CHECK-NEXT: [[CMP_BC_BA:%.*]] = icmp ugt i32 [[MAX_BC]], [[MAX_BA]] +; CHECK-NEXT: [[MAX_ABC:%.*]] = select 
i1 [[CMP_BC_BA]], i32 [[MAX_BC]], i32 [[MAX_BA]] +; CHECK-NEXT: ret i32 [[MAX_ABC]] +; + %cmp_bc = icmp ugt i32 %b, %c + %max_bc = select i1 %cmp_bc, i32 %b, i32 %c + %cmp_ba = icmp ugt i32 %b, %a + %max_ba = select i1 %cmp_ba, i32 %b, i32 %a + %cmp_bc_ba = icmp ugt i32 %max_bc, %max_ba + %max_abc = select i1 %cmp_bc_ba, i32 %max_bc, i32 %max_ba + ret i32 %max_abc +} + +declare void @extra_use(i32) + +define i32 @common_factor_umax_extra_use_lhs(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @common_factor_umax_extra_use_lhs( +; CHECK-NEXT: [[CMP_BC:%.*]] = icmp ugt i32 %b, %c +; CHECK-NEXT: [[MAX_BC:%.*]] = select i1 [[CMP_BC]], i32 %b, i32 %c +; CHECK-NEXT: [[CMP_BA:%.*]] = icmp ugt i32 %b, %a +; CHECK-NEXT: [[MAX_BA:%.*]] = select i1 [[CMP_BA]], i32 %b, i32 %a +; CHECK-NEXT: [[CMP_BC_BA:%.*]] = icmp ugt i32 [[MAX_BC]], [[MAX_BA]] +; CHECK-NEXT: [[MAX_ABC:%.*]] = select i1 [[CMP_BC_BA]], i32 [[MAX_BC]], i32 [[MAX_BA]] +; CHECK-NEXT: call void @extra_use(i32 [[MAX_BC]]) +; CHECK-NEXT: ret i32 [[MAX_ABC]] +; + %cmp_bc = icmp ugt i32 %b, %c + %max_bc = select i1 %cmp_bc, i32 %b, i32 %c + %cmp_ba = icmp ugt i32 %b, %a + %max_ba = select i1 %cmp_ba, i32 %b, i32 %a + %cmp_bc_ba = icmp ugt i32 %max_bc, %max_ba + %max_abc = select i1 %cmp_bc_ba, i32 %max_bc, i32 %max_ba + call void @extra_use(i32 %max_bc) + ret i32 %max_abc +} + +define i32 @common_factor_umax_extra_use_rhs(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @common_factor_umax_extra_use_rhs( +; CHECK-NEXT: [[CMP_BC:%.*]] = icmp ugt i32 %b, %c +; CHECK-NEXT: [[MAX_BC:%.*]] = select i1 [[CMP_BC]], i32 %b, i32 %c +; CHECK-NEXT: [[CMP_BA:%.*]] = icmp ugt i32 %b, %a +; CHECK-NEXT: [[MAX_BA:%.*]] = select i1 [[CMP_BA]], i32 %b, i32 %a +; CHECK-NEXT: [[CMP_BC_BA:%.*]] = icmp ugt i32 [[MAX_BC]], [[MAX_BA]] +; CHECK-NEXT: [[MAX_ABC:%.*]] = select i1 [[CMP_BC_BA]], i32 [[MAX_BC]], i32 [[MAX_BA]] +; CHECK-NEXT: call void @extra_use(i32 [[MAX_BA]]) +; CHECK-NEXT: ret i32 [[MAX_ABC]] +; + %cmp_bc = icmp ugt i32 %b, %c + %max_bc = 
select i1 %cmp_bc, i32 %b, i32 %c + %cmp_ba = icmp ugt i32 %b, %a + %max_ba = select i1 %cmp_ba, i32 %b, i32 %a + %cmp_bc_ba = icmp ugt i32 %max_bc, %max_ba + %max_abc = select i1 %cmp_bc_ba, i32 %max_bc, i32 %max_ba + call void @extra_use(i32 %max_ba) + ret i32 %max_abc +} + +define i32 @common_factor_umax_extra_use_both(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @common_factor_umax_extra_use_both( +; CHECK-NEXT: [[CMP_BC:%.*]] = icmp ugt i32 %b, %c +; CHECK-NEXT: [[MAX_BC:%.*]] = select i1 [[CMP_BC]], i32 %b, i32 %c +; CHECK-NEXT: [[CMP_BA:%.*]] = icmp ugt i32 %b, %a +; CHECK-NEXT: [[MAX_BA:%.*]] = select i1 [[CMP_BA]], i32 %b, i32 %a +; CHECK-NEXT: [[CMP_BC_BA:%.*]] = icmp ugt i32 [[MAX_BC]], [[MAX_BA]] +; CHECK-NEXT: [[MAX_ABC:%.*]] = select i1 [[CMP_BC_BA]], i32 [[MAX_BC]], i32 [[MAX_BA]] +; CHECK-NEXT: call void @extra_use(i32 [[MAX_BC]]) +; CHECK-NEXT: call void @extra_use(i32 [[MAX_BA]]) +; CHECK-NEXT: ret i32 [[MAX_ABC]] +; + %cmp_bc = icmp ugt i32 %b, %c + %max_bc = select i1 %cmp_bc, i32 %b, i32 %c + %cmp_ba = icmp ugt i32 %b, %a + %max_ba = select i1 %cmp_ba, i32 %b, i32 %a + %cmp_bc_ba = icmp ugt i32 %max_bc, %max_ba + %max_abc = select i1 %cmp_bc_ba, i32 %max_bc, i32 %max_ba + call void @extra_use(i32 %max_bc) + call void @extra_use(i32 %max_ba) + ret i32 %max_abc +} + diff --git a/test/Transforms/InstCombine/minmax-fp.ll b/test/Transforms/InstCombine/minmax-fp.ll index 0851a5d435b8..b94bce2dbb80 100644 --- a/test/Transforms/InstCombine/minmax-fp.ll +++ b/test/Transforms/InstCombine/minmax-fp.ll @@ -155,13 +155,13 @@ define i8 @t13(float %a) { ret i8 %3 } -; <= comparison, where %a could be -0.0. Not safe. +; %a could be -0.0, but it doesn't matter because the conversion to int is the same for 0.0 or -0.0. 
define i8 @t14(float %a) { ; CHECK-LABEL: @t14( -; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule float %a, 0.000000e+00 -; CHECK-NEXT: [[TMP2:%.*]] = fptosi float %a to i8 -; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i8 [[TMP2]], i8 0 -; CHECK-NEXT: ret i8 [[TMP3]] +; CHECK-NEXT: [[DOTINV:%.*]] = fcmp oge float %a, 0.000000e+00 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTINV]], float 0.000000e+00, float %a +; CHECK-NEXT: [[TMP2:%.*]] = fptosi float [[TMP1]] to i8 +; CHECK-NEXT: ret i8 [[TMP2]] ; %1 = fcmp ule float %a, 0.0 %2 = fptosi float %a to i8 @@ -169,6 +169,19 @@ define i8 @t14(float %a) { ret i8 %3 } +define i8 @t14_commute(float %a) { +; CHECK-LABEL: @t14_commute( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt float %a, 0.000000e+00 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], float %a, float 0.000000e+00 +; CHECK-NEXT: [[TMP3:%.*]] = fptosi float [[TMP2]] to i8 +; CHECK-NEXT: ret i8 [[TMP3]] +; + %1 = fcmp ule float %a, 0.0 + %2 = fptosi float %a to i8 + %3 = select i1 %1, i8 0, i8 %2 + ret i8 %3 +} + define i8 @t15(float %a) { ; CHECK-LABEL: @t15( ; CHECK-NEXT: [[DOTINV:%.*]] = fcmp nsz oge float %a, 0.000000e+00 diff --git a/test/Transforms/InstCombine/rem.ll b/test/Transforms/InstCombine/rem.ll index 4e90b337b09c..f71a0fb4c621 100644 --- a/test/Transforms/InstCombine/rem.ll +++ b/test/Transforms/InstCombine/rem.ll @@ -593,3 +593,17 @@ define <2 x i32> @test23(<2 x i32> %A) { %mul = srem <2 x i32> %and, <i32 2147483647, i32 2147483647> ret <2 x i32> %mul } + +; FP division-by-zero is not UB. 
+ +define double @PR34870(i1 %cond, double %x, double %y) { +; CHECK-LABEL: @PR34870( +; CHECK-NEXT: [[SEL:%.*]] = select i1 %cond, double %y, double 0.000000e+00 +; CHECK-NEXT: [[FMOD:%.*]] = frem double %x, [[SEL]] +; CHECK-NEXT: ret double [[FMOD]] +; + %sel = select i1 %cond, double %y, double 0.0 + %fmod = frem double %x, %sel + ret double %fmod +} + diff --git a/test/Transforms/InstSimplify/extract-element.ll b/test/Transforms/InstSimplify/extract-element.ll new file mode 100644 index 000000000000..8ee75a603cd1 --- /dev/null +++ b/test/Transforms/InstSimplify/extract-element.ll @@ -0,0 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instsimplify -S | FileCheck %s + +; Weird Types + +define i129 @vec_extract_negidx(<3 x i129> %a) { +; CHECK-LABEL: @vec_extract_negidx( +; CHECK-NEXT: [[E1:%.*]] = extractelement <3 x i129> [[A:%.*]], i129 -1 +; CHECK-NEXT: ret i129 [[E1]] +; + %E1 = extractelement <3 x i129> %a, i129 -1 + ret i129 %E1 +} diff --git a/test/Transforms/LoopUnroll/runtime-epilog-debuginfo.ll b/test/Transforms/LoopUnroll/runtime-epilog-debuginfo.ll new file mode 100644 index 000000000000..28db1c834062 --- /dev/null +++ b/test/Transforms/LoopUnroll/runtime-epilog-debuginfo.ll @@ -0,0 +1,128 @@ +; RUN: opt -loop-unroll -unroll-runtime -unroll-runtime-epilog -S %s | FileCheck %s + +; Test that epilogue is tagged with the same debug information as original loop body rather than original loop exit. 
+ +; CHECK: for.body.i: +; CHECK: br i1 {{.*}}, label %lee1.exit.loopexit.unr-lcssa.loopexit, label %for.body.i, !dbg ![[LOOP_LOC:[0-9]+]] +; CHECK: lee1.exit.loopexit.unr-lcssa.loopexit: +; CHECK: br label %lee1.exit.loopexit.unr-lcssa, !dbg ![[LOOP_LOC]] +; CHECK: lee1.exit.loopexit.unr-lcssa: +; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0, !dbg ![[LOOP_LOC]] +; CHECK: br i1 %lcmp.mod, label %for.body.i.epil.preheader, label %lee1.exit.loopexit, !dbg ![[LOOP_LOC]] +; CHECK: for.body.i.epil.preheader: +; CHECK: br label %for.body.i.epil, !dbg ![[LOOP_LOC]] +; CHECK: lee1.exit.loopexit: +; CHECK: br label %lee1.exit, !dbg ![[EXIT_LOC:[0-9]+]] + +; CHECK-DAG: ![[LOOP_LOC]] = !DILocation(line: 5, column: 3, scope: !{{.*}}, inlinedAt: !{{.*}}) +; CHECK-DAG: ![[EXIT_LOC]] = !DILocation(line: 11, column: 12, scope: !{{.*}}, inlinedAt: !{{.*}}) + +; Function Attrs: nounwind readnone +define i32 @goo(i32 %a, i32 %b) local_unnamed_addr #0 !dbg !8 { +entry: + tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !13, metadata !15), !dbg !16 + tail call void @llvm.dbg.value(metadata i32 %b, i64 0, metadata !14, metadata !15), !dbg !17 + tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !18, metadata !15), !dbg !26 + tail call void @llvm.dbg.value(metadata i32 %b, i64 0, metadata !21, metadata !15), !dbg !28 + tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !22, metadata !15), !dbg !29 + tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !23, metadata !15), !dbg !30 + tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !23, metadata !15), !dbg !30 + tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !22, metadata !15), !dbg !29 + %cmp7.i = icmp eq i32 %b, 0, !dbg !31 + br i1 %cmp7.i, label %lee1.exit, label %for.body.i.preheader, !dbg !33 + +for.body.i.preheader: ; preds = %entry + br label %for.body.i, !dbg !34 + +for.body.i: ; preds = %for.body.i.preheader, %for.body.i + %i.09.i = phi i32 [ %inc.i, 
%for.body.i ], [ 0, %for.body.i.preheader ] + %t.08.i = phi i32 [ %add1.i, %for.body.i ], [ 0, %for.body.i.preheader ] + %div.i = sdiv i32 %t.08.i, 2, !dbg !34 + %add.i = add i32 %t.08.i, %a, !dbg !35 + %add1.i = add i32 %add.i, %div.i, !dbg !36 + tail call void @llvm.dbg.value(metadata i32 %add1.i, i64 0, metadata !22, metadata !15), !dbg !29 + %inc.i = add nuw i32 %i.09.i, 1, !dbg !37 + tail call void @llvm.dbg.value(metadata i32 %inc.i, i64 0, metadata !23, metadata !15), !dbg !30 + tail call void @llvm.dbg.value(metadata i32 %inc.i, i64 0, metadata !23, metadata !15), !dbg !30 + tail call void @llvm.dbg.value(metadata i32 %add1.i, i64 0, metadata !22, metadata !15), !dbg !29 + %exitcond.i = icmp eq i32 %inc.i, %b, !dbg !31 + br i1 %exitcond.i, label %lee1.exit.loopexit, label %for.body.i, !dbg !33, !llvm.loop !38 + +lee1.exit.loopexit: ; preds = %for.body.i + %add1.i.lcssa = phi i32 [ %add1.i, %for.body.i ] + br label %lee1.exit, !dbg !41 + +lee1.exit: ; preds = %lee1.exit.loopexit, %entry + %t.0.lcssa.i = phi i32 [ 0, %entry ], [ %add1.i.lcssa, %lee1.exit.loopexit ] + tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !44, metadata !15), !dbg !47 + tail call void @llvm.dbg.value(metadata i32 %b, i64 0, metadata !45, metadata !15), !dbg !48 + %add.i4 = add nsw i32 %b, %a, !dbg !41 + %sub.i = sub nsw i32 %a, %b, !dbg !49 + %mul.i = mul nsw i32 %add.i4, %sub.i, !dbg !50 + %add = add nsw i32 %t.0.lcssa.i, %mul.i, !dbg !51 + ret i32 %add, !dbg !52 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1 + +attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="arm7tdmi" 
"target-features"="+neon,+strict-align,+vfp3,-crypto,-d16,-fp-armv8,-fp-only-sp,-fp16,-vfp4" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5, !6} +!llvm.ident = !{!7} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Snapdragon LLVM ARM Compiler 4.0.5 (based on llvm.org 4.0+)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "t.c", directory: "/prj/llvm-arm/scratch1/zhaoshiz/bugs/debug-symbol") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{i32 1, !"min_enum_size", i32 4} +!7 = !{!"Snapdragon LLVM ARM Compiler 4.0.5 (based on llvm.org 4.0+)"} +!8 = distinct !DISubprogram(name: "goo", scope: !1, file: !1, line: 23, type: !9, isLocal: false, isDefinition: true, scopeLine: 23, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !12) +!9 = !DISubroutineType(types: !10) +!10 = !{!11, !11, !11} +!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!12 = !{!13, !14} +!13 = !DILocalVariable(name: "a", arg: 1, scope: !8, file: !1, line: 23, type: !11) +!14 = !DILocalVariable(name: "b", arg: 2, scope: !8, file: !1, line: 23, type: !11) +!15 = !DIExpression() +!16 = !DILocation(line: 23, column: 14, scope: !8) +!17 = !DILocation(line: 23, column: 21, scope: !8) +!18 = !DILocalVariable(name: "a", arg: 1, scope: !19, file: !1, line: 3, type: !11) +!19 = distinct !DISubprogram(name: "lee1", scope: !1, file: !1, line: 3, type: !9, isLocal: true, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !20) +!20 = !{!18, !21, !22, !23} +!21 = !DILocalVariable(name: "b", arg: 2, scope: !19, file: !1, line: 3, type: !11) +!22 = !DILocalVariable(name: "t", scope: !19, file: !1, line: 4, type: !11) +!23 = !DILocalVariable(name: "i", scope: !24, file: 
!1, line: 5, type: !25) +!24 = distinct !DILexicalBlock(scope: !19, file: !1, line: 5, column: 3) +!25 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) +!26 = !DILocation(line: 3, column: 22, scope: !19, inlinedAt: !27) +!27 = distinct !DILocation(line: 24, column: 27, scope: !8) +!28 = !DILocation(line: 3, column: 29, scope: !19, inlinedAt: !27) +!29 = !DILocation(line: 4, column: 7, scope: !19, inlinedAt: !27) +!30 = !DILocation(line: 5, column: 17, scope: !24, inlinedAt: !27) +!31 = !DILocation(line: 5, column: 23, scope: !32, inlinedAt: !27) +!32 = distinct !DILexicalBlock(scope: !24, file: !1, line: 5, column: 3) +!33 = !DILocation(line: 5, column: 3, scope: !24, inlinedAt: !27) +!34 = !DILocation(line: 6, column: 13, scope: !32, inlinedAt: !27) +!35 = !DILocation(line: 6, column: 11, scope: !32, inlinedAt: !27) +!36 = !DILocation(line: 6, column: 7, scope: !32, inlinedAt: !27) +!37 = !DILocation(line: 5, column: 28, scope: !32, inlinedAt: !27) +!38 = distinct !{!38, !39, !40} +!39 = !DILocation(line: 5, column: 3, scope: !24) +!40 = !DILocation(line: 6, column: 14, scope: !24) +!41 = !DILocation(line: 11, column: 12, scope: !42, inlinedAt: !46) +!42 = distinct !DISubprogram(name: "lee2", scope: !1, file: !1, line: 10, type: !9, isLocal: true, isDefinition: true, scopeLine: 10, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !43) +!43 = !{!44, !45} +!44 = !DILocalVariable(name: "a", arg: 1, scope: !42, file: !1, line: 10, type: !11) +!45 = !DILocalVariable(name: "b", arg: 2, scope: !42, file: !1, line: 10, type: !11) +!46 = distinct !DILocation(line: 24, column: 40, scope: !8) +!47 = !DILocation(line: 10, column: 22, scope: !42, inlinedAt: !46) +!48 = !DILocation(line: 10, column: 29, scope: !42, inlinedAt: !46) +!49 = !DILocation(line: 11, column: 20, scope: !42, inlinedAt: !46) +!50 = !DILocation(line: 11, column: 16, scope: !42, inlinedAt: !46) +!51 = !DILocation(line: 24, column: 38, scope: !8) +!52 = 
!DILocation(line: 24, column: 3, scope: !8) diff --git a/test/Transforms/LoopUnroll/runtime-loop1.ll b/test/Transforms/LoopUnroll/runtime-loop1.ll index d180980c95b9..32463373ca99 100644 --- a/test/Transforms/LoopUnroll/runtime-loop1.ll +++ b/test/Transforms/LoopUnroll/runtime-loop1.ll @@ -13,9 +13,11 @@ ; EPILOG: br i1 %niter.ncmp.1, label %for.end.loopexit.unr-lcssa.loopexit, label %for.body, !dbg [[BODY_LOC:![0-9]+]] ; EPILOG-NOT: br i1 %niter.ncmp.2, label %for.end.loopexit{{.*}}, label %for.body ; EPILOG: for.body.epil.preheader: -; EPILOG: br label %for.body.epil, !dbg [[EXIT_LOC:![0-9]+]] +; EPILOG: br label %for.body.epil, !dbg [[BODY_LOC]] ; EPILOG: for.body.epil: -; EPILOG: br label %for.end.loopexit.epilog-lcssa, !dbg [[BODY_LOC:![0-9]+]] +; EPILOG: br label %for.end.loopexit.epilog-lcssa, !dbg [[BODY_LOC]] +; EPILOG: for.end.loopexit: +; EPILOG: br label %for.end, !dbg [[EXIT_LOC:![0-9]+]] ; EPILOG-DAG: [[PH_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}}) ; EPILOG-DAG: [[BODY_LOC]] = !DILocation(line: 102, column: 1, scope: !{{.*}}) diff --git a/test/Transforms/MemCpyOpt/memcpy-invoke-memcpy.ll b/test/Transforms/MemCpyOpt/memcpy-invoke-memcpy.ll deleted file mode 100644 index e3d1f6dd2b17..000000000000 --- a/test/Transforms/MemCpyOpt/memcpy-invoke-memcpy.ll +++ /dev/null @@ -1,48 +0,0 @@ -; RUN: opt < %s -memcpyopt -S | FileCheck %s -; Test memcpy-memcpy dependencies across invoke edges. - -; Test that memcpyopt works across the non-unwind edge of an invoke. 
- -define hidden void @test_normal(i8* noalias %dst, i8* %src) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %temp = alloca i8, i32 64 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false) - invoke void @invoke_me() - to label %try.cont unwind label %lpad - -lpad: - landingpad { i8*, i32 } - catch i8* null - ret void - -try.cont: - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %temp, i64 64, i32 8, i1 false) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 64, i32 8, i1 false) - ret void -} - -; Test that memcpyopt works across the unwind edge of an invoke. - -define hidden void @test_unwind(i8* noalias %dst, i8* %src) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %temp = alloca i8, i32 64 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false) - invoke void @invoke_me() - to label %try.cont unwind label %lpad - -lpad: - landingpad { i8*, i32 } - catch i8* null - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %temp, i64 64, i32 8, i1 false) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 64, i32 8, i1 false) - ret void - -try.cont: - ret void -} - -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) -declare i32 @__gxx_personality_v0(...) 
-declare void @invoke_me() readnone diff --git a/test/Transforms/MemCpyOpt/merge-into-memset.ll b/test/Transforms/MemCpyOpt/merge-into-memset.ll deleted file mode 100644 index fc31038a4e6d..000000000000 --- a/test/Transforms/MemCpyOpt/merge-into-memset.ll +++ /dev/null @@ -1,45 +0,0 @@ -; RUN: opt < %s -memcpyopt -S | FileCheck %s -; Update cached non-local dependence information when merging stores into memset. - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - -; Don't delete the memcpy in %if.then, even though it depends on an instruction -; which will be deleted. - -; CHECK-LABEL: @foo -define void @foo(i1 %c, i8* %d, i8* %e, i8* %f) { -entry: - %tmp = alloca [50 x i8], align 8 - %tmp4 = bitcast [50 x i8]* %tmp to i8* - %tmp1 = getelementptr inbounds i8, i8* %tmp4, i64 1 - call void @llvm.memset.p0i8.i64(i8* nonnull %d, i8 0, i64 10, i32 1, i1 false), !dbg !5 - store i8 0, i8* %tmp4, align 8, !dbg !5 -; CHECK: call void @llvm.memset.p0i8.i64(i8* nonnull %d, i8 0, i64 10, i32 1, i1 false), !dbg !5 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %tmp1, i8* nonnull %d, i64 10, i32 1, i1 false) - br i1 %c, label %if.then, label %exit - -if.then: -; CHECK: if.then: -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %f, i8* nonnull %tmp4, i64 30, i32 8, i1 false) - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %f, i8* nonnull %tmp4, i64 30, i32 8, i1 false) - br label %exit - -exit: - ret void -} - -declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) -declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i32, i1) - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4} - -!0 = distinct !DICompileUnit(language: DW_LANG_Rust, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) -!1 = !DIFile(filename: "t.rs", directory: "/tmp") -!2 = !{} -!3 = !{i32 2, !"Dwarf Version", i32 4} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !DILocation(line: 8, column: 5, scope: !6) -!6 = distinct !DISubprogram(name: "bar", scope: 
!1, file: !1, line: 5, type: !7, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2) -!7 = !DISubroutineType(types: !8) -!8 = !{null} diff --git a/test/Transforms/MemCpyOpt/mixed-sizes.ll b/test/Transforms/MemCpyOpt/mixed-sizes.ll deleted file mode 100644 index 9091fe7f56c0..000000000000 --- a/test/Transforms/MemCpyOpt/mixed-sizes.ll +++ /dev/null @@ -1,36 +0,0 @@ -; RUN: opt < %s -memcpyopt -S | FileCheck %s -; Handle memcpy-memcpy dependencies of differing sizes correctly. - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - -; Don't delete the second memcpy, even though there's an earlier -; memcpy with a larger size from the same address. - -; CHECK-LABEL: @foo -define i32 @foo(i1 %z) { -entry: - %a = alloca [10 x i32] - %s = alloca [10 x i32] - %0 = bitcast [10 x i32]* %a to i8* - %1 = bitcast [10 x i32]* %s to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull %1, i8 0, i64 40, i32 16, i1 false) - %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* %a, i64 0, i64 0 - store i32 1, i32* %arrayidx - %scevgep = getelementptr [10 x i32], [10 x i32]* %s, i64 0, i64 1 - %scevgep7 = bitcast i32* %scevgep to i8* - br i1 %z, label %for.body3.lr.ph, label %for.inc7.1 - -for.body3.lr.ph: ; preds = %entry - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %scevgep7, i64 17179869180, i32 4, i1 false) - br label %for.inc7.1 - -for.inc7.1: -; CHECK: for.inc7.1: - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %scevgep7, i64 4, i32 4, i1 false) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %scevgep7, i64 4, i32 4, i1 false) - %2 = load i32, i32* %arrayidx - ret i32 %2 -} - -declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) -declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i32, i1) diff --git a/test/Transforms/MemCpyOpt/nonlocal-memcpy-memcpy.ll b/test/Transforms/MemCpyOpt/nonlocal-memcpy-memcpy.ll deleted file mode 100644 index 5b0510211d9f..000000000000 --- 
a/test/Transforms/MemCpyOpt/nonlocal-memcpy-memcpy.ll +++ /dev/null @@ -1,114 +0,0 @@ -; RUN: opt < %s -memcpyopt -S | FileCheck %s -; Make sure memcpy-memcpy dependence is optimized across -; basic blocks (conditional branches and invokes). - -%struct.s = type { i32, i32 } - -@s_foo = private unnamed_addr constant %struct.s { i32 1, i32 2 }, align 4 -@s_baz = private unnamed_addr constant %struct.s { i32 1, i32 2 }, align 4 -@i = external constant i8* - -declare void @qux() -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) -declare void @__cxa_throw(i8*, i8*, i8*) -declare i32 @__gxx_personality_v0(...) -declare i8* @__cxa_begin_catch(i8*) - -; A simple partial redundancy. Test that the second memcpy is optimized -; to copy directly from the original source rather than from the temporary. - -; CHECK-LABEL: @wobble -define void @wobble(i8* noalias %dst, i8* %src, i1 %some_condition) { -bb: - %temp = alloca i8, i32 64 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false) - br i1 %some_condition, label %more, label %out - -out: - call void @qux() - unreachable - -more: - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %temp, i64 64, i32 8, i1 false) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 64, i32 8, i1 false) - ret void -} - -; A CFG triangle with a partial redundancy targeting an alloca. Test that the -; memcpy inside the triangle is optimized to copy directly from the original -; source rather than from the temporary. 
- -; CHECK-LABEL: @foo -define i32 @foo(i1 %t3) { -bb: - %s = alloca %struct.s, align 4 - %t = alloca %struct.s, align 4 - %s1 = bitcast %struct.s* %s to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s1, i8* bitcast (%struct.s* @s_foo to i8*), i64 8, i32 4, i1 false) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s1, i8* bitcast (%struct.s* @s_foo to i8*), i64 8, i32 4, i1 false) - br i1 %t3, label %bb4, label %bb7 - -bb4: ; preds = %bb - %t5 = bitcast %struct.s* %t to i8* - %s6 = bitcast %struct.s* %s to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %t5, i8* %s6, i64 8, i32 4, i1 false) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %t5, i8* bitcast (%struct.s* @s_foo to i8*), i64 8, i32 4, i1 false) - br label %bb7 - -bb7: ; preds = %bb4, %bb - %t8 = getelementptr %struct.s, %struct.s* %t, i32 0, i32 0 - %t9 = load i32, i32* %t8, align 4 - %t10 = getelementptr %struct.s, %struct.s* %t, i32 0, i32 1 - %t11 = load i32, i32* %t10, align 4 - %t12 = add i32 %t9, %t11 - ret i32 %t12 -} - -; A CFG diamond with an invoke on one side, and a partially redundant memcpy -; into an alloca on the other. Test that the memcpy inside the diamond is -; optimized to copy ; directly from the original source rather than from the -; temporary. This more complex test represents a relatively common usage -; pattern. 
- -; CHECK-LABEL: @baz -define i32 @baz(i1 %t5) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -bb: - %s = alloca %struct.s, align 4 - %t = alloca %struct.s, align 4 - %s3 = bitcast %struct.s* %s to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s3, i8* bitcast (%struct.s* @s_baz to i8*), i64 8, i32 4, i1 false) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s3, i8* bitcast (%struct.s* @s_baz to i8*), i64 8, i32 4, i1 false) - br i1 %t5, label %bb6, label %bb22 - -bb6: ; preds = %bb - invoke void @__cxa_throw(i8* null, i8* bitcast (i8** @i to i8*), i8* null) - to label %bb25 unwind label %bb9 - -bb9: ; preds = %bb6 - %t10 = landingpad { i8*, i32 } - catch i8* null - br label %bb13 - -bb13: ; preds = %bb9 - %t15 = call i8* @__cxa_begin_catch(i8* null) - br label %bb23 - -bb22: ; preds = %bb - %t23 = bitcast %struct.s* %t to i8* - %s24 = bitcast %struct.s* %s to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %t23, i8* %s24, i64 8, i32 4, i1 false) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %t23, i8* bitcast (%struct.s* @s_baz to i8*), i64 8, i32 4, i1 false) - br label %bb23 - -bb23: ; preds = %bb22, %bb13 - %t17 = getelementptr inbounds %struct.s, %struct.s* %t, i32 0, i32 0 - %t18 = load i32, i32* %t17, align 4 - %t19 = getelementptr inbounds %struct.s, %struct.s* %t, i32 0, i32 1 - %t20 = load i32, i32* %t19, align 4 - %t21 = add nsw i32 %t18, %t20 - ret i32 %t21 - -bb25: ; preds = %bb6 - unreachable -} diff --git a/test/Transforms/RewriteStatepointsForGC/check_traversal_order.ll b/test/Transforms/RewriteStatepointsForGC/check_traversal_order.ll new file mode 100644 index 000000000000..57e35ccad638 --- /dev/null +++ b/test/Transforms/RewriteStatepointsForGC/check_traversal_order.ll @@ -0,0 +1,38 @@ +; RUN: opt -S -rewrite-statepoints-for-gc < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1" +target triple = "x86_64-unknown-linux-gnu" + +declare void @f() +declare void @g(i8 addrspace(1)*, i8 
addrspace(1)*) +declare i32 @personality_function() + +; Make sure that we do not fail assertion because we process call of @g before +; we process the call of @f. + +define void @test_01(i8 addrspace(1)* %p, i1 %cond) gc "statepoint-example" personality i32 ()* @personality_function { + +; CHECK-LABEL: @test_01( + +entry: + %tmp0 = insertelement <2 x i8 addrspace(1)*> undef, i8 addrspace(1)* %p, i32 0 + %tmp1 = insertelement <2 x i8 addrspace(1)*> %tmp0, i8 addrspace(1)* %p, i32 1 + %tmp2 = extractelement <2 x i8 addrspace(1)*> %tmp1, i32 1 + %tmp3 = extractelement <2 x i8 addrspace(1)*> %tmp1, i32 0 + br label %loop + +loop: + br i1 %cond, label %cond_block, label %exit + +cond_block: + br i1 %cond, label %backedge, label %exit + +exit: + %tmp4 = phi i8 addrspace(1)* [ %tmp2, %loop ], [ %tmp2, %cond_block ] + call void @g(i8 addrspace(1)* %tmp3, i8 addrspace(1)* %tmp4) + ret void + +backedge: + call void @f() + br label %loop +} diff --git a/test/Transforms/SimplifyCFG/X86/if-conversion.ll b/test/Transforms/SimplifyCFG/X86/if-conversion.ll deleted file mode 100644 index 28702572d480..000000000000 --- a/test/Transforms/SimplifyCFG/X86/if-conversion.ll +++ /dev/null @@ -1,231 +0,0 @@ -; RUN: opt < %s -simplifycfg -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -S | FileCheck %s -; Avoid if-conversion if there is a long dependence chain. - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" - -; The first several cases test FindLongDependenceChain returns true, so -; if-conversion is blocked. 
- -define i64 @test1(i64** %pp, i64* %p) { -entry: - %0 = load i64*, i64** %pp, align 8 - %1 = load i64, i64* %0, align 8 - %cmp = icmp slt i64 %1, 0 - %pint = ptrtoint i64* %p to i64 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: - %p1 = add i64 %pint, 8 - br label %cond.end - -cond.false: - %p2 = or i64 %pint, 16 - br label %cond.end - -cond.end: - %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false] - %ptr = inttoptr i64 %p3 to i64* - %val = load i64, i64* %ptr, align 8 - ret i64 %val - -; CHECK-NOT: select -} - -define i64 @test2(i64** %pp, i64* %p) { -entry: - %0 = load i64*, i64** %pp, align 8 - %1 = load i64, i64* %0, align 8 - %cmp = icmp slt i64 %1, 0 - %pint = ptrtoint i64* %p to i64 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: - %p1 = add i64 %pint, 8 - br label %cond.end - -cond.false: - %p2 = add i64 %pint, 16 - br label %cond.end - -cond.end: - %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false] - %ptr = inttoptr i64 %p3 to i64* - %val = load i64, i64* %ptr, align 8 - ret i64 %val - -; CHECK-LABEL: @test2 -; CHECK-NOT: select -} - -; The following cases test FindLongDependenceChain returns false, so -; if-conversion will proceed. - -; Non trivial LatencyAdjustment. -define i64 @test3(i64** %pp, i64* %p) { -entry: - %0 = load i64*, i64** %pp, align 8 - %1 = load i64, i64* %0, align 8 - %cmp = icmp slt i64 %1, 0 - %pint = ptrtoint i64* %p to i64 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: - %p1 = add i64 %pint, 8 - br label %cond.end - -cond.false: - %p2 = or i64 %pint, 16 - br label %cond.end - -cond.end: - %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false] - %p4 = add i64 %p3, %1 - %ptr = inttoptr i64 %p4 to i64* - %val = load i64, i64* %ptr, align 8 - ret i64 %val - -; CHECK-LABEL: @test3 -; CHECK: select -} - -; Short dependence chain. 
-define i64 @test4(i64* %pp, i64* %p) { -entry: - %0 = load i64, i64* %pp, align 8 - %cmp = icmp slt i64 %0, 0 - %pint = ptrtoint i64* %p to i64 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: - %p1 = add i64 %pint, 8 - br label %cond.end - -cond.false: - %p2 = or i64 %pint, 16 - br label %cond.end - -cond.end: - %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false] - %ptr = inttoptr i64 %p3 to i64* - %val = load i64, i64* %ptr, align 8 - ret i64 %val - -; CHECK-LABEL: @test4 -; CHECK: select -} - -; High IPC. -define i64 @test5(i64** %pp, i64* %p) { -entry: - %0 = load i64*, i64** %pp, align 8 - %1 = load i64, i64* %0, align 8 - %cmp = icmp slt i64 %1, 0 - %pint = ptrtoint i64* %p to i64 - %2 = add i64 %pint, 2 - %3 = add i64 %pint, 3 - %4 = or i64 %pint, 16 - %5 = and i64 %pint, 255 - - %6 = or i64 %2, 9 - %7 = and i64 %3, 255 - %8 = add i64 %4, 4 - %9 = add i64 %5, 5 - - %10 = add i64 %6, 2 - %11 = add i64 %7, 3 - %12 = add i64 %8, 4 - %13 = add i64 %9, 5 - - %14 = add i64 %10, 6 - %15 = add i64 %11, 7 - %16 = add i64 %12, 8 - %17 = add i64 %13, 9 - - %18 = add i64 %14, 10 - %19 = add i64 %15, 11 - %20 = add i64 %16, 12 - %21 = add i64 %17, 13 - - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: - %p1 = add i64 %pint, 8 - br label %cond.end - -cond.false: - %p2 = or i64 %pint, 16 - br label %cond.end - -cond.end: - %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false] - %ptr = inttoptr i64 %p3 to i64* - %val = load i64, i64* %ptr, align 8 - - ret i64 %val - -; CHECK-LABEL: @test5 -; CHECK: select -} - -; Large BB size. 
-define i64 @test6(i64** %pp, i64* %p) { -entry: - %0 = load i64*, i64** %pp, align 8 - %1 = load i64, i64* %0, align 8 - %cmp = icmp slt i64 %1, 0 - %pint = ptrtoint i64* %p to i64 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: - %p1 = add i64 %pint, 8 - br label %cond.end - -cond.false: - %p2 = or i64 %pint, 16 - br label %cond.end - -cond.end: - %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false] - %ptr = inttoptr i64 %p3 to i64* - %val = load i64, i64* %ptr, align 8 - %2 = add i64 %pint, 2 - %3 = add i64 %pint, 3 - %4 = add i64 %2, 4 - %5 = add i64 %3, 5 - %6 = add i64 %4, 6 - %7 = add i64 %5, 7 - %8 = add i64 %6, 6 - %9 = add i64 %7, 7 - %10 = add i64 %8, 6 - %11 = add i64 %9, 7 - %12 = add i64 %10, 6 - %13 = add i64 %11, 7 - %14 = add i64 %12, 6 - %15 = add i64 %13, 7 - %16 = add i64 %14, 6 - %17 = add i64 %15, 7 - %18 = add i64 %16, 6 - %19 = add i64 %17, 7 - %20 = add i64 %18, 6 - %21 = add i64 %19, 7 - %22 = add i64 %20, 6 - %23 = add i64 %21, 7 - %24 = add i64 %22, 6 - %25 = add i64 %23, 7 - %26 = add i64 %24, 6 - %27 = add i64 %25, 7 - %28 = add i64 %26, 6 - %29 = add i64 %27, 7 - %30 = add i64 %28, 6 - %31 = add i64 %29, 7 - %32 = add i64 %30, 8 - %33 = add i64 %31, 9 - %34 = add i64 %32, %33 - %35 = and i64 %34, 255 - %res = add i64 %val, %35 - - ret i64 %res - -; CHECK-LABEL: @test6 -; CHECK: select -} diff --git a/test/tools/llvm-cov/cov-comdat.test b/test/tools/llvm-cov/cov-comdat.test index 9d2271636994..e8018d58be62 100644 --- a/test/tools/llvm-cov/cov-comdat.test +++ b/test/tools/llvm-cov/cov-comdat.test @@ -9,7 +9,7 @@ REQUIRES: shell // RUN: llvm-cov show %S/Inputs/binary-formats.v1.linux64l -instr-profile %S/Inputs/elf_binary_comdat.profdata -path-equivalence=/tmp,%S/Inputs %S/Inputs/instrprof-comdat.h -dump 2> %t.err | FileCheck --check-prefix=HEADER %S/Inputs/instrprof-comdat.h // RUN: FileCheck --check-prefix=ERROR -input-file %t.err %s -// ERROR: hash-mismatch: No profile record found for 'main' with hash = 0xA +// ERROR: 
hash-mismatch: No profile record found for 'main' with hash = 0xa // RUN: llvm-cov show %S/Inputs/binary-formats.v2.linux64l -instr-profile %S/Inputs/elf_binary_comdat.profdata -path-equivalence=/root/llvm/test/tools,%S/.. %S/Inputs/instrprof-comdat.h | FileCheck --check-prefix=HEADER %S/Inputs/instrprof-comdat.h // RUN: llvm-cov show %S/Inputs/binary-formats.v2.linux32l -instr-profile %S/Inputs/elf_binary_comdat.profdata -path-equivalence=/root/llvm/R/../test/tools,%S/.. %S/Inputs/instrprof-comdat.h | FileCheck --check-prefix=HEADER %S/Inputs/instrprof-comdat.h diff --git a/test/tools/llvm-objdump/X86/hex-displacement.test b/test/tools/llvm-objdump/X86/hex-displacement.test index dd2332e572f0..541cca53869b 100644 --- a/test/tools/llvm-objdump/X86/hex-displacement.test +++ b/test/tools/llvm-objdump/X86/hex-displacement.test @@ -3,4 +3,4 @@ # RUN: llvm-objdump -d %p/Inputs/hello.exe.macho-i386 | FileCheck %s -# CHECK: 1f47: e8 00 00 00 00 calll 0 <_main+0xC> +# CHECK: 1f47: e8 00 00 00 00 calll 0 <_main+0xc> diff --git a/test/tools/llvm-readobj/Inputs/needed-libs.obj.coff-am64 b/test/tools/llvm-readobj/Inputs/needed-libs.obj.coff-am64 Binary files differnew file mode 100644 index 000000000000..4cfc6e25396f --- /dev/null +++ b/test/tools/llvm-readobj/Inputs/needed-libs.obj.coff-am64 diff --git a/test/tools/llvm-readobj/coff-needed-libs.test b/test/tools/llvm-readobj/coff-needed-libs.test new file mode 100644 index 000000000000..deb6bc299eb5 --- /dev/null +++ b/test/tools/llvm-readobj/coff-needed-libs.test @@ -0,0 +1,5 @@ +RUN: llvm-readobj -needed-libs %p/Inputs/needed-libs.obj.coff-am64 | FileCheck %s + +CHECK: NeededLibraries [ +CHECK-NEXT: KERNEL32.dll +CHECK-NEXT: ] diff --git a/tools/dsymutil/CFBundle.cpp b/tools/dsymutil/CFBundle.cpp index 304838f7ee2c..15ee8011a4a4 100644 --- a/tools/dsymutil/CFBundle.cpp +++ b/tools/dsymutil/CFBundle.cpp @@ -11,6 +11,7 @@ #ifdef __APPLE__ #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" #include 
"llvm/Support/raw_ostream.h" #include <CoreFoundation/CoreFoundation.h> #include <assert.h> @@ -56,7 +57,7 @@ public: static const char *UTF8(CFStringRef CFStr, std::string &Str); }; -/// Static function that puts a copy of the UTF8 contents of CFStringRef into +/// Static function that puts a copy of the UTF-8 contents of CFStringRef into /// std::string and returns the C string pointer that is contained in the /// std::string when successful, nullptr otherwise. /// @@ -85,13 +86,10 @@ const char *CFString::UTF8(CFStringRef CFStr, std::string &Str) { /// RAII wrapper around CFBundleRef. class CFBundle : public CFReleaser<CFBundleRef> { public: - CFBundle(const char *Path = nullptr) : CFReleaser<CFBundleRef>() { - if (Path && Path[0]) - SetFromPath(Path); - } + CFBundle(StringRef Path) : CFReleaser<CFBundleRef>() { SetFromPath(Path); } - CFBundle(CFURLRef url) - : CFReleaser<CFBundleRef>(url ? ::CFBundleCreate(nullptr, url) + CFBundle(CFURLRef Url) + : CFReleaser<CFBundleRef>(Url ? ::CFBundleCreate(nullptr, Url) : nullptr) {} /// Return the bundle identifier. @@ -109,67 +107,49 @@ public: } private: - /// Update this instance with a new bundle created from the given path. - bool SetFromPath(const char *Path); + /// Helper to initialize this instance with a new bundle created from the + /// given path. This function will recursively remove components from the + /// path in its search for the nearest Info.plist. + void SetFromPath(StringRef Path); }; -bool CFBundle::SetFromPath(const char *InPath) { - // Release our old bundle and URL. +void CFBundle::SetFromPath(StringRef Path) { + // Start from an empty/invalid CFBundle. 
reset(); - if (InPath && InPath[0]) { - char ResolvedPath[PATH_MAX]; - const char *Path = ::realpath(InPath, ResolvedPath); - if (Path == nullptr) - Path = InPath; + if (Path.empty() || !sys::fs::exists(Path)) + return; + + SmallString<256> RealPath; + sys::fs::real_path(Path, RealPath, /*expand_tilde*/ true); - CFAllocatorRef Allocator = kCFAllocatorDefault; - // Make our Bundle URL. + do { + // Create a CFURL from the current path and use it to create a CFBundle. CFReleaser<CFURLRef> BundleURL(::CFURLCreateFromFileSystemRepresentation( - Allocator, (const UInt8 *)Path, strlen(Path), false)); - if (BundleURL.get()) { - CFIndex LastLength = LONG_MAX; - - while (BundleURL.get() != nullptr) { - // Check the Path range and make sure we didn't make it to just "/", - // ".", or "..". - CFRange rangeIncludingSeparators; - CFRange range = ::CFURLGetByteRangeForComponent( - BundleURL.get(), kCFURLComponentPath, &rangeIncludingSeparators); - if (range.length > LastLength) - break; - - reset(::CFBundleCreate(Allocator, BundleURL.get())); - if (get() != nullptr) { - if (GetIdentifier() != nullptr) - break; - reset(); - } - BundleURL.reset(::CFURLCreateCopyDeletingLastPathComponent( - Allocator, BundleURL.get())); - - LastLength = range.length; - } + kCFAllocatorDefault, (const UInt8 *)RealPath.data(), RealPath.size(), + false)); + reset(::CFBundleCreate(kCFAllocatorDefault, BundleURL.get())); + + // If we have a valid bundle and find its identifier we are done. + if (get() != nullptr) { + if (GetIdentifier() != nullptr) + return; + reset(); } - } - return get() != nullptr; + // Remove the last component of the path and try again until there's + // nothing left but the root. + sys::path::remove_filename(RealPath); + } while (RealPath != sys::path::root_name(RealPath)); } - #endif -/// On Darwin, try and find the original executable's Info.plist information -/// using CoreFoundation calls by creating a URL for the executable and -/// chopping off the last Path component. 
The CFBundle can then get the -/// identifier and grab any needed information from it directly. Return default -/// CFBundleInfo on other platforms. +/// On Darwin, try and find the original executable's Info.plist to extract +/// information about the bundle. Return default values on other platforms. CFBundleInfo getBundleInfo(StringRef ExePath) { CFBundleInfo BundleInfo; #ifdef __APPLE__ - if (ExePath.empty() || !sys::fs::exists(ExePath)) - return BundleInfo; - auto PrintError = [&](CFTypeID TypeID) { CFString TypeIDCFStr(::CFCopyTypeIDDescription(TypeID)); std::string TypeIDStr; @@ -178,7 +158,7 @@ CFBundleInfo getBundleInfo(StringRef ExePath) { << ", but it should be a string in: " << ExePath << ".\n"; }; - CFBundle Bundle(ExePath.data()); + CFBundle Bundle(ExePath); if (CFStringRef BundleID = Bundle.GetIdentifier()) { CFString::UTF8(BundleID, BundleInfo.IDStr); if (CFTypeRef TypeRef = diff --git a/tools/llvm-cov/CodeCoverage.cpp b/tools/llvm-cov/CodeCoverage.cpp index 00258f2a1b33..c5ea50bff273 100644 --- a/tools/llvm-cov/CodeCoverage.cpp +++ b/tools/llvm-cov/CodeCoverage.cpp @@ -353,13 +353,14 @@ std::unique_ptr<CoverageMapping> CodeCoverageTool::load() { auto Coverage = std::move(CoverageOrErr.get()); unsigned Mismatched = Coverage->getMismatchedCount(); if (Mismatched) { - warning(utostr(Mismatched) + " functions have mismatched data"); + warning(Twine(Mismatched) + " functions have mismatched data"); if (ViewOpts.Debug) { for (const auto &HashMismatch : Coverage->getHashMismatches()) errs() << "hash-mismatch: " << "No profile record found for '" << HashMismatch.first << "'" - << " with hash = 0x" << utohexstr(HashMismatch.second) << "\n"; + << " with hash = 0x" << Twine::utohexstr(HashMismatch.second) + << '\n'; for (const auto &CounterMismatch : Coverage->getCounterMismatches()) errs() << "counter-mismatch: " diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp index 79204c6e9533..3a9112423cff 100644 --- 
a/tools/llvm-objdump/llvm-objdump.cpp +++ b/tools/llvm-objdump/llvm-objdump.cpp @@ -1643,7 +1643,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { outs() << " <" << TargetName; uint64_t Disp = Target - TargetAddress; if (Disp) - outs() << "+0x" << utohexstr(Disp); + outs() << "+0x" << Twine::utohexstr(Disp); outs() << '>'; } } diff --git a/tools/llvm-readobj/COFFDumper.cpp b/tools/llvm-readobj/COFFDumper.cpp index 8ac9f1a51cc5..0e76e75c085d 100644 --- a/tools/llvm-readobj/COFFDumper.cpp +++ b/tools/llvm-readobj/COFFDumper.cpp @@ -81,6 +81,9 @@ public: void printSymbols() override; void printDynamicSymbols() override; void printUnwindInfo() override; + + void printNeededLibraries() override; + void printCOFFImports() override; void printCOFFExports() override; void printCOFFDirectives() override; @@ -764,7 +767,7 @@ void COFFDumper::printRVATable(uint64_t TableVA, uint64_t Count, for (uintptr_t I = TableStart; I < TableEnd; I += EntrySize) { uint32_t RVA = *reinterpret_cast<const ulittle32_t *>(I); raw_ostream &OS = W.startLine(); - OS << "0x" << utohexstr(Obj->getImageBase() + RVA); + OS << "0x" << W.hex(Obj->getImageBase() + RVA); if (PrintExtra) PrintExtra(OS, reinterpret_cast<const uint8_t *>(I)); OS << '\n'; @@ -1522,6 +1525,25 @@ void COFFDumper::printUnwindInfo() { } } +void COFFDumper::printNeededLibraries() { + ListScope D(W, "NeededLibraries"); + + using LibsTy = std::vector<StringRef>; + LibsTy Libs; + + for (const ImportDirectoryEntryRef &DirRef : Obj->import_directories()) { + StringRef Name; + if (!DirRef.getName(Name)) + Libs.push_back(Name); + } + + std::stable_sort(Libs.begin(), Libs.end()); + + for (const auto &L : Libs) { + outs() << " " << L << "\n"; + } +} + void COFFDumper::printImportedSymbols( iterator_range<imported_symbol_iterator> Range) { for (const ImportedSymbolRef &I : Range) { diff --git a/tools/llvm-readobj/ELFDumper.cpp b/tools/llvm-readobj/ELFDumper.cpp index f2b936904393..5605eaea7555 100644 --- 
a/tools/llvm-readobj/ELFDumper.cpp +++ b/tools/llvm-readobj/ELFDumper.cpp @@ -1900,8 +1900,8 @@ template <> void ELFDumper<ELFType<support::little, false>>::printAttributes() { ArrayRef<uint8_t> Contents = unwrapOrError(Obj->getSectionContents(&Sec)); if (Contents[0] != ARMBuildAttrs::Format_Version) { - errs() << "unrecognised FormatVersion: 0x" << utohexstr(Contents[0]) - << '\n'; + errs() << "unrecognised FormatVersion: 0x" + << Twine::utohexstr(Contents[0]) << '\n'; continue; } diff --git a/tools/yaml2obj/yaml2obj.cpp b/tools/yaml2obj/yaml2obj.cpp index 3e2a5ca7ae0f..0f21d7a54708 100644 --- a/tools/yaml2obj/yaml2obj.cpp +++ b/tools/yaml2obj/yaml2obj.cpp @@ -65,7 +65,7 @@ static int convertYAML(yaml::Input &YIn, raw_ostream &Out) { } } while (YIn.nextDocument()); - error("yaml2obj: Cannot find the " + utostr(DocNum) + + error("yaml2obj: Cannot find the " + Twine(DocNum) + llvm::getOrdinalSuffix(DocNum) + " document"); } diff --git a/unittests/Analysis/ScalarEvolutionTest.cpp b/unittests/Analysis/ScalarEvolutionTest.cpp index e438e8af7aae..dffb68ac94f4 100644 --- a/unittests/Analysis/ScalarEvolutionTest.cpp +++ b/unittests/Analysis/ScalarEvolutionTest.cpp @@ -1184,5 +1184,109 @@ TEST_F(ScalarEvolutionsTest, SCEVExpanderIsSafeToExpandAt) { EXPECT_TRUE(isSafeToExpandAt(AR, Post->getTerminator(), SE)); } +// Check that SCEV expander does not use the nuw instruction +// for expansion. +TEST_F(ScalarEvolutionsTest, SCEVExpanderNUW) { + /* + * Create the following code: + * func(i64 %a) + * entry: + * br false, label %exit, label %body + * body: + * %s1 = add i64 %a, -1 + * br label %exit + * exit: + * %s = add nuw i64 %a, -1 + * ret %s + */ + + // Create a module. 
+ Module M("SCEVExpanderNUW", Context); + + Type *T_int64 = Type::getInt64Ty(Context); + + FunctionType *FTy = + FunctionType::get(Type::getVoidTy(Context), { T_int64 }, false); + Function *F = cast<Function>(M.getOrInsertFunction("func", FTy)); + Argument *Arg = &*F->arg_begin(); + ConstantInt *C = ConstantInt::get(Context, APInt(64, -1)); + + BasicBlock *Entry = BasicBlock::Create(Context, "entry", F); + BasicBlock *Body = BasicBlock::Create(Context, "body", F); + BasicBlock *Exit = BasicBlock::Create(Context, "exit", F); + + IRBuilder<> Builder(Entry); + ConstantInt *Cond = ConstantInt::get(Context, APInt(1, 0)); + Builder.CreateCondBr(Cond, Exit, Body); + + Builder.SetInsertPoint(Body); + auto *S1 = cast<Instruction>(Builder.CreateAdd(Arg, C, "add")); + Builder.CreateBr(Exit); + + Builder.SetInsertPoint(Exit); + auto *S2 = cast<Instruction>(Builder.CreateAdd(Arg, C, "add")); + S2->setHasNoUnsignedWrap(true); + auto *R = cast<Instruction>(Builder.CreateRetVoid()); + + ScalarEvolution SE = buildSE(*F); + const SCEV *S = SE.getSCEV(S1); + EXPECT_TRUE(isa<SCEVAddExpr>(S)); + SCEVExpander Exp(SE, M.getDataLayout(), "expander"); + auto *I = cast<Instruction>(Exp.expandCodeFor(S, nullptr, R)); + EXPECT_FALSE(I->hasNoUnsignedWrap()); +} + +// Check that SCEV expander does not use the nsw instruction +// for expansion. +TEST_F(ScalarEvolutionsTest, SCEVExpanderNSW) { + /* + * Create the following code: + * func(i64 %a) + * entry: + * br false, label %exit, label %body + * body: + * %s1 = add i64 %a, -1 + * br label %exit + * exit: + * %s = add nsw i64 %a, -1 + * ret %s + */ + + // Create a module. 
+ Module M("SCEVExpanderNSW", Context); + + Type *T_int64 = Type::getInt64Ty(Context); + + FunctionType *FTy = + FunctionType::get(Type::getVoidTy(Context), { T_int64 }, false); + Function *F = cast<Function>(M.getOrInsertFunction("func", FTy)); + Argument *Arg = &*F->arg_begin(); + ConstantInt *C = ConstantInt::get(Context, APInt(64, -1)); + + BasicBlock *Entry = BasicBlock::Create(Context, "entry", F); + BasicBlock *Body = BasicBlock::Create(Context, "body", F); + BasicBlock *Exit = BasicBlock::Create(Context, "exit", F); + + IRBuilder<> Builder(Entry); + ConstantInt *Cond = ConstantInt::get(Context, APInt(1, 0)); + Builder.CreateCondBr(Cond, Exit, Body); + + Builder.SetInsertPoint(Body); + auto *S1 = cast<Instruction>(Builder.CreateAdd(Arg, C, "add")); + Builder.CreateBr(Exit); + + Builder.SetInsertPoint(Exit); + auto *S2 = cast<Instruction>(Builder.CreateAdd(Arg, C, "add")); + S2->setHasNoSignedWrap(true); + auto *R = cast<Instruction>(Builder.CreateRetVoid()); + + ScalarEvolution SE = buildSE(*F); + const SCEV *S = SE.getSCEV(S1); + EXPECT_TRUE(isa<SCEVAddExpr>(S)); + SCEVExpander Exp(SE, M.getDataLayout(), "expander"); + auto *I = cast<Instruction>(Exp.expandCodeFor(S, nullptr, R)); + EXPECT_FALSE(I->hasNoSignedWrap()); +} + } // end anonymous namespace } // end namespace llvm diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp index 7755cd1be355..64cf23314497 100644 --- a/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/utils/TableGen/CodeGenDAGPatterns.cpp @@ -1873,7 +1873,7 @@ TreePatternNode *TreePatternNode::InlinePatternFragments(TreePattern &TP) { // Verify that we are passing the right number of operands. 
if (Frag->getNumArgs() != Children.size()) { TP.error("'" + Op->getName() + "' fragment requires " + - utostr(Frag->getNumArgs()) + " operands!"); + Twine(Frag->getNumArgs()) + " operands!"); return nullptr; } @@ -2195,7 +2195,7 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { SignBitAndAbove == 1) continue; - TP.error("Integer value '" + itostr(II->getValue()) + + TP.error("Integer value '" + Twine(II->getValue()) + "' is out of range for type '" + getEnumName(VT) + "'!"); break; } @@ -2245,9 +2245,8 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { MadeChange |= UpdateNodeType(i, Int->IS.RetVTs[i], TP); if (getNumChildren() != NumParamVTs + 1) { - TP.error("Intrinsic '" + Int->Name + "' expects " + - utostr(NumParamVTs) + " operands, not " + - utostr(getNumChildren() - 1) + " operands!"); + TP.error("Intrinsic '" + Int->Name + "' expects " + Twine(NumParamVTs) + + " operands, not " + Twine(getNumChildren() - 1) + " operands!"); return false; } @@ -2271,7 +2270,7 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { if (NI.getNumOperands() >= 0 && getNumChildren() != (unsigned)NI.getNumOperands()) { TP.error(getOperator()->getName() + " node requires exactly " + - itostr(NI.getNumOperands()) + " operands!"); + Twine(NI.getNumOperands()) + " operands!"); return false; } @@ -2340,7 +2339,7 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { TreePatternNode *SubIdxChild = getChild(I + 1); if (!isOperandClass(SubIdxChild, "SubRegIndex")) { TP.error("REG_SEQUENCE requires a SubRegIndex for operand " + - itostr(I + 1) + "!"); + Twine(I + 1) + "!"); return false; } } @@ -3514,7 +3513,7 @@ const DAGInstruction &CodeGenDAGPatterns::parseInstructionPattern( CGIOperandList::OperandInfo &Op = CGI.Operands[i]; const std::string &OpName = Op.Name; if (OpName.empty()) - I->error("Operand #" + utostr(i) + " in operands list has no name!"); + 
I->error("Operand #" + Twine(i) + " in operands list has no name!"); if (!InstInputsCheck.count(OpName)) { // If this is an operand with a DefaultOps set filled in, we can ignore diff --git a/utils/TableGen/DFAPacketizerEmitter.cpp b/utils/TableGen/DFAPacketizerEmitter.cpp index f879a5bae215..1c1932a0144a 100644 --- a/utils/TableGen/DFAPacketizerEmitter.cpp +++ b/utils/TableGen/DFAPacketizerEmitter.cpp @@ -283,10 +283,10 @@ void dbgsInsnClass(const std::vector<unsigned> &InsnClass) { if (i > 0) { DEBUG(dbgs() << ", "); } - DEBUG(dbgs() << "0x" << utohexstr(InsnClass[i])); + DEBUG(dbgs() << "0x" << Twine::utohexstr(InsnClass[i])); } DFAInput InsnInput = getDFAInsnInput(InsnClass); - DEBUG(dbgs() << " (input: 0x" << utohexstr(InsnInput) << ")"); + DEBUG(dbgs() << " (input: 0x" << Twine::utohexstr(InsnInput) << ")"); } // @@ -301,7 +301,7 @@ void dbgsStateInfo(const std::set<unsigned> &stateInfo) { if (i > 0) { DEBUG(dbgs() << ", "); } - DEBUG(dbgs() << "0x" << utohexstr(thisState)); + DEBUG(dbgs() << "0x" << Twine::utohexstr(thisState)); } } @@ -361,7 +361,7 @@ void State::AddInsnClass(std::vector<unsigned> &InsnClass, DenseSet<unsigned> VisitedResourceStates; - DEBUG(dbgs() << " thisState: 0x" << utohexstr(thisState) << "\n"); + DEBUG(dbgs() << " thisState: 0x" << Twine::utohexstr(thisState) << "\n"); AddInsnClassStages(InsnClass, ComboBitToBitsMap, numstages - 1, numstages, thisState, thisState, @@ -381,7 +381,7 @@ void State::AddInsnClassStages(std::vector<unsigned> &InsnClass, DEBUG({ dbgsIndent((1 + numstages - chkstage) << 1); dbgs() << "AddInsnClassStages " << chkstage << " (0x" - << utohexstr(thisStage) << ") from "; + << Twine::utohexstr(thisStage) << ") from "; dbgsInsnClass(InsnClass); dbgs() << "\n"; }); @@ -395,9 +395,10 @@ void State::AddInsnClassStages(std::vector<unsigned> &InsnClass, if (resourceMask & thisStage) { unsigned combo = ComboBitToBitsMap[resourceMask]; if (combo && ((~prevState & combo) != combo)) { - DEBUG(dbgs() << "\tSkipped Add 0x" << 
utohexstr(prevState) - << " - combo op 0x" << utohexstr(resourceMask) - << " (0x" << utohexstr(combo) <<") cannot be scheduled\n"); + DEBUG(dbgs() << "\tSkipped Add 0x" << Twine::utohexstr(prevState) + << " - combo op 0x" << Twine::utohexstr(resourceMask) + << " (0x" << Twine::utohexstr(combo) + << ") cannot be scheduled\n"); continue; } // @@ -407,11 +408,11 @@ void State::AddInsnClassStages(std::vector<unsigned> &InsnClass, unsigned ResultingResourceState = prevState | resourceMask | combo; DEBUG({ dbgsIndent((2 + numstages - chkstage) << 1); - dbgs() << "0x" << utohexstr(prevState) - << " | 0x" << utohexstr(resourceMask); + dbgs() << "0x" << Twine::utohexstr(prevState) << " | 0x" + << Twine::utohexstr(resourceMask); if (combo) - dbgs() << " | 0x" << utohexstr(combo); - dbgs() << " = 0x" << utohexstr(ResultingResourceState) << " "; + dbgs() << " | 0x" << Twine::utohexstr(combo); + dbgs() << " = 0x" << Twine::utohexstr(ResultingResourceState) << " "; }); // @@ -433,7 +434,7 @@ void State::AddInsnClassStages(std::vector<unsigned> &InsnClass, VisitedResourceStates.insert(ResultingResourceState); PossibleStates.insert(ResultingResourceState); DEBUG(dbgs() << "\tResultingResourceState: 0x" - << utohexstr(ResultingResourceState) << "\n"); + << Twine::utohexstr(ResultingResourceState) << "\n"); } else { DEBUG(dbgs() << "\tSkipped Add - state already seen\n"); } @@ -493,9 +494,10 @@ bool State::canMaybeAddInsnClass(std::vector<unsigned> &InsnClass, // These cases are caught later in AddInsnClass. 
unsigned combo = ComboBitToBitsMap[InsnClass[i]]; if (combo && ((~resources & combo) != combo)) { - DEBUG(dbgs() << "\tSkipped canMaybeAdd 0x" << utohexstr(resources) - << " - combo op 0x" << utohexstr(InsnClass[i]) - << " (0x" << utohexstr(combo) <<") cannot be scheduled\n"); + DEBUG(dbgs() << "\tSkipped canMaybeAdd 0x" + << Twine::utohexstr(resources) << " - combo op 0x" + << Twine::utohexstr(InsnClass[i]) << " (0x" + << Twine::utohexstr(combo) << ") cannot be scheduled\n"); available = false; break; } @@ -573,9 +575,8 @@ void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName, for (State::TransitionMap::iterator II = SI->Transitions.begin(), IE = SI->Transitions.end(); II != IE; ++II) { - OS << "{0x" << utohexstr(getDFAInsnInput(II->first)) << ", " - << II->second->stateNum - << "},\t"; + OS << "{0x" << Twine::utohexstr(getDFAInsnInput(II->first)) << ", " + << II->second->stateNum << "},\t"; } ValidTransitions += SI->Transitions.size(); @@ -668,8 +669,8 @@ int DFAPacketizerEmitter::collectAllFuncUnits( "Exceeded maximum number of representable resources"); unsigned FuncResources = (unsigned) (1U << j); FUNameToBitsMap[FUs[j]->getName()] = FuncResources; - DEBUG(dbgs() << " " << FUs[j]->getName() - << ":0x" << utohexstr(FuncResources)); + DEBUG(dbgs() << " " << FUs[j]->getName() << ":0x" + << Twine::utohexstr(FuncResources)); } if (((int) numFUs) > maxFUs) { maxFUs = numFUs; @@ -713,20 +714,20 @@ int DFAPacketizerEmitter::collectAllComboFuncs( const std::string &ComboFuncName = ComboFunc->getName(); unsigned ComboBit = FUNameToBitsMap[ComboFuncName]; unsigned ComboResources = ComboBit; - DEBUG(dbgs() << " combo: " << ComboFuncName - << ":0x" << utohexstr(ComboResources) << "\n"); + DEBUG(dbgs() << " combo: " << ComboFuncName << ":0x" + << Twine::utohexstr(ComboResources) << "\n"); for (unsigned k = 0, M = FuncList.size(); k < M; ++k) { std::string FuncName = FuncList[k]->getName(); unsigned FuncResources = FUNameToBitsMap[FuncName]; - 
DEBUG(dbgs() << " " << FuncName - << ":0x" << utohexstr(FuncResources) << "\n"); + DEBUG(dbgs() << " " << FuncName << ":0x" + << Twine::utohexstr(FuncResources) << "\n"); ComboResources |= FuncResources; } ComboBitToBitsMap[ComboBit] = ComboResources; numCombos++; DEBUG(dbgs() << " => combo bits: " << ComboFuncName << ":0x" - << utohexstr(ComboBit) << " = 0x" - << utohexstr(ComboResources) << "\n"); + << Twine::utohexstr(ComboBit) << " = 0x" + << Twine::utohexstr(ComboResources) << "\n"); } } return numCombos; @@ -781,7 +782,7 @@ int DFAPacketizerEmitter::collectOneInsnClass(const std::string &ProcName, dbglen += 8; DEBUG(dbgs() << "\t"); } - DEBUG(dbgs() << " (bits: 0x" << utohexstr(UnitBitValue) << ")\n"); + DEBUG(dbgs() << " (bits: 0x" << Twine::utohexstr(UnitBitValue) << ")\n"); } if (!UnitBits.empty()) diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp index 37e024b1665e..ba793ad9b938 100644 --- a/utils/TableGen/IntrinsicEmitter.cpp +++ b/utils/TableGen/IntrinsicEmitter.cpp @@ -448,7 +448,7 @@ void IntrinsicEmitter::EmitGenerator(const CodeGenIntrinsicTable &Ints, // If the entry fit in the table, just emit it. if (FixedEncodings[i] != ~0U) { - OS << "0x" << utohexstr(FixedEncodings[i]) << ", "; + OS << "0x" << Twine::utohexstr(FixedEncodings[i]) << ", "; continue; } |