diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2022-08-13 15:37:04 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2023-04-09 14:54:49 +0000 |
commit | a2e927433f9d66b477247c4d36da79c008cfa8a4 (patch) | |
tree | 7b42766cfbd1873929e65b114ee02ae26528ecdb /contrib/llvm-project/llvm | |
parent | 74093eb27f687d9f89d8db457e410aec1cd71b6b (diff) | |
download | src-a2e927433f9d66b477247c4d36da79c008cfa8a4.tar.gz src-a2e927433f9d66b477247c4d36da79c008cfa8a4.zip |
Merge llvm-project release/15.x llvmorg-15.0.0-rc2-40-gfbd2950d8d0d
This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and
openmp to llvmorg-15.0.0-rc2-40-gfbd2950d8d0d.
PR: 265425
MFC after: 2 weeks
(cherry picked from commit 61cfbce3347e4372143bcabf7b197577b9f3958a)
Diffstat (limited to 'contrib/llvm-project/llvm')
35 files changed, 418 insertions, 115 deletions
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h index 6ea6d2361eba..102b069ac722 100644 --- a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1440,6 +1440,10 @@ public: /// to a stack reload. unsigned getGISelRematGlobalCost() const; + /// \returns the lower bound of a trip count to decide on vectorization + /// while tail-folding. + unsigned getMinTripCountTailFoldingThreshold() const; + /// \returns True if the target supports scalable vectors. bool supportsScalableVectors() const; @@ -1830,6 +1834,7 @@ public: ReductionFlags) const = 0; virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0; virtual unsigned getGISelRematGlobalCost() const = 0; + virtual unsigned getMinTripCountTailFoldingThreshold() const = 0; virtual bool enableScalableVectorization() const = 0; virtual bool supportsScalableVectors() const = 0; virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType, @@ -2453,6 +2458,10 @@ public: return Impl.getGISelRematGlobalCost(); } + unsigned getMinTripCountTailFoldingThreshold() const override { + return Impl.getMinTripCountTailFoldingThreshold(); + } + bool supportsScalableVectors() const override { return Impl.supportsScalableVectors(); } diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 1a75cb35549e..da1f53aa33cb 100644 --- a/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -803,6 +803,8 @@ public: unsigned getGISelRematGlobalCost() const { return 1; } + unsigned getMinTripCountTailFoldingThreshold() const { return 0; } + bool supportsScalableVectors() const { return false; } bool enableScalableVectorization() const { return false; } diff --git a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/ELF.h b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/ELF.h index a0bb50db8c54..99e7a9868c29 100644 --- a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/ELF.h +++ b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/ELF.h @@ -1597,6 +1597,7 @@ enum { NT_GNU_BUILD_ID = 3, NT_GNU_GOLD_VERSION = 4, NT_GNU_PROPERTY_TYPE_0 = 5, + FDO_PACKAGING_METADATA = 0xcafe1a7e, }; // Android note types. diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/ISDOpcodes.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/ISDOpcodes.h index e90730140406..b7f6de40266e 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -1486,6 +1486,11 @@ inline unsigned getUnorderedFlavor(CondCode Cond) { /// SetCC operation. CondCode getSetCCInverse(CondCode Operation, EVT Type); +inline bool isExtOpcode(unsigned Opcode) { + return Opcode == ISD::ANY_EXTEND || Opcode == ISD::ZERO_EXTEND || + Opcode == ISD::SIGN_EXTEND; +} + namespace GlobalISel { /// Return the operation corresponding to !(X op Y), where 'op' is a valid /// SetCC operation. The U bit of the condition code has different meanings diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/CodeView.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/CodeView.h index d4cb6ae7a28e..b7a3e1561a07 100644 --- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/CodeView.h +++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/CodeView/CodeView.h @@ -330,6 +330,9 @@ enum class DebugSubsectionKind : uint32_t { MergedAssemblyInput = 0xfc, CoffSymbolRVA = 0xfd, + + XfgHashType = 0xff, + XfgHashVirtual = 0x100, }; /// Equivalent to CV_ptrtype_e. diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.h index 37fe5a98b093..549a6c096510 100644 --- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.h +++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.h @@ -66,7 +66,10 @@ private: static llvm::orc::shared::CWrapperFunctionResult releaseWrapper(const char *ArgData, size_t ArgSize); +#if (defined(LLVM_ON_UNIX) && !defined(__ANDROID__)) || defined(_WIN32) std::atomic<int> SharedMemoryCount{0}; +#endif + std::mutex Mutex; ReservationMap Reservations; AllocationMap Allocations; diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h index cfb951178da6..99175d796974 100644 --- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h +++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h @@ -19,4 +19,7 @@ extern "C" llvm::orc::shared::CWrapperFunctionResult llvm_orc_registerJITLoaderGDBWrapper(const char *Data, uint64_t Size); +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_registerJITLoaderGDBAllocAction(const char *Data, size_t Size); + #endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_JITLOADERGDB_H diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Type.h b/contrib/llvm-project/llvm/include/llvm/IR/Type.h index 51263c6b8fcc..a7f22324571b 100644 --- a/contrib/llvm-project/llvm/include/llvm/IR/Type.h +++ b/contrib/llvm-project/llvm/include/llvm/IR/Type.h @@ -144,6 +144,11 @@ public: /// Return true if this is 'bfloat', a 16-bit bfloat type. bool isBFloatTy() const { return getTypeID() == BFloatTyID; } + /// Return true if this is a 16-bit float type. + bool is16bitFPTy() const { + return getTypeID() == BFloatTyID || getTypeID() == HalfTyID; + } + /// Return true if this is 'float', a 32-bit IEEE fp type. bool isFloatTy() const { return getTypeID() == FloatTyID; } diff --git a/contrib/llvm-project/llvm/include/llvm/Support/ErrorHandling.h b/contrib/llvm-project/llvm/include/llvm/Support/ErrorHandling.h index 004b3b7868fb..9c8e3448f3a0 100644 --- a/contrib/llvm-project/llvm/include/llvm/Support/ErrorHandling.h +++ b/contrib/llvm-project/llvm/include/llvm/Support/ErrorHandling.h @@ -147,7 +147,11 @@ llvm_unreachable_internal(const char *msg = nullptr, const char *file = nullptr, #elif LLVM_UNREACHABLE_OPTIMIZE #define llvm_unreachable(msg) LLVM_BUILTIN_UNREACHABLE #else -#define llvm_unreachable(msg) LLVM_BUILTIN_TRAP, LLVM_BUILTIN_UNREACHABLE +#define llvm_unreachable(msg) \ + do { \ + LLVM_BUILTIN_TRAP; \ + LLVM_BUILTIN_UNREACHABLE; \ + } while (false) #endif #endif diff --git a/contrib/llvm-project/llvm/include/llvm/Support/Host.h b/contrib/llvm-project/llvm/include/llvm/Support/Host.h index f683371ad1d3..369d6745db5a 100644 --- a/contrib/llvm-project/llvm/include/llvm/Support/Host.h +++ b/contrib/llvm-project/llvm/include/llvm/Support/Host.h @@ -65,6 +65,7 @@ namespace sys { StringRef getHostCPUNameForARM(StringRef ProcCpuinfoContent); StringRef getHostCPUNameForS390x(StringRef ProcCpuinfoContent); StringRef getHostCPUNameForRISCV(StringRef ProcCpuinfoContent); + StringRef getHostCPUNameForSPARC(StringRef ProcCpuinfoContent); StringRef getHostCPUNameForBPF(); /// Helper functions to extract CPU details from CPUID on x86. diff --git a/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp index aa4da27be4e5..ae927dae74f7 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp @@ -3120,7 +3120,7 @@ static Constant *ConstantFoldFixedVectorCall( } return ConstantVector::get(NCs); } - break; + return nullptr; } case Intrinsic::get_active_lane_mask: { auto *Op0 = dyn_cast<ConstantInt>(Operands[0]); @@ -3139,7 +3139,7 @@ static Constant *ConstantFoldFixedVectorCall( } return ConstantVector::get(NCs); } - break; + return nullptr; } default: break; diff --git a/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp index cfa6e3a97626..143f03ccac39 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1108,6 +1108,10 @@ unsigned TargetTransformInfo::getGISelRematGlobalCost() const { return TTIImpl->getGISelRematGlobalCost(); } +unsigned TargetTransformInfo::getMinTripCountTailFoldingThreshold() const { + return TTIImpl->getMinTripCountTailFoldingThreshold(); +} + bool TargetTransformInfo::supportsScalableVectors() const { return TTIImpl->supportsScalableVectors(); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp index 96131dc2983e..e5cd46268600 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp @@ -750,7 +750,7 @@ void ScheduleDAGMI::moveInstruction( } bool ScheduleDAGMI::checkSchedLimit() { -#if LLVM_ENABLE_ABI_BREAKING_CHECKS +#if LLVM_ENABLE_ABI_BREAKING_CHECKS && !defined(NDEBUG) if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) { CurrentTop = CurrentBottom; return false; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 654879115ff9..8d465b9520de 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6353,6 +6353,24 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } } + if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C && + ISD::isExtOpcode(N0.getOperand(0).getOpcode())) { + SDValue Ext = N0.getOperand(0); + EVT ExtVT = Ext->getValueType(0); + SDValue Extendee = Ext->getOperand(0); + + unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits(); + if (N1C->getAPIntValue().isMask(ScalarWidth)) { + // (and (extract_subvector (zext|anyext|sext v) _) iN_mask) + // => (extract_subvector (iN_zeroext v)) + SDValue ZeroExtExtendee = + DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), ExtVT, Extendee); + + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, ZeroExtExtendee, + N0.getOperand(1)); + } + } + // fold (and (masked_gather x)) -> (zext_masked_gather x) if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) { EVT MemVT = GN0->getMemoryVT(); @@ -7301,12 +7319,14 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, unsigned MaskLoBits = 0; if (IsRotate && isPowerOf2_64(EltSize)) { unsigned Bits = Log2_64(EltSize); - APInt DemandedBits = - APInt::getLowBitsSet(Neg.getScalarValueSizeInBits(), Bits); - if (SDValue Inner = - TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) { - Neg = Inner; - MaskLoBits = Bits; + unsigned NegBits = Neg.getScalarValueSizeInBits(); + if (NegBits >= Bits) { + APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits); + if (SDValue Inner = + TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) { + Neg = Inner; + MaskLoBits = Bits; + } } } @@ -7322,11 +7342,13 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, // affect Mask's demanded bits, just replace Pos with Pos'. These operations // are redundant for the purpose of the equality. if (MaskLoBits) { - APInt DemandedBits = - APInt::getLowBitsSet(Pos.getScalarValueSizeInBits(), MaskLoBits); - if (SDValue Inner = - TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG)) { - Pos = Inner; + unsigned PosBits = Pos.getScalarValueSizeInBits(); + if (PosBits >= MaskLoBits) { + APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits); + if (SDValue Inner = + TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG)) { + Pos = Inner; + } } } @@ -22707,6 +22729,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { SDLoc DL(N); EVT IntVT = VT.changeVectorElementTypeToInteger(); EVT IntSVT = VT.getVectorElementType().changeTypeToInteger(); + IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT); SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT); SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT); SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT)); diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/FormatUtil.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/FormatUtil.cpp index a167d45982a9..9c05d585831a 100644 --- a/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/FormatUtil.cpp +++ b/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/FormatUtil.cpp @@ -66,6 +66,8 @@ std::string llvm::pdb::formatChunkKind(DebugSubsectionKind Kind, RETURN_CASE(DebugSubsectionKind, MergedAssemblyInput, "merged assembly input"); RETURN_CASE(DebugSubsectionKind, CoffSymbolRVA, "coff symbol rva"); + RETURN_CASE(DebugSubsectionKind, XfgHashType, "xfg hash type"); + RETURN_CASE(DebugSubsectionKind, XfgHashVirtual, "xfg hash virtual"); } } else { switch (Kind) { @@ -89,6 +91,11 @@ std::string llvm::pdb::formatChunkKind(DebugSubsectionKind Kind, "DEBUG_S_MERGED_ASSEMBLYINPUT"); RETURN_CASE(DebugSubsectionKind, CoffSymbolRVA, "DEBUG_S_COFF_SYMBOL_RVA"); + RETURN_CASE(DebugSubsectionKind, XfgHashType, + "DEBUG_S_XFGHASH_TYPE"); + RETURN_CASE(DebugSubsectionKind, XfgHashVirtual, + "DEBUG_S_XFGHASH_VIRTUAL"); + } } return formatUnknownEnum(Kind); diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp index 3f75012f5cf9..4afcf95e9e8e 100644 --- a/contrib/llvm-project/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp +++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp @@ -24,6 +24,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Object/SymbolSize.h" #include "llvm/Support/Debug.h" diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp index ca3f64b8a409..ee92e5191b50 100644 --- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp +++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp @@ -11,7 +11,7 @@ #include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h" #include "llvm/Support/WindowsError.h" -#if defined(LLVM_ON_UNIX) +#if defined(LLVM_ON_UNIX) && !defined(__ANDROID__) #include <fcntl.h> #include <sys/mman.h> #include <unistd.h> @@ -173,20 +173,30 @@ InProcessMemoryMapper::~InProcessMemoryMapper() { SharedMemoryMapper::SharedMemoryMapper(ExecutorProcessControl &EPC, SymbolAddrs SAs, size_t PageSize) - : EPC(EPC), SAs(SAs), PageSize(PageSize) {} + : EPC(EPC), SAs(SAs), PageSize(PageSize) { +#if (!defined(LLVM_ON_UNIX) || defined(__ANDROID__)) && !defined(_WIN32) + llvm_unreachable("SharedMemoryMapper is not supported on this platform yet"); +#endif +} Expected<std::unique_ptr<SharedMemoryMapper>> SharedMemoryMapper::Create(ExecutorProcessControl &EPC, SymbolAddrs SAs) { +#if (defined(LLVM_ON_UNIX) && !defined(__ANDROID__)) || defined(_WIN32) auto PageSize = sys::Process::getPageSize(); if (!PageSize) return PageSize.takeError(); return std::make_unique<SharedMemoryMapper>(EPC, SAs, *PageSize); +#else + return make_error<StringError>( + "SharedMemoryMapper is not supported on this platform yet", + inconvertibleErrorCode()); +#endif } void SharedMemoryMapper::reserve(size_t NumBytes, OnReservedFunction OnReserved) { -#if defined(LLVM_ON_UNIX) || defined(_WIN32) +#if (defined(LLVM_ON_UNIX) && !defined(__ANDROID__)) || defined(_WIN32) EPC.callSPSWrapperAsync< rt::SPSExecutorSharedMemoryMapperServiceReserveSignature>( @@ -334,7 +344,7 @@ void SharedMemoryMapper::deinitialize( void SharedMemoryMapper::release(ArrayRef<ExecutorAddr> Bases, OnReleasedFunction OnReleased) { -#if defined(LLVM_ON_UNIX) || defined(_WIN32) +#if (defined(LLVM_ON_UNIX) && !defined(__ANDROID__)) || defined(_WIN32) Error Err = Error::success(); { @@ -351,8 +361,8 @@ void SharedMemoryMapper::release(ArrayRef<ExecutorAddr> Bases, #elif defined(_WIN32) if (!UnmapViewOfFile(Reservations[Base].LocalAddr)) - joinErrors(std::move(Err), - errorCodeToError(mapWindowsError(GetLastError()))); + Err = joinErrors(std::move(Err), + errorCodeToError(mapWindowsError(GetLastError()))); #endif diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp index 6c9f099061ae..caa191cea899 100644 --- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp +++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp @@ -52,7 +52,7 @@ namespace rt_bootstrap { Expected<std::pair<ExecutorAddr, std::string>> ExecutorSharedMemoryMapperService::reserve(uint64_t Size) { -#if defined(LLVM_ON_UNIX) || defined(_WIN32) +#if (defined(LLVM_ON_UNIX) && !defined(__ANDROID__)) || defined(_WIN32) #if defined(LLVM_ON_UNIX) @@ -125,7 +125,7 @@ ExecutorSharedMemoryMapperService::reserve(uint64_t Size) { Expected<ExecutorAddr> ExecutorSharedMemoryMapperService::initialize( ExecutorAddr Reservation, tpctypes::SharedMemoryFinalizeRequest &FR) { -#if defined(LLVM_ON_UNIX) || defined(_WIN32) +#if (defined(LLVM_ON_UNIX) && !defined(__ANDROID__)) || defined(_WIN32) ExecutorAddr MinAddr(~0ULL); @@ -207,7 +207,7 @@ Error ExecutorSharedMemoryMapperService::deinitialize( Error ExecutorSharedMemoryMapperService::release( const std::vector<ExecutorAddr> &Bases) { -#if defined(LLVM_ON_UNIX) || defined(_WIN32) +#if (defined(LLVM_ON_UNIX) && !defined(__ANDROID__)) || defined(_WIN32) Error Err = Error::success(); for (auto Base : Bases) { @@ -241,6 +241,7 @@ Error ExecutorSharedMemoryMapperService::release( errno, std::generic_category()))); #elif defined(_WIN32) + (void)Size; if (!UnmapViewOfFile(Base.toPtr<void *>())) Err = joinErrors(std::move(Err), diff --git a/contrib/llvm-project/llvm/lib/Support/Host.cpp b/contrib/llvm-project/llvm/lib/Support/Host.cpp index 08e3a27e0173..c97f273b0739 100644 --- a/contrib/llvm-project/llvm/lib/Support/Host.cpp +++ b/contrib/llvm-project/llvm/lib/Support/Host.cpp @@ -47,6 +47,9 @@ #ifdef _AIX #include <sys/systemcfg.h> #endif +#if defined(__sun__) && defined(__svr4__) +#include <kstat.h> +#endif #define DEBUG_TYPE "host-detection" @@ -1413,6 +1416,111 @@ StringRef sys::getHostCPUName() { #endif #endif } +#elif defined(__sparc__) +#if defined(__linux__) +StringRef sys::detail::getHostCPUNameForSPARC(StringRef ProcCpuinfoContent) { + SmallVector<StringRef> Lines; + ProcCpuinfoContent.split(Lines, "\n"); + + // Look for cpu line to determine cpu name + StringRef Cpu; + for (unsigned I = 0, E = Lines.size(); I != E; ++I) { + if (Lines[I].startswith("cpu")) { + Cpu = Lines[I].substr(5).ltrim("\t :"); + break; + } + } + + return StringSwitch<const char *>(Cpu) + .StartsWith("SuperSparc", "supersparc") + .StartsWith("HyperSparc", "hypersparc") + .StartsWith("SpitFire", "ultrasparc") + .StartsWith("BlackBird", "ultrasparc") + .StartsWith("Sabre", " ultrasparc") + .StartsWith("Hummingbird", "ultrasparc") + .StartsWith("Cheetah", "ultrasparc3") + .StartsWith("Jalapeno", "ultrasparc3") + .StartsWith("Jaguar", "ultrasparc3") + .StartsWith("Panther", "ultrasparc3") + .StartsWith("Serrano", "ultrasparc3") + .StartsWith("UltraSparc T1", "niagara") + .StartsWith("UltraSparc T2", "niagara2") + .StartsWith("UltraSparc T3", "niagara3") + .StartsWith("UltraSparc T4", "niagara4") + .StartsWith("UltraSparc T5", "niagara4") + .StartsWith("LEON", "leon3") + // niagara7/m8 not supported by LLVM yet. + .StartsWith("SPARC-M7", "niagara4" /* "niagara7" */) + .StartsWith("SPARC-S7", "niagara4" /* "niagara7" */) + .StartsWith("SPARC-M8", "niagara4" /* "m8" */) + .Default("generic"); +} +#endif + +StringRef sys::getHostCPUName() { +#if defined(__linux__) + std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); + StringRef Content = P ? P->getBuffer() : ""; + return detail::getHostCPUNameForSPARC(Content); +#elif defined(__sun__) && defined(__svr4__) + char *buf = NULL; + kstat_ctl_t *kc; + kstat_t *ksp; + kstat_named_t *brand = NULL; + + kc = kstat_open(); + if (kc != NULL) { + ksp = kstat_lookup(kc, const_cast<char *>("cpu_info"), -1, NULL); + if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1 && + ksp->ks_type == KSTAT_TYPE_NAMED) + brand = + (kstat_named_t *)kstat_data_lookup(ksp, const_cast<char *>("brand")); + if (brand != NULL && brand->data_type == KSTAT_DATA_STRING) + buf = KSTAT_NAMED_STR_PTR(brand); + } + kstat_close(kc); + + return StringSwitch<const char *>(buf) + .Case("TMS390S10", "supersparc") // Texas Instruments microSPARC I + .Case("TMS390Z50", "supersparc") // Texas Instruments SuperSPARC I + .Case("TMS390Z55", + "supersparc") // Texas Instruments SuperSPARC I with SuperCache + .Case("MB86904", "supersparc") // Fujitsu microSPARC II + .Case("MB86907", "supersparc") // Fujitsu TurboSPARC + .Case("RT623", "hypersparc") // Ross hyperSPARC + .Case("RT625", "hypersparc") + .Case("RT626", "hypersparc") + .Case("UltraSPARC-I", "ultrasparc") + .Case("UltraSPARC-II", "ultrasparc") + .Case("UltraSPARC-IIe", "ultrasparc") + .Case("UltraSPARC-IIi", "ultrasparc") + .Case("SPARC64-III", "ultrasparc") + .Case("SPARC64-IV", "ultrasparc") + .Case("UltraSPARC-III", "ultrasparc3") + .Case("UltraSPARC-III+", "ultrasparc3") + .Case("UltraSPARC-IIIi", "ultrasparc3") + .Case("UltraSPARC-IIIi+", "ultrasparc3") + .Case("UltraSPARC-IV", "ultrasparc3") + .Case("UltraSPARC-IV+", "ultrasparc3") + .Case("SPARC64-V", "ultrasparc3") + .Case("SPARC64-VI", "ultrasparc3") + .Case("SPARC64-VII", "ultrasparc3") + .Case("UltraSPARC-T1", "niagara") + .Case("UltraSPARC-T2", "niagara2") + .Case("UltraSPARC-T2", "niagara2") + .Case("UltraSPARC-T2+", "niagara2") + .Case("SPARC-T3", "niagara3") + .Case("SPARC-T4", "niagara4") + .Case("SPARC-T5", "niagara4") + // niagara7/m8 not supported by LLVM yet. + .Case("SPARC-M7", "niagara4" /* "niagara7" */) + .Case("SPARC-S7", "niagara4" /* "niagara7" */) + .Case("SPARC-M8", "niagara4" /* "m8" */) + .Default("generic"); +#else + return "generic"; +#endif +} #else StringRef sys::getHostCPUName() { return "generic"; } namespace llvm { diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 72f0fc94940c..c28216048d7c 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -886,7 +886,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setTargetDAGCombine({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND, ISD::VECTOR_SPLICE, ISD::SIGN_EXTEND_INREG, ISD::CONCAT_VECTORS, ISD::EXTRACT_SUBVECTOR, - ISD::INSERT_SUBVECTOR, ISD::STORE}); + ISD::INSERT_SUBVECTOR, ISD::STORE, ISD::BUILD_VECTOR}); if (Subtarget->supportsAddressTopByteIgnored()) setTargetDAGCombine(ISD::LOAD); @@ -15988,6 +15988,49 @@ static SDValue performVectorAddSubExtCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +static SDValue performBuildVectorCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + SelectionDAG &DAG) { + SDLoc DL(N); + + // A build vector of two extracted elements is equivalent to an + // extract subvector where the inner vector is any-extended to the + // extract_vector_elt VT. + // (build_vector (extract_elt_iXX_to_i32 vec Idx+0) + // (extract_elt_iXX_to_i32 vec Idx+1)) + // => (extract_subvector (anyext_iXX_to_i32 vec) Idx) + + // For now, only consider the v2i32 case, which arises as a result of + // legalization. + if (N->getValueType(0) != MVT::v2i32) + return SDValue(); + + SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1); + // Reminder, EXTRACT_VECTOR_ELT has the effect of any-extending to its VT. + if (Elt0->getOpcode() == ISD::EXTRACT_VECTOR_ELT && + Elt1->getOpcode() == ISD::EXTRACT_VECTOR_ELT && + // Constant index. + isa<ConstantSDNode>(Elt0->getOperand(1)) && + isa<ConstantSDNode>(Elt1->getOperand(1)) && + // Both EXTRACT_VECTOR_ELT from same vector... + Elt0->getOperand(0) == Elt1->getOperand(0) && + // ... and contiguous. First element's index +1 == second element's index. + Elt0->getConstantOperandVal(1) + 1 == Elt1->getConstantOperandVal(1)) { + SDValue VecToExtend = Elt0->getOperand(0); + EVT ExtVT = VecToExtend.getValueType().changeVectorElementType(MVT::i32); + if (!DAG.getTargetLoweringInfo().isTypeLegal(ExtVT)) + return SDValue(); + + SDValue SubvectorIdx = DAG.getVectorIdxConstant(Elt0->getConstantOperandVal(1), DL); + + SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, DL, ExtVT, VecToExtend); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Ext, + SubvectorIdx); + } + + return SDValue(); +} + static SDValue performAddSubCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) { @@ -19457,6 +19500,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, case ISD::ADD: case ISD::SUB: return performAddSubCombine(N, DCI, DAG); + case ISD::BUILD_VECTOR: + return performBuildVectorCombine(N, DCI, DAG); case AArch64ISD::ANDS: return performFlagSettingCombine(N, DCI, ISD::AND); case AArch64ISD::ADC: diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index 2231f8705998..0c5eadeffcdb 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -334,6 +334,10 @@ public: return 2; } + unsigned getMinTripCountTailFoldingThreshold() const { + return ST->hasSVE() ? 5 : 0; + } + PredicationStyle emitGetActiveLaneMask() const { if (ST->hasSVE()) return PredicationStyle::DataAndControlFlow; diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index d3617b87a851..380d3621e745 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -662,8 +662,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower(); - // TODO: Handle vector types. getActionDefinitionsBuilder(G_CTTZ) + .lowerIf(isVector(0)) .clampScalar(0, s32, s64) .scalarSameSizeAs(1, 0) .customFor({s32, s64}); diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARM.td b/contrib/llvm-project/llvm/lib/Target/ARM/ARM.td index 73970b9c74c5..71388bc4efa4 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARM.td +++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARM.td @@ -556,6 +556,15 @@ def FeatureAAPCSFrameChainLeaf : SubtargetFeature<"aapcs-frame-chain-leaf", "for leaf functions", [FeatureAAPCSFrameChain]>; +// Assume that lock-free 32-bit atomics are available, even if the target +// and operating system combination would not usually provide them. The user +// is responsible for providing any necessary __sync implementations. Code +// built with this feature is not ABI-compatible with code built without this +// feature, if atomic variables are exposed across the ABI boundary. +def FeatureAtomics32 : SubtargetFeature< + "atomics-32", "HasForced32BitAtomics", "true", + "Assume that lock-free 32-bit atomics are available">; + //===----------------------------------------------------------------------===// // ARM architecture class // diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp index 743cca9ff71f..4c24d7020932 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1370,7 +1370,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, // instructions. (ARMv6 doesn't have dmb, but it has an equivalent // encoding; see ARMISD::MEMBARRIER_MCR.) setMaxAtomicSizeInBitsSupported(64); - } else if (Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) { + } else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) || + Subtarget->hasForced32BitAtomics()) { // Cortex-M (besides Cortex-M0) have 32-bit atomics. setMaxAtomicSizeInBitsSupported(32); } else { diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFastISel.cpp index 5c7f0619161c..7b1b9456080e 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFastISel.cpp +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFastISel.cpp @@ -831,7 +831,7 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, // FIXME: Operands are not in canonical order at -O0, so an immediate // operand in position 1 is a lost opportunity for now. We are // similar to ARM in this regard. - long Imm = 0; + int64_t Imm = 0; bool UseImm = false; const bool HasSPE = Subtarget->hasSPE(); @@ -841,7 +841,8 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { const APInt &CIVal = ConstInt->getValue(); - Imm = (IsZExt) ? (long)CIVal.getZExtValue() : (long)CIVal.getSExtValue(); + Imm = (IsZExt) ? (int64_t)CIVal.getZExtValue() : + (int64_t)CIVal.getSExtValue(); if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm))) UseImm = true; } diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index baa19e81e436..d0ca325e9c14 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -8199,7 +8199,13 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG, if (!isIntEqualitySetCC(Cond)) return SDValue(); - const APInt &C1 = cast<ConstantSDNode>(N1)->getAPIntValue(); + // Don't do this if the sign bit is provably zero, it will be turned back into + // an AND. + APInt SignMask = APInt::getOneBitSet(64, 31); + if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask)) + return SDValue(); + + const APInt &C1 = N1C->getAPIntValue(); SDLoc dl(N); // If the constant is larger than 2^32 - 1 it is impossible for both sides diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index fc0a983f6542..5d9bd2f67558 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -1022,16 +1022,10 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, const MachineInstr &M return; } - // Two cases involving an AVL resulting from a previous vsetvli. - // 1) If the AVL is the result of a previous vsetvli which has the - // same AVL and VLMAX as our current state, we can reuse the AVL - // from the current state for the new one. This allows us to - // generate 'vsetvli x0, x0, vtype" or possible skip the transition - // entirely. - // 2) If AVL is defined by a vsetvli with the same VLMAX, we can - // replace the AVL operand with the AVL of the defining vsetvli. - // We avoid general register AVLs to avoid extending live ranges - // without being sure we can kill the original source reg entirely. + // If AVL is defined by a vsetvli with the same VLMAX, we can + // replace the AVL operand with the AVL of the defining vsetvli. + // We avoid general register AVLs to avoid extending live ranges + // without being sure we can kill the original source reg entirely. if (!Info.hasAVLReg() || !Info.getAVLReg().isVirtual()) return; MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg()); @@ -1039,17 +1033,6 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, const MachineInstr &M return; VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI); - // case 1 - if (PrevInfo.isValid() && !PrevInfo.isUnknown() && - DefInfo.hasSameAVL(PrevInfo) && - DefInfo.hasSameVLMAX(PrevInfo)) { - if (PrevInfo.hasAVLImm()) - Info.setAVLImm(PrevInfo.getAVLImm()); - else - Info.setAVLReg(PrevInfo.getAVLReg()); - return; - } - // case 2 if (DefInfo.hasSameVLMAX(Info) && (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) { if (DefInfo.hasAVLImm()) diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp index b080ab7e138c..7d0fc4e8a8c6 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -419,7 +419,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setTruncStoreAction(VT, MVT::bf16, Expand); setOperationAction(ISD::BF16_TO_FP, VT, Expand); - setOperationAction(ISD::FP_TO_BF16, VT, Expand); + setOperationAction(ISD::FP_TO_BF16, VT, Custom); } setOperationAction(ISD::PARITY, MVT::i8, Custom); @@ -2494,6 +2494,10 @@ MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, !Subtarget.hasX87()) return MVT::i32; + if (VT.isVector() && VT.getVectorElementType() == MVT::bf16) + return getRegisterTypeForCallingConv(Context, CC, + VT.changeVectorElementTypeToInteger()); + return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); } @@ -2525,6 +2529,10 @@ unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, return 3; } + if (VT.isVector() && VT.getVectorElementType() == MVT::bf16) + return getNumRegistersForCallingConv(Context, CC, + VT.changeVectorElementTypeToInteger()); + return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); } @@ -2733,6 +2741,40 @@ unsigned X86TargetLowering::getJumpTableEncoding() const { return TargetLowering::getJumpTableEncoding(); } +bool X86TargetLowering::splitValueIntoRegisterParts( + SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, + unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const { + bool IsABIRegCopy = CC.has_value(); + EVT ValueVT = Val.getValueType(); + if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) { + unsigned ValueBits = ValueVT.getSizeInBits(); + unsigned PartBits = PartVT.getSizeInBits(); + Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val); + Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val); + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); + Parts[0] = Val; + return true; + } + return false; +} + +SDValue X86TargetLowering::joinRegisterPartsIntoValue( + SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, + MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const { + bool IsABIRegCopy = CC.has_value(); + if (IsABIRegCopy && ValueVT == MVT::bf16 && PartVT == MVT::f32) { + unsigned ValueBits = ValueVT.getSizeInBits(); + unsigned PartBits = PartVT.getSizeInBits(); + SDValue Val = Parts[0]; + + Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val); + Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val); + Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + return Val; + } + return SDValue(); +} + bool X86TargetLowering::useSoftFloat() const { return Subtarget.useSoftFloat(); } @@ -19304,44 +19346,6 @@ static bool canonicalizeShuffleMaskWithCommute(ArrayRef<int> Mask) { return false; } -static bool canCombineAsMaskOperation(SDValue V1, SDValue V2, - const X86Subtarget &Subtarget) { - if (!Subtarget.hasAVX512()) - return false; - - MVT VT = V1.getSimpleValueType().getScalarType(); - if ((VT == MVT::i16 || VT == MVT::i8) && !Subtarget.hasBWI()) - return false; - - // i8 is better to be widen to i16, because there is PBLENDW for vXi16 - // when the vector bit size is 128 or 256. - if (VT == MVT::i8 && V1.getSimpleValueType().getSizeInBits() < 512) - return false; - - auto HasMaskOperation = [&](SDValue V) { - // TODO: Currently we only check limited opcode. We probably extend - // it to all binary operation by checking TLI.isBinOp(). - switch (V->getOpcode()) { - default: - return false; - case ISD::ADD: - case ISD::SUB: - case ISD::AND: - case ISD::XOR: - break; - } - if (!V->hasOneUse()) - return false; - - return true; - }; - - if (HasMaskOperation(V1) || HasMaskOperation(V2)) - return true; - - return false; -} - // Forward declaration. static SDValue canonicalizeShuffleMaskWithHorizOp( MutableArrayRef<SDValue> Ops, MutableArrayRef<int> Mask, @@ -19417,7 +19421,6 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, const X86Subtarget &Subtarget, // integers to handle flipping the low and high halves of AVX 256-bit vectors. SmallVector<int, 16> WidenedMask; if (VT.getScalarSizeInBits() < 64 && !Is1BitVector && - !canCombineAsMaskOperation(V1, V2, Subtarget) && canWidenShuffleElements(OrigMask, Zeroable, V2IsZero, WidenedMask)) { // Shuffle mask widening should not interfere with a broadcast opportunity // by obfuscating the operands with bitcasts. @@ -23058,6 +23061,18 @@ static SDValue LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) { return Res; } +SDValue X86TargetLowering::LowerFP_TO_BF16(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + MakeLibCallOptions CallOptions; + RTLIB::Libcall LC = + RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16); + SDValue Res = + makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first; + return DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, + DAG.getBitcast(MVT::i32, Res)); +} + /// Depending on uarch and/or optimizing for size, we might prefer to use a /// vector operation in place of the typical scalar operation. static SDValue lowerAddSubToHorizontalOp(SDValue Op, SelectionDAG &DAG, @@ -32250,6 +32265,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::STRICT_FP16_TO_FP: return LowerFP16_TO_FP(Op, DAG); case ISD::FP_TO_FP16: case ISD::STRICT_FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG); + case ISD::FP_TO_BF16: return LowerFP_TO_BF16(Op, DAG); case ISD::LOAD: return LowerLoad(Op, Subtarget, DAG); case ISD::STORE: return LowerStore(Op, Subtarget, DAG); case ISD::FADD: diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h index 85e5d0ba4c34..18fb2dbe8d71 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h @@ -1598,6 +1598,7 @@ namespace llvm { SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -1621,6 +1622,17 @@ namespace llvm { MachineBasicBlock *Entry, const SmallVectorImpl<MachineBasicBlock *> &Exits) const override; + bool + splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, + SDValue *Parts, unsigned NumParts, MVT PartVT, + Optional<CallingConv::ID> CC) const override; + + SDValue + joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, + const SDValue *Parts, unsigned NumParts, + MVT PartVT, EVT ValueVT, + Optional<CallingConv::ID> CC) const override; + bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; bool mayBeEmittedAsTailCall(const CallInst *CI) const override; diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 83252fec3ea8..59d5d88a126c 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -9899,6 +9899,15 @@ struct AAPotentialValuesImpl : AAPotentialValues { struct ItemInfo { AA::ValueAndContext I; AA::ValueScope S; + + bool operator==(const ItemInfo &II) const { + return II.I == I && II.S == S; + }; + bool operator<(const ItemInfo &II) const { + if (I == II.I) + return S < II.S; + return I < II.I; + }; }; bool recurseForValue(Attributor &A, const IRPosition &IRP, AA::ValueScope S) { @@ -10271,7 +10280,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { SmallMapVector<const Function *, LivenessInfo, 4> LivenessAAs; Value *InitialV = &getAssociatedValue(); - SmallSet<AA::ValueAndContext, 16> Visited; + SmallSet<ItemInfo, 16> Visited; SmallVector<ItemInfo, 16> Worklist; Worklist.push_back({{*InitialV, getCtxI()}, AA::AnyScope}); @@ -10285,7 +10294,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { // Check if we should process the current value. To prevent endless // recursion keep a record of the values we followed! - if (!Visited.insert(II.I).second) + if (!Visited.insert(II).second) continue; // Make sure we limit the compile time for complex expressions. diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index ef2384faa273..0b42fc151991 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -499,6 +499,18 @@ struct OMPInformationCache : public InformationCache { } #include "llvm/Frontend/OpenMP/OMPKinds.def" + // Remove the `noinline` attribute from `__kmpc`, `_OMP::` and `omp_` + // functions, except if `optnone` is present. + if (isOpenMPDevice(M)) { + for (Function &F : M) { + for (StringRef Prefix : {"__kmpc", "_ZN4_OMP", "omp_"}) + if (F.hasFnAttribute(Attribute::NoInline) && + F.getName().startswith(Prefix) && + !F.hasFnAttribute(Attribute::OptimizeNone)) + F.removeFnAttr(Attribute::NoInline); + } + } + // TODO: We should attach the attributes defined in OMPKinds.def. } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp index 926427450682..c9ff94dc9744 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp @@ -57,11 +57,15 @@ static bool shouldConvertToRelLookupTable(Module &M, GlobalVariable &GV) { return false; ConstantArray *Array = dyn_cast<ConstantArray>(GV.getInitializer()); - // If values are not pointers, do not generate a relative lookup table. - if (!Array || !Array->getType()->getElementType()->isPointerTy()) + if (!Array) return false; + // If values are not 64-bit pointers, do not generate a relative lookup table. const DataLayout &DL = M.getDataLayout(); + Type *ElemType = Array->getType()->getElementType(); + if (!ElemType->isPointerTy() || DL.getPointerTypeSizeInBits(ElemType) != 64) + return false; + for (const Use &Op : Array->operands()) { Constant *ConstOp = cast<Constant>(&Op); GlobalValue *GVOp; diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 238b074089aa..91bc7dbad1d0 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -10109,8 +10109,19 @@ bool LoopVectorizePass::processLoop(Loop *L) { if (Hints.getForce() == LoopVectorizeHints::FK_Enabled) LLVM_DEBUG(dbgs() << " But vectorizing was explicitly forced.\n"); else { - LLVM_DEBUG(dbgs() << "\n"); - SEL = CM_ScalarEpilogueNotAllowedLowTripLoop; + if (*ExpectedTC > TTI->getMinTripCountTailFoldingThreshold()) { + LLVM_DEBUG(dbgs() << "\n"); + SEL = CM_ScalarEpilogueNotAllowedLowTripLoop; + } else { + LLVM_DEBUG(dbgs() << " But the target considers the trip count too " + "small to consider vectorizing.\n"); + reportVectorizationFailure( + "The trip count is below the minial threshold value.", + "loop trip count is too low, avoiding vectorization", + "LowTripCount", ORE, L); + Hints.emitRemarkWithHints(); + return false; + } } } diff --git a/contrib/llvm-project/llvm/tools/lli/lli.cpp b/contrib/llvm-project/llvm/tools/lli/lli.cpp index 42bea1a6487f..3fd2a618bf1a 100644 --- a/contrib/llvm-project/llvm/tools/lli/lli.cpp +++ b/contrib/llvm-project/llvm/tools/lli/lli.cpp @@ -291,7 +291,8 @@ namespace { LLVM_ATTRIBUTE_USED void linkComponents() { errs() << (void *)&llvm_orc_registerEHFrameSectionWrapper << (void *)&llvm_orc_deregisterEHFrameSectionWrapper - << (void *)&llvm_orc_registerJITLoaderGDBWrapper; + << (void *)&llvm_orc_registerJITLoaderGDBWrapper + << (void *)&llvm_orc_registerJITLoaderGDBAllocAction; } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/tools/llvm-ar/llvm-ar.cpp b/contrib/llvm-project/llvm/tools/llvm-ar/llvm-ar.cpp index 1d4a8e9cd398..4ffc5cf337a2 100644 --- a/contrib/llvm-project/llvm/tools/llvm-ar/llvm-ar.cpp +++ b/contrib/llvm-project/llvm/tools/llvm-ar/llvm-ar.cpp @@ -68,9 +68,9 @@ static StringRef ToolName; static StringRef Stem; static void printRanLibHelp(StringRef ToolName) { - outs() << "OVERVIEW: LLVM Ranlib\n\n" - << "This program generates an index to speed access to archives\n\n" - << "USAGE: " + ToolName + " <archive-file>\n\n" + outs() << "OVERVIEW: LLVM ranlib\n\n" + << "Generate an index for archives\n\n" + << "USAGE: " + ToolName + " archive...\n\n" << "OPTIONS:\n" << " -h --help - Display available options\n" << " -v --version - Display the version of this program\n" @@ -1125,8 +1125,7 @@ static void performOperation(ArchiveOperation Operation, llvm_unreachable("Unknown operation."); } -static int performOperation(ArchiveOperation Operation, - std::vector<NewArchiveMember> *NewMembers) { +static int performOperation(ArchiveOperation Operation) { // Create or open the archive object. ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile( ArchiveName, /*IsText=*/false, /*RequiresNullTerminator=*/false); @@ -1145,7 +1144,7 @@ static int performOperation(ArchiveOperation Operation, if (Archive->isThin()) CompareFullPath = true; performOperation(Operation, Archive.get(), std::move(Buf.get()), - NewMembers); + /*NewMembers=*/nullptr); return 0; } @@ -1160,7 +1159,7 @@ static int performOperation(ArchiveOperation Operation, } } - performOperation(Operation, nullptr, nullptr, NewMembers); + performOperation(Operation, nullptr, nullptr, /*NewMembers=*/nullptr); return 0; } @@ -1403,12 +1402,11 @@ static int ar_main(int argc, char **argv) { Options += *ArgIt + 1; } - ArchiveOperation Operation = parseCommandLine(); - return performOperation(Operation, nullptr); + return performOperation(parseCommandLine()); } static int ranlib_main(int argc, char **argv) { - bool ArchiveSpecified = false; + std::vector<StringRef> Archives; for (int i = 1; i < argc; ++i) { StringRef arg(argv[i]); if (handleGenericOption(arg)) { @@ -1433,16 +1431,17 @@ static int ranlib_main(int argc, char **argv) { arg = arg.drop_front(1); } } else { - if (ArchiveSpecified) - fail("exactly one archive should be specified"); - ArchiveSpecified = true; - ArchiveName = arg.str(); + Archives.push_back(arg); } } - if (!ArchiveSpecified) { - badUsage("an archive name must be specified"); + + for (StringRef Archive : Archives) { + ArchiveName = Archive.str(); + performOperation(CreateSymTab); } - return performOperation(CreateSymTab, nullptr); + if (Archives.empty()) + badUsage("an archive name must be specified"); + return 0; } int llvm_ar_main(int argc, char **argv) { |