diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2021-09-19 08:42:48 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2021-12-06 16:30:05 +0000 |
commit | 18baa991264e6ba7a416003721cf0fb19bf4582c (patch) | |
tree | 51c80b7ddfe8514c81c6f2f0436b00dffadaf825 /contrib | |
parent | c77c1b5c48476e0b0f6d3f4ea9dbf2c744eb1765 (diff) | |
download | src-18baa991264e6ba7a416003721cf0fb19bf4582c.tar.gz src-18baa991264e6ba7a416003721cf0fb19bf4582c.zip |
Merge llvm-project release/13.x llvmorg-13.0.0-rc3-8-g08642a395f23
This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and
openmp to llvmorg-13.0.0-rc3-8-g08642a395f23.
PR: 258209
(cherry picked from commit 8c6f6c0c80f896f9c90b7833def615dcb0ce9244)
Diffstat (limited to 'contrib')
20 files changed, 188 insertions, 123 deletions
diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp index c09797e91b99..ca98c7a57446 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -2120,11 +2120,12 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); // Ensure we do not inline the function. This is trivially true for the ones - // passed to __kmpc_fork_call but the ones calles in serialized regions + // passed to __kmpc_fork_call but the ones called in serialized regions // could be inlined. This is not a perfect but it is closer to the invariant // we want, namely, every data environment starts with a new function. // TODO: We should pass the if condition to the runtime function and do the // handling there. Much cleaner code. + OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline); OutlinedFn->addFnAttr(llvm::Attribute::NoInline); RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); diff --git a/contrib/llvm-project/clang/lib/Format/TokenAnnotator.cpp b/contrib/llvm-project/clang/lib/Format/TokenAnnotator.cpp index 54e6c7d38e7d..11dc661abc24 100644 --- a/contrib/llvm-project/clang/lib/Format/TokenAnnotator.cpp +++ b/contrib/llvm-project/clang/lib/Format/TokenAnnotator.cpp @@ -2398,7 +2398,7 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) { // This function heuristically determines whether 'Current' starts the name of a // function declaration. -static bool isFunctionDeclarationName(const FormatToken &Current, +static bool isFunctionDeclarationName(bool IsCpp, const FormatToken &Current, const AnnotatedLine &Line) { auto skipOperatorName = [](const FormatToken *Next) -> const FormatToken * { for (; Next; Next = Next->Next) { @@ -2476,14 +2476,21 @@ static bool isFunctionDeclarationName(const FormatToken &Current, if (Next->MatchingParen->Next && Next->MatchingParen->Next->is(TT_PointerOrReference)) return true; - // Check for K&R C function definitions, e.g.: + + // Check for K&R C function definitions (and C++ function definitions with + // unnamed parameters), e.g.: // int f(i) // { // return i + 1; // } - if (Next->Next && Next->Next->is(tok::identifier) && - !(Next->MatchingParen->Next && Next->MatchingParen->Next->is(tok::semi))) + // bool g(size_t = 0, bool b = false) + // { + // return !b; + // } + if (IsCpp && Next->Next && Next->Next->is(tok::identifier) && + !Line.endsWith(tok::semi)) return true; + for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen; Tok = Tok->Next) { if (Tok->is(TT_TypeDeclarationParen)) @@ -2544,7 +2551,7 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { calculateArrayInitializerColumnList(Line); while (Current) { - if (isFunctionDeclarationName(*Current, Line)) + if (isFunctionDeclarationName(Style.isCpp(), *Current, Line)) Current->setType(TT_FunctionDeclarationName); if (Current->is(TT_LineComment)) { if (Current->Previous->is(BK_BracedInit) && diff --git a/contrib/llvm-project/clang/lib/Format/UnwrappedLineParser.cpp b/contrib/llvm-project/clang/lib/Format/UnwrappedLineParser.cpp index beb2df59cd53..424c6ce0ce8c 100644 --- a/contrib/llvm-project/clang/lib/Format/UnwrappedLineParser.cpp +++ b/contrib/llvm-project/clang/lib/Format/UnwrappedLineParser.cpp @@ -995,6 +995,13 @@ static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, Keywords.kw_import, tok::kw_export); } +// Checks whether a token is a type in K&R C (aka C78). +static bool isC78Type(const FormatToken &Tok) { + return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long, + tok::kw_unsigned, tok::kw_float, tok::kw_double, + tok::identifier); +} + // This function checks whether a token starts the first parameter declaration // in a K&R C (aka C78) function definition, e.g.: // int f(a, b) @@ -1002,13 +1009,24 @@ static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, // { // return a + b; // } -static bool isC78ParameterDecl(const FormatToken *Tok) { - if (!Tok) +static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, + const FormatToken *FuncName) { + assert(Tok); + assert(Next); + assert(FuncName); + + if (FuncName->isNot(tok::identifier)) return false; - if (!Tok->isOneOf(tok::kw_int, tok::kw_char, tok::kw_float, tok::kw_double, - tok::kw_struct, tok::kw_union, tok::kw_long, tok::kw_short, - tok::kw_unsigned, tok::kw_register, tok::identifier)) + const FormatToken *Prev = FuncName->Previous; + if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev))) + return false; + + if (!isC78Type(*Tok) && + !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) + return false; + + if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo()) return false; Tok = Tok->Previous; @@ -1369,21 +1387,20 @@ void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) { case tok::r_brace: addUnwrappedLine(); return; - case tok::l_paren: + case tok::l_paren: { parseParens(); // Break the unwrapped line if a K&R C function definition has a parameter // declaration. - if (!IsTopLevel || !Style.isCpp()) - break; - if (!Previous || Previous->isNot(tok::identifier)) + if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof)) break; - if (Previous->Previous && Previous->Previous->is(tok::at)) - break; - if (isC78ParameterDecl(FormatTok)) { + const unsigned Position = Tokens->getPosition() + 1; + assert(Position < AllTokens.size()); + if (isC78ParameterDecl(FormatTok, AllTokens[Position], Previous)) { addUnwrappedLine(); return; } break; + } case tok::kw_operator: nextToken(); if (FormatTok->isBinaryOperator()) diff --git a/contrib/llvm-project/clang/lib/Headers/openmp_wrappers/complex b/contrib/llvm-project/clang/lib/Headers/openmp_wrappers/complex index dfd6193c97cb..eb1ead207d58 100644 --- a/contrib/llvm-project/clang/lib/Headers/openmp_wrappers/complex +++ b/contrib/llvm-project/clang/lib/Headers/openmp_wrappers/complex @@ -36,7 +36,7 @@ #ifndef _LIBCPP_STD_VER #pragma omp begin declare variant match( \ - device = {arch(nvptx, nvptx64)}, \ + device = {arch(amdgcn, nvptx, nvptx64)}, \ implementation = {extension(match_any, allow_templates)}) #include <complex_cmath.h> diff --git a/contrib/llvm-project/clang/lib/Sema/SemaStmt.cpp b/contrib/llvm-project/clang/lib/Sema/SemaStmt.cpp index 3baccec2d7bb..f7e4110e6110 100644 --- a/contrib/llvm-project/clang/lib/Sema/SemaStmt.cpp +++ b/contrib/llvm-project/clang/lib/Sema/SemaStmt.cpp @@ -3481,7 +3481,8 @@ VerifyInitializationSequenceCXX98(const Sema &S, ExprResult Sema::PerformMoveOrCopyInitialization( const InitializedEntity &Entity, const NamedReturnInfo &NRInfo, Expr *Value, bool SupressSimplerImplicitMoves) { - if ((!getLangOpts().CPlusPlus2b || SupressSimplerImplicitMoves) && + if (getLangOpts().CPlusPlus && + (!getLangOpts().CPlusPlus2b || SupressSimplerImplicitMoves) && NRInfo.isMoveEligible()) { ImplicitCastExpr AsRvalue(ImplicitCastExpr::OnStack, Value->getType(), CK_NoOp, Value, VK_XValue, FPOptionsOverride()); diff --git a/contrib/llvm-project/compiler-rt/lib/builtins/clear_cache.c b/contrib/llvm-project/compiler-rt/lib/builtins/clear_cache.c index 3c12b74e8fa6..da0715914b41 100644 --- a/contrib/llvm-project/compiler-rt/lib/builtins/clear_cache.c +++ b/contrib/llvm-project/compiler-rt/lib/builtins/clear_cache.c @@ -35,7 +35,7 @@ uintptr_t GetCurrentProcess(void); #include <machine/sysarch.h> #endif -#if defined(__OpenBSD__) && (defined(__arm__) || defined(__mips__)) +#if defined(__OpenBSD__) && (defined(__arm__) || defined(__mips__) || defined(__riscv)) // clang-format off #include <sys/types.h> #include <machine/sysarch.h> @@ -166,6 +166,13 @@ void __clear_cache(void *start, void *end) { : "=r"(start_reg) : "r"(start_reg), "r"(end_reg), "r"(flags), "r"(syscall_nr)); assert(start_reg == 0 && "Cache flush syscall failed."); +#elif defined(__riscv) && defined(__OpenBSD__) + struct riscv_sync_icache_args arg; + + arg.addr = (uintptr_t)start; + arg.len = (uintptr_t)end - (uintptr_t)start; + + sysarch(RISCV_SYNC_ICACHE, &arg); #else #if __APPLE__ // On Darwin, sys_icache_invalidate() provides this functionality diff --git a/contrib/llvm-project/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h b/contrib/llvm-project/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h index ba873ba4436b..96cab49d5ac8 100644 --- a/contrib/llvm-project/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h +++ b/contrib/llvm-project/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h @@ -110,6 +110,7 @@ public: bool IsSVEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskSVE); } bool IsPAuthEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskPAuth); } + bool IsMTEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskMTE); } bool IsSVEReg(unsigned reg) const; bool IsSVEZReg(unsigned reg) const; diff --git a/contrib/llvm-project/llvm/include/llvm/MC/MCContext.h b/contrib/llvm-project/llvm/include/llvm/MC/MCContext.h index 877b2dc4ac92..2ff9c967e848 100644 --- a/contrib/llvm-project/llvm/include/llvm/MC/MCContext.h +++ b/contrib/llvm-project/llvm/include/llvm/MC/MCContext.h @@ -374,17 +374,17 @@ namespace llvm { bool operator<(const ELFEntrySizeKey &Other) const { if (SectionName != Other.SectionName) return SectionName < Other.SectionName; - if (Flags != Other.Flags) - return Flags < Other.Flags; + if ((Flags & ELF::SHF_STRINGS) != (Other.Flags & ELF::SHF_STRINGS)) + return Other.Flags & ELF::SHF_STRINGS; return EntrySize < Other.EntrySize; } }; - // Symbols must be assigned to a section with a compatible entry size and - // flags. This map is used to assign unique IDs to sections to distinguish - // between sections with identical names but incompatible entry sizes and/or - // flags. This can occur when a symbol is explicitly assigned to a section, - // e.g. via __attribute__((section("myname"))). + // Symbols must be assigned to a section with a compatible entry + // size. This map is used to assign unique IDs to sections to + // distinguish between sections with identical names but incompatible entry + // sizes. This can occur when a symbol is explicitly assigned to a + // section, e.g. via __attribute__((section("myname"))). std::map<ELFEntrySizeKey, unsigned> ELFEntrySizeMap; // This set is used to record the generic mergeable section names seen. @@ -592,8 +592,6 @@ namespace llvm { bool isELFGenericMergeableSection(StringRef Name); - /// Return the unique ID of the section with the given name, flags and entry - /// size, if it exists. Optional<unsigned> getELFUniqueIDForEntsize(StringRef SectionName, unsigned Flags, unsigned EntrySize); diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h index 635b706d0bef..6203f37ebb01 100644 --- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h +++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h @@ -65,7 +65,7 @@ private: bool processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI); bool processMemMove(MemMoveInst *M); bool performCallSlotOptzn(Instruction *cpyLoad, Instruction *cpyStore, - Value *cpyDst, Value *cpySrc, uint64_t cpyLen, + Value *cpyDst, Value *cpySrc, TypeSize cpyLen, Align cpyAlign, CallInst *C); bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep); bool processMemSetMemCpyDependence(MemCpyInst *MemCpy, MemSetInst *MemSet); diff --git a/contrib/llvm-project/llvm/lib/IR/Constants.cpp b/contrib/llvm-project/llvm/lib/IR/Constants.cpp index 6c75085a6678..1e72cb4d3a66 100644 --- a/contrib/llvm-project/llvm/lib/IR/Constants.cpp +++ b/contrib/llvm-project/llvm/lib/IR/Constants.cpp @@ -315,9 +315,11 @@ containsUndefinedElement(const Constant *C, return false; for (unsigned i = 0, e = cast<FixedVectorType>(VTy)->getNumElements(); - i != e; ++i) - if (HasFn(C->getAggregateElement(i))) - return true; + i != e; ++i) { + if (Constant *Elem = C->getAggregateElement(i)) + if (HasFn(Elem)) + return true; + } } return false; diff --git a/contrib/llvm-project/llvm/lib/MC/MCContext.cpp b/contrib/llvm-project/llvm/lib/MC/MCContext.cpp index aa4051aa2400..cc349af6393b 100644 --- a/contrib/llvm-project/llvm/lib/MC/MCContext.cpp +++ b/contrib/llvm-project/llvm/lib/MC/MCContext.cpp @@ -586,7 +586,7 @@ void MCContext::recordELFMergeableSectionInfo(StringRef SectionName, unsigned Flags, unsigned UniqueID, unsigned EntrySize) { bool IsMergeable = Flags & ELF::SHF_MERGE; - if (UniqueID == GenericSectionID) + if (IsMergeable && (UniqueID == GenericSectionID)) ELFSeenGenericMergeableSections.insert(SectionName); // For mergeable sections or non-mergeable sections with a generic mergeable diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 60c00f47859b..494554ae7b33 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4161,7 +4161,8 @@ bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const { bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const { if (VT.getVectorElementType() == MVT::i32 && - VT.getVectorElementCount().getKnownMinValue() >= 4) + VT.getVectorElementCount().getKnownMinValue() >= 4 && + !VT.isFixedLengthVector()) return true; return false; diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 08e4a119127c..edf4d06d4d59 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -103,8 +103,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) getActionDefinitionsBuilder(G_BSWAP) .legalFor({s32, s64, v4s32, v2s32, v2s64}) .clampScalar(0, s32, s64) - .widenScalarToNextPow2(0) - .customIf(typeIs(0, v2s16)); // custom lower as G_REV32 + G_LSHR + .widenScalarToNextPow2(0); getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) .legalFor({s32, s64, v2s32, v4s32, v4s16, v8s16, v16s8, v8s8}) @@ -799,8 +798,6 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, case TargetOpcode::G_LOAD: case TargetOpcode::G_STORE: return legalizeLoadStore(MI, MRI, MIRBuilder, Observer); - case TargetOpcode::G_BSWAP: - return legalizeBSwap(MI, MRI, MIRBuilder); case TargetOpcode::G_SHL: case TargetOpcode::G_ASHR: case TargetOpcode::G_LSHR: @@ -1015,46 +1012,6 @@ bool AArch64LegalizerInfo::legalizeLoadStore( return true; } -bool AArch64LegalizerInfo::legalizeBSwap(MachineInstr &MI, - MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const { - assert(MI.getOpcode() == TargetOpcode::G_BSWAP); - - // The <2 x half> case needs special lowering because there isn't an - // instruction that does that directly. Instead, we widen to <8 x i8> - // and emit a G_REV32 followed by a G_LSHR knowing that instruction selection - // will later match them as: - // - // rev32.8b v0, v0 - // ushr.2s v0, v0, #16 - // - // We could emit those here directly, but it seems better to keep things as - // generic as possible through legalization, and avoid committing layering - // violations by legalizing & selecting here at the same time. - - Register ValReg = MI.getOperand(1).getReg(); - assert(LLT::fixed_vector(2, 16) == MRI.getType(ValReg)); - const LLT v2s32 = LLT::fixed_vector(2, 32); - const LLT v8s8 = LLT::fixed_vector(8, 8); - const LLT s32 = LLT::scalar(32); - - auto Undef = MIRBuilder.buildUndef(v8s8); - auto Insert = - MIRBuilder - .buildInstr(TargetOpcode::INSERT_SUBREG, {v8s8}, {Undef, ValReg}) - .addImm(AArch64::ssub); - auto Rev32 = MIRBuilder.buildInstr(AArch64::G_REV32, {v8s8}, {Insert}); - auto Bitcast = MIRBuilder.buildBitcast(v2s32, Rev32); - auto Amt = MIRBuilder.buildConstant(v2s32, 16); - auto UShr = - MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {v2s32}, {Bitcast, Amt}); - auto Zero = MIRBuilder.buildConstant(s32, 0); - auto Extract = MIRBuilder.buildExtractVectorElement(s32, UShr, Zero); - MIRBuilder.buildBitcast({MI.getOperand(0).getReg()}, Extract); - MI.eraseFromParent(); - return true; -} - bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const { diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h index 78fc24559d71..35456d95dc2b 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h @@ -35,8 +35,6 @@ public: MachineInstr &MI) const override; private: - bool legalizeBSwap(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &MIRBuilder) const; bool legalizeVaArg(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const; bool legalizeLoadStore(MachineInstr &MI, MachineRegisterInfo &MRI, diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp index 4cde7971e597..86cb86b19d62 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -671,7 +671,9 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop( MF.insert(MBBIter, testMBB); MF.insert(MBBIter, tailMBB); - Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D; + Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 + : Is64Bit ? X86::R11D + : X86::EAX; BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed) .addReg(StackPtr) .setMIFlag(MachineInstr::FrameSetup); @@ -1092,7 +1094,9 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB, MF.insert(MBBIter, bodyMBB); MF.insert(MBBIter, footMBB); const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi; - Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D; + Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 + : Is64Bit ? X86::R11D + : X86::EAX; // Setup entry block { diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index d6b97915ede6..75eec25f5807 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -1996,7 +1996,8 @@ bool OpenMPOpt::rewriteDeviceCodeStateMachine() { UndefValue::get(Int8Ty), F->getName() + ".ID"); for (Use *U : ToBeReplacedStateMachineUses) - U->set(ConstantExpr::getBitCast(ID, U->get()->getType())); + U->set(ConstantExpr::getPointerBitCastOrAddrSpaceCast( + ID, U->get()->getType())); ++NumOpenMPParallelRegionsReplacedInGPUStateMachine; @@ -3183,10 +3184,14 @@ struct AAKernelInfoFunction : AAKernelInfo { IsWorker->setDebugLoc(DLoc); BranchInst::Create(StateMachineBeginBB, UserCodeEntryBB, IsWorker, InitBB); + Module &M = *Kernel->getParent(); + // Create local storage for the work function pointer. + const DataLayout &DL = M.getDataLayout(); Type *VoidPtrTy = Type::getInt8PtrTy(Ctx); - AllocaInst *WorkFnAI = new AllocaInst(VoidPtrTy, 0, "worker.work_fn.addr", - &Kernel->getEntryBlock().front()); + Instruction *WorkFnAI = + new AllocaInst(VoidPtrTy, DL.getAllocaAddrSpace(), nullptr, + "worker.work_fn.addr", &Kernel->getEntryBlock().front()); WorkFnAI->setDebugLoc(DLoc); auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); @@ -3199,13 +3204,23 @@ struct AAKernelInfoFunction : AAKernelInfo { Value *Ident = KernelInitCB->getArgOperand(0); Value *GTid = KernelInitCB; - Module &M = *Kernel->getParent(); FunctionCallee BarrierFn = OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( M, OMPRTL___kmpc_barrier_simple_spmd); CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineBeginBB) ->setDebugLoc(DLoc); + if (WorkFnAI->getType()->getPointerAddressSpace() != + (unsigned int)AddressSpace::Generic) { + WorkFnAI = new AddrSpaceCastInst( + WorkFnAI, + PointerType::getWithSamePointeeType( + cast<PointerType>(WorkFnAI->getType()), + (unsigned int)AddressSpace::Generic), + WorkFnAI->getName() + ".generic", StateMachineBeginBB); + WorkFnAI->setDebugLoc(DLoc); + } + FunctionCallee KernelParallelFn = OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( M, OMPRTL___kmpc_kernel_parallel); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 2e36c50b75fc..9afbe0e9a2a5 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -178,9 +178,9 @@ public: } void addStore(int64_t OffsetFromFirst, StoreInst *SI) { - int64_t StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType()); - - addRange(OffsetFromFirst, StoreSize, SI->getPointerOperand(), + TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType()); + assert(!StoreSize.isScalable() && "Can't track scalable-typed stores"); + addRange(OffsetFromFirst, StoreSize.getFixedSize(), SI->getPointerOperand(), SI->getAlign().value(), SI); } @@ -371,6 +371,11 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, Value *ByteVal) { const DataLayout &DL = StartInst->getModule()->getDataLayout(); + // We can't track scalable types + if (StoreInst *SI = dyn_cast<StoreInst>(StartInst)) + if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable()) + return nullptr; + // Okay, so we now have a single store that can be splatable. Scan to find // all subsequent stores of the same value to offset from the same pointer. // Join these together into ranges, so we can decide whether contiguous blocks @@ -426,6 +431,10 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType())) break; + // We can't track ranges involving scalable types. + if (DL.getTypeStoreSize(StoredVal->getType()).isScalable()) + break; + // Check to see if this stored value is of the same byte-splattable value. Value *StoredByte = isBytewiseValue(StoredVal, DL); if (isa<UndefValue>(ByteVal) && StoredByte) @@ -859,7 +868,7 @@ bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) { /// the call write its result directly into the destination of the memcpy. bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, Instruction *cpyStore, Value *cpyDest, - Value *cpySrc, uint64_t cpyLen, + Value *cpySrc, TypeSize cpySize, Align cpyAlign, CallInst *C) { // The general transformation to keep in mind is // @@ -875,6 +884,10 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, // src only holds uninitialized values at the moment of the call, meaning that // the memcpy can be discarded rather than moved. + // We can't optimize scalable types. + if (cpySize.isScalable()) + return false; + // Lifetime marks shouldn't be operated on. if (Function *F = C->getCalledFunction()) if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start) @@ -893,13 +906,13 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) * srcArraySize->getZExtValue(); - if (cpyLen < srcSize) + if (cpySize < srcSize) return false; // Check that accessing the first srcSize bytes of dest will not cause a // trap. Otherwise the transform is invalid since it might cause a trap // to occur earlier than it otherwise would. - if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpyLen), + if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize), DL, C, DT)) return false; @@ -1452,9 +1465,10 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { // of conservatively taking the minimum? Align Alignment = std::min(M->getDestAlign().valueOrOne(), M->getSourceAlign().valueOrOne()); - if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(), - CopySize->getZExtValue(), Alignment, - C)) { + if (performCallSlotOptzn( + M, M, M->getDest(), M->getSource(), + TypeSize::getFixed(CopySize->getZExtValue()), Alignment, + C)) { LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n" << " call: " << *C << "\n" << " memcpy: " << *M << "\n"); @@ -1509,7 +1523,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { Align Alignment = std::min(M->getDestAlign().valueOrOne(), M->getSourceAlign().valueOrOne()); if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(), - CopySize->getZExtValue(), Alignment, C)) { + TypeSize::getFixed(CopySize->getZExtValue()), + Alignment, C)) { eraseInstruction(M); ++NumMemCpyInstr; return true; @@ -1584,7 +1599,7 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) { // Find out what feeds this byval argument. Value *ByValArg = CB.getArgOperand(ArgNo); Type *ByValTy = CB.getParamByValType(ArgNo); - uint64_t ByValSize = DL.getTypeAllocSize(ByValTy); + TypeSize ByValSize = DL.getTypeAllocSize(ByValTy); MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize)); MemCpyInst *MDep = nullptr; if (EnableMemorySSA) { @@ -1612,7 +1627,8 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) { // The length of the memcpy must be larger or equal to the size of the byval. ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength()); - if (!C1 || C1->getValue().getZExtValue() < ByValSize) + if (!C1 || !TypeSize::isKnownGE( + TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize)) return false; // Get the alignment of the byval. If the call doesn't specify the alignment, diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index b9cccc2af309..b1c105258027 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -1587,10 +1587,12 @@ deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks, BB->eraseFromParent(); } -static void deleteDeadBlocksFromLoop(Loop &L, - SmallVectorImpl<BasicBlock *> &ExitBlocks, - DominatorTree &DT, LoopInfo &LI, - MemorySSAUpdater *MSSAU) { +static void +deleteDeadBlocksFromLoop(Loop &L, + SmallVectorImpl<BasicBlock *> &ExitBlocks, + DominatorTree &DT, LoopInfo &LI, + MemorySSAUpdater *MSSAU, + function_ref<void(Loop &, StringRef)> DestroyLoopCB) { // Find all the dead blocks tied to this loop, and remove them from their // successors. SmallSetVector<BasicBlock *, 8> DeadBlockSet; @@ -1640,6 +1642,7 @@ static void deleteDeadBlocksFromLoop(Loop &L, }) && "If the child loop header is dead all blocks in the child loop must " "be dead as well!"); + DestroyLoopCB(*ChildL, ChildL->getName()); LI.destroy(ChildL); return true; }); @@ -1980,6 +1983,8 @@ static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef<BasicBlock *> ExitBlocks, ParentL->removeChildLoop(llvm::find(*ParentL, &L)); else LI.removeLoop(llvm::find(LI, &L)); + // markLoopAsDeleted for L should be triggered by the caller (it is typically + // done by using the UnswitchCB callback). LI.destroy(&L); return false; } @@ -2019,7 +2024,8 @@ static void unswitchNontrivialInvariants( SmallVectorImpl<BasicBlock *> &ExitBlocks, IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB, - ScalarEvolution *SE, MemorySSAUpdater *MSSAU) { + ScalarEvolution *SE, MemorySSAUpdater *MSSAU, + function_ref<void(Loop &, StringRef)> DestroyLoopCB) { auto *ParentBB = TI.getParent(); BranchInst *BI = dyn_cast<BranchInst>(&TI); SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI); @@ -2319,7 +2325,7 @@ static void unswitchNontrivialInvariants( // Now that our cloned loops have been built, we can update the original loop. // First we delete the dead blocks from it and then we rebuild the loop // structure taking these deletions into account. - deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU); + deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, DestroyLoopCB); if (MSSAU && VerifyMemorySSA) MSSAU->getMemorySSA()->verifyMemorySSA(); @@ -2670,7 +2676,8 @@ static bool unswitchBestCondition( Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB, - ScalarEvolution *SE, MemorySSAUpdater *MSSAU) { + ScalarEvolution *SE, MemorySSAUpdater *MSSAU, + function_ref<void(Loop &, StringRef)> DestroyLoopCB) { // Collect all invariant conditions within this loop (as opposed to an inner // loop which would be handled when visiting that inner loop). SmallVector<std::pair<Instruction *, TinyPtrVector<Value *>>, 4> @@ -2958,7 +2965,7 @@ static bool unswitchBestCondition( << "\n"); unswitchNontrivialInvariants(L, *BestUnswitchTI, BestUnswitchInvariants, ExitBlocks, PartialIVInfo, DT, LI, AC, - UnswitchCB, SE, MSSAU); + UnswitchCB, SE, MSSAU, DestroyLoopCB); return true; } @@ -2988,7 +2995,8 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB, - ScalarEvolution *SE, MemorySSAUpdater *MSSAU) { + ScalarEvolution *SE, MemorySSAUpdater *MSSAU, + function_ref<void(Loop &, StringRef)> DestroyLoopCB) { assert(L.isRecursivelyLCSSAForm(DT, LI) && "Loops must be in LCSSA form before unswitching."); @@ -3036,7 +3044,8 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, // Try to unswitch the best invariant condition. We prefer this full unswitch to // a partial unswitch when possible below the threshold. - if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, MSSAU)) + if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, MSSAU, + DestroyLoopCB)) return true; // No other opportunities to unswitch. @@ -3083,6 +3092,10 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM, U.markLoopAsDeleted(L, LoopName); }; + auto DestroyLoopCB = [&U](Loop &L, StringRef Name) { + U.markLoopAsDeleted(L, Name); + }; + Optional<MemorySSAUpdater> MSSAU; if (AR.MSSA) { MSSAU = MemorySSAUpdater(AR.MSSA); @@ -3091,7 +3104,8 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM, } if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial, UnswitchCB, &AR.SE, - MSSAU.hasValue() ? MSSAU.getPointer() : nullptr)) + MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, + DestroyLoopCB)) return PreservedAnalyses::all(); if (AR.MSSA && VerifyMemorySSA) @@ -3179,12 +3193,17 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { LPM.markLoopAsDeleted(*L); }; + auto DestroyLoopCB = [&LPM](Loop &L, StringRef /* Name */) { + LPM.markLoopAsDeleted(L); + }; + if (MSSA && VerifyMemorySSA) MSSA->verifyMemorySSA(); bool Changed = unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial, UnswitchCB, SE, - MSSAU.hasValue() ? MSSAU.getPointer() : nullptr); + MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, + DestroyLoopCB); if (MSSA && VerifyMemorySSA) MSSA->verifyMemorySSA(); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 583bb379488e..d86ecbb6db00 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1094,17 +1094,24 @@ static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses( // Update (liveout) uses of bonus instructions, // now that the bonus instruction has been cloned into predecessor. - SSAUpdater SSAUpdate; - SSAUpdate.Initialize(BonusInst.getType(), - (NewBonusInst->getName() + ".merge").str()); - SSAUpdate.AddAvailableValue(BB, &BonusInst); - SSAUpdate.AddAvailableValue(PredBlock, NewBonusInst); + // Note that we expect to be in a block-closed SSA form for this to work! for (Use &U : make_early_inc_range(BonusInst.uses())) { auto *UI = cast<Instruction>(U.getUser()); - if (UI->getParent() != PredBlock) - SSAUpdate.RewriteUseAfterInsertions(U); - else // Use is in the same block as, and comes before, NewBonusInst. - SSAUpdate.RewriteUse(U); + auto *PN = dyn_cast<PHINode>(UI); + if (!PN) { + assert(UI->getParent() == BB && BonusInst.comesBefore(UI) && + "If the user is not a PHI node, then it should be in the same " + "block as, and come after, the original bonus instruction."); + continue; // Keep using the original bonus instruction. + } + // Is this the block-closed SSA form PHI node? + if (PN->getIncomingBlock(U) == BB) + continue; // Great, keep using the original bonus instruction. + // The only other alternative is an "use" when coming from + // the predecessor block - here we should refer to the cloned bonus instr. + assert(PN->getIncomingBlock(U) == PredBlock && + "Not in block-closed SSA form?"); + U.set(NewBonusInst); } } } @@ -3207,6 +3214,17 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU, // Early exits once we reach the limit. if (NumBonusInsts > BonusInstThreshold) return false; + + auto IsBCSSAUse = [BB, &I](Use &U) { + auto *UI = cast<Instruction>(U.getUser()); + if (auto *PN = dyn_cast<PHINode>(UI)) + return PN->getIncomingBlock(U) == BB; + return UI->getParent() == BB && I.comesBefore(UI); + }; + + // Does this instruction require rewriting of uses? + if (!all_of(I.uses(), IsBCSSAUse)) + return false; } // Ok, we have the budget. Perform the transformation. diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 52b5ae083d0e..c05a8408e1fd 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -234,12 +234,15 @@ bool VPlanTransforms::mergeReplicateRegions(VPlan &Plan) { for (VPRecipeBase &Phi1ToMove : make_early_inc_range(reverse(*Merge1))) { VPValue *PredInst1 = cast<VPPredInstPHIRecipe>(&Phi1ToMove)->getOperand(0); - for (VPUser *U : Phi1ToMove.getVPSingleValue()->users()) { + VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue(); + SmallVector<VPUser *> Users(Phi1ToMoveV->user_begin(), + Phi1ToMoveV->user_end()); + for (VPUser *U : Users) { auto *UI = dyn_cast<VPRecipeBase>(U); if (!UI || UI->getParent() != Then2) continue; for (unsigned I = 0, E = U->getNumOperands(); I != E; ++I) { - if (Phi1ToMove.getVPSingleValue() != U->getOperand(I)) + if (Phi1ToMoveV != U->getOperand(I)) continue; U->setOperand(I, PredInst1); } |