diff options
Diffstat (limited to 'lib/CodeGen')
69 files changed, 9104 insertions, 3314 deletions
diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp index 415bd9626220..b927acabac59 100644 --- a/lib/CodeGen/BackendUtil.cpp +++ b/lib/CodeGen/BackendUtil.cpp @@ -8,10 +8,10 @@ //===----------------------------------------------------------------------===// #include "clang/CodeGen/BackendUtil.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/TargetOptions.h" -#include "clang/Frontend/CodeGenOptions.h" #include "clang/Frontend/FrontendDiagnostic.h" #include "clang/Frontend/Utils.h" #include "clang/Lex/HeaderSearchOptions.h" @@ -37,6 +37,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Passes/PassBuilder.h" +#include "llvm/Support/BuryPointer.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/PrettyStackTrace.h" @@ -54,10 +55,13 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Instrumentation/BoundsChecking.h" #include "llvm/Transforms/Instrumentation/GCOVProfiler.h" +#include "llvm/Transforms/Instrumentation/MemorySanitizer.h" +#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/CanonicalizeAliases.h" #include "llvm/Transforms/Utils/NameAnonGlobals.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" #include <memory> @@ -235,11 +239,12 @@ static void addAddressSanitizerPasses(const PassManagerBuilder &Builder, const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts(); bool Recover = CGOpts.SanitizeRecover.has(SanitizerKind::Address); bool UseAfterScope = CGOpts.SanitizeAddressUseAfterScope; + bool UseOdrIndicator = CGOpts.SanitizeAddressUseOdrIndicator; bool UseGlobalsGC = asanUseGlobalsGC(T, CGOpts); 
PM.add(createAddressSanitizerFunctionPass(/*CompileKernel*/ false, Recover, UseAfterScope)); PM.add(createAddressSanitizerModulePass(/*CompileKernel*/ false, Recover, - UseGlobalsGC)); + UseGlobalsGC, UseOdrIndicator)); } static void addKernelAddressSanitizerPasses(const PassManagerBuilder &Builder, @@ -247,7 +252,8 @@ static void addKernelAddressSanitizerPasses(const PassManagerBuilder &Builder, PM.add(createAddressSanitizerFunctionPass( /*CompileKernel*/ true, /*Recover*/ true, /*UseAfterScope*/ false)); PM.add(createAddressSanitizerModulePass( - /*CompileKernel*/ true, /*Recover*/ true)); + /*CompileKernel*/ true, /*Recover*/ true, /*UseGlobalsGC*/ true, + /*UseOdrIndicator*/ false)); } static void addHWAddressSanitizerPasses(const PassManagerBuilder &Builder, @@ -265,14 +271,15 @@ static void addKernelHWAddressSanitizerPasses(const PassManagerBuilder &Builder, /*CompileKernel*/ true, /*Recover*/ true)); } -static void addMemorySanitizerPass(const PassManagerBuilder &Builder, - legacy::PassManagerBase &PM) { +static void addGeneralOptsForMemorySanitizer(const PassManagerBuilder &Builder, + legacy::PassManagerBase &PM, + bool CompileKernel) { const PassManagerBuilderWrapper &BuilderWrapper = static_cast<const PassManagerBuilderWrapper&>(Builder); const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts(); int TrackOrigins = CGOpts.SanitizeMemoryTrackOrigins; bool Recover = CGOpts.SanitizeRecover.has(SanitizerKind::Memory); - PM.add(createMemorySanitizerPass(TrackOrigins, Recover)); + PM.add(createMemorySanitizerLegacyPassPass(TrackOrigins, Recover, CompileKernel)); // MemorySanitizer inserts complex instrumentation that mostly follows // the logic of the original code, but operates on "shadow" values. 
@@ -287,9 +294,19 @@ static void addMemorySanitizerPass(const PassManagerBuilder &Builder, } } +static void addMemorySanitizerPass(const PassManagerBuilder &Builder, + legacy::PassManagerBase &PM) { + addGeneralOptsForMemorySanitizer(Builder, PM, /*CompileKernel*/ false); +} + +static void addKernelMemorySanitizerPass(const PassManagerBuilder &Builder, + legacy::PassManagerBase &PM) { + addGeneralOptsForMemorySanitizer(Builder, PM, /*CompileKernel*/ true); +} + static void addThreadSanitizerPass(const PassManagerBuilder &Builder, legacy::PassManagerBase &PM) { - PM.add(createThreadSanitizerPass()); + PM.add(createThreadSanitizerLegacyPassPass()); } static void addDataFlowSanitizerPass(const PassManagerBuilder &Builder, @@ -368,6 +385,7 @@ static CodeGenOpt::Level getCGOptLevel(const CodeGenOptions &CodeGenOpts) { static Optional<llvm::CodeModel::Model> getCodeModel(const CodeGenOptions &CodeGenOpts) { unsigned CodeModel = llvm::StringSwitch<unsigned>(CodeGenOpts.CodeModel) + .Case("tiny", llvm::CodeModel::Tiny) .Case("small", llvm::CodeModel::Small) .Case("kernel", llvm::CodeModel::Kernel) .Case("medium", llvm::CodeModel::Medium) @@ -416,7 +434,7 @@ static void initTargetOptions(llvm::TargetOptions &Options, switch (LangOpts.getDefaultFPContractMode()) { case LangOptions::FPC_Off: // Preserve any contraction performed by the front-end. (Strict performs - // splitting of the muladd instrinsic in the backend.) + // splitting of the muladd intrinsic in the backend.) 
Options.AllowFPOpFusion = llvm::FPOpFusion::Standard; break; case LangOptions::FPC_On: @@ -456,7 +474,7 @@ static void initTargetOptions(llvm::TargetOptions &Options, Options.EmitStackSizeSection = CodeGenOpts.StackSizeSection; Options.EmitAddrsig = CodeGenOpts.Addrsig; - if (CodeGenOpts.EnableSplitDwarf) + if (CodeGenOpts.getSplitDwarfMode() != CodeGenOptions::NoFission) Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile; Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll; Options.MCOptions.MCSaveTempLabels = CodeGenOpts.SaveTempLabels; @@ -491,6 +509,8 @@ static Optional<GCOVOptions> getGCOVOptions(const CodeGenOptions &CodeGenOpts) { Options.UseCfgChecksum = CodeGenOpts.CoverageExtraChecksum; Options.NoRedZone = CodeGenOpts.DisableRedZone; Options.FunctionNamesInData = !CodeGenOpts.CoverageNoFunctionNamesInData; + Options.Filter = CodeGenOpts.ProfileFilterFiles; + Options.Exclude = CodeGenOpts.ProfileExcludeFiles; Options.ExitBlockBeforeBody = CodeGenOpts.CoverageExitBlockBeforeBody; return Options; } @@ -613,6 +633,13 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, addMemorySanitizerPass); } + if (LangOpts.Sanitize.has(SanitizerKind::KernelMemory)) { + PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast, + addKernelMemorySanitizerPass); + PMBuilder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0, + addKernelMemorySanitizerPass); + } + if (LangOpts.Sanitize.has(SanitizerKind::Thread)) { PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast, addThreadSanitizerPass); @@ -653,6 +680,11 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, InstrProfOptions Options; Options.NoRedZone = CodeGenOpts.DisableRedZone; Options.InstrProfileOutput = CodeGenOpts.InstrProfileOutput; + + // TODO: Surface the option to emit atomic profile counter increments at + // the driver level. 
+ Options.Atomic = LangOpts.Sanitize.has(SanitizerKind::Thread); + MPM.add(createInstrProfilingLegacyPass(Options)); } if (CodeGenOpts.hasProfileIRInstr()) { @@ -777,12 +809,14 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, break; case Backend_EmitBC: - if (CodeGenOpts.PrepareForThinLTO) { + if (CodeGenOpts.PrepareForThinLTO && !CodeGenOpts.DisableLLVMPasses) { if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) { ThinLinkOS = openOutputFile(CodeGenOpts.ThinLinkBitcodeFile); if (!ThinLinkOS) return; } + TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit", + CodeGenOpts.EnableSplitLTOUnit); PerModulePasses.add(createWriteThinLTOBitcodePass( *OS, ThinLinkOS ? &ThinLinkOS->os() : nullptr)); } else { @@ -790,14 +824,18 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, // targets bool EmitLTOSummary = (CodeGenOpts.PrepareForLTO && + !CodeGenOpts.DisableLLVMPasses && llvm::Triple(TheModule->getTargetTriple()).getVendor() != llvm::Triple::Apple); - if (EmitLTOSummary && !TheModule->getModuleFlag("ThinLTO")) - TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0)); + if (EmitLTOSummary) { + if (!TheModule->getModuleFlag("ThinLTO")) + TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0)); + TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit", + CodeGenOpts.EnableSplitLTOUnit); + } - PerModulePasses.add( - createBitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists, - EmitLTOSummary)); + PerModulePasses.add(createBitcodeWriterPass( + *OS, CodeGenOpts.EmitLLVMUseLists, EmitLTOSummary)); } break; @@ -807,7 +845,8 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, break; default: - if (!CodeGenOpts.SplitDwarfFile.empty()) { + if (!CodeGenOpts.SplitDwarfFile.empty() && + (CodeGenOpts.getSplitDwarfMode() == CodeGenOptions::SplitFileFission)) { DwoOS = openOutputFile(CodeGenOpts.SplitDwarfFile); if (!DwoOS) return; @@ -905,18 +944,21 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( PGOOpt = 
PGOOptions(CodeGenOpts.InstrProfileOutput.empty() ? DefaultProfileGenName : CodeGenOpts.InstrProfileOutput, - "", "", true, CodeGenOpts.DebugInfoForProfiling); + "", "", "", true, + CodeGenOpts.DebugInfoForProfiling); else if (CodeGenOpts.hasProfileIRUse()) // -fprofile-use. - PGOOpt = PGOOptions("", CodeGenOpts.ProfileInstrumentUsePath, "", false, + PGOOpt = PGOOptions("", CodeGenOpts.ProfileInstrumentUsePath, "", + CodeGenOpts.ProfileRemappingFile, false, CodeGenOpts.DebugInfoForProfiling); else if (!CodeGenOpts.SampleProfileFile.empty()) // -fprofile-sample-use - PGOOpt = PGOOptions("", "", CodeGenOpts.SampleProfileFile, false, + PGOOpt = PGOOptions("", "", CodeGenOpts.SampleProfileFile, + CodeGenOpts.ProfileRemappingFile, false, CodeGenOpts.DebugInfoForProfiling); else if (CodeGenOpts.DebugInfoForProfiling) // -fdebug-info-for-profiling - PGOOpt = PGOOptions("", "", "", false, true); + PGOOpt = PGOOptions("", "", "", "", false, true); PassBuilder PB(TM.get(), PGOOpt); @@ -961,9 +1003,11 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds)) MPM.addPass(createModuleToFunctionPassAdaptor(BoundsCheckingPass())); - // Lastly, add a semantically necessary pass for LTO. - if (IsLTO || IsThinLTO) + // Lastly, add semantically necessary passes for LTO. + if (IsLTO || IsThinLTO) { + MPM.addPass(CanonicalizeAliasesPass()); MPM.addPass(NameAnonGlobalPass()); + } } else { // Map our optimization levels into one of the distinct levels used to // configure the pipeline. 
@@ -984,10 +1028,12 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( if (IsThinLTO) { MPM = PB.buildThinLTOPreLinkDefaultPipeline( Level, CodeGenOpts.DebugPassManager); + MPM.addPass(CanonicalizeAliasesPass()); MPM.addPass(NameAnonGlobalPass()); } else if (IsLTO) { MPM = PB.buildLTOPreLinkDefaultPipeline(Level, CodeGenOpts.DebugPassManager); + MPM.addPass(CanonicalizeAliasesPass()); MPM.addPass(NameAnonGlobalPass()); } else { MPM = PB.buildPerModuleDefaultPipeline(Level, @@ -1008,12 +1054,14 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( break; case Backend_EmitBC: - if (CodeGenOpts.PrepareForThinLTO) { + if (CodeGenOpts.PrepareForThinLTO && !CodeGenOpts.DisableLLVMPasses) { if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) { ThinLinkOS = openOutputFile(CodeGenOpts.ThinLinkBitcodeFile); if (!ThinLinkOS) return; } + TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit", + CodeGenOpts.EnableSplitLTOUnit); MPM.addPass(ThinLTOBitcodeWriterPass(*OS, ThinLinkOS ? &ThinLinkOS->os() : nullptr)); } else { @@ -1021,13 +1069,17 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( // targets bool EmitLTOSummary = (CodeGenOpts.PrepareForLTO && + !CodeGenOpts.DisableLLVMPasses && llvm::Triple(TheModule->getTargetTriple()).getVendor() != llvm::Triple::Apple); - if (EmitLTOSummary && !TheModule->getModuleFlag("ThinLTO")) - TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0)); - - MPM.addPass(BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists, - EmitLTOSummary)); + if (EmitLTOSummary) { + if (!TheModule->getModuleFlag("ThinLTO")) + TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0)); + TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit", + CodeGenOpts.EnableSplitLTOUnit); + } + MPM.addPass( + BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists, EmitLTOSummary)); } break; @@ -1104,6 +1156,7 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M, const LangOptions &LOpts, 
std::unique_ptr<raw_pwrite_stream> OS, std::string SampleProfile, + std::string ProfileRemapping, BackendAction Action) { StringMap<DenseMap<GlobalValue::GUID, GlobalValueSummary *>> ModuleToDefinedGVSummaries; @@ -1121,15 +1174,14 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M, continue; auto GUID = GlobalList.first; - assert(GlobalList.second.SummaryList.size() == 1 && - "Expected individual combined index to have one summary per GUID"); - auto &Summary = GlobalList.second.SummaryList[0]; - // Skip the summaries for the importing module. These are included to - // e.g. record required linkage changes. - if (Summary->modulePath() == M->getModuleIdentifier()) - continue; - // Add an entry to provoke importing by thinBackend. - ImportList[Summary->modulePath()].insert(GUID); + for (auto &Summary : GlobalList.second.SummaryList) { + // Skip the summaries for the importing module. These are included to + // e.g. record required linkage changes. + if (Summary->modulePath() == M->getModuleIdentifier()) + continue; + // Add an entry to provoke importing by thinBackend. 
+ ImportList[Summary->modulePath()].insert(GUID); + } } std::vector<std::unique_ptr<llvm::MemoryBuffer>> OwnedImports; @@ -1176,6 +1228,7 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M, Conf.CGOptLevel = getCGOptLevel(CGOpts); initTargetOptions(Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts); Conf.SampleProfile = std::move(SampleProfile); + Conf.ProfileRemapping = std::move(ProfileRemapping); Conf.UseNewPM = CGOpts.ExperimentalNewPassManager; Conf.DebugPassManager = CGOpts.DebugPassManager; Conf.RemarksWithHotness = CGOpts.DiagnosticsWithHotness; @@ -1242,7 +1295,7 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags, if (!CombinedIndex->skipModuleByDistributedBackend()) { runThinLTOBackend(CombinedIndex.get(), M, HeaderOpts, CGOpts, TOpts, LOpts, std::move(OS), CGOpts.SampleProfileFile, - Action); + CGOpts.ProfileRemappingFile, Action); return; } // Distributed indexing detected that nothing from the module is needed diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp index b34bcdc1fc38..24056a449def 100644 --- a/lib/CodeGen/CGAtomic.cpp +++ b/lib/CodeGen/CGAtomic.cpp @@ -18,7 +18,7 @@ #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/CodeGen/CGFunctionInfo.h" -#include "clang/Sema/SemaDiagnostic.h" +#include "clang/Frontend/FrontendDiagnostic.h" #include "llvm/ADT/DenseMap.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" @@ -765,11 +765,15 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { std::tie(sizeChars, alignChars) = getContext().getTypeInfoInChars(AtomicTy); uint64_t Size = sizeChars.getQuantity(); unsigned MaxInlineWidthInBits = getTarget().getMaxAtomicInlineWidth(); - bool UseLibcall = ((Ptr.getAlignment() % sizeChars) != 0 || - getContext().toBits(sizeChars) > MaxInlineWidthInBits); - if (UseLibcall) - CGM.getDiags().Report(E->getLocStart(), diag::warn_atomic_op_misaligned); + bool Oversized = getContext().toBits(sizeChars) > MaxInlineWidthInBits; + bool 
Misaligned = (Ptr.getAlignment() % sizeChars) != 0; + bool UseLibcall = Misaligned | Oversized; + + if (UseLibcall) { + CGM.getDiags().Report(E->getBeginLoc(), diag::warn_atomic_op_misaligned) + << !Oversized; + } llvm::Value *Order = EmitScalarExpr(E->getOrder()); llvm::Value *Scope = @@ -923,6 +927,15 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { UseOptimizedLibcall = true; break; + case AtomicExpr::AO__atomic_load: + case AtomicExpr::AO__atomic_store: + case AtomicExpr::AO__atomic_exchange: + case AtomicExpr::AO__atomic_compare_exchange: + // Use the generic version if we don't know that the operand will be + // suitably aligned for the optimized version. + if (Misaligned) + break; + LLVM_FALLTHROUGH; case AtomicExpr::AO__c11_atomic_load: case AtomicExpr::AO__c11_atomic_store: case AtomicExpr::AO__c11_atomic_exchange: @@ -934,14 +947,11 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: case AtomicExpr::AO__atomic_load_n: - case AtomicExpr::AO__atomic_load: case AtomicExpr::AO__atomic_store_n: - case AtomicExpr::AO__atomic_store: case AtomicExpr::AO__atomic_exchange_n: - case AtomicExpr::AO__atomic_exchange: case AtomicExpr::AO__atomic_compare_exchange_n: - case AtomicExpr::AO__atomic_compare_exchange: // Only use optimized library calls for sizes for which they exist. + // FIXME: Size == 16 optimized library functions exist too. 
if (Size == 1 || Size == 2 || Size == 4 || Size == 8) UseOptimizedLibcall = true; break; diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp index 8269b5b229a2..fa3c3ee8610c 100644 --- a/lib/CodeGen/CGBlocks.cpp +++ b/lib/CodeGen/CGBlocks.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "CGBlocks.h" +#include "CGCXXABI.h" #include "CGDebugInfo.h" #include "CGObjCRuntime.h" #include "CGOpenCLRuntime.h" @@ -25,6 +26,7 @@ #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Module.h" +#include "llvm/Support/ScopedPrinter.h" #include <algorithm> #include <cstdio> @@ -34,8 +36,8 @@ using namespace CodeGen; CGBlockInfo::CGBlockInfo(const BlockDecl *block, StringRef name) : Name(name), CXXThisIndex(0), CanBeGlobal(false), NeedsCopyDispose(false), HasCXXObject(false), UsesStret(false), HasCapturedVariableLayout(false), - LocalAddress(Address::invalid()), StructureType(nullptr), Block(block), - DominatingIP(nullptr) { + CapturesNonExternalType(false), LocalAddress(Address::invalid()), + StructureType(nullptr), Block(block), DominatingIP(nullptr) { // Skip asm prefix, if any. 'name' is usually taken directly from // the mangled name of the enclosing function. @@ -63,6 +65,110 @@ static llvm::Constant *buildDisposeHelper(CodeGenModule &CGM, return CodeGenFunction(CGM).GenerateDestroyHelperFunction(blockInfo); } +namespace { + +/// Represents a type of copy/destroy operation that should be performed for an +/// entity that's captured by a block. +enum class BlockCaptureEntityKind { + CXXRecord, // Copy or destroy + ARCWeak, + ARCStrong, + NonTrivialCStruct, + BlockObject, // Assign or release + None +}; + +/// Represents a captured entity that requires extra operations in order for +/// this entity to be copied or destroyed correctly. 
+struct BlockCaptureManagedEntity { + BlockCaptureEntityKind CopyKind, DisposeKind; + BlockFieldFlags CopyFlags, DisposeFlags; + const BlockDecl::Capture *CI; + const CGBlockInfo::Capture *Capture; + + BlockCaptureManagedEntity(BlockCaptureEntityKind CopyType, + BlockCaptureEntityKind DisposeType, + BlockFieldFlags CopyFlags, + BlockFieldFlags DisposeFlags, + const BlockDecl::Capture &CI, + const CGBlockInfo::Capture &Capture) + : CopyKind(CopyType), DisposeKind(DisposeType), CopyFlags(CopyFlags), + DisposeFlags(DisposeFlags), CI(&CI), Capture(&Capture) {} + + bool operator<(const BlockCaptureManagedEntity &Other) const { + return Capture->getOffset() < Other.Capture->getOffset(); + } +}; + +enum class CaptureStrKind { + // String for the copy helper. + CopyHelper, + // String for the dispose helper. + DisposeHelper, + // Merge the strings for the copy helper and dispose helper. + Merged +}; + +} // end anonymous namespace + +static void findBlockCapturedManagedEntities( + const CGBlockInfo &BlockInfo, const LangOptions &LangOpts, + SmallVectorImpl<BlockCaptureManagedEntity> &ManagedCaptures); + +static std::string getBlockCaptureStr(const BlockCaptureManagedEntity &E, + CaptureStrKind StrKind, + CharUnits BlockAlignment, + CodeGenModule &CGM); + +static std::string getBlockDescriptorName(const CGBlockInfo &BlockInfo, + CodeGenModule &CGM) { + std::string Name = "__block_descriptor_"; + Name += llvm::to_string(BlockInfo.BlockSize.getQuantity()) + "_"; + + if (BlockInfo.needsCopyDisposeHelpers()) { + if (CGM.getLangOpts().Exceptions) + Name += "e"; + if (CGM.getCodeGenOpts().ObjCAutoRefCountExceptions) + Name += "a"; + Name += llvm::to_string(BlockInfo.BlockAlign.getQuantity()) + "_"; + + SmallVector<BlockCaptureManagedEntity, 4> ManagedCaptures; + findBlockCapturedManagedEntities(BlockInfo, CGM.getContext().getLangOpts(), + ManagedCaptures); + + for (const BlockCaptureManagedEntity &E : ManagedCaptures) { + Name += 
llvm::to_string(E.Capture->getOffset().getQuantity()); + + if (E.CopyKind == E.DisposeKind) { + // If CopyKind and DisposeKind are the same, merge the capture + // information. + assert(E.CopyKind != BlockCaptureEntityKind::None && + "shouldn't see BlockCaptureManagedEntity that is None"); + Name += getBlockCaptureStr(E, CaptureStrKind::Merged, + BlockInfo.BlockAlign, CGM); + } else { + // If CopyKind and DisposeKind are not the same, which can happen when + // either Kind is None or the captured object is a __strong block, + // concatenate the copy and dispose strings. + Name += getBlockCaptureStr(E, CaptureStrKind::CopyHelper, + BlockInfo.BlockAlign, CGM); + Name += getBlockCaptureStr(E, CaptureStrKind::DisposeHelper, + BlockInfo.BlockAlign, CGM); + } + } + Name += "_"; + } + + std::string TypeAtEncoding = + CGM.getContext().getObjCEncodingForBlock(BlockInfo.getBlockExpr()); + /// Replace occurrences of '@' with '\1'. '@' is reserved on ELF platforms as + /// a separator between symbol name and symbol version. + std::replace(TypeAtEncoding.begin(), TypeAtEncoding.end(), '@', '\1'); + Name += "e" + llvm::to_string(TypeAtEncoding.size()) + "_" + TypeAtEncoding; + Name += "l" + CGM.getObjCRuntime().getRCBlockLayoutStr(CGM, BlockInfo); + return Name; +} + /// buildBlockDescriptor - Build the block descriptor meta-data for a block. /// buildBlockDescriptor is accessed from 5th field of the Block_literal /// meta-data and contains stationary information about the block literal. @@ -72,7 +178,7 @@ static llvm::Constant *buildDisposeHelper(CodeGenModule &CGM, /// unsigned long reserved; /// unsigned long size; // size of Block_literal metadata in bytes. /// void *copy_func_helper_decl; // optional copy helper. -/// void *destroy_func_decl; // optioanl destructor helper. +/// void *destroy_func_decl; // optional destructor helper. /// void *block_method_encoding_address; // @encode for block literal signature. 
/// void *block_layout_info; // encoding of captured block variables. /// }; @@ -91,6 +197,19 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM, else i8p = CGM.VoidPtrTy; + std::string descName; + + // If an equivalent block descriptor global variable exists, return it. + if (C.getLangOpts().ObjC && + CGM.getLangOpts().getGC() == LangOptions::NonGC) { + descName = getBlockDescriptorName(blockInfo, CGM); + if (llvm::GlobalValue *desc = CGM.getModule().getNamedValue(descName)) + return llvm::ConstantExpr::getBitCast(desc, + CGM.getBlockDescriptorType()); + } + + // If there isn't an equivalent block descriptor global variable, create a new + // one. ConstantInitBuilder builder(CGM); auto elements = builder.beginStruct(); @@ -104,12 +223,20 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM, elements.addInt(ulong, blockInfo.BlockSize.getQuantity()); // Optional copy/dispose helpers. + bool hasInternalHelper = false; if (blockInfo.needsCopyDisposeHelpers()) { // copy_func_helper_decl - elements.add(buildCopyHelper(CGM, blockInfo)); + llvm::Constant *copyHelper = buildCopyHelper(CGM, blockInfo); + elements.add(copyHelper); // destroy_func_decl - elements.add(buildDisposeHelper(CGM, blockInfo)); + llvm::Constant *disposeHelper = buildDisposeHelper(CGM, blockInfo); + elements.add(disposeHelper); + + if (cast<llvm::Function>(copyHelper->getOperand(0))->hasInternalLinkage() || + cast<llvm::Function>(disposeHelper->getOperand(0)) + ->hasInternalLinkage()) + hasInternalHelper = true; } // Signature. Mandatory ObjC-style method descriptor @encode sequence. @@ -119,7 +246,7 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM, CGM.GetAddrOfConstantCString(typeAtEncoding).getPointer(), i8p)); // GC layout. 
- if (C.getLangOpts().ObjC1) { + if (C.getLangOpts().ObjC) { if (CGM.getLangOpts().getGC() != LangOptions::NonGC) elements.add(CGM.getObjCRuntime().BuildGCBlockLayout(CGM, blockInfo)); else @@ -132,12 +259,26 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM, if (C.getLangOpts().OpenCL) AddrSpace = C.getTargetAddressSpace(LangAS::opencl_constant); + llvm::GlobalValue::LinkageTypes linkage; + if (descName.empty()) { + linkage = llvm::GlobalValue::InternalLinkage; + descName = "__block_descriptor_tmp"; + } else if (hasInternalHelper) { + // If either the copy helper or the dispose helper has internal linkage, + // the block descriptor must have internal linkage too. + linkage = llvm::GlobalValue::InternalLinkage; + } else { + linkage = llvm::GlobalValue::LinkOnceODRLinkage; + } + llvm::GlobalVariable *global = - elements.finishAndCreateGlobal("__block_descriptor_tmp", - CGM.getPointerAlign(), - /*constant*/ true, - llvm::GlobalValue::InternalLinkage, - AddrSpace); + elements.finishAndCreateGlobal(descName, CGM.getPointerAlign(), + /*constant*/ true, linkage, AddrSpace); + + if (linkage == llvm::GlobalValue::LinkOnceODRLinkage) { + global->setVisibility(llvm::GlobalValue::HiddenVisibility); + global->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + } return llvm::ConstantExpr::getBitCast(global, CGM.getBlockDescriptorType()); } @@ -308,12 +449,25 @@ static void initializeForBlockHeader(CodeGenModule &CGM, CGBlockInfo &info, assert(elementTypes.empty()); if (CGM.getLangOpts().OpenCL) { - // The header is basically 'struct { int; int; + // The header is basically 'struct { int; int; generic void *; // custom_fields; }'. Assert that struct is packed. 
+ auto GenericAS = + CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic); + auto GenPtrAlign = + CharUnits::fromQuantity(CGM.getTarget().getPointerAlign(GenericAS) / 8); + auto GenPtrSize = + CharUnits::fromQuantity(CGM.getTarget().getPointerWidth(GenericAS) / 8); + assert(CGM.getIntSize() <= GenPtrSize); + assert(CGM.getIntAlign() <= GenPtrAlign); + assert((2 * CGM.getIntSize()).isMultipleOf(GenPtrAlign)); elementTypes.push_back(CGM.IntTy); /* total size */ elementTypes.push_back(CGM.IntTy); /* align */ - unsigned Offset = 2 * CGM.getIntSize().getQuantity(); - unsigned BlockAlign = CGM.getIntAlign().getQuantity(); + elementTypes.push_back( + CGM.getOpenCLRuntime() + .getGenericVoidPointerType()); /* invoke function */ + unsigned Offset = + 2 * CGM.getIntSize().getQuantity() + GenPtrSize.getQuantity(); + unsigned BlockAlign = GenPtrAlign.getQuantity(); if (auto *Helper = CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) { for (auto I : Helper->getCustomFieldTypes()) /* custom fields */ { @@ -355,7 +509,11 @@ static QualType getCaptureFieldType(const CodeGenFunction &CGF, return CGF.BlockInfo->getCapture(VD).fieldType(); if (auto *FD = CGF.LambdaCaptureFields.lookup(VD)) return FD->getType(); - return VD->getType(); + // If the captured variable is a non-escaping __block variable, the field + // type is the reference type. If the variable is a __block variable that + // already has a reference type, the field type is the variable's type. + return VD->isNonEscapingByref() ? + CGF.getContext().getLValueReferenceType(VD->getType()) : VD->getType(); } /// Compute the layout of the given block. 
Attempts to lay the block @@ -378,7 +536,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, info.CanBeGlobal = true; return; } - else if (C.getLangOpts().ObjC1 && + else if (C.getLangOpts().ObjC && CGM.getLangOpts().getGC() == LangOptions::NonGC) info.HasCapturedVariableLayout = true; @@ -393,7 +551,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, if (block->capturesCXXThis()) { assert(CGF && CGF->CurFuncDecl && isa<CXXMethodDecl>(CGF->CurFuncDecl) && "Can't capture 'this' outside a method"); - QualType thisType = cast<CXXMethodDecl>(CGF->CurFuncDecl)->getThisType(C); + QualType thisType = cast<CXXMethodDecl>(CGF->CurFuncDecl)->getThisType(); // Theoretically, this could be in a different address space, so // don't assume standard pointer size/align. @@ -411,7 +569,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, for (const auto &CI : block->captures()) { const VarDecl *variable = CI.getVariable(); - if (CI.isByRef()) { + if (CI.isEscapingByref()) { // We have to copy/dispose of the __block reference. info.NeedsCopyDispose = true; @@ -419,6 +577,10 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, CharUnits align = CGM.getPointerAlign(); maxFieldAlign = std::max(maxFieldAlign, align); + // Since a __block variable cannot be captured by lambdas, its type and + // the capture field type should always match. + assert(getCaptureFieldType(*CGF, CI) == variable->getType() && + "capture type differs from the variable type"); layout.push_back(BlockLayoutChunk(align, CGM.getPointerSize(), Qualifiers::OCL_None, &CI, CGM.VoidPtrTy, variable->getType())); @@ -432,10 +594,11 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, continue; } + QualType VT = getCaptureFieldType(*CGF, CI); + // If we have a lifetime qualifier, honor it for capture purposes. // That includes *not* copying it if it's __unsafe_unretained. 
- Qualifiers::ObjCLifetime lifetime = - variable->getType().getObjCLifetime(); + Qualifiers::ObjCLifetime lifetime = VT.getObjCLifetime(); if (lifetime) { switch (lifetime) { case Qualifiers::OCL_None: llvm_unreachable("impossible"); @@ -449,10 +612,10 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, } // Block pointers require copy/dispose. So do Objective-C pointers. - } else if (variable->getType()->isObjCRetainableType()) { + } else if (VT->isObjCRetainableType()) { // But honor the inert __unsafe_unretained qualifier, which doesn't // actually make it into the type system. - if (variable->getType()->isObjCInertUnsafeUnretainedType()) { + if (VT->isObjCInertUnsafeUnretainedType()) { lifetime = Qualifiers::OCL_ExplicitNone; } else { info.NeedsCopyDispose = true; @@ -464,27 +627,27 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, } else if (CI.hasCopyExpr()) { info.NeedsCopyDispose = true; info.HasCXXObject = true; + if (!VT->getAsCXXRecordDecl()->isExternallyVisible()) + info.CapturesNonExternalType = true; // So do C structs that require non-trivial copy construction or // destruction. - } else if (variable->getType().isNonTrivialToPrimitiveCopy() == - QualType::PCK_Struct || - variable->getType().isDestructedType() == - QualType::DK_nontrivial_c_struct) { + } else if (VT.isNonTrivialToPrimitiveCopy() == QualType::PCK_Struct || + VT.isDestructedType() == QualType::DK_nontrivial_c_struct) { info.NeedsCopyDispose = true; // And so do types with destructors. 
} else if (CGM.getLangOpts().CPlusPlus) { - if (const CXXRecordDecl *record = - variable->getType()->getAsCXXRecordDecl()) { + if (const CXXRecordDecl *record = VT->getAsCXXRecordDecl()) { if (!record->hasTrivialDestructor()) { info.HasCXXObject = true; info.NeedsCopyDispose = true; + if (!record->isExternallyVisible()) + info.CapturesNonExternalType = true; } } } - QualType VT = getCaptureFieldType(*CGF, CI); CharUnits size = C.getTypeSizeInChars(VT); CharUnits align = C.getDeclAlign(variable); @@ -699,10 +862,12 @@ static void enterBlockScope(CodeGenFunction &CGF, BlockDecl *block) { /// Enter a full-expression with a non-trivial number of objects to /// clean up. This is in this file because, at the moment, the only /// kind of cleanup object is a BlockDecl*. -void CodeGenFunction::enterNonTrivialFullExpression(const ExprWithCleanups *E) { - assert(E->getNumObjects() != 0); - for (const ExprWithCleanups::CleanupObject &C : E->getObjects()) - enterBlockScope(*this, C); +void CodeGenFunction::enterNonTrivialFullExpression(const FullExpr *E) { + if (const auto EWC = dyn_cast<ExprWithCleanups>(E)) { + assert(EWC->getNumObjects() != 0); + for (const ExprWithCleanups::CleanupObject &C : EWC->getObjects()) + enterBlockScope(*this, C); + } } /// Find the layout for the given block in a linked list and remove it. @@ -759,12 +924,20 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr) { llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL; + auto GenVoidPtrTy = + IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy; + LangAS GenVoidPtrAddr = IsOpenCL ? LangAS::opencl_generic : LangAS::Default; + auto GenVoidPtrSize = CharUnits::fromQuantity( + CGM.getTarget().getPointerWidth( + CGM.getContext().getTargetAddressSpace(GenVoidPtrAddr)) / + 8); // Using the computed layout, generate the actual block function. 
bool isLambdaConv = blockInfo.getBlockDecl()->isConversionFromLambda(); CodeGenFunction BlockCGF{CGM, true}; BlockCGF.SanOpts = SanOpts; auto *InvokeFn = BlockCGF.GenerateBlockFunction( CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal); + auto *blockFn = llvm::ConstantExpr::getPointerCast(InvokeFn, GenVoidPtrTy); // If there is nothing to capture, we can emit this as a global block. if (blockInfo.CanBeGlobal) @@ -840,12 +1013,11 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { llvm::ConstantInt::get(IntTy, blockInfo.BlockAlign.getQuantity()), getIntSize(), "block.align"); } - if (!IsOpenCL) { - addHeaderField(llvm::ConstantExpr::getBitCast(InvokeFn, VoidPtrTy), - getPointerSize(), "block.invoke"); + addHeaderField(blockFn, GenVoidPtrSize, "block.invoke"); + if (!IsOpenCL) addHeaderField(descriptor, getPointerSize(), "block.descriptor"); - } else if (auto *Helper = - CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) { + else if (auto *Helper = + CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) { for (auto I : Helper->getCustomFieldValues(*this, blockInfo)) { addHeaderField( I.first, @@ -889,7 +1061,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // The lambda capture in a lambda's conversion-to-block-pointer is // special; we'll simply emit it directly. src = Address::invalid(); - } else if (CI.isByRef()) { + } else if (CI.isEscapingByref()) { if (BlockInfo && CI.isNested()) { // We need to use the capture from the enclosing block. 
const CGBlockInfo::Capture &enclosingCapture = @@ -906,7 +1078,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { src = I->second; } } else { - DeclRefExpr declRef(const_cast<VarDecl *>(variable), + DeclRefExpr declRef(getContext(), const_cast<VarDecl *>(variable), /*RefersToEnclosingVariableOrCapture*/ CI.isNested(), type.getNonReferenceType(), VK_LValue, SourceLocation()); @@ -917,7 +1089,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // the block field. There's no need to chase the forwarding // pointer at this point, since we're building something that will // live a shorter life than the stack byref anyway. - if (CI.isByRef()) { + if (CI.isEscapingByref()) { // Get a void* that points to the byref struct. llvm::Value *byrefPointer; if (CI.isNested()) @@ -980,7 +1152,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // We use one of these or the other depending on whether the // reference is nested. 
- DeclRefExpr declRef(const_cast<VarDecl *>(variable), + DeclRefExpr declRef(getContext(), const_cast<VarDecl *>(variable), /*RefersToEnclosingVariableOrCapture*/ CI.isNested(), type, VK_LValue, SourceLocation()); @@ -1049,23 +1221,38 @@ llvm::Type *CodeGenModule::getBlockDescriptorType() { } llvm::Type *CodeGenModule::getGenericBlockLiteralType() { - assert(!getLangOpts().OpenCL && "OpenCL does not need this"); - if (GenericBlockLiteralType) return GenericBlockLiteralType; llvm::Type *BlockDescPtrTy = getBlockDescriptorType(); - // struct __block_literal_generic { - // void *__isa; - // int __flags; - // int __reserved; - // void (*__invoke)(void *); - // struct __block_descriptor *__descriptor; - // }; - GenericBlockLiteralType = - llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy, - IntTy, IntTy, VoidPtrTy, BlockDescPtrTy); + if (getLangOpts().OpenCL) { + // struct __opencl_block_literal_generic { + // int __size; + // int __align; + // __generic void *__invoke; + // /* custom fields */ + // }; + SmallVector<llvm::Type *, 8> StructFields( + {IntTy, IntTy, getOpenCLRuntime().getGenericVoidPointerType()}); + if (auto *Helper = getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) { + for (auto I : Helper->getCustomFieldTypes()) + StructFields.push_back(I); + } + GenericBlockLiteralType = llvm::StructType::create( + StructFields, "struct.__opencl_block_literal_generic"); + } else { + // struct __block_literal_generic { + // void *__isa; + // int __flags; + // int __reserved; + // void (*__invoke)(void *); + // struct __block_descriptor *__descriptor; + // }; + GenericBlockLiteralType = + llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy, + IntTy, IntTy, VoidPtrTy, BlockDescPtrTy); + } return GenericBlockLiteralType; } @@ -1076,21 +1263,27 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E, E->getCallee()->getType()->getAs<BlockPointerType>(); llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee()); - llvm::Value 
*FuncPtr; - if (!CGM.getLangOpts().OpenCL) { - // Get a pointer to the generic block literal. - llvm::Type *BlockLiteralTy = - llvm::PointerType::get(CGM.getGenericBlockLiteralType(), 0); + // Get a pointer to the generic block literal. + // For OpenCL we generate generic AS void ptr to be able to reuse the same + // block definition for blocks with captures generated as private AS local + // variables and without captures generated as global AS program scope + // variables. + unsigned AddrSpace = 0; + if (getLangOpts().OpenCL) + AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic); - // Bitcast the callee to a block literal. - BlockPtr = - Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal"); + llvm::Type *BlockLiteralTy = + llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace); - // Get the function pointer from the literal. - FuncPtr = - Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, 3); - } + // Bitcast the callee to a block literal. + BlockPtr = + Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal"); + + // Get the function pointer from the literal. + llvm::Value *FuncPtr = + Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, + CGM.getLangOpts().OpenCL ? 2 : 3); // Add the block literal. CallArgList Args; @@ -1113,11 +1306,7 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E, EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments()); // Load the function. 
- llvm::Value *Func; - if (CGM.getLangOpts().OpenCL) - Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee()); - else - Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); + llvm::Value *Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign()); const FunctionType *FuncTy = FnType->castAs<FunctionType>(); const CGFunctionInfo &FnInfo = @@ -1136,8 +1325,7 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E, return EmitCall(FnInfo, Callee, ReturnValue, Args); } -Address CodeGenFunction::GetAddrOfBlockDecl(const VarDecl *variable, - bool isByRef) { +Address CodeGenFunction::GetAddrOfBlockDecl(const VarDecl *variable) { assert(BlockInfo && "evaluating block ref without block information?"); const CGBlockInfo::Capture &capture = BlockInfo->getCapture(variable); @@ -1148,7 +1336,7 @@ Address CodeGenFunction::GetAddrOfBlockDecl(const VarDecl *variable, Builder.CreateStructGEP(LoadBlockStruct(), capture.getIndex(), capture.getOffset(), "block.capture.addr"); - if (isByRef) { + if (variable->isEscapingByref()) { // addr should be a void** right now. Load, then cast the result // to byref*. 
@@ -1162,6 +1350,10 @@ Address CodeGenFunction::GetAddrOfBlockDecl(const VarDecl *variable, variable->getName()); } + assert((!variable->isNonEscapingByref() || + capture.fieldType()->isReferenceType()) && + "the capture field of a non-escaping variable should have a " + "reference type"); if (capture.fieldType()->isReferenceType()) addr = EmitLoadOfReference(MakeAddrLValue(addr, capture.fieldType())); @@ -1213,9 +1405,13 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, auto fields = builder.beginStruct(); bool IsOpenCL = CGM.getLangOpts().OpenCL; + bool IsWindows = CGM.getTarget().getTriple().isOSWindows(); if (!IsOpenCL) { // isa - fields.add(CGM.getNSConcreteGlobalBlock()); + if (IsWindows) + fields.addNullPointer(CGM.Int8PtrPtrTy); + else + fields.add(CGM.getNSConcreteGlobalBlock()); // __flags BlockFlags flags = BLOCK_IS_GLOBAL | BLOCK_HAS_SIGNATURE; @@ -1226,14 +1422,14 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, // Reserved fields.addInt(CGM.IntTy, 0); - - // Function - fields.add(blockFn); } else { fields.addInt(CGM.IntTy, blockInfo.BlockSize.getQuantity()); fields.addInt(CGM.IntTy, blockInfo.BlockAlign.getQuantity()); } + // Function + fields.add(blockFn); + if (!IsOpenCL) { // Descriptor fields.add(buildBlockDescriptor(CGM, blockInfo)); @@ -1250,7 +1446,27 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM, llvm::Constant *literal = fields.finishAndCreateGlobal( "__block_literal_global", blockInfo.BlockAlign, - /*constant*/ true, llvm::GlobalVariable::InternalLinkage, AddrSpace); + /*constant*/ !IsWindows, llvm::GlobalVariable::InternalLinkage, AddrSpace); + + // Windows does not allow globals to be initialised to point to globals in + // different DLLs. Any such variables must run code to initialise them. 
+ if (IsWindows) { + auto *Init = llvm::Function::Create(llvm::FunctionType::get(CGM.VoidTy, + {}), llvm::GlobalValue::InternalLinkage, ".block_isa_init", + &CGM.getModule()); + llvm::IRBuilder<> b(llvm::BasicBlock::Create(CGM.getLLVMContext(), "entry", + Init)); + b.CreateAlignedStore(CGM.getNSConcreteGlobalBlock(), + b.CreateStructGEP(literal, 0), CGM.getPointerAlign().getQuantity()); + b.CreateRetVoid(); + // We can't use the normal LLVM global initialisation array, because we + // need to specify that this runs early in library initialisation. + auto *InitVar = new llvm::GlobalVariable(CGM.getModule(), Init->getType(), + /*isConstant*/true, llvm::GlobalValue::InternalLinkage, + Init, ".block_isa_init_ptr"); + InitVar->setSection(".CRT$XCLa"); + CGM.addUsedGlobal(InitVar); + } // Return a constant of the appropriately-casted type. llvm::Type *RequiredType = @@ -1284,7 +1500,7 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D, } } - SourceLocation StartLoc = BlockInfo->getBlockExpr()->getBody()->getLocStart(); + SourceLocation StartLoc = BlockInfo->getBlockExpr()->getBody()->getBeginLoc(); ApplyDebugLocation Scope(*this, StartLoc); // Instead of messing around with LocalDeclMap, just set the value @@ -1314,7 +1530,7 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, CurGD = GD; - CurEHLocation = blockInfo.getBlockExpr()->getLocEnd(); + CurEHLocation = blockInfo.getBlockExpr()->getEndLoc(); BlockInfo = &blockInfo; @@ -1379,7 +1595,7 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, // Begin generating the function. StartFunction(blockDecl, fnType->getReturnType(), fn, fnInfo, args, blockDecl->getLocation(), - blockInfo.getBlockExpr()->getBody()->getLocStart()); + blockInfo.getBlockExpr()->getBody()->getBeginLoc()); // Okay. Undo some of what StartFunction did. 
@@ -1480,35 +1696,6 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, return fn; } -namespace { - -/// Represents a type of copy/destroy operation that should be performed for an -/// entity that's captured by a block. -enum class BlockCaptureEntityKind { - CXXRecord, // Copy or destroy - ARCWeak, - ARCStrong, - NonTrivialCStruct, - BlockObject, // Assign or release - None -}; - -/// Represents a captured entity that requires extra operations in order for -/// this entity to be copied or destroyed correctly. -struct BlockCaptureManagedEntity { - BlockCaptureEntityKind Kind; - BlockFieldFlags Flags; - const BlockDecl::Capture &CI; - const CGBlockInfo::Capture &Capture; - - BlockCaptureManagedEntity(BlockCaptureEntityKind Type, BlockFieldFlags Flags, - const BlockDecl::Capture &CI, - const CGBlockInfo::Capture &Capture) - : Kind(Type), Flags(Flags), CI(CI), Capture(Capture) {} -}; - -} // end anonymous namespace - static std::pair<BlockCaptureEntityKind, BlockFieldFlags> computeCopyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T, const LangOptions &LangOpts) { @@ -1518,7 +1705,7 @@ computeCopyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T, return std::make_pair(BlockCaptureEntityKind::CXXRecord, BlockFieldFlags()); } BlockFieldFlags Flags; - if (CI.isByRef()) { + if (CI.isEscapingByref()) { Flags = BLOCK_FIELD_IS_BYREF; if (T.isObjCGCWeak()) Flags |= BLOCK_FIELD_IS_WEAK; @@ -1566,23 +1753,32 @@ computeCopyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T, llvm_unreachable("after exhaustive PrimitiveCopyKind switch"); } +static std::pair<BlockCaptureEntityKind, BlockFieldFlags> +computeDestroyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T, + const LangOptions &LangOpts); + /// Find the set of block captures that need to be explicitly copied or destroy. 
static void findBlockCapturedManagedEntities( const CGBlockInfo &BlockInfo, const LangOptions &LangOpts, - SmallVectorImpl<BlockCaptureManagedEntity> &ManagedCaptures, - llvm::function_ref<std::pair<BlockCaptureEntityKind, BlockFieldFlags>( - const BlockDecl::Capture &, QualType, const LangOptions &)> - Predicate) { + SmallVectorImpl<BlockCaptureManagedEntity> &ManagedCaptures) { for (const auto &CI : BlockInfo.getBlockDecl()->captures()) { const VarDecl *Variable = CI.getVariable(); const CGBlockInfo::Capture &Capture = BlockInfo.getCapture(Variable); if (Capture.isConstant()) continue; - auto Info = Predicate(CI, Variable->getType(), LangOpts); - if (Info.first != BlockCaptureEntityKind::None) - ManagedCaptures.emplace_back(Info.first, Info.second, CI, Capture); + QualType VT = Capture.fieldType(); + auto CopyInfo = computeCopyInfoForBlockCapture(CI, VT, LangOpts); + auto DisposeInfo = computeDestroyInfoForBlockCapture(CI, VT, LangOpts); + if (CopyInfo.first != BlockCaptureEntityKind::None || + DisposeInfo.first != BlockCaptureEntityKind::None) + ManagedCaptures.emplace_back(CopyInfo.first, DisposeInfo.first, + CopyInfo.second, DisposeInfo.second, CI, + Capture); } + + // Sort the captures by offset. 
+ llvm::sort(ManagedCaptures); } namespace { @@ -1590,10 +1786,12 @@ namespace { struct CallBlockRelease final : EHScopeStack::Cleanup { Address Addr; BlockFieldFlags FieldFlags; - bool LoadBlockVarAddr; + bool LoadBlockVarAddr, CanThrow; - CallBlockRelease(Address Addr, BlockFieldFlags Flags, bool LoadValue) - : Addr(Addr), FieldFlags(Flags), LoadBlockVarAddr(LoadValue) {} + CallBlockRelease(Address Addr, BlockFieldFlags Flags, bool LoadValue, + bool CT) + : Addr(Addr), FieldFlags(Flags), LoadBlockVarAddr(LoadValue), + CanThrow(CT) {} void Emit(CodeGenFunction &CGF, Flags flags) override { llvm::Value *BlockVarAddr; @@ -1604,15 +1802,145 @@ struct CallBlockRelease final : EHScopeStack::Cleanup { BlockVarAddr = Addr.getPointer(); } - CGF.BuildBlockRelease(BlockVarAddr, FieldFlags); + CGF.BuildBlockRelease(BlockVarAddr, FieldFlags, CanThrow); } }; } // end anonymous namespace +/// Check if \p T is a C++ class that has a destructor that can throw. +bool CodeGenFunction::cxxDestructorCanThrow(QualType T) { + if (const auto *RD = T->getAsCXXRecordDecl()) + if (const CXXDestructorDecl *DD = RD->getDestructor()) + return DD->getType()->getAs<FunctionProtoType>()->canThrow(); + return false; +} + +// Return a string that has the information about a capture. +static std::string getBlockCaptureStr(const BlockCaptureManagedEntity &E, + CaptureStrKind StrKind, + CharUnits BlockAlignment, + CodeGenModule &CGM) { + std::string Str; + ASTContext &Ctx = CGM.getContext(); + const BlockDecl::Capture &CI = *E.CI; + QualType CaptureTy = CI.getVariable()->getType(); + + BlockCaptureEntityKind Kind; + BlockFieldFlags Flags; + + // CaptureStrKind::Merged should be passed only when the operations and the + // flags are the same for copy and dispose. 
+ assert((StrKind != CaptureStrKind::Merged || + (E.CopyKind == E.DisposeKind && E.CopyFlags == E.DisposeFlags)) && + "different operations and flags"); + + if (StrKind == CaptureStrKind::DisposeHelper) { + Kind = E.DisposeKind; + Flags = E.DisposeFlags; + } else { + Kind = E.CopyKind; + Flags = E.CopyFlags; + } + + switch (Kind) { + case BlockCaptureEntityKind::CXXRecord: { + Str += "c"; + SmallString<256> TyStr; + llvm::raw_svector_ostream Out(TyStr); + CGM.getCXXABI().getMangleContext().mangleTypeName(CaptureTy, Out); + Str += llvm::to_string(TyStr.size()) + TyStr.c_str(); + break; + } + case BlockCaptureEntityKind::ARCWeak: + Str += "w"; + break; + case BlockCaptureEntityKind::ARCStrong: + Str += "s"; + break; + case BlockCaptureEntityKind::BlockObject: { + const VarDecl *Var = CI.getVariable(); + unsigned F = Flags.getBitMask(); + if (F & BLOCK_FIELD_IS_BYREF) { + Str += "r"; + if (F & BLOCK_FIELD_IS_WEAK) + Str += "w"; + else { + // If CaptureStrKind::Merged is passed, check both the copy expression + // and the destructor. + if (StrKind != CaptureStrKind::DisposeHelper) { + if (Ctx.getBlockVarCopyInit(Var).canThrow()) + Str += "c"; + } + if (StrKind != CaptureStrKind::CopyHelper) { + if (CodeGenFunction::cxxDestructorCanThrow(CaptureTy)) + Str += "d"; + } + } + } else { + assert((F & BLOCK_FIELD_IS_OBJECT) && "unexpected flag value"); + if (F == BLOCK_FIELD_IS_BLOCK) + Str += "b"; + else + Str += "o"; + } + break; + } + case BlockCaptureEntityKind::NonTrivialCStruct: { + bool IsVolatile = CaptureTy.isVolatileQualified(); + CharUnits Alignment = + BlockAlignment.alignmentAtOffset(E.Capture->getOffset()); + + Str += "n"; + std::string FuncStr; + if (StrKind == CaptureStrKind::DisposeHelper) + FuncStr = CodeGenFunction::getNonTrivialDestructorStr( + CaptureTy, Alignment, IsVolatile, Ctx); + else + // If CaptureStrKind::Merged is passed, use the copy constructor string. + // It has all the information that the destructor string has. 
+ FuncStr = CodeGenFunction::getNonTrivialCopyConstructorStr( + CaptureTy, Alignment, IsVolatile, Ctx); + // The underscore is necessary here because non-trivial copy constructor + // and destructor strings can start with a number. + Str += llvm::to_string(FuncStr.size()) + "_" + FuncStr; + break; + } + case BlockCaptureEntityKind::None: + break; + } + + return Str; +} + +static std::string getCopyDestroyHelperFuncName( + const SmallVectorImpl<BlockCaptureManagedEntity> &Captures, + CharUnits BlockAlignment, CaptureStrKind StrKind, CodeGenModule &CGM) { + assert((StrKind == CaptureStrKind::CopyHelper || + StrKind == CaptureStrKind::DisposeHelper) && + "unexpected CaptureStrKind"); + std::string Name = StrKind == CaptureStrKind::CopyHelper + ? "__copy_helper_block_" + : "__destroy_helper_block_"; + if (CGM.getLangOpts().Exceptions) + Name += "e"; + if (CGM.getCodeGenOpts().ObjCAutoRefCountExceptions) + Name += "a"; + Name += llvm::to_string(BlockAlignment.getQuantity()) + "_"; + + for (const BlockCaptureManagedEntity &E : Captures) { + Name += llvm::to_string(E.Capture->getOffset().getQuantity()); + Name += getBlockCaptureStr(E, StrKind, BlockAlignment, CGM); + } + + return Name; +} + static void pushCaptureCleanup(BlockCaptureEntityKind CaptureKind, Address Field, QualType CaptureType, - BlockFieldFlags Flags, bool EHOnly, - CodeGenFunction &CGF) { + BlockFieldFlags Flags, bool ForCopyHelper, + VarDecl *Var, CodeGenFunction &CGF) { + bool EHOnly = ForCopyHelper; + switch (CaptureKind) { case BlockCaptureEntityKind::CXXRecord: case BlockCaptureEntityKind::ARCWeak: @@ -1634,15 +1962,34 @@ static void pushCaptureCleanup(BlockCaptureEntityKind CaptureKind, case BlockCaptureEntityKind::BlockObject: { if (!EHOnly || CGF.getLangOpts().Exceptions) { CleanupKind Kind = EHOnly ? 
EHCleanup : NormalAndEHCleanup; - CGF.enterByrefCleanup(Kind, Field, Flags, /*LoadBlockVarAddr*/ true); + // Calls to _Block_object_dispose along the EH path in the copy helper + // function don't throw as newly-copied __block variables always have a + // reference count of 2. + bool CanThrow = + !ForCopyHelper && CGF.cxxDestructorCanThrow(CaptureType); + CGF.enterByrefCleanup(Kind, Field, Flags, /*LoadBlockVarAddr*/ true, + CanThrow); } break; } case BlockCaptureEntityKind::None: - llvm_unreachable("unexpected BlockCaptureEntityKind"); + break; } } +static void setBlockHelperAttributesVisibility(bool CapturesNonExternalType, + llvm::Function *Fn, + const CGFunctionInfo &FI, + CodeGenModule &CGM) { + if (CapturesNonExternalType) { + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI); + } else { + Fn->setVisibility(llvm::GlobalValue::HiddenVisibility); + Fn->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn); + CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn); + } +} /// Generate the copy-helper function for a block closure object: /// static void block_copy_helper(block_t *dst, block_t *src); /// The runtime will have previously initialized 'dst' by doing a @@ -1653,42 +2000,51 @@ static void pushCaptureCleanup(BlockCaptureEntityKind CaptureKind, /// the contents of an individual __block variable to the heap. 
llvm::Constant * CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { + SmallVector<BlockCaptureManagedEntity, 4> CopiedCaptures; + findBlockCapturedManagedEntities(blockInfo, getLangOpts(), CopiedCaptures); + std::string FuncName = + getCopyDestroyHelperFuncName(CopiedCaptures, blockInfo.BlockAlign, + CaptureStrKind::CopyHelper, CGM); + + if (llvm::GlobalValue *Func = CGM.getModule().getNamedValue(FuncName)) + return llvm::ConstantExpr::getBitCast(Func, VoidPtrTy); + ASTContext &C = getContext(); + QualType ReturnTy = C.VoidTy; + FunctionArgList args; - ImplicitParamDecl DstDecl(getContext(), C.VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamDecl DstDecl(C, C.VoidPtrTy, ImplicitParamDecl::Other); args.push_back(&DstDecl); - ImplicitParamDecl SrcDecl(getContext(), C.VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamDecl SrcDecl(C, C.VoidPtrTy, ImplicitParamDecl::Other); args.push_back(&SrcDecl); const CGFunctionInfo &FI = - CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); + CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, args); // FIXME: it would be nice if these were mergeable with things with // identical semantics. 
llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI); llvm::Function *Fn = - llvm::Function::Create(LTy, llvm::GlobalValue::InternalLinkage, - "__copy_helper_block_", &CGM.getModule()); + llvm::Function::Create(LTy, llvm::GlobalValue::LinkOnceODRLinkage, + FuncName, &CGM.getModule()); - IdentifierInfo *II - = &CGM.getContext().Idents.get("__copy_helper_block_"); + IdentifierInfo *II = &C.Idents.get(FuncName); - FunctionDecl *FD = FunctionDecl::Create(C, - C.getTranslationUnitDecl(), - SourceLocation(), - SourceLocation(), II, C.VoidTy, - nullptr, SC_Static, - false, - false); + SmallVector<QualType, 2> ArgTys; + ArgTys.push_back(C.VoidPtrTy); + ArgTys.push_back(C.VoidPtrTy); + QualType FunctionTy = C.getFunctionType(ReturnTy, ArgTys, {}); - CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI); + FunctionDecl *FD = FunctionDecl::Create( + C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II, + FunctionTy, nullptr, SC_Static, false, false); - StartFunction(FD, C.VoidTy, Fn, FI, args); - ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getLocStart()}; + setBlockHelperAttributesVisibility(blockInfo.CapturesNonExternalType, Fn, FI, + CGM); + StartFunction(FD, ReturnTy, Fn, FI, args); + ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getBeginLoc()}; llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo(); Address src = GetAddrOfLocalVar(&SrcDecl); @@ -1699,88 +2055,81 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { dst = Address(Builder.CreateLoad(dst), blockInfo.BlockAlign); dst = Builder.CreateBitCast(dst, structPtrTy, "block.dest"); - SmallVector<BlockCaptureManagedEntity, 4> CopiedCaptures; - findBlockCapturedManagedEntities(blockInfo, getLangOpts(), CopiedCaptures, - computeCopyInfoForBlockCapture); - for (const auto &CopiedCapture : CopiedCaptures) { - const BlockDecl::Capture &CI = CopiedCapture.CI; - const CGBlockInfo::Capture &capture = CopiedCapture.Capture; + const 
BlockDecl::Capture &CI = *CopiedCapture.CI; + const CGBlockInfo::Capture &capture = *CopiedCapture.Capture; QualType captureType = CI.getVariable()->getType(); - BlockFieldFlags flags = CopiedCapture.Flags; + BlockFieldFlags flags = CopiedCapture.CopyFlags; unsigned index = capture.getIndex(); Address srcField = Builder.CreateStructGEP(src, index, capture.getOffset()); Address dstField = Builder.CreateStructGEP(dst, index, capture.getOffset()); - // If there's an explicit copy expression, we do that. - if (CI.getCopyExpr()) { - assert(CopiedCapture.Kind == BlockCaptureEntityKind::CXXRecord); + switch (CopiedCapture.CopyKind) { + case BlockCaptureEntityKind::CXXRecord: + // If there's an explicit copy expression, we do that. + assert(CI.getCopyExpr() && "copy expression for variable is missing"); EmitSynthesizedCXXCopyCtor(dstField, srcField, CI.getCopyExpr()); - } else if (CopiedCapture.Kind == BlockCaptureEntityKind::ARCWeak) { + break; + case BlockCaptureEntityKind::ARCWeak: EmitARCCopyWeak(dstField, srcField); - // If this is a C struct that requires non-trivial copy construction, emit a - // call to its copy constructor. - } else if (CopiedCapture.Kind == - BlockCaptureEntityKind::NonTrivialCStruct) { + break; + case BlockCaptureEntityKind::NonTrivialCStruct: { + // If this is a C struct that requires non-trivial copy construction, + // emit a call to its copy constructor. QualType varType = CI.getVariable()->getType(); callCStructCopyConstructor(MakeAddrLValue(dstField, varType), MakeAddrLValue(srcField, varType)); - } else { + break; + } + case BlockCaptureEntityKind::ARCStrong: { llvm::Value *srcValue = Builder.CreateLoad(srcField, "blockcopy.src"); - if (CopiedCapture.Kind == BlockCaptureEntityKind::ARCStrong) { - // At -O0, store null into the destination field (so that the - // storeStrong doesn't over-release) and then call storeStrong. - // This is a workaround to not having an initStrong call. 
- if (CGM.getCodeGenOpts().OptimizationLevel == 0) { - auto *ty = cast<llvm::PointerType>(srcValue->getType()); - llvm::Value *null = llvm::ConstantPointerNull::get(ty); - Builder.CreateStore(null, dstField); - EmitARCStoreStrongCall(dstField, srcValue, true); - - // With optimization enabled, take advantage of the fact that - // the blocks runtime guarantees a memcpy of the block data, and - // just emit a retain of the src field. - } else { - EmitARCRetainNonBlock(srcValue); - - // Unless EH cleanup is required, we don't need this anymore, so kill - // it. It's not quite worth the annoyance to avoid creating it in the - // first place. - if (!needsEHCleanup(captureType.isDestructedType())) - cast<llvm::Instruction>(dstField.getPointer())->eraseFromParent(); - } + // At -O0, store null into the destination field (so that the + // storeStrong doesn't over-release) and then call storeStrong. + // This is a workaround to not having an initStrong call. + if (CGM.getCodeGenOpts().OptimizationLevel == 0) { + auto *ty = cast<llvm::PointerType>(srcValue->getType()); + llvm::Value *null = llvm::ConstantPointerNull::get(ty); + Builder.CreateStore(null, dstField); + EmitARCStoreStrongCall(dstField, srcValue, true); + + // With optimization enabled, take advantage of the fact that + // the blocks runtime guarantees a memcpy of the block data, and + // just emit a retain of the src field. 
} else { - assert(CopiedCapture.Kind == BlockCaptureEntityKind::BlockObject); - srcValue = Builder.CreateBitCast(srcValue, VoidPtrTy); - llvm::Value *dstAddr = - Builder.CreateBitCast(dstField.getPointer(), VoidPtrTy); - llvm::Value *args[] = { - dstAddr, srcValue, llvm::ConstantInt::get(Int32Ty, flags.getBitMask()) - }; - - const VarDecl *variable = CI.getVariable(); - bool copyCanThrow = false; - if (CI.isByRef() && variable->getType()->getAsCXXRecordDecl()) { - const Expr *copyExpr = - CGM.getContext().getBlockVarCopyInits(variable); - if (copyExpr) { - copyCanThrow = true; // FIXME: reuse the noexcept logic - } - } + EmitARCRetainNonBlock(srcValue); - if (copyCanThrow) { - EmitRuntimeCallOrInvoke(CGM.getBlockObjectAssign(), args); - } else { - EmitNounwindRuntimeCall(CGM.getBlockObjectAssign(), args); - } + // Unless EH cleanup is required, we don't need this anymore, so kill + // it. It's not quite worth the annoyance to avoid creating it in the + // first place. + if (!needsEHCleanup(captureType.isDestructedType())) + cast<llvm::Instruction>(dstField.getPointer())->eraseFromParent(); } + break; + } + case BlockCaptureEntityKind::BlockObject: { + llvm::Value *srcValue = Builder.CreateLoad(srcField, "blockcopy.src"); + srcValue = Builder.CreateBitCast(srcValue, VoidPtrTy); + llvm::Value *dstAddr = + Builder.CreateBitCast(dstField.getPointer(), VoidPtrTy); + llvm::Value *args[] = { + dstAddr, srcValue, llvm::ConstantInt::get(Int32Ty, flags.getBitMask()) + }; + + if (CI.isByRef() && C.getBlockVarCopyInit(CI.getVariable()).canThrow()) + EmitRuntimeCallOrInvoke(CGM.getBlockObjectAssign(), args); + else + EmitNounwindRuntimeCall(CGM.getBlockObjectAssign(), args); + break; + } + case BlockCaptureEntityKind::None: + continue; } // Ensure that we destroy the copied object if an exception is thrown later // in the helper function. 
- pushCaptureCleanup(CopiedCapture.Kind, dstField, captureType, flags, /*EHOnly*/ true, - *this); + pushCaptureCleanup(CopiedCapture.CopyKind, dstField, captureType, flags, + /*ForCopyHelper*/ true, CI.getVariable(), *this); } FinishFunction(); @@ -1800,7 +2149,7 @@ getBlockFieldFlagsForObjCObjectPointer(const BlockDecl::Capture &CI, static std::pair<BlockCaptureEntityKind, BlockFieldFlags> computeDestroyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T, const LangOptions &LangOpts) { - if (CI.isByRef()) { + if (CI.isEscapingByref()) { BlockFieldFlags Flags = BLOCK_FIELD_IS_BYREF; if (T.isObjCGCWeak()) Flags |= BLOCK_FIELD_IS_WEAK; @@ -1844,37 +2193,50 @@ computeDestroyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T, /// variable. llvm::Constant * CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { + SmallVector<BlockCaptureManagedEntity, 4> DestroyedCaptures; + findBlockCapturedManagedEntities(blockInfo, getLangOpts(), DestroyedCaptures); + std::string FuncName = + getCopyDestroyHelperFuncName(DestroyedCaptures, blockInfo.BlockAlign, + CaptureStrKind::DisposeHelper, CGM); + + if (llvm::GlobalValue *Func = CGM.getModule().getNamedValue(FuncName)) + return llvm::ConstantExpr::getBitCast(Func, VoidPtrTy); + ASTContext &C = getContext(); + QualType ReturnTy = C.VoidTy; + FunctionArgList args; - ImplicitParamDecl SrcDecl(getContext(), C.VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamDecl SrcDecl(C, C.VoidPtrTy, ImplicitParamDecl::Other); args.push_back(&SrcDecl); const CGFunctionInfo &FI = - CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); + CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, args); // FIXME: We'd like to put these into a mergable by content, with // internal linkage. 
llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI); llvm::Function *Fn = - llvm::Function::Create(LTy, llvm::GlobalValue::InternalLinkage, - "__destroy_helper_block_", &CGM.getModule()); + llvm::Function::Create(LTy, llvm::GlobalValue::LinkOnceODRLinkage, + FuncName, &CGM.getModule()); - IdentifierInfo *II - = &CGM.getContext().Idents.get("__destroy_helper_block_"); + IdentifierInfo *II = &C.Idents.get(FuncName); + + SmallVector<QualType, 1> ArgTys; + ArgTys.push_back(C.VoidPtrTy); + QualType FunctionTy = C.getFunctionType(ReturnTy, ArgTys, {}); - FunctionDecl *FD = FunctionDecl::Create(C, C.getTranslationUnitDecl(), - SourceLocation(), - SourceLocation(), II, C.VoidTy, - nullptr, SC_Static, - false, false); + FunctionDecl *FD = FunctionDecl::Create( + C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II, + FunctionTy, nullptr, SC_Static, false, false); - CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI); + setBlockHelperAttributesVisibility(blockInfo.CapturesNonExternalType, Fn, FI, + CGM); + StartFunction(FD, ReturnTy, Fn, FI, args); + markAsIgnoreThreadCheckingAtRuntime(Fn); - StartFunction(FD, C.VoidTy, Fn, FI, args); - ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getLocStart()}; + ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getBeginLoc()}; llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo(); @@ -1884,20 +2246,17 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { CodeGenFunction::RunCleanupsScope cleanups(*this); - SmallVector<BlockCaptureManagedEntity, 4> DestroyedCaptures; - findBlockCapturedManagedEntities(blockInfo, getLangOpts(), DestroyedCaptures, - computeDestroyInfoForBlockCapture); - for (const auto &DestroyedCapture : DestroyedCaptures) { - const BlockDecl::Capture &CI = DestroyedCapture.CI; - const CGBlockInfo::Capture &capture = DestroyedCapture.Capture; - BlockFieldFlags flags = DestroyedCapture.Flags; + const BlockDecl::Capture &CI = 
*DestroyedCapture.CI; + const CGBlockInfo::Capture &capture = *DestroyedCapture.Capture; + BlockFieldFlags flags = DestroyedCapture.DisposeFlags; Address srcField = Builder.CreateStructGEP(src, capture.getIndex(), capture.getOffset()); - pushCaptureCleanup(DestroyedCapture.Kind, srcField, - CI.getVariable()->getType(), flags, /*EHOnly*/ false, *this); + pushCaptureCleanup(DestroyedCapture.DisposeKind, srcField, + CI.getVariable()->getType(), flags, + /*ForCopyHelper*/ false, CI.getVariable(), *this); } cleanups.ForceCleanup(); @@ -1937,7 +2296,7 @@ public: field = CGF.Builder.CreateBitCast(field, CGF.Int8PtrTy->getPointerTo(0)); llvm::Value *value = CGF.Builder.CreateLoad(field); - CGF.BuildBlockRelease(value, Flags | BLOCK_BYREF_CALLER); + CGF.BuildBlockRelease(value, Flags | BLOCK_BYREF_CALLER, false); } void profileImpl(llvm::FoldingSetNodeID &id) const override { @@ -2093,19 +2452,17 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo, BlockByrefHelpers &generator) { ASTContext &Context = CGF.getContext(); - QualType R = Context.VoidTy; + QualType ReturnTy = Context.VoidTy; FunctionArgList args; - ImplicitParamDecl Dst(CGF.getContext(), Context.VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamDecl Dst(Context, Context.VoidPtrTy, ImplicitParamDecl::Other); args.push_back(&Dst); - ImplicitParamDecl Src(CGF.getContext(), Context.VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamDecl Src(Context, Context.VoidPtrTy, ImplicitParamDecl::Other); args.push_back(&Src); const CGFunctionInfo &FI = - CGF.CGM.getTypes().arrangeBuiltinFunctionDeclaration(R, args); + CGF.CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, args); llvm::FunctionType *LTy = CGF.CGM.getTypes().GetFunctionType(FI); @@ -2118,16 +2475,18 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo, IdentifierInfo *II = &Context.Idents.get("__Block_byref_object_copy_"); - FunctionDecl *FD = FunctionDecl::Create(Context, - 
Context.getTranslationUnitDecl(), - SourceLocation(), - SourceLocation(), II, R, nullptr, - SC_Static, - false, false); + SmallVector<QualType, 2> ArgTys; + ArgTys.push_back(Context.VoidPtrTy); + ArgTys.push_back(Context.VoidPtrTy); + QualType FunctionTy = Context.getFunctionType(ReturnTy, ArgTys, {}); + + FunctionDecl *FD = FunctionDecl::Create( + Context, Context.getTranslationUnitDecl(), SourceLocation(), + SourceLocation(), II, FunctionTy, nullptr, SC_Static, false, false); CGF.CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI); - CGF.StartFunction(FD, R, Fn, FI, args); + CGF.StartFunction(FD, ReturnTy, Fn, FI, args); if (generator.needsCopy()) { llvm::Type *byrefPtrType = byrefInfo.Type->getPointerTo(0); @@ -2192,12 +2551,13 @@ generateByrefDisposeHelper(CodeGenFunction &CGF, IdentifierInfo *II = &Context.Idents.get("__Block_byref_object_dispose_"); - FunctionDecl *FD = FunctionDecl::Create(Context, - Context.getTranslationUnitDecl(), - SourceLocation(), - SourceLocation(), II, R, nullptr, - SC_Static, - false, false); + SmallVector<QualType, 1> ArgTys; + ArgTys.push_back(Context.VoidPtrTy); + QualType FunctionTy = Context.getFunctionType(R, ArgTys, {}); + + FunctionDecl *FD = FunctionDecl::Create( + Context, Context.getTranslationUnitDecl(), SourceLocation(), + SourceLocation(), II, FunctionTy, nullptr, SC_Static, false, false); CGF.CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI); @@ -2254,6 +2614,9 @@ BlockByrefHelpers * CodeGenFunction::buildByrefHelpers(llvm::StructType &byrefType, const AutoVarEmission &emission) { const VarDecl &var = *emission.Variable; + assert(var.isEscapingByref() && + "only escaping __block variables need byref helpers"); + QualType type = var.getType(); auto &byrefInfo = getBlockByrefInfo(&var); @@ -2264,7 +2627,8 @@ CodeGenFunction::buildByrefHelpers(llvm::StructType &byrefType, byrefInfo.ByrefAlignment.alignmentAtOffset(byrefInfo.FieldOffset); if (const CXXRecordDecl *record = type->getAsCXXRecordDecl()) { - const 
Expr *copyExpr = CGM.getContext().getBlockVarCopyInits(&var); + const Expr *copyExpr = + CGM.getContext().getBlockVarCopyInit(&var).getCopyExpr(); if (!copyExpr && record->hasTrivialDestructor()) return nullptr; return ::buildByrefHelpers( @@ -2567,19 +2931,25 @@ void CodeGenFunction::emitByrefStructureInit(const AutoVarEmission &emission) { } } -void CodeGenFunction::BuildBlockRelease(llvm::Value *V, BlockFieldFlags flags) { +void CodeGenFunction::BuildBlockRelease(llvm::Value *V, BlockFieldFlags flags, + bool CanThrow) { llvm::Value *F = CGM.getBlockObjectDispose(); llvm::Value *args[] = { Builder.CreateBitCast(V, Int8PtrTy), llvm::ConstantInt::get(Int32Ty, flags.getBitMask()) }; - EmitNounwindRuntimeCall(F, args); // FIXME: throwing destructors? + + if (CanThrow) + EmitRuntimeCallOrInvoke(F, args); + else + EmitNounwindRuntimeCall(F, args); } void CodeGenFunction::enterByrefCleanup(CleanupKind Kind, Address Addr, BlockFieldFlags Flags, - bool LoadBlockVarAddr) { - EHStack.pushCleanup<CallBlockRelease>(Kind, Addr, Flags, LoadBlockVarAddr); + bool LoadBlockVarAddr, bool CanThrow) { + EHStack.pushCleanup<CallBlockRelease>(Kind, Addr, Flags, LoadBlockVarAddr, + CanThrow); } /// Adjust the declaration of something from the blocks API. 
diff --git a/lib/CodeGen/CGBlocks.h b/lib/CodeGen/CGBlocks.h index 5abf82b3f6e1..3f9fc16d9b10 100644 --- a/lib/CodeGen/CGBlocks.h +++ b/lib/CodeGen/CGBlocks.h @@ -60,7 +60,7 @@ enum BlockLiteralFlags { BLOCK_IS_GLOBAL = (1 << 28), BLOCK_USE_STRET = (1 << 29), BLOCK_HAS_SIGNATURE = (1 << 30), - BLOCK_HAS_EXTENDED_LAYOUT = (1 << 31) + BLOCK_HAS_EXTENDED_LAYOUT = (1u << 31) }; class BlockFlags { uint32_t flags; @@ -132,6 +132,9 @@ public: friend bool operator&(BlockFieldFlags l, BlockFieldFlags r) { return (l.flags & r.flags); } + bool operator==(BlockFieldFlags Other) const { + return flags == Other.flags; + } }; inline BlockFieldFlags operator|(BlockFieldFlag_t l, BlockFieldFlag_t r) { return BlockFieldFlags(l) | BlockFieldFlags(r); @@ -231,6 +234,11 @@ public: /// and their layout meta-data has been generated. bool HasCapturedVariableLayout : 1; + /// Indicates whether an object of a non-external C++ class is captured. This + /// bit is used to determine the linkage of the block copy/destroy helper + /// functions. + bool CapturesNonExternalType : 1; + /// The mapping of allocated indexes within the block. 
llvm::DenseMap<const VarDecl*, Capture> Captures; diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index e99121c46d9b..a718f2f19aa6 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -21,10 +21,11 @@ #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" -#include "clang/Analysis/Analyses/OSLog.h" +#include "clang/AST/OSLog.h" #include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" @@ -93,11 +94,11 @@ static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, return V; } -/// Utility to insert an atomic instruction based on Instrinsic::ID +/// Utility to insert an atomic instruction based on Intrinsic::ID /// and the expression node. -static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF, - llvm::AtomicRMWInst::BinOp Kind, - const CallExpr *E) { +static Value *MakeBinaryAtomicValue( + CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, + AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) { QualType T = E->getType(); assert(E->getArg(0)->getType()->isPointerType()); assert(CGF.getContext().hasSameUnqualifiedType(T, @@ -119,7 +120,7 @@ static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF, Args[1] = EmitToInt(CGF, Args[1], T, IntType); llvm::Value *Result = CGF.Builder.CreateAtomicRMW( - Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); + Kind, Args[0], Args[1], Ordering); return EmitFromInt(CGF, Result, T, ValueType); } @@ -151,7 +152,7 @@ static RValue EmitBinaryAtomic(CodeGenFunction &CGF, return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E)); } -/// Utility to insert an atomic instruction based Instrinsic::ID and +/// Utility to insert an atomic instruction based Intrinsic::ID and /// the expression node, where the return value is 
the result of the /// operation. static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, @@ -200,6 +201,9 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, /// cmpxchg result or the old value. /// /// @returns result of cmpxchg, according to ReturnBool +/// +/// Note: In order to lower Microsoft's _InterlockedCompareExchange* intrinsics +/// invoke the function EmitAtomicCmpXchgForMSIntrin. static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, bool ReturnBool) { QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType(); @@ -230,6 +234,72 @@ static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, ValueType); } +/// This function should be invoked to emit atomic cmpxchg for Microsoft's +/// _InterlockedCompareExchange* intrinsics which have the following signature: +/// T _InterlockedCompareExchange(T volatile *Destination, +/// T Exchange, +/// T Comparand); +/// +/// Whereas the llvm 'cmpxchg' instruction has the following syntax: +/// cmpxchg *Destination, Comparand, Exchange. +/// So we need to swap Comparand and Exchange when invoking +/// CreateAtomicCmpXchg. That is the reason we could not use the above utility +/// function MakeAtomicCmpXchgValue since it expects the arguments to be +/// already swapped. 
+ +static +Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, + AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) { + assert(E->getArg(0)->getType()->isPointerType()); + assert(CGF.getContext().hasSameUnqualifiedType( + E->getType(), E->getArg(0)->getType()->getPointeeType())); + assert(CGF.getContext().hasSameUnqualifiedType(E->getType(), + E->getArg(1)->getType())); + assert(CGF.getContext().hasSameUnqualifiedType(E->getType(), + E->getArg(2)->getType())); + + auto *Destination = CGF.EmitScalarExpr(E->getArg(0)); + auto *Comparand = CGF.EmitScalarExpr(E->getArg(2)); + auto *Exchange = CGF.EmitScalarExpr(E->getArg(1)); + + // For Release ordering, the failure ordering should be Monotonic. + auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ? + AtomicOrdering::Monotonic : + SuccessOrdering; + + auto *Result = CGF.Builder.CreateAtomicCmpXchg( + Destination, Comparand, Exchange, + SuccessOrdering, FailureOrdering); + Result->setVolatile(true); + return CGF.Builder.CreateExtractValue(Result, 0); +} + +static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E, + AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) { + assert(E->getArg(0)->getType()->isPointerType()); + + auto *IntTy = CGF.ConvertType(E->getType()); + auto *Result = CGF.Builder.CreateAtomicRMW( + AtomicRMWInst::Add, + CGF.EmitScalarExpr(E->getArg(0)), + ConstantInt::get(IntTy, 1), + Ordering); + return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1)); +} + +static Value *EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E, + AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) { + assert(E->getArg(0)->getType()->isPointerType()); + + auto *IntTy = CGF.ConvertType(E->getType()); + auto *Result = CGF.Builder.CreateAtomicRMW( + AtomicRMWInst::Sub, + CGF.EmitScalarExpr(E->getArg(0)), + ConstantInt::get(IntTy, 1), + Ordering); + return CGF.Builder.CreateSub(Result, 
ConstantInt::get(IntTy, 1)); +} + // Emit a simple mangled intrinsic that has 1 argument and a return type // matching the argument type. static Value *emitUnaryBuiltin(CodeGenFunction &CGF, @@ -316,7 +386,7 @@ static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) { static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, const CallExpr *E, llvm::Constant *calleeValue) { - CGCallee callee = CGCallee::forDirect(calleeValue, FD); + CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD)); return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot()); } @@ -461,7 +531,7 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, assert(DIter != LocalDeclMap.end()); return EmitLoadOfScalar(DIter->second, /*volatile=*/false, - getContext().getSizeType(), E->getLocStart()); + getContext().getSizeType(), E->getBeginLoc()); } } @@ -485,7 +555,7 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, } namespace { -/// A struct to generically desribe a bit test intrinsic. +/// A struct to generically describe a bit test intrinsic. struct BitTest { enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set }; enum InterlockingKind : uint8_t { @@ -711,8 +781,11 @@ static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind, } else { Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex"; Arg1Ty = CGF.Int8PtrTy; - Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::frameaddress), - llvm::ConstantInt::get(CGF.Int32Ty, 0)); + if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) { + Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::sponentry)); + } else + Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::frameaddress), + llvm::ConstantInt::get(CGF.Int32Ty, 0)); } // Mark the call site and declaration with ReturnsTwice. 
@@ -745,6 +818,30 @@ enum class CodeGenFunction::MSVCIntrin { _InterlockedIncrement, _InterlockedOr, _InterlockedXor, + _InterlockedExchangeAdd_acq, + _InterlockedExchangeAdd_rel, + _InterlockedExchangeAdd_nf, + _InterlockedExchange_acq, + _InterlockedExchange_rel, + _InterlockedExchange_nf, + _InterlockedCompareExchange_acq, + _InterlockedCompareExchange_rel, + _InterlockedCompareExchange_nf, + _InterlockedOr_acq, + _InterlockedOr_rel, + _InterlockedOr_nf, + _InterlockedXor_acq, + _InterlockedXor_rel, + _InterlockedXor_nf, + _InterlockedAnd_acq, + _InterlockedAnd_rel, + _InterlockedAnd_nf, + _InterlockedIncrement_acq, + _InterlockedIncrement_rel, + _InterlockedIncrement_nf, + _InterlockedDecrement_acq, + _InterlockedDecrement_rel, + _InterlockedDecrement_nf, __fastfail, }; @@ -811,25 +908,74 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E); case MSVCIntrin::_InterlockedXor: return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E); - - case MSVCIntrin::_InterlockedDecrement: { - llvm::Type *IntTy = ConvertType(E->getType()); - AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( - AtomicRMWInst::Sub, - EmitScalarExpr(E->getArg(0)), - ConstantInt::get(IntTy, 1), - llvm::AtomicOrdering::SequentiallyConsistent); - return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1)); - } - case MSVCIntrin::_InterlockedIncrement: { - llvm::Type *IntTy = ConvertType(E->getType()); - AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( - AtomicRMWInst::Add, - EmitScalarExpr(E->getArg(0)), - ConstantInt::get(IntTy, 1), - llvm::AtomicOrdering::SequentiallyConsistent); - return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1)); - } + case MSVCIntrin::_InterlockedExchangeAdd_acq: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E, + AtomicOrdering::Acquire); + case MSVCIntrin::_InterlockedExchangeAdd_rel: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E, + AtomicOrdering::Release); + case 
MSVCIntrin::_InterlockedExchangeAdd_nf: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E, + AtomicOrdering::Monotonic); + case MSVCIntrin::_InterlockedExchange_acq: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E, + AtomicOrdering::Acquire); + case MSVCIntrin::_InterlockedExchange_rel: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E, + AtomicOrdering::Release); + case MSVCIntrin::_InterlockedExchange_nf: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E, + AtomicOrdering::Monotonic); + case MSVCIntrin::_InterlockedCompareExchange_acq: + return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire); + case MSVCIntrin::_InterlockedCompareExchange_rel: + return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release); + case MSVCIntrin::_InterlockedCompareExchange_nf: + return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic); + case MSVCIntrin::_InterlockedOr_acq: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E, + AtomicOrdering::Acquire); + case MSVCIntrin::_InterlockedOr_rel: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E, + AtomicOrdering::Release); + case MSVCIntrin::_InterlockedOr_nf: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E, + AtomicOrdering::Monotonic); + case MSVCIntrin::_InterlockedXor_acq: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E, + AtomicOrdering::Acquire); + case MSVCIntrin::_InterlockedXor_rel: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E, + AtomicOrdering::Release); + case MSVCIntrin::_InterlockedXor_nf: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E, + AtomicOrdering::Monotonic); + case MSVCIntrin::_InterlockedAnd_acq: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E, + AtomicOrdering::Acquire); + case MSVCIntrin::_InterlockedAnd_rel: + return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E, + AtomicOrdering::Release); + case MSVCIntrin::_InterlockedAnd_nf: + return 
MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E, + AtomicOrdering::Monotonic); + case MSVCIntrin::_InterlockedIncrement_acq: + return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire); + case MSVCIntrin::_InterlockedIncrement_rel: + return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release); + case MSVCIntrin::_InterlockedIncrement_nf: + return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic); + case MSVCIntrin::_InterlockedDecrement_acq: + return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire); + case MSVCIntrin::_InterlockedDecrement_rel: + return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release); + case MSVCIntrin::_InterlockedDecrement_nf: + return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic); + + case MSVCIntrin::_InterlockedDecrement: + return EmitAtomicDecrementValue(*this, E); + case MSVCIntrin::_InterlockedIncrement: + return EmitAtomicIncrementValue(*this, E); case MSVCIntrin::__fastfail: { // Request immediate process termination from the kernel. 
The instruction @@ -923,35 +1069,42 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( if (llvm::Function *F = CGM.getModule().getFunction(Name)) return F; + llvm::SmallVector<QualType, 4> ArgTys; llvm::SmallVector<ImplicitParamDecl, 4> Params; Params.emplace_back(Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy, ImplicitParamDecl::Other); + ArgTys.emplace_back(Ctx.VoidPtrTy); for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) { char Size = Layout.Items[I].getSizeByte(); if (!Size) continue; + QualType ArgTy = getOSLogArgType(Ctx, Size); Params.emplace_back( Ctx, nullptr, SourceLocation(), - &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), - getOSLogArgType(Ctx, Size), ImplicitParamDecl::Other); + &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy, + ImplicitParamDecl::Other); + ArgTys.emplace_back(ArgTy); } FunctionArgList Args; for (auto &P : Params) Args.push_back(&P); + QualType ReturnTy = Ctx.VoidTy; + QualType FuncionTy = Ctx.getFunctionType(ReturnTy, ArgTys, {}); + // The helper function has linkonce_odr linkage to enable the linker to merge // identical functions. To ensure the merging always happens, 'noinline' is // attached to the function when compiling with -Oz. const CGFunctionInfo &FI = - CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args); + CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args); llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI); llvm::Function *Fn = llvm::Function::Create( FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule()); Fn->setVisibility(llvm::GlobalValue::HiddenVisibility); - CGM.SetLLVMFunctionAttributes(nullptr, FI, Fn); + CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn); CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn); // Attach 'noinline' at -Oz. 
@@ -962,9 +1115,9 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( IdentifierInfo *II = &Ctx.Idents.get(Name); FunctionDecl *FD = FunctionDecl::Create( Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II, - Ctx.VoidTy, nullptr, SC_PrivateExtern, false, false); + FuncionTy, nullptr, SC_PrivateExtern, false, false); - StartFunction(FD, Ctx.VoidTy, Fn, FI, Args); + StartFunction(FD, ReturnTy, Fn, FI, Args); // Create a scope with an artificial location for the body of this function. auto AL = ApplyDebugLocation::CreateArtificial(*this); @@ -1024,7 +1177,12 @@ RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) { llvm::Value *ArgVal; - if (const Expr *TheExpr = Item.getExpr()) { + if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) { + uint64_t Val = 0; + for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I) + Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8; + ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val)); + } else if (const Expr *TheExpr = Item.getExpr()) { ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false); // Check if this is a retainable type. @@ -1077,7 +1235,7 @@ static bool isSpecialMixedSignMultiply(unsigned BuiltinID, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo) { return BuiltinID == Builtin::BI__builtin_mul_overflow && - Op1Info.Width == Op2Info.Width && Op1Info.Width >= ResultInfo.Width && + std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width && Op1Info.Signed != Op2Info.Signed; } @@ -1098,11 +1256,20 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1; llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp); llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp); + unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width; + unsigned UnsignedOpWidth = Op1Info.Signed ? 
Op2Info.Width : Op1Info.Width; + + // One of the operands may be smaller than the other. If so, [s|z]ext it. + if (SignedOpWidth < UnsignedOpWidth) + Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext"); + if (UnsignedOpWidth < SignedOpWidth) + Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext"); llvm::Type *OpTy = Signed->getType(); llvm::Value *Zero = llvm::Constant::getNullValue(OpTy); Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg); llvm::Type *ResTy = ResultPtr.getElementType(); + unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width); // Take the absolute value of the signed operand. llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero); @@ -1120,8 +1287,8 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, if (ResultInfo.Signed) { // Signed overflow occurs if the result is greater than INT_MAX or lesser // than INT_MIN, i.e when |Result| > (INT_MAX + IsNegative). - auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width) - .zextOrSelf(Op1Info.Width); + auto IntMax = + llvm::APInt::getSignedMaxValue(ResultInfo.Width).zextOrSelf(OpWidth); llvm::Value *MaxResult = CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax), CGF.Builder.CreateZExt(IsNegative, OpTy)); @@ -1139,9 +1306,9 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, llvm::Value *Underflow = CGF.Builder.CreateAnd( IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult)); Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow); - if (ResultInfo.Width < Op1Info.Width) { + if (ResultInfo.Width < OpWidth) { auto IntMax = - llvm::APInt::getMaxValue(ResultInfo.Width).zext(Op1Info.Width); + llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth); llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT( UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax)); Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow); @@ -1252,9 +1419,61 @@ static llvm::Value 
*dumpRecord(CodeGenFunction &CGF, QualType RType, return Res; } -RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, - unsigned BuiltinID, const CallExpr *E, +static bool +TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty, + llvm::SmallPtrSetImpl<const Decl *> &Seen) { + if (const auto *Arr = Ctx.getAsArrayType(Ty)) + Ty = Ctx.getBaseElementType(Arr); + + const auto *Record = Ty->getAsCXXRecordDecl(); + if (!Record) + return false; + + // We've already checked this type, or are in the process of checking it. + if (!Seen.insert(Record).second) + return false; + + assert(Record->hasDefinition() && + "Incomplete types should already be diagnosed"); + + if (Record->isDynamicClass()) + return true; + + for (FieldDecl *F : Record->fields()) { + if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen)) + return true; + } + return false; +} + +/// Determine if the specified type requires laundering by checking if it is a +/// dynamic class type or contains a subobject which is a dynamic class type. +static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) { + if (!CGM.getCodeGenOpts().StrictVTablePointers) + return false; + llvm::SmallPtrSet<const Decl *, 16> Seen; + return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen); +} + +RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) { + llvm::Value *Src = EmitScalarExpr(E->getArg(0)); + llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1)); + + // The builtin's shift arg may have a different type than the source arg and + // result, but the LLVM intrinsic uses the same type for all values. + llvm::Type *Ty = Src->getType(); + ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false); + + // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same. + unsigned IID = IsRotateRight ? 
Intrinsic::fshr : Intrinsic::fshl; + Value *F = CGM.getIntrinsic(IID, Ty); + return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt })); +} + +RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, + const CallExpr *E, ReturnValueSlot ReturnValue) { + const FunctionDecl *FD = GD.getDecl()->getAsFunction(); // See if we can constant fold this builtin. If so, don't emit it at all. Expr::EvalResult Result; if (E->EvaluateAsRValue(Result, CGM.getContext()) && @@ -1537,6 +1756,26 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(ComplexVal.second); } + case Builtin::BI__builtin_clrsb: + case Builtin::BI__builtin_clrsbl: + case Builtin::BI__builtin_clrsbll: { + // clrsb(x) -> clz(x < 0 ? ~x : x) - 1 or + Value *ArgValue = EmitScalarExpr(E->getArg(0)); + + llvm::Type *ArgType = ArgValue->getType(); + Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); + + llvm::Type *ResultType = ConvertType(E->getType()); + Value *Zero = llvm::Constant::getNullValue(ArgType); + Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg"); + Value *Inverse = Builder.CreateNot(ArgValue, "not"); + Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue); + Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()}); + Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1)); + Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, + "cast"); + return RValue::get(Result); + } case Builtin::BI__builtin_ctzs: case Builtin::BI__builtin_ctz: case Builtin::BI__builtin_ctzl: @@ -1609,6 +1848,21 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, "cast"); return RValue::get(Result); } + case Builtin::BI__lzcnt16: + case Builtin::BI__lzcnt: + case Builtin::BI__lzcnt64: { + Value *ArgValue = EmitScalarExpr(E->getArg(0)); + + llvm::Type *ArgType = ArgValue->getType(); + Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); + + llvm::Type *ResultType = 
ConvertType(E->getType()); + Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()}); + if (Result->getType() != ResultType) + Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, + "cast"); + return RValue::get(Result); + } case Builtin::BI__popcnt16: case Builtin::BI__popcnt: case Builtin::BI__popcnt64: @@ -1627,46 +1881,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, "cast"); return RValue::get(Result); } - case Builtin::BI_rotr8: - case Builtin::BI_rotr16: - case Builtin::BI_rotr: - case Builtin::BI_lrotr: - case Builtin::BI_rotr64: { - Value *Val = EmitScalarExpr(E->getArg(0)); - Value *Shift = EmitScalarExpr(E->getArg(1)); - - llvm::Type *ArgType = Val->getType(); - Shift = Builder.CreateIntCast(Shift, ArgType, false); - unsigned ArgWidth = ArgType->getIntegerBitWidth(); - Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1); - - Value *RightShiftAmt = Builder.CreateAnd(Shift, Mask); - Value *RightShifted = Builder.CreateLShr(Val, RightShiftAmt); - Value *LeftShiftAmt = Builder.CreateAnd(Builder.CreateNeg(Shift), Mask); - Value *LeftShifted = Builder.CreateShl(Val, LeftShiftAmt); - Value *Result = Builder.CreateOr(LeftShifted, RightShifted); - return RValue::get(Result); - } - case Builtin::BI_rotl8: - case Builtin::BI_rotl16: - case Builtin::BI_rotl: - case Builtin::BI_lrotl: - case Builtin::BI_rotl64: { - Value *Val = EmitScalarExpr(E->getArg(0)); - Value *Shift = EmitScalarExpr(E->getArg(1)); - - llvm::Type *ArgType = Val->getType(); - Shift = Builder.CreateIntCast(Shift, ArgType, false); - unsigned ArgWidth = ArgType->getIntegerBitWidth(); - Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1); - - Value *LeftShiftAmt = Builder.CreateAnd(Shift, Mask); - Value *LeftShifted = Builder.CreateShl(Val, LeftShiftAmt); - Value *RightShiftAmt = Builder.CreateAnd(Builder.CreateNeg(Shift), Mask); - Value *RightShifted = Builder.CreateLShr(Val, RightShiftAmt); - Value *Result = 
Builder.CreateOr(LeftShifted, RightShifted); - return RValue::get(Result); - } case Builtin::BI__builtin_unpredictable: { // Always return the argument of __builtin_unpredictable. LLVM does not // handle this builtin. Metadata for this builtin should be added directly @@ -1690,15 +1904,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(Result); } case Builtin::BI__builtin_assume_aligned: { - Value *PtrValue = EmitScalarExpr(E->getArg(0)); + const Expr *Ptr = E->getArg(0); + Value *PtrValue = EmitScalarExpr(Ptr); Value *OffsetValue = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr; Value *AlignmentValue = EmitScalarExpr(E->getArg(1)); ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue); - unsigned Alignment = (unsigned) AlignmentCI->getZExtValue(); + unsigned Alignment = (unsigned)AlignmentCI->getZExtValue(); - EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue); + EmitAlignmentAssumption(PtrValue, Ptr, /*The expr loc is sufficient.*/ SourceLocation(), + Alignment, OffsetValue); return RValue::get(PtrValue); } case Builtin::BI__assume: @@ -1721,6 +1937,48 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__builtin_bitreverse64: { return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse)); } + case Builtin::BI__builtin_rotateleft8: + case Builtin::BI__builtin_rotateleft16: + case Builtin::BI__builtin_rotateleft32: + case Builtin::BI__builtin_rotateleft64: + case Builtin::BI_rotl8: // Microsoft variants of rotate left + case Builtin::BI_rotl16: + case Builtin::BI_rotl: + case Builtin::BI_lrotl: + case Builtin::BI_rotl64: + return emitRotate(E, false); + + case Builtin::BI__builtin_rotateright8: + case Builtin::BI__builtin_rotateright16: + case Builtin::BI__builtin_rotateright32: + case Builtin::BI__builtin_rotateright64: + case Builtin::BI_rotr8: // Microsoft variants of rotate right + case Builtin::BI_rotr16: + case Builtin::BI_rotr: + case 
Builtin::BI_lrotr: + case Builtin::BI_rotr64: + return emitRotate(E, true); + + case Builtin::BI__builtin_constant_p: { + llvm::Type *ResultType = ConvertType(E->getType()); + if (CGM.getCodeGenOpts().OptimizationLevel == 0) + // At -O0, we don't perform inlining, so we don't need to delay the + // processing. + return RValue::get(ConstantInt::get(ResultType, 0)); + + const Expr *Arg = E->getArg(0); + QualType ArgType = Arg->getType(); + if (!hasScalarEvaluationKind(ArgType) || ArgType->isFunctionType()) + // We can only reason about scalar types. + return RValue::get(ConstantInt::get(ResultType, 0)); + + Value *ArgValue = EmitScalarExpr(Arg); + Value *F = CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType)); + Value *Result = Builder.CreateCall(F, ArgValue); + if (Result->getType() != ResultType) + Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false); + return RValue::get(Result); + } case Builtin::BI__builtin_object_size: { unsigned Type = E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue(); @@ -1985,10 +2243,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__builtin___memcpy_chk: { // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2. - llvm::APSInt Size, DstSize; - if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || - !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) + Expr::EvalResult SizeResult, DstSizeResult; + if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) || + !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext())) break; + llvm::APSInt Size = SizeResult.Val.getInt(); + llvm::APSInt DstSize = DstSizeResult.Val.getInt(); if (Size.ugt(DstSize)) break; Address Dest = EmitPointerWithAlignment(E->getArg(0)); @@ -2009,10 +2269,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__builtin___memmove_chk: { // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2. 
- llvm::APSInt Size, DstSize; - if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || - !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) + Expr::EvalResult SizeResult, DstSizeResult; + if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) || + !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext())) break; + llvm::APSInt Size = SizeResult.Val.getInt(); + llvm::APSInt DstSize = DstSizeResult.Val.getInt(); if (Size.ugt(DstSize)) break; Address Dest = EmitPointerWithAlignment(E->getArg(0)); @@ -2047,10 +2309,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, } case Builtin::BI__builtin___memset_chk: { // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2. - llvm::APSInt Size, DstSize; - if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) || - !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext())) + Expr::EvalResult SizeResult, DstSizeResult; + if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) || + !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext())) break; + llvm::APSInt Size = SizeResult.Val.getInt(); + llvm::APSInt DstSize = DstSizeResult.Val.getInt(); if (Size.ugt(DstSize)) break; Address Dest = EmitPointerWithAlignment(E->getArg(0)); @@ -2258,6 +2522,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(nullptr); } + case Builtin::BI__builtin_launder: { + const Expr *Arg = E->getArg(0); + QualType ArgTy = Arg->getType()->getPointeeType(); + Value *Ptr = EmitScalarExpr(Arg); + if (TypeRequiresBuiltinLaunder(CGM, ArgTy)) + Ptr = Builder.CreateLaunderInvariantGroup(Ptr); + + return RValue::get(Ptr); + } case Builtin::BI__sync_fetch_and_add: case Builtin::BI__sync_fetch_and_sub: case Builtin::BI__sync_fetch_and_or: @@ -2952,7 +3225,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI_InterlockedExchangePointer: return RValue::get( EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E)); - case 
Builtin::BI_InterlockedCompareExchangePointer: { + case Builtin::BI_InterlockedCompareExchangePointer: + case Builtin::BI_InterlockedCompareExchangePointer_nf: { llvm::Type *RTy; llvm::IntegerType *IntType = IntegerType::get(getLLVMContext(), @@ -2969,10 +3243,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::Value *Comparand = Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType); - auto Result = - Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, - AtomicOrdering::SequentiallyConsistent, - AtomicOrdering::SequentiallyConsistent); + auto Ordering = + BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ? + AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent; + + auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, + Ordering, Ordering); Result->setVolatile(true); return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result, @@ -2982,16 +3258,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI_InterlockedCompareExchange8: case Builtin::BI_InterlockedCompareExchange16: case Builtin::BI_InterlockedCompareExchange: - case Builtin::BI_InterlockedCompareExchange64: { - AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg( - EmitScalarExpr(E->getArg(0)), - EmitScalarExpr(E->getArg(2)), - EmitScalarExpr(E->getArg(1)), - AtomicOrdering::SequentiallyConsistent, - AtomicOrdering::SequentiallyConsistent); - CXI->setVolatile(true); - return RValue::get(Builder.CreateExtractValue(CXI, 0)); - } + case Builtin::BI_InterlockedCompareExchange64: + return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E)); case Builtin::BI_InterlockedIncrement16: case Builtin::BI_InterlockedIncrement: return RValue::get( @@ -3337,24 +3605,31 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // Create a temporary array to hold the sizes of local pointer arguments // for the block. \p First is the position of the first size argument. 
- auto CreateArrayForSizeVar = [=](unsigned First) { - auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First); - auto *Arr = Builder.CreateAlloca(AT); - llvm::Value *Ptr; + auto CreateArrayForSizeVar = [=](unsigned First) + -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> { + llvm::APInt ArraySize(32, NumArgs - First); + QualType SizeArrayTy = getContext().getConstantArrayType( + getContext().getSizeType(), ArraySize, ArrayType::Normal, + /*IndexTypeQuals=*/0); + auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes"); + llvm::Value *TmpPtr = Tmp.getPointer(); + llvm::Value *TmpSize = EmitLifetimeStart( + CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr); + llvm::Value *ElemPtr; // Each of the following arguments specifies the size of the corresponding // argument passed to the enqueued block. auto *Zero = llvm::ConstantInt::get(IntTy, 0); for (unsigned I = First; I < NumArgs; ++I) { auto *Index = llvm::ConstantInt::get(IntTy, I - First); - auto *GEP = Builder.CreateGEP(Arr, {Zero, Index}); + auto *GEP = Builder.CreateGEP(TmpPtr, {Zero, Index}); if (I == First) - Ptr = GEP; + ElemPtr = GEP; auto *V = Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy); Builder.CreateAlignedStore( V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy)); } - return Ptr; + return std::tie(ElemPtr, TmpSize, TmpPtr); }; // Could have events and/or varargs. @@ -3366,24 +3641,27 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy); auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); - auto *PtrToSizeArray = CreateArrayForSizeVar(4); + llvm::Value *ElemPtr, *TmpSize, *TmpPtr; + std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4); // Create a vector of the arguments, as well as a constant value to // express to the runtime the number of variadic arguments. 
std::vector<llvm::Value *> Args = { Queue, Flags, Range, Kernel, Block, ConstantInt::get(IntTy, NumArgs - 4), - PtrToSizeArray}; + ElemPtr}; std::vector<llvm::Type *> ArgTys = { - QueueTy, IntTy, RangeTy, - GenericVoidPtrTy, GenericVoidPtrTy, IntTy, - PtrToSizeArray->getType()}; + QueueTy, IntTy, RangeTy, GenericVoidPtrTy, + GenericVoidPtrTy, IntTy, ElemPtr->getType()}; llvm::FunctionType *FTy = llvm::FunctionType::get( Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); - return RValue::get( - Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), - llvm::ArrayRef<llvm::Value *>(Args))); + auto Call = + RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), + llvm::ArrayRef<llvm::Value *>(Args))); + if (TmpSize) + EmitLifetimeEnd(TmpSize, TmpPtr); + return Call; } // Any calls now have event arguments passed. if (NumArgs >= 7) { @@ -3400,7 +3678,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5)); // Convert to generic address space. EventList = Builder.CreatePointerCast(EventList, EventPtrTy); - ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy); + ClkEvent = ClkEvent->getType()->isIntegerTy() + ? 
Builder.CreateBitOrPointerCast(ClkEvent, EventPtrTy) + : Builder.CreatePointerCast(ClkEvent, EventPtrTy); auto Info = CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6)); llvm::Value *Kernel = @@ -3430,15 +3710,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, ArgTys.push_back(Int32Ty); Name = "__enqueue_kernel_events_varargs"; - auto *PtrToSizeArray = CreateArrayForSizeVar(7); - Args.push_back(PtrToSizeArray); - ArgTys.push_back(PtrToSizeArray->getType()); + llvm::Value *ElemPtr, *TmpSize, *TmpPtr; + std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7); + Args.push_back(ElemPtr); + ArgTys.push_back(ElemPtr->getType()); llvm::FunctionType *FTy = llvm::FunctionType::get( Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); - return RValue::get( - Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), - llvm::ArrayRef<llvm::Value *>(Args))); + auto Call = + RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), + llvm::ArrayRef<llvm::Value *>(Args))); + if (TmpSize) + EmitLifetimeEnd(TmpSize, TmpPtr); + return Call; } LLVM_FALLTHROUGH; } @@ -3530,13 +3814,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__builtin_os_log_format: return emitBuiltinOSLogFormat(*E); - case Builtin::BI__builtin_os_log_format_buffer_size: { - analyze_os_log::OSLogBufferLayout Layout; - analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout); - return RValue::get(ConstantInt::get(ConvertType(E->getType()), - Layout.size().getQuantity())); - } - case Builtin::BI__xray_customevent: { if (!ShouldXRayInstrumentFunction()) return RValue::getIgnored(); @@ -3703,6 +3980,16 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // we need to do a bit cast. llvm::Type *PTy = FTy->getParamType(i); if (PTy != ArgValue->getType()) { + // XXX - vector of pointers? 
+ if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) { + if (PtrTy->getAddressSpace() != + ArgValue->getType()->getPointerAddressSpace()) { + ArgValue = Builder.CreateAddrSpaceCast( + ArgValue, + ArgValue->getType()->getPointerTo(PtrTy->getAddressSpace())); + } + } + assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) && "Must be able to losslessly bit cast to param"); ArgValue = Builder.CreateBitCast(ArgValue, PTy); @@ -3719,6 +4006,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, RetTy = ConvertType(BuiltinRetType); if (RetTy != V->getType()) { + // XXX - vector of pointers? + if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) { + if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) { + V = Builder.CreateAddrSpaceCast( + V, V->getType()->getPointerTo(PtrTy->getAddressSpace())); + } + } + assert(V->getType()->canLosslesslyBitCastTo(RetTy) && "Must be able to losslessly bit cast result type"); V = Builder.CreateBitCast(V, RetTy); @@ -4286,6 +4581,14 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP0(vextq_v), NEONMAP0(vfma_v), NEONMAP0(vfmaq_v), + NEONMAP1(vfmlal_high_v, aarch64_neon_fmlal2, 0), + NEONMAP1(vfmlal_low_v, aarch64_neon_fmlal, 0), + NEONMAP1(vfmlalq_high_v, aarch64_neon_fmlal2, 0), + NEONMAP1(vfmlalq_low_v, aarch64_neon_fmlal, 0), + NEONMAP1(vfmlsl_high_v, aarch64_neon_fmlsl2, 0), + NEONMAP1(vfmlsl_low_v, aarch64_neon_fmlsl, 0), + NEONMAP1(vfmlslq_high_v, aarch64_neon_fmlsl2, 0), + NEONMAP1(vfmlslq_low_v, aarch64_neon_fmlsl, 0), NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), @@ -5259,6 +5562,34 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Int = Usgn ? 
LLVMIntrinsic : AltLLVMIntrinsic; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot"); } + case NEON::BI__builtin_neon_vfmlal_low_v: + case NEON::BI__builtin_neon_vfmlalq_low_v: { + llvm::Type *InputTy = + llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); + llvm::Type *Tys[2] = { Ty, InputTy }; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low"); + } + case NEON::BI__builtin_neon_vfmlsl_low_v: + case NEON::BI__builtin_neon_vfmlslq_low_v: { + llvm::Type *InputTy = + llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); + llvm::Type *Tys[2] = { Ty, InputTy }; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low"); + } + case NEON::BI__builtin_neon_vfmlal_high_v: + case NEON::BI__builtin_neon_vfmlalq_high_v: { + llvm::Type *InputTy = + llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); + llvm::Type *Tys[2] = { Ty, InputTy }; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high"); + } + case NEON::BI__builtin_neon_vfmlsl_high_v: + case NEON::BI__builtin_neon_vfmlslq_high_v: { + llvm::Type *InputTy = + llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); + llvm::Type *Tys[2] = { Ty, InputTy }; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high"); + } } assert(Int && "Expected valid intrinsic number"); @@ -5506,10 +5837,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, /*Variadic=*/false); - APSInt Value; - if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext())) + Expr::EvalResult Result; + if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext())) llvm_unreachable("Sema will ensure that the parameter is constant"); + llvm::APSInt Value = Result.Val.getInt(); uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 
16 : 32).getZExtValue(); llvm::InlineAsm *Emit = @@ -5991,6 +6323,120 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); case ARM::BI_InterlockedIncrement64: return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); + case ARM::BI_InterlockedExchangeAdd8_acq: + case ARM::BI_InterlockedExchangeAdd16_acq: + case ARM::BI_InterlockedExchangeAdd_acq: + case ARM::BI_InterlockedExchangeAdd64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E); + case ARM::BI_InterlockedExchangeAdd8_rel: + case ARM::BI_InterlockedExchangeAdd16_rel: + case ARM::BI_InterlockedExchangeAdd_rel: + case ARM::BI_InterlockedExchangeAdd64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E); + case ARM::BI_InterlockedExchangeAdd8_nf: + case ARM::BI_InterlockedExchangeAdd16_nf: + case ARM::BI_InterlockedExchangeAdd_nf: + case ARM::BI_InterlockedExchangeAdd64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E); + case ARM::BI_InterlockedExchange8_acq: + case ARM::BI_InterlockedExchange16_acq: + case ARM::BI_InterlockedExchange_acq: + case ARM::BI_InterlockedExchange64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E); + case ARM::BI_InterlockedExchange8_rel: + case ARM::BI_InterlockedExchange16_rel: + case ARM::BI_InterlockedExchange_rel: + case ARM::BI_InterlockedExchange64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E); + case ARM::BI_InterlockedExchange8_nf: + case ARM::BI_InterlockedExchange16_nf: + case ARM::BI_InterlockedExchange_nf: + case ARM::BI_InterlockedExchange64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E); + case ARM::BI_InterlockedCompareExchange8_acq: + case ARM::BI_InterlockedCompareExchange16_acq: + case ARM::BI_InterlockedCompareExchange_acq: + case ARM::BI_InterlockedCompareExchange64_acq: + return 
EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E); + case ARM::BI_InterlockedCompareExchange8_rel: + case ARM::BI_InterlockedCompareExchange16_rel: + case ARM::BI_InterlockedCompareExchange_rel: + case ARM::BI_InterlockedCompareExchange64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E); + case ARM::BI_InterlockedCompareExchange8_nf: + case ARM::BI_InterlockedCompareExchange16_nf: + case ARM::BI_InterlockedCompareExchange_nf: + case ARM::BI_InterlockedCompareExchange64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E); + case ARM::BI_InterlockedOr8_acq: + case ARM::BI_InterlockedOr16_acq: + case ARM::BI_InterlockedOr_acq: + case ARM::BI_InterlockedOr64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E); + case ARM::BI_InterlockedOr8_rel: + case ARM::BI_InterlockedOr16_rel: + case ARM::BI_InterlockedOr_rel: + case ARM::BI_InterlockedOr64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E); + case ARM::BI_InterlockedOr8_nf: + case ARM::BI_InterlockedOr16_nf: + case ARM::BI_InterlockedOr_nf: + case ARM::BI_InterlockedOr64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E); + case ARM::BI_InterlockedXor8_acq: + case ARM::BI_InterlockedXor16_acq: + case ARM::BI_InterlockedXor_acq: + case ARM::BI_InterlockedXor64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E); + case ARM::BI_InterlockedXor8_rel: + case ARM::BI_InterlockedXor16_rel: + case ARM::BI_InterlockedXor_rel: + case ARM::BI_InterlockedXor64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E); + case ARM::BI_InterlockedXor8_nf: + case ARM::BI_InterlockedXor16_nf: + case ARM::BI_InterlockedXor_nf: + case ARM::BI_InterlockedXor64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E); + case ARM::BI_InterlockedAnd8_acq: + case ARM::BI_InterlockedAnd16_acq: + case ARM::BI_InterlockedAnd_acq: + case ARM::BI_InterlockedAnd64_acq: + 
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E); + case ARM::BI_InterlockedAnd8_rel: + case ARM::BI_InterlockedAnd16_rel: + case ARM::BI_InterlockedAnd_rel: + case ARM::BI_InterlockedAnd64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E); + case ARM::BI_InterlockedAnd8_nf: + case ARM::BI_InterlockedAnd16_nf: + case ARM::BI_InterlockedAnd_nf: + case ARM::BI_InterlockedAnd64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E); + case ARM::BI_InterlockedIncrement16_acq: + case ARM::BI_InterlockedIncrement_acq: + case ARM::BI_InterlockedIncrement64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E); + case ARM::BI_InterlockedIncrement16_rel: + case ARM::BI_InterlockedIncrement_rel: + case ARM::BI_InterlockedIncrement64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E); + case ARM::BI_InterlockedIncrement16_nf: + case ARM::BI_InterlockedIncrement_nf: + case ARM::BI_InterlockedIncrement64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E); + case ARM::BI_InterlockedDecrement16_acq: + case ARM::BI_InterlockedDecrement_acq: + case ARM::BI_InterlockedDecrement64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E); + case ARM::BI_InterlockedDecrement16_rel: + case ARM::BI_InterlockedDecrement_rel: + case ARM::BI_InterlockedDecrement64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E); + case ARM::BI_InterlockedDecrement16_nf: + case ARM::BI_InterlockedDecrement_nf: + case ARM::BI_InterlockedDecrement64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E); } // Get the last argument, which specifies the vector type. 
@@ -6497,11 +6943,33 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr"); } + if (BuiltinID == AArch64::BI__getReg) { + Expr::EvalResult Result; + if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext())) + llvm_unreachable("Sema will ensure that the parameter is constant"); + + llvm::APSInt Value = Result.Val.getInt(); + LLVMContext &Context = CGM.getLLVMContext(); + std::string Reg = Value == 31 ? "sp" : "x" + Value.toString(10); + + llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)}; + llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); + llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); + + llvm::Value *F = + CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty}); + return Builder.CreateCall(F, Metadata); + } + if (BuiltinID == AArch64::BI__builtin_arm_clrex) { Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex); return Builder.CreateCall(F); } + if (BuiltinID == AArch64::BI_ReadWriteBarrier) + return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, + llvm::SyncScope::SingleThread); + // CRC32 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; switch (BuiltinID) { @@ -6564,6 +7032,48 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead); } + if (BuiltinID == AArch64::BI_ReadStatusReg || + BuiltinID == AArch64::BI_WriteStatusReg) { + LLVMContext &Context = CGM.getLLVMContext(); + + unsigned SysReg = + E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue(); + + std::string SysRegStr; + llvm::raw_string_ostream(SysRegStr) << + ((1 << 1) | ((SysReg >> 14) & 1)) << ":" << + ((SysReg >> 11) & 7) << ":" << + ((SysReg >> 7) & 15) << ":" << + ((SysReg >> 3) & 15) << ":" << + ( SysReg & 7); + + llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) }; + llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); + 
llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); + + llvm::Type *RegisterType = Int64Ty; + llvm::Type *ValueType = Int32Ty; + llvm::Type *Types[] = { RegisterType }; + + if (BuiltinID == AArch64::BI_ReadStatusReg) { + llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); + llvm::Value *Call = Builder.CreateCall(F, Metadata); + + return Builder.CreateTrunc(Call, ValueType); + } + + llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); + llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1)); + ArgValue = Builder.CreateZExt(ArgValue, RegisterType); + + return Builder.CreateCall(F, { Metadata, ArgValue }); + } + + if (BuiltinID == AArch64::BI_AddressOfReturnAddress) { + llvm::Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress); + return Builder.CreateCall(F); + } + // Find out if any arguments are required to be integer constant // expressions. unsigned ICEArguments = 0; @@ -6659,7 +7169,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vcvth_f16_u32: case NEON::BI__builtin_neon_vcvth_f16_u64: usgn = true; - // FALL THROUGH + LLVM_FALLTHROUGH; case NEON::BI__builtin_neon_vcvth_f16_s16: case NEON::BI__builtin_neon_vcvth_f16_s32: case NEON::BI__builtin_neon_vcvth_f16_s64: { @@ -6679,7 +7189,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vcvth_u16_f16: usgn = true; - // FALL THROUGH + LLVM_FALLTHROUGH; case NEON::BI__builtin_neon_vcvth_s16_f16: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); @@ -6689,7 +7199,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vcvth_u32_f16: usgn = true; - // FALL THROUGH + LLVM_FALLTHROUGH; case NEON::BI__builtin_neon_vcvth_s32_f16: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); @@ -6699,7 +7209,7 @@ Value 
*CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } case NEON::BI__builtin_neon_vcvth_u64_f16: usgn = true; - // FALL THROUGH + LLVM_FALLTHROUGH; case NEON::BI__builtin_neon_vcvth_s64_f16: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); @@ -8414,6 +8924,129 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); case AArch64::BI_InterlockedIncrement64: return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); + case AArch64::BI_InterlockedExchangeAdd8_acq: + case AArch64::BI_InterlockedExchangeAdd16_acq: + case AArch64::BI_InterlockedExchangeAdd_acq: + case AArch64::BI_InterlockedExchangeAdd64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E); + case AArch64::BI_InterlockedExchangeAdd8_rel: + case AArch64::BI_InterlockedExchangeAdd16_rel: + case AArch64::BI_InterlockedExchangeAdd_rel: + case AArch64::BI_InterlockedExchangeAdd64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E); + case AArch64::BI_InterlockedExchangeAdd8_nf: + case AArch64::BI_InterlockedExchangeAdd16_nf: + case AArch64::BI_InterlockedExchangeAdd_nf: + case AArch64::BI_InterlockedExchangeAdd64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E); + case AArch64::BI_InterlockedExchange8_acq: + case AArch64::BI_InterlockedExchange16_acq: + case AArch64::BI_InterlockedExchange_acq: + case AArch64::BI_InterlockedExchange64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E); + case AArch64::BI_InterlockedExchange8_rel: + case AArch64::BI_InterlockedExchange16_rel: + case AArch64::BI_InterlockedExchange_rel: + case AArch64::BI_InterlockedExchange64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E); + case AArch64::BI_InterlockedExchange8_nf: + case AArch64::BI_InterlockedExchange16_nf: + case AArch64::BI_InterlockedExchange_nf: + case 
AArch64::BI_InterlockedExchange64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E); + case AArch64::BI_InterlockedCompareExchange8_acq: + case AArch64::BI_InterlockedCompareExchange16_acq: + case AArch64::BI_InterlockedCompareExchange_acq: + case AArch64::BI_InterlockedCompareExchange64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E); + case AArch64::BI_InterlockedCompareExchange8_rel: + case AArch64::BI_InterlockedCompareExchange16_rel: + case AArch64::BI_InterlockedCompareExchange_rel: + case AArch64::BI_InterlockedCompareExchange64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E); + case AArch64::BI_InterlockedCompareExchange8_nf: + case AArch64::BI_InterlockedCompareExchange16_nf: + case AArch64::BI_InterlockedCompareExchange_nf: + case AArch64::BI_InterlockedCompareExchange64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E); + case AArch64::BI_InterlockedOr8_acq: + case AArch64::BI_InterlockedOr16_acq: + case AArch64::BI_InterlockedOr_acq: + case AArch64::BI_InterlockedOr64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E); + case AArch64::BI_InterlockedOr8_rel: + case AArch64::BI_InterlockedOr16_rel: + case AArch64::BI_InterlockedOr_rel: + case AArch64::BI_InterlockedOr64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E); + case AArch64::BI_InterlockedOr8_nf: + case AArch64::BI_InterlockedOr16_nf: + case AArch64::BI_InterlockedOr_nf: + case AArch64::BI_InterlockedOr64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E); + case AArch64::BI_InterlockedXor8_acq: + case AArch64::BI_InterlockedXor16_acq: + case AArch64::BI_InterlockedXor_acq: + case AArch64::BI_InterlockedXor64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E); + case AArch64::BI_InterlockedXor8_rel: + case AArch64::BI_InterlockedXor16_rel: + case AArch64::BI_InterlockedXor_rel: + case 
AArch64::BI_InterlockedXor64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E); + case AArch64::BI_InterlockedXor8_nf: + case AArch64::BI_InterlockedXor16_nf: + case AArch64::BI_InterlockedXor_nf: + case AArch64::BI_InterlockedXor64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E); + case AArch64::BI_InterlockedAnd8_acq: + case AArch64::BI_InterlockedAnd16_acq: + case AArch64::BI_InterlockedAnd_acq: + case AArch64::BI_InterlockedAnd64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E); + case AArch64::BI_InterlockedAnd8_rel: + case AArch64::BI_InterlockedAnd16_rel: + case AArch64::BI_InterlockedAnd_rel: + case AArch64::BI_InterlockedAnd64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E); + case AArch64::BI_InterlockedAnd8_nf: + case AArch64::BI_InterlockedAnd16_nf: + case AArch64::BI_InterlockedAnd_nf: + case AArch64::BI_InterlockedAnd64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E); + case AArch64::BI_InterlockedIncrement16_acq: + case AArch64::BI_InterlockedIncrement_acq: + case AArch64::BI_InterlockedIncrement64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E); + case AArch64::BI_InterlockedIncrement16_rel: + case AArch64::BI_InterlockedIncrement_rel: + case AArch64::BI_InterlockedIncrement64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E); + case AArch64::BI_InterlockedIncrement16_nf: + case AArch64::BI_InterlockedIncrement_nf: + case AArch64::BI_InterlockedIncrement64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E); + case AArch64::BI_InterlockedDecrement16_acq: + case AArch64::BI_InterlockedDecrement_acq: + case AArch64::BI_InterlockedDecrement64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E); + case AArch64::BI_InterlockedDecrement16_rel: + case AArch64::BI_InterlockedDecrement_rel: + case AArch64::BI_InterlockedDecrement64_rel: + return 
EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E); + case AArch64::BI_InterlockedDecrement16_nf: + case AArch64::BI_InterlockedDecrement_nf: + case AArch64::BI_InterlockedDecrement64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E); + + case AArch64::BI_InterlockedAdd: { + Value *Arg0 = EmitScalarExpr(E->getArg(0)); + Value *Arg1 = EmitScalarExpr(E->getArg(1)); + AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( + AtomicRMWInst::Add, Arg0, Arg1, + llvm::AtomicOrdering::SequentiallyConsistent); + return Builder.CreateAdd(RMWI, Arg1); + } } } @@ -8524,8 +9157,9 @@ static Value *EmitX86CompressStore(CodeGenFunction &CGF, } static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, - unsigned NumElts, ArrayRef<Value *> Ops, + ArrayRef<Value *> Ops, bool InvertLHS = false) { + unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts); Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts); @@ -8533,7 +9167,25 @@ static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, LHS = CGF.Builder.CreateNot(LHS); return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS), - CGF.Builder.getIntNTy(std::max(NumElts, 8U))); + Ops[0]->getType()); +} + +static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1, + Value *Amt, bool IsRight) { + llvm::Type *Ty = Op0->getType(); + + // Amount may be scalar immediate, in which case create a splat vector. + // Funnel shifts amounts are treated as modulo and types are all power-of-2 so + // we only care about the lowest log2 bits anyway. + if (Amt->getType() != Ty) { + unsigned NumElts = Ty->getVectorNumElements(); + Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false); + Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt); + } + + unsigned IID = IsRight ? 
Intrinsic::fshr : Intrinsic::fshl; + Value *F = CGF.CGM.getIntrinsic(IID, Ty); + return CGF.Builder.CreateCall(F, {Op0, Op1, Amt}); } static Value *EmitX86Select(CodeGenFunction &CGF, @@ -8855,6 +9507,17 @@ static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2"); } +// Emit addition or subtraction with signed/unsigned saturation. +static Value *EmitX86AddSubSatExpr(CodeGenFunction &CGF, + ArrayRef<Value *> Ops, bool IsSigned, + bool IsAddition) { + Intrinsic::ID IID = + IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat) + : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat); + llvm::Function *F = CGF.CGM.getIntrinsic(IID, Ops[0]->getType()); + return CGF.Builder.CreateCall(F, {Ops[0], Ops[1]}); +} + Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) { const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString(); @@ -8876,6 +9539,7 @@ Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) { // Grab the global __cpu_model. llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); + cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true); // Calculate the index needed to access the correct field based on the // range. Also adjust the expected value. @@ -8911,17 +9575,17 @@ Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) { return EmitX86CpuSupports(FeatureStr); } -uint32_t +uint64_t CodeGenFunction::GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs) { // Processor features and mapping to processor feature value. 
- uint32_t FeaturesMask = 0; + uint64_t FeaturesMask = 0; for (const StringRef &FeatureStr : FeatureStrs) { unsigned Feature = StringSwitch<unsigned>(FeatureStr) #define X86_FEATURE_COMPAT(VAL, ENUM, STR) .Case(STR, VAL) #include "llvm/Support/X86TargetParser.def" ; - FeaturesMask |= (1U << Feature); + FeaturesMask |= (1ULL << Feature); } return FeaturesMask; } @@ -8930,37 +9594,66 @@ Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) { return EmitX86CpuSupports(GetX86CpuSupportsMask(FeatureStrs)); } -llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint32_t FeaturesMask) { - // Matching the struct layout from the compiler-rt/libgcc structure that is - // filled in: - // unsigned int __cpu_vendor; - // unsigned int __cpu_type; - // unsigned int __cpu_subtype; - // unsigned int __cpu_features[1]; - llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, - llvm::ArrayType::get(Int32Ty, 1)); +llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) { + uint32_t Features1 = Lo_32(FeaturesMask); + uint32_t Features2 = Hi_32(FeaturesMask); - // Grab the global __cpu_model. - llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); + Value *Result = Builder.getTrue(); + + if (Features1 != 0) { + // Matching the struct layout from the compiler-rt/libgcc structure that is + // filled in: + // unsigned int __cpu_vendor; + // unsigned int __cpu_type; + // unsigned int __cpu_subtype; + // unsigned int __cpu_features[1]; + llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, + llvm::ArrayType::get(Int32Ty, 1)); + + // Grab the global __cpu_model. + llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); + cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true); + + // Grab the first (0th) element from the field __cpu_features off of the + // global in the struct STy. 
+ Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3), + Builder.getInt32(0)}; + Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs); + Value *Features = + Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4)); + + // Check the value of the bit corresponding to the feature requested. + Value *Mask = Builder.getInt32(Features1); + Value *Bitset = Builder.CreateAnd(Features, Mask); + Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask); + Result = Builder.CreateAnd(Result, Cmp); + } - // Grab the first (0th) element from the field __cpu_features off of the - // global in the struct STy. - Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 3), - ConstantInt::get(Int32Ty, 0)}; - Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs); - Value *Features = - Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4)); - - // Check the value of the bit corresponding to the feature requested. - Value *Bitset = Builder.CreateAnd( - Features, llvm::ConstantInt::get(Int32Ty, FeaturesMask)); - return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0)); + if (Features2 != 0) { + llvm::Constant *CpuFeatures2 = CGM.CreateRuntimeVariable(Int32Ty, + "__cpu_features2"); + cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true); + + Value *Features = + Builder.CreateAlignedLoad(CpuFeatures2, CharUnits::fromQuantity(4)); + + // Check the value of the bit corresponding to the feature requested. 
+ Value *Mask = Builder.getInt32(Features2); + Value *Bitset = Builder.CreateAnd(Features, Mask); + Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask); + Result = Builder.CreateAnd(Result, Cmp); + } + + return Result; } Value *CodeGenFunction::EmitX86CpuInit() { llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, /*Variadic*/ false); llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init"); + cast<llvm::GlobalValue>(Func)->setDSOLocal(true); + cast<llvm::GlobalValue>(Func)->setDLLStorageClass( + llvm::GlobalValue::DefaultStorageClass); return Builder.CreateCall(Func); } @@ -9051,6 +9744,24 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__rdtsc: { return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc)); } + case X86::BI__builtin_ia32_rdtscp: { + Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp)); + Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1), + Ops[0]); + return Builder.CreateExtractValue(Call, 0); + } + case X86::BI__builtin_ia32_lzcnt_u16: + case X86::BI__builtin_ia32_lzcnt_u32: + case X86::BI__builtin_ia32_lzcnt_u64: { + Value *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); + return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); + } + case X86::BI__builtin_ia32_tzcnt_u16: + case X86::BI__builtin_ia32_tzcnt_u32: + case X86::BI__builtin_ia32_tzcnt_u64: { + Value *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType()); + return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); + } case X86::BI__builtin_ia32_undef128: case X86::BI__builtin_ia32_undef256: case X86::BI__builtin_ia32_undef512: @@ -9822,6 +10533,50 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, "psrldq"); return Builder.CreateBitCast(SV, ResultType, "cast"); } + case X86::BI__builtin_ia32_kshiftliqi: + case X86::BI__builtin_ia32_kshiftlihi: + case X86::BI__builtin_ia32_kshiftlisi: + case X86::BI__builtin_ia32_kshiftlidi: { + unsigned ShiftVal = 
cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff; + unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); + + if (ShiftVal >= NumElts) + return llvm::Constant::getNullValue(Ops[0]->getType()); + + Value *In = getMaskVecValue(*this, Ops[0], NumElts); + + uint32_t Indices[64]; + for (unsigned i = 0; i != NumElts; ++i) + Indices[i] = NumElts + i - ShiftVal; + + Value *Zero = llvm::Constant::getNullValue(In->getType()); + Value *SV = Builder.CreateShuffleVector(Zero, In, + makeArrayRef(Indices, NumElts), + "kshiftl"); + return Builder.CreateBitCast(SV, Ops[0]->getType()); + } + case X86::BI__builtin_ia32_kshiftriqi: + case X86::BI__builtin_ia32_kshiftrihi: + case X86::BI__builtin_ia32_kshiftrisi: + case X86::BI__builtin_ia32_kshiftridi: { + unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff; + unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); + + if (ShiftVal >= NumElts) + return llvm::Constant::getNullValue(Ops[0]->getType()); + + Value *In = getMaskVecValue(*this, Ops[0], NumElts); + + uint32_t Indices[64]; + for (unsigned i = 0; i != NumElts; ++i) + Indices[i] = i + ShiftVal; + + Value *Zero = llvm::Constant::getNullValue(In->getType()); + Value *SV = Builder.CreateShuffleVector(In, Zero, + makeArrayRef(Indices, NumElts), + "kshiftr"); + return Builder.CreateBitCast(SV, Ops[0]->getType()); + } case X86::BI__builtin_ia32_movnti: case X86::BI__builtin_ia32_movnti64: case X86::BI__builtin_ia32_movntsd: @@ -9847,7 +10602,41 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, SI->setAlignment(1); return SI; } - + // Rotate is a special case of funnel shift - 1st 2 args are the same. 
+ case X86::BI__builtin_ia32_vprotb: + case X86::BI__builtin_ia32_vprotw: + case X86::BI__builtin_ia32_vprotd: + case X86::BI__builtin_ia32_vprotq: + case X86::BI__builtin_ia32_vprotbi: + case X86::BI__builtin_ia32_vprotwi: + case X86::BI__builtin_ia32_vprotdi: + case X86::BI__builtin_ia32_vprotqi: + case X86::BI__builtin_ia32_prold128: + case X86::BI__builtin_ia32_prold256: + case X86::BI__builtin_ia32_prold512: + case X86::BI__builtin_ia32_prolq128: + case X86::BI__builtin_ia32_prolq256: + case X86::BI__builtin_ia32_prolq512: + case X86::BI__builtin_ia32_prolvd128: + case X86::BI__builtin_ia32_prolvd256: + case X86::BI__builtin_ia32_prolvd512: + case X86::BI__builtin_ia32_prolvq128: + case X86::BI__builtin_ia32_prolvq256: + case X86::BI__builtin_ia32_prolvq512: + return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false); + case X86::BI__builtin_ia32_prord128: + case X86::BI__builtin_ia32_prord256: + case X86::BI__builtin_ia32_prord512: + case X86::BI__builtin_ia32_prorq128: + case X86::BI__builtin_ia32_prorq256: + case X86::BI__builtin_ia32_prorq512: + case X86::BI__builtin_ia32_prorvd128: + case X86::BI__builtin_ia32_prorvd256: + case X86::BI__builtin_ia32_prorvd512: + case X86::BI__builtin_ia32_prorvq128: + case X86::BI__builtin_ia32_prorvq256: + case X86::BI__builtin_ia32_prorvq512: + return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true); case X86::BI__builtin_ia32_selectb_128: case X86::BI__builtin_ia32_selectb_256: case X86::BI__builtin_ia32_selectb_512: @@ -9905,38 +10694,147 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return EmitX86MaskedCompare(*this, CC, false, Ops); } + case X86::BI__builtin_ia32_kortestcqi: case X86::BI__builtin_ia32_kortestchi: - case X86::BI__builtin_ia32_kortestzhi: { - Value *Or = EmitX86MaskLogic(*this, Instruction::Or, 16, Ops); - Value *C; - if (BuiltinID == X86::BI__builtin_ia32_kortestchi) - C = llvm::Constant::getAllOnesValue(Builder.getInt16Ty()); - else - C = 
llvm::Constant::getNullValue(Builder.getInt16Ty()); + case X86::BI__builtin_ia32_kortestcsi: + case X86::BI__builtin_ia32_kortestcdi: { + Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops); + Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType()); Value *Cmp = Builder.CreateICmpEQ(Or, C); return Builder.CreateZExt(Cmp, ConvertType(E->getType())); } + case X86::BI__builtin_ia32_kortestzqi: + case X86::BI__builtin_ia32_kortestzhi: + case X86::BI__builtin_ia32_kortestzsi: + case X86::BI__builtin_ia32_kortestzdi: { + Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops); + Value *C = llvm::Constant::getNullValue(Ops[0]->getType()); + Value *Cmp = Builder.CreateICmpEQ(Or, C); + return Builder.CreateZExt(Cmp, ConvertType(E->getType())); + } + + case X86::BI__builtin_ia32_ktestcqi: + case X86::BI__builtin_ia32_ktestzqi: + case X86::BI__builtin_ia32_ktestchi: + case X86::BI__builtin_ia32_ktestzhi: + case X86::BI__builtin_ia32_ktestcsi: + case X86::BI__builtin_ia32_ktestzsi: + case X86::BI__builtin_ia32_ktestcdi: + case X86::BI__builtin_ia32_ktestzdi: { + Intrinsic::ID IID; + switch (BuiltinID) { + default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_ktestcqi: + IID = Intrinsic::x86_avx512_ktestc_b; + break; + case X86::BI__builtin_ia32_ktestzqi: + IID = Intrinsic::x86_avx512_ktestz_b; + break; + case X86::BI__builtin_ia32_ktestchi: + IID = Intrinsic::x86_avx512_ktestc_w; + break; + case X86::BI__builtin_ia32_ktestzhi: + IID = Intrinsic::x86_avx512_ktestz_w; + break; + case X86::BI__builtin_ia32_ktestcsi: + IID = Intrinsic::x86_avx512_ktestc_d; + break; + case X86::BI__builtin_ia32_ktestzsi: + IID = Intrinsic::x86_avx512_ktestz_d; + break; + case X86::BI__builtin_ia32_ktestcdi: + IID = Intrinsic::x86_avx512_ktestc_q; + break; + case X86::BI__builtin_ia32_ktestzdi: + IID = Intrinsic::x86_avx512_ktestz_q; + break; + } + + unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); + Value *LHS = getMaskVecValue(*this, Ops[0], 
NumElts); + Value *RHS = getMaskVecValue(*this, Ops[1], NumElts); + Function *Intr = CGM.getIntrinsic(IID); + return Builder.CreateCall(Intr, {LHS, RHS}); + } + case X86::BI__builtin_ia32_kaddqi: + case X86::BI__builtin_ia32_kaddhi: + case X86::BI__builtin_ia32_kaddsi: + case X86::BI__builtin_ia32_kadddi: { + Intrinsic::ID IID; + switch (BuiltinID) { + default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_kaddqi: + IID = Intrinsic::x86_avx512_kadd_b; + break; + case X86::BI__builtin_ia32_kaddhi: + IID = Intrinsic::x86_avx512_kadd_w; + break; + case X86::BI__builtin_ia32_kaddsi: + IID = Intrinsic::x86_avx512_kadd_d; + break; + case X86::BI__builtin_ia32_kadddi: + IID = Intrinsic::x86_avx512_kadd_q; + break; + } + + unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); + Value *LHS = getMaskVecValue(*this, Ops[0], NumElts); + Value *RHS = getMaskVecValue(*this, Ops[1], NumElts); + Function *Intr = CGM.getIntrinsic(IID); + Value *Res = Builder.CreateCall(Intr, {LHS, RHS}); + return Builder.CreateBitCast(Res, Ops[0]->getType()); + } + case X86::BI__builtin_ia32_kandqi: case X86::BI__builtin_ia32_kandhi: - return EmitX86MaskLogic(*this, Instruction::And, 16, Ops); + case X86::BI__builtin_ia32_kandsi: + case X86::BI__builtin_ia32_kanddi: + return EmitX86MaskLogic(*this, Instruction::And, Ops); + case X86::BI__builtin_ia32_kandnqi: case X86::BI__builtin_ia32_kandnhi: - return EmitX86MaskLogic(*this, Instruction::And, 16, Ops, true); + case X86::BI__builtin_ia32_kandnsi: + case X86::BI__builtin_ia32_kandndi: + return EmitX86MaskLogic(*this, Instruction::And, Ops, true); + case X86::BI__builtin_ia32_korqi: case X86::BI__builtin_ia32_korhi: - return EmitX86MaskLogic(*this, Instruction::Or, 16, Ops); + case X86::BI__builtin_ia32_korsi: + case X86::BI__builtin_ia32_kordi: + return EmitX86MaskLogic(*this, Instruction::Or, Ops); + case X86::BI__builtin_ia32_kxnorqi: case X86::BI__builtin_ia32_kxnorhi: - return EmitX86MaskLogic(*this, 
Instruction::Xor, 16, Ops, true); + case X86::BI__builtin_ia32_kxnorsi: + case X86::BI__builtin_ia32_kxnordi: + return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true); + case X86::BI__builtin_ia32_kxorqi: case X86::BI__builtin_ia32_kxorhi: - return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops); - case X86::BI__builtin_ia32_knothi: { - Ops[0] = getMaskVecValue(*this, Ops[0], 16); - return Builder.CreateBitCast(Builder.CreateNot(Ops[0]), - Builder.getInt16Ty()); + case X86::BI__builtin_ia32_kxorsi: + case X86::BI__builtin_ia32_kxordi: + return EmitX86MaskLogic(*this, Instruction::Xor, Ops); + case X86::BI__builtin_ia32_knotqi: + case X86::BI__builtin_ia32_knothi: + case X86::BI__builtin_ia32_knotsi: + case X86::BI__builtin_ia32_knotdi: { + unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); + Value *Res = getMaskVecValue(*this, Ops[0], NumElts); + return Builder.CreateBitCast(Builder.CreateNot(Res), + Ops[0]->getType()); + } + case X86::BI__builtin_ia32_kmovb: + case X86::BI__builtin_ia32_kmovw: + case X86::BI__builtin_ia32_kmovd: + case X86::BI__builtin_ia32_kmovq: { + // Bitcast to vXi1 type and then back to integer. This gets the mask + // register type into the IR, but might be optimized out depending on + // what's around it. 
+ unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); + Value *Res = getMaskVecValue(*this, Ops[0], NumElts); + return Builder.CreateBitCast(Res, Ops[0]->getType()); } case X86::BI__builtin_ia32_kunpckdi: case X86::BI__builtin_ia32_kunpcksi: case X86::BI__builtin_ia32_kunpckhi: { - unsigned NumElts = Ops[0]->getType()->getScalarSizeInBits(); + unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); Value *LHS = getMaskVecValue(*this, Ops[0], NumElts); Value *RHS = getMaskVecValue(*this, Ops[1], NumElts); uint32_t Indices[64]; @@ -10103,6 +11001,52 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_pternlogq256_maskz: return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops); + case X86::BI__builtin_ia32_vpshldd128: + case X86::BI__builtin_ia32_vpshldd256: + case X86::BI__builtin_ia32_vpshldd512: + case X86::BI__builtin_ia32_vpshldq128: + case X86::BI__builtin_ia32_vpshldq256: + case X86::BI__builtin_ia32_vpshldq512: + case X86::BI__builtin_ia32_vpshldw128: + case X86::BI__builtin_ia32_vpshldw256: + case X86::BI__builtin_ia32_vpshldw512: + return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false); + + case X86::BI__builtin_ia32_vpshrdd128: + case X86::BI__builtin_ia32_vpshrdd256: + case X86::BI__builtin_ia32_vpshrdd512: + case X86::BI__builtin_ia32_vpshrdq128: + case X86::BI__builtin_ia32_vpshrdq256: + case X86::BI__builtin_ia32_vpshrdq512: + case X86::BI__builtin_ia32_vpshrdw128: + case X86::BI__builtin_ia32_vpshrdw256: + case X86::BI__builtin_ia32_vpshrdw512: + // Ops 0 and 1 are swapped. 
+ return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true); + + case X86::BI__builtin_ia32_vpshldvd128: + case X86::BI__builtin_ia32_vpshldvd256: + case X86::BI__builtin_ia32_vpshldvd512: + case X86::BI__builtin_ia32_vpshldvq128: + case X86::BI__builtin_ia32_vpshldvq256: + case X86::BI__builtin_ia32_vpshldvq512: + case X86::BI__builtin_ia32_vpshldvw128: + case X86::BI__builtin_ia32_vpshldvw256: + case X86::BI__builtin_ia32_vpshldvw512: + return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false); + + case X86::BI__builtin_ia32_vpshrdvd128: + case X86::BI__builtin_ia32_vpshrdvd256: + case X86::BI__builtin_ia32_vpshrdvd512: + case X86::BI__builtin_ia32_vpshrdvq128: + case X86::BI__builtin_ia32_vpshrdvq256: + case X86::BI__builtin_ia32_vpshrdvq512: + case X86::BI__builtin_ia32_vpshrdvw128: + case X86::BI__builtin_ia32_vpshrdvw256: + case X86::BI__builtin_ia32_vpshrdvw512: + // Ops 0 and 1 are swapped. + return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true); + // 3DNow! case X86::BI__builtin_ia32_pswapdsf: case X86::BI__builtin_ia32_pswapdsi: { @@ -10145,6 +11089,33 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Ops[0]); return Builder.CreateExtractValue(Call, 1); } + case X86::BI__builtin_ia32_addcarryx_u32: + case X86::BI__builtin_ia32_addcarryx_u64: + case X86::BI__builtin_ia32_subborrow_u32: + case X86::BI__builtin_ia32_subborrow_u64: { + Intrinsic::ID IID; + switch (BuiltinID) { + default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_addcarryx_u32: + IID = Intrinsic::x86_addcarry_32; + break; + case X86::BI__builtin_ia32_addcarryx_u64: + IID = Intrinsic::x86_addcarry_64; + break; + case X86::BI__builtin_ia32_subborrow_u32: + IID = Intrinsic::x86_subborrow_32; + break; + case X86::BI__builtin_ia32_subborrow_u64: + IID = Intrinsic::x86_subborrow_64; + break; + } + + Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), + { Ops[0], Ops[1], Ops[2] }); + 
Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1), + Ops[3]); + return Builder.CreateExtractValue(Call, 0); + } case X86::BI__builtin_ia32_fpclassps128_mask: case X86::BI__builtin_ia32_fpclassps256_mask: @@ -10183,6 +11154,51 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn); } + case X86::BI__builtin_ia32_vpmultishiftqb128: + case X86::BI__builtin_ia32_vpmultishiftqb256: + case X86::BI__builtin_ia32_vpmultishiftqb512: { + Intrinsic::ID ID; + switch (BuiltinID) { + default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_vpmultishiftqb128: + ID = Intrinsic::x86_avx512_pmultishift_qb_128; + break; + case X86::BI__builtin_ia32_vpmultishiftqb256: + ID = Intrinsic::x86_avx512_pmultishift_qb_256; + break; + case X86::BI__builtin_ia32_vpmultishiftqb512: + ID = Intrinsic::x86_avx512_pmultishift_qb_512; + break; + } + + return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); + } + + case X86::BI__builtin_ia32_vpshufbitqmb128_mask: + case X86::BI__builtin_ia32_vpshufbitqmb256_mask: + case X86::BI__builtin_ia32_vpshufbitqmb512_mask: { + unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + Value *MaskIn = Ops[2]; + Ops.erase(&Ops[2]); + + Intrinsic::ID ID; + switch (BuiltinID) { + default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_vpshufbitqmb128_mask: + ID = Intrinsic::x86_avx512_vpshufbitqmb_128; + break; + case X86::BI__builtin_ia32_vpshufbitqmb256_mask: + ID = Intrinsic::x86_avx512_vpshufbitqmb_256; + break; + case X86::BI__builtin_ia32_vpshufbitqmb512_mask: + ID = Intrinsic::x86_avx512_vpshufbitqmb_512; + break; + } + + Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops); + return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn); + } + // packed comparison intrinsics case X86::BI__builtin_ia32_cmpeqps: case X86::BI__builtin_ia32_cmpeqpd: @@ -10361,6 +11377,27 @@ Value 
*CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, llvm::SyncScope::System); } + case X86::BI__shiftleft128: + case X86::BI__shiftright128: { + // FIXME: Once fshl/fshr no longer add an unneeded and and cmov, do this: + // llvm::Function *F = CGM.getIntrinsic( + // BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr, + // Int64Ty); + // Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); + // return Builder.CreateCall(F, Ops); + llvm::Type *Int128Ty = Builder.getInt128Ty(); + Value *Val = Builder.CreateOr( + Builder.CreateShl(Builder.CreateZExt(Ops[1], Int128Ty), 64), + Builder.CreateZExt(Ops[0], Int128Ty)); + Value *Amt = Builder.CreateAnd(Builder.CreateZExt(Ops[2], Int128Ty), + llvm::ConstantInt::get(Int128Ty, 0x3f)); + Value *Res; + if (BuiltinID == X86::BI__shiftleft128) + Res = Builder.CreateLShr(Builder.CreateShl(Val, Amt), 64); + else + Res = Builder.CreateLShr(Val, Amt); + return Builder.CreateTrunc(Res, Int64Ty); + } case X86::BI_ReadWriteBarrier: case X86::BI_ReadBarrier: case X86::BI_WriteBarrier: { @@ -10401,14 +11438,11 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, llvm::Type *Int128PtrTy = Int128Ty->getPointerTo(); Value *Destination = - Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PtrTy); - Value *ExchangeHigh128 = - Builder.CreateZExt(EmitScalarExpr(E->getArg(1)), Int128Ty); - Value *ExchangeLow128 = - Builder.CreateZExt(EmitScalarExpr(E->getArg(2)), Int128Ty); - Address ComparandResult( - Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int128PtrTy), - getContext().toCharUnitsFromBits(128)); + Builder.CreateBitCast(Ops[0], Int128PtrTy); + Value *ExchangeHigh128 = Builder.CreateZExt(Ops[1], Int128Ty); + Value *ExchangeLow128 = Builder.CreateZExt(Ops[2], Int128Ty); + Address ComparandResult(Builder.CreateBitCast(Ops[3], Int128PtrTy), + getContext().toCharUnitsFromBits(128)); Value *Exchange = Builder.CreateOr( 
Builder.CreateShl(ExchangeHigh128, 64, "", false, false), @@ -10459,8 +11493,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__readfsdword: case X86::BI__readfsqword: { llvm::Type *IntTy = ConvertType(E->getType()); - Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), - llvm::PointerType::get(IntTy, 257)); + Value *Ptr = + Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 257)); LoadInst *Load = Builder.CreateAlignedLoad( IntTy, Ptr, getContext().getTypeAlignInChars(E->getType())); Load->setVolatile(true); @@ -10471,17 +11505,44 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__readgsdword: case X86::BI__readgsqword: { llvm::Type *IntTy = ConvertType(E->getType()); - Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), - llvm::PointerType::get(IntTy, 256)); + Value *Ptr = + Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 256)); LoadInst *Load = Builder.CreateAlignedLoad( IntTy, Ptr, getContext().getTypeAlignInChars(E->getType())); Load->setVolatile(true); return Load; } + case X86::BI__builtin_ia32_paddsb512: + case X86::BI__builtin_ia32_paddsw512: + case X86::BI__builtin_ia32_paddsb256: + case X86::BI__builtin_ia32_paddsw256: + case X86::BI__builtin_ia32_paddsb128: + case X86::BI__builtin_ia32_paddsw128: + return EmitX86AddSubSatExpr(*this, Ops, true, true); + case X86::BI__builtin_ia32_paddusb512: + case X86::BI__builtin_ia32_paddusw512: + case X86::BI__builtin_ia32_paddusb256: + case X86::BI__builtin_ia32_paddusw256: + case X86::BI__builtin_ia32_paddusb128: + case X86::BI__builtin_ia32_paddusw128: + return EmitX86AddSubSatExpr(*this, Ops, false, true); + case X86::BI__builtin_ia32_psubsb512: + case X86::BI__builtin_ia32_psubsw512: + case X86::BI__builtin_ia32_psubsb256: + case X86::BI__builtin_ia32_psubsw256: + case X86::BI__builtin_ia32_psubsb128: + case X86::BI__builtin_ia32_psubsw128: + return EmitX86AddSubSatExpr(*this, Ops, true, false); + case 
X86::BI__builtin_ia32_psubusb512: + case X86::BI__builtin_ia32_psubusw512: + case X86::BI__builtin_ia32_psubusb256: + case X86::BI__builtin_ia32_psubusw256: + case X86::BI__builtin_ia32_psubusb128: + case X86::BI__builtin_ia32_psubusw128: + return EmitX86AddSubSatExpr(*this, Ops, false, false); } } - Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { SmallVector<Value*, 4> Ops; @@ -10901,6 +11962,28 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, auto RetTy = ConvertType(BIRetType); return Builder.CreateBitCast(ShuffleCall, RetTy); } + + case PPC::BI__builtin_pack_vector_int128: { + bool isLittleEndian = getTarget().isLittleEndian(); + Value *UndefValue = + llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), 2)); + Value *Res = Builder.CreateInsertElement( + UndefValue, Ops[0], (uint64_t)(isLittleEndian ? 1 : 0)); + Res = Builder.CreateInsertElement(Res, Ops[1], + (uint64_t)(isLittleEndian ? 0 : 1)); + return Builder.CreateBitCast(Res, ConvertType(E->getType())); + } + + case PPC::BI__builtin_unpack_vector_int128: { + ConstantInt *Index = cast<ConstantInt>(Ops[1]); + Value *Unpacked = Builder.CreateBitCast( + Ops[0], llvm::VectorType::get(ConvertType(E->getType()), 2)); + + if (getTarget().isLittleEndian()) + Index = ConstantInt::get(Index->getType(), 1 - Index->getZExtValue()); + + return Builder.CreateExtractElement(Unpacked, Index); + } } } @@ -10948,12 +12031,16 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ds_swizzle: return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle); - case AMDGPU::BI__builtin_amdgcn_mov_dpp: { - llvm::SmallVector<llvm::Value *, 5> Args; - for (unsigned I = 0; I != 5; ++I) + case AMDGPU::BI__builtin_amdgcn_mov_dpp: + case AMDGPU::BI__builtin_amdgcn_update_dpp: { + llvm::SmallVector<llvm::Value *, 6> Args; + for (unsigned I = 0; I != E->getNumArgs(); ++I) Args.push_back(EmitScalarExpr(E->getArg(I))); - Value *F = 
CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp, - Args[0]->getType()); + assert(Args.size() == 5 || Args.size() == 6); + if (Args.size() == 5) + Args.insert(Args.begin(), llvm::UndefValue::get(Args[0]->getType())); + Value *F = + CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType()); return Builder.CreateCall(F, Args); } case AMDGPU::BI__builtin_amdgcn_div_fixup: @@ -11039,50 +12126,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, CI->setConvergent(); return CI; } - case AMDGPU::BI__builtin_amdgcn_ds_faddf: - case AMDGPU::BI__builtin_amdgcn_ds_fminf: - case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: { - llvm::SmallVector<llvm::Value *, 5> Args; - for (unsigned I = 0; I != 5; ++I) - Args.push_back(EmitScalarExpr(E->getArg(I))); - const llvm::Type *PtrTy = Args[0]->getType(); - // check pointer parameter - if (!PtrTy->isPointerTy() || - E->getArg(0) - ->getType() - ->getPointeeType() - .getQualifiers() - .getAddressSpace() != LangAS::opencl_local || - !PtrTy->getPointerElementType()->isFloatTy()) { - CGM.Error(E->getArg(0)->getLocStart(), - "parameter should have type \"local float*\""); - return nullptr; - } - // check float parameter - if (!Args[1]->getType()->isFloatTy()) { - CGM.Error(E->getArg(1)->getLocStart(), - "parameter should have type \"float\""); - return nullptr; - } - - Intrinsic::ID ID; - switch (BuiltinID) { - case AMDGPU::BI__builtin_amdgcn_ds_faddf: - ID = Intrinsic::amdgcn_ds_fadd; - break; - case AMDGPU::BI__builtin_amdgcn_ds_fminf: - ID = Intrinsic::amdgcn_ds_fmin; - break; - case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: - ID = Intrinsic::amdgcn_ds_fmax; - break; - default: - llvm_unreachable("Unknown BuiltinID"); - } - Value *F = CGM.getIntrinsic(ID); - return Builder.CreateCall(F, Args); - } - // amdgcn workitem case AMDGPU::BI__builtin_amdgcn_workitem_id_x: return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024); @@ -11363,7 +12406,7 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, 
return Builder.CreateCall(F, {X, Y, M4Value}); } - // Vector intrisincs that output the post-instruction CC value. + // Vector intrinsics that output the post-instruction CC value. #define INTRINSIC_WITH_CC(NAME) \ case SystemZ::BI__builtin_##NAME: \ @@ -11823,7 +12866,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, bool isColMajor = isColMajorArg.getSExtValue(); unsigned IID; unsigned NumResults = 8; - // PTX Instructions (and LLVM instrinsics) are defined for slice _d_, yet + // PTX Instructions (and LLVM intrinsics) are defined for slice _d_, yet // for some reason nvcc builtins use _c_. switch (BuiltinID) { case NVPTX::BI__hmma_m16n16k16_st_c_f16: @@ -12046,31 +13089,6 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType); return Builder.CreateCall(Callee, Args); } - case WebAssembly::BI__builtin_wasm_mem_size: { - llvm::Type *ResultType = ConvertType(E->getType()); - Value *I = EmitScalarExpr(E->getArg(0)); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_mem_size, ResultType); - return Builder.CreateCall(Callee, I); - } - case WebAssembly::BI__builtin_wasm_mem_grow: { - llvm::Type *ResultType = ConvertType(E->getType()); - Value *Args[] = { - EmitScalarExpr(E->getArg(0)), - EmitScalarExpr(E->getArg(1)) - }; - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_mem_grow, ResultType); - return Builder.CreateCall(Callee, Args); - } - case WebAssembly::BI__builtin_wasm_current_memory: { - llvm::Type *ResultType = ConvertType(E->getType()); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType); - return Builder.CreateCall(Callee); - } - case WebAssembly::BI__builtin_wasm_grow_memory: { - Value *X = EmitScalarExpr(E->getArg(0)); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType()); - return Builder.CreateCall(Callee, X); - } case WebAssembly::BI__builtin_wasm_throw: { Value *Tag = 
EmitScalarExpr(E->getArg(0)); Value *Obj = EmitScalarExpr(E->getArg(1)); @@ -12081,6 +13099,211 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow); return Builder.CreateCall(Callee); } + case WebAssembly::BI__builtin_wasm_atomic_wait_i32: { + Value *Addr = EmitScalarExpr(E->getArg(0)); + Value *Expected = EmitScalarExpr(E->getArg(1)); + Value *Timeout = EmitScalarExpr(E->getArg(2)); + Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i32); + return Builder.CreateCall(Callee, {Addr, Expected, Timeout}); + } + case WebAssembly::BI__builtin_wasm_atomic_wait_i64: { + Value *Addr = EmitScalarExpr(E->getArg(0)); + Value *Expected = EmitScalarExpr(E->getArg(1)); + Value *Timeout = EmitScalarExpr(E->getArg(2)); + Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i64); + return Builder.CreateCall(Callee, {Addr, Expected, Timeout}); + } + case WebAssembly::BI__builtin_wasm_atomic_notify: { + Value *Addr = EmitScalarExpr(E->getArg(0)); + Value *Count = EmitScalarExpr(E->getArg(1)); + Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_notify); + return Builder.CreateCall(Callee, {Addr, Count}); + } + case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32: + case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64: + case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32: + case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64: + case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: + case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64x2_f64x2: { + Value *Src = EmitScalarExpr(E->getArg(0)); + llvm::Type *ResT = ConvertType(E->getType()); + Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_signed, + {ResT, Src->getType()}); + return Builder.CreateCall(Callee, {Src}); + } + case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32: + case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64: + case 
WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32: + case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64: + case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: + case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64x2_f64x2: { + Value *Src = EmitScalarExpr(E->getArg(0)); + llvm::Type *ResT = ConvertType(E->getType()); + Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_unsigned, + {ResT, Src->getType()}); + return Builder.CreateCall(Callee, {Src}); + } + case WebAssembly::BI__builtin_wasm_min_f32: + case WebAssembly::BI__builtin_wasm_min_f64: + case WebAssembly::BI__builtin_wasm_min_f32x4: + case WebAssembly::BI__builtin_wasm_min_f64x2: { + Value *LHS = EmitScalarExpr(E->getArg(0)); + Value *RHS = EmitScalarExpr(E->getArg(1)); + Value *Callee = CGM.getIntrinsic(Intrinsic::minimum, + ConvertType(E->getType())); + return Builder.CreateCall(Callee, {LHS, RHS}); + } + case WebAssembly::BI__builtin_wasm_max_f32: + case WebAssembly::BI__builtin_wasm_max_f64: + case WebAssembly::BI__builtin_wasm_max_f32x4: + case WebAssembly::BI__builtin_wasm_max_f64x2: { + Value *LHS = EmitScalarExpr(E->getArg(0)); + Value *RHS = EmitScalarExpr(E->getArg(1)); + Value *Callee = CGM.getIntrinsic(Intrinsic::maximum, + ConvertType(E->getType())); + return Builder.CreateCall(Callee, {LHS, RHS}); + } + case WebAssembly::BI__builtin_wasm_extract_lane_s_i8x16: + case WebAssembly::BI__builtin_wasm_extract_lane_u_i8x16: + case WebAssembly::BI__builtin_wasm_extract_lane_s_i16x8: + case WebAssembly::BI__builtin_wasm_extract_lane_u_i16x8: + case WebAssembly::BI__builtin_wasm_extract_lane_i32x4: + case WebAssembly::BI__builtin_wasm_extract_lane_i64x2: + case WebAssembly::BI__builtin_wasm_extract_lane_f32x4: + case WebAssembly::BI__builtin_wasm_extract_lane_f64x2: { + llvm::APSInt LaneConst; + if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext())) + llvm_unreachable("Constant arg isn't actually constant?"); + Value *Vec = 
EmitScalarExpr(E->getArg(0)); + Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst); + Value *Extract = Builder.CreateExtractElement(Vec, Lane); + switch (BuiltinID) { + case WebAssembly::BI__builtin_wasm_extract_lane_s_i8x16: + case WebAssembly::BI__builtin_wasm_extract_lane_s_i16x8: + return Builder.CreateSExt(Extract, ConvertType(E->getType())); + case WebAssembly::BI__builtin_wasm_extract_lane_u_i8x16: + case WebAssembly::BI__builtin_wasm_extract_lane_u_i16x8: + return Builder.CreateZExt(Extract, ConvertType(E->getType())); + case WebAssembly::BI__builtin_wasm_extract_lane_i32x4: + case WebAssembly::BI__builtin_wasm_extract_lane_i64x2: + case WebAssembly::BI__builtin_wasm_extract_lane_f32x4: + case WebAssembly::BI__builtin_wasm_extract_lane_f64x2: + return Extract; + default: + llvm_unreachable("unexpected builtin ID"); + } + } + case WebAssembly::BI__builtin_wasm_replace_lane_i8x16: + case WebAssembly::BI__builtin_wasm_replace_lane_i16x8: + case WebAssembly::BI__builtin_wasm_replace_lane_i32x4: + case WebAssembly::BI__builtin_wasm_replace_lane_i64x2: + case WebAssembly::BI__builtin_wasm_replace_lane_f32x4: + case WebAssembly::BI__builtin_wasm_replace_lane_f64x2: { + llvm::APSInt LaneConst; + if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext())) + llvm_unreachable("Constant arg isn't actually constant?"); + Value *Vec = EmitScalarExpr(E->getArg(0)); + Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst); + Value *Val = EmitScalarExpr(E->getArg(2)); + switch (BuiltinID) { + case WebAssembly::BI__builtin_wasm_replace_lane_i8x16: + case WebAssembly::BI__builtin_wasm_replace_lane_i16x8: { + llvm::Type *ElemType = ConvertType(E->getType())->getVectorElementType(); + Value *Trunc = Builder.CreateTrunc(Val, ElemType); + return Builder.CreateInsertElement(Vec, Trunc, Lane); + } + case WebAssembly::BI__builtin_wasm_replace_lane_i32x4: + case WebAssembly::BI__builtin_wasm_replace_lane_i64x2: + case 
WebAssembly::BI__builtin_wasm_replace_lane_f32x4: + case WebAssembly::BI__builtin_wasm_replace_lane_f64x2: + return Builder.CreateInsertElement(Vec, Val, Lane); + default: + llvm_unreachable("unexpected builtin ID"); + } + } + case WebAssembly::BI__builtin_wasm_add_saturate_s_i8x16: + case WebAssembly::BI__builtin_wasm_add_saturate_u_i8x16: + case WebAssembly::BI__builtin_wasm_add_saturate_s_i16x8: + case WebAssembly::BI__builtin_wasm_add_saturate_u_i16x8: + case WebAssembly::BI__builtin_wasm_sub_saturate_s_i8x16: + case WebAssembly::BI__builtin_wasm_sub_saturate_u_i8x16: + case WebAssembly::BI__builtin_wasm_sub_saturate_s_i16x8: + case WebAssembly::BI__builtin_wasm_sub_saturate_u_i16x8: { + unsigned IntNo; + switch (BuiltinID) { + case WebAssembly::BI__builtin_wasm_add_saturate_s_i8x16: + case WebAssembly::BI__builtin_wasm_add_saturate_s_i16x8: + IntNo = Intrinsic::sadd_sat; + break; + case WebAssembly::BI__builtin_wasm_add_saturate_u_i8x16: + case WebAssembly::BI__builtin_wasm_add_saturate_u_i16x8: + IntNo = Intrinsic::uadd_sat; + break; + case WebAssembly::BI__builtin_wasm_sub_saturate_s_i8x16: + case WebAssembly::BI__builtin_wasm_sub_saturate_s_i16x8: + IntNo = Intrinsic::wasm_sub_saturate_signed; + break; + case WebAssembly::BI__builtin_wasm_sub_saturate_u_i8x16: + case WebAssembly::BI__builtin_wasm_sub_saturate_u_i16x8: + IntNo = Intrinsic::wasm_sub_saturate_unsigned; + break; + default: + llvm_unreachable("unexpected builtin ID"); + } + Value *LHS = EmitScalarExpr(E->getArg(0)); + Value *RHS = EmitScalarExpr(E->getArg(1)); + Value *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); + return Builder.CreateCall(Callee, {LHS, RHS}); + } + case WebAssembly::BI__builtin_wasm_bitselect: { + Value *V1 = EmitScalarExpr(E->getArg(0)); + Value *V2 = EmitScalarExpr(E->getArg(1)); + Value *C = EmitScalarExpr(E->getArg(2)); + Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_bitselect, + ConvertType(E->getType())); + return Builder.CreateCall(Callee, {V1, V2, 
C}); + } + case WebAssembly::BI__builtin_wasm_any_true_i8x16: + case WebAssembly::BI__builtin_wasm_any_true_i16x8: + case WebAssembly::BI__builtin_wasm_any_true_i32x4: + case WebAssembly::BI__builtin_wasm_any_true_i64x2: + case WebAssembly::BI__builtin_wasm_all_true_i8x16: + case WebAssembly::BI__builtin_wasm_all_true_i16x8: + case WebAssembly::BI__builtin_wasm_all_true_i32x4: + case WebAssembly::BI__builtin_wasm_all_true_i64x2: { + unsigned IntNo; + switch (BuiltinID) { + case WebAssembly::BI__builtin_wasm_any_true_i8x16: + case WebAssembly::BI__builtin_wasm_any_true_i16x8: + case WebAssembly::BI__builtin_wasm_any_true_i32x4: + case WebAssembly::BI__builtin_wasm_any_true_i64x2: + IntNo = Intrinsic::wasm_anytrue; + break; + case WebAssembly::BI__builtin_wasm_all_true_i8x16: + case WebAssembly::BI__builtin_wasm_all_true_i16x8: + case WebAssembly::BI__builtin_wasm_all_true_i32x4: + case WebAssembly::BI__builtin_wasm_all_true_i64x2: + IntNo = Intrinsic::wasm_alltrue; + break; + default: + llvm_unreachable("unexpected builtin ID"); + } + Value *Vec = EmitScalarExpr(E->getArg(0)); + Value *Callee = CGM.getIntrinsic(IntNo, Vec->getType()); + return Builder.CreateCall(Callee, {Vec}); + } + case WebAssembly::BI__builtin_wasm_abs_f32x4: + case WebAssembly::BI__builtin_wasm_abs_f64x2: { + Value *Vec = EmitScalarExpr(E->getArg(0)); + Value *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType()); + return Builder.CreateCall(Callee, {Vec}); + } + case WebAssembly::BI__builtin_wasm_sqrt_f32x4: + case WebAssembly::BI__builtin_wasm_sqrt_f64x2: { + Value *Vec = EmitScalarExpr(E->getArg(0)); + Value *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType()); + return Builder.CreateCall(Callee, {Vec}); + } default: return nullptr; diff --git a/lib/CodeGen/CGCUDANV.cpp b/lib/CodeGen/CGCUDANV.cpp index 5fcc9e011bcb..1c578bd151bd 100644 --- a/lib/CodeGen/CGCUDANV.cpp +++ b/lib/CodeGen/CGCUDANV.cpp @@ -137,7 +137,7 @@ CGNVCUDARuntime::addUnderscoredPrefixToName(StringRef FuncName) 
const { CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) : CGCUDARuntime(CGM), Context(CGM.getLLVMContext()), TheModule(CGM.getModule()), - RelocatableDeviceCode(CGM.getLangOpts().CUDARelocatableDeviceCode) { + RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode) { CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); @@ -353,8 +353,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { // global variable and save a reference in GpuBinaryHandle to be cleaned up // in destructor on exit. Then associate all known kernels with the GPU binary // handle so CUDA runtime can figure out what to call on the GPU side. - std::unique_ptr<llvm::MemoryBuffer> CudaGpuBinary; - if (!IsHIP) { + std::unique_ptr<llvm::MemoryBuffer> CudaGpuBinary = nullptr; + if (!CudaGpuBinaryFileName.empty()) { llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> CudaGpuBinaryOrErr = llvm::MemoryBuffer::getFileOrSTDIN(CudaGpuBinaryFileName); if (std::error_code EC = CudaGpuBinaryOrErr.getError()) { @@ -388,15 +388,23 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { ModuleIDSectionName = "__hip_module_id"; ModuleIDPrefix = "__hip_"; - // For HIP, create an external symbol __hip_fatbin in section .hip_fatbin. - // The external symbol is supposed to contain the fat binary but will be - // populated somewhere else, e.g. by lld through link script. - FatBinStr = new llvm::GlobalVariable( + if (CudaGpuBinary) { + // If fatbin is available from early finalization, create a string + // literal containing the fat binary loaded from the given file. + FatBinStr = makeConstantString(CudaGpuBinary->getBuffer(), "", + FatbinConstantName, 8); + } else { + // If fatbin is not available, create an external symbol + // __hip_fatbin in section .hip_fatbin. The external symbol is supposed + // to contain the fat binary but will be populated somewhere else, + // e.g. by lld through link script. 
+ FatBinStr = new llvm::GlobalVariable( CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, nullptr, "__hip_fatbin", nullptr, llvm::GlobalVariable::NotThreadLocal); - cast<llvm::GlobalVariable>(FatBinStr)->setSection(FatbinConstantName); + cast<llvm::GlobalVariable>(FatBinStr)->setSection(FatbinConstantName); + } FatMagic = HIPFatMagic; } else { @@ -447,6 +455,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { // thread safety of the loaded program. Therefore we can assume sequential // execution of constructor functions here. if (IsHIP) { + auto Linkage = CudaGpuBinary ? llvm::GlobalValue::InternalLinkage : + llvm::GlobalValue::LinkOnceAnyLinkage; llvm::BasicBlock *IfBlock = llvm::BasicBlock::Create(Context, "if", ModuleCtorFunc); llvm::BasicBlock *ExitBlock = @@ -455,10 +465,13 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { // of HIP ABI. GpuBinaryHandle = new llvm::GlobalVariable( TheModule, VoidPtrPtrTy, /*isConstant=*/false, - llvm::GlobalValue::LinkOnceAnyLinkage, + Linkage, /*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__hip_gpubin_handle"); GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getQuantity()); + // Prevent the weak symbol in different shared libraries being merged. + if (Linkage != llvm::GlobalValue::InternalLinkage) + GpuBinaryHandle->setVisibility(llvm::GlobalValue::HiddenVisibility); Address GpuBinaryAddr( GpuBinaryHandle, CharUnits::fromQuantity(GpuBinaryHandle->getAlignment())); @@ -507,7 +520,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { // Generate a unique module ID. 
SmallString<64> ModuleID; llvm::raw_svector_ostream OS(ModuleID); - OS << ModuleIDPrefix << llvm::format("%x", FatbinWrapper->getGUID()); + OS << ModuleIDPrefix << llvm::format("%" PRIx64, FatbinWrapper->getGUID()); llvm::Constant *ModuleIDConstant = makeConstantString(ModuleID.str(), "", ModuleIDSectionName, 32); diff --git a/lib/CodeGen/CGCXX.cpp b/lib/CodeGen/CGCXX.cpp index d5945be43458..8b0733fbec3e 100644 --- a/lib/CodeGen/CGCXX.cpp +++ b/lib/CodeGen/CGCXX.cpp @@ -23,7 +23,7 @@ #include "clang/AST/Mangle.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtCXX.h" -#include "clang/Frontend/CodeGenOptions.h" +#include "clang/Basic/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" using namespace clang; using namespace CodeGen; @@ -276,7 +276,7 @@ static CGCallee BuildAppleKextVirtualCall(CodeGenFunction &CGF, CGF.Builder.CreateConstInBoundsGEP1_64(VTable, VTableIndex, "vfnkxt"); llvm::Value *VFunc = CGF.Builder.CreateAlignedLoad(VFuncPtr, CGF.PointerAlignInBytes); - CGCallee Callee(GD.getDecl()->getCanonicalDecl(), VFunc); + CGCallee Callee(GD, VFunc); return Callee; } diff --git a/lib/CodeGen/CGCXXABI.cpp b/lib/CodeGen/CGCXXABI.cpp index 3b1b47cdfe07..ed168b1ce72d 100644 --- a/lib/CodeGen/CGCXXABI.cpp +++ b/lib/CodeGen/CGCXXABI.cpp @@ -132,7 +132,7 @@ void CGCXXABI::buildThisParam(CodeGenFunction &CGF, FunctionArgList &params) { // generation. Maybe we can come up with a better way? 
auto *ThisDecl = ImplicitParamDecl::Create( CGM.getContext(), nullptr, MD->getLocation(), - &CGM.getContext().Idents.get("this"), MD->getThisType(CGM.getContext()), + &CGM.getContext().Idents.get("this"), MD->getThisType(), ImplicitParamDecl::CXXThis); params.push_back(ThisDecl); CGF.CXXABIThisDecl = ThisDecl; diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp index fa51dc30c58b..7d494bb1f1c7 100644 --- a/lib/CodeGen/CGCall.cpp +++ b/lib/CodeGen/CGCall.cpp @@ -23,11 +23,11 @@ #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "clang/CodeGen/SwiftCallingConv.h" -#include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" @@ -59,6 +59,7 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) { case CC_X86Pascal: return llvm::CallingConv::C; // TODO: Add support for __vectorcall to LLVM. case CC_X86VectorCall: return llvm::CallingConv::X86_VectorCall; + case CC_AArch64VectorCall: return llvm::CallingConv::AArch64_VectorCall; case CC_SpirFunction: return llvm::CallingConv::SPIR_FUNC; case CC_OpenCLKernel: return CGM.getTargetCodeGenInfo().getOpenCLKernelCallingConv(); case CC_PreserveMost: return llvm::CallingConv::PreserveMost; @@ -67,11 +68,13 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) { } } -/// Derives the 'this' type for codegen purposes, i.e. ignoring method +/// Derives the 'this' type for codegen purposes, i.e. ignoring method CVR /// qualification. -/// FIXME: address space qualification? 
-static CanQualType GetThisType(ASTContext &Context, const CXXRecordDecl *RD) { +static CanQualType GetThisType(ASTContext &Context, const CXXRecordDecl *RD, + const CXXMethodDecl *MD) { QualType RecTy = Context.getTagDeclType(RD)->getCanonicalTypeInternal(); + if (MD) + RecTy = Context.getAddrSpaceQualType(RecTy, MD->getTypeQualifiers().getAddressSpace()); return Context.getPointerType(CanQualType::CreateUnsafe(RecTy)); } @@ -214,6 +217,9 @@ static CallingConv getCallingConventionForDecl(const Decl *D, bool IsWindows) { if (PcsAttr *PCS = D->getAttr<PcsAttr>()) return (PCS->getPCS() == PcsAttr::AAPCS ? CC_AAPCS : CC_AAPCS_VFP); + if (D->hasAttr<AArch64VectorPcsAttr>()) + return CC_AArch64VectorCall; + if (D->hasAttr<IntelOclBiccAttr>()) return CC_IntelOclBicc; @@ -246,7 +252,7 @@ CodeGenTypes::arrangeCXXMethodType(const CXXRecordDecl *RD, // Add the 'this' pointer. if (RD) - argTypes.push_back(GetThisType(Context, RD)); + argTypes.push_back(GetThisType(Context, RD, MD)); else argTypes.push_back(Context.VoidPtrTy); @@ -302,7 +308,7 @@ CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD, SmallVector<CanQualType, 16> argTypes; SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos; - argTypes.push_back(GetThisType(Context, MD->getParent())); + argTypes.push_back(GetThisType(Context, MD->getParent(), MD)); bool PassParams = true; @@ -529,7 +535,7 @@ const CGFunctionInfo & CodeGenTypes::arrangeUnprototypedMustTailThunk(const CXXMethodDecl *MD) { assert(MD->isVirtual() && "only methods have thunks"); CanQual<FunctionProtoType> FTP = GetFormalType(MD); - CanQualType ArgTys[] = { GetThisType(Context, MD->getParent()) }; + CanQualType ArgTys[] = { GetThisType(Context, MD->getParent(), MD) }; return arrangeLLVMFunctionInfo(Context.VoidTy, /*instanceMethod=*/false, /*chainCall=*/false, ArgTys, FTP->getExtInfo(), {}, RequiredArgs(1)); @@ -543,7 +549,7 @@ CodeGenTypes::arrangeMSCtorClosure(const CXXConstructorDecl *CD, CanQual<FunctionProtoType> FTP = 
GetFormalType(CD); SmallVector<CanQualType, 2> ArgTys; const CXXRecordDecl *RD = CD->getParent(); - ArgTys.push_back(GetThisType(Context, RD)); + ArgTys.push_back(GetThisType(Context, RD, CD)); if (CT == Ctor_CopyingClosure) ArgTys.push_back(*FTP->param_type_begin()); if (RD->getNumVBases() > 0) @@ -741,8 +747,8 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType, FunctionType::ExtInfo info, ArrayRef<FunctionProtoType::ExtParameterInfo> paramInfos, RequiredArgs required) { - assert(std::all_of(argTypes.begin(), argTypes.end(), - [](CanQualType T) { return T.isCanonicalAsParam(); })); + assert(llvm::all_of(argTypes, + [](CanQualType T) { return T.isCanonicalAsParam(); })); // Lookup or create unique function info. llvm::FoldingSetNodeID ID; @@ -1253,8 +1259,8 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty, // Otherwise do coercion through memory. This is stupid, but simple. Address Tmp = CreateTempAllocaForCoercion(CGF, Ty, Src.getAlignment()); - Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.AllocaInt8PtrTy); - Address SrcCasted = CGF.Builder.CreateBitCast(Src, CGF.AllocaInt8PtrTy); + Address Casted = CGF.Builder.CreateElementBitCast(Tmp,CGF.Int8Ty); + Address SrcCasted = CGF.Builder.CreateElementBitCast(Src,CGF.Int8Ty); CGF.Builder.CreateMemCpy(Casted, SrcCasted, llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize), false); @@ -1335,8 +1341,8 @@ static void CreateCoercedStore(llvm::Value *Src, // to that information. 
Address Tmp = CreateTempAllocaForCoercion(CGF, SrcTy, Dst.getAlignment()); CGF.Builder.CreateStore(Src, Tmp); - Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.AllocaInt8PtrTy); - Address DstCasted = CGF.Builder.CreateBitCast(Dst, CGF.AllocaInt8PtrTy); + Address Casted = CGF.Builder.CreateElementBitCast(Tmp,CGF.Int8Ty); + Address DstCasted = CGF.Builder.CreateElementBitCast(Dst,CGF.Int8Ty); CGF.Builder.CreateMemCpy(DstCasted, Casted, llvm::ConstantInt::get(CGF.IntPtrTy, DstSize), false); @@ -1709,6 +1715,8 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, if (CodeGenOpts.DisableRedZone) FuncAttrs.addAttribute(llvm::Attribute::NoRedZone); + if (CodeGenOpts.IndirectTlsSegRefs) + FuncAttrs.addAttribute("indirect-tls-seg-refs"); if (CodeGenOpts.NoImplicitFloat) FuncAttrs.addAttribute(llvm::Attribute::NoImplicitFloat); @@ -1784,6 +1792,11 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, FuncAttrs.addAttribute("stackrealign"); if (CodeGenOpts.Backchain) FuncAttrs.addAttribute("backchain"); + + // FIXME: The interaction of this attribute with the SLH command line flag + // has not been determined. 
+ if (CodeGenOpts.SpeculativeLoadHardening) + FuncAttrs.addAttribute(llvm::Attribute::SpeculativeLoadHardening); } if (getLangOpts().assumeFunctionsAreConvergent()) { @@ -1803,6 +1816,12 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, if (CodeGenOpts.FlushDenorm) FuncAttrs.addAttribute("nvptx-f32ftz", "true"); } + + for (StringRef Attr : CodeGenOpts.DefaultFunctionAttrs) { + StringRef Var, Value; + std::tie(Var, Value) = Attr.split('='); + FuncAttrs.addAttribute(Var, Value); + } } void CodeGenModule::AddDefaultFnAttrs(llvm::Function &F) { @@ -1828,7 +1847,7 @@ void CodeGenModule::ConstructAttributeList( AddAttributesFromFunctionProtoType(getContext(), FuncAttrs, CalleeInfo.getCalleeFunctionProtoType()); - const Decl *TargetDecl = CalleeInfo.getCalleeDecl(); + const Decl *TargetDecl = CalleeInfo.getCalleeDecl().getDecl(); bool HasOptnone = false; // FIXME: handle sseregparm someday... @@ -1845,6 +1864,8 @@ void CodeGenModule::ConstructAttributeList( FuncAttrs.addAttribute(llvm::Attribute::NoDuplicate); if (TargetDecl->hasAttr<ConvergentAttr>()) FuncAttrs.addAttribute(llvm::Attribute::Convergent); + if (TargetDecl->hasAttr<SpeculativeLoadHardeningAttr>()) + FuncAttrs.addAttribute(llvm::Attribute::SpeculativeLoadHardening); if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(TargetDecl)) { AddAttributesFromFunctionProtoType( @@ -1936,7 +1957,7 @@ void CodeGenModule::ConstructAttributeList( FuncAttrs.addAttribute("disable-tail-calls", llvm::toStringRef(DisableTailCalls)); - GetCPUAndFeaturesAttributes(TargetDecl, FuncAttrs); + GetCPUAndFeaturesAttributes(CalleeInfo.getCalleeDecl(), FuncAttrs); } ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI); @@ -2327,7 +2348,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, } else { // Load scalar value from indirect argument. 
llvm::Value *V = - EmitLoadOfScalar(ParamAddr, false, Ty, Arg->getLocStart()); + EmitLoadOfScalar(ParamAddr, false, Ty, Arg->getBeginLoc()); if (isPromoted) V = emitArgumentDemotion(*this, Arg, V); @@ -2389,7 +2410,10 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, if (!AVAttr) if (const auto *TOTy = dyn_cast<TypedefType>(OTy)) AVAttr = TOTy->getDecl()->getAttr<AlignValueAttr>(); - if (AVAttr) { + if (AVAttr && !SanOpts.has(SanitizerKind::Alignment)) { + // If alignment-assumption sanitizer is enabled, we do *not* add + // alignment attribute here, but emit normal alignment assumption, + // so the UBSAN check could function. llvm::Value *AlignmentValue = EmitScalarExpr(AVAttr->getAlignment()); llvm::ConstantInt *AlignmentCI = @@ -2490,7 +2514,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // Match to what EmitParmDecl is expecting for this type. if (CodeGenFunction::hasScalarEvaluationKind(Ty)) { llvm::Value *V = - EmitLoadOfScalar(Alloca, false, Ty, Arg->getLocStart()); + EmitLoadOfScalar(Alloca, false, Ty, Arg->getBeginLoc()); if (isPromoted) V = emitArgumentDemotion(*this, Arg, V); ArgVals.push_back(ParamValue::forDirect(V)); @@ -3063,8 +3087,9 @@ void CodeGenFunction::EmitDelegateCallArg(CallArgList &args, QualType type = param->getType(); - assert(!isInAllocaArgument(CGM.getCXXABI(), type) && - "cannot emit delegate call arguments for inalloca arguments!"); + if (isInAllocaArgument(CGM.getCXXABI(), type)) { + CGM.ErrorUnsupported(param, "forwarded non-trivially copyable parameter"); + } // GetAddrOfLocalVar returns a pointer-to-pointer for references, // but the argument needs to be the original pointer. 
@@ -3945,15 +3970,28 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } else if (I->hasLValue()) { auto LV = I->getKnownLValue(); auto AS = LV.getAddressSpace(); + if ((!ArgInfo.getIndirectByVal() && (LV.getAlignment() >= - getContext().getTypeAlignInChars(I->Ty))) || - (ArgInfo.getIndirectByVal() && - ((AS != LangAS::Default && AS != LangAS::opencl_private && - AS != CGM.getASTAllocaAddressSpace())))) { + getContext().getTypeAlignInChars(I->Ty)))) { + NeedCopy = true; + } + if (!getLangOpts().OpenCL) { + if ((ArgInfo.getIndirectByVal() && + (AS != LangAS::Default && + AS != CGM.getASTAllocaAddressSpace()))) { + NeedCopy = true; + } + } + // For OpenCL even if RV is located in default or alloca address space + // we don't want to perform address space cast for it. + else if ((ArgInfo.getIndirectByVal() && + Addr.getType()->getAddressSpace() != IRFuncTy-> + getParamType(FirstIRArg)->getPointerAddressSpace())) { NeedCopy = true; } } + if (NeedCopy) { // Create an aligned temporary, and copy to it. Address AI = CreateMemTempWithoutCast( @@ -4235,6 +4273,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } #endif + // Update the largest vector width if any arguments have vector types. + for (unsigned i = 0; i < IRCallArgs.size(); ++i) { + if (auto *VT = dyn_cast<llvm::VectorType>(IRCallArgs[i]->getType())) + LargestVectorWidth = std::max(LargestVectorWidth, + VT->getPrimitiveSizeInBits()); + } + // Compute the calling convention and attributes. unsigned CallingConv; llvm::AttributeList Attrs; @@ -4248,8 +4293,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Apply always_inline to all calls within flatten functions. // FIXME: should this really take priority over __try, below? 
if (CurCodeDecl && CurCodeDecl->hasAttr<FlattenAttr>() && - !(Callee.getAbstractInfo().getCalleeDecl() && - Callee.getAbstractInfo().getCalleeDecl()->hasAttr<NoInlineAttr>())) { + !(Callee.getAbstractInfo().getCalleeDecl().getDecl() && + Callee.getAbstractInfo() + .getCalleeDecl() + .getDecl() + ->hasAttr<NoInlineAttr>())) { Attrs = Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::AlwaysInline); @@ -4315,6 +4363,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (!CI->getType()->isVoidTy()) CI->setName("call"); + // Update largest vector width from the return type. + if (auto *VT = dyn_cast<llvm::VectorType>(CI->getType())) + LargestVectorWidth = std::max(LargestVectorWidth, + VT->getPrimitiveSizeInBits()); + // Insert instrumentation or attach profile metadata at indirect call sites. // For more details, see the comment before the definition of // IPVK_IndirectCallTarget in InstrProfData.inc. @@ -4329,7 +4382,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Suppress tail calls if requested. if (llvm::CallInst *Call = dyn_cast<llvm::CallInst>(CI)) { - const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl(); + const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl().getDecl(); if (TargetDecl && TargetDecl->hasAttr<NotTailCalledAttr>()) Call->setTailCallKind(llvm::CallInst::TCK_NoTail); } @@ -4476,7 +4529,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } (); // Emit the assume_aligned check on the return value. 
- const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl(); + const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl().getDecl(); if (Ret.isScalar() && TargetDecl) { if (const auto *AA = TargetDecl->getAttr<AssumeAlignedAttr>()) { llvm::Value *OffsetValue = nullptr; @@ -4485,13 +4538,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, llvm::Value *Alignment = EmitScalarExpr(AA->getAlignment()); llvm::ConstantInt *AlignmentCI = cast<llvm::ConstantInt>(Alignment); - EmitAlignmentAssumption(Ret.getScalarVal(), AlignmentCI->getZExtValue(), - OffsetValue); + EmitAlignmentAssumption(Ret.getScalarVal(), RetTy, Loc, AA->getLocation(), + AlignmentCI->getZExtValue(), OffsetValue); } else if (const auto *AA = TargetDecl->getAttr<AllocAlignAttr>()) { - llvm::Value *ParamVal = - CallArgs[AA->getParamIndex().getLLVMIndex()].getRValue( - *this).getScalarVal(); - EmitAlignmentAssumption(Ret.getScalarVal(), ParamVal); + llvm::Value *AlignmentVal = CallArgs[AA->getParamIndex().getLLVMIndex()] + .getRValue(*this) + .getScalarVal(); + EmitAlignmentAssumption(Ret.getScalarVal(), RetTy, Loc, AA->getLocation(), + AlignmentVal); } } @@ -4502,8 +4556,8 @@ CGCallee CGCallee::prepareConcreteCallee(CodeGenFunction &CGF) const { if (isVirtual()) { const CallExpr *CE = getVirtualCallExpr(); return CGF.CGM.getCXXABI().getVirtualFunctionPointer( - CGF, getVirtualMethodDecl(), getThisAddress(), - getFunctionType(), CE ? CE->getLocStart() : SourceLocation()); + CGF, getVirtualMethodDecl(), getThisAddress(), getFunctionType(), + CE ? CE->getBeginLoc() : SourceLocation()); } return *this; diff --git a/lib/CodeGen/CGCall.h b/lib/CodeGen/CGCall.h index 99a36e4e12f1..c300808bea28 100644 --- a/lib/CodeGen/CGCall.h +++ b/lib/CodeGen/CGCall.h @@ -46,21 +46,21 @@ class CGCalleeInfo { /// The function prototype of the callee. const FunctionProtoType *CalleeProtoTy; /// The function declaration of the callee. 
- const Decl *CalleeDecl; + GlobalDecl CalleeDecl; public: - explicit CGCalleeInfo() : CalleeProtoTy(nullptr), CalleeDecl(nullptr) {} - CGCalleeInfo(const FunctionProtoType *calleeProtoTy, const Decl *calleeDecl) + explicit CGCalleeInfo() : CalleeProtoTy(nullptr), CalleeDecl() {} + CGCalleeInfo(const FunctionProtoType *calleeProtoTy, GlobalDecl calleeDecl) : CalleeProtoTy(calleeProtoTy), CalleeDecl(calleeDecl) {} CGCalleeInfo(const FunctionProtoType *calleeProtoTy) - : CalleeProtoTy(calleeProtoTy), CalleeDecl(nullptr) {} - CGCalleeInfo(const Decl *calleeDecl) + : CalleeProtoTy(calleeProtoTy), CalleeDecl() {} + CGCalleeInfo(GlobalDecl calleeDecl) : CalleeProtoTy(nullptr), CalleeDecl(calleeDecl) {} const FunctionProtoType *getCalleeFunctionProtoType() const { return CalleeProtoTy; } - const Decl *getCalleeDecl() const { return CalleeDecl; } + const GlobalDecl getCalleeDecl() const { return CalleeDecl; } }; /// All available information about a concrete callee. @@ -171,7 +171,7 @@ public: } CGCalleeInfo getAbstractInfo() const { if (isVirtual()) - return VirtualInfo.MD.getDecl(); + return VirtualInfo.MD; assert(isOrdinary()); return AbstractInfo; } diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp index ec4eb000a3b9..ee150a792b76 100644 --- a/lib/CodeGen/CGClass.cpp +++ b/lib/CodeGen/CGClass.cpp @@ -16,14 +16,15 @@ #include "CGDebugInfo.h" #include "CGRecordLayout.h" #include "CodeGenFunction.h" +#include "TargetInfo.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/EvaluatedExprVisitor.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtCXX.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/TargetBuiltins.h" #include "clang/CodeGen/CGFunctionInfo.h" -#include "clang/Frontend/CodeGenOptions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Metadata.h" #include "llvm/Transforms/Utils/SanitizerStats.h" @@ -829,7 +830,7 @@ void CodeGenFunction::EmitConstructorBody(FunctionArgList 
&Args) { // delegation optimization. if (CtorType == Ctor_Complete && IsConstructorDelegationValid(Ctor) && CGM.getTarget().getCXXABI().hasConstructorVariants()) { - EmitDelegateCXXConstructorCall(Ctor, Ctor_Base, Args, Ctor->getLocEnd()); + EmitDelegateCXXConstructorCall(Ctor, Ctor_Base, Args, Ctor->getEndLoc()); return; } @@ -2012,8 +2013,19 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, bool NewPointerIsChecked) { CallArgList Args; + LangAS SlotAS = E->getType().getAddressSpace(); + QualType ThisType = D->getThisType(); + LangAS ThisAS = ThisType.getTypePtr()->getPointeeType().getAddressSpace(); + llvm::Value *ThisPtr = This.getPointer(); + if (SlotAS != ThisAS) { + unsigned TargetThisAS = getContext().getTargetAddressSpace(ThisAS); + llvm::Type *NewType = + ThisPtr->getType()->getPointerElementType()->getPointerTo(TargetThisAS); + ThisPtr = getTargetHooks().performAddrSpaceCast(*this, This.getPointer(), + ThisAS, SlotAS, NewType); + } // Push the this ptr. - Args.add(RValue::get(This.getPointer()), D->getThisType(getContext())); + Args.add(RValue::get(ThisPtr), D->getThisType()); // If this is a trivial constructor, emit a memcpy now before we lose // the alignment information on the argument. 
@@ -2122,7 +2134,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, CGM.getAddrOfCXXStructor(D, getFromCtorType(Type)); const CGFunctionInfo &Info = CGM.getTypes().arrangeCXXConstructorCall( Args, D, Type, ExtraArgs.Prefix, ExtraArgs.Suffix, PassPrototypeArgs); - CGCallee Callee = CGCallee::forDirect(CalleePtr, D); + CGCallee Callee = CGCallee::forDirect(CalleePtr, GlobalDecl(D, Type)); EmitCall(Info, Callee, ReturnValueSlot(), Args); // Generate vtable assumptions if we're constructing a complete object @@ -2147,7 +2159,7 @@ void CodeGenFunction::EmitInheritedCXXConstructorCall( const CXXConstructorDecl *D, bool ForVirtualBase, Address This, bool InheritedFromVBase, const CXXInheritedCtorInitExpr *E) { CallArgList Args; - CallArg ThisArg(RValue::get(This.getPointer()), D->getThisType(getContext())); + CallArg ThisArg(RValue::get(This.getPointer()), D->getThisType()); // Forward the parameters. if (InheritedFromVBase && @@ -2196,6 +2208,7 @@ void CodeGenFunction::EmitInlinedInheritingCXXConstructorCall( GlobalDecl GD(Ctor, CtorType); InlinedInheritingConstructorScope Scope(*this, GD); ApplyInlineDebugLocation DebugScope(*this, GD); + RunCleanupsScope RunCleanups(*this); // Save the arguments to be passed to the inherited constructor. CXXInheritedCtorInitExprArgs = Args; @@ -2271,7 +2284,7 @@ CodeGenFunction::EmitSynthesizedCXXCopyCtorCall(const CXXConstructorDecl *D, CallArgList Args; // Push the this ptr. - Args.add(RValue::get(This.getPointer()), D->getThisType(getContext())); + Args.add(RValue::get(This.getPointer()), D->getThisType()); // Push the src ptr. QualType QT = *(FPT->param_type_begin()); @@ -2808,7 +2821,7 @@ void CodeGenFunction::EmitForwardingCallToLambda( // variadic arguments. // Now emit our call. 
- auto callee = CGCallee::forDirect(calleePtr, callOperator); + auto callee = CGCallee::forDirect(calleePtr, GlobalDecl(callOperator)); RValue RV = EmitCall(calleeFnInfo, callee, returnSlot, callArgs); // If necessary, copy the returned value into the slot. @@ -2839,12 +2852,12 @@ void CodeGenFunction::EmitLambdaBlockInvokeBody() { CallArgList CallArgs; QualType ThisType = getContext().getPointerType(getContext().getRecordType(Lambda)); - Address ThisPtr = GetAddrOfBlockDecl(variable, false); + Address ThisPtr = GetAddrOfBlockDecl(variable); CallArgs.add(RValue::get(ThisPtr.getPointer()), ThisType); // Add the rest of the parameters. for (auto param : BD->parameters()) - EmitDelegateCallArg(CallArgs, param, param->getLocStart()); + EmitDelegateCallArg(CallArgs, param, param->getBeginLoc()); assert(!Lambda->isGenericLambda() && "generic lambda interconversion to block not implemented"); @@ -2863,7 +2876,7 @@ void CodeGenFunction::EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD) { // Add the rest of the parameters. for (auto Param : MD->parameters()) - EmitDelegateCallArg(CallArgs, Param, Param->getLocStart()); + EmitDelegateCallArg(CallArgs, Param, Param->getBeginLoc()); const CXXMethodDecl *CallOp = Lambda->getLambdaCallOperator(); // For a generic lambda, find the corresponding call operator specialization diff --git a/lib/CodeGen/CGCleanup.cpp b/lib/CodeGen/CGCleanup.cpp index 0a766d176200..3743d24f11fc 100644 --- a/lib/CodeGen/CGCleanup.cpp +++ b/lib/CodeGen/CGCleanup.cpp @@ -366,7 +366,7 @@ static llvm::SwitchInst *TransitionToCleanupSwitch(CodeGenFunction &CGF, llvm::BasicBlock *Block) { // If it's a branch, turn it into a switch whose default // destination is its original target. 
- llvm::TerminatorInst *Term = Block->getTerminator(); + llvm::Instruction *Term = Block->getTerminator(); assert(Term && "can't transition block without terminator"); if (llvm::BranchInst *Br = dyn_cast<llvm::BranchInst>(Term)) { @@ -589,7 +589,7 @@ static void ForwardPrebranchedFallthrough(llvm::BasicBlock *Exit, llvm::BasicBlock *To) { // Exit is the exit block of a cleanup, so it always terminates in // an unconditional branch or a switch. - llvm::TerminatorInst *Term = Exit->getTerminator(); + llvm::Instruction *Term = Exit->getTerminator(); if (llvm::BranchInst *Br = dyn_cast<llvm::BranchInst>(Term)) { assert(Br->isUnconditional() && Br->getSuccessor(0) == From); diff --git a/lib/CodeGen/CGCoroutine.cpp b/lib/CodeGen/CGCoroutine.cpp index 4f525c8aac85..80fa7c873631 100644 --- a/lib/CodeGen/CGCoroutine.cpp +++ b/lib/CodeGen/CGCoroutine.cpp @@ -93,10 +93,10 @@ static void createCoroData(CodeGenFunction &CGF, CallExpr const *CoroIdExpr = nullptr) { if (CurCoro.Data) { if (CurCoro.Data->CoroIdExpr) - CGF.CGM.Error(CoroIdExpr->getLocStart(), + CGF.CGM.Error(CoroIdExpr->getBeginLoc(), "only one __builtin_coro_id can be used in a function"); else if (CoroIdExpr) - CGF.CGM.Error(CoroIdExpr->getLocStart(), + CGF.CGM.Error(CoroIdExpr->getBeginLoc(), "__builtin_coro_id shall not be used in a C++ coroutine"); else llvm_unreachable("EmitCoroutineBodyStatement called twice?"); @@ -444,7 +444,7 @@ struct CallCoroDelete final : public EHScopeStack::Cleanup { // We should have captured coro.free from the emission of deallocate. 
auto *CoroFree = CGF.CurCoro.Data->LastCoroFree; if (!CoroFree) { - CGF.CGM.Error(Deallocate->getLocStart(), + CGF.CGM.Error(Deallocate->getBeginLoc(), "Deallocation expressoin does not refer to coro.free"); return; } @@ -654,7 +654,7 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) { EmitBlock(BodyBB); } - auto Loc = S.getLocStart(); + auto Loc = S.getBeginLoc(); CXXCatchStmt Catch(Loc, /*exDecl=*/nullptr, CurCoro.Data->ExceptionHandler); auto *TryStmt = @@ -707,8 +707,8 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E, if (CurCoro.Data && CurCoro.Data->CoroBegin) { return RValue::get(CurCoro.Data->CoroBegin); } - CGM.Error(E->getLocStart(), "this builtin expect that __builtin_coro_begin " - "has been used earlier in this function"); + CGM.Error(E->getBeginLoc(), "this builtin expect that __builtin_coro_begin " + "has been used earlier in this function"); auto NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy()); return RValue::get(NullPtr); } @@ -722,7 +722,7 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E, Args.push_back(CurCoro.Data->CoroId); break; } - CGM.Error(E->getLocStart(), "this builtin expect that __builtin_coro_id has" + CGM.Error(E->getBeginLoc(), "this builtin expect that __builtin_coro_id has" " been used earlier in this function"); // Fallthrough to the next case to add TokenNone as the first argument. 
LLVM_FALLTHROUGH; diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp index 5be6fb3e4245..41f8721468a3 100644 --- a/lib/CodeGen/CGDebugInfo.cpp +++ b/lib/CodeGen/CGDebugInfo.cpp @@ -25,10 +25,10 @@ #include "clang/AST/DeclTemplate.h" #include "clang/AST/Expr.h" #include "clang/AST/RecordLayout.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/Version.h" -#include "clang/Frontend/CodeGenOptions.h" #include "clang/Frontend/FrontendOptions.h" #include "clang/Lex/HeaderSearchOptions.h" #include "clang/Lex/ModuleMap.h" @@ -41,6 +41,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MD5.h" @@ -180,8 +181,7 @@ void CGDebugInfo::setLocation(SourceLocation Loc) { SourceManager &SM = CGM.getContext().getSourceManager(); auto *Scope = cast<llvm::DIScope>(LexicalBlockStack.back()); PresumedLoc PCLoc = SM.getPresumedLoc(CurLoc); - - if (PCLoc.isInvalid() || Scope->getFilename() == PCLoc.getFilename()) + if (PCLoc.isInvalid() || Scope->getFile() == getOrCreateFile(CurLoc)) return; if (auto *LBF = dyn_cast<llvm::DILexicalBlockFile>(Scope)) { @@ -220,7 +220,7 @@ llvm::DIScope *CGDebugInfo::getContextDescriptor(const Decl *Context, if (const auto *RDecl = dyn_cast<RecordDecl>(Context)) if (!RDecl->isDependentType()) return getOrCreateType(CGM.getContext().getTypeDeclType(RDecl), - getOrCreateMainFile()); + TheCU->getFile()); return Default; } @@ -234,6 +234,9 @@ PrintingPolicy CGDebugInfo::getPrintingPolicy() const { if (CGM.getCodeGenOpts().EmitCodeView) PP.MSVCFormatting = true; + // Apply -fdebug-prefix-map. 
+ PP.RemapFilePaths = true; + PP.remapPath = [this](StringRef Path) { return remapDIPath(Path); }; return PP; } @@ -401,19 +404,18 @@ Optional<StringRef> CGDebugInfo::getSource(const SourceManager &SM, llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) { if (!Loc.isValid()) // If Location is not valid then use main input file. - return getOrCreateMainFile(); + return TheCU->getFile(); SourceManager &SM = CGM.getContext().getSourceManager(); PresumedLoc PLoc = SM.getPresumedLoc(Loc); - if (PLoc.isInvalid() || StringRef(PLoc.getFilename()).empty()) + StringRef FileName = PLoc.getFilename(); + if (PLoc.isInvalid() || FileName.empty()) // If the location is not valid then use main input file. - return getOrCreateMainFile(); + return TheCU->getFile(); // Cache the results. - const char *fname = PLoc.getFilename(); - auto It = DIFileCache.find(fname); - + auto It = DIFileCache.find(FileName.data()); if (It != DIFileCache.end()) { // Verify that the information still exists. if (llvm::Metadata *V = It->second) @@ -426,22 +428,48 @@ llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) { Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo; if (CSKind) CSInfo.emplace(*CSKind, Checksum); - - llvm::DIFile *F = DBuilder.createFile( - remapDIPath(PLoc.getFilename()), remapDIPath(getCurrentDirname()), CSInfo, - getSource(SM, SM.getFileID(Loc))); - - DIFileCache[fname].reset(F); + return createFile(FileName, CSInfo, getSource(SM, SM.getFileID(Loc))); +} + +llvm::DIFile * +CGDebugInfo::createFile(StringRef FileName, + Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo, + Optional<StringRef> Source) { + StringRef Dir; + StringRef File; + std::string RemappedFile = remapDIPath(FileName); + std::string CurDir = remapDIPath(getCurrentDirname()); + SmallString<128> DirBuf; + SmallString<128> FileBuf; + if (llvm::sys::path::is_absolute(RemappedFile)) { + // Strip the common prefix (if it is more than just "/") from current + // directory and FileName for a 
more space-efficient encoding. + auto FileIt = llvm::sys::path::begin(RemappedFile); + auto FileE = llvm::sys::path::end(RemappedFile); + auto CurDirIt = llvm::sys::path::begin(CurDir); + auto CurDirE = llvm::sys::path::end(CurDir); + for (; CurDirIt != CurDirE && *CurDirIt == *FileIt; ++CurDirIt, ++FileIt) + llvm::sys::path::append(DirBuf, *CurDirIt); + if (std::distance(llvm::sys::path::begin(CurDir), CurDirIt) == 1) { + // The common prefix only the root; stripping it would cause + // LLVM diagnostic locations to be more confusing. + Dir = {}; + File = RemappedFile; + } else { + for (; FileIt != FileE; ++FileIt) + llvm::sys::path::append(FileBuf, *FileIt); + Dir = DirBuf; + File = FileBuf; + } + } else { + Dir = CurDir; + File = RemappedFile; + } + llvm::DIFile *F = DBuilder.createFile(File, Dir, CSInfo, Source); + DIFileCache[FileName.data()].reset(F); return F; } -llvm::DIFile *CGDebugInfo::getOrCreateMainFile() { - return DBuilder.createFile( - remapDIPath(TheCU->getFilename()), remapDIPath(TheCU->getDirectory()), - TheCU->getFile()->getChecksum(), - CGM.getCodeGenOpts().EmbedSource ? TheCU->getSource() : None); -} - std::string CGDebugInfo::remapDIPath(StringRef Path) const { for (const auto &Entry : DebugPrefixMap) if (Path.startswith(Entry.first)) @@ -527,11 +555,11 @@ void CGDebugInfo::CreateCompileUnit() { llvm::dwarf::SourceLanguage LangTag; const LangOptions &LO = CGM.getLangOpts(); if (LO.CPlusPlus) { - if (LO.ObjC1) + if (LO.ObjC) LangTag = llvm::dwarf::DW_LANG_ObjC_plus_plus; else LangTag = llvm::dwarf::DW_LANG_C_plus_plus; - } else if (LO.ObjC1) { + } else if (LO.ObjC) { LangTag = llvm::dwarf::DW_LANG_ObjC; } else if (LO.RenderScript) { LangTag = llvm::dwarf::DW_LANG_GOOGLE_RenderScript; @@ -545,7 +573,7 @@ void CGDebugInfo::CreateCompileUnit() { // Figure out which version of the ObjC runtime we have. unsigned RuntimeVers = 0; - if (LO.ObjC1) + if (LO.ObjC) RuntimeVers = LO.ObjCRuntime.isNonFragile() ? 
2 : 1; llvm::DICompileUnit::DebugEmissionKind EmissionKind; @@ -557,29 +585,42 @@ void CGDebugInfo::CreateCompileUnit() { case codegenoptions::DebugLineTablesOnly: EmissionKind = llvm::DICompileUnit::LineTablesOnly; break; + case codegenoptions::DebugDirectivesOnly: + EmissionKind = llvm::DICompileUnit::DebugDirectivesOnly; + break; case codegenoptions::LimitedDebugInfo: case codegenoptions::FullDebugInfo: EmissionKind = llvm::DICompileUnit::FullDebug; break; } + uint64_t DwoId = 0; + auto &CGOpts = CGM.getCodeGenOpts(); + // The DIFile used by the CU is distinct from the main source + // file. Its directory part specifies what becomes the + // DW_AT_comp_dir (the compilation directory), even if the source + // file was specified with an absolute path. if (CSKind) CSInfo.emplace(*CSKind, Checksum); + llvm::DIFile *CUFile = DBuilder.createFile( + remapDIPath(MainFileName), remapDIPath(getCurrentDirname()), CSInfo, + getSource(SM, SM.getMainFileID())); // Create new compile unit. - // FIXME - Eliminate TheCU. - auto &CGOpts = CGM.getCodeGenOpts(); TheCU = DBuilder.createCompileUnit( - LangTag, - DBuilder.createFile(remapDIPath(MainFileName), - remapDIPath(getCurrentDirname()), CSInfo, - getSource(SM, SM.getMainFileID())), - CGOpts.EmitVersionIdentMetadata ? Producer : "", + LangTag, CUFile, CGOpts.EmitVersionIdentMetadata ? Producer : "", LO.Optimize || CGOpts.PrepareForLTO || CGOpts.PrepareForThinLTO, CGOpts.DwarfDebugFlags, RuntimeVers, - CGOpts.EnableSplitDwarf ? "" : CGOpts.SplitDwarfFile, EmissionKind, - 0 /* DWOid */, CGOpts.SplitDwarfInlining, CGOpts.DebugInfoForProfiling, - CGOpts.GnuPubnames); + (CGOpts.getSplitDwarfMode() != CodeGenOptions::NoFission) + ? "" + : CGOpts.SplitDwarfFile, + EmissionKind, DwoId, CGOpts.SplitDwarfInlining, + CGOpts.DebugInfoForProfiling, + CGM.getTarget().getTriple().isNVPTX() + ? 
llvm::DICompileUnit::DebugNameTableKind::None + : static_cast<llvm::DICompileUnit::DebugNameTableKind>( + CGOpts.DebugNameTable), + CGOpts.DebugRangesBaseAddress); } llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { @@ -597,9 +638,9 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { return nullptr; case BuiltinType::ObjCClass: if (!ClassTy) - ClassTy = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, - "objc_class", TheCU, - getOrCreateMainFile(), 0); + ClassTy = + DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, + "objc_class", TheCU, TheCU->getFile(), 0); return ClassTy; case BuiltinType::ObjCId: { // typedef struct objc_class *Class; @@ -611,21 +652,21 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { return ObjTy; if (!ClassTy) - ClassTy = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, - "objc_class", TheCU, - getOrCreateMainFile(), 0); + ClassTy = + DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, + "objc_class", TheCU, TheCU->getFile(), 0); unsigned Size = CGM.getContext().getTypeSize(CGM.getContext().VoidPtrTy); auto *ISATy = DBuilder.createPointerType(ClassTy, Size); - ObjTy = DBuilder.createStructType( - TheCU, "objc_object", getOrCreateMainFile(), 0, 0, 0, - llvm::DINode::FlagZero, nullptr, llvm::DINodeArray()); + ObjTy = DBuilder.createStructType(TheCU, "objc_object", TheCU->getFile(), 0, + 0, 0, llvm::DINode::FlagZero, nullptr, + llvm::DINodeArray()); DBuilder.replaceArrays( ObjTy, DBuilder.getOrCreateArray(&*DBuilder.createMemberType( - ObjTy, "isa", getOrCreateMainFile(), 0, Size, 0, 0, + ObjTy, "isa", TheCU->getFile(), 0, Size, 0, 0, llvm::DINode::FlagZero, ISATy))); return ObjTy; } @@ -633,7 +674,7 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { if (!SelTy) SelTy = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, "objc_selector", TheCU, - getOrCreateMainFile(), 0); + TheCU->getFile(), 0); return SelTy; } @@ -652,6 
+693,10 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { return getOrCreateStructPtrType("opencl_queue_t", OCLQueueDITy); case BuiltinType::OCLReserveID: return getOrCreateStructPtrType("opencl_reserve_id_t", OCLReserveIDDITy); +#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ + case BuiltinType::Id: \ + return getOrCreateStructPtrType("opencl_" #ExtType, Id##Ty); +#include "clang/Basic/OpenCLExtensionTypes.def" case BuiltinType::UChar: case BuiltinType::Char_U: @@ -825,31 +870,45 @@ static bool hasCXXMangling(const TagDecl *TD, llvm::DICompileUnit *TheCU) { } } -// Determines if the tag declaration will require a type identifier. +// Determines if the debug info for this tag declaration needs a type +// identifier. The purpose of the unique identifier is to deduplicate type +// information for identical types across TUs. Because of the C++ one definition +// rule (ODR), it is valid to assume that the type is defined the same way in +// every TU and its debug info is equivalent. +// +// C does not have the ODR, and it is common for codebases to contain multiple +// different definitions of a struct with the same name in different TUs. +// Therefore, if the type doesn't have a C++ mangling, don't give it an +// identifer. Type information in C is smaller and simpler than C++ type +// information, so the increase in debug info size is negligible. +// +// If the type is not externally visible, it should be unique to the current TU, +// and should not need an identifier to participate in type deduplication. +// However, when emitting CodeView, the format internally uses these +// unique type name identifers for references between debug info. For example, +// the method of a class in an anonymous namespace uses the identifer to refer +// to its parent class. The Microsoft C++ ABI attempts to provide unique names +// for such types, so when emitting CodeView, always use identifiers for C++ +// types. 
This may create problems when attempting to emit CodeView when the MS +// C++ ABI is not in use. static bool needsTypeIdentifier(const TagDecl *TD, CodeGenModule &CGM, llvm::DICompileUnit *TheCU) { // We only add a type identifier for types with C++ name mangling. if (!hasCXXMangling(TD, TheCU)) return false; - // CodeView types with C++ mangling need a type identifier. - if (CGM.getCodeGenOpts().EmitCodeView) - return true; - // Externally visible types with C++ mangling need a type identifier. if (TD->isExternallyVisible()) return true; + // CodeView types with C++ mangling need a type identifier. + if (CGM.getCodeGenOpts().EmitCodeView) + return true; + return false; } -// When emitting CodeView debug information we need to produce a type -// identifier for all types which have a C++ mangling. Until a GUID is added -// to the identifier (not currently implemented) the result will not be unique -// across compilation units. -// When emitting DWARF debug information, we need to produce a type identifier -// for all externally visible types with C++ name mangling. This identifier -// should be unique across ODR-compliant compilation units. +// Returns a unique type identifier string if one exists, or an empty string. 
static SmallString<256> getTypeIdentifier(const TagType *Ty, CodeGenModule &CGM, llvm::DICompileUnit *TheCU) { SmallString<256> Identifier; @@ -936,18 +995,53 @@ llvm::DIType *CGDebugInfo::getOrCreateStructPtrType(StringRef Name, if (Cache) return Cache; Cache = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, Name, - TheCU, getOrCreateMainFile(), 0); + TheCU, TheCU->getFile(), 0); unsigned Size = CGM.getContext().getTypeSize(CGM.getContext().VoidPtrTy); Cache = DBuilder.createPointerType(Cache, Size); return Cache; } +uint64_t CGDebugInfo::collectDefaultElementTypesForBlockPointer( + const BlockPointerType *Ty, llvm::DIFile *Unit, llvm::DIDerivedType *DescTy, + unsigned LineNo, SmallVectorImpl<llvm::Metadata *> &EltTys) { + QualType FType; + + // Advanced by calls to CreateMemberType in increments of FType, then + // returned as the overall size of the default elements. + uint64_t FieldOffset = 0; + + // Blocks in OpenCL have unique constraints which make the standard fields + // redundant while requiring size and align fields for enqueue_kernel. 
See + // initializeForBlockHeader in CGBlocks.cpp + if (CGM.getLangOpts().OpenCL) { + FType = CGM.getContext().IntTy; + EltTys.push_back(CreateMemberType(Unit, FType, "__size", &FieldOffset)); + EltTys.push_back(CreateMemberType(Unit, FType, "__align", &FieldOffset)); + } else { + FType = CGM.getContext().getPointerType(CGM.getContext().VoidTy); + EltTys.push_back(CreateMemberType(Unit, FType, "__isa", &FieldOffset)); + FType = CGM.getContext().IntTy; + EltTys.push_back(CreateMemberType(Unit, FType, "__flags", &FieldOffset)); + EltTys.push_back(CreateMemberType(Unit, FType, "__reserved", &FieldOffset)); + FType = CGM.getContext().getPointerType(Ty->getPointeeType()); + EltTys.push_back(CreateMemberType(Unit, FType, "__FuncPtr", &FieldOffset)); + FType = CGM.getContext().getPointerType(CGM.getContext().VoidTy); + uint64_t FieldSize = CGM.getContext().getTypeSize(Ty); + uint32_t FieldAlign = CGM.getContext().getTypeAlign(Ty); + EltTys.push_back(DBuilder.createMemberType( + Unit, "__descriptor", nullptr, LineNo, FieldSize, FieldAlign, + FieldOffset, llvm::DINode::FlagZero, DescTy)); + FieldOffset += FieldSize; + } + + return FieldOffset; +} + llvm::DIType *CGDebugInfo::CreateType(const BlockPointerType *Ty, llvm::DIFile *Unit) { SmallVector<llvm::Metadata *, 8> EltTys; QualType FType; - uint64_t FieldSize, FieldOffset; - uint32_t FieldAlign; + uint64_t FieldOffset; llvm::DINodeArray Elements; FieldOffset = 0; @@ -959,10 +1053,9 @@ llvm::DIType *CGDebugInfo::CreateType(const BlockPointerType *Ty, EltTys.clear(); llvm::DINode::DIFlags Flags = llvm::DINode::FlagAppleBlock; - unsigned LineNo = 0; auto *EltTy = - DBuilder.createStructType(Unit, "__block_descriptor", nullptr, LineNo, + DBuilder.createStructType(Unit, "__block_descriptor", nullptr, 0, FieldOffset, 0, Flags, nullptr, Elements); // Bit size, align and offset of the type. 
@@ -970,27 +1063,8 @@ llvm::DIType *CGDebugInfo::CreateType(const BlockPointerType *Ty, auto *DescTy = DBuilder.createPointerType(EltTy, Size); - FieldOffset = 0; - if (CGM.getLangOpts().OpenCL) { - FType = CGM.getContext().IntTy; - EltTys.push_back(CreateMemberType(Unit, FType, "__size", &FieldOffset)); - EltTys.push_back(CreateMemberType(Unit, FType, "__align", &FieldOffset)); - } else { - FType = CGM.getContext().getPointerType(CGM.getContext().VoidTy); - EltTys.push_back(CreateMemberType(Unit, FType, "__isa", &FieldOffset)); - FType = CGM.getContext().IntTy; - EltTys.push_back(CreateMemberType(Unit, FType, "__flags", &FieldOffset)); - EltTys.push_back(CreateMemberType(Unit, FType, "__reserved", &FieldOffset)); - FType = CGM.getContext().getPointerType(Ty->getPointeeType()); - EltTys.push_back(CreateMemberType(Unit, FType, "__FuncPtr", &FieldOffset)); - FType = CGM.getContext().getPointerType(CGM.getContext().VoidTy); - FieldSize = CGM.getContext().getTypeSize(Ty); - FieldAlign = CGM.getContext().getTypeAlign(Ty); - EltTys.push_back(DBuilder.createMemberType( - Unit, "__descriptor", nullptr, LineNo, FieldSize, FieldAlign, FieldOffset, - llvm::DINode::FlagZero, DescTy)); - FieldOffset += FieldSize; - } + FieldOffset = collectDefaultElementTypesForBlockPointer(Ty, Unit, DescTy, + 0, EltTys); Elements = DBuilder.getOrCreateArray(EltTys); @@ -998,7 +1072,7 @@ llvm::DIType *CGDebugInfo::CreateType(const BlockPointerType *Ty, // DW_AT_APPLE_BLOCK attribute and are an implementation detail only // the debugger needs to know about. To allow type uniquing, emit // them without a name or a location. 
- EltTy = DBuilder.createStructType(Unit, "", nullptr, LineNo, FieldOffset, 0, + EltTy = DBuilder.createStructType(Unit, "", nullptr, 0, FieldOffset, 0, Flags, nullptr, Elements); return DBuilder.createPointerType(EltTy, Size); @@ -1058,6 +1132,7 @@ static unsigned getDwarfCC(CallingConv CC) { case CC_X86_64SysV: return llvm::dwarf::DW_CC_LLVM_X86_64SysV; case CC_AAPCS: + case CC_AArch64VectorCall: return llvm::dwarf::DW_CC_LLVM_AAPCS; case CC_AAPCS_VFP: return llvm::dwarf::DW_CC_LLVM_AAPCS_VFP; @@ -1353,8 +1428,7 @@ CGDebugInfo::getOrCreateMethodType(const CXXMethodDecl *Method, if (Method->isStatic()) return cast_or_null<llvm::DISubroutineType>( getOrCreateType(QualType(Func, 0), Unit)); - return getOrCreateInstanceMethodType(Method->getThisType(CGM.getContext()), - Func, Unit); + return getOrCreateInstanceMethodType(Method->getThisType(), Func, Unit); } llvm::DISubroutineType *CGDebugInfo::getOrCreateInstanceMethodType( @@ -1450,16 +1524,16 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( // Collect virtual method info. 
llvm::DIType *ContainingType = nullptr; - unsigned Virtuality = 0; unsigned VIndex = 0; llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero; + llvm::DISubprogram::DISPFlags SPFlags = llvm::DISubprogram::SPFlagZero; int ThisAdjustment = 0; if (Method->isVirtual()) { if (Method->isPure()) - Virtuality = llvm::dwarf::DW_VIRTUALITY_pure_virtual; + SPFlags |= llvm::DISubprogram::SPFlagPureVirtual; else - Virtuality = llvm::dwarf::DW_VIRTUALITY_virtual; + SPFlags |= llvm::DISubprogram::SPFlagVirtual; if (CGM.getTarget().getCXXABI().isItaniumFamily()) { // It doesn't make sense to give a virtual destructor a vtable index, @@ -1511,12 +1585,13 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( Flags |= llvm::DINode::FlagLValueReference; if (Method->getRefQualifier() == RQ_RValue) Flags |= llvm::DINode::FlagRValueReference; + if (CGM.getLangOpts().Optimize) + SPFlags |= llvm::DISubprogram::SPFlagOptimized; llvm::DINodeArray TParamsArray = CollectFunctionTemplateParams(Method, Unit); llvm::DISubprogram *SP = DBuilder.createMethod( RecordTy, MethodName, MethodLinkageName, MethodDefUnit, MethodLine, - MethodTy, /*isLocalToUnit=*/false, /*isDefinition=*/false, Virtuality, - VIndex, ThisAdjustment, ContainingType, Flags, CGM.getLangOpts().Optimize, + MethodTy, VIndex, ThisAdjustment, ContainingType, Flags, SPFlags, TParamsArray.get()); SPCache[Method->getCanonicalDecl()].reset(SP); @@ -1741,6 +1816,29 @@ CGDebugInfo::CollectFunctionTemplateParams(const FunctionDecl *FD, return llvm::DINodeArray(); } +llvm::DINodeArray CGDebugInfo::CollectVarTemplateParams(const VarDecl *VL, + llvm::DIFile *Unit) { + if (auto *TS = dyn_cast<VarTemplateSpecializationDecl>(VL)) { + auto T = TS->getSpecializedTemplateOrPartial(); + auto TA = TS->getTemplateArgs().asArray(); + // Collect parameters for a partial specialization + if (T.is<VarTemplatePartialSpecializationDecl *>()) { + const TemplateParameterList *TList = + T.get<VarTemplatePartialSpecializationDecl *>() + 
->getTemplateParameters(); + return CollectTemplateParams(TList, TA, Unit); + } + + // Collect parameters for an explicit specialization + if (T.is<VarTemplateDecl *>()) { + const TemplateParameterList *TList = T.get<VarTemplateDecl *>() + ->getTemplateParameters(); + return CollectTemplateParams(TList, TA, Unit); + } + } + return llvm::DINodeArray(); +} + llvm::DINodeArray CGDebugInfo::CollectCXXTemplateParams( const ClassTemplateSpecializationDecl *TSpecial, llvm::DIFile *Unit) { // Always get the full list of parameters, not just the ones from @@ -1896,8 +1994,17 @@ static bool isDefinedInClangModule(const RecordDecl *RD) { if (auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD)) { if (!CXXDecl->isCompleteDefinition()) return false; + // Check wether RD is a template. auto TemplateKind = CXXDecl->getTemplateSpecializationKind(); if (TemplateKind != TSK_Undeclared) { + // Unfortunately getOwningModule() isn't accurate enough to find the + // owning module of a ClassTemplateSpecializationDecl that is inside a + // namespace spanning multiple modules. + bool Explicit = false; + if (auto *TD = dyn_cast<ClassTemplateSpecializationDecl>(CXXDecl)) + Explicit = TD->isExplicitInstantiationOrSpecialization(); + if (!Explicit && CXXDecl->getEnclosingNamespaceContext()) + return false; // This is a template, check the origin of the first member. 
if (CXXDecl->field_begin() == CXXDecl->field_end()) return TemplateKind == TSK_ExplicitInstantiationDeclaration; @@ -2445,9 +2552,9 @@ llvm::DIType *CGDebugInfo::CreateType(const ArrayType *Ty, llvm::DIFile *Unit) { Count = CAT->getSize().getZExtValue(); else if (const auto *VAT = dyn_cast<VariableArrayType>(Ty)) { if (Expr *Size = VAT->getSizeExpr()) { - llvm::APSInt V; - if (Size->EvaluateAsInt(V, CGM.getContext())) - Count = V.getExtValue(); + Expr::EvalResult Result; + if (Size->EvaluateAsInt(Result, CGM.getContext())) + Count = Result.Val.getInt().getExtValue(); } } @@ -2513,9 +2620,9 @@ llvm::DIType *CGDebugInfo::CreateType(const MemberPointerType *Ty, const FunctionProtoType *FPT = Ty->getPointeeType()->getAs<FunctionProtoType>(); return DBuilder.createMemberPointerType( - getOrCreateInstanceMethodType(CGM.getContext().getPointerType(QualType( - Ty->getClass(), FPT->getTypeQuals())), - FPT, U), + getOrCreateInstanceMethodType( + CXXMethodDecl::getThisType(FPT, Ty->getMostRecentCXXRecordDecl()), + FPT, U), ClassType, Size, /*Align=*/0, Flags); } @@ -2603,7 +2710,7 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const EnumType *Ty) { llvm::DIType *ClassTy = getOrCreateType(ED->getIntegerType(), DefUnit); return DBuilder.createEnumerationType(EnumContext, ED->getName(), DefUnit, Line, Size, Align, EltArray, ClassTy, - Identifier, ED->isFixed()); + Identifier, ED->isScoped()); } llvm::DIMacro *CGDebugInfo::CreateMacro(llvm::DIMacroFile *Parent, @@ -3035,6 +3142,7 @@ void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit, void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit, unsigned &LineNo, QualType &T, StringRef &Name, StringRef &LinkageName, + llvm::MDTuple *&TemplateParameters, llvm::DIScope *&VDContext) { Unit = getOrCreateFile(VD->getLocation()); LineNo = getLineNumber(VD->getLocation()); @@ -3058,6 +3166,13 @@ void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit, if (LinkageName == 
Name) LinkageName = StringRef(); + if (isa<VarTemplateSpecializationDecl>(VD)) { + llvm::DINodeArray parameterNodes = CollectVarTemplateParams(VD, &*Unit); + TemplateParameters = parameterNodes.get(); + } else { + TemplateParameters = nullptr; + } + // Since we emit declarations (DW_AT_members) for static members, place the // definition of those static members in the namespace they were declared in // in the source code (the lexical decl context). @@ -3084,6 +3199,7 @@ llvm::DISubprogram *CGDebugInfo::getFunctionFwdDeclOrStub(GlobalDecl GD, llvm::DINodeArray TParamsArray; StringRef Name, LinkageName; llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero; + llvm::DISubprogram::DISPFlags SPFlags = llvm::DISubprogram::SPFlagZero; SourceLocation Loc = GD.getDecl()->getLocation(); llvm::DIFile *Unit = getOrCreateFile(Loc); llvm::DIScope *DContext = Unit; @@ -3100,20 +3216,23 @@ llvm::DISubprogram *CGDebugInfo::getFunctionFwdDeclOrStub(GlobalDecl GD, CallingConv CC = FD->getType()->castAs<FunctionType>()->getCallConv(); QualType FnType = CGM.getContext().getFunctionType( FD->getReturnType(), ArgTypes, FunctionProtoType::ExtProtoInfo(CC)); + if (!FD->isExternallyVisible()) + SPFlags |= llvm::DISubprogram::SPFlagLocalToUnit; + if (CGM.getLangOpts().Optimize) + SPFlags |= llvm::DISubprogram::SPFlagOptimized; + if (Stub) { + Flags |= getCallSiteRelatedAttrs(); + SPFlags |= llvm::DISubprogram::SPFlagDefinition; return DBuilder.createFunction( DContext, Name, LinkageName, Unit, Line, - getOrCreateFunctionType(GD.getDecl(), FnType, Unit), - !FD->isExternallyVisible(), - /* isDefinition = */ true, 0, Flags, CGM.getLangOpts().Optimize, + getOrCreateFunctionType(GD.getDecl(), FnType, Unit), 0, Flags, SPFlags, TParamsArray.get(), getFunctionDeclaration(FD)); } llvm::DISubprogram *SP = DBuilder.createTempFunctionFwdDecl( DContext, Name, LinkageName, Unit, Line, - getOrCreateFunctionType(GD.getDecl(), FnType, Unit), - !FD->isExternallyVisible(), - /* isDefinition = */ false, 0, 
Flags, CGM.getLangOpts().Optimize, + getOrCreateFunctionType(GD.getDecl(), FnType, Unit), 0, Flags, SPFlags, TParamsArray.get(), getFunctionDeclaration(FD)); const FunctionDecl *CanonDecl = FD->getCanonicalDecl(); FwdDeclReplaceMap.emplace_back(std::piecewise_construct, @@ -3138,12 +3257,14 @@ CGDebugInfo::getGlobalVariableForwardDeclaration(const VarDecl *VD) { llvm::DIFile *Unit = getOrCreateFile(Loc); llvm::DIScope *DContext = Unit; unsigned Line = getLineNumber(Loc); + llvm::MDTuple *TemplateParameters = nullptr; - collectVarDeclProps(VD, Unit, Line, T, Name, LinkageName, DContext); + collectVarDeclProps(VD, Unit, Line, T, Name, LinkageName, TemplateParameters, + DContext); auto Align = getDeclAlignIfRequired(VD, CGM.getContext()); auto *GV = DBuilder.createTempGlobalVariableFwdDecl( DContext, Name, LinkageName, Unit, Line, getOrCreateType(T, Unit), - !VD->isExternallyVisible(), nullptr, Align); + !VD->isExternallyVisible(), nullptr, TemplateParameters, Align); FwdDeclReplaceMap.emplace_back( std::piecewise_construct, std::make_tuple(cast<VarDecl>(VD->getCanonicalDecl())), @@ -3299,6 +3420,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, bool HasDecl = (D != nullptr); llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero; + llvm::DISubprogram::DISPFlags SPFlags = llvm::DISubprogram::SPFlagZero; llvm::DIFile *Unit = getOrCreateFile(Loc); llvm::DIScope *FDContext = Unit; llvm::DINodeArray TParamsArray; @@ -3338,6 +3460,15 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, if (CurFuncIsThunk) Flags |= llvm::DINode::FlagThunk; + if (Fn->hasLocalLinkage()) + SPFlags |= llvm::DISubprogram::SPFlagLocalToUnit; + if (CGM.getLangOpts().Optimize) + SPFlags |= llvm::DISubprogram::SPFlagOptimized; + + llvm::DINode::DIFlags FlagsForDef = Flags | getCallSiteRelatedAttrs(); + llvm::DISubprogram::DISPFlags SPFlagsForDef = + SPFlags | llvm::DISubprogram::SPFlagDefinition; + unsigned LineNo = getLineNumber(Loc); unsigned ScopeLine 
= getLineNumber(ScopeLoc); @@ -3348,9 +3479,8 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, // are emitted as CU level entities by the backend. llvm::DISubprogram *SP = DBuilder.createFunction( FDContext, Name, LinkageName, Unit, LineNo, - getOrCreateFunctionType(D, FnType, Unit), Fn->hasLocalLinkage(), - true /*definition*/, ScopeLine, Flags, CGM.getLangOpts().Optimize, - TParamsArray.get(), getFunctionDeclaration(D)); + getOrCreateFunctionType(D, FnType, Unit), ScopeLine, FlagsForDef, + SPFlagsForDef, TParamsArray.get(), getFunctionDeclaration(D)); Fn->setSubprogram(SP); // We might get here with a VarDecl in the case we're generating // code for the initialization of globals. Do not record these decls @@ -3370,8 +3500,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, cast<llvm::DICompositeType>(It->second); llvm::DISubprogram *FD = DBuilder.createFunction( InterfaceDecl, Name, LinkageName, Unit, LineNo, - getOrCreateFunctionType(D, FnType, Unit), Fn->hasLocalLinkage(), - false /*definition*/, ScopeLine, Flags, CGM.getLangOpts().Optimize, + getOrCreateFunctionType(D, FnType, Unit), ScopeLine, Flags, SPFlags, TParamsArray.get()); DBuilder.finalizeSubprogram(FD); ObjCMethodCache[ID].push_back(FD); @@ -3420,11 +3549,13 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, } unsigned LineNo = getLineNumber(Loc); unsigned ScopeLine = 0; + llvm::DISubprogram::DISPFlags SPFlags = llvm::DISubprogram::SPFlagZero; + if (CGM.getLangOpts().Optimize) + SPFlags |= llvm::DISubprogram::SPFlagOptimized; DBuilder.retainType(DBuilder.createFunction( FDContext, Name, LinkageName, Unit, LineNo, - getOrCreateFunctionType(D, FnType, Unit), false /*internalLinkage*/, - false /*definition*/, ScopeLine, Flags, CGM.getLangOpts().Optimize, + getOrCreateFunctionType(D, FnType, Unit), ScopeLine, Flags, SPFlags, TParamsArray.get(), getFunctionDeclaration(D))); } @@ -3453,7 +3584,7 @@ void 
CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc) { // Update our current location setLocation(Loc); - if (CurLoc.isInvalid() || CurLoc.isMacroID()) + if (CurLoc.isInvalid() || CurLoc.isMacroID() || LexicalBlockStack.empty()) return; llvm::MDNode *Scope = LexicalBlockStack.back(); @@ -3530,9 +3661,9 @@ void CGDebugInfo::EmitFunctionEnd(CGBuilderTy &Builder, llvm::Function *Fn) { DBuilder.finalizeSubprogram(Fn->getSubprogram()); } -llvm::DIType *CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD, - uint64_t *XOffset) { - +CGDebugInfo::BlockByRefType +CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD, + uint64_t *XOffset) { SmallVector<llvm::Metadata *, 5> EltTys; QualType FType; uint64_t FieldSize, FieldOffset; @@ -3584,23 +3715,21 @@ llvm::DIType *CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD, } FType = Type; - llvm::DIType *FieldTy = getOrCreateType(FType, Unit); + llvm::DIType *WrappedTy = getOrCreateType(FType, Unit); FieldSize = CGM.getContext().getTypeSize(FType); FieldAlign = CGM.getContext().toBits(Align); *XOffset = FieldOffset; - FieldTy = DBuilder.createMemberType(Unit, VD->getName(), Unit, 0, FieldSize, - FieldAlign, FieldOffset, - llvm::DINode::FlagZero, FieldTy); + llvm::DIType *FieldTy = DBuilder.createMemberType( + Unit, VD->getName(), Unit, 0, FieldSize, FieldAlign, FieldOffset, + llvm::DINode::FlagZero, WrappedTy); EltTys.push_back(FieldTy); FieldOffset += FieldSize; llvm::DINodeArray Elements = DBuilder.getOrCreateArray(EltTys); - - llvm::DINode::DIFlags Flags = llvm::DINode::FlagBlockByrefStruct; - - return DBuilder.createStructType(Unit, "", Unit, 0, FieldOffset, 0, Flags, - nullptr, Elements); + return {DBuilder.createStructType(Unit, "", Unit, 0, FieldOffset, 0, + llvm::DINode::FlagZero, nullptr, Elements), + WrappedTy}; } llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD, @@ -3621,7 +3750,7 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::DIType *Ty; 
uint64_t XOffset = 0; if (VD->hasAttr<BlocksAttr>()) - Ty = EmitTypeForVarWithBlocksAttr(VD, &XOffset); + Ty = EmitTypeForVarWithBlocksAttr(VD, &XOffset).WrappedType; else Ty = getOrCreateType(VD->getType(), Unit); @@ -3759,7 +3888,7 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable( llvm::DIFile *Unit = getOrCreateFile(VD->getLocation()); llvm::DIType *Ty; if (isByRef) - Ty = EmitTypeForVarWithBlocksAttr(VD, &XOffset); + Ty = EmitTypeForVarWithBlocksAttr(VD, &XOffset).WrappedType; else Ty = getOrCreateType(VD->getType(), Unit); @@ -3830,6 +3959,44 @@ bool operator<(const BlockLayoutChunk &l, const BlockLayoutChunk &r) { } } // namespace +void CGDebugInfo::collectDefaultFieldsForBlockLiteralDeclare( + const CGBlockInfo &Block, const ASTContext &Context, SourceLocation Loc, + const llvm::StructLayout &BlockLayout, llvm::DIFile *Unit, + SmallVectorImpl<llvm::Metadata *> &Fields) { + // Blocks in OpenCL have unique constraints which make the standard fields + // redundant while requiring size and align fields for enqueue_kernel. 
See + // initializeForBlockHeader in CGBlocks.cpp + if (CGM.getLangOpts().OpenCL) { + Fields.push_back(createFieldType("__size", Context.IntTy, Loc, AS_public, + BlockLayout.getElementOffsetInBits(0), + Unit, Unit)); + Fields.push_back(createFieldType("__align", Context.IntTy, Loc, AS_public, + BlockLayout.getElementOffsetInBits(1), + Unit, Unit)); + } else { + Fields.push_back(createFieldType("__isa", Context.VoidPtrTy, Loc, AS_public, + BlockLayout.getElementOffsetInBits(0), + Unit, Unit)); + Fields.push_back(createFieldType("__flags", Context.IntTy, Loc, AS_public, + BlockLayout.getElementOffsetInBits(1), + Unit, Unit)); + Fields.push_back( + createFieldType("__reserved", Context.IntTy, Loc, AS_public, + BlockLayout.getElementOffsetInBits(2), Unit, Unit)); + auto *FnTy = Block.getBlockExpr()->getFunctionType(); + auto FnPtrType = CGM.getContext().getPointerType(FnTy->desugar()); + Fields.push_back(createFieldType("__FuncPtr", FnPtrType, Loc, AS_public, + BlockLayout.getElementOffsetInBits(3), + Unit, Unit)); + Fields.push_back(createFieldType( + "__descriptor", + Context.getPointerType(Block.NeedsCopyDispose + ? 
Context.getBlockDescriptorExtendedType() + : Context.getBlockDescriptorType()), + Loc, AS_public, BlockLayout.getElementOffsetInBits(4), Unit, Unit)); + } +} + void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, StringRef Name, unsigned ArgNo, @@ -3852,35 +4019,8 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, CGM.getDataLayout().getStructLayout(block.StructureType); SmallVector<llvm::Metadata *, 16> fields; - if (CGM.getLangOpts().OpenCL) { - fields.push_back(createFieldType("__size", C.IntTy, loc, AS_public, - blockLayout->getElementOffsetInBits(0), - tunit, tunit)); - fields.push_back(createFieldType("__align", C.IntTy, loc, AS_public, - blockLayout->getElementOffsetInBits(1), - tunit, tunit)); - } else { - fields.push_back(createFieldType("__isa", C.VoidPtrTy, loc, AS_public, - blockLayout->getElementOffsetInBits(0), - tunit, tunit)); - fields.push_back(createFieldType("__flags", C.IntTy, loc, AS_public, - blockLayout->getElementOffsetInBits(1), - tunit, tunit)); - fields.push_back(createFieldType("__reserved", C.IntTy, loc, AS_public, - blockLayout->getElementOffsetInBits(2), - tunit, tunit)); - auto *FnTy = block.getBlockExpr()->getFunctionType(); - auto FnPtrType = CGM.getContext().getPointerType(FnTy->desugar()); - fields.push_back(createFieldType("__FuncPtr", FnPtrType, loc, AS_public, - blockLayout->getElementOffsetInBits(3), - tunit, tunit)); - fields.push_back(createFieldType( - "__descriptor", - C.getPointerType(block.NeedsCopyDispose - ? C.getBlockDescriptorExtendedType() - : C.getBlockDescriptorType()), - loc, AS_public, blockLayout->getElementOffsetInBits(4), tunit, tunit)); - } + collectDefaultFieldsForBlockLiteralDeclare(block, C, loc, *blockLayout, tunit, + fields); // We want to sort the captures by offset, not because DWARF // requires this, but because we're paranoid about debuggers. 
@@ -3923,7 +4063,7 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, QualType type; if (auto *Method = cast_or_null<CXXMethodDecl>(blockDecl->getNonClosureContext())) - type = Method->getThisType(C); + type = Method->getThisType(); else if (auto *RDecl = dyn_cast<CXXRecordDecl>(blockDecl->getParent())) type = QualType(RDecl->getTypeForDecl(), 0); else @@ -3941,10 +4081,10 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, if (capture->isByRef()) { TypeInfo PtrInfo = C.getTypeInfo(C.VoidPtrTy); auto Align = PtrInfo.AlignIsRequired ? PtrInfo.Align : 0; - - // FIXME: this creates a second copy of this type! + // FIXME: This recomputes the layout of the BlockByRefWrapper. uint64_t xoffset; - fieldType = EmitTypeForVarWithBlocksAttr(variable, &xoffset); + fieldType = + EmitTypeForVarWithBlocksAttr(variable, &xoffset).BlockByRefWrapper; fieldType = DBuilder.createPointerType(fieldType, PtrInfo.Width); fieldType = DBuilder.createMemberType(tunit, name, tunit, line, PtrInfo.Width, Align, offsetInBits, @@ -4045,7 +4185,9 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, unsigned LineNo; StringRef DeclName, LinkageName; QualType T; - collectVarDeclProps(D, Unit, LineNo, T, DeclName, LinkageName, DContext); + llvm::MDTuple *TemplateParameters = nullptr; + collectVarDeclProps(D, Unit, LineNo, T, DeclName, LinkageName, + TemplateParameters, DContext); // Attempt to store one global variable for the declaration - even if we // emit a lot of fields. @@ -4071,7 +4213,8 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, DContext, DeclName, LinkageName, Unit, LineNo, getOrCreateType(T, Unit), Var->hasLocalLinkage(), Expr.empty() ? 
nullptr : DBuilder.createExpression(Expr), - getOrCreateStaticDataMemberDeclarationOrNull(D), Align); + getOrCreateStaticDataMemberDeclarationOrNull(D), TemplateParameters, + Align); Var->addDebugInfo(GVE); } DeclCache[D->getCanonicalDecl()].reset(GVE); @@ -4128,10 +4271,19 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, const APValue &Init) { InitExpr = DBuilder.createConstantValueExpression( Init.getFloat().bitcastToAPInt().getZExtValue()); } + + llvm::MDTuple *TemplateParameters = nullptr; + + if (isa<VarTemplateSpecializationDecl>(VD)) + if (VarD) { + llvm::DINodeArray parameterNodes = CollectVarTemplateParams(VarD, &*Unit); + TemplateParameters = parameterNodes.get(); + } + GV.reset(DBuilder.createGlobalVariableExpression( DContext, Name, StringRef(), Unit, getLineNumber(VD->getLocation()), Ty, true, InitExpr, getOrCreateStaticDataMemberDeclarationOrNull(VarD), - Align)); + TemplateParameters, Align)); } llvm::DIScope *CGDebugInfo::getCurrentContextDescriptor(const Decl *D) { @@ -4320,7 +4472,7 @@ void CGDebugInfo::EmitExplicitCastType(QualType Ty) { if (CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo) return; - if (auto *DieTy = getOrCreateType(Ty, getOrCreateMainFile())) + if (auto *DieTy = getOrCreateType(Ty, TheCU->getFile())) // Don't ignore in case of explicit cast where it is referenced indirectly. DBuilder.retainType(DieTy); } @@ -4332,3 +4484,22 @@ llvm::DebugLoc CGDebugInfo::SourceLocToDebugLoc(SourceLocation Loc) { llvm::MDNode *Scope = LexicalBlockStack.back(); return llvm::DebugLoc::get(getLineNumber(Loc), getColumnNumber(Loc), Scope); } + +llvm::DINode::DIFlags CGDebugInfo::getCallSiteRelatedAttrs() const { + // Call site-related attributes are only useful in optimized programs, and + // when there's a possibility of debugging backtraces. 
+ if (!CGM.getLangOpts().Optimize || DebugKind == codegenoptions::NoDebugInfo || + DebugKind == codegenoptions::LocTrackingOnly) + return llvm::DINode::FlagZero; + + // Call site-related attributes are available in DWARF v5. Some debuggers, + // while not fully DWARF v5-compliant, may accept these attributes as if they + // were part of DWARF v4. + bool SupportsDWARFv4Ext = + CGM.getCodeGenOpts().DwarfVersion == 4 && + CGM.getCodeGenOpts().getDebuggerTuning() == llvm::DebuggerKind::LLDB; + if (!SupportsDWARFv4Ext && CGM.getCodeGenOpts().DwarfVersion < 5) + return llvm::DINode::FlagZero; + + return llvm::DINode::FlagAllCallsDescribed; +} diff --git a/lib/CodeGen/CGDebugInfo.h b/lib/CodeGen/CGDebugInfo.h index e632806138f0..031e40b9dde9 100644 --- a/lib/CodeGen/CGDebugInfo.h +++ b/lib/CodeGen/CGDebugInfo.h @@ -20,8 +20,8 @@ #include "clang/AST/ExternalASTSource.h" #include "clang/AST/Type.h" #include "clang/AST/TypeOrdering.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/SourceLocation.h" -#include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Optional.h" @@ -76,6 +76,9 @@ class CGDebugInfo { llvm::DIType *OCLQueueDITy = nullptr; llvm::DIType *OCLNDRangeDITy = nullptr; llvm::DIType *OCLReserveIDDITy = nullptr; +#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ + llvm::DIType *Id##Ty = nullptr; +#include "clang/Basic/OpenCLExtensionTypes.def" /// Cache of previously constructed Types. llvm::DenseMap<const void *, llvm::TrackingMDRef> TypeCache; @@ -248,6 +251,11 @@ class CGDebugInfo { llvm::DINodeArray CollectFunctionTemplateParams(const FunctionDecl *FD, llvm::DIFile *Unit); + /// A helper function to collect debug info for function template + /// parameters. + llvm::DINodeArray CollectVarTemplateParams(const VarDecl *VD, + llvm::DIFile *Unit); + /// A helper function to collect debug info for template /// parameters. 
llvm::DINodeArray @@ -311,12 +319,31 @@ class CGDebugInfo { void AppendAddressSpaceXDeref(unsigned AddressSpace, SmallVectorImpl<int64_t> &Expr) const; + /// A helper function to collect debug info for the default elements of a + /// block. + /// + /// \returns The next available field offset after the default elements. + uint64_t collectDefaultElementTypesForBlockPointer( + const BlockPointerType *Ty, llvm::DIFile *Unit, + llvm::DIDerivedType *DescTy, unsigned LineNo, + SmallVectorImpl<llvm::Metadata *> &EltTys); + + /// A helper function to collect debug info for the default fields of a + /// block. + void collectDefaultFieldsForBlockLiteralDeclare( + const CGBlockInfo &Block, const ASTContext &Context, SourceLocation Loc, + const llvm::StructLayout &BlockLayout, llvm::DIFile *Unit, + SmallVectorImpl<llvm::Metadata *> &Fields); + public: CGDebugInfo(CodeGenModule &CGM); ~CGDebugInfo(); void finalize(); + /// Remap a given path with the current debug prefix map + std::string remapDIPath(StringRef) const; + /// Register VLA size expression debug node with the qualified type. void registerVLASizeExpression(QualType Ty, llvm::Metadata *SizeExpr) { SizeExprCache[Ty] = SizeExpr; @@ -475,9 +502,16 @@ private: llvm::Optional<unsigned> ArgNo, CGBuilderTy &Builder); + struct BlockByRefType { + /// The wrapper struct used inside the __block_literal struct. + llvm::DIType *BlockByRefWrapper; + /// The type as it appears in the source code. + llvm::DIType *WrappedType; + }; + /// Build up structure info for the byref. See \a BuildByRefType. - llvm::DIType *EmitTypeForVarWithBlocksAttr(const VarDecl *VD, - uint64_t *OffSet); + BlockByRefType EmitTypeForVarWithBlocksAttr(const VarDecl *VD, + uint64_t *OffSet); /// Get context info for the DeclContext of \p Decl. llvm::DIScope *getDeclContextDescriptor(const Decl *D); @@ -497,9 +531,6 @@ private: /// Create new compile unit. 
void CreateCompileUnit(); - /// Remap a given path with the current debug prefix map - std::string remapDIPath(StringRef) const; - /// Compute the file checksum debug info for input file ID. Optional<llvm::DIFile::ChecksumKind> computeChecksum(FileID FID, SmallString<32> &Checksum) const; @@ -507,11 +538,15 @@ private: /// Get the source of the given file ID. Optional<StringRef> getSource(const SourceManager &SM, FileID FID); - /// Get the file debug info descriptor for the input location. + /// Convenience function to get the file debug info descriptor for the input + /// location. llvm::DIFile *getOrCreateFile(SourceLocation Loc); - /// Get the file info for main compile unit. - llvm::DIFile *getOrCreateMainFile(); + /// Create a file debug info descriptor for a source file. + llvm::DIFile * + createFile(StringRef FileName, + Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo, + Optional<StringRef> Source); /// Get the type from the cache or create a new type if necessary. llvm::DIType *getOrCreateType(QualType Ty, llvm::DIFile *Fg); @@ -580,6 +615,11 @@ private: unsigned LineNo, StringRef LinkageName, llvm::GlobalVariable *Var, llvm::DIScope *DContext); + + /// Return flags which enable debug info emission for call sites, provided + /// that it is supported and enabled. + llvm::DINode::DIFlags getCallSiteRelatedAttrs() const; + /// Get the printing policy for producing names for debug info. PrintingPolicy getPrintingPolicy() const; @@ -622,7 +662,9 @@ private: /// Collect various properties of a VarDecl. void collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit, unsigned &LineNo, QualType &T, StringRef &Name, - StringRef &LinkageName, llvm::DIScope *&VDContext); + StringRef &LinkageName, + llvm::MDTuple *&TemplateParameters, + llvm::DIScope *&VDContext); /// Allocate a copy of \p A using the DebugInfoNames allocator /// and return a reference to it. If multiple arguments are given the strings @@ -702,7 +744,7 @@ public: /// function \p InlinedFn. 
The current debug location becomes the inlined call /// site of the inlined function. ApplyInlineDebugLocation(CodeGenFunction &CGF, GlobalDecl InlinedFn); - /// Restore everything back to the orginial state. + /// Restore everything back to the original state. ~ApplyInlineDebugLocation(); }; diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp index 57b2fbadbeec..5959d889b455 100644 --- a/lib/CodeGen/CGDecl.cpp +++ b/lib/CodeGen/CGDecl.cpp @@ -26,10 +26,11 @@ #include "clang/AST/Decl.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclOpenMP.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" -#include "clang/Frontend/CodeGenOptions.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" @@ -104,6 +105,7 @@ void CodeGenFunction::EmitDecl(const Decl &D) { case Decl::Import: case Decl::OMPThreadPrivate: case Decl::OMPCapturedExpr: + case Decl::OMPRequires: case Decl::Empty: // None of these decls require codegen support. return; @@ -545,7 +547,7 @@ namespace { void Emit(CodeGenFunction &CGF, Flags flags) override { // Compute the address of the local variable, in case it's a // byref or something. - DeclRefExpr DRE(const_cast<VarDecl*>(&Var), false, + DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(&Var), false, Var.getType(), VK_LValue, SourceLocation()); llvm::Value *value = CGF.EmitLoadOfScalar(CGF.EmitDeclRefLValue(&DRE), SourceLocation()); @@ -563,7 +565,7 @@ namespace { : CleanupFn(CleanupFn), FnInfo(*Info), Var(*Var) {} void Emit(CodeGenFunction &CGF, Flags flags) override { - DeclRefExpr DRE(const_cast<VarDecl*>(&Var), false, + DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(&Var), false, Var.getType(), VK_LValue, SourceLocation()); // Compute the address of the local variable, in case it's a byref // or something. 
@@ -752,9 +754,9 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D, // If we're emitting a value with lifetime, we have to do the // initialization *before* we leave the cleanup scopes. - if (const ExprWithCleanups *ewc = dyn_cast<ExprWithCleanups>(init)) { - enterFullExpression(ewc); - init = ewc->getSubExpr(); + if (const FullExpr *fe = dyn_cast<FullExpr>(init)) { + enterFullExpression(fe); + init = fe->getSubExpr(); } CodeGenFunction::RunCleanupsScope Scope(*this); @@ -795,15 +797,21 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D, case Qualifiers::OCL_None: llvm_unreachable("present but none"); + case Qualifiers::OCL_Strong: { + if (!D || !isa<VarDecl>(D) || !cast<VarDecl>(D)->isARCPseudoStrong()) { + value = EmitARCRetainScalarExpr(init); + break; + } + // If D is pseudo-strong, treat it like __unsafe_unretained here. This means + // that we omit the retain, and causes non-autoreleased return values to be + // immediately released. + LLVM_FALLTHROUGH; + } + case Qualifiers::OCL_ExplicitNone: value = EmitARCUnsafeUnretainedScalarExpr(init); break; - case Qualifiers::OCL_Strong: { - value = EmitARCRetainScalarExpr(init); - break; - } - case Qualifiers::OCL_Weak: { // If it's not accessed by the initializer, try to emit the // initialization with a copy or move. @@ -948,111 +956,242 @@ static bool shouldUseBZeroPlusStoresToInitialize(llvm::Constant *Init, canEmitInitWithFewStoresAfterBZero(Init, StoreBudget); } -/// A byte pattern. -/// -/// Can be "any" pattern if the value was padding or known to be undef. -/// Can be "none" pattern if a sequence doesn't exist. 
-class BytePattern { - uint8_t Val; - enum class ValueType : uint8_t { Specific, Any, None } Type; - BytePattern(ValueType Type) : Type(Type) {} - -public: - BytePattern(uint8_t Value) : Val(Value), Type(ValueType::Specific) {} - static BytePattern Any() { return BytePattern(ValueType::Any); } - static BytePattern None() { return BytePattern(ValueType::None); } - bool isAny() const { return Type == ValueType::Any; } - bool isNone() const { return Type == ValueType::None; } - bool isValued() const { return Type == ValueType::Specific; } - uint8_t getValue() const { - assert(isValued()); - return Val; - } - BytePattern merge(const BytePattern Other) const { - if (isNone() || Other.isNone()) - return None(); - if (isAny()) - return Other; - if (Other.isAny()) - return *this; - if (getValue() == Other.getValue()) - return *this; - return None(); - } -}; - -/// Figures out whether the constant can be initialized with memset. -static BytePattern constantIsRepeatedBytePattern(llvm::Constant *C) { - if (isa<llvm::ConstantAggregateZero>(C) || isa<llvm::ConstantPointerNull>(C)) - return BytePattern(0x00); - if (isa<llvm::UndefValue>(C)) - return BytePattern::Any(); - - if (isa<llvm::ConstantInt>(C)) { - auto *Int = cast<llvm::ConstantInt>(C); - if (Int->getBitWidth() % 8 != 0) - return BytePattern::None(); - const llvm::APInt &Value = Int->getValue(); - if (Value.isSplat(8)) - return BytePattern(Value.getLoBits(8).getLimitedValue()); - return BytePattern::None(); - } - - if (isa<llvm::ConstantFP>(C)) { - auto *FP = cast<llvm::ConstantFP>(C); - llvm::APInt Bits = FP->getValueAPF().bitcastToAPInt(); - if (Bits.getBitWidth() % 8 != 0) - return BytePattern::None(); - if (!Bits.isSplat(8)) - return BytePattern::None(); - return BytePattern(Bits.getLimitedValue() & 0xFF); - } - - if (isa<llvm::ConstantVector>(C)) { - llvm::Constant *Splat = cast<llvm::ConstantVector>(C)->getSplatValue(); - if (Splat) - return constantIsRepeatedBytePattern(Splat); - return BytePattern::None(); - } 
- - if (isa<llvm::ConstantArray>(C) || isa<llvm::ConstantStruct>(C)) { - BytePattern Pattern(BytePattern::Any()); - for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I) { - llvm::Constant *Elt = cast<llvm::Constant>(C->getOperand(I)); - Pattern = Pattern.merge(constantIsRepeatedBytePattern(Elt)); - if (Pattern.isNone()) - return Pattern; +/// Decide whether we should use memset to initialize a local variable instead +/// of using a memcpy from a constant global. Assumes we've already decided to +/// not user bzero. +/// FIXME We could be more clever, as we are for bzero above, and generate +/// memset followed by stores. It's unclear that's worth the effort. +static llvm::Value *shouldUseMemSetToInitialize(llvm::Constant *Init, + uint64_t GlobalSize) { + uint64_t SizeLimit = 32; + if (GlobalSize <= SizeLimit) + return nullptr; + return llvm::isBytewiseValue(Init); +} + +static llvm::Constant *patternFor(CodeGenModule &CGM, llvm::Type *Ty) { + // The following value is a guaranteed unmappable pointer value and has a + // repeated byte-pattern which makes it easier to synthesize. We use it for + // pointers as well as integers so that aggregates are likely to be + // initialized with this repeated value. + constexpr uint64_t LargeValue = 0xAAAAAAAAAAAAAAAAull; + // For 32-bit platforms it's a bit trickier because, across systems, only the + // zero page can reasonably be expected to be unmapped, and even then we need + // a very low address. We use a smaller value, and that value sadly doesn't + // have a repeated byte-pattern. We don't use it for integers. + constexpr uint32_t SmallValue = 0x000000AA; + // Floating-point values are initialized as NaNs because they propagate. Using + // a repeated byte pattern means that it will be easier to initialize + // all-floating-point aggregates and arrays with memset. Further, aggregates + // which mix integral and a few floats might also initialize with memset + // followed by a handful of stores for the floats. 
Using fairly unique NaNs + // also means they'll be easier to distinguish in a crash. + constexpr bool NegativeNaN = true; + constexpr uint64_t NaNPayload = 0xFFFFFFFFFFFFFFFFull; + if (Ty->isIntOrIntVectorTy()) { + unsigned BitWidth = cast<llvm::IntegerType>( + Ty->isVectorTy() ? Ty->getVectorElementType() : Ty) + ->getBitWidth(); + if (BitWidth <= 64) + return llvm::ConstantInt::get(Ty, LargeValue); + return llvm::ConstantInt::get( + Ty, llvm::APInt::getSplat(BitWidth, llvm::APInt(64, LargeValue))); + } + if (Ty->isPtrOrPtrVectorTy()) { + auto *PtrTy = cast<llvm::PointerType>( + Ty->isVectorTy() ? Ty->getVectorElementType() : Ty); + unsigned PtrWidth = CGM.getContext().getTargetInfo().getPointerWidth( + PtrTy->getAddressSpace()); + llvm::Type *IntTy = llvm::IntegerType::get(CGM.getLLVMContext(), PtrWidth); + uint64_t IntValue; + switch (PtrWidth) { + default: + llvm_unreachable("pattern initialization of unsupported pointer width"); + case 64: + IntValue = LargeValue; + break; + case 32: + IntValue = SmallValue; + break; } - return Pattern; + auto *Int = llvm::ConstantInt::get(IntTy, IntValue); + return llvm::ConstantExpr::getIntToPtr(Int, PtrTy); + } + if (Ty->isFPOrFPVectorTy()) { + unsigned BitWidth = llvm::APFloat::semanticsSizeInBits( + (Ty->isVectorTy() ? Ty->getVectorElementType() : Ty) + ->getFltSemantics()); + llvm::APInt Payload(64, NaNPayload); + if (BitWidth >= 64) + Payload = llvm::APInt::getSplat(BitWidth, Payload); + return llvm::ConstantFP::getQNaN(Ty, NegativeNaN, &Payload); + } + if (Ty->isArrayTy()) { + // Note: this doesn't touch tail padding (at the end of an object, before + // the next array object). It is instead handled by replaceUndef. + auto *ArrTy = cast<llvm::ArrayType>(Ty); + llvm::SmallVector<llvm::Constant *, 8> Element( + ArrTy->getNumElements(), patternFor(CGM, ArrTy->getElementType())); + return llvm::ConstantArray::get(ArrTy, Element); + } + + // Note: this doesn't touch struct padding. 
It will initialize as much union + // padding as is required for the largest type in the union. Padding is + // instead handled by replaceUndef. Stores to structs with volatile members + // don't have a volatile qualifier when initialized according to C++. This is + // fine because stack-based volatiles don't really have volatile semantics + // anyways, and the initialization shouldn't be observable. + auto *StructTy = cast<llvm::StructType>(Ty); + llvm::SmallVector<llvm::Constant *, 8> Struct(StructTy->getNumElements()); + for (unsigned El = 0; El != Struct.size(); ++El) + Struct[El] = patternFor(CGM, StructTy->getElementType(El)); + return llvm::ConstantStruct::get(StructTy, Struct); +} + +static Address createUnnamedGlobalFrom(CodeGenModule &CGM, const VarDecl &D, + CGBuilderTy &Builder, + llvm::Constant *Constant, + CharUnits Align) { + auto FunctionName = [&](const DeclContext *DC) -> std::string { + if (const auto *FD = dyn_cast<FunctionDecl>(DC)) { + if (const auto *CC = dyn_cast<CXXConstructorDecl>(FD)) + return CC->getNameAsString(); + if (const auto *CD = dyn_cast<CXXDestructorDecl>(FD)) + return CD->getNameAsString(); + return CGM.getMangledName(FD); + } else if (const auto *OM = dyn_cast<ObjCMethodDecl>(DC)) { + return OM->getNameAsString(); + } else if (isa<BlockDecl>(DC)) { + return "<block>"; + } else if (isa<CapturedDecl>(DC)) { + return "<captured>"; + } else { + llvm::llvm_unreachable_internal("expected a function or method"); + } + }; + + auto *Ty = Constant->getType(); + bool isConstant = true; + llvm::GlobalVariable *InsertBefore = nullptr; + unsigned AS = CGM.getContext().getTargetAddressSpace( + CGM.getStringLiteralAddressSpace()); + llvm::GlobalVariable *GV = new llvm::GlobalVariable( + CGM.getModule(), Ty, isConstant, llvm::GlobalValue::PrivateLinkage, + Constant, + "__const." + FunctionName(D.getParentFunctionOrMethod()) + "." 
+ + D.getName(), + InsertBefore, llvm::GlobalValue::NotThreadLocal, AS); + GV->setAlignment(Align.getQuantity()); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + + Address SrcPtr = Address(GV, Align); + llvm::Type *BP = llvm::PointerType::getInt8PtrTy(CGM.getLLVMContext(), AS); + if (SrcPtr.getType() != BP) + SrcPtr = Builder.CreateBitCast(SrcPtr, BP); + return SrcPtr; +} + +static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D, + Address Loc, bool isVolatile, + CGBuilderTy &Builder, + llvm::Constant *constant) { + auto *Ty = constant->getType(); + bool isScalar = Ty->isIntOrIntVectorTy() || Ty->isPtrOrPtrVectorTy() || + Ty->isFPOrFPVectorTy(); + if (isScalar) { + Builder.CreateStore(constant, Loc, isVolatile); + return; } - if (llvm::ConstantDataSequential *CDS = - dyn_cast<llvm::ConstantDataSequential>(C)) { - BytePattern Pattern(BytePattern::Any()); - for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) { - llvm::Constant *Elt = CDS->getElementAsConstant(I); - Pattern = Pattern.merge(constantIsRepeatedBytePattern(Elt)); - if (Pattern.isNone()) - return Pattern; + auto *Int8Ty = llvm::IntegerType::getInt8Ty(CGM.getLLVMContext()); + auto *IntPtrTy = CGM.getDataLayout().getIntPtrType(CGM.getLLVMContext()); + + // If the initializer is all or mostly the same, codegen with bzero / memset + // then do a few stores afterward. 
+ uint64_t ConstantSize = CGM.getDataLayout().getTypeAllocSize(Ty); + auto *SizeVal = llvm::ConstantInt::get(IntPtrTy, ConstantSize); + if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) { + Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, 0), SizeVal, + isVolatile); + + bool valueAlreadyCorrect = + constant->isNullValue() || isa<llvm::UndefValue>(constant); + if (!valueAlreadyCorrect) { + Loc = Builder.CreateBitCast(Loc, Ty->getPointerTo(Loc.getAddressSpace())); + emitStoresForInitAfterBZero(CGM, constant, Loc, isVolatile, Builder); + } + return; + } + + llvm::Value *Pattern = shouldUseMemSetToInitialize(constant, ConstantSize); + if (Pattern) { + uint64_t Value = 0x00; + if (!isa<llvm::UndefValue>(Pattern)) { + const llvm::APInt &AP = cast<llvm::ConstantInt>(Pattern)->getValue(); + assert(AP.getBitWidth() <= 8); + Value = AP.getLimitedValue(); } - return Pattern; + Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, Value), SizeVal, + isVolatile); + return; } - // BlockAddress, ConstantExpr, and everything else is scary. - return BytePattern::None(); + Builder.CreateMemCpy( + Loc, + createUnnamedGlobalFrom(CGM, D, Builder, constant, Loc.getAlignment()), + SizeVal, isVolatile); } -/// Decide whether we should use memset to initialize a local variable instead -/// of using a memcpy from a constant global. Assumes we've already decided to -/// not user bzero. -/// FIXME We could be more clever, as we are for bzero above, and generate -/// memset followed by stores. It's unclear that's worth the effort. 
-static BytePattern shouldUseMemSetToInitialize(llvm::Constant *Init, - uint64_t GlobalSize) { - uint64_t SizeLimit = 32; - if (GlobalSize <= SizeLimit) - return BytePattern::None(); - return constantIsRepeatedBytePattern(Init); +static void emitStoresForZeroInit(CodeGenModule &CGM, const VarDecl &D, + Address Loc, bool isVolatile, + CGBuilderTy &Builder) { + llvm::Type *ElTy = Loc.getElementType(); + llvm::Constant *constant = llvm::Constant::getNullValue(ElTy); + emitStoresForConstant(CGM, D, Loc, isVolatile, Builder, constant); +} + +static void emitStoresForPatternInit(CodeGenModule &CGM, const VarDecl &D, + Address Loc, bool isVolatile, + CGBuilderTy &Builder) { + llvm::Type *ElTy = Loc.getElementType(); + llvm::Constant *constant = patternFor(CGM, ElTy); + assert(!isa<llvm::UndefValue>(constant)); + emitStoresForConstant(CGM, D, Loc, isVolatile, Builder, constant); +} + +static bool containsUndef(llvm::Constant *constant) { + auto *Ty = constant->getType(); + if (isa<llvm::UndefValue>(constant)) + return true; + if (Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()) + for (llvm::Use &Op : constant->operands()) + if (containsUndef(cast<llvm::Constant>(Op))) + return true; + return false; +} + +static llvm::Constant *replaceUndef(llvm::Constant *constant) { + // FIXME: when doing pattern initialization, replace undef with 0xAA instead. + // FIXME: also replace padding between values by creating a new struct type + // which has no padding. 
+ auto *Ty = constant->getType(); + if (isa<llvm::UndefValue>(constant)) + return llvm::Constant::getNullValue(Ty); + if (!(Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy())) + return constant; + if (!containsUndef(constant)) + return constant; + llvm::SmallVector<llvm::Constant *, 8> Values(constant->getNumOperands()); + for (unsigned Op = 0, NumOp = constant->getNumOperands(); Op != NumOp; ++Op) { + auto *OpValue = cast<llvm::Constant>(constant->getOperand(Op)); + Values[Op] = replaceUndef(OpValue); + } + if (Ty->isStructTy()) + return llvm::ConstantStruct::get(cast<llvm::StructType>(Ty), Values); + if (Ty->isArrayTy()) + return llvm::ConstantArray::get(cast<llvm::ArrayType>(Ty), Values); + assert(Ty->isVectorTy()); + return llvm::ConstantVector::get(Values); } /// EmitAutoVarDecl - Emit code and set up an entry in LocalDeclMap for a @@ -1098,6 +1237,7 @@ void CodeGenFunction::EmitAndRegisterVariableArrayDimensions( // For each dimension stores its QualType and corresponding // size-expression Value. SmallVector<CodeGenFunction::VlaSizePair, 4> Dimensions; + SmallVector<IdentifierInfo *, 4> VLAExprNames; // Break down the array into individual dimensions. QualType Type1D = D.getType(); @@ -1106,8 +1246,14 @@ void CodeGenFunction::EmitAndRegisterVariableArrayDimensions( if (auto *C = dyn_cast<llvm::ConstantInt>(VlaSize.NumElts)) Dimensions.emplace_back(C, Type1D.getUnqualifiedType()); else { - auto SizeExprAddr = CreateDefaultAlignTempAlloca( - VlaSize.NumElts->getType(), "__vla_expr"); + // Generate a locally unique name for the size expression. 
+ Twine Name = Twine("__vla_expr") + Twine(VLAExprCounter++); + SmallString<12> Buffer; + StringRef NameRef = Name.toStringRef(Buffer); + auto &Ident = getContext().Idents.getOwn(NameRef); + VLAExprNames.push_back(&Ident); + auto SizeExprAddr = + CreateDefaultAlignTempAlloca(VlaSize.NumElts->getType(), NameRef); Builder.CreateStore(VlaSize.NumElts, SizeExprAddr); Dimensions.emplace_back(SizeExprAddr.getPointer(), Type1D.getUnqualifiedType()); @@ -1121,20 +1267,20 @@ void CodeGenFunction::EmitAndRegisterVariableArrayDimensions( // Register each dimension's size-expression with a DILocalVariable, // so that it can be used by CGDebugInfo when instantiating a DISubrange // to describe this array. + unsigned NameIdx = 0; for (auto &VlaSize : Dimensions) { llvm::Metadata *MD; if (auto *C = dyn_cast<llvm::ConstantInt>(VlaSize.NumElts)) MD = llvm::ConstantAsMetadata::get(C); else { // Create an artificial VarDecl to generate debug info for. - IdentifierInfo &NameIdent = getContext().Idents.getOwn( - cast<llvm::AllocaInst>(VlaSize.NumElts)->getName()); + IdentifierInfo *NameIdent = VLAExprNames[NameIdx++]; auto VlaExprTy = VlaSize.NumElts->getType()->getPointerElementType(); auto QT = getContext().getIntTypeForBitwidth( VlaExprTy->getScalarSizeInBits(), false); auto *ArtificialDecl = VarDecl::Create( getContext(), const_cast<DeclContext *>(D.getDeclContext()), - D.getLocation(), D.getLocation(), &NameIdent, QT, + D.getLocation(), D.getLocation(), NameIdent, QT, getContext().CreateTypeSourceInfo(QT), SC_Auto); ArtificialDecl->setImplicit(); @@ -1157,8 +1303,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { AutoVarEmission emission(D); - bool isByRef = D.hasAttr<BlocksAttr>(); - emission.IsByRef = isByRef; + bool isEscapingByRef = D.isEscapingByref(); + emission.IsEscapingByRef = isEscapingByRef; CharUnits alignment = getContext().getDeclAlign(&D); @@ -1197,8 +1343,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // in OpenCL. 
if ((!getLangOpts().OpenCL || Ty.getAddressSpace() == LangAS::opencl_constant) && - (CGM.getCodeGenOpts().MergeAllConstants && !NRVO && !isByRef && - CGM.isTypeConstant(Ty, true))) { + (CGM.getCodeGenOpts().MergeAllConstants && !NRVO && + !isEscapingByRef && CGM.isTypeConstant(Ty, true))) { EmitStaticVarDecl(D, llvm::GlobalValue::InternalLinkage); // Signal this condition to later callbacks. @@ -1250,7 +1396,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { } else { CharUnits allocaAlignment; llvm::Type *allocaTy; - if (isByRef) { + if (isEscapingByRef) { auto &byrefInfo = getBlockByrefInfo(&D); allocaTy = byrefInfo.Type; allocaAlignment = byrefInfo.ByrefAlignment; @@ -1439,6 +1585,8 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { auto DL = ApplyDebugLocation::CreateDefaultArtificial(*this, D.getLocation()); QualType type = D.getType(); + bool isVolatile = type.isVolatileQualified(); + // If this local has an initializer, emit it now. const Expr *Init = D.getInit(); @@ -1450,7 +1598,7 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { } // Initialize the structure of a __block variable. - if (emission.IsByRef) + if (emission.IsEscapingByRef) emitByrefStructureInit(emission); // Initialize the variable here if it doesn't have a initializer and it is a @@ -1460,30 +1608,126 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { type.isNonTrivialToPrimitiveDefaultInitialize() == QualType::PDIK_Struct) { LValue Dst = MakeAddrLValue(emission.getAllocatedAddress(), type); - if (emission.IsByRef) + if (emission.IsEscapingByRef) drillIntoBlockVariable(*this, Dst, &D); defaultInitNonTrivialCStructVar(Dst); return; } - if (isTrivialInitializer(Init)) - return; - // Check whether this is a byref variable that's potentially // captured and moved by its own initializer. If so, we'll need to // emit the initializer first, then copy into the variable. 
- bool capturedByInit = emission.IsByRef && isCapturedBy(D, Init); + bool capturedByInit = + Init && emission.IsEscapingByRef && isCapturedBy(D, Init); Address Loc = - capturedByInit ? emission.Addr : emission.getObjectAddress(*this); + capturedByInit ? emission.Addr : emission.getObjectAddress(*this); + + // Note: constexpr already initializes everything correctly. + LangOptions::TrivialAutoVarInitKind trivialAutoVarInit = + (D.isConstexpr() + ? LangOptions::TrivialAutoVarInitKind::Uninitialized + : (D.getAttr<UninitializedAttr>() + ? LangOptions::TrivialAutoVarInitKind::Uninitialized + : getContext().getLangOpts().getTrivialAutoVarInit())); + + auto initializeWhatIsTechnicallyUninitialized = [&]() { + if (trivialAutoVarInit == + LangOptions::TrivialAutoVarInitKind::Uninitialized) + return; + + CharUnits Size = getContext().getTypeSizeInChars(type); + if (!Size.isZero()) { + switch (trivialAutoVarInit) { + case LangOptions::TrivialAutoVarInitKind::Uninitialized: + llvm_unreachable("Uninitialized handled above"); + case LangOptions::TrivialAutoVarInitKind::Zero: + emitStoresForZeroInit(CGM, D, Loc, isVolatile, Builder); + break; + case LangOptions::TrivialAutoVarInitKind::Pattern: + emitStoresForPatternInit(CGM, D, Loc, isVolatile, Builder); + break; + } + return; + } + + // VLAs look zero-sized to getTypeInfo. We can't emit constant stores to + // them, so emit a memcpy with the VLA size to initialize each element. + // Technically zero-sized or negative-sized VLAs are undefined, and UBSan + // will catch that code, but there exists code which generates zero-sized + // VLAs. Be nice and initialize whatever they requested. 
+ const VariableArrayType *VlaType = + dyn_cast_or_null<VariableArrayType>(getContext().getAsArrayType(type)); + if (!VlaType) + return; + auto VlaSize = getVLASize(VlaType); + auto SizeVal = VlaSize.NumElts; + CharUnits EltSize = getContext().getTypeSizeInChars(VlaSize.Type); + switch (trivialAutoVarInit) { + case LangOptions::TrivialAutoVarInitKind::Uninitialized: + llvm_unreachable("Uninitialized handled above"); + + case LangOptions::TrivialAutoVarInitKind::Zero: + if (!EltSize.isOne()) + SizeVal = Builder.CreateNUWMul(SizeVal, CGM.getSize(EltSize)); + Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, 0), SizeVal, + isVolatile); + break; + + case LangOptions::TrivialAutoVarInitKind::Pattern: { + llvm::Type *ElTy = Loc.getElementType(); + llvm::Constant *Constant = patternFor(CGM, ElTy); + CharUnits ConstantAlign = getContext().getTypeAlignInChars(VlaSize.Type); + llvm::BasicBlock *SetupBB = createBasicBlock("vla-setup.loop"); + llvm::BasicBlock *LoopBB = createBasicBlock("vla-init.loop"); + llvm::BasicBlock *ContBB = createBasicBlock("vla-init.cont"); + llvm::Value *IsZeroSizedVLA = Builder.CreateICmpEQ( + SizeVal, llvm::ConstantInt::get(SizeVal->getType(), 0), + "vla.iszerosized"); + Builder.CreateCondBr(IsZeroSizedVLA, ContBB, SetupBB); + EmitBlock(SetupBB); + if (!EltSize.isOne()) + SizeVal = Builder.CreateNUWMul(SizeVal, CGM.getSize(EltSize)); + llvm::Value *BaseSizeInChars = + llvm::ConstantInt::get(IntPtrTy, EltSize.getQuantity()); + Address Begin = Builder.CreateElementBitCast(Loc, Int8Ty, "vla.begin"); + llvm::Value *End = + Builder.CreateInBoundsGEP(Begin.getPointer(), SizeVal, "vla.end"); + llvm::BasicBlock *OriginBB = Builder.GetInsertBlock(); + EmitBlock(LoopBB); + llvm::PHINode *Cur = Builder.CreatePHI(Begin.getType(), 2, "vla.cur"); + Cur->addIncoming(Begin.getPointer(), OriginBB); + CharUnits CurAlign = Loc.getAlignment().alignmentOfArrayElement(EltSize); + Builder.CreateMemCpy( + Address(Cur, CurAlign), + createUnnamedGlobalFrom(CGM, D, 
Builder, Constant, ConstantAlign), + BaseSizeInChars, isVolatile); + llvm::Value *Next = + Builder.CreateInBoundsGEP(Int8Ty, Cur, BaseSizeInChars, "vla.next"); + llvm::Value *Done = Builder.CreateICmpEQ(Next, End, "vla-init.isdone"); + Builder.CreateCondBr(Done, ContBB, LoopBB); + Cur->addIncoming(Next, LoopBB); + EmitBlock(ContBB); + } break; + } + }; + + if (isTrivialInitializer(Init)) { + initializeWhatIsTechnicallyUninitialized(); + return; + } llvm::Constant *constant = nullptr; if (emission.IsConstantAggregate || D.isConstexpr()) { assert(!capturedByInit && "constant init contains a capturing block?"); constant = ConstantEmitter(*this).tryEmitAbstractForInitializer(D); + if (constant && trivialAutoVarInit != + LangOptions::TrivialAutoVarInitKind::Uninitialized) + constant = replaceUndef(constant); } if (!constant) { + initializeWhatIsTechnicallyUninitialized(); LValue lv = MakeAddrLValue(Loc, type); lv.setNonGC(true); return EmitExprAsInit(Init, &D, lv, capturedByInit); @@ -1496,61 +1740,11 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { return EmitStoreThroughLValue(RValue::get(constant), lv, true); } - // If this is a simple aggregate initialization, we can optimize it - // in various ways. - bool isVolatile = type.isVolatileQualified(); - - llvm::Value *SizeVal = - llvm::ConstantInt::get(IntPtrTy, - getContext().getTypeSizeInChars(type).getQuantity()); - llvm::Type *BP = CGM.Int8Ty->getPointerTo(Loc.getAddressSpace()); if (Loc.getType() != BP) Loc = Builder.CreateBitCast(Loc, BP); - // If the initializer is all or mostly the same, codegen with bzero / memset - // then do a few stores afterward. - uint64_t ConstantSize = - CGM.getDataLayout().getTypeAllocSize(constant->getType()); - if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) { - Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, 0), SizeVal, - isVolatile); - // Zero and undef don't require a stores. 
- if (!constant->isNullValue() && !isa<llvm::UndefValue>(constant)) { - Loc = Builder.CreateBitCast(Loc, - constant->getType()->getPointerTo(Loc.getAddressSpace())); - emitStoresForInitAfterBZero(CGM, constant, Loc, isVolatile, Builder); - } - return; - } - - BytePattern Pattern = shouldUseMemSetToInitialize(constant, ConstantSize); - if (!Pattern.isNone()) { - uint8_t Value = Pattern.isAny() ? 0x00 : Pattern.getValue(); - Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, Value), SizeVal, - isVolatile); - return; - } - - // Otherwise, create a temporary global with the initializer then - // memcpy from the global to the alloca. - std::string Name = getStaticDeclName(CGM, D); - unsigned AS = CGM.getContext().getTargetAddressSpace( - CGM.getStringLiteralAddressSpace()); - BP = llvm::PointerType::getInt8PtrTy(getLLVMContext(), AS); - - llvm::GlobalVariable *GV = new llvm::GlobalVariable( - CGM.getModule(), constant->getType(), true, - llvm::GlobalValue::PrivateLinkage, constant, Name, nullptr, - llvm::GlobalValue::NotThreadLocal, AS); - GV->setAlignment(Loc.getAlignment().getQuantity()); - GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - - Address SrcPtr = Address(GV, Loc.getAlignment()); - if (SrcPtr.getType() != BP) - SrcPtr = Builder.CreateBitCast(SrcPtr, BP); - - Builder.CreateMemCpy(Loc, SrcPtr, SizeVal, isVolatile); + emitStoresForConstant(CGM, D, Loc, isVolatile, Builder, constant); } /// Emit an expression as an initializer for an object (variable, field, etc.) @@ -1712,12 +1906,14 @@ void CodeGenFunction::EmitAutoVarCleanups(const AutoVarEmission &emission) { // If this is a block variable, call _Block_object_destroy // (on the unforwarded address). Don't enter this cleanup if we're in pure-GC // mode. 
- if (emission.IsByRef && CGM.getLangOpts().getGC() != LangOptions::GCOnly) { + if (emission.IsEscapingByRef && + CGM.getLangOpts().getGC() != LangOptions::GCOnly) { BlockFieldFlags Flags = BLOCK_FIELD_IS_BYREF; if (emission.Variable->getType().isObjCGCWeak()) Flags |= BLOCK_FIELD_IS_WEAK; enterByrefCleanup(NormalAndEHCleanup, emission.Addr, Flags, - /*LoadBlockVarAddr*/ false); + /*LoadBlockVarAddr*/ false, + cxxDestructorCanThrow(emission.Variable->getType())); } } @@ -2134,15 +2330,11 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, // cleanup to do the release at the end of the function. bool isConsumed = D.hasAttr<NSConsumedAttr>(); - // 'self' is always formally __strong, but if this is not an - // init method then we don't want to retain it. + // If a parameter is pseudo-strong then we can omit the implicit retain. if (D.isARCPseudoStrong()) { - const ObjCMethodDecl *method = cast<ObjCMethodDecl>(CurCodeDecl); - assert(&D == method->getSelfDecl()); - assert(lt == Qualifiers::OCL_Strong); - assert(qs.hasConst()); - assert(method->getMethodFamily() != OMF_init); - (void) method; + assert(lt == Qualifiers::OCL_Strong && + "pseudo-strong variable isn't strong?"); + assert(qs.hasConst() && "pseudo-strong variable should be const!"); lt = Qualifiers::OCL_ExplicitNone; } @@ -2224,3 +2416,7 @@ void CodeGenModule::EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D, return; getOpenMPRuntime().emitUserDefinedReduction(CGF, D); } + +void CodeGenModule::EmitOMPRequiresDecl(const OMPRequiresDecl *D) { + getOpenMPRuntime().checkArchForUnifiedAddressing(*this, D); +} diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp index 510863f68eff..9aa31f181e99 100644 --- a/lib/CodeGen/CGDeclCXX.cpp +++ b/lib/CodeGen/CGDeclCXX.cpp @@ -15,7 +15,7 @@ #include "CGCXXABI.h" #include "CGObjCRuntime.h" #include "CGOpenMPRuntime.h" -#include "clang/Frontend/CodeGenOptions.h" +#include "clang/Basic/CodeGenOptions.h" #include 
"llvm/ADT/StringExtras.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" @@ -26,7 +26,10 @@ using namespace CodeGen; static void EmitDeclInit(CodeGenFunction &CGF, const VarDecl &D, ConstantAddress DeclPtr) { - assert(D.hasGlobalStorage() && "VarDecl must have global storage!"); + assert( + (D.hasGlobalStorage() || + (D.hasLocalStorage() && CGF.getContext().getLangOpts().OpenCLCPlusPlus)) && + "VarDecl must have global or local (in the case of OpenCL) storage!"); assert(!D.getType()->isReferenceType() && "Should not call EmitDeclInit on a reference!"); @@ -63,15 +66,24 @@ static void EmitDeclInit(CodeGenFunction &CGF, const VarDecl &D, /// Emit code to cause the destruction of the given variable with /// static storage duration. static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, - ConstantAddress addr) { + ConstantAddress Addr) { + // Honor __attribute__((no_destroy)) and bail instead of attempting + // to emit a reference to a possibly nonexistent destructor, which + // in turn can cause a crash. This will result in a global constructor + // that isn't balanced out by a destructor call as intended by the + // attribute. This also checks for -fno-c++-static-destructors and + // bails even if the attribute is not present. + if (D.isNoDestroy(CGF.getContext())) + return; + CodeGenModule &CGM = CGF.CGM; // FIXME: __attribute__((cleanup)) ? - QualType type = D.getType(); - QualType::DestructionKind dtorKind = type.isDestructedType(); + QualType Type = D.getType(); + QualType::DestructionKind DtorKind = Type.isDestructedType(); - switch (dtorKind) { + switch (DtorKind) { case QualType::DK_none: return; @@ -86,13 +98,14 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, return; } - llvm::Constant *function; - llvm::Constant *argument; + llvm::Constant *Func; + llvm::Constant *Argument; // Special-case non-array C++ destructors, if they have the right signature. 
// Under some ABIs, destructors return this instead of void, and cannot be - // passed directly to __cxa_atexit if the target does not allow this mismatch. - const CXXRecordDecl *Record = type->getAsCXXRecordDecl(); + // passed directly to __cxa_atexit if the target does not allow this + // mismatch. + const CXXRecordDecl *Record = Type->getAsCXXRecordDecl(); bool CanRegisterDestructor = Record && (!CGM.getCXXABI().HasThisReturn( GlobalDecl(Record->getDestructor(), Dtor_Complete)) || @@ -103,43 +116,47 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, bool UsingExternalHelper = !CGM.getCodeGenOpts().CXAAtExit; if (Record && (CanRegisterDestructor || UsingExternalHelper)) { assert(!Record->hasTrivialDestructor()); - CXXDestructorDecl *dtor = Record->getDestructor(); + CXXDestructorDecl *Dtor = Record->getDestructor(); - function = CGM.getAddrOfCXXStructor(dtor, StructorType::Complete); - argument = llvm::ConstantExpr::getBitCast( - addr.getPointer(), CGF.getTypes().ConvertType(type)->getPointerTo()); + Func = CGM.getAddrOfCXXStructor(Dtor, StructorType::Complete); + Argument = llvm::ConstantExpr::getBitCast( + Addr.getPointer(), CGF.getTypes().ConvertType(Type)->getPointerTo()); // Otherwise, the standard logic requires a helper function. } else { - function = CodeGenFunction(CGM) - .generateDestroyHelper(addr, type, CGF.getDestroyer(dtorKind), - CGF.needsEHCleanup(dtorKind), &D); - argument = llvm::Constant::getNullValue(CGF.Int8PtrTy); + Func = CodeGenFunction(CGM) + .generateDestroyHelper(Addr, Type, CGF.getDestroyer(DtorKind), + CGF.needsEHCleanup(DtorKind), &D); + Argument = llvm::Constant::getNullValue(CGF.Int8PtrTy); } - CGM.getCXXABI().registerGlobalDtor(CGF, D, function, argument); + CGM.getCXXABI().registerGlobalDtor(CGF, D, Func, Argument); } /// Emit code to cause the variable at the given address to be considered as /// constant from this point onwards. 
static void EmitDeclInvariant(CodeGenFunction &CGF, const VarDecl &D, llvm::Constant *Addr) { + return CGF.EmitInvariantStart( + Addr, CGF.getContext().getTypeSizeInChars(D.getType())); +} + +void CodeGenFunction::EmitInvariantStart(llvm::Constant *Addr, CharUnits Size) { // Do not emit the intrinsic if we're not optimizing. - if (!CGF.CGM.getCodeGenOpts().OptimizationLevel) + if (!CGM.getCodeGenOpts().OptimizationLevel) return; // Grab the llvm.invariant.start intrinsic. llvm::Intrinsic::ID InvStartID = llvm::Intrinsic::invariant_start; // Overloaded address space type. - llvm::Type *ObjectPtr[1] = {CGF.Int8PtrTy}; - llvm::Constant *InvariantStart = CGF.CGM.getIntrinsic(InvStartID, ObjectPtr); + llvm::Type *ObjectPtr[1] = {Int8PtrTy}; + llvm::Constant *InvariantStart = CGM.getIntrinsic(InvStartID, ObjectPtr); // Emit a call with the size in bytes of the object. - CharUnits WidthChars = CGF.getContext().getTypeSizeInChars(D.getType()); - uint64_t Width = WidthChars.getQuantity(); - llvm::Value *Args[2] = { llvm::ConstantInt::getSigned(CGF.Int64Ty, Width), - llvm::ConstantExpr::getBitCast(Addr, CGF.Int8PtrTy)}; - CGF.Builder.CreateCall(InvariantStart, Args); + uint64_t Width = Size.getQuantity(); + llvm::Value *Args[2] = { llvm::ConstantInt::getSigned(Int64Ty, Width), + llvm::ConstantExpr::getBitCast(Addr, Int8PtrTy)}; + Builder.CreateCall(InvariantStart, Args); } void CodeGenFunction::EmitCXXGlobalVarDeclInit(const VarDecl &D, @@ -347,6 +364,10 @@ llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction( !isInSanitizerBlacklist(SanitizerKind::Memory, Fn, Loc)) Fn->addFnAttr(llvm::Attribute::SanitizeMemory); + if (getLangOpts().Sanitize.has(SanitizerKind::KernelMemory) && + !isInSanitizerBlacklist(SanitizerKind::KernelMemory, Fn, Loc)) + Fn->addFnAttr(llvm::Attribute::SanitizeMemory); + if (getLangOpts().Sanitize.has(SanitizerKind::SafeStack) && !isInSanitizerBlacklist(SanitizerKind::SafeStack, Fn, Loc)) Fn->addFnAttr(llvm::Attribute::SafeStack); @@ -355,6 
+376,22 @@ llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction( !isInSanitizerBlacklist(SanitizerKind::ShadowCallStack, Fn, Loc)) Fn->addFnAttr(llvm::Attribute::ShadowCallStack); + auto RASignKind = getCodeGenOpts().getSignReturnAddress(); + if (RASignKind != CodeGenOptions::SignReturnAddressScope::None) { + Fn->addFnAttr("sign-return-address", + RASignKind == CodeGenOptions::SignReturnAddressScope::All + ? "all" + : "non-leaf"); + auto RASignKey = getCodeGenOpts().getSignReturnAddressKey(); + Fn->addFnAttr("sign-return-address-key", + RASignKey == CodeGenOptions::SignReturnAddressKeyValue::AKey + ? "a_key" + : "b_key"); + } + + if (getCodeGenOpts().BranchTargetEnforcement) + Fn->addFnAttr("branch-target-enforcement"); + return Fn; } @@ -565,7 +602,7 @@ void CodeGenFunction::GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn, if (D->hasAttr<NoDebugAttr>()) DebugInfo = nullptr; // disable debug info indefinitely for this function - CurEHLocation = D->getLocStart(); + CurEHLocation = D->getBeginLoc(); StartFunction(GlobalDecl(D), getContext().VoidTy, Fn, getTypes().arrangeNullaryFunction(), @@ -587,7 +624,7 @@ void CodeGenFunction::GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn, void CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn, ArrayRef<llvm::Function *> Decls, - Address Guard) { + ConstantAddress Guard) { { auto NL = ApplyDebugLocation::CreateEmpty(*this); StartFunction(GlobalDecl(), getContext().VoidTy, Fn, @@ -611,6 +648,12 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn, // initializers use previously-initialized thread_local vars, that's // probably supposed to be OK, but the standard doesn't say. Builder.CreateStore(llvm::ConstantInt::get(GuardVal->getType(),1), Guard); + + // The guard variable can't ever change again. 
+ EmitInvariantStart( + Guard.getPointer(), + CharUnits::fromQuantity( + CGM.getDataLayout().getTypeAllocSize(GuardVal->getType()))); } RunCleanupsScope Scope(*this); @@ -679,7 +722,7 @@ llvm::Function *CodeGenFunction::generateDestroyHelper( llvm::Function *fn = CGM.CreateGlobalInitOrDestructFunction( FTy, "__cxx_global_array_dtor", FI, VD->getLocation()); - CurEHLocation = VD->getLocStart(); + CurEHLocation = VD->getBeginLoc(); StartFunction(VD, getContext().VoidTy, fn, FI, args); diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp index a2ff102e1ab4..5756e13d2623 100644 --- a/lib/CodeGen/CGException.cpp +++ b/lib/CodeGen/CGException.cpp @@ -66,7 +66,7 @@ llvm::Constant *CodeGenModule::getTerminateFn() { name = "__std_terminate"; else name = "?terminate@@YAXXZ"; - } else if (getLangOpts().ObjC1 && + } else if (getLangOpts().ObjC && getLangOpts().ObjCRuntime.hasTerminate()) name = "objc_terminate"; else @@ -224,7 +224,7 @@ const EHPersonality &EHPersonality::get(CodeGenModule &CGM, if (FD && FD->usesSEHTry()) return getSEHPersonalityMSVC(T); - if (L.ObjC1) + if (L.ObjC) return L.CPlusPlus ? getObjCXXPersonality(Target, L) : getObjCPersonality(Target, L); return L.CPlusPlus ? getCXXPersonality(Target, L) @@ -250,7 +250,11 @@ static llvm::Constant *getPersonalityFn(CodeGenModule &CGM, static llvm::Constant *getOpaquePersonalityFn(CodeGenModule &CGM, const EHPersonality &Personality) { llvm::Constant *Fn = getPersonalityFn(CGM, Personality); - return llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); + llvm::PointerType* Int8PtrTy = llvm::PointerType::get( + llvm::Type::getInt8Ty(CGM.getLLVMContext()), + CGM.getDataLayout().getProgramAddressSpace()); + + return llvm::ConstantExpr::getBitCast(Fn, Int8PtrTy); } /// Check whether a landingpad instruction only uses C++ features. @@ -315,7 +319,7 @@ static bool PersonalityHasOnlyCXXUses(llvm::Constant *Fn) { /// when it really needs it. 
void CodeGenModule::SimplifyPersonality() { // If we're not in ObjC++ -fexceptions, there's nothing to do. - if (!LangOpts.CPlusPlus || !LangOpts.ObjC1 || !LangOpts.Exceptions) + if (!LangOpts.CPlusPlus || !LangOpts.ObjC || !LangOpts.Exceptions) return; // Both the problem this endeavors to fix and the way the logic @@ -1248,7 +1252,7 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) { // we follow the false destination for each of the cond branches to reach // the rethrow block. llvm::BasicBlock *RethrowBlock = WasmCatchStartBlock; - while (llvm::TerminatorInst *TI = RethrowBlock->getTerminator()) { + while (llvm::Instruction *TI = RethrowBlock->getTerminator()) { auto *BI = cast<llvm::BranchInst>(TI); assert(BI->isConditional()); RethrowBlock = BI->getSuccessor(1); @@ -1623,8 +1627,16 @@ struct PerformSEHFinally final : EHScopeStack::Cleanup { // Compute the two argument values. QualType ArgTys[2] = {Context.UnsignedCharTy, Context.VoidPtrTy}; - llvm::Value *LocalAddrFn = CGM.getIntrinsic(llvm::Intrinsic::localaddress); - llvm::Value *FP = CGF.Builder.CreateCall(LocalAddrFn); + llvm::Value *FP = nullptr; + // If CFG.IsOutlinedSEHHelper is true, then we are within a finally block. + if (CGF.IsOutlinedSEHHelper) { + FP = &CGF.CurFn->arg_begin()[1]; + } else { + llvm::Value *LocalAddrFn = + CGM.getIntrinsic(llvm::Intrinsic::localaddress); + FP = CGF.Builder.CreateCall(LocalAddrFn); + } + llvm::Value *IsForEH = llvm::ConstantInt::get(CGF.ConvertType(ArgTys[0]), F.isForEHCleanup()); Args.add(RValue::get(IsForEH), ArgTys[0]); @@ -1777,7 +1789,7 @@ void CodeGenFunction::EmitCapturedLocals(CodeGenFunction &ParentCGF, // frame pointer of the parent function. We only need to do this in filters, // since finally funclets recover the parent FP for us. 
llvm::Function *RecoverFPIntrin = - CGM.getIntrinsic(llvm::Intrinsic::x86_seh_recoverfp); + CGM.getIntrinsic(llvm::Intrinsic::eh_recoverfp); llvm::Constant *ParentI8Fn = llvm::ConstantExpr::getBitCast(ParentCGF.CurFn, Int8PtrTy); ParentFP = Builder.CreateCall(RecoverFPIntrin, {ParentI8Fn, EntryFP}); @@ -1823,13 +1835,13 @@ void CodeGenFunction::EmitCapturedLocals(CodeGenFunction &ParentCGF, void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF, bool IsFilter, const Stmt *OutlinedStmt) { - SourceLocation StartLoc = OutlinedStmt->getLocStart(); + SourceLocation StartLoc = OutlinedStmt->getBeginLoc(); // Get the mangled function name. SmallString<128> Name; { llvm::raw_svector_ostream OS(Name); - const FunctionDecl *ParentSEHFn = ParentCGF.CurSEHParent; + const NamedDecl *ParentSEHFn = ParentCGF.CurSEHParent; assert(ParentSEHFn && "No CurSEHParent!"); MangleContext &Mangler = CGM.getCXXABI().getMangleContext(); if (IsFilter) @@ -1871,10 +1883,10 @@ void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF, IsOutlinedSEHHelper = true; StartFunction(GlobalDecl(), RetTy, Fn, FnInfo, Args, - OutlinedStmt->getLocStart(), OutlinedStmt->getLocStart()); + OutlinedStmt->getBeginLoc(), OutlinedStmt->getBeginLoc()); CurSEHParent = ParentCGF.CurSEHParent; - CGM.SetLLVMFunctionAttributes(nullptr, FnInfo, CurFn); + CGM.SetLLVMFunctionAttributes(GlobalDecl(), FnInfo, CurFn); EmitCapturedLocals(ParentCGF, OutlinedStmt, IsFilter); } @@ -1893,7 +1905,7 @@ CodeGenFunction::GenerateSEHFilterFunction(CodeGenFunction &ParentCGF, FilterExpr->getType()->isSignedIntegerType()); Builder.CreateStore(R, ReturnValue); - FinishFunction(FilterExpr->getLocEnd()); + FinishFunction(FilterExpr->getEndLoc()); return CurFn; } @@ -1907,7 +1919,7 @@ CodeGenFunction::GenerateSEHFinallyFunction(CodeGenFunction &ParentCGF, // Emit the original filter expression, convert to i32, and return. 
EmitStmt(FinallyBlock); - FinishFunction(FinallyBlock->getLocEnd()); + FinishFunction(FinallyBlock->getEndLoc()); return CurFn; } @@ -1972,6 +1984,11 @@ llvm::Value *CodeGenFunction::EmitSEHAbnormalTermination() { return Builder.CreateZExt(&*AI, Int32Ty); } +void CodeGenFunction::pushSEHCleanup(CleanupKind Kind, + llvm::Function *FinallyFunc) { + EHStack.pushCleanup<PerformSEHFinally>(Kind, FinallyFunc); +} + void CodeGenFunction::EnterSEHTryStmt(const SEHTryStmt &S) { CodeGenFunction HelperCGF(CGM, /*suppressNewContext=*/true); if (const SEHFinallyStmt *Finally = S.getFinallyHandler()) { diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index f168dd02ead1..34a921e2dc00 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -26,7 +26,7 @@ #include "clang/AST/Attr.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/NSAPI.h" -#include "clang/Frontend/CodeGenOptions.h" +#include "clang/Basic/CodeGenOptions.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/DataLayout.h" @@ -419,8 +419,12 @@ LValue CodeGenFunction:: EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { const Expr *E = M->GetTemporaryExpr(); - // FIXME: ideally this would use EmitAnyExprToMem, however, we cannot do so - // as that will cause the lifetime adjustment to be lost for ARC + assert((!M->getExtendingDecl() || !isa<VarDecl>(M->getExtendingDecl()) || + !cast<VarDecl>(M->getExtendingDecl())->isARCPseudoStrong()) && + "Reference should never be pseudo-strong!"); + + // FIXME: ideally this would use EmitAnyExprToMem, however, we cannot do so + // as that will cause the lifetime adjustment to be lost for ARC auto ownership = M->getType().getObjCLifetime(); if (ownership != Qualifiers::OCL_None && ownership != Qualifiers::OCL_ExplicitNone) { @@ -498,18 +502,51 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { } else { switch (M->getStorageDuration()) { case SD_Automatic: - case SD_FullExpression: if (auto 
*Size = EmitLifetimeStart( CGM.getDataLayout().getTypeAllocSize(Alloca.getElementType()), Alloca.getPointer())) { - if (M->getStorageDuration() == SD_Automatic) - pushCleanupAfterFullExpr<CallLifetimeEnd>(NormalEHLifetimeMarker, - Alloca, Size); - else - pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, Alloca, - Size); + pushCleanupAfterFullExpr<CallLifetimeEnd>(NormalEHLifetimeMarker, + Alloca, Size); + } + break; + + case SD_FullExpression: { + if (!ShouldEmitLifetimeMarkers) + break; + + // Avoid creating a conditional cleanup just to hold an llvm.lifetime.end + // marker. Instead, start the lifetime of a conditional temporary earlier + // so that it's unconditional. Don't do this in ASan's use-after-scope + // mode so that it gets the more precise lifetime marks. If the type has + // a non-trivial destructor, we'll have a cleanup block for it anyway, + // so this typically doesn't help; skip it in that case. + ConditionalEvaluation *OldConditional = nullptr; + CGBuilderTy::InsertPoint OldIP; + if (isInConditionalBranch() && !E->getType().isDestructedType() && + !CGM.getCodeGenOpts().SanitizeAddressUseAfterScope) { + OldConditional = OutermostConditional; + OutermostConditional = nullptr; + + OldIP = Builder.saveIP(); + llvm::BasicBlock *Block = OldConditional->getStartingBlock(); + Builder.restoreIP(CGBuilderTy::InsertPoint( + Block, llvm::BasicBlock::iterator(Block->back()))); + } + + if (auto *Size = EmitLifetimeStart( + CGM.getDataLayout().getTypeAllocSize(Alloca.getElementType()), + Alloca.getPointer())) { + pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, Alloca, + Size); + } + + if (OldConditional) { + OutermostConditional = OldConditional; + Builder.restoreIP(OldIP); } break; + } + default: break; } @@ -1043,7 +1080,7 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, EmitVTablePtrCheckForCast(PT->getPointeeType(), Addr.getPointer(), /*MayBeNull=*/true, CodeGenFunction::CFITCK_UnrelatedCast, - CE->getLocStart()); 
+ CE->getBeginLoc()); } return CE->getCastKind() != CK_AddressSpaceConversion ? Builder.CreateBitCast(Addr, ConvertType(E->getType())) @@ -1227,6 +1264,8 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) { return EmitVAArgExprLValue(cast<VAArgExpr>(E)); case Expr::DeclRefExprClass: return EmitDeclRefLValue(cast<DeclRefExpr>(E)); + case Expr::ConstantExprClass: + return EmitLValue(cast<ConstantExpr>(E)->getSubExpr()); case Expr::ParenExprClass: return EmitLValue(cast<ParenExpr>(E)->getSubExpr()); case Expr::GenericSelectionExprClass: @@ -1458,6 +1497,16 @@ CodeGenFunction::tryEmitAsConstant(const MemberExpr *ME) { return ConstantEmission(); } +llvm::Value *CodeGenFunction::emitScalarConstant( + const CodeGenFunction::ConstantEmission &Constant, Expr *E) { + assert(Constant && "not a constant"); + if (Constant.isReference()) + return EmitLoadOfLValue(Constant.getReferenceLValue(*this, E), + E->getExprLoc()) + .getScalarVal(); + return Constant.getValue(); +} + llvm::Value *CodeGenFunction::EmitLoadOfScalar(LValue lvalue, SourceLocation Loc) { return EmitLoadOfScalar(lvalue.getAddress(), lvalue.isVolatile(), @@ -2237,18 +2286,14 @@ static LValue EmitThreadPrivateVarDeclLValue( static Address emitDeclTargetLinkVarDeclLValue(CodeGenFunction &CGF, const VarDecl *VD, QualType T) { - for (const auto *D : VD->redecls()) { - if (!VD->hasAttrs()) - continue; - if (const auto *Attr = D->getAttr<OMPDeclareTargetDeclAttr>()) - if (Attr->getMapType() == OMPDeclareTargetDeclAttr::MT_Link) { - QualType PtrTy = CGF.getContext().getPointerType(VD->getType()); - Address Addr = - CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); - return CGF.EmitLoadOfPointer(Addr, PtrTy->castAs<PointerType>()); - } - } - return Address::invalid(); + llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); + if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_To) + return Address::invalid(); + assert(*Res == 
OMPDeclareTargetDeclAttr::MT_Link && "Expected link clause"); + QualType PtrTy = CGF.getContext().getPointerType(VD->getType()); + Address Addr = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); + return CGF.EmitLoadOfPointer(Addr, PtrTy->castAs<PointerType>()); } Address @@ -2408,6 +2453,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { // A DeclRefExpr for a reference initialized by a constant expression can // appear without being odr-used. Directly emit the constant initializer. const Expr *Init = VD->getAnyInitializer(VD); + const auto *BD = dyn_cast_or_null<BlockDecl>(CurCodeDecl); if (Init && !isa<ParmVarDecl>(VD) && VD->getType()->isReferenceType() && VD->isUsableInConstantExpressions(getContext()) && VD->checkInitIsICE() && @@ -2417,7 +2463,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { (LocalDeclMap.count(VD->getCanonicalDecl()) || CapturedStmtInfo->lookup(VD->getCanonicalDecl()))) || LambdaCaptureFields.lookup(VD->getCanonicalDecl()) || - isa<BlockDecl>(CurCodeDecl)))) { + (BD && BD->capturesVariable(VD))))) { llvm::Constant *Val = ConstantEmitter(*this).emitAbstract(E->getLocation(), *VD->evaluateValue(), @@ -2456,7 +2502,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { } assert(isa<BlockDecl>(CurCodeDecl)); - Address addr = GetAddrOfBlockDecl(VD, VD->hasAttr<BlocksAttr>()); + Address addr = GetAddrOfBlockDecl(VD); return MakeAddrLValue(addr, T, AlignmentSource::Decl); } } @@ -2508,7 +2554,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { } // Drill into block byref variables. 
- bool isBlockByref = VD->hasAttr<BlocksAttr>(); + bool isBlockByref = VD->isEscapingByref(); if (isBlockByref) { addr = emitBlockByrefAddress(addr, VD); } @@ -2571,7 +2617,7 @@ LValue CodeGenFunction::EmitUnaryOpLValue(const UnaryOperator *E) { // of a pointer to object; as in void foo (__weak id *param); *param = 0; // But, we continue to generate __strong write barrier on indirect write // into a pointer to object. - if (getLangOpts().ObjC1 && + if (getLangOpts().ObjC && getLangOpts().getGC() != LangOptions::NonGC && LV.isObjCWeak()) LV.setNonGC(!E->isOBJCGCCandidate(getContext())); @@ -2632,7 +2678,7 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) { if (FnName.startswith("\01")) FnName = FnName.substr(1); StringRef NameItems[] = { - PredefinedExpr::getIdentTypeName(E->getIdentType()), FnName}; + PredefinedExpr::getIdentKindName(E->getIdentKind()), FnName}; std::string GVName = llvm::join(NameItems, NameItems + 2, "."); if (auto *BD = dyn_cast_or_null<BlockDecl>(CurCodeDecl)) { std::string Name = SL->getString(); @@ -2837,6 +2883,11 @@ static void emitCheckHandlerCall(CodeGenFunction &CGF, CheckRecoverableKind RecoverKind, bool IsFatal, llvm::BasicBlock *ContBB) { assert(IsFatal || RecoverKind != CheckRecoverableKind::Unrecoverable); + Optional<ApplyDebugLocation> DL; + if (!CGF.Builder.getCurrentDebugLocation()) { + // Ensure that the call has at least an artificial debug location. 
+ DL.emplace(CGF, SourceLocation()); + } bool NeedsAbortSuffix = IsFatal && RecoverKind != CheckRecoverableKind::Unrecoverable; bool MinimalRuntime = CGF.CGM.getCodeGenOpts().SanitizeMinimalRuntime; @@ -3448,7 +3499,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, LValue LV = MakeAddrLValue(Addr, E->getType(), EltBaseInfo, EltTBAAInfo); - if (getLangOpts().ObjC1 && + if (getLangOpts().ObjC && getLangOpts().getGC() != LangOptions::NonGC) { LV.setNonGC(!E->isOBJCGCCandidate(getContext())); setObjCGCLValueClass(getContext(), E, LV); @@ -3901,7 +3952,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, LValue RefLVal = MakeAddrLValue(addr, FieldType, FieldBaseInfo, FieldTBAAInfo); if (RecordCVR & Qualifiers::Volatile) - RefLVal.getQuals().setVolatile(true); + RefLVal.getQuals().addVolatile(); addr = EmitLoadOfReference(RefLVal, &FieldBaseInfo, &FieldTBAAInfo); // Qualifiers on the struct don't apply to the referencee. @@ -4121,8 +4172,9 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { case CK_ARCReclaimReturnedObject: case CK_ARCExtendBlockObject: case CK_CopyAndAutoreleaseBlockObject: - case CK_AddressSpaceConversion: case CK_IntToOCLSampler: + case CK_FixedPointCast: + case CK_FixedPointToBoolean: return EmitUnsupportedLValue(E, "unexpected cast lvalue"); case CK_Dependent: @@ -4193,8 +4245,8 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { if (SanOpts.has(SanitizerKind::CFIDerivedCast)) EmitVTablePtrCheckForCast(E->getType(), Derived.getPointer(), - /*MayBeNull=*/false, - CFITCK_DerivedCast, E->getLocStart()); + /*MayBeNull=*/false, CFITCK_DerivedCast, + E->getBeginLoc()); return MakeAddrLValue(Derived, E->getType(), LV.getBaseInfo(), CGM.getTBAAInfoForSubobject(LV, E->getType())); @@ -4210,12 +4262,21 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { if (SanOpts.has(SanitizerKind::CFIUnrelatedCast)) EmitVTablePtrCheckForCast(E->getType(), V.getPointer(), - /*MayBeNull=*/false, - 
CFITCK_UnrelatedCast, E->getLocStart()); + /*MayBeNull=*/false, CFITCK_UnrelatedCast, + E->getBeginLoc()); return MakeAddrLValue(V, E->getType(), LV.getBaseInfo(), CGM.getTBAAInfoForSubobject(LV, E->getType())); } + case CK_AddressSpaceConversion: { + LValue LV = EmitLValue(E->getSubExpr()); + QualType DestTy = getContext().getPointerType(E->getType()); + llvm::Value *V = getTargetHooks().performAddrSpaceCast( + *this, LV.getPointer(), E->getSubExpr()->getType().getAddressSpace(), + E->getType().getAddressSpace(), ConvertType(DestTy)); + return MakeAddrLValue(Address(V, LV.getAddress().getAlignment()), + E->getType(), LV.getBaseInfo(), LV.getTBAAInfo()); + } case CK_ObjCObjectLValueCast: { LValue LV = EmitLValue(E->getSubExpr()); Address V = Builder.CreateElementBitCast(LV.getAddress(), @@ -4223,10 +4284,8 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { return MakeAddrLValue(V, E->getType(), LV.getBaseInfo(), CGM.getTBAAInfoForSubobject(LV, E->getType())); } - case CK_ZeroToOCLQueue: - llvm_unreachable("NULL to OpenCL queue lvalue cast is not valid"); - case CK_ZeroToOCLEvent: - llvm_unreachable("NULL to OpenCL event lvalue cast is not valid"); + case CK_ZeroToOCLOpaqueType: + llvm_unreachable("NULL to OpenCL opaque type lvalue cast is not valid"); } llvm_unreachable("Unhandled lvalue cast kind?"); @@ -4333,7 +4392,7 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, const FunctionDecl *FD) { } llvm::Constant *calleePtr = EmitFunctionDeclPointer(CGF.CGM, FD); - return CGCallee::forDirect(calleePtr, FD); + return CGCallee::forDirect(calleePtr, GlobalDecl(FD)); } CGCallee CodeGenFunction::EmitCallee(const Expr *E) { @@ -4377,8 +4436,13 @@ CGCallee CodeGenFunction::EmitCallee(const Expr *E) { calleePtr = EmitLValue(E).getPointer(); } assert(functionType->isFunctionType()); - CGCalleeInfo calleeInfo(functionType->getAs<FunctionProtoType>(), - E->getReferencedDeclOfCallee()); + + GlobalDecl GD; + if (const auto *VD = + 
dyn_cast_or_null<VarDecl>(E->getReferencedDeclOfCallee())) + GD = GlobalDecl(VD); + + CGCalleeInfo calleeInfo(functionType->getAs<FunctionProtoType>(), GD); CGCallee callee(calleeInfo, calleePtr); return callee; } @@ -4563,7 +4627,8 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee assert(CalleeType->isFunctionPointerType() && "Call must have function pointer type!"); - const Decl *TargetDecl = OrigCallee.getAbstractInfo().getCalleeDecl(); + const Decl *TargetDecl = + OrigCallee.getAbstractInfo().getCalleeDecl().getDecl(); if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl)) // We can only guarantee that a function is called from the correct @@ -4620,10 +4685,8 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee DecodeAddrUsedInPrologue(CalleePtr, CalleeRTTIEncoded); llvm::Value *CalleeRTTIMatch = Builder.CreateICmpEQ(CalleeRTTI, FTRTTIConst); - llvm::Constant *StaticData[] = { - EmitCheckSourceLocation(E->getLocStart()), - EmitCheckTypeDescriptor(CalleeType) - }; + llvm::Constant *StaticData[] = {EmitCheckSourceLocation(E->getBeginLoc()), + EmitCheckTypeDescriptor(CalleeType)}; EmitCheck(std::make_pair(CalleeRTTIMatch, SanitizerKind::Function), SanitizerHandler::FunctionTypeMismatch, StaticData, CalleePtr); @@ -4657,7 +4720,7 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee auto CrossDsoTypeId = CGM.CreateCrossDsoCfiTypeId(MD); llvm::Constant *StaticData[] = { llvm::ConstantInt::get(Int8Ty, CFITCK_ICall), - EmitCheckSourceLocation(E->getLocStart()), + EmitCheckSourceLocation(E->getBeginLoc()), EmitCheckTypeDescriptor(QualType(FnType, 0)), }; if (CGM.getCodeGenOpts().SanitizeCfiCrossDso && CrossDsoTypeId) { diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp index 62641102861c..db49b3f28a59 100644 --- a/lib/CodeGen/CGExprAgg.cpp +++ b/lib/CodeGen/CGExprAgg.cpp @@ -125,6 +125,10 @@ public: return Visit(E->getReplacement()); } + void 
VisitConstantExpr(ConstantExpr *E) { + return Visit(E->getSubExpr()); + } + // l-values. void VisitDeclRefExpr(DeclRefExpr *E) { EmitAggLoadOfLValue(E); } void VisitMemberExpr(MemberExpr *ME) { EmitAggLoadOfLValue(ME); } @@ -847,10 +851,11 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { case CK_ARCExtendBlockObject: case CK_CopyAndAutoreleaseBlockObject: case CK_BuiltinFnToFnPtr: - case CK_ZeroToOCLEvent: - case CK_ZeroToOCLQueue: + case CK_ZeroToOCLOpaqueType: case CK_AddressSpaceConversion: case CK_IntToOCLSampler: + case CK_FixedPointCast: + case CK_FixedPointToBoolean: llvm_unreachable("cast kind invalid for aggregate types"); } } diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp index f29ef754c03f..884ce96859c5 100644 --- a/lib/CodeGen/CGExprCXX.cpp +++ b/lib/CodeGen/CGExprCXX.cpp @@ -17,8 +17,8 @@ #include "CGDebugInfo.h" #include "CGObjCRuntime.h" #include "ConstantEmitter.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/CodeGen/CGFunctionInfo.h" -#include "clang/Frontend/CodeGenOptions.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Intrinsics.h" @@ -177,7 +177,8 @@ RValue CodeGenFunction::EmitCXXMemberCallExpr(const CXXMemberCallExpr *CE, if (MD->isStatic()) { // The method is static, emit it as we would a regular call. 
- CGCallee callee = CGCallee::forDirect(CGM.GetAddrOfFunction(MD), MD); + CGCallee callee = + CGCallee::forDirect(CGM.GetAddrOfFunction(MD), GlobalDecl(MD)); return EmitCall(getContext().getPointerType(MD->getType()), callee, CE, ReturnValue); } @@ -353,13 +354,13 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( else if (!DevirtualizedMethod) Callee = CGCallee::forDirect( CGM.getAddrOfCXXStructor(Dtor, StructorType::Complete, FInfo, Ty), - Dtor); + GlobalDecl(Dtor, Dtor_Complete)); else { const CXXDestructorDecl *DDtor = cast<CXXDestructorDecl>(DevirtualizedMethod); Callee = CGCallee::forDirect( - CGM.GetAddrOfFunction(GlobalDecl(DDtor, Dtor_Complete), Ty), - DDtor); + CGM.GetAddrOfFunction(GlobalDecl(DDtor, Dtor_Complete), Ty), + GlobalDecl(DDtor, Dtor_Complete)); } EmitCXXMemberOrOperatorCall( CalleeDecl, Callee, ReturnValue, This.getPointer(), @@ -371,8 +372,8 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( CGCallee Callee; if (const CXXConstructorDecl *Ctor = dyn_cast<CXXConstructorDecl>(MD)) { Callee = CGCallee::forDirect( - CGM.GetAddrOfFunction(GlobalDecl(Ctor, Ctor_Complete), Ty), - Ctor); + CGM.GetAddrOfFunction(GlobalDecl(Ctor, Ctor_Complete), Ty), + GlobalDecl(Ctor, Ctor_Complete)); } else if (UseVirtualCall) { Callee = CGCallee::forVirtual(CE, MD, This.getAddress(), Ty); } else { @@ -383,17 +384,18 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( std::tie(VTable, RD) = CGM.getCXXABI().LoadVTablePtr(*this, This.getAddress(), MD->getParent()); - EmitVTablePtrCheckForCall(RD, VTable, CFITCK_NVCall, CE->getLocStart()); + EmitVTablePtrCheckForCall(RD, VTable, CFITCK_NVCall, CE->getBeginLoc()); } if (getLangOpts().AppleKext && MD->isVirtual() && HasQualifier) Callee = BuildAppleKextVirtualCall(MD, Qualifier, Ty); else if (!DevirtualizedMethod) - Callee = CGCallee::forDirect(CGM.GetAddrOfFunction(MD, Ty), MD); + Callee = + CGCallee::forDirect(CGM.GetAddrOfFunction(MD, Ty), GlobalDecl(MD)); else { - Callee = 
CGCallee::forDirect( - CGM.GetAddrOfFunction(DevirtualizedMethod, Ty), - DevirtualizedMethod); + Callee = + CGCallee::forDirect(CGM.GetAddrOfFunction(DevirtualizedMethod, Ty), + GlobalDecl(DevirtualizedMethod)); } } @@ -1293,7 +1295,7 @@ static RValue EmitNewDeleteCall(CodeGenFunction &CGF, const CallArgList &Args) { llvm::Instruction *CallOrInvoke; llvm::Constant *CalleePtr = CGF.CGM.GetAddrOfFunction(CalleeDecl); - CGCallee Callee = CGCallee::forDirect(CalleePtr, CalleeDecl); + CGCallee Callee = CGCallee::forDirect(CalleePtr, GlobalDecl(CalleeDecl)); RValue RV = CGF.EmitCall(CGF.CGM.getTypes().arrangeFreeFunctionCall( Args, CalleeType, /*chainCall=*/false), @@ -1654,9 +1656,10 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) { // Emit a null check on the allocation result if the allocation // function is allowed to return null (because it has a non-throwing // exception spec or is the reserved placement new) and we have an - // interesting initializer. - bool nullCheck = E->shouldNullCheckAllocation(getContext()) && - (!allocType.isPODType(getContext()) || E->hasInitializer()); + // interesting initializer will be running sanitizers on the initialization. 
+ bool nullCheck = E->shouldNullCheckAllocation() && + (!allocType.isPODType(getContext()) || E->hasInitializer() || + sanitizePerformTypeCheck()); llvm::BasicBlock *nullCheckBB = nullptr; llvm::BasicBlock *contBB = nullptr; @@ -2252,7 +2255,6 @@ llvm::Value *CodeGenFunction::EmitDynamicCast(Address ThisAddr, } void CodeGenFunction::EmitLambdaExpr(const LambdaExpr *E, AggValueSlot Slot) { - RunCleanupsScope Scope(*this); LValue SlotLV = MakeAddrLValue(Slot.getAddress(), E->getType()); CXXRecordDecl::field_iterator CurField = E->getLambdaClass()->field_begin(); diff --git a/lib/CodeGen/CGExprComplex.cpp b/lib/CodeGen/CGExprComplex.cpp index fb176093a741..2db693b44c90 100644 --- a/lib/CodeGen/CGExprComplex.cpp +++ b/lib/CodeGen/CGExprComplex.cpp @@ -101,6 +101,9 @@ public: llvm_unreachable("Stmt can't have complex result type!"); } ComplexPairTy VisitExpr(Expr *S); + ComplexPairTy VisitConstantExpr(ConstantExpr *E) { + return Visit(E->getSubExpr()); + } ComplexPairTy VisitParenExpr(ParenExpr *PE) { return Visit(PE->getSubExpr());} ComplexPairTy VisitGenericSelectionExpr(GenericSelectionExpr *GE) { return Visit(GE->getResultExpr()); @@ -505,10 +508,11 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastKind CK, Expr *Op, case CK_ARCExtendBlockObject: case CK_CopyAndAutoreleaseBlockObject: case CK_BuiltinFnToFnPtr: - case CK_ZeroToOCLEvent: - case CK_ZeroToOCLQueue: + case CK_ZeroToOCLOpaqueType: case CK_AddressSpaceConversion: case CK_IntToOCLSampler: + case CK_FixedPointCast: + case CK_FixedPointToBoolean: llvm_unreachable("invalid cast kind for complex value"); case CK_FloatingRealToComplex: diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp index 68766479a539..c9475840aeeb 100644 --- a/lib/CodeGen/CGExprConstant.cpp +++ b/lib/CodeGen/CGExprConstant.cpp @@ -47,7 +47,7 @@ class ConstStructBuilder { public: static llvm::Constant *BuildStruct(ConstantEmitter &Emitter, ConstExprEmitter *ExprEmitter, - llvm::ConstantStruct *Base, + llvm::Constant 
*Base, InitListExpr *Updater, QualType ValTy); static llvm::Constant *BuildStruct(ConstantEmitter &Emitter, @@ -76,7 +76,7 @@ private: void ConvertStructToPacked(); bool Build(InitListExpr *ILE); - bool Build(ConstExprEmitter *Emitter, llvm::ConstantStruct *Base, + bool Build(ConstExprEmitter *Emitter, llvm::Constant *Base, InitListExpr *Updater); bool Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase, const CXXRecordDecl *VTableClass, CharUnits BaseOffset); @@ -566,7 +566,7 @@ llvm::Constant *ConstStructBuilder::Finalize(QualType Ty) { llvm::Constant *ConstStructBuilder::BuildStruct(ConstantEmitter &Emitter, ConstExprEmitter *ExprEmitter, - llvm::ConstantStruct *Base, + llvm::Constant *Base, InitListExpr *Updater, QualType ValTy) { ConstStructBuilder Builder(Emitter); @@ -723,6 +723,10 @@ public: return nullptr; } + llvm::Constant *VisitConstantExpr(ConstantExpr *CE, QualType T) { + return Visit(CE->getSubExpr(), T); + } + llvm::Constant *VisitParenExpr(ParenExpr *PE, QualType T) { return Visit(PE->getSubExpr(), T); } @@ -869,8 +873,9 @@ public: case CK_FloatingToIntegral: case CK_FloatingToBoolean: case CK_FloatingCast: - case CK_ZeroToOCLEvent: - case CK_ZeroToOCLQueue: + case CK_FixedPointCast: + case CK_FixedPointToBoolean: + case CK_ZeroToOCLOpaqueType: return nullptr; } llvm_unreachable("Invalid CastKind"); @@ -1026,8 +1031,8 @@ public: } if (destType->isRecordType()) - return ConstStructBuilder::BuildStruct(Emitter, this, - dyn_cast<llvm::ConstantStruct>(Base), Updater, destType); + return ConstStructBuilder::BuildStruct(Emitter, this, Base, Updater, + destType); return nullptr; } @@ -1102,7 +1107,7 @@ public: } // end anonymous namespace. 
bool ConstStructBuilder::Build(ConstExprEmitter *ExprEmitter, - llvm::ConstantStruct *Base, + llvm::Constant *Base, InitListExpr *Updater) { assert(Base && "base expression should not be empty"); @@ -1110,7 +1115,7 @@ bool ConstStructBuilder::Build(ConstExprEmitter *ExprEmitter, RecordDecl *RD = ExprType->getAs<RecordType>()->getDecl(); const ASTRecordLayout &Layout = CGM.getContext().getASTRecordLayout(RD); const llvm::StructLayout *BaseLayout = CGM.getDataLayout().getStructLayout( - Base->getType()); + cast<llvm::StructType>(Base->getType())); unsigned FieldNo = -1; unsigned ElementNo = 0; @@ -1131,7 +1136,7 @@ bool ConstStructBuilder::Build(ConstExprEmitter *ExprEmitter, if (Field->isUnnamedBitfield()) continue; - llvm::Constant *EltInit = Base->getOperand(ElementNo); + llvm::Constant *EltInit = Base->getAggregateElement(ElementNo); // Bail out if the type of the ConstantStruct does not have the same layout // as the type of the InitListExpr. @@ -1450,6 +1455,7 @@ llvm::Constant *ConstantEmitter::tryEmitPrivateForVarInit(const VarDecl &D) { if (CD->isTrivial() && CD->isDefaultConstructor()) return CGM.EmitNullConstant(D.getType()); } + InConstantContext = true; } QualType destType = D.getType(); @@ -1547,7 +1553,7 @@ llvm::Constant *ConstantEmitter::tryEmitPrivate(const Expr *E, if (destType->isReferenceType()) Success = E->EvaluateAsLValue(Result, CGM.getContext()); else - Success = E->EvaluateAsRValue(Result, CGM.getContext()); + Success = E->EvaluateAsRValue(Result, CGM.getContext(), InConstantContext); llvm::Constant *C; if (Success && !Result.HasSideEffects) @@ -1600,6 +1606,7 @@ private: ConstantLValue tryEmitBase(const APValue::LValueBase &base); ConstantLValue VisitStmt(const Stmt *S) { return nullptr; } + ConstantLValue VisitConstantExpr(const ConstantExpr *E); ConstantLValue VisitCompoundLiteralExpr(const CompoundLiteralExpr *E); ConstantLValue VisitStringLiteral(const StringLiteral *E); ConstantLValue VisitObjCEncodeExpr(const ObjCEncodeExpr *E); @@ 
-1755,6 +1762,11 @@ ConstantLValueEmitter::tryEmitBase(const APValue::LValueBase &base) { } ConstantLValue +ConstantLValueEmitter::VisitConstantExpr(const ConstantExpr *E) { + return Visit(E->getSubExpr()); +} + +ConstantLValue ConstantLValueEmitter::VisitCompoundLiteralExpr(const CompoundLiteralExpr *E) { return tryEmitGlobalCompoundLiteral(CGM, Emitter.CGF, E); } @@ -1782,7 +1794,7 @@ ConstantLValueEmitter::VisitPredefinedExpr(const PredefinedExpr *E) { return cast<ConstantAddress>(Res.getAddress()); } - auto kind = E->getIdentType(); + auto kind = E->getIdentKind(); if (kind == PredefinedExpr::PrettyFunction) { return CGM.GetAddrOfConstantCString("top level", ".tmp"); } @@ -1968,6 +1980,16 @@ llvm::Constant *ConstantEmitter::tryEmitPrivate(const APValue &Value, Elts.push_back(C); } + // This means that the array type is probably "IncompleteType" or some + // type that is not ConstantArray. + if (CAT == nullptr && CommonElementType == nullptr && !NumInitElts) { + const ArrayType *AT = CGM.getContext().getAsArrayType(DestType); + CommonElementType = CGM.getTypes().ConvertType(AT->getElementType()); + llvm::ArrayType *AType = llvm::ArrayType::get(CommonElementType, + NumElements); + return llvm::ConstantAggregateZero::get(AType); + } + return EmitArrayConstant(CGM, CAT, CommonElementType, NumElements, Elts, Filler); } diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp index c62588c68272..1c14d4c99a23 100644 --- a/lib/CodeGen/CGExprScalar.cpp +++ b/lib/CodeGen/CGExprScalar.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "CodeGenFunction.h" -#include "CGCleanup.h" #include "CGCXXABI.h" +#include "CGCleanup.h" #include "CGDebugInfo.h" #include "CGObjCRuntime.h" +#include "CodeGenFunction.h" #include "CodeGenModule.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" @@ -23,8 +23,9 @@ #include "clang/AST/Expr.h" #include "clang/AST/RecordLayout.h" #include 
"clang/AST/StmtVisitor.h" +#include "clang/Basic/CodeGenOptions.h" +#include "clang/Basic/FixedPoint.h" #include "clang/Basic/TargetInfo.h" -#include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/Optional.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" @@ -257,8 +258,11 @@ public: AVAttr = TTy->getDecl()->getAttr<AlignValueAttr>(); } else { // Assumptions for function parameters are emitted at the start of the - // function, so there is no need to repeat that here. - if (isa<ParmVarDecl>(VD)) + // function, so there is no need to repeat that here, + // unless the alignment-assumption sanitizer is enabled, + // then we prefer the assumption over alignment attribute + // on IR function param. + if (isa<ParmVarDecl>(VD) && !CGF.SanOpts.has(SanitizerKind::Alignment)) return; AVAttr = VD->getAttr<AlignValueAttr>(); @@ -275,7 +279,8 @@ public: Value *AlignmentValue = CGF.EmitScalarExpr(AVAttr->getAlignment()); llvm::ConstantInt *AlignmentCI = cast<llvm::ConstantInt>(AlignmentValue); - CGF.EmitAlignmentAssumption(V, AlignmentCI->getZExtValue()); + CGF.EmitAlignmentAssumption(V, E, AVAttr->getLocation(), + AlignmentCI->getZExtValue()); } /// EmitLoadOfLValue - Given an expression with complex type that represents a @@ -302,7 +307,11 @@ public: /// Known implicit conversion check kinds. /// Keep in sync with the enum of the same name in ubsan_handlers.h enum ImplicitConversionCheckKind : unsigned char { - ICCK_IntegerTruncation = 0, + ICCK_IntegerTruncation = 0, // Legacy, was only used by clang 7. 
+ ICCK_UnsignedIntegerTruncation = 1, + ICCK_SignedIntegerTruncation = 2, + ICCK_IntegerSignChange = 3, + ICCK_SignedIntegerTruncationOrSignChange = 4, }; /// Emit a check that an [implicit] truncation of an integer does not @@ -310,21 +319,39 @@ public: void EmitIntegerTruncationCheck(Value *Src, QualType SrcType, Value *Dst, QualType DstType, SourceLocation Loc); + /// Emit a check that an [implicit] conversion of an integer does not change + /// the sign of the value. It is not UB, so we use the value after conversion. + /// NOTE: Src and Dst may be the exact same value! (point to the same thing) + void EmitIntegerSignChangeCheck(Value *Src, QualType SrcType, Value *Dst, + QualType DstType, SourceLocation Loc); + /// Emit a conversion from the specified type to the specified destination /// type, both of which are LLVM scalar types. struct ScalarConversionOpts { bool TreatBooleanAsSigned; bool EmitImplicitIntegerTruncationChecks; + bool EmitImplicitIntegerSignChangeChecks; ScalarConversionOpts() : TreatBooleanAsSigned(false), - EmitImplicitIntegerTruncationChecks(false) {} + EmitImplicitIntegerTruncationChecks(false), + EmitImplicitIntegerSignChangeChecks(false) {} + + ScalarConversionOpts(clang::SanitizerSet SanOpts) + : TreatBooleanAsSigned(false), + EmitImplicitIntegerTruncationChecks( + SanOpts.hasOneOf(SanitizerKind::ImplicitIntegerTruncation)), + EmitImplicitIntegerSignChangeChecks( + SanOpts.has(SanitizerKind::ImplicitIntegerSignChange)) {} }; Value * EmitScalarConversion(Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc, ScalarConversionOpts Opts = ScalarConversionOpts()); + Value *EmitFixedPointConversion(Value *Src, QualType SrcTy, QualType DstTy, + SourceLocation Loc); + /// Emit a conversion from the specified complex type to the specified /// destination type, where the destination type is an LLVM scalar type. 
Value *EmitComplexToScalarConversion(CodeGenFunction::ComplexPairTy Src, @@ -382,6 +409,9 @@ public: } Value *VisitExpr(Expr *S); + Value *VisitConstantExpr(ConstantExpr *E) { + return Visit(E->getSubExpr()); + } Value *VisitParenExpr(ParenExpr *PE) { return Visit(PE->getSubExpr()); } @@ -450,19 +480,10 @@ public: return CGF.getOrCreateOpaqueRValueMapping(E).getScalarVal(); } - Value *emitConstant(const CodeGenFunction::ConstantEmission &Constant, - Expr *E) { - assert(Constant && "not a constant"); - if (Constant.isReference()) - return EmitLoadOfLValue(Constant.getReferenceLValue(CGF, E), - E->getExprLoc()); - return Constant.getValue(); - } - // l-values. Value *VisitDeclRefExpr(DeclRefExpr *E) { if (CodeGenFunction::ConstantEmission Constant = CGF.tryEmitAsConstant(E)) - return emitConstant(Constant, E); + return CGF.emitScalarConstant(Constant, E); return EmitLoadOfLValue(E); } @@ -664,7 +685,7 @@ public: case LangOptions::SOB_Undefined: if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) return Builder.CreateNSWMul(Ops.LHS, Ops.RHS, "mul"); - // Fall through. + LLVM_FALLTHROUGH; case LangOptions::SOB_Trapping: if (CanElideOverflowCheck(CGF.getContext(), Ops)) return Builder.CreateNSWMul(Ops.LHS, Ops.RHS, "mul"); @@ -941,48 +962,233 @@ void ScalarExprEmitter::EmitFloatConversionCheck( SanitizerHandler::FloatCastOverflow, StaticArgs, OrigSrc); } +// Should be called within CodeGenFunction::SanitizerScope RAII scope. +// Returns 'i1 false' when the truncation Src -> Dst was lossy. +static std::pair<ScalarExprEmitter::ImplicitConversionCheckKind, + std::pair<llvm::Value *, SanitizerMask>> +EmitIntegerTruncationCheckHelper(Value *Src, QualType SrcType, Value *Dst, + QualType DstType, CGBuilderTy &Builder) { + llvm::Type *SrcTy = Src->getType(); + llvm::Type *DstTy = Dst->getType(); + (void)DstTy; // Only used in assert() + + // This should be truncation of integral types. 
+ assert(Src != Dst); + assert(SrcTy->getScalarSizeInBits() > Dst->getType()->getScalarSizeInBits()); + assert(isa<llvm::IntegerType>(SrcTy) && isa<llvm::IntegerType>(DstTy) && + "non-integer llvm type"); + + bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType(); + bool DstSigned = DstType->isSignedIntegerOrEnumerationType(); + + // If both (src and dst) types are unsigned, then it's an unsigned truncation. + // Else, it is a signed truncation. + ScalarExprEmitter::ImplicitConversionCheckKind Kind; + SanitizerMask Mask; + if (!SrcSigned && !DstSigned) { + Kind = ScalarExprEmitter::ICCK_UnsignedIntegerTruncation; + Mask = SanitizerKind::ImplicitUnsignedIntegerTruncation; + } else { + Kind = ScalarExprEmitter::ICCK_SignedIntegerTruncation; + Mask = SanitizerKind::ImplicitSignedIntegerTruncation; + } + + llvm::Value *Check = nullptr; + // 1. Extend the truncated value back to the same width as the Src. + Check = Builder.CreateIntCast(Dst, SrcTy, DstSigned, "anyext"); + // 2. Equality-compare with the original source value + Check = Builder.CreateICmpEQ(Check, Src, "truncheck"); + // If the comparison result is 'i1 false', then the truncation was lossy. + return std::make_pair(Kind, std::make_pair(Check, Mask)); +} + void ScalarExprEmitter::EmitIntegerTruncationCheck(Value *Src, QualType SrcType, Value *Dst, QualType DstType, SourceLocation Loc) { - if (!CGF.SanOpts.has(SanitizerKind::ImplicitIntegerTruncation)) + if (!CGF.SanOpts.hasOneOf(SanitizerKind::ImplicitIntegerTruncation)) return; - llvm::Type *SrcTy = Src->getType(); - llvm::Type *DstTy = Dst->getType(); - // We only care about int->int conversions here. // We ignore conversions to/from pointer and/or bool. 
if (!(SrcType->isIntegerType() && DstType->isIntegerType())) return; - assert(isa<llvm::IntegerType>(SrcTy) && isa<llvm::IntegerType>(DstTy) && - "clang integer type lowered to non-integer llvm type"); - - unsigned SrcBits = SrcTy->getScalarSizeInBits(); - unsigned DstBits = DstTy->getScalarSizeInBits(); + unsigned SrcBits = Src->getType()->getScalarSizeInBits(); + unsigned DstBits = Dst->getType()->getScalarSizeInBits(); // This must be truncation. Else we do not care. if (SrcBits <= DstBits) return; assert(!DstType->isBooleanType() && "we should not get here with booleans."); + // If the integer sign change sanitizer is enabled, + // and we are truncating from larger unsigned type to smaller signed type, + // let that next sanitizer deal with it. + bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType(); + bool DstSigned = DstType->isSignedIntegerOrEnumerationType(); + if (CGF.SanOpts.has(SanitizerKind::ImplicitIntegerSignChange) && + (!SrcSigned && DstSigned)) + return; + CodeGenFunction::SanitizerScope SanScope(&CGF); + std::pair<ScalarExprEmitter::ImplicitConversionCheckKind, + std::pair<llvm::Value *, SanitizerMask>> + Check = + EmitIntegerTruncationCheckHelper(Src, SrcType, Dst, DstType, Builder); + // If the comparison result is 'i1 false', then the truncation was lossy. + + // Do we care about this type of truncation? + if (!CGF.SanOpts.has(Check.second.second)) + return; + + llvm::Constant *StaticArgs[] = { + CGF.EmitCheckSourceLocation(Loc), CGF.EmitCheckTypeDescriptor(SrcType), + CGF.EmitCheckTypeDescriptor(DstType), + llvm::ConstantInt::get(Builder.getInt8Ty(), Check.first)}; + CGF.EmitCheck(Check.second, SanitizerHandler::ImplicitConversion, StaticArgs, + {Src, Dst}); +} + +// Should be called within CodeGenFunction::SanitizerScope RAII scope. +// Returns 'i1 false' when the conversion Src -> Dst changed the sign. 
+static std::pair<ScalarExprEmitter::ImplicitConversionCheckKind, + std::pair<llvm::Value *, SanitizerMask>> +EmitIntegerSignChangeCheckHelper(Value *Src, QualType SrcType, Value *Dst, + QualType DstType, CGBuilderTy &Builder) { + llvm::Type *SrcTy = Src->getType(); + llvm::Type *DstTy = Dst->getType(); + + assert(isa<llvm::IntegerType>(SrcTy) && isa<llvm::IntegerType>(DstTy) && + "non-integer llvm type"); + + bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType(); + bool DstSigned = DstType->isSignedIntegerOrEnumerationType(); + (void)SrcSigned; // Only used in assert() + (void)DstSigned; // Only used in assert() + unsigned SrcBits = SrcTy->getScalarSizeInBits(); + unsigned DstBits = DstTy->getScalarSizeInBits(); + (void)SrcBits; // Only used in assert() + (void)DstBits; // Only used in assert() + + assert(((SrcBits != DstBits) || (SrcSigned != DstSigned)) && + "either the widths should be different, or the signednesses."); + + // NOTE: zero value is considered to be non-negative. + auto EmitIsNegativeTest = [&Builder](Value *V, QualType VType, + const char *Name) -> Value * { + // Is this value a signed type? + bool VSigned = VType->isSignedIntegerOrEnumerationType(); + llvm::Type *VTy = V->getType(); + if (!VSigned) { + // If the value is unsigned, then it is never negative. + // FIXME: can we encounter non-scalar VTy here? + return llvm::ConstantInt::getFalse(VTy->getContext()); + } + // Get the zero of the same type with which we will be comparing. + llvm::Constant *Zero = llvm::ConstantInt::get(VTy, 0); + // %V.isnegative = icmp slt %V, 0 + // I.e is %V *strictly* less than zero, does it have negative value? + return Builder.CreateICmp(llvm::ICmpInst::ICMP_SLT, V, Zero, + llvm::Twine(Name) + "." + V->getName() + + ".negativitycheck"); + }; + + // 1. Was the old Value negative? + llvm::Value *SrcIsNegative = EmitIsNegativeTest(Src, SrcType, "src"); + // 2. Is the new Value negative? 
+ llvm::Value *DstIsNegative = EmitIsNegativeTest(Dst, DstType, "dst"); + // 3. Now, was the 'negativity status' preserved during the conversion? + // NOTE: conversion from negative to zero is considered to change the sign. + // (We want to get 'false' when the conversion changed the sign) + // So we should just equality-compare the negativity statuses. llvm::Value *Check = nullptr; + Check = Builder.CreateICmpEQ(SrcIsNegative, DstIsNegative, "signchangecheck"); + // If the comparison result is 'false', then the conversion changed the sign. + return std::make_pair( + ScalarExprEmitter::ICCK_IntegerSignChange, + std::make_pair(Check, SanitizerKind::ImplicitIntegerSignChange)); +} - // 1. Extend the truncated value back to the same width as the Src. - bool InputSigned = DstType->isSignedIntegerOrEnumerationType(); - Check = Builder.CreateIntCast(Dst, SrcTy, InputSigned, "anyext"); - // 2. Equality-compare with the original source value - Check = Builder.CreateICmpEQ(Check, Src, "truncheck"); - // If the comparison result is 'i1 false', then the truncation was lossy. +void ScalarExprEmitter::EmitIntegerSignChangeCheck(Value *Src, QualType SrcType, + Value *Dst, QualType DstType, + SourceLocation Loc) { + if (!CGF.SanOpts.has(SanitizerKind::ImplicitIntegerSignChange)) + return; + + llvm::Type *SrcTy = Src->getType(); + llvm::Type *DstTy = Dst->getType(); + + // We only care about int->int conversions here. + // We ignore conversions to/from pointer and/or bool. + if (!(SrcType->isIntegerType() && DstType->isIntegerType())) + return; + + bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType(); + bool DstSigned = DstType->isSignedIntegerOrEnumerationType(); + unsigned SrcBits = SrcTy->getScalarSizeInBits(); + unsigned DstBits = DstTy->getScalarSizeInBits(); + + // Now, we do not need to emit the check in *all* of the cases. + // We can avoid emitting it in some obvious cases where it would have been + // dropped by the opt passes (instcombine) always anyways. 
+ // If it's a cast between effectively the same type, no check. + // NOTE: this is *not* equivalent to checking the canonical types. + if (SrcSigned == DstSigned && SrcBits == DstBits) + return; + // At least one of the values needs to have signed type. + // If both are unsigned, then obviously, neither of them can be negative. + if (!SrcSigned && !DstSigned) + return; + // If the conversion is to *larger* *signed* type, then no check is needed. + // Because either sign-extension happens (so the sign will remain), + // or zero-extension will happen (the sign bit will be zero.) + if ((DstBits > SrcBits) && DstSigned) + return; + if (CGF.SanOpts.has(SanitizerKind::ImplicitSignedIntegerTruncation) && + (SrcBits > DstBits) && SrcSigned) { + // If the signed integer truncation sanitizer is enabled, + // and this is a truncation from signed type, then no check is needed. + // Because here sign change check is interchangeable with truncation check. + return; + } + // That's it. We can't rule out any more cases with the data we have. + + CodeGenFunction::SanitizerScope SanScope(&CGF); + + std::pair<ScalarExprEmitter::ImplicitConversionCheckKind, + std::pair<llvm::Value *, SanitizerMask>> + Check; + + // Each of these checks needs to return 'false' when an issue was detected. + ImplicitConversionCheckKind CheckKind; + llvm::SmallVector<std::pair<llvm::Value *, SanitizerMask>, 2> Checks; + // So we can 'and' all the checks together, and still get 'false', + // if at least one of the checks detected an issue. + + Check = EmitIntegerSignChangeCheckHelper(Src, SrcType, Dst, DstType, Builder); + CheckKind = Check.first; + Checks.emplace_back(Check.second); + + if (CGF.SanOpts.has(SanitizerKind::ImplicitSignedIntegerTruncation) && + (SrcBits > DstBits) && !SrcSigned && DstSigned) { + // If the signed integer truncation sanitizer was enabled, + // and we are truncating from larger unsigned type to smaller signed type, + // let's handle the case we skipped in that check. 
+ Check = + EmitIntegerTruncationCheckHelper(Src, SrcType, Dst, DstType, Builder); + CheckKind = ICCK_SignedIntegerTruncationOrSignChange; + Checks.emplace_back(Check.second); + // If the comparison result is 'i1 false', then the truncation was lossy. + } llvm::Constant *StaticArgs[] = { CGF.EmitCheckSourceLocation(Loc), CGF.EmitCheckTypeDescriptor(SrcType), CGF.EmitCheckTypeDescriptor(DstType), - llvm::ConstantInt::get(Builder.getInt8Ty(), ICCK_IntegerTruncation)}; - CGF.EmitCheck(std::make_pair(Check, SanitizerKind::ImplicitIntegerTruncation), - SanitizerHandler::ImplicitConversion, StaticArgs, {Src, Dst}); + llvm::ConstantInt::get(Builder.getInt8Ty(), CheckKind)}; + // EmitCheck() will 'and' all the checks together. + CGF.EmitCheck(Checks, SanitizerHandler::ImplicitConversion, StaticArgs, + {Src, Dst}); } /// Emit a conversion from the specified type to the specified destination type, @@ -991,6 +1197,27 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, QualType DstType, SourceLocation Loc, ScalarConversionOpts Opts) { + // All conversions involving fixed point types should be handled by the + // EmitFixedPoint family functions. This is done to prevent bloating up this + // function more, and although fixed point numbers are represented by + // integers, we do not want to follow any logic that assumes they should be + // treated as integers. + // TODO(leonardchan): When necessary, add another if statement checking for + // conversions to fixed point types from other types. + if (SrcType->isFixedPointType()) { + if (DstType->isFixedPointType()) { + return EmitFixedPointConversion(Src, SrcType, DstType, Loc); + } else if (DstType->isBooleanType()) { + // We do not need to check the padding bit on unsigned types if unsigned + // padding is enabled because overflow into this bit is undefined + // behavior. 
+ return Builder.CreateIsNotNull(Src, "tobool"); + } + + llvm_unreachable( + "Unhandled scalar conversion involving a fixed point type."); + } + QualType NoncanonicalSrcType = SrcType; QualType NoncanonicalDstType = DstType; @@ -1036,8 +1263,13 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, } // Ignore conversions like int -> uint. - if (SrcTy == DstTy) + if (SrcTy == DstTy) { + if (Opts.EmitImplicitIntegerSignChangeChecks) + EmitIntegerSignChangeCheck(Src, NoncanonicalSrcType, Src, + NoncanonicalDstType, Loc); + return Src; + } // Handle pointer conversions next: pointers can only be converted to/from // other pointers and integers. Check for pointer types in terms of LLVM, as @@ -1181,9 +1413,91 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, EmitIntegerTruncationCheck(Src, NoncanonicalSrcType, Res, NoncanonicalDstType, Loc); + if (Opts.EmitImplicitIntegerSignChangeChecks) + EmitIntegerSignChangeCheck(Src, NoncanonicalSrcType, Res, + NoncanonicalDstType, Loc); + return Res; } +Value *ScalarExprEmitter::EmitFixedPointConversion(Value *Src, QualType SrcTy, + QualType DstTy, + SourceLocation Loc) { + using llvm::APInt; + using llvm::ConstantInt; + using llvm::Value; + + assert(SrcTy->isFixedPointType()); + assert(DstTy->isFixedPointType()); + + FixedPointSemantics SrcFPSema = + CGF.getContext().getFixedPointSemantics(SrcTy); + FixedPointSemantics DstFPSema = + CGF.getContext().getFixedPointSemantics(DstTy); + unsigned SrcWidth = SrcFPSema.getWidth(); + unsigned DstWidth = DstFPSema.getWidth(); + unsigned SrcScale = SrcFPSema.getScale(); + unsigned DstScale = DstFPSema.getScale(); + bool SrcIsSigned = SrcFPSema.isSigned(); + bool DstIsSigned = DstFPSema.isSigned(); + + llvm::Type *DstIntTy = Builder.getIntNTy(DstWidth); + + Value *Result = Src; + unsigned ResultWidth = SrcWidth; + + if (!DstFPSema.isSaturated()) { + // Downscale. + if (DstScale < SrcScale) + Result = SrcIsSigned ? 
+ Builder.CreateAShr(Result, SrcScale - DstScale, "downscale") : + Builder.CreateLShr(Result, SrcScale - DstScale, "downscale"); + + // Resize. + Result = Builder.CreateIntCast(Result, DstIntTy, SrcIsSigned, "resize"); + + // Upscale. + if (DstScale > SrcScale) + Result = Builder.CreateShl(Result, DstScale - SrcScale, "upscale"); + } else { + // Adjust the number of fractional bits. + if (DstScale > SrcScale) { + ResultWidth = SrcWidth + DstScale - SrcScale; + llvm::Type *UpscaledTy = Builder.getIntNTy(ResultWidth); + Result = Builder.CreateIntCast(Result, UpscaledTy, SrcIsSigned, "resize"); + Result = Builder.CreateShl(Result, DstScale - SrcScale, "upscale"); + } else if (DstScale < SrcScale) { + Result = SrcIsSigned ? + Builder.CreateAShr(Result, SrcScale - DstScale, "downscale") : + Builder.CreateLShr(Result, SrcScale - DstScale, "downscale"); + } + + // Handle saturation. + bool LessIntBits = DstFPSema.getIntegralBits() < SrcFPSema.getIntegralBits(); + if (LessIntBits) { + Value *Max = ConstantInt::get( + CGF.getLLVMContext(), + APFixedPoint::getMax(DstFPSema).getValue().extOrTrunc(ResultWidth)); + Value *TooHigh = SrcIsSigned ? Builder.CreateICmpSGT(Result, Max) + : Builder.CreateICmpUGT(Result, Max); + Result = Builder.CreateSelect(TooHigh, Max, Result, "satmax"); + } + // Cannot overflow min to dest type if src is unsigned since all fixed + // point types can cover the unsigned min of 0. + if (SrcIsSigned && (LessIntBits || !DstIsSigned)) { + Value *Min = ConstantInt::get( + CGF.getLLVMContext(), + APFixedPoint::getMin(DstFPSema).getValue().extOrTrunc(ResultWidth)); + Value *TooLow = Builder.CreateICmpSLT(Result, Min); + Result = Builder.CreateSelect(TooLow, Min, Result, "satmin"); + } + + // Resize the integer part to get the final destination size. 
+ Result = Builder.CreateIntCast(Result, DstIntTy, SrcIsSigned, "resize"); + } + return Result; +} + /// Emit a conversion from the specified complex type to the specified /// destination type, where the destination type is an LLVM scalar type. Value *ScalarExprEmitter::EmitComplexToScalarConversion( @@ -1405,10 +1719,11 @@ Value *ScalarExprEmitter::VisitConvertVectorExpr(ConvertVectorExpr *E) { Value *ScalarExprEmitter::VisitMemberExpr(MemberExpr *E) { if (CodeGenFunction::ConstantEmission Constant = CGF.tryEmitAsConstant(E)) { CGF.EmitIgnoredExpr(E->getBase()); - return emitConstant(Constant, E); + return CGF.emitScalarConstant(Constant, E); } else { - llvm::APSInt Value; - if (E->EvaluateAsInt(Value, CGF.getContext(), Expr::SE_AllowSideEffects)) { + Expr::EvalResult Result; + if (E->EvaluateAsInt(Result, CGF.getContext(), Expr::SE_AllowSideEffects)) { + llvm::APSInt Value = Result.Val.getInt(); CGF.EmitIgnoredExpr(E->getBase()); return Builder.getInt(Value); } @@ -1681,7 +1996,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { CGF.EmitVTablePtrCheckForCast(PT->getPointeeType(), Src, /*MayBeNull=*/true, CodeGenFunction::CFITCK_UnrelatedCast, - CE->getLocStart()); + CE->getBeginLoc()); } if (CGF.CGM.getCodeGenOpts().StrictVTablePointers) { @@ -1745,11 +2060,10 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { Derived.getPointer(), DestTy->getPointeeType()); if (CGF.SanOpts.has(SanitizerKind::CFIDerivedCast)) - CGF.EmitVTablePtrCheckForCast(DestTy->getPointeeType(), - Derived.getPointer(), - /*MayBeNull=*/true, - CodeGenFunction::CFITCK_DerivedCast, - CE->getLocStart()); + CGF.EmitVTablePtrCheckForCast( + DestTy->getPointeeType(), Derived.getPointer(), + /*MayBeNull=*/true, CodeGenFunction::CFITCK_DerivedCast, + CE->getBeginLoc()); return Derived.getPointer(); } @@ -1875,11 +2189,22 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { return Builder.CreateVectorSplat(NumElements, Elt, "splat"); } + case CK_FixedPointCast: + return 
EmitScalarConversion(Visit(E), E->getType(), DestTy, + CE->getExprLoc()); + + case CK_FixedPointToBoolean: + assert(E->getType()->isFixedPointType() && + "Expected src type to be fixed point type"); + assert(DestTy->isBooleanType() && "Expected dest type to be boolean type"); + return EmitScalarConversion(Visit(E), E->getType(), DestTy, + CE->getExprLoc()); + case CK_IntegralCast: { ScalarConversionOpts Opts; - if (CGF.SanOpts.has(SanitizerKind::ImplicitIntegerTruncation)) { - if (auto *ICE = dyn_cast<ImplicitCastExpr>(CE)) - Opts.EmitImplicitIntegerTruncationChecks = !ICE->isPartOfExplicitCast(); + if (auto *ICE = dyn_cast<ImplicitCastExpr>(CE)) { + if (!ICE->isPartOfExplicitCast()) + Opts = ScalarConversionOpts(CGF.SanOpts); } return EmitScalarConversion(Visit(E), E->getType(), DestTy, CE->getExprLoc(), Opts); @@ -1920,13 +2245,10 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { CE->getExprLoc()); } - case CK_ZeroToOCLEvent: { - assert(DestTy->isEventT() && "CK_ZeroToOCLEvent cast on non-event type"); - return llvm::Constant::getNullValue(ConvertType(DestTy)); - } - - case CK_ZeroToOCLQueue: { - assert(DestTy->isQueueT() && "CK_ZeroToOCLQueue cast on non queue_t type"); + case CK_ZeroToOCLOpaqueType: { + assert((DestTy->isEventT() || DestTy->isQueueT() || + DestTy->isOCLIntelSubgroupAVCType()) && + "CK_ZeroToOCLEvent cast on non-event type"); return llvm::Constant::getNullValue(ConvertType(DestTy)); } @@ -1985,7 +2307,7 @@ llvm::Value *ScalarExprEmitter::EmitIncDecConsiderOverflowBehavior( case LangOptions::SOB_Undefined: if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) return Builder.CreateNSWAdd(InVal, Amount, Name); - // Fall through. 
+ LLVM_FALLTHROUGH; case LangOptions::SOB_Trapping: if (!E->canOverflow()) return Builder.CreateNSWAdd(InVal, Amount, Name); @@ -2280,9 +2602,11 @@ Value *ScalarExprEmitter::VisitUnaryLNot(const UnaryOperator *E) { Value *ScalarExprEmitter::VisitOffsetOfExpr(OffsetOfExpr *E) { // Try folding the offsetof to a constant. - llvm::APSInt Value; - if (E->EvaluateAsInt(Value, CGF.getContext())) + Expr::EvalResult EVResult; + if (E->EvaluateAsInt(EVResult, CGF.getContext())) { + llvm::APSInt Value = EVResult.Val.getInt(); return Builder.getInt(Value); + } // Loop over the components of the offsetof to compute the value. unsigned n = E->getNumComponents(); @@ -2551,9 +2875,10 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( // Expand the binary operator. Result = (this->*Func)(OpInfo); - // Convert the result back to the LHS type. - Result = - EmitScalarConversion(Result, E->getComputationResultType(), LHSTy, Loc); + // Convert the result back to the LHS type, + // potentially with Implicit Conversion sanitizer check. + Result = EmitScalarConversion(Result, E->getComputationResultType(), LHSTy, + Loc, ScalarConversionOpts(CGF.SanOpts)); if (atomicPHI) { llvm::BasicBlock *opBB = Builder.GetInsertBlock(); @@ -2991,7 +3316,7 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) { case LangOptions::SOB_Undefined: if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) return Builder.CreateNSWAdd(op.LHS, op.RHS, "add"); - // Fall through. + LLVM_FALLTHROUGH; case LangOptions::SOB_Trapping: if (CanElideOverflowCheck(CGF.getContext(), op)) return Builder.CreateNSWAdd(op.LHS, op.RHS, "add"); @@ -3026,7 +3351,7 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) { case LangOptions::SOB_Undefined: if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) return Builder.CreateNSWSub(op.LHS, op.RHS, "sub"); - // Fall through. 
+ LLVM_FALLTHROUGH; case LangOptions::SOB_Trapping: if (CanElideOverflowCheck(CGF.getContext(), op)) return Builder.CreateNSWSub(op.LHS, op.RHS, "sub"); diff --git a/lib/CodeGen/CGLoopInfo.cpp b/lib/CodeGen/CGLoopInfo.cpp index 21e2b8dd8c31..fd0a9c773a2e 100644 --- a/lib/CodeGen/CGLoopInfo.cpp +++ b/lib/CodeGen/CGLoopInfo.cpp @@ -10,8 +10,8 @@ #include "CGLoopInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" -#include "clang/Sema/LoopHint.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" @@ -21,14 +21,17 @@ using namespace llvm; static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs, const llvm::DebugLoc &StartLoc, - const llvm::DebugLoc &EndLoc) { + const llvm::DebugLoc &EndLoc, MDNode *&AccGroup) { if (!Attrs.IsParallel && Attrs.VectorizeWidth == 0 && Attrs.InterleaveCount == 0 && Attrs.UnrollCount == 0 && + Attrs.UnrollAndJamCount == 0 && !Attrs.PipelineDisabled && + Attrs.PipelineInitiationInterval == 0 && Attrs.VectorizeEnable == LoopAttributes::Unspecified && Attrs.UnrollEnable == LoopAttributes::Unspecified && - Attrs.DistributeEnable == LoopAttributes::Unspecified && - !StartLoc && !EndLoc) + Attrs.UnrollAndJamEnable == LoopAttributes::Unspecified && + Attrs.DistributeEnable == LoopAttributes::Unspecified && !StartLoc && + !EndLoc) return nullptr; SmallVector<Metadata *, 4> Args; @@ -61,7 +64,7 @@ static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs, Args.push_back(MDNode::get(Ctx, Vals)); } - // Setting interleave.count + // Setting unroll.count if (Attrs.UnrollCount > 0) { Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.unroll.count"), ConstantAsMetadata::get(ConstantInt::get( @@ -69,6 +72,14 @@ static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs, Args.push_back(MDNode::get(Ctx, Vals)); } + // Setting unroll_and_jam.count + if (Attrs.UnrollAndJamCount > 0) { + 
Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.unroll_and_jam.count"), + ConstantAsMetadata::get(ConstantInt::get( + Type::getInt32Ty(Ctx), Attrs.UnrollAndJamCount))}; + Args.push_back(MDNode::get(Ctx, Vals)); + } + // Setting vectorize.enable if (Attrs.VectorizeEnable != LoopAttributes::Unspecified) { Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.vectorize.enable"), @@ -91,6 +102,19 @@ static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs, Args.push_back(MDNode::get(Ctx, Vals)); } + // Setting unroll_and_jam.full or unroll_and_jam.disable + if (Attrs.UnrollAndJamEnable != LoopAttributes::Unspecified) { + std::string Name; + if (Attrs.UnrollAndJamEnable == LoopAttributes::Enable) + Name = "llvm.loop.unroll_and_jam.enable"; + else if (Attrs.UnrollAndJamEnable == LoopAttributes::Full) + Name = "llvm.loop.unroll_and_jam.full"; + else + Name = "llvm.loop.unroll_and_jam.disable"; + Metadata *Vals[] = {MDString::get(Ctx, Name)}; + Args.push_back(MDNode::get(Ctx, Vals)); + } + if (Attrs.DistributeEnable != LoopAttributes::Unspecified) { Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.distribute.enable"), ConstantAsMetadata::get(ConstantInt::get( @@ -99,6 +123,28 @@ static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs, Args.push_back(MDNode::get(Ctx, Vals)); } + if (Attrs.IsParallel) { + AccGroup = MDNode::getDistinct(Ctx, {}); + Args.push_back(MDNode::get( + Ctx, {MDString::get(Ctx, "llvm.loop.parallel_accesses"), AccGroup})); + } + + if (Attrs.PipelineDisabled) { + Metadata *Vals[] = { + MDString::get(Ctx, "llvm.loop.pipeline.disable"), + ConstantAsMetadata::get(ConstantInt::get( + Type::getInt1Ty(Ctx), (Attrs.PipelineDisabled == true)))}; + Args.push_back(MDNode::get(Ctx, Vals)); + } + + if (Attrs.PipelineInitiationInterval > 0) { + Metadata *Vals[] = { + MDString::get(Ctx, "llvm.loop.pipeline.initiationinterval"), + ConstantAsMetadata::get(ConstantInt::get( + Type::getInt32Ty(Ctx), 
Attrs.PipelineInitiationInterval))}; + Args.push_back(MDNode::get(Ctx, Vals)); + } + // Set the first operand to itself. MDNode *LoopID = MDNode::get(Ctx, Args); LoopID->replaceOperandWith(0, LoopID); @@ -107,24 +153,31 @@ static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs, LoopAttributes::LoopAttributes(bool IsParallel) : IsParallel(IsParallel), VectorizeEnable(LoopAttributes::Unspecified), - UnrollEnable(LoopAttributes::Unspecified), VectorizeWidth(0), - InterleaveCount(0), UnrollCount(0), - DistributeEnable(LoopAttributes::Unspecified) {} + UnrollEnable(LoopAttributes::Unspecified), + UnrollAndJamEnable(LoopAttributes::Unspecified), VectorizeWidth(0), + InterleaveCount(0), UnrollCount(0), UnrollAndJamCount(0), + DistributeEnable(LoopAttributes::Unspecified), PipelineDisabled(false), + PipelineInitiationInterval(0) {} void LoopAttributes::clear() { IsParallel = false; VectorizeWidth = 0; InterleaveCount = 0; UnrollCount = 0; + UnrollAndJamCount = 0; VectorizeEnable = LoopAttributes::Unspecified; UnrollEnable = LoopAttributes::Unspecified; + UnrollAndJamEnable = LoopAttributes::Unspecified; DistributeEnable = LoopAttributes::Unspecified; + PipelineDisabled = false; + PipelineInitiationInterval = 0; } LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs, const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc) : LoopID(nullptr), Header(Header), Attrs(Attrs) { - LoopID = createMetadata(Header->getContext(), Attrs, StartLoc, EndLoc); + LoopID = + createMetadata(Header->getContext(), Attrs, StartLoc, EndLoc, AccGroup); } void LoopInfoStack::push(BasicBlock *Header, const llvm::DebugLoc &StartLoc, @@ -191,12 +244,20 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::Unroll: setUnrollState(LoopAttributes::Disable); break; + case LoopHintAttr::UnrollAndJam: + setUnrollAndJamState(LoopAttributes::Disable); + break; case LoopHintAttr::Distribute: setDistributeState(false); break; + case 
LoopHintAttr::PipelineDisabled: + setPipelineDisabled(true); + break; case LoopHintAttr::UnrollCount: + case LoopHintAttr::UnrollAndJamCount: case LoopHintAttr::VectorizeWidth: case LoopHintAttr::InterleaveCount: + case LoopHintAttr::PipelineInitiationInterval: llvm_unreachable("Options cannot be disabled."); break; } @@ -210,12 +271,18 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::Unroll: setUnrollState(LoopAttributes::Enable); break; + case LoopHintAttr::UnrollAndJam: + setUnrollAndJamState(LoopAttributes::Enable); + break; case LoopHintAttr::Distribute: setDistributeState(true); break; case LoopHintAttr::UnrollCount: + case LoopHintAttr::UnrollAndJamCount: case LoopHintAttr::VectorizeWidth: case LoopHintAttr::InterleaveCount: + case LoopHintAttr::PipelineDisabled: + case LoopHintAttr::PipelineInitiationInterval: llvm_unreachable("Options cannot enabled."); break; } @@ -229,10 +296,14 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, setVectorizeEnable(true); break; case LoopHintAttr::Unroll: + case LoopHintAttr::UnrollAndJam: case LoopHintAttr::UnrollCount: + case LoopHintAttr::UnrollAndJamCount: case LoopHintAttr::VectorizeWidth: case LoopHintAttr::InterleaveCount: case LoopHintAttr::Distribute: + case LoopHintAttr::PipelineDisabled: + case LoopHintAttr::PipelineInitiationInterval: llvm_unreachable("Options cannot be used to assume mem safety."); break; } @@ -242,12 +313,18 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::Unroll: setUnrollState(LoopAttributes::Full); break; + case LoopHintAttr::UnrollAndJam: + setUnrollAndJamState(LoopAttributes::Full); + break; case LoopHintAttr::Vectorize: case LoopHintAttr::Interleave: case LoopHintAttr::UnrollCount: + case LoopHintAttr::UnrollAndJamCount: case LoopHintAttr::VectorizeWidth: case LoopHintAttr::InterleaveCount: case LoopHintAttr::Distribute: + case LoopHintAttr::PipelineDisabled: + case 
LoopHintAttr::PipelineInitiationInterval: llvm_unreachable("Options cannot be used with 'full' hint."); break; } @@ -263,10 +340,18 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::UnrollCount: setUnrollCount(ValueInt); break; + case LoopHintAttr::UnrollAndJamCount: + setUnrollAndJamCount(ValueInt); + break; + case LoopHintAttr::PipelineInitiationInterval: + setPipelineInitiationInterval(ValueInt); + break; case LoopHintAttr::Unroll: + case LoopHintAttr::UnrollAndJam: case LoopHintAttr::Vectorize: case LoopHintAttr::Interleave: case LoopHintAttr::Distribute: + case LoopHintAttr::PipelineDisabled: llvm_unreachable("Options cannot be assigned a value."); break; } @@ -284,6 +369,21 @@ void LoopInfoStack::pop() { } void LoopInfoStack::InsertHelper(Instruction *I) const { + if (I->mayReadOrWriteMemory()) { + SmallVector<Metadata *, 4> AccessGroups; + for (const LoopInfo &AL : Active) { + // Here we assume that every loop that has an access group is parallel. 
+ if (MDNode *Group = AL.getAccessGroup()) + AccessGroups.push_back(Group); + } + MDNode *UnionMD = nullptr; + if (AccessGroups.size() == 1) + UnionMD = cast<MDNode>(AccessGroups[0]); + else if (AccessGroups.size() >= 2) + UnionMD = MDNode::get(I->getContext(), AccessGroups); + I->setMetadata("llvm.access.group", UnionMD); + } + if (!hasInfo()) return; @@ -291,15 +391,12 @@ void LoopInfoStack::InsertHelper(Instruction *I) const { if (!L.getLoopID()) return; - if (TerminatorInst *TI = dyn_cast<TerminatorInst>(I)) { - for (unsigned i = 0, ie = TI->getNumSuccessors(); i < ie; ++i) - if (TI->getSuccessor(i) == L.getHeader()) { - TI->setMetadata(llvm::LLVMContext::MD_loop, L.getLoopID()); + if (I->isTerminator()) { + for (BasicBlock *Succ : successors(I)) + if (Succ == L.getHeader()) { + I->setMetadata(llvm::LLVMContext::MD_loop, L.getLoopID()); break; } return; } - - if (L.getAttributes().IsParallel && I->mayReadOrWriteMemory()) - I->setMetadata("llvm.mem.parallel_loop_access", L.getLoopID()); } diff --git a/lib/CodeGen/CGLoopInfo.h b/lib/CodeGen/CGLoopInfo.h index 9d5f23ff9a2a..84ba03bfb00b 100644 --- a/lib/CodeGen/CGLoopInfo.h +++ b/lib/CodeGen/CGLoopInfo.h @@ -49,6 +49,9 @@ struct LoopAttributes { /// Value for llvm.loop.unroll.* metadata (enable, disable, or full). LVEnableState UnrollEnable; + /// Value for llvm.loop.unroll_and_jam.* metadata (enable, disable, or full). + LVEnableState UnrollAndJamEnable; + /// Value for llvm.loop.vectorize.width metadata. unsigned VectorizeWidth; @@ -58,8 +61,17 @@ struct LoopAttributes { /// llvm.unroll. unsigned UnrollCount; + /// llvm.unroll. + unsigned UnrollAndJamCount; + /// Value for llvm.loop.distribute.enable metadata. LVEnableState DistributeEnable; + + /// Value for llvm.loop.pipeline.disable metadata. + bool PipelineDisabled; + + /// Value for llvm.loop.pipeline.iicount metadata. + unsigned PipelineInitiationInterval; }; /// Information used when generating a structured loop. 
@@ -78,6 +90,9 @@ public: /// Get the set of attributes active for this loop. const LoopAttributes &getAttributes() const { return Attrs; } + /// Return this loop's access group or nullptr if it does not have one. + llvm::MDNode *getAccessGroup() const { return AccGroup; } + private: /// Loop ID metadata. llvm::MDNode *LoopID; @@ -85,6 +100,8 @@ private: llvm::BasicBlock *Header; /// The attributes for this loop. LoopAttributes Attrs; + /// The access group for memory accesses parallel to this loop. + llvm::MDNode *AccGroup = nullptr; }; /// A stack of loop information corresponding to loop nesting levels. @@ -143,6 +160,11 @@ public: StagedAttrs.UnrollEnable = State; } + /// Set the next pushed loop unroll_and_jam state. + void setUnrollAndJamState(const LoopAttributes::LVEnableState &State) { + StagedAttrs.UnrollAndJamEnable = State; + } + /// Set the vectorize width for the next loop pushed. void setVectorizeWidth(unsigned W) { StagedAttrs.VectorizeWidth = W; } @@ -152,6 +174,17 @@ public: /// Set the unroll count for the next loop pushed. void setUnrollCount(unsigned C) { StagedAttrs.UnrollCount = C; } + /// \brief Set the unroll count for the next loop pushed. + void setUnrollAndJamCount(unsigned C) { StagedAttrs.UnrollAndJamCount = C; } + + /// Set the pipeline disabled state. + void setPipelineDisabled(bool S) { StagedAttrs.PipelineDisabled = S; } + + /// Set the pipeline initiation interval. + void setPipelineInitiationInterval(unsigned C) { + StagedAttrs.PipelineInitiationInterval = C; + } + private: /// Returns true if there is LoopInfo on the stack. 
bool hasInfo() const { return !Active.empty(); } diff --git a/lib/CodeGen/CGNonTrivialStruct.cpp b/lib/CodeGen/CGNonTrivialStruct.cpp index 922e0934b866..c6a96a912622 100644 --- a/lib/CodeGen/CGNonTrivialStruct.cpp +++ b/lib/CodeGen/CGNonTrivialStruct.cpp @@ -187,6 +187,7 @@ template <class Derived> struct GenFuncNameBase { if (!FK) return asDerived().visitTrivial(QualType(AT, 0), FD, CurStructOffset); + asDerived().flushTrivialFields(); CharUnits FieldOffset = CurStructOffset + asDerived().getFieldOffset(FD); ASTContext &Ctx = asDerived().getContext(); const ConstantArrayType *CAT = cast<ConstantArrayType>(AT); @@ -283,8 +284,9 @@ struct GenDefaultInitializeFuncName struct GenDestructorFuncName : GenUnaryFuncName<GenDestructorFuncName>, DestructedTypeVisitor<GenDestructorFuncName> { using Super = DestructedTypeVisitor<GenDestructorFuncName>; - GenDestructorFuncName(CharUnits DstAlignment, ASTContext &Ctx) - : GenUnaryFuncName<GenDestructorFuncName>("__destructor_", DstAlignment, + GenDestructorFuncName(const char *Prefix, CharUnits DstAlignment, + ASTContext &Ctx) + : GenUnaryFuncName<GenDestructorFuncName>(Prefix, DstAlignment, Ctx) {} void visitWithKind(QualType::DestructionKind DK, QualType FT, const FieldDecl *FD, CharUnits CurStructOffset) { @@ -335,6 +337,7 @@ template <class Derived> struct GenFuncBase { return asDerived().visitTrivial(QualType(AT, 0), FD, CurStackOffset, Addrs); + asDerived().flushTrivialFields(Addrs); CodeGenFunction &CGF = *this->CGF; ASTContext &Ctx = CGF.getContext(); @@ -455,12 +458,13 @@ template <class Derived> struct GenFuncBase { llvm::Function::Create(FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, FuncName, &CGM.getModule()); F->setVisibility(llvm::GlobalValue::HiddenVisibility); - CGM.SetLLVMFunctionAttributes(nullptr, FI, F); + CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, F); CGM.SetLLVMFunctionAttributesForDefinition(nullptr, F); IdentifierInfo *II = &Ctx.Idents.get(FuncName); FunctionDecl *FD = FunctionDecl::Create( Ctx, 
Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), - II, Ctx.VoidTy, nullptr, SC_PrivateExtern, false, false); + II, Ctx.getFunctionType(Ctx.VoidTy, llvm::None, {}), nullptr, + SC_PrivateExtern, false, false); CodeGenFunction NewCGF(CGM); setCGF(&NewCGF); CGF->StartFunction(FD, Ctx.VoidTy, F, FI, Args); @@ -824,11 +828,28 @@ void CodeGenFunction::callCStructDefaultConstructor(LValue Dst) { IsVolatile, *this, std::array<Address, 1>({{DstPtr}})); } +std::string +CodeGenFunction::getNonTrivialCopyConstructorStr(QualType QT, + CharUnits Alignment, + bool IsVolatile, + ASTContext &Ctx) { + GenBinaryFuncName<false> GenName("", Alignment, Alignment, Ctx); + return GenName.getName(QT, IsVolatile); +} + +std::string +CodeGenFunction::getNonTrivialDestructorStr(QualType QT, CharUnits Alignment, + bool IsVolatile, ASTContext &Ctx) { + GenDestructorFuncName GenName("", Alignment, Ctx); + return GenName.getName(QT, IsVolatile); +} + void CodeGenFunction::callCStructDestructor(LValue Dst) { bool IsVolatile = Dst.isVolatile(); Address DstPtr = Dst.getAddress(); QualType QT = Dst.getType(); - GenDestructorFuncName GenName(DstPtr.getAlignment(), getContext()); + GenDestructorFuncName GenName("__destructor_", DstPtr.getAlignment(), + getContext()); std::string FuncName = GenName.getName(QT, IsVolatile); callSpecialFunction(GenDestructor(getContext()), FuncName, QT, IsVolatile, *this, std::array<Address, 1>({{DstPtr}})); diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp index b94bbf2a384f..9c66ff0e8fb2 100644 --- a/lib/CodeGen/CGObjC.cpp +++ b/lib/CodeGen/CGObjC.cpp @@ -127,7 +127,7 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E, llvm::Constant *Constant = CGM.CreateRuntimeVariable(ConvertType(IdTy), ConstantName); LValue LV = MakeNaturalAlignAddrLValue(Constant, IdTy); - llvm::Value *Ptr = EmitLoadOfScalar(LV, E->getLocStart()); + llvm::Value *Ptr = EmitLoadOfScalar(LV, E->getBeginLoc()); cast<llvm::LoadInst>(Ptr)->setMetadata( 
CGM.getModule().getMDKindID("invariant.load"), llvm::MDNode::get(getLLVMContext(), None)); @@ -352,6 +352,81 @@ static const Expr *findWeakLValue(const Expr *E) { return nullptr; } +/// The ObjC runtime may provide entrypoints that are likely to be faster +/// than an ordinary message send of the appropriate selector. +/// +/// The entrypoints are guaranteed to be equivalent to just sending the +/// corresponding message. If the entrypoint is implemented naively as just a +/// message send, using it is a trade-off: it sacrifices a few cycles of +/// overhead to save a small amount of code. However, it's possible for +/// runtimes to detect and special-case classes that use "standard" +/// behavior; if that's dynamically a large proportion of all objects, using +/// the entrypoint will also be faster than using a message send. +/// +/// If the runtime does support a required entrypoint, then this method will +/// generate a call and return the resulting value. Otherwise it will return +/// None and the caller can generate a msgSend instead. 
+static Optional<llvm::Value *> +tryGenerateSpecializedMessageSend(CodeGenFunction &CGF, QualType ResultType, + llvm::Value *Receiver, + const CallArgList& Args, Selector Sel, + const ObjCMethodDecl *method, + bool isClassMessage) { + auto &CGM = CGF.CGM; + if (!CGM.getCodeGenOpts().ObjCConvertMessagesToRuntimeCalls) + return None; + + auto &Runtime = CGM.getLangOpts().ObjCRuntime; + switch (Sel.getMethodFamily()) { + case OMF_alloc: + if (isClassMessage && + Runtime.shouldUseRuntimeFunctionsForAlloc() && + ResultType->isObjCObjectPointerType()) { + // [Foo alloc] -> objc_alloc(Foo) + if (Sel.isUnarySelector() && Sel.getNameForSlot(0) == "alloc") + return CGF.EmitObjCAlloc(Receiver, CGF.ConvertType(ResultType)); + // [Foo allocWithZone:nil] -> objc_allocWithZone(Foo) + if (Sel.isKeywordSelector() && Sel.getNumArgs() == 1 && + Args.size() == 1 && Args.front().getType()->isPointerType() && + Sel.getNameForSlot(0) == "allocWithZone") { + const llvm::Value* arg = Args.front().getKnownRValue().getScalarVal(); + if (isa<llvm::ConstantPointerNull>(arg)) + return CGF.EmitObjCAllocWithZone(Receiver, + CGF.ConvertType(ResultType)); + return None; + } + } + break; + + case OMF_autorelease: + if (ResultType->isObjCObjectPointerType() && + CGM.getLangOpts().getGC() == LangOptions::NonGC && + Runtime.shouldUseARCFunctionsForRetainRelease()) + return CGF.EmitObjCAutorelease(Receiver, CGF.ConvertType(ResultType)); + break; + + case OMF_retain: + if (ResultType->isObjCObjectPointerType() && + CGM.getLangOpts().getGC() == LangOptions::NonGC && + Runtime.shouldUseARCFunctionsForRetainRelease()) + return CGF.EmitObjCRetainNonBlock(Receiver, CGF.ConvertType(ResultType)); + break; + + case OMF_release: + if (ResultType->isVoidType() && + CGM.getLangOpts().getGC() == LangOptions::NonGC && + Runtime.shouldUseARCFunctionsForRetainRelease()) { + CGF.EmitObjCRelease(Receiver, ARCPreciseLifetime); + return nullptr; + } + break; + + default: + break; + } + return None; +} + RValue 
CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E, ReturnValueSlot Return) { // Only the lookup mechanism and first two arguments of the method @@ -474,10 +549,17 @@ RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E, Args, method); } else { - result = Runtime.GenerateMessageSend(*this, Return, ResultType, - E->getSelector(), - Receiver, Args, OID, - method); + // Call runtime methods directly if we can. + if (Optional<llvm::Value *> SpecializedResult = + tryGenerateSpecializedMessageSend(*this, ResultType, Receiver, Args, + E->getSelector(), method, + isClassMessage)) { + result = RValue::get(SpecializedResult.getValue()); + } else { + result = Runtime.GenerateMessageSend(*this, Return, ResultType, + E->getSelector(), Receiver, Args, + OID, method); + } } // For delegate init calls in ARC, implicitly store the result of @@ -531,7 +613,7 @@ struct FinishARCDealloc final : EHScopeStack::Cleanup { /// CodeGenFunction. void CodeGenFunction::StartObjCMethod(const ObjCMethodDecl *OMD, const ObjCContainerDecl *CD) { - SourceLocation StartLoc = OMD->getLocStart(); + SourceLocation StartLoc = OMD->getBeginLoc(); FunctionArgList args; // Check if we should generate debug info for this method. if (OMD->hasAttr<NoDebugAttr>()) @@ -548,7 +630,7 @@ void CodeGenFunction::StartObjCMethod(const ObjCMethodDecl *OMD, args.append(OMD->param_begin(), OMD->param_end()); CurGD = OMD; - CurEHLocation = OMD->getLocEnd(); + CurEHLocation = OMD->getEndLoc(); StartFunction(OMD, OMD->getReturnType(), Fn, FI, args, OMD->getLocation(), StartLoc); @@ -568,7 +650,7 @@ static llvm::Value *emitARCRetainLoadOfScalar(CodeGenFunction &CGF, LValue lvalue, QualType type); /// Generate an Objective-C method. An Objective-C method is a C function with -/// its pointer, name, and types registered in the class struture. +/// its pointer, name, and types registered in the class structure. 
void CodeGenFunction::GenerateObjCMethod(const ObjCMethodDecl *OMD) { StartObjCMethod(OMD, OMD->getClassInterface()); PGO.assignRegionCounters(GlobalDecl(OMD), CurFn); @@ -883,9 +965,10 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, // If there's a non-trivial 'get' expression, we just have to emit that. if (!hasTrivialGetExpr(propImpl)) { if (!AtomicHelperFn) { - ReturnStmt ret(SourceLocation(), propImpl->getGetterCXXConstructor(), - /*nrvo*/ nullptr); - EmitReturnStmt(ret); + auto *ret = ReturnStmt::Create(getContext(), SourceLocation(), + propImpl->getGetterCXXConstructor(), + /* NRVOCandidate=*/nullptr); + EmitReturnStmt(*ret); } else { ObjCIvarDecl *ivar = propImpl->getPropertyIvarDecl(); @@ -1068,8 +1151,9 @@ static void emitStructSetterCall(CodeGenFunction &CGF, ObjCMethodDecl *OMD, // The second argument is the address of the parameter variable. ParmVarDecl *argVar = *OMD->param_begin(); - DeclRefExpr argRef(argVar, false, argVar->getType().getNonReferenceType(), - VK_LValue, SourceLocation()); + DeclRefExpr argRef(CGF.getContext(), argVar, false, + argVar->getType().getNonReferenceType(), VK_LValue, + SourceLocation()); llvm::Value *argAddr = CGF.EmitLValue(&argRef).getPointer(); argAddr = CGF.Builder.CreateBitCast(argAddr, CGF.Int8PtrTy); args.add(RValue::get(argAddr), CGF.getContext().VoidPtrTy); @@ -1113,8 +1197,9 @@ static void emitCPPObjectAtomicSetterCall(CodeGenFunction &CGF, // The second argument is the address of the parameter variable. 
ParmVarDecl *argVar = *OMD->param_begin(); - DeclRefExpr argRef(argVar, false, argVar->getType().getNonReferenceType(), - VK_LValue, SourceLocation()); + DeclRefExpr argRef(CGF.getContext(), argVar, false, + argVar->getType().getNonReferenceType(), VK_LValue, + SourceLocation()); llvm::Value *argAddr = CGF.EmitLValue(&argRef).getPointer(); argAddr = CGF.Builder.CreateBitCast(argAddr, CGF.Int8PtrTy); args.add(RValue::get(argAddr), CGF.getContext().VoidPtrTy); @@ -1286,7 +1371,7 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, // Otherwise, fake up some ASTs and emit a normal assignment. ValueDecl *selfDecl = setterMethod->getSelfDecl(); - DeclRefExpr self(selfDecl, false, selfDecl->getType(), + DeclRefExpr self(getContext(), selfDecl, false, selfDecl->getType(), VK_LValue, SourceLocation()); ImplicitCastExpr selfLoad(ImplicitCastExpr::OnStack, selfDecl->getType(), CK_LValueToRValue, &self, @@ -1297,7 +1382,8 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, ParmVarDecl *argDecl = *setterMethod->param_begin(); QualType argType = argDecl->getType().getNonReferenceType(); - DeclRefExpr arg(argDecl, false, argType, VK_LValue, SourceLocation()); + DeclRefExpr arg(getContext(), argDecl, false, argType, VK_LValue, + SourceLocation()); ImplicitCastExpr argLoad(ImplicitCastExpr::OnStack, argType.getUnqualifiedType(), CK_LValueToRValue, &arg, VK_RValue); @@ -1459,7 +1545,8 @@ void CodeGenFunction::GenerateObjCCtorDtorMethod(ObjCImplementationDecl *IMP, llvm::Value *CodeGenFunction::LoadObjCSelf() { VarDecl *Self = cast<ObjCMethodDecl>(CurFuncDecl)->getSelfDecl(); - DeclRefExpr DRE(Self, /*is enclosing local*/ (CurFuncDecl != CurCodeDecl), + DeclRefExpr DRE(getContext(), Self, + /*is enclosing local*/ (CurFuncDecl != CurCodeDecl), Self->getType(), VK_LValue, SourceLocation()); return EmitLoadOfScalar(EmitDeclRefLValue(&DRE), SourceLocation()); } @@ -1645,9 +1732,9 @@ void 
CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ // Initialize the variable, in case it's a __block variable or something. EmitAutoVarInit(variable); - const VarDecl* D = cast<VarDecl>(SD->getSingleDecl()); - DeclRefExpr tempDRE(const_cast<VarDecl*>(D), false, D->getType(), - VK_LValue, SourceLocation()); + const VarDecl *D = cast<VarDecl>(SD->getSingleDecl()); + DeclRefExpr tempDRE(getContext(), const_cast<VarDecl *>(D), false, + D->getType(), VK_LValue, SourceLocation()); elementLValue = EmitLValue(&tempDRE); elementType = D->getType(); elementIsVariable = true; @@ -1805,23 +1892,16 @@ llvm::Value *CodeGenFunction::EmitObjCExtendObjectLifetime(QualType type, /// being intrinsically used up until this point in the program. void CodeGenFunction::EmitARCIntrinsicUse(ArrayRef<llvm::Value*> values) { llvm::Constant *&fn = CGM.getObjCEntrypoints().clang_arc_use; - if (!fn) { - llvm::FunctionType *fnType = - llvm::FunctionType::get(CGM.VoidTy, None, true); - fn = CGM.CreateRuntimeFunction(fnType, "clang.arc.use"); - } + if (!fn) + fn = CGM.getIntrinsic(llvm::Intrinsic::objc_clang_arc_use); // This isn't really a "runtime" function, but as an intrinsic it // doesn't really matter as long as we align things up. EmitNounwindRuntimeCall(fn, values); } - -static llvm::Constant *createARCRuntimeFunction(CodeGenModule &CGM, - llvm::FunctionType *FTy, - StringRef Name) { - llvm::Constant *RTF = CGM.CreateRuntimeFunction(FTy, Name); - +static void setARCRuntimeFunctionLinkage(CodeGenModule &CGM, + llvm::Constant *RTF) { if (auto *F = dyn_cast<llvm::Function>(RTF)) { // If the target runtime doesn't naturally support ARC, emit weak // references to the runtime support library. 
We don't really @@ -1829,14 +1909,8 @@ static llvm::Constant *createARCRuntimeFunction(CodeGenModule &CGM, if (!CGM.getLangOpts().ObjCRuntime.hasNativeARC() && !CGM.getTriple().isOSBinFormatCOFF()) { F->setLinkage(llvm::Function::ExternalWeakLinkage); - } else if (Name == "objc_retain" || Name == "objc_release") { - // If we have Native ARC, set nonlazybind attribute for these APIs for - // performance. - F->addFnAttr(llvm::Attribute::NonLazyBind); } } - - return RTF; } /// Perform an operation having the signature @@ -1844,20 +1918,20 @@ static llvm::Constant *createARCRuntimeFunction(CodeGenModule &CGM, /// where a null input causes a no-op and returns null. static llvm::Value *emitARCValueOperation(CodeGenFunction &CGF, llvm::Value *value, + llvm::Type *returnType, llvm::Constant *&fn, - StringRef fnName, + llvm::Intrinsic::ID IntID, bool isTailCall = false) { if (isa<llvm::ConstantPointerNull>(value)) return value; if (!fn) { - llvm::FunctionType *fnType = - llvm::FunctionType::get(CGF.Int8PtrTy, CGF.Int8PtrTy, false); - fn = createARCRuntimeFunction(CGF.CGM, fnType, fnName); + fn = CGF.CGM.getIntrinsic(IntID); + setARCRuntimeFunctionLinkage(CGF.CGM, fn); } // Cast the argument to 'id'. - llvm::Type *origType = value->getType(); + llvm::Type *origType = returnType ? returnType : value->getType(); value = CGF.Builder.CreateBitCast(value, CGF.Int8PtrTy); // Call the function. @@ -1874,11 +1948,10 @@ static llvm::Value *emitARCValueOperation(CodeGenFunction &CGF, static llvm::Value *emitARCLoadOperation(CodeGenFunction &CGF, Address addr, llvm::Constant *&fn, - StringRef fnName) { + llvm::Intrinsic::ID IntID) { if (!fn) { - llvm::FunctionType *fnType = - llvm::FunctionType::get(CGF.Int8PtrTy, CGF.Int8PtrPtrTy, false); - fn = createARCRuntimeFunction(CGF.CGM, fnType, fnName); + fn = CGF.CGM.getIntrinsic(IntID); + setARCRuntimeFunctionLinkage(CGF.CGM, fn); } // Cast the argument to 'id*'. 
@@ -1901,16 +1974,13 @@ static llvm::Value *emitARCStoreOperation(CodeGenFunction &CGF, Address addr, llvm::Value *value, llvm::Constant *&fn, - StringRef fnName, + llvm::Intrinsic::ID IntID, bool ignored) { assert(addr.getElementType() == value->getType()); if (!fn) { - llvm::Type *argTypes[] = { CGF.Int8PtrPtrTy, CGF.Int8PtrTy }; - - llvm::FunctionType *fnType - = llvm::FunctionType::get(CGF.Int8PtrTy, argTypes, false); - fn = createARCRuntimeFunction(CGF.CGM, fnType, fnName); + fn = CGF.CGM.getIntrinsic(IntID); + setARCRuntimeFunctionLinkage(CGF.CGM, fn); } llvm::Type *origType = value->getType(); @@ -1932,15 +2002,12 @@ static void emitARCCopyOperation(CodeGenFunction &CGF, Address dst, Address src, llvm::Constant *&fn, - StringRef fnName) { + llvm::Intrinsic::ID IntID) { assert(dst.getType() == src.getType()); if (!fn) { - llvm::Type *argTypes[] = { CGF.Int8PtrPtrTy, CGF.Int8PtrPtrTy }; - - llvm::FunctionType *fnType - = llvm::FunctionType::get(CGF.Builder.getVoidTy(), argTypes, false); - fn = createARCRuntimeFunction(CGF.CGM, fnType, fnName); + fn = CGF.CGM.getIntrinsic(IntID); + setARCRuntimeFunctionLinkage(CGF.CGM, fn); } llvm::Value *args[] = { @@ -1950,6 +2017,39 @@ static void emitARCCopyOperation(CodeGenFunction &CGF, CGF.EmitNounwindRuntimeCall(fn, args); } +/// Perform an operation having the signature +/// i8* (i8*) +/// where a null input causes a no-op and returns null. 
+static llvm::Value *emitObjCValueOperation(CodeGenFunction &CGF, + llvm::Value *value, + llvm::Type *returnType, + llvm::Constant *&fn, + StringRef fnName) { + if (isa<llvm::ConstantPointerNull>(value)) + return value; + + if (!fn) { + llvm::FunctionType *fnType = + llvm::FunctionType::get(CGF.Int8PtrTy, CGF.Int8PtrTy, false); + fn = CGF.CGM.CreateRuntimeFunction(fnType, fnName); + + // We have Native ARC, so set nonlazybind attribute for performance + if (llvm::Function *f = dyn_cast<llvm::Function>(fn)) + if (fnName == "objc_retain") + f->addFnAttr(llvm::Attribute::NonLazyBind); + } + + // Cast the argument to 'id'. + llvm::Type *origType = returnType ? returnType : value->getType(); + value = CGF.Builder.CreateBitCast(value, CGF.Int8PtrTy); + + // Call the function. + llvm::CallInst *call = CGF.EmitNounwindRuntimeCall(fn, value); + + // Cast the result back to the original type. + return CGF.Builder.CreateBitCast(call, origType); +} + /// Produce the code to do a retain. Based on the type, calls one of: /// call i8* \@objc_retain(i8* %value) /// call i8* \@objc_retainBlock(i8* %value) @@ -1963,9 +2063,9 @@ llvm::Value *CodeGenFunction::EmitARCRetain(QualType type, llvm::Value *value) { /// Retain the given object, with normal retain semantics. /// call i8* \@objc_retain(i8* %value) llvm::Value *CodeGenFunction::EmitARCRetainNonBlock(llvm::Value *value) { - return emitARCValueOperation(*this, value, + return emitARCValueOperation(*this, value, nullptr, CGM.getObjCEntrypoints().objc_retain, - "objc_retain"); + llvm::Intrinsic::objc_retain); } /// Retain the given block, with _Block_copy semantics. 
@@ -1977,9 +2077,9 @@ llvm::Value *CodeGenFunction::EmitARCRetainNonBlock(llvm::Value *value) { llvm::Value *CodeGenFunction::EmitARCRetainBlock(llvm::Value *value, bool mandatory) { llvm::Value *result - = emitARCValueOperation(*this, value, + = emitARCValueOperation(*this, value, nullptr, CGM.getObjCEntrypoints().objc_retainBlock, - "objc_retainBlock"); + llvm::Intrinsic::objc_retainBlock); // If the copy isn't mandatory, add !clang.arc.copy_on_escape to // tell the optimizer that it doesn't need to do this copy if the @@ -2047,9 +2147,9 @@ static void emitAutoreleasedReturnValueMarker(CodeGenFunction &CGF) { llvm::Value * CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) { emitAutoreleasedReturnValueMarker(*this); - return emitARCValueOperation(*this, value, + return emitARCValueOperation(*this, value, nullptr, CGM.getObjCEntrypoints().objc_retainAutoreleasedReturnValue, - "objc_retainAutoreleasedReturnValue"); + llvm::Intrinsic::objc_retainAutoreleasedReturnValue); } /// Claim a possibly-autoreleased return value at +0. This is only @@ -2062,9 +2162,9 @@ CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) { llvm::Value * CodeGenFunction::EmitARCUnsafeClaimAutoreleasedReturnValue(llvm::Value *value) { emitAutoreleasedReturnValueMarker(*this); - return emitARCValueOperation(*this, value, + return emitARCValueOperation(*this, value, nullptr, CGM.getObjCEntrypoints().objc_unsafeClaimAutoreleasedReturnValue, - "objc_unsafeClaimAutoreleasedReturnValue"); + llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue); } /// Release the given object. 
@@ -2075,9 +2175,8 @@ void CodeGenFunction::EmitARCRelease(llvm::Value *value, llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_release; if (!fn) { - llvm::FunctionType *fnType = - llvm::FunctionType::get(Builder.getVoidTy(), Int8PtrTy, false); - fn = createARCRuntimeFunction(CGM, fnType, "objc_release"); + fn = CGM.getIntrinsic(llvm::Intrinsic::objc_release); + setARCRuntimeFunctionLinkage(CGM, fn); } // Cast the argument to 'id'. @@ -2122,10 +2221,8 @@ llvm::Value *CodeGenFunction::EmitARCStoreStrongCall(Address addr, llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_storeStrong; if (!fn) { - llvm::Type *argTypes[] = { Int8PtrPtrTy, Int8PtrTy }; - llvm::FunctionType *fnType - = llvm::FunctionType::get(Builder.getVoidTy(), argTypes, false); - fn = createARCRuntimeFunction(CGM, fnType, "objc_storeStrong"); + fn = CGM.getIntrinsic(llvm::Intrinsic::objc_storeStrong); + setARCRuntimeFunctionLinkage(CGM, fn); } llvm::Value *args[] = { @@ -2177,18 +2274,18 @@ llvm::Value *CodeGenFunction::EmitARCStoreStrong(LValue dst, /// Autorelease the given object. /// call i8* \@objc_autorelease(i8* %value) llvm::Value *CodeGenFunction::EmitARCAutorelease(llvm::Value *value) { - return emitARCValueOperation(*this, value, + return emitARCValueOperation(*this, value, nullptr, CGM.getObjCEntrypoints().objc_autorelease, - "objc_autorelease"); + llvm::Intrinsic::objc_autorelease); } /// Autorelease the given object. 
/// call i8* \@objc_autoreleaseReturnValue(i8* %value) llvm::Value * CodeGenFunction::EmitARCAutoreleaseReturnValue(llvm::Value *value) { - return emitARCValueOperation(*this, value, + return emitARCValueOperation(*this, value, nullptr, CGM.getObjCEntrypoints().objc_autoreleaseReturnValue, - "objc_autoreleaseReturnValue", + llvm::Intrinsic::objc_autoreleaseReturnValue, /*isTailCall*/ true); } @@ -2196,9 +2293,9 @@ CodeGenFunction::EmitARCAutoreleaseReturnValue(llvm::Value *value) { /// call i8* \@objc_retainAutoreleaseReturnValue(i8* %value) llvm::Value * CodeGenFunction::EmitARCRetainAutoreleaseReturnValue(llvm::Value *value) { - return emitARCValueOperation(*this, value, + return emitARCValueOperation(*this, value, nullptr, CGM.getObjCEntrypoints().objc_retainAutoreleaseReturnValue, - "objc_retainAutoreleaseReturnValue", + llvm::Intrinsic::objc_retainAutoreleaseReturnValue, /*isTailCall*/ true); } @@ -2225,9 +2322,9 @@ llvm::Value *CodeGenFunction::EmitARCRetainAutorelease(QualType type, /// call i8* \@objc_retainAutorelease(i8* %value) llvm::Value * CodeGenFunction::EmitARCRetainAutoreleaseNonBlock(llvm::Value *value) { - return emitARCValueOperation(*this, value, + return emitARCValueOperation(*this, value, nullptr, CGM.getObjCEntrypoints().objc_retainAutorelease, - "objc_retainAutorelease"); + llvm::Intrinsic::objc_retainAutorelease); } /// i8* \@objc_loadWeak(i8** %addr) @@ -2235,14 +2332,14 @@ CodeGenFunction::EmitARCRetainAutoreleaseNonBlock(llvm::Value *value) { llvm::Value *CodeGenFunction::EmitARCLoadWeak(Address addr) { return emitARCLoadOperation(*this, addr, CGM.getObjCEntrypoints().objc_loadWeak, - "objc_loadWeak"); + llvm::Intrinsic::objc_loadWeak); } /// i8* \@objc_loadWeakRetained(i8** %addr) llvm::Value *CodeGenFunction::EmitARCLoadWeakRetained(Address addr) { return emitARCLoadOperation(*this, addr, CGM.getObjCEntrypoints().objc_loadWeakRetained, - "objc_loadWeakRetained"); + llvm::Intrinsic::objc_loadWeakRetained); } /// i8* 
\@objc_storeWeak(i8** %addr, i8* %value) @@ -2252,7 +2349,7 @@ llvm::Value *CodeGenFunction::EmitARCStoreWeak(Address addr, bool ignored) { return emitARCStoreOperation(*this, addr, value, CGM.getObjCEntrypoints().objc_storeWeak, - "objc_storeWeak", ignored); + llvm::Intrinsic::objc_storeWeak, ignored); } /// i8* \@objc_initWeak(i8** %addr, i8* %value) @@ -2272,7 +2369,7 @@ void CodeGenFunction::EmitARCInitWeak(Address addr, llvm::Value *value) { emitARCStoreOperation(*this, addr, value, CGM.getObjCEntrypoints().objc_initWeak, - "objc_initWeak", /*ignored*/ true); + llvm::Intrinsic::objc_initWeak, /*ignored*/ true); } /// void \@objc_destroyWeak(i8** %addr) @@ -2280,9 +2377,8 @@ void CodeGenFunction::EmitARCInitWeak(Address addr, llvm::Value *value) { void CodeGenFunction::EmitARCDestroyWeak(Address addr) { llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_destroyWeak; if (!fn) { - llvm::FunctionType *fnType = - llvm::FunctionType::get(Builder.getVoidTy(), Int8PtrPtrTy, false); - fn = createARCRuntimeFunction(CGM, fnType, "objc_destroyWeak"); + fn = CGM.getIntrinsic(llvm::Intrinsic::objc_destroyWeak); + setARCRuntimeFunctionLinkage(CGM, fn); } // Cast the argument to 'id*'. 
@@ -2297,7 +2393,7 @@ void CodeGenFunction::EmitARCDestroyWeak(Address addr) { void CodeGenFunction::EmitARCMoveWeak(Address dst, Address src) { emitARCCopyOperation(*this, dst, src, CGM.getObjCEntrypoints().objc_moveWeak, - "objc_moveWeak"); + llvm::Intrinsic::objc_moveWeak); } /// void \@objc_copyWeak(i8** %dest, i8** %src) @@ -2306,7 +2402,7 @@ void CodeGenFunction::EmitARCMoveWeak(Address dst, Address src) { void CodeGenFunction::EmitARCCopyWeak(Address dst, Address src) { emitARCCopyOperation(*this, dst, src, CGM.getObjCEntrypoints().objc_copyWeak, - "objc_copyWeak"); + llvm::Intrinsic::objc_copyWeak); } void CodeGenFunction::emitARCCopyAssignWeak(QualType Ty, Address DstAddr, @@ -2329,9 +2425,8 @@ void CodeGenFunction::emitARCMoveAssignWeak(QualType Ty, Address DstAddr, llvm::Value *CodeGenFunction::EmitObjCAutoreleasePoolPush() { llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_autoreleasePoolPush; if (!fn) { - llvm::FunctionType *fnType = - llvm::FunctionType::get(Int8PtrTy, false); - fn = createARCRuntimeFunction(CGM, fnType, "objc_autoreleasePoolPush"); + fn = CGM.getIntrinsic(llvm::Intrinsic::objc_autoreleasePoolPush); + setARCRuntimeFunctionLinkage(CGM, fn); } return EmitNounwindRuntimeCall(fn); @@ -2342,18 +2437,28 @@ llvm::Value *CodeGenFunction::EmitObjCAutoreleasePoolPush() { void CodeGenFunction::EmitObjCAutoreleasePoolPop(llvm::Value *value) { assert(value->getType() == Int8PtrTy); - llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_autoreleasePoolPop; - if (!fn) { - llvm::FunctionType *fnType = - llvm::FunctionType::get(Builder.getVoidTy(), Int8PtrTy, false); + if (getInvokeDest()) { + // Call the runtime method not the intrinsic if we are handling exceptions + llvm::Constant *&fn = + CGM.getObjCEntrypoints().objc_autoreleasePoolPopInvoke; + if (!fn) { + llvm::FunctionType *fnType = + llvm::FunctionType::get(Builder.getVoidTy(), Int8PtrTy, false); + fn = CGM.CreateRuntimeFunction(fnType, "objc_autoreleasePoolPop"); + 
setARCRuntimeFunctionLinkage(CGM, fn); + } - // We don't want to use a weak import here; instead we should not - // fall into this path. - fn = createARCRuntimeFunction(CGM, fnType, "objc_autoreleasePoolPop"); - } + // objc_autoreleasePoolPop can throw. + EmitRuntimeCallOrInvoke(fn, value); + } else { + llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_autoreleasePoolPop; + if (!fn) { + fn = CGM.getIntrinsic(llvm::Intrinsic::objc_autoreleasePoolPop); + setARCRuntimeFunctionLinkage(CGM, fn); + } - // objc_autoreleasePoolPop can throw. - EmitRuntimeCallOrInvoke(fn, value); + EmitRuntimeCall(fn, value); + } } /// Produce the code to do an MRR version objc_autoreleasepool_push. @@ -2384,6 +2489,24 @@ llvm::Value *CodeGenFunction::EmitObjCMRRAutoreleasePoolPush() { return InitRV.getScalarVal(); } +/// Allocate the given objc object. +/// call i8* \@objc_alloc(i8* %value) +llvm::Value *CodeGenFunction::EmitObjCAlloc(llvm::Value *value, + llvm::Type *resultType) { + return emitObjCValueOperation(*this, value, resultType, + CGM.getObjCEntrypoints().objc_alloc, + "objc_alloc"); +} + +/// Allocate the given objc object. +/// call i8* \@objc_allocWithZone(i8* %value) +llvm::Value *CodeGenFunction::EmitObjCAllocWithZone(llvm::Value *value, + llvm::Type *resultType) { + return emitObjCValueOperation(*this, value, resultType, + CGM.getObjCEntrypoints().objc_allocWithZone, + "objc_allocWithZone"); +} + /// Produce the code to do a primitive release. /// [tmp drain]; void CodeGenFunction::EmitObjCMRRAutoreleasePoolPop(llvm::Value *Arg) { @@ -2418,6 +2541,55 @@ void CodeGenFunction::emitARCIntrinsicUse(CodeGenFunction &CGF, Address addr, CGF.EmitARCIntrinsicUse(value); } +/// Autorelease the given object. 
+/// call i8* \@objc_autorelease(i8* %value) +llvm::Value *CodeGenFunction::EmitObjCAutorelease(llvm::Value *value, + llvm::Type *returnType) { + return emitObjCValueOperation(*this, value, returnType, + CGM.getObjCEntrypoints().objc_autoreleaseRuntimeFunction, + "objc_autorelease"); +} + +/// Retain the given object, with normal retain semantics. +/// call i8* \@objc_retain(i8* %value) +llvm::Value *CodeGenFunction::EmitObjCRetainNonBlock(llvm::Value *value, + llvm::Type *returnType) { + return emitObjCValueOperation(*this, value, returnType, + CGM.getObjCEntrypoints().objc_retainRuntimeFunction, + "objc_retain"); +} + +/// Release the given object. +/// call void \@objc_release(i8* %value) +void CodeGenFunction::EmitObjCRelease(llvm::Value *value, + ARCPreciseLifetime_t precise) { + if (isa<llvm::ConstantPointerNull>(value)) return; + + llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_release; + if (!fn) { + if (!fn) { + llvm::FunctionType *fnType = + llvm::FunctionType::get(Builder.getVoidTy(), Int8PtrTy, false); + fn = CGM.CreateRuntimeFunction(fnType, "objc_release"); + setARCRuntimeFunctionLinkage(CGM, fn); + // We have Native ARC, so set nonlazybind attribute for performance + if (llvm::Function *f = dyn_cast<llvm::Function>(fn)) + f->addFnAttr(llvm::Attribute::NonLazyBind); + } + } + + // Cast the argument to 'id'. + value = Builder.CreateBitCast(value, Int8PtrTy); + + // Call objc_release. 
+ llvm::CallInst *call = EmitNounwindRuntimeCall(fn, value); + + if (precise == ARCImpreciseLifetime) { + call->setMetadata("clang.imprecise_release", + llvm::MDNode::get(Builder.getContext(), None)); + } +} + namespace { struct CallObjCAutoreleasePoolObject final : EHScopeStack::Cleanup { llvm::Value *Token; @@ -2446,27 +2618,36 @@ void CodeGenFunction::EmitObjCAutoreleasePoolCleanup(llvm::Value *Ptr) { EHStack.pushCleanup<CallObjCMRRAutoreleasePoolObject>(NormalCleanup, Ptr); } -static TryEmitResult tryEmitARCRetainLoadOfScalar(CodeGenFunction &CGF, - LValue lvalue, - QualType type) { - switch (type.getObjCLifetime()) { +static bool shouldRetainObjCLifetime(Qualifiers::ObjCLifetime lifetime) { + switch (lifetime) { case Qualifiers::OCL_None: case Qualifiers::OCL_ExplicitNone: case Qualifiers::OCL_Strong: case Qualifiers::OCL_Autoreleasing: - return TryEmitResult(CGF.EmitLoadOfLValue(lvalue, - SourceLocation()).getScalarVal(), - false); + return true; case Qualifiers::OCL_Weak: - return TryEmitResult(CGF.EmitARCLoadWeakRetained(lvalue.getAddress()), - true); + return false; } llvm_unreachable("impossible lifetime!"); } static TryEmitResult tryEmitARCRetainLoadOfScalar(CodeGenFunction &CGF, + LValue lvalue, + QualType type) { + llvm::Value *result; + bool shouldRetain = shouldRetainObjCLifetime(type.getObjCLifetime()); + if (shouldRetain) { + result = CGF.EmitLoadOfLValue(lvalue, SourceLocation()).getScalarVal(); + } else { + assert(type.getObjCLifetime() == Qualifiers::OCL_Weak); + result = CGF.EmitARCLoadWeakRetained(lvalue.getAddress()); + } + return TryEmitResult(result, !shouldRetain); +} + +static TryEmitResult tryEmitARCRetainLoadOfScalar(CodeGenFunction &CGF, const Expr *e) { e = e->IgnoreParens(); QualType type = e->getType(); @@ -2500,6 +2681,16 @@ static TryEmitResult tryEmitARCRetainLoadOfScalar(CodeGenFunction &CGF, cast<BinaryOperator>(e)->getOpcode() == BO_Assign) return TryEmitResult(CGF.EmitScalarExpr(e), false); + // Try to emit code for scalar 
constant instead of emitting LValue and + // loading it because we are not guaranteed to have an l-value. One of such + // cases is DeclRefExpr referencing non-odr-used constant-evaluated variable. + if (const auto *decl_expr = dyn_cast<DeclRefExpr>(e)) { + auto *DRE = const_cast<DeclRefExpr *>(decl_expr); + if (CodeGenFunction::ConstantEmission constant = CGF.tryEmitAsConstant(DRE)) + return TryEmitResult(CGF.emitScalarConstant(constant, DRE), + !shouldRetainObjCLifetime(type.getObjCLifetime())); + } + return tryEmitARCRetainLoadOfScalar(CGF, CGF.EmitLValue(e), type); } @@ -3229,29 +3420,32 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( ASTContext &C = getContext(); IdentifierInfo *II = &CGM.getContext().Idents.get("__assign_helper_atomic_property_"); - FunctionDecl *FD = FunctionDecl::Create(C, - C.getTranslationUnitDecl(), - SourceLocation(), - SourceLocation(), II, C.VoidTy, - nullptr, SC_Static, - false, - false); + QualType ReturnTy = C.VoidTy; QualType DestTy = C.getPointerType(Ty); QualType SrcTy = Ty; SrcTy.addConst(); SrcTy = C.getPointerType(SrcTy); + SmallVector<QualType, 2> ArgTys; + ArgTys.push_back(DestTy); + ArgTys.push_back(SrcTy); + QualType FunctionTy = C.getFunctionType(ReturnTy, ArgTys, {}); + + FunctionDecl *FD = FunctionDecl::Create( + C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II, + FunctionTy, nullptr, SC_Static, false, false); + FunctionArgList args; - ImplicitParamDecl DstDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr, - DestTy, ImplicitParamDecl::Other); + ImplicitParamDecl DstDecl(C, FD, SourceLocation(), /*Id=*/nullptr, DestTy, + ImplicitParamDecl::Other); args.push_back(&DstDecl); - ImplicitParamDecl SrcDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr, - SrcTy, ImplicitParamDecl::Other); + ImplicitParamDecl SrcDecl(C, FD, SourceLocation(), /*Id=*/nullptr, SrcTy, + ImplicitParamDecl::Other); args.push_back(&SrcDecl); const CGFunctionInfo &FI = - 
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); + CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, args); llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI); @@ -3262,25 +3456,25 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI); - StartFunction(FD, C.VoidTy, Fn, FI, args); + StartFunction(FD, ReturnTy, Fn, FI, args); - DeclRefExpr DstExpr(&DstDecl, false, DestTy, - VK_RValue, SourceLocation()); + DeclRefExpr DstExpr(getContext(), &DstDecl, false, DestTy, VK_RValue, + SourceLocation()); UnaryOperator DST(&DstExpr, UO_Deref, DestTy->getPointeeType(), VK_LValue, OK_Ordinary, SourceLocation(), false); - DeclRefExpr SrcExpr(&SrcDecl, false, SrcTy, - VK_RValue, SourceLocation()); + DeclRefExpr SrcExpr(getContext(), &SrcDecl, false, SrcTy, VK_RValue, + SourceLocation()); UnaryOperator SRC(&SrcExpr, UO_Deref, SrcTy->getPointeeType(), VK_LValue, OK_Ordinary, SourceLocation(), false); Expr *Args[2] = { &DST, &SRC }; CallExpr *CalleeExp = cast<CallExpr>(PID->getSetterCXXAssignment()); - CXXOperatorCallExpr TheCall(C, OO_Equal, CalleeExp->getCallee(), - Args, DestTy->getPointeeType(), - VK_LValue, SourceLocation(), FPOptions()); + CXXOperatorCallExpr *TheCall = CXXOperatorCallExpr::Create( + C, OO_Equal, CalleeExp->getCallee(), Args, DestTy->getPointeeType(), + VK_LValue, SourceLocation(), FPOptions()); - EmitStmt(&TheCall); + EmitStmt(TheCall); FinishFunction(); HelperFn = llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy); @@ -3301,53 +3495,54 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( if ((!(PD->getPropertyAttributes() & ObjCPropertyDecl::OBJC_PR_atomic))) return nullptr; llvm::Constant *HelperFn = nullptr; - if (hasTrivialGetExpr(PID)) return nullptr; assert(PID->getGetterCXXConstructor() && "getGetterCXXConstructor - null"); if ((HelperFn = CGM.getAtomicGetterHelperFnMap(Ty))) return HelperFn; - ASTContext &C = getContext(); - IdentifierInfo *II - = 
&CGM.getContext().Idents.get("__copy_helper_atomic_property_"); - FunctionDecl *FD = FunctionDecl::Create(C, - C.getTranslationUnitDecl(), - SourceLocation(), - SourceLocation(), II, C.VoidTy, - nullptr, SC_Static, - false, - false); + IdentifierInfo *II = + &CGM.getContext().Idents.get("__copy_helper_atomic_property_"); + QualType ReturnTy = C.VoidTy; QualType DestTy = C.getPointerType(Ty); QualType SrcTy = Ty; SrcTy.addConst(); SrcTy = C.getPointerType(SrcTy); + SmallVector<QualType, 2> ArgTys; + ArgTys.push_back(DestTy); + ArgTys.push_back(SrcTy); + QualType FunctionTy = C.getFunctionType(ReturnTy, ArgTys, {}); + + FunctionDecl *FD = FunctionDecl::Create( + C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II, + FunctionTy, nullptr, SC_Static, false, false); + FunctionArgList args; - ImplicitParamDecl DstDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr, - DestTy, ImplicitParamDecl::Other); + ImplicitParamDecl DstDecl(C, FD, SourceLocation(), /*Id=*/nullptr, DestTy, + ImplicitParamDecl::Other); args.push_back(&DstDecl); - ImplicitParamDecl SrcDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr, - SrcTy, ImplicitParamDecl::Other); + ImplicitParamDecl SrcDecl(C, FD, SourceLocation(), /*Id=*/nullptr, SrcTy, + ImplicitParamDecl::Other); args.push_back(&SrcDecl); const CGFunctionInfo &FI = - CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); + CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, args); llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI); - llvm::Function *Fn = - llvm::Function::Create(LTy, llvm::GlobalValue::InternalLinkage, - "__copy_helper_atomic_property_", &CGM.getModule()); + llvm::Function *Fn = llvm::Function::Create( + LTy, llvm::GlobalValue::InternalLinkage, "__copy_helper_atomic_property_", + &CGM.getModule()); CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI); - StartFunction(FD, C.VoidTy, Fn, FI, args); + StartFunction(FD, ReturnTy, Fn, FI, args); - DeclRefExpr 
SrcExpr(&SrcDecl, false, SrcTy, - VK_RValue, SourceLocation()); + DeclRefExpr SrcExpr(getContext(), &SrcDecl, false, SrcTy, VK_RValue, + SourceLocation()); UnaryOperator SRC(&SrcExpr, UO_Deref, SrcTy->getPointeeType(), VK_LValue, OK_Ordinary, SourceLocation(), false); @@ -3372,8 +3567,8 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( CXXConstExpr->getConstructionKind(), SourceRange()); - DeclRefExpr DstExpr(&DstDecl, false, DestTy, - VK_RValue, SourceLocation()); + DeclRefExpr DstExpr(getContext(), &DstDecl, false, DestTy, VK_RValue, + SourceLocation()); RValue DV = EmitAnyExpr(&DstExpr); CharUnits Alignment diff --git a/lib/CodeGen/CGObjCGNU.cpp b/lib/CodeGen/CGObjCGNU.cpp index 3e994edc976b..548bd6b3fd72 100644 --- a/lib/CodeGen/CGObjCGNU.cpp +++ b/lib/CodeGen/CGObjCGNU.cpp @@ -18,6 +18,7 @@ #include "CGCleanup.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" +#include "CGCXXABI.h" #include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" @@ -178,6 +179,9 @@ protected: /// runtime provides some LLVM passes that can use this to do things like /// automatic IMP caching and speculative inlining. unsigned msgSendMDKind; + /// Does the current target use SEH-based exceptions? False implies + /// Itanium-style DWARF unwinding. + bool usesSEHExceptions; /// Helper to check if we are targeting a specific runtime version or later. 
bool isRuntime(ObjCRuntime::Kind kind, unsigned major, unsigned minor=0) { @@ -217,6 +221,7 @@ protected: llvm::Constant *value = llvm::ConstantDataArray::getString(VMContext,Str); auto *GV = new llvm::GlobalVariable(TheModule, value->getType(), true, llvm::GlobalValue::LinkOnceODRLinkage, value, name); + GV->setComdat(TheModule.getOrInsertComdat(name)); if (Private) GV->setVisibility(llvm::GlobalValue::HiddenVisibility); ConstStr = GV; @@ -272,6 +277,8 @@ protected: Fields.addInt(Int8Ty, 0); } + virtual llvm::Constant *GenerateCategoryProtocolList(const + ObjCCategoryDecl *OCD); virtual ConstantArrayBuilder PushPropertyListHeader(ConstantStructBuilder &Fields, int count) { // int count; @@ -510,8 +517,8 @@ protected: /// Returns a selector with the specified type encoding. An empty string is /// used to return an untyped selector (with the types field set to NULL). - virtual llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel, - const std::string &TypeEncoding); + virtual llvm::Value *GetTypedSelector(CodeGenFunction &CGF, Selector Sel, + const std::string &TypeEncoding); /// Returns the name of ivar offset variables. 
In the GNUstep v1 ABI, this /// contains the class and ivar names, in the v2 ABI this contains the type @@ -810,8 +817,12 @@ class CGObjCGNUstep : public CGObjCGNU { // Slot_t objc_slot_lookup_super(struct objc_super*, SEL); SlotLookupSuperFn.init(&CGM, "objc_slot_lookup_super", SlotTy, PtrToObjCSuperTy, SelectorTy); - // If we're in ObjC++ mode, then we want to make - if (CGM.getLangOpts().CPlusPlus) { + // If we're in ObjC++ mode, then we want to make + if (usesSEHExceptions) { + llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext); + // void objc_exception_rethrow(void) + ExceptionReThrowFn.init(&CGM, "objc_exception_rethrow", VoidTy); + } else if (CGM.getLangOpts().CPlusPlus) { llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext); // void *__cxa_begin_catch(void *e) EnterCatchFn.init(&CGM, "__cxa_begin_catch", PtrTy, PtrTy); @@ -888,22 +899,25 @@ class CGObjCGNUstep : public CGObjCGNU { /// This is the ABI that provides a clean break with the legacy GCC ABI and /// cleans up a number of things that were added to work around 1980s linkers. class CGObjCGNUstep2 : public CGObjCGNUstep { - /// The section for selectors. - static constexpr const char *const SelSection = "__objc_selectors"; - /// The section for classes. - static constexpr const char *const ClsSection = "__objc_classes"; - /// The section for references to classes. - static constexpr const char *const ClsRefSection = "__objc_class_refs"; - /// The section for categories. - static constexpr const char *const CatSection = "__objc_cats"; - /// The section for protocols. - static constexpr const char *const ProtocolSection = "__objc_protocols"; - /// The section for protocol references. 
- static constexpr const char *const ProtocolRefSection = "__objc_protocol_refs"; - /// The section for class aliases - static constexpr const char *const ClassAliasSection = "__objc_class_aliases"; - /// The section for constexpr constant strings - static constexpr const char *const ConstantStringSection = "__objc_constant_string"; + enum SectionKind + { + SelectorSection = 0, + ClassSection, + ClassReferenceSection, + CategorySection, + ProtocolSection, + ProtocolReferenceSection, + ClassAliasSection, + ConstantStringSection + }; + static const char *const SectionsBaseNames[8]; + template<SectionKind K> + std::string sectionName() { + std::string name(SectionsBaseNames[K]); + if (CGM.getTriple().isOSBinFormatCOFF()) + name += "$m"; + return name; + } /// The GCC ABI superclass message lookup function. Takes a pointer to a /// structure describing the receiver and the class, and a selector as /// arguments. Returns the IMP for the corresponding method. @@ -1069,7 +1083,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { isNamed ? StringRef(StringName) : ".objc_string", Align, false, isNamed ? 
llvm::GlobalValue::LinkOnceODRLinkage : llvm::GlobalValue::PrivateLinkage); - ObjCStrGV->setSection(ConstantStringSection); + ObjCStrGV->setSection(sectionName<ConstantStringSection>()); if (isNamed) { ObjCStrGV->setComdat(TheModule.getOrInsertComdat(StringName)); ObjCStrGV->setVisibility(llvm::GlobalValue::HiddenVisibility); @@ -1152,6 +1166,15 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { return MethodList.finishAndCreateGlobal(".objc_protocol_method_list", CGM.getPointerAlign()); } + llvm::Constant *GenerateCategoryProtocolList(const ObjCCategoryDecl *OCD) + override { + SmallVector<llvm::Constant*, 16> Protocols; + for (const auto *PI : OCD->getReferencedProtocols()) + Protocols.push_back( + llvm::ConstantExpr::getBitCast(GenerateProtocolRef(PI), + ProtocolPtrTy)); + return GenerateProtocolList(Protocols); + } llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, Address ObjCSuper, llvm::Value *cmd, MessageSendInfo &MSI) override { @@ -1247,9 +1270,10 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { assert(!TheModule.getGlobalVariable(RefName)); // Emit a reference symbol. auto GV = new llvm::GlobalVariable(TheModule, ProtocolPtrTy, - false, llvm::GlobalValue::ExternalLinkage, + false, llvm::GlobalValue::LinkOnceODRLinkage, llvm::ConstantExpr::getBitCast(Protocol, ProtocolPtrTy), RefName); - GV->setSection(ProtocolRefSection); + GV->setComdat(TheModule.getOrInsertComdat(RefName)); + GV->setSection(sectionName<ProtocolReferenceSection>()); GV->setAlignment(CGM.getPointerAlign().getQuantity()); Ref = GV; } @@ -1282,9 +1306,22 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { EmittedProtocol = true; + auto SymName = SymbolForProtocol(ProtocolName); + auto *OldGV = TheModule.getGlobalVariable(SymName); + // Use the protocol definition, if there is one. 
if (const ObjCProtocolDecl *Def = PD->getDefinition()) PD = Def; + else { + // If there is no definition, then create an external linkage symbol and + // hope that someone else fills it in for us (and fail to link if they + // don't). + assert(!OldGV); + Protocol = new llvm::GlobalVariable(TheModule, ProtocolTy, + /*isConstant*/false, + llvm::GlobalValue::ExternalLinkage, nullptr, SymName); + return Protocol; + } SmallVector<llvm::Constant*, 16> Protocols; for (const auto *PI : PD->protocols()) @@ -1301,8 +1338,6 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { EmitProtocolMethodList(PD->class_methods(), ClassMethodList, OptionalClassMethodList); - auto SymName = SymbolForProtocol(ProtocolName); - auto *OldGV = TheModule.getGlobalVariable(SymName); // The isa pointer must be set to a magic number so the runtime knows it's // the correct layout. ConstantInitBuilder builder(CGM); @@ -1326,7 +1361,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { auto *GV = ProtocolBuilder.finishAndCreateGlobal(SymName, CGM.getPointerAlign(), false, llvm::GlobalValue::ExternalLinkage); - GV->setSection(ProtocolSection); + GV->setSection(sectionName<ProtocolSection>()); GV->setComdat(TheModule.getOrInsertComdat(SymName)); if (OldGV) { OldGV->replaceAllUsesWith(llvm::ConstantExpr::getBitCast(GV, @@ -1342,8 +1377,8 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { return Val; return llvm::ConstantExpr::getBitCast(Val, Ty); } - llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel, - const std::string &TypeEncoding) override { + llvm::Value *GetTypedSelector(CodeGenFunction &CGF, Selector Sel, + const std::string &TypeEncoding) override { return GetConstantSelector(Sel, TypeEncoding); } llvm::Constant *GetTypeString(llvm::StringRef TypeEncoding) { @@ -1359,6 +1394,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { TypeEncoding); auto *GV = new llvm::GlobalVariable(TheModule, Init->getType(), true, llvm::GlobalValue::LinkOnceODRLinkage, Init, TypesVarName); + 
GV->setComdat(TheModule.getOrInsertComdat(TypesVarName)); GV->setVisibility(llvm::GlobalValue::HiddenVisibility); TypesGlobal = GV; } @@ -1387,12 +1423,41 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { CGM.getPointerAlign(), false, llvm::GlobalValue::LinkOnceODRLinkage); GV->setComdat(TheModule.getOrInsertComdat(SelVarName)); GV->setVisibility(llvm::GlobalValue::HiddenVisibility); - GV->setSection(SelSection); + GV->setSection(sectionName<SelectorSection>()); auto *SelVal = EnforceType(GV, SelectorTy); return SelVal; } + llvm::StructType *emptyStruct = nullptr; + + /// Return pointers to the start and end of a section. On ELF platforms, we + /// use the __start_ and __stop_ symbols that GNU-compatible linkers will set + /// to the start and end of section names, as long as those section names are + /// valid identifiers and the symbols are referenced but not defined. On + /// Windows, we use the fact that MSVC-compatible linkers will lexically sort + /// by subsections and place everything that we want to reference in a middle + /// subsection and then insert zero-sized symbols in subsections a and z. 
std::pair<llvm::Constant*,llvm::Constant*> GetSectionBounds(StringRef Section) { + if (CGM.getTriple().isOSBinFormatCOFF()) { + if (emptyStruct == nullptr) { + emptyStruct = llvm::StructType::create(VMContext, ".objc_section_sentinel"); + emptyStruct->setBody({}, /*isPacked*/true); + } + auto ZeroInit = llvm::Constant::getNullValue(emptyStruct); + auto Sym = [&](StringRef Prefix, StringRef SecSuffix) { + auto *Sym = new llvm::GlobalVariable(TheModule, emptyStruct, + /*isConstant*/false, + llvm::GlobalValue::LinkOnceODRLinkage, ZeroInit, Prefix + + Section); + Sym->setVisibility(llvm::GlobalValue::HiddenVisibility); + Sym->setSection((Section + SecSuffix).str()); + Sym->setComdat(TheModule.getOrInsertComdat((Prefix + + Section).str())); + Sym->setAlignment(1); + return Sym; + }; + return { Sym("__start_", "$a"), Sym("__stop", "$z") }; + } auto *Start = new llvm::GlobalVariable(TheModule, PtrTy, /*isConstant*/false, llvm::GlobalValue::ExternalLinkage, nullptr, StringRef("__start_") + @@ -1405,6 +1470,9 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { Stop->setVisibility(llvm::GlobalValue::HiddenVisibility); return { Start, Stop }; } + CatchTypeInfo getCatchAllTypeInfo() override { + return CGM.getCXXABI().getCatchAllTypeInfo(); + } llvm::Function *ModuleInitFunction() override { llvm::Function *LoadFunction = llvm::Function::Create( llvm::FunctionType::get(llvm::Type::getVoidTy(VMContext), false), @@ -1420,19 +1488,11 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { ConstantInitBuilder builder(CGM); auto InitStructBuilder = builder.beginStruct(); InitStructBuilder.addInt(Int64Ty, 0); - auto addSection = [&](const char *section) { - auto bounds = GetSectionBounds(section); + for (auto *s : SectionsBaseNames) { + auto bounds = GetSectionBounds(s); InitStructBuilder.add(bounds.first); InitStructBuilder.add(bounds.second); }; - addSection(SelSection); - addSection(ClsSection); - addSection(ClsRefSection); - addSection(CatSection); - addSection(ProtocolSection); - 
addSection(ProtocolRefSection); - addSection(ClassAliasSection); - addSection(ConstantStringSection); auto *InitStruct = InitStructBuilder.finishAndCreateGlobal(".objc_init", CGM.getPointerAlign(), false, llvm::GlobalValue::LinkOnceODRLinkage); InitStruct->setVisibility(llvm::GlobalValue::HiddenVisibility); @@ -1451,18 +1511,23 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { // Check that this hasn't been renamed. This shouldn't happen, because // this function should be called precisely once. assert(InitVar->getName() == ".objc_ctor"); - InitVar->setSection(".ctors"); + // In Windows, initialisers are sorted by the suffix. XCL is for library + // initialisers, which run before user initialisers. We are running + // Objective-C loads at the end of library load. This means +load methods + // will run before any other static constructors, but that static + // constructors can see a fully initialised Objective-C state. + if (CGM.getTriple().isOSBinFormatCOFF()) + InitVar->setSection(".CRT$XCLz"); + else + InitVar->setSection(".ctors"); InitVar->setVisibility(llvm::GlobalValue::HiddenVisibility); InitVar->setComdat(TheModule.getOrInsertComdat(".objc_ctor")); - CGM.addCompilerUsedGlobal(InitVar); + CGM.addUsedGlobal(InitVar); for (auto *C : Categories) { auto *Cat = cast<llvm::GlobalVariable>(C->stripPointerCasts()); - Cat->setSection(CatSection); + Cat->setSection(sectionName<CategorySection>()); CGM.addUsedGlobal(Cat); } - // Add a null value fore each special section so that we can always - // guarantee that the _start and _stop symbols will exist and be - // meaningful. 
auto createNullGlobal = [&](StringRef Name, ArrayRef<llvm::Constant*> Init, StringRef Section) { auto nullBuilder = builder.beginStruct(); @@ -1476,38 +1541,48 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { CGM.addUsedGlobal(GV); return GV; }; - createNullGlobal(".objc_null_selector", {NULLPtr, NULLPtr}, SelSection); - if (Categories.empty()) - createNullGlobal(".objc_null_category", {NULLPtr, NULLPtr, - NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr}, CatSection); - if (!EmittedClass) { - createNullGlobal(".objc_null_cls_init_ref", NULLPtr, ClsSection); - createNullGlobal(".objc_null_class_ref", { NULLPtr, NULLPtr }, - ClsRefSection); - } - if (!EmittedProtocol) - createNullGlobal(".objc_null_protocol", {NULLPtr, NULLPtr, NULLPtr, - NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr, - NULLPtr}, ProtocolSection); - if (!EmittedProtocolRef) - createNullGlobal(".objc_null_protocol_ref", {NULLPtr}, ProtocolRefSection); - if (!ClassAliases.empty()) - for (auto clsAlias : ClassAliases) - createNullGlobal(std::string(".objc_class_alias") + - clsAlias.second, { MakeConstantString(clsAlias.second), - GetClassVar(clsAlias.first) }, ClassAliasSection); - else - createNullGlobal(".objc_null_class_alias", { NULLPtr, NULLPtr }, - ClassAliasSection); - if (ConstantStrings.empty()) { - auto i32Zero = llvm::ConstantInt::get(Int32Ty, 0); - createNullGlobal(".objc_null_constant_string", { NULLPtr, i32Zero, - i32Zero, i32Zero, i32Zero, NULLPtr }, ConstantStringSection); + for (auto clsAlias : ClassAliases) + createNullGlobal(std::string(".objc_class_alias") + + clsAlias.second, { MakeConstantString(clsAlias.second), + GetClassVar(clsAlias.first) }, sectionName<ClassAliasSection>()); + // On ELF platforms, add a null value for each special section so that we + // can always guarantee that the _start and _stop symbols will exist and be + // meaningful. This is not required on COFF platforms, where our start and + // stop symbols will create the section. 
+ if (!CGM.getTriple().isOSBinFormatCOFF()) { + createNullGlobal(".objc_null_selector", {NULLPtr, NULLPtr}, + sectionName<SelectorSection>()); + if (Categories.empty()) + createNullGlobal(".objc_null_category", {NULLPtr, NULLPtr, + NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr}, + sectionName<CategorySection>()); + if (!EmittedClass) { + createNullGlobal(".objc_null_cls_init_ref", NULLPtr, + sectionName<ClassSection>()); + createNullGlobal(".objc_null_class_ref", { NULLPtr, NULLPtr }, + sectionName<ClassReferenceSection>()); + } + if (!EmittedProtocol) + createNullGlobal(".objc_null_protocol", {NULLPtr, NULLPtr, NULLPtr, + NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr, + NULLPtr}, sectionName<ProtocolSection>()); + if (!EmittedProtocolRef) + createNullGlobal(".objc_null_protocol_ref", {NULLPtr}, + sectionName<ProtocolReferenceSection>()); + if (ClassAliases.empty()) + createNullGlobal(".objc_null_class_alias", { NULLPtr, NULLPtr }, + sectionName<ClassAliasSection>()); + if (ConstantStrings.empty()) { + auto i32Zero = llvm::ConstantInt::get(Int32Ty, 0); + createNullGlobal(".objc_null_constant_string", { NULLPtr, i32Zero, + i32Zero, i32Zero, i32Zero, NULLPtr }, + sectionName<ConstantStringSection>()); + } } ConstantStrings.clear(); Categories.clear(); Classes.clear(); - return nullptr;//CGObjCGNU::ModuleInitFunction(); + return nullptr; } /// In the v2 ABI, ivar offset variables use the type encoding in their name /// to trigger linker failures if the types don't match. 
@@ -1774,7 +1849,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { } auto *classRefSymbol = GetClassVar(className); - classRefSymbol->setSection(ClsRefSection); + classRefSymbol->setSection(sectionName<ClassReferenceSection>()); classRefSymbol->setInitializer(llvm::ConstantExpr::getBitCast(classStruct, IdTy)); @@ -1805,7 +1880,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { auto classInitRef = new llvm::GlobalVariable(TheModule, classStruct->getType(), false, llvm::GlobalValue::ExternalLinkage, classStruct, "._OBJC_INIT_CLASS_" + className); - classInitRef->setSection(ClsSection); + classInitRef->setSection(sectionName<ClassSection>()); CGM.addUsedGlobal(classInitRef); EmittedClass = true; @@ -1829,6 +1904,18 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { }; +const char *const CGObjCGNUstep2::SectionsBaseNames[8] = +{ +"__objc_selectors", +"__objc_classes", +"__objc_class_refs", +"__objc_cats", +"__objc_protocols", +"__objc_protocol_refs", +"__objc_class_aliases", +"__objc_constant_string" +}; + /// Support for the ObjFW runtime. 
class CGObjCObjFW: public CGObjCGNU { protected: @@ -1931,6 +2018,8 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion, ProtocolVersion(protocolClassVersion), ClassABIVersion(classABI) { msgSendMDKind = VMContext.getMDKindID("GNUObjCMessageSend"); + usesSEHExceptions = + cgm.getContext().getTargetInfo().getTriple().isWindowsMSVCEnvironment(); CodeGenTypes &Types = CGM.getTypes(); IntTy = cast<llvm::IntegerType>( @@ -2121,8 +2210,8 @@ llvm::Value *CGObjCGNU::EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF) { return Value; } -llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF, Selector Sel, - const std::string &TypeEncoding) { +llvm::Value *CGObjCGNU::GetTypedSelector(CodeGenFunction &CGF, Selector Sel, + const std::string &TypeEncoding) { SmallVectorImpl<TypedSelector> &Types = SelectorTable[Sel]; llvm::GlobalAlias *SelValue = nullptr; @@ -2155,13 +2244,13 @@ Address CGObjCGNU::GetAddrOfSelector(CodeGenFunction &CGF, Selector Sel) { } llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF, Selector Sel) { - return GetSelector(CGF, Sel, std::string()); + return GetTypedSelector(CGF, Sel, std::string()); } llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF, const ObjCMethodDecl *Method) { std::string SelTypes = CGM.getContext().getObjCEncodingForMethodDecl(Method); - return GetSelector(CGF, Method->getSelector(), SelTypes); + return GetTypedSelector(CGF, Method->getSelector(), SelTypes); } llvm::Constant *CGObjCGNU::GetEHType(QualType T) { @@ -2186,6 +2275,9 @@ llvm::Constant *CGObjCGNU::GetEHType(QualType T) { } llvm::Constant *CGObjCGNUstep::GetEHType(QualType T) { + if (usesSEHExceptions) + return CGM.getCXXABI().getAddrOfRTTIDescriptor(T); + if (!CGM.getLangOpts().CPlusPlus) return CGObjCGNU::GetEHType(T); @@ -3018,18 +3110,21 @@ llvm::Constant *CGObjCGNU::MakeBitField(ArrayRef<bool> bits) { return ptr; } +llvm::Constant *CGObjCGNU::GenerateCategoryProtocolList(const + ObjCCategoryDecl *OCD) { + SmallVector<std::string, 16> 
Protocols; + for (const auto *PD : OCD->getReferencedProtocols()) + Protocols.push_back(PD->getNameAsString()); + return GenerateProtocolList(Protocols); +} + void CGObjCGNU::GenerateCategory(const ObjCCategoryImplDecl *OCD) { const ObjCInterfaceDecl *Class = OCD->getClassInterface(); std::string ClassName = Class->getNameAsString(); std::string CategoryName = OCD->getNameAsString(); // Collect the names of referenced protocols - SmallVector<std::string, 16> Protocols; const ObjCCategoryDecl *CatDecl = OCD->getCategoryDecl(); - const ObjCList<ObjCProtocolDecl> &Protos = CatDecl->getReferencedProtocols(); - for (ObjCList<ObjCProtocolDecl>::iterator I = Protos.begin(), - E = Protos.end(); I != E; ++I) - Protocols.push_back((*I)->getNameAsString()); ConstantInitBuilder Builder(CGM); auto Elements = Builder.beginStruct(); @@ -3051,7 +3146,7 @@ void CGObjCGNU::GenerateCategory(const ObjCCategoryImplDecl *OCD) { GenerateMethodList(ClassName, CategoryName, ClassMethods, true), PtrTy); // Protocol list - Elements.addBitCast(GenerateProtocolList(Protocols), PtrTy); + Elements.addBitCast(GenerateCategoryProtocolList(CatDecl), PtrTy); if (isRuntime(ObjCRuntime::GNUstep, 2)) { const ObjCCategoryDecl *Category = Class->FindCategoryDeclaration(OCD->getIdentifier()); @@ -3460,12 +3555,16 @@ llvm::Function *CGObjCGNU::ModuleInitFunction() { ConstantInitBuilder builder(CGM); auto selectors = builder.beginArray(selStructTy); auto &table = SelectorTable; // MSVC workaround - for (auto &entry : table) { + std::vector<Selector> allSelectors; + for (auto &entry : table) + allSelectors.push_back(entry.first); + llvm::sort(allSelectors); - std::string selNameStr = entry.first.getAsString(); + for (auto &untypedSel : allSelectors) { + std::string selNameStr = untypedSel.getAsString(); llvm::Constant *selName = ExportUniqueString(selNameStr, ".objc_sel_name"); - for (TypedSelector &sel : entry.second) { + for (TypedSelector &sel : table[untypedSel]) { llvm::Constant *selectorTypeEncoding = 
NULLPtr; if (!sel.first.empty()) selectorTypeEncoding = @@ -3726,6 +3825,7 @@ void CGObjCGNU::EmitThrowStmt(CodeGenFunction &CGF, const ObjCAtThrowStmt &S, bool ClearInsertionPoint) { llvm::Value *ExceptionAsObject; + bool isRethrow = false; if (const Expr *ThrowExpr = S.getThrowExpr()) { llvm::Value *Exception = CGF.EmitObjCThrowOperand(ThrowExpr); @@ -3734,11 +3834,24 @@ void CGObjCGNU::EmitThrowStmt(CodeGenFunction &CGF, assert((!CGF.ObjCEHValueStack.empty() && CGF.ObjCEHValueStack.back()) && "Unexpected rethrow outside @catch block."); ExceptionAsObject = CGF.ObjCEHValueStack.back(); + isRethrow = true; + } + if (isRethrow && usesSEHExceptions) { + // For SEH, ExceptionAsObject may be undef, because the catch handler is + // not passed it for catchalls and so it is not visible to the catch + // funclet. The real thrown object will still be live on the stack at this + // point and will be rethrown. If we are explicitly rethrowing the object + // that was passed into the `@catch` block, then this code path is not + // reached and we will instead call `objc_exception_throw` with an explicit + // argument. + CGF.EmitRuntimeCallOrInvoke(ExceptionReThrowFn).setDoesNotReturn(); + } + else { + ExceptionAsObject = CGF.Builder.CreateBitCast(ExceptionAsObject, IdTy); + llvm::CallSite Throw = + CGF.EmitRuntimeCallOrInvoke(ExceptionThrowFn, ExceptionAsObject); + Throw.setDoesNotReturn(); } - ExceptionAsObject = CGF.Builder.CreateBitCast(ExceptionAsObject, IdTy); - llvm::CallSite Throw = - CGF.EmitRuntimeCallOrInvoke(ExceptionThrowFn, ExceptionAsObject); - Throw.setDoesNotReturn(); CGF.Builder.CreateUnreachable(); if (ClearInsertionPoint) CGF.Builder.ClearInsertionPoint(); @@ -3812,40 +3925,10 @@ llvm::GlobalVariable *CGObjCGNU::ObjCIvarOffsetVariable( // is. This allows code compiled with non-fragile ivars to work correctly // when linked against code which isn't (most of the time). 
llvm::GlobalVariable *IvarOffsetPointer = TheModule.getNamedGlobal(Name); - if (!IvarOffsetPointer) { - // This will cause a run-time crash if we accidentally use it. A value of - // 0 would seem more sensible, but will silently overwrite the isa pointer - // causing a great deal of confusion. - uint64_t Offset = -1; - // We can't call ComputeIvarBaseOffset() here if we have the - // implementation, because it will create an invalid ASTRecordLayout object - // that we are then stuck with forever, so we only initialize the ivar - // offset variable with a guess if we only have the interface. The - // initializer will be reset later anyway, when we are generating the class - // description. - if (!CGM.getContext().getObjCImplementation( - const_cast<ObjCInterfaceDecl *>(ID))) - Offset = ComputeIvarBaseOffset(CGM, ID, Ivar); - - llvm::ConstantInt *OffsetGuess = llvm::ConstantInt::get(Int32Ty, Offset, - /*isSigned*/true); - // Don't emit the guess in non-PIC code because the linker will not be able - // to replace it with the real version for a library. In non-PIC code you - // must compile with the fragile ABI if you want to use ivars from a - // GCC-compiled class. 
- if (CGM.getLangOpts().PICLevel) { - llvm::GlobalVariable *IvarOffsetGV = new llvm::GlobalVariable(TheModule, - Int32Ty, false, - llvm::GlobalValue::PrivateLinkage, OffsetGuess, Name+".guess"); - IvarOffsetPointer = new llvm::GlobalVariable(TheModule, - IvarOffsetGV->getType(), false, llvm::GlobalValue::LinkOnceAnyLinkage, - IvarOffsetGV, Name); - } else { - IvarOffsetPointer = new llvm::GlobalVariable(TheModule, - llvm::Type::getInt32PtrTy(VMContext), false, - llvm::GlobalValue::ExternalLinkage, nullptr, Name); - } - } + if (!IvarOffsetPointer) + IvarOffsetPointer = new llvm::GlobalVariable(TheModule, + llvm::Type::getInt32PtrTy(VMContext), false, + llvm::GlobalValue::ExternalLinkage, nullptr, Name); return IvarOffsetPointer; } diff --git a/lib/CodeGen/CGObjCMac.cpp b/lib/CodeGen/CGObjCMac.cpp index 2b54e7bd67af..d91eb43ca322 100644 --- a/lib/CodeGen/CGObjCMac.cpp +++ b/lib/CodeGen/CGObjCMac.cpp @@ -23,9 +23,9 @@ #include "clang/AST/DeclObjC.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtObjC.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/LangOptions.h" #include "clang/CodeGen/CGFunctionInfo.h" -#include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SetVector.h" @@ -37,6 +37,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/raw_ostream.h" #include <cstdio> @@ -1085,9 +1086,14 @@ public: const CGBlockInfo &blockInfo) override; llvm::Constant *BuildRCBlockLayout(CodeGen::CodeGenModule &CGM, const CGBlockInfo &blockInfo) override; + std::string getRCBlockLayoutStr(CodeGen::CodeGenModule &CGM, + const CGBlockInfo &blockInfo) override; llvm::Constant *BuildByrefLayout(CodeGen::CodeGenModule &CGM, QualType T) override; + +private: + void fillRunSkipBlockVars(CodeGenModule &CGM, const CGBlockInfo &blockInfo); }; namespace { @@ -2795,8 +2801,44 @@ 
llvm::Constant *CGObjCCommonMac::getBitmapBlockLayout(bool ComputeByrefLayout) { return getConstantGEP(VMContext, Entry, 0, 0); } -llvm::Constant *CGObjCCommonMac::BuildRCBlockLayout(CodeGenModule &CGM, - const CGBlockInfo &blockInfo) { +static std::string getBlockLayoutInfoString( + const SmallVectorImpl<CGObjCCommonMac::RUN_SKIP> &RunSkipBlockVars, + bool HasCopyDisposeHelpers) { + std::string Str; + for (const CGObjCCommonMac::RUN_SKIP &R : RunSkipBlockVars) { + if (R.opcode == CGObjCCommonMac::BLOCK_LAYOUT_UNRETAINED) { + // Copy/dispose helpers don't have any information about + // __unsafe_unretained captures, so unconditionally concatenate a string. + Str += "u"; + } else if (HasCopyDisposeHelpers) { + // Information about __strong, __weak, or byref captures has already been + // encoded into the names of the copy/dispose helpers. We have to add a + // string here only when the copy/dispose helpers aren't generated (which + // happens when the block is non-escaping). + continue; + } else { + switch (R.opcode) { + case CGObjCCommonMac::BLOCK_LAYOUT_STRONG: + Str += "s"; + break; + case CGObjCCommonMac::BLOCK_LAYOUT_BYREF: + Str += "r"; + break; + case CGObjCCommonMac::BLOCK_LAYOUT_WEAK: + Str += "w"; + break; + default: + continue; + } + } + Str += llvm::to_string(R.block_var_bytepos.getQuantity()); + Str += "l" + llvm::to_string(R.block_var_size.getQuantity()); + } + return Str; +} + +void CGObjCCommonMac::fillRunSkipBlockVars(CodeGenModule &CGM, + const CGBlockInfo &blockInfo) { assert(CGM.getLangOpts().getGC() == LangOptions::NonGC); RunSkipBlockVars.clear(); @@ -2845,9 +2887,22 @@ llvm::Constant *CGObjCCommonMac::BuildRCBlockLayout(CodeGenModule &CGM, UpdateRunSkipBlockVars(CI.isByRef(), getBlockCaptureLifetime(type, false), fieldOffset, fieldSize); } +} + +llvm::Constant * +CGObjCCommonMac::BuildRCBlockLayout(CodeGenModule &CGM, + const CGBlockInfo &blockInfo) { + fillRunSkipBlockVars(CGM, blockInfo); return getBitmapBlockLayout(false); } +std::string 
CGObjCCommonMac::getRCBlockLayoutStr(CodeGenModule &CGM, + const CGBlockInfo &blockInfo) { + fillRunSkipBlockVars(CGM, blockInfo); + return getBlockLayoutInfoString(RunSkipBlockVars, + blockInfo.needsCopyDisposeHelpers()); +} + llvm::Constant *CGObjCCommonMac::BuildByrefLayout(CodeGen::CodeGenModule &CGM, QualType T) { assert(CGM.getLangOpts().getGC() == LangOptions::NonGC); @@ -6783,8 +6838,9 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( return Entry; // Use the protocol definition, if there is one. - if (const ObjCProtocolDecl *Def = PD->getDefinition()) - PD = Def; + assert(PD->hasDefinition() && + "emitting protocol metadata without definition"); + PD = PD->getDefinition(); auto methodLists = ProtocolMethodLists::get(PD); @@ -7132,15 +7188,21 @@ CGObjCNonFragileABIMac::GetClassGlobal(StringRef Name, Weak ? llvm::GlobalValue::ExternalWeakLinkage : llvm::GlobalValue::ExternalLinkage; - - llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(Name); - if (!GV) { - GV = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassnfABITy, - false, L, nullptr, Name); + if (!GV || GV->getType() != ObjCTypes.ClassnfABITy->getPointerTo()) { + auto *NewGV = new llvm::GlobalVariable(ObjCTypes.ClassnfABITy, false, L, + nullptr, Name); if (DLLImport) - GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + NewGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + + if (GV) { + GV->replaceAllUsesWith( + llvm::ConstantExpr::getBitCast(NewGV, GV->getType())); + GV->eraseFromParent(); + } + GV = NewGV; + CGM.getModule().getGlobalList().push_back(GV); } assert(GV->getLinkage() == L); diff --git a/lib/CodeGen/CGObjCRuntime.cpp b/lib/CodeGen/CGObjCRuntime.cpp index a43885c0f9a2..4b6f24a03f27 100644 --- a/lib/CodeGen/CGObjCRuntime.cpp +++ b/lib/CodeGen/CGObjCRuntime.cpp @@ -15,6 +15,7 @@ #include "CGObjCRuntime.h" #include "CGCleanup.h" +#include "CGCXXABI.h" #include "CGRecordLayout.h" #include "CodeGenFunction.h" #include 
"CodeGenModule.h" @@ -22,6 +23,7 @@ #include "clang/AST/StmtObjC.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "llvm/IR/CallSite.h" +#include "llvm/Support/SaveAndRestore.h" using namespace clang; using namespace CodeGen; @@ -120,6 +122,8 @@ namespace { const Stmt *Body; llvm::BasicBlock *Block; llvm::Constant *TypeInfo; + /// Flags used to differentiate cleanups and catchalls in Windows SEH + unsigned Flags; }; struct CallObjCEndCatch final : EHScopeStack::Cleanup { @@ -148,13 +152,17 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF, if (S.getNumCatchStmts()) Cont = CGF.getJumpDestInCurrentScope("eh.cont"); + bool useFunclets = EHPersonality::get(CGF).usesFuncletPads(); + CodeGenFunction::FinallyInfo FinallyInfo; - if (const ObjCAtFinallyStmt *Finally = S.getFinallyStmt()) - FinallyInfo.enter(CGF, Finally->getFinallyBody(), - beginCatchFn, endCatchFn, exceptionRethrowFn); + if (!useFunclets) + if (const ObjCAtFinallyStmt *Finally = S.getFinallyStmt()) + FinallyInfo.enter(CGF, Finally->getFinallyBody(), + beginCatchFn, endCatchFn, exceptionRethrowFn); SmallVector<CatchHandler, 8> Handlers; + // Enter the catch, if there is one. if (S.getNumCatchStmts()) { for (unsigned I = 0, N = S.getNumCatchStmts(); I != N; ++I) { @@ -166,10 +174,13 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF, Handler.Variable = CatchDecl; Handler.Body = CatchStmt->getCatchBody(); Handler.Block = CGF.createBasicBlock("catch"); + Handler.Flags = 0; // @catch(...) always matches. if (!CatchDecl) { - Handler.TypeInfo = nullptr; // catch-all + auto catchAll = getCatchAllTypeInfo(); + Handler.TypeInfo = catchAll.RTTI; + Handler.Flags = catchAll.Flags; // Don't consider any other catches. 
break; } @@ -179,9 +190,31 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF, EHCatchScope *Catch = CGF.EHStack.pushCatch(Handlers.size()); for (unsigned I = 0, E = Handlers.size(); I != E; ++I) - Catch->setHandler(I, Handlers[I].TypeInfo, Handlers[I].Block); + Catch->setHandler(I, { Handlers[I].TypeInfo, Handlers[I].Flags }, Handlers[I].Block); } + if (useFunclets) + if (const ObjCAtFinallyStmt *Finally = S.getFinallyStmt()) { + CodeGenFunction HelperCGF(CGM, /*suppressNewContext=*/true); + if (!CGF.CurSEHParent) + CGF.CurSEHParent = cast<NamedDecl>(CGF.CurFuncDecl); + // Outline the finally block. + const Stmt *FinallyBlock = Finally->getFinallyBody(); + HelperCGF.startOutlinedSEHHelper(CGF, /*isFilter*/false, FinallyBlock); + + // Emit the original filter expression, convert to i32, and return. + HelperCGF.EmitStmt(FinallyBlock); + + HelperCGF.FinishFunction(FinallyBlock->getEndLoc()); + + llvm::Function *FinallyFunc = HelperCGF.CurFn; + + + // Push a cleanup for __finally blocks. + CGF.pushSEHCleanup(NormalAndEHCleanup, FinallyFunc); + } + + // Emit the try body. CGF.EmitStmt(S.getTryBody()); @@ -197,6 +230,13 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF, CatchHandler &Handler = Handlers[I]; CGF.EmitBlock(Handler.Block); + llvm::CatchPadInst *CPI = nullptr; + SaveAndRestore<llvm::Instruction *> RestoreCurrentFuncletPad(CGF.CurrentFuncletPad); + if (useFunclets) + if ((CPI = dyn_cast_or_null<llvm::CatchPadInst>(Handler.Block->getFirstNonPHI()))) { + CGF.CurrentFuncletPad = CPI; + CPI->setOperand(2, CGF.getExceptionSlot().getPointer()); + } llvm::Value *RawExn = CGF.getExceptionFromSlot(); // Enter the catch. 
@@ -223,6 +263,8 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF, CGF.EmitAutoVarDecl(*CatchParam); EmitInitOfCatchParam(CGF, CastExn, CatchParam); } + if (CPI) + CGF.EHStack.pushCleanup<CatchRetScope>(NormalCleanup, CPI); CGF.ObjCEHValueStack.push_back(Exn); CGF.EmitStmt(Handler.Body); @@ -232,13 +274,13 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF, cleanups.ForceCleanup(); CGF.EmitBranchThroughCleanup(Cont); - } + } // Go back to the try-statement fallthrough. CGF.Builder.restoreIP(SavedIP); // Pop out of the finally. - if (S.getFinallyStmt()) + if (!useFunclets && S.getFinallyStmt()) FinallyInfo.exit(CGF); if (Cont.isValid()) @@ -254,7 +296,7 @@ void CGObjCRuntime::EmitInitOfCatchParam(CodeGenFunction &CGF, switch (paramDecl->getType().getQualifiers().getObjCLifetime()) { case Qualifiers::OCL_Strong: exn = CGF.EmitARCRetainNonBlock(exn); - // fallthrough + LLVM_FALLTHROUGH; case Qualifiers::OCL_None: case Qualifiers::OCL_ExplicitNone: @@ -277,7 +319,7 @@ namespace { : SyncExitFn(SyncExitFn), SyncArg(SyncArg) {} void Emit(CodeGenFunction &CGF, Flags flags) override { - CGF.Builder.CreateCall(SyncExitFn, SyncArg)->setDoesNotThrow(); + CGF.EmitNounwindRuntimeCall(SyncExitFn, SyncArg); } }; } diff --git a/lib/CodeGen/CGObjCRuntime.h b/lib/CodeGen/CGObjCRuntime.h index ce082a61eb5e..fa16c198adbc 100644 --- a/lib/CodeGen/CGObjCRuntime.h +++ b/lib/CodeGen/CGObjCRuntime.h @@ -17,6 +17,7 @@ #define LLVM_CLANG_LIB_CODEGEN_CGOBJCRUNTIME_H #include "CGBuilder.h" #include "CGCall.h" +#include "CGCleanup.h" #include "CGValue.h" #include "clang/AST/DeclObjC.h" #include "clang/Basic/IdentifierTable.h" // Selector @@ -141,6 +142,8 @@ public: /// error to Sema. virtual llvm::Constant *GetEHType(QualType T) = 0; + virtual CatchTypeInfo getCatchAllTypeInfo() { return { nullptr, 0 }; } + /// Generate a constant string object. 
virtual ConstantAddress GenerateConstantString(const StringLiteral *) = 0; @@ -275,6 +278,10 @@ public: const CodeGen::CGBlockInfo &blockInfo) = 0; virtual llvm::Constant *BuildRCBlockLayout(CodeGen::CodeGenModule &CGM, const CodeGen::CGBlockInfo &blockInfo) = 0; + virtual std::string getRCBlockLayoutStr(CodeGen::CodeGenModule &CGM, + const CGBlockInfo &blockInfo) { + return {}; + } /// Returns an i8* which points to the byref layout information. virtual llvm::Constant *BuildByrefLayout(CodeGen::CodeGenModule &CGM, diff --git a/lib/CodeGen/CGOpenCLRuntime.cpp b/lib/CodeGen/CGOpenCLRuntime.cpp index 1da19a90c387..7f6f595dd5d1 100644 --- a/lib/CodeGen/CGOpenCLRuntime.cpp +++ b/lib/CodeGen/CGOpenCLRuntime.cpp @@ -62,6 +62,11 @@ llvm::Type *CGOpenCLRuntime::convertOpenCLSpecificType(const Type *T) { case BuiltinType::OCLReserveID: return llvm::PointerType::get( llvm::StructType::create(Ctx, "opencl.reserve_id_t"), AddrSpc); +#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ + case BuiltinType::Id: \ + return llvm::PointerType::get( \ + llvm::StructType::create(Ctx, "opencl." #ExtType), AddrSpc); +#include "clang/Basic/OpenCLExtensionTypes.def" } } @@ -118,25 +123,6 @@ llvm::PointerType *CGOpenCLRuntime::getGenericVoidPointerType() { CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); } -// Get the block literal from an expression derived from the block expression. -// OpenCL v2.0 s6.12.5: -// Block variable declarations are implicitly qualified with const. Therefore -// all block variables must be initialized at declaration time and may not be -// reassigned. 
-static const BlockExpr *getBlockExpr(const Expr *E) { - if (auto Cast = dyn_cast<CastExpr>(E)) { - E = Cast->getSubExpr(); - } - if (auto DR = dyn_cast<DeclRefExpr>(E)) { - E = cast<VarDecl>(DR->getDecl())->getInit(); - } - E = E->IgnoreImplicit(); - if (auto Cast = dyn_cast<CastExpr>(E)) { - E = Cast->getSubExpr(); - } - return cast<BlockExpr>(E); -} - /// Record emitted llvm invoke function and llvm block literal for the /// corresponding block expression. void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E, @@ -151,15 +137,21 @@ void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E, EnqueuedBlockMap[E].Kernel = nullptr; } -llvm::Function *CGOpenCLRuntime::getInvokeFunction(const Expr *E) { - return EnqueuedBlockMap[getBlockExpr(E)].InvokeFunc; -} - CGOpenCLRuntime::EnqueuedBlockInfo CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) { CGF.EmitScalarExpr(E); - const BlockExpr *Block = getBlockExpr(E); + // The block literal may be assigned to a const variable. Chasing down + // to get the block literal. + if (auto DR = dyn_cast<DeclRefExpr>(E)) { + E = cast<VarDecl>(DR->getDecl())->getInit(); + } + E = E->IgnoreImplicit(); + if (auto Cast = dyn_cast<CastExpr>(E)) { + E = Cast->getSubExpr(); + } + auto *Block = cast<BlockExpr>(E); + assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() && "Block expression not emitted"); diff --git a/lib/CodeGen/CGOpenCLRuntime.h b/lib/CodeGen/CGOpenCLRuntime.h index a513340827a8..750721f1b80f 100644 --- a/lib/CodeGen/CGOpenCLRuntime.h +++ b/lib/CodeGen/CGOpenCLRuntime.h @@ -16,6 +16,7 @@ #ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENCLRUNTIME_H #define LLVM_CLANG_LIB_CODEGEN_CGOPENCLRUNTIME_H +#include "clang/AST/Expr.h" #include "clang/AST/Type.h" #include "llvm/ADT/DenseMap.h" #include "llvm/IR/Type.h" @@ -91,10 +92,6 @@ public: /// \param Block block literal emitted for the block expression. 
void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF, llvm::Value *Block); - - /// \return LLVM block invoke function emitted for an expression derived from - /// the block expression. - llvm::Function *getInvokeFunction(const Expr *E); }; } diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index fa850155df4f..20eb0b29f427 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -353,7 +353,7 @@ public: if (VD->isLocalVarDeclOrParm()) continue; - DeclRefExpr DRE(const_cast<VarDecl *>(VD), + DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), /*RefersToEnclosingVariableOrCapture=*/false, VD->getType().getNonReferenceType(), VK_LValue, C.getLocation()); @@ -673,6 +673,9 @@ enum OpenMPRTLFunction { // // Offloading related calls // + // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 + // size); + OMPRTL__kmpc_push_target_tripcount, // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t // *arg_types); @@ -897,25 +900,6 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, CGF.EmitBlock(DoneBB, /*IsFinished=*/true); } -static llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> -isDeclareTargetDeclaration(const ValueDecl *VD) { - for (const Decl *D : VD->redecls()) { - if (!D->hasAttrs()) - continue; - if (const auto *Attr = D->getAttr<OMPDeclareTargetDeclAttr>()) - return Attr->getMapType(); - } - if (const auto *V = dyn_cast<VarDecl>(VD)) { - if (const VarDecl *TD = V->getTemplateInstantiationPattern()) - return isDeclareTargetDeclaration(TD); - } else if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { - if (const auto *TD = FD->getTemplateInstantiationPattern()) - return isDeclareTargetDeclaration(TD); - } - - return llvm::None; -} - LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { return CGF.EmitOMPSharedLValue(E); } @@ 
-1242,6 +1226,17 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, void CGOpenMPRuntime::clear() { InternalVars.clear(); + // Clean non-target variable declarations possibly used only in debug info. + for (const auto &Data : EmittedNonTargetVariables) { + if (!Data.getValue().pointsToAliveValue()) + continue; + auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); + if (!GV) + continue; + if (!GV->isDeclaration() || GV->getNumUses() > 0) + continue; + GV->eraseFromParent(); + } } std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { @@ -1314,27 +1309,19 @@ void CGOpenMPRuntime::emitUserDefinedReduction( CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { if (UDRMap.count(D) > 0) return; - ASTContext &C = CGM.getContext(); - if (!In || !Out) { - In = &C.Idents.get("omp_in"); - Out = &C.Idents.get("omp_out"); - } llvm::Function *Combiner = emitCombinerOrInitializer( - CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()), - cast<VarDecl>(D->lookup(Out).front()), + CGM, D->getType(), D->getCombiner(), + cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), + cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), /*IsCombiner=*/true); llvm::Function *Initializer = nullptr; if (const Expr *Init = D->getInitializer()) { - if (!Priv || !Orig) { - Priv = &C.Idents.get("omp_priv"); - Orig = &C.Idents.get("omp_orig"); - } Initializer = emitCombinerOrInitializer( CGM, D->getType(), D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? 
Init : nullptr, - cast<VarDecl>(D->lookup(Orig).front()), - cast<VarDecl>(D->lookup(Priv).front()), + cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), + cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), /*IsCombiner=*/false); } UDRMap.try_emplace(D, Combiner, Initializer); @@ -1406,8 +1393,8 @@ llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( bool Tied, unsigned &NumberOfParts) { auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, PrePostActionTy &) { - llvm::Value *ThreadID = getThreadID(CGF, D.getLocStart()); - llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getLocStart()); + llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); + llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); llvm::Value *TaskArgs[] = { UpLoc, ThreadID, CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), @@ -1456,17 +1443,17 @@ static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, template <class... As> static llvm::GlobalVariable * -createConstantGlobalStruct(CodeGenModule &CGM, QualType Ty, - ArrayRef<llvm::Constant *> Data, const Twine &Name, - As &&... Args) { +createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, + ArrayRef<llvm::Constant *> Data, const Twine &Name, + As &&... 
Args) { const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); ConstantInitBuilder CIBuilder(CGM); ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); buildStructValue(Fields, CGM, RD, RL, Data); return Fields.finishAndCreateGlobal( - Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), - /*isConstant=*/true, std::forward<As>(Args)...); + Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, + std::forward<As>(Args)...); } template <typename T> @@ -1483,7 +1470,9 @@ createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); - llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); + unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); + FlagsTy FlagsKey(Flags, Reserved2Flags); + llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey); if (!Entry) { if (!DefaultOpenMPPSource) { // Initialize default location for psource field of ident_t structure of @@ -1496,21 +1485,47 @@ Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); } - llvm::Constant *Data[] = {llvm::ConstantInt::getNullValue(CGM.Int32Ty), - llvm::ConstantInt::get(CGM.Int32Ty, Flags), - llvm::ConstantInt::getNullValue(CGM.Int32Ty), - llvm::ConstantInt::getNullValue(CGM.Int32Ty), - DefaultOpenMPPSource}; - llvm::GlobalValue *DefaultOpenMPLocation = createConstantGlobalStruct( - CGM, IdentQTy, Data, "", llvm::GlobalValue::PrivateLinkage); + llvm::Constant *Data[] = { + llvm::ConstantInt::getNullValue(CGM.Int32Ty), + llvm::ConstantInt::get(CGM.Int32Ty, Flags), + llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags), + llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource}; + llvm::GlobalValue *DefaultOpenMPLocation = + createGlobalStruct(CGM, IdentQTy, 
isDefaultLocationConstant(), Data, "", + llvm::GlobalValue::PrivateLinkage); DefaultOpenMPLocation->setUnnamedAddr( llvm::GlobalValue::UnnamedAddr::Global); - OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation; + OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation; } return Address(Entry, Align); } +void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, + bool AtCurrentPoint) { + auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); + assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); + + llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); + if (AtCurrentPoint) { + Elem.second.ServiceInsertPt = new llvm::BitCastInst( + Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); + } else { + Elem.second.ServiceInsertPt = + new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); + Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); + } +} + +void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { + auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); + if (Elem.second.ServiceInsertPt) { + llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; + Elem.second.ServiceInsertPt = nullptr; + Ptr->eraseFromParent(); + } +} + llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags) { @@ -1537,8 +1552,10 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, Elem.second.DebugLoc = AI.getPointer(); LocValue = AI; + if (!Elem.second.ServiceInsertPt) + setLocThreadIdInsertPt(CGF); CGBuilderTy::InsertPointGuard IPG(CGF.Builder); - CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); + CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), CGF.getTypeSize(IdentQTy)); } @@ -1608,21 +1625,25 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, // kmpc_global_thread_num(ident_t *loc). 
// Generate thread id value and cache this value for use across the // function. + auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); + if (!Elem.second.ServiceInsertPt) + setLocThreadIdInsertPt(CGF); CGBuilderTy::InsertPointGuard IPG(CGF.Builder); - CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); + CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); llvm::CallInst *Call = CGF.Builder.CreateCall( createRuntimeFunction(OMPRTL__kmpc_global_thread_num), emitUpdateLocation(CGF, Loc)); Call->setCallingConv(CGF.getRuntimeCC()); - auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); Elem.second.ThreadID = Call; return Call; } void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { assert(CGF.CurFn && "No function in current CodeGenFunction."); - if (OpenMPLocThreadIDMap.count(CGF.CurFn)) + if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { + clearLocThreadIdInsertPt(CGF); OpenMPLocThreadIDMap.erase(CGF.CurFn); + } if (FunctionUDRMap.count(CGF.CurFn) > 0) { for(auto *D : FunctionUDRMap[CGF.CurFn]) UDRMap.erase(D); @@ -2145,6 +2166,15 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); break; } + case OMPRTL__kmpc_push_target_tripcount: { + // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 + // size); + llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount"); + break; + } case OMPRTL__tgt_target: { // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t @@ -2417,7 +2447,7 @@ Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) { if (CGM.getLangOpts().OpenMPSimd) return Address::invalid(); llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = - isDeclareTargetDeclaration(VD); + 
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) { SmallString<64> PtrName; { @@ -2496,8 +2526,7 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( return nullptr; VD = VD->getDefinition(CGM.getContext()); - if (VD && ThreadPrivateWithDefinition.count(VD) == 0) { - ThreadPrivateWithDefinition.insert(VD); + if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { QualType ASTTy = VD->getType(); llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; @@ -2639,16 +2668,16 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, llvm::GlobalVariable *Addr, bool PerformInit) { Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = - isDeclareTargetDeclaration(VD); + OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) - return false; + return CGM.getLangOpts().OpenMPIsDevice; VD = VD->getDefinition(CGM.getContext()); - if (VD && !DeclareTargetWithDefinition.insert(VD).second) + if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) return CGM.getLangOpts().OpenMPIsDevice; QualType ASTTy = VD->getType(); - SourceLocation Loc = VD->getCanonicalDecl()->getLocStart(); + SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); // Produce the unique prefix to identify the new target regions. We use // the source location of the variable declaration which we know to not // conflict with any target region. 
@@ -3197,13 +3226,7 @@ void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); } -void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, - OpenMPDirectiveKind Kind, bool EmitChecks, - bool ForceSimpleCall) { - if (!CGF.HaveInsertPoint()) - return; - // Build call __kmpc_cancel_barrier(loc, thread_id); - // Build call __kmpc_barrier(loc, thread_id); +unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { unsigned Flags; if (Kind == OMPD_for) Flags = OMP_IDENT_BARRIER_IMPL_FOR; @@ -3215,6 +3238,17 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, Flags = OMP_IDENT_BARRIER_EXPL; else Flags = OMP_IDENT_BARRIER_IMPL; + return Flags; +} + +void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, + OpenMPDirectiveKind Kind, bool EmitChecks, + bool ForceSimpleCall) { + if (!CGF.HaveInsertPoint()) + return; + // Build call __kmpc_cancel_barrier(loc, thread_id); + // Build call __kmpc_barrier(loc, thread_id); + unsigned Flags = getDefaultFlagsForBarriers(Kind); // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, // thread_id); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), @@ -3287,6 +3321,18 @@ bool CGOpenMPRuntime::isStaticNonchunked( return Schedule == OMP_dist_sch_static; } +bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, + bool Chunked) const { + OpenMPSchedType Schedule = + getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); + return Schedule == OMP_sch_static_chunked; +} + +bool CGOpenMPRuntime::isStaticChunked( + OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { + OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); + return Schedule == OMP_dist_sch_static_chunked; +} bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { OpenMPSchedType Schedule = @@ -3784,8 +3830,8 @@ 
CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { DeviceImages, Index), HostEntriesBegin, HostEntriesEnd}; std::string Descriptor = getName({"omp_offloading", "descriptor"}); - llvm::GlobalVariable *Desc = createConstantGlobalStruct( - CGM, getTgtBinaryDescriptorQTy(), Data, Descriptor); + llvm::GlobalVariable *Desc = createGlobalStruct( + CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor); // Emit code to register or unregister the descriptor at execution // startup or closing, respectively. @@ -3818,7 +3864,19 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { CGF.disableDebugInfo(); const auto &FI = CGM.getTypes().arrangeNullaryFunction(); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); - std::string Descriptor = getName({"omp_offloading", "descriptor_reg"}); + + // Encode offload target triples into the registration function name. It + // will serve as a comdat key for the registration/unregistration code for + // this particular combination of offloading targets. 
+ SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U); + RegFnNameParts[0] = "omp_offloading"; + RegFnNameParts[1] = "descriptor_reg"; + llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2), + [](const llvm::Triple &T) -> const std::string& { + return T.getTriple(); + }); + llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end()); + std::string Descriptor = getName(RegFnNameParts); RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI); CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList()); CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc); @@ -3868,9 +3926,9 @@ void CGOpenMPRuntime::createOffloadEntry( llvm::ConstantInt::get(CGM.Int32Ty, Flags), llvm::ConstantInt::get(CGM.Int32Ty, 0)}; std::string EntryName = getName({"omp_offloading", "entry", ""}); - llvm::GlobalVariable *Entry = createConstantGlobalStruct( - CGM, getTgtOffloadEntryQTy(), Data, Twine(EntryName).concat(Name), - llvm::GlobalValue::WeakAnyLinkage); + llvm::GlobalVariable *Entry = createGlobalStruct( + CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, + Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); // The entry has to be created in the section the linker expects it to be. std::string Section = getName({"omp_offloading", "entries"}); @@ -3895,6 +3953,8 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { llvm::LLVMContext &C = M.getContext(); SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> OrderedEntries(OffloadEntriesInfoManager.size()); + llvm::SmallVector<StringRef, 16> ParentFunctions( + OffloadEntriesInfoManager.size()); // Auxiliary methods to create metadata values and strings. 
auto &&GetMDInt = [this](unsigned V) { @@ -3909,7 +3969,7 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { // Create function that emits metadata for each target region entry; auto &&TargetRegionMetadataEmitter = - [&C, MD, &OrderedEntries, &GetMDInt, &GetMDString]( + [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString]( unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line, const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { @@ -3929,6 +3989,7 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { // Save this entry in the right position of the ordered entries array. OrderedEntries[E.getOrder()] = &E; + ParentFunctions[E.getOrder()] = ParentName; // Add metadata to the named metadata node. MD->addOperand(llvm::MDNode::get(C, Ops)); @@ -3970,6 +4031,10 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( E)) { if (!CE->getID() || !CE->getAddress()) { + // Do not blame the entry if the parent funtion is not emitted. + StringRef FnName = ParentFunctions[CE->getOrder()]; + if (!CGM.GetGlobalValue(FnName)) + continue; unsigned DiagID = CGM.getDiags().getCustomDiagID( DiagnosticsEngine::Error, "Offloading entry for target region is incorrect: either the " @@ -3995,6 +4060,9 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { CGM.getDiags().Report(DiagID); continue; } + // The vaiable has no definition - no need to add the entry. 
+ if (CE->getVarSize().isZero()) + continue; break; } case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: @@ -5226,8 +5294,8 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, LBLVal.getPointer(), UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, Loc), - llvm::ConstantInt::getNullValue( - CGF.IntTy), // Always 0 because taskgroup emitted by the compiler + llvm::ConstantInt::getSigned( + CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler llvm::ConstantInt::getSigned( CGF.IntTy, Data.Schedule.getPointer() ? Data.Schedule.getInt() ? NumTasks : Grainsize @@ -5776,7 +5844,7 @@ static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, std::string Name = CGM.getOpenMPRuntime().getName( {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); Out << Prefix << Name << "_" - << D->getCanonicalDecl()->getLocStart().getRawEncoding(); + << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); return Out.str(); } @@ -6274,7 +6342,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( unsigned DeviceID; unsigned FileID; unsigned Line; - getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID, + getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, Line); SmallString<64> EntryFnName; { @@ -6589,17 +6657,17 @@ private: struct MapInfo { OMPClauseMappableExprCommon::MappableExprComponentListRef Components; OpenMPMapClauseKind MapType = OMPC_MAP_unknown; - OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown; + ArrayRef<OpenMPMapModifierKind> MapModifiers; bool ReturnDevicePointer = false; bool IsImplicit = false; MapInfo() = default; MapInfo( OMPClauseMappableExprCommon::MappableExprComponentListRef Components, - OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, + OpenMPMapClauseKind MapType, + ArrayRef<OpenMPMapModifierKind> MapModifiers, bool ReturnDevicePointer, bool IsImplicit) - : Components(Components), 
MapType(MapType), - MapTypeModifier(MapTypeModifier), + : Components(Components), MapType(MapType), MapModifiers(MapModifiers), ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {} }; @@ -6676,10 +6744,9 @@ private: /// a flag marking the map as a pointer if requested. Add a flag marking the /// map as the first one of a series of maps that relate to the same map /// expression. - OpenMPOffloadMappingFlags getMapTypeBits(OpenMPMapClauseKind MapType, - OpenMPMapClauseKind MapTypeModifier, - bool IsImplicit, bool AddPtrFlag, - bool AddIsTargetParamFlag) const { + OpenMPOffloadMappingFlags getMapTypeBits( + OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, + bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const { OpenMPOffloadMappingFlags Bits = IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; switch (MapType) { @@ -6702,7 +6769,6 @@ private: case OMPC_MAP_delete: Bits |= OMP_MAP_DELETE; break; - case OMPC_MAP_always: case OMPC_MAP_unknown: llvm_unreachable("Unexpected map type!"); } @@ -6710,7 +6776,8 @@ private: Bits |= OMP_MAP_PTR_AND_OBJ; if (AddIsTargetParamFlag) Bits |= OMP_MAP_TARGET_PARAM; - if (MapTypeModifier == OMPC_MAP_always) + if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) + != MapModifiers.end()) Bits |= OMP_MAP_ALWAYS; return Bits; } @@ -6746,10 +6813,11 @@ private: } // Check if the length evaluates to 1. - llvm::APSInt ConstLength; - if (!Length->EvaluateAsInt(ConstLength, CGF.getContext())) + Expr::EvalResult Result; + if (!Length->EvaluateAsInt(Result, CGF.getContext())) return true; // Can have more that size 1. + llvm::APSInt ConstLength = Result.Val.getInt(); return ConstLength.getSExtValue() != 1; } @@ -6758,12 +6826,15 @@ private: /// \a IsFirstComponent should be set to true if the provided set of /// components is the first associated with a capture. 
void generateInfoForComponentList( - OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, + OpenMPMapClauseKind MapType, + ArrayRef<OpenMPMapModifierKind> MapModifiers, OMPClauseMappableExprCommon::MappableExprComponentListRef Components, MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, - bool IsImplicit) const { + bool IsImplicit, + ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> + OverlappedElements = llvm::None) const { // The following summarizes what has to be generated for each map and the // types below. The generated information is expressed in this order: // base pointer, section pointer, size, flags @@ -6933,19 +7004,26 @@ private: // components. bool IsExpressionFirstInfo = true; Address BP = Address::invalid(); + const Expr *AssocExpr = I->getAssociatedExpression(); + const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); + const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); - if (isa<MemberExpr>(I->getAssociatedExpression())) { + if (isa<MemberExpr>(AssocExpr)) { // The base is the 'this' pointer. The content of the pointer is going // to be the base of the field being mapped. BP = CGF.LoadCXXThisAddress(); + } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || + (OASE && + isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { + BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); } else { // The base is the reference to the variable. // BP = &Var. 
- BP = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress(); + BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); if (const auto *VD = dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = - isDeclareTargetDeclaration(VD)) + OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) if (*Res == OMPDeclareTargetDeclAttr::MT_Link) { IsLink = true; BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); @@ -7034,7 +7112,6 @@ private: Address LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress(); - llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); // If this component is a pointer inside the base struct then we don't // need to create any entry for it - it will be combined with the object @@ -7043,6 +7120,70 @@ private: IsPointer && EncounteredME && (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == EncounteredME); + if (!OverlappedElements.empty()) { + // Handle base element with the info for overlapped elements. + assert(!PartialStruct.Base.isValid() && "The base element is set."); + assert(Next == CE && + "Expected last element for the overlapped elements."); + assert(!IsPointer && + "Unexpected base element with the pointer type."); + // Mark the whole struct as the struct that requires allocation on the + // device. + PartialStruct.LowestElem = {0, LB}; + CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( + I->getAssociatedExpression()->getType()); + Address HB = CGF.Builder.CreateConstGEP( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB, + CGF.VoidPtrTy), + TypeSize.getQuantity() - 1, CharUnits::One()); + PartialStruct.HighestElem = { + std::numeric_limits<decltype( + PartialStruct.HighestElem.first)>::max(), + HB}; + PartialStruct.Base = BP; + // Emit data for non-overlapped data. 
+ OpenMPOffloadMappingFlags Flags = + OMP_MAP_MEMBER_OF | + getMapTypeBits(MapType, MapModifiers, IsImplicit, + /*AddPtrFlag=*/false, + /*AddIsTargetParamFlag=*/false); + LB = BP; + llvm::Value *Size = nullptr; + // Do bitcopy of all non-overlapped structure elements. + for (OMPClauseMappableExprCommon::MappableExprComponentListRef + Component : OverlappedElements) { + Address ComponentLB = Address::invalid(); + for (const OMPClauseMappableExprCommon::MappableComponent &MC : + Component) { + if (MC.getAssociatedDeclaration()) { + ComponentLB = + CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) + .getAddress(); + Size = CGF.Builder.CreatePtrDiff( + CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), + CGF.EmitCastToVoidPtr(LB.getPointer())); + break; + } + } + BasePointers.push_back(BP.getPointer()); + Pointers.push_back(LB.getPointer()); + Sizes.push_back(Size); + Types.push_back(Flags); + LB = CGF.Builder.CreateConstGEP(ComponentLB, 1, + CGF.getPointerSize()); + } + BasePointers.push_back(BP.getPointer()); + Pointers.push_back(LB.getPointer()); + Size = CGF.Builder.CreatePtrDiff( + CGF.EmitCastToVoidPtr( + CGF.Builder.CreateConstGEP(HB, 1, CharUnits::One()) + .getPointer()), + CGF.EmitCastToVoidPtr(LB.getPointer())); + Sizes.push_back(Size); + Types.push_back(Flags); + break; + } + llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); if (!IsMemberPointer) { BasePointers.push_back(BP.getPointer()); Pointers.push_back(LB.getPointer()); @@ -7053,7 +7194,7 @@ private: // this map is the first one that relates with the current capture // (there is a set of entries for each capture). 
OpenMPOffloadMappingFlags Flags = getMapTypeBits( - MapType, MapTypeModifier, IsImplicit, + MapType, MapModifiers, IsImplicit, !IsExpressionFirstInfo || IsLink, IsCaptureFirstInfo && !IsLink); if (!IsExpressionFirstInfo) { @@ -7147,6 +7288,66 @@ private: Flags |= MemberOfFlag; } + void getPlainLayout(const CXXRecordDecl *RD, + llvm::SmallVectorImpl<const FieldDecl *> &Layout, + bool AsBase) const { + const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); + + llvm::StructType *St = + AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); + + unsigned NumElements = St->getNumElements(); + llvm::SmallVector< + llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> + RecordLayout(NumElements); + + // Fill bases. + for (const auto &I : RD->bases()) { + if (I.isVirtual()) + continue; + const auto *Base = I.getType()->getAsCXXRecordDecl(); + // Ignore empty bases. + if (Base->isEmpty() || CGF.getContext() + .getASTRecordLayout(Base) + .getNonVirtualSize() + .isZero()) + continue; + + unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); + RecordLayout[FieldIndex] = Base; + } + // Fill in virtual bases. + for (const auto &I : RD->vbases()) { + const auto *Base = I.getType()->getAsCXXRecordDecl(); + // Ignore empty bases. + if (Base->isEmpty()) + continue; + unsigned FieldIndex = RL.getVirtualBaseIndex(Base); + if (RecordLayout[FieldIndex]) + continue; + RecordLayout[FieldIndex] = Base; + } + // Fill in all the fields. + assert(!RD->isUnion() && "Unexpected union."); + for (const auto *Field : RD->fields()) { + // Fill in non-bitfields. (Bitfields always use a zero pattern, which we + // will fill in later.) 
+ if (!Field->isBitField()) { + unsigned FieldIndex = RL.getLLVMFieldNo(Field); + RecordLayout[FieldIndex] = Field; + } + } + for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> + &Data : RecordLayout) { + if (Data.isNull()) + continue; + if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) + getPlainLayout(Base, Layout, /*AsBase=*/true); + else + Layout.push_back(Data.get<const FieldDecl *>()); + } + } + public: MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) : CurDir(Dir), CGF(CGF) { @@ -7213,28 +7414,29 @@ public: auto &&InfoGen = [&Info]( const ValueDecl *D, OMPClauseMappableExprCommon::MappableExprComponentListRef L, - OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier, + OpenMPMapClauseKind MapType, + ArrayRef<OpenMPMapModifierKind> MapModifiers, bool ReturnDevicePointer, bool IsImplicit) { const ValueDecl *VD = D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; - Info[VD].emplace_back(L, MapType, MapModifier, ReturnDevicePointer, + Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, IsImplicit); }; // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 
for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) for (const auto &L : C->component_lists()) { - InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(), + InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(), /*ReturnDevicePointer=*/false, C->isImplicit()); } for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>()) for (const auto &L : C->component_lists()) { - InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown, + InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit()); } for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>()) for (const auto &L : C->component_lists()) { - InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown, + InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit()); } @@ -7287,7 +7489,7 @@ public: // Nonetheless, generateInfoForComponentList must be called to take // the pointer into account for the calculation of the range of the // partial struct. - InfoGen(nullptr, L.second, OMPC_MAP_unknown, OMPC_MAP_unknown, + InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit()); DeferredInfo[nullptr].emplace_back(IE, VD); } else { @@ -7321,7 +7523,7 @@ public: unsigned CurrentBasePointersIdx = CurBasePointers.size(); // FIXME: MSVC 2013 seems to require this-> to find the member method. this->generateInfoForComponentList( - L.MapType, L.MapTypeModifier, L.Components, CurBasePointers, + L.MapType, L.MapModifiers, L.Components, CurBasePointers, CurPointers, CurSizes, CurTypes, PartialStruct, IsFirstComponentList, L.IsImplicit); @@ -7375,6 +7577,82 @@ public: } } + /// Emit capture info for lambdas for variables captured by reference. 
+ void generateInfoForLambdaCaptures( + const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers, + MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, + MapFlagsArrayTy &Types, + llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { + const auto *RD = VD->getType() + .getCanonicalType() + .getNonReferenceType() + ->getAsCXXRecordDecl(); + if (!RD || !RD->isLambda()) + return; + Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD)); + LValue VDLVal = CGF.MakeAddrLValue( + VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); + llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; + FieldDecl *ThisCapture = nullptr; + RD->getCaptureFields(Captures, ThisCapture); + if (ThisCapture) { + LValue ThisLVal = + CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); + LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); + LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer()); + BasePointers.push_back(ThisLVal.getPointer()); + Pointers.push_back(ThisLValVal.getPointer()); + Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); + Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | + OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); + } + for (const LambdaCapture &LC : RD->captures()) { + if (LC.getCaptureKind() != LCK_ByRef) + continue; + const VarDecl *VD = LC.getCapturedVar(); + auto It = Captures.find(VD); + assert(It != Captures.end() && "Found lambda capture without field."); + LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); + LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); + LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer()); + BasePointers.push_back(VarLVal.getPointer()); + Pointers.push_back(VarLValVal.getPointer()); + Sizes.push_back(CGF.getTypeSize( + VD->getType().getCanonicalType().getNonReferenceType())); + Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | + OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); + } + } + 
+ /// Set correct indices for lambdas captures. + void adjustMemberOfForLambdaCaptures( + const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, + MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, + MapFlagsArrayTy &Types) const { + for (unsigned I = 0, E = Types.size(); I < E; ++I) { + // Set correct member_of idx for all implicit lambda captures. + if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | + OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT)) + continue; + llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]); + assert(BasePtr && "Unable to find base lambda address."); + int TgtIdx = -1; + for (unsigned J = I; J > 0; --J) { + unsigned Idx = J - 1; + if (Pointers[Idx] != BasePtr) + continue; + TgtIdx = Idx; + break; + } + assert(TgtIdx != -1 && "Unable to find parent lambda."); + // All other current entries will be MEMBER_OF the combined entry + // (except for PTR_AND_OBJ entries which do not have a placeholder value + // 0xFFFF in the MEMBER_OF field). + OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx); + setCorrectMemberOfFlag(Types[I], MemberOfFlag); + } + } + /// Generate the base pointers, section pointers, sizes and map types /// associated to a given capture. void generateInfoForCapture(const CapturedStmt::Capture *Cap, @@ -7387,9 +7665,6 @@ public: "Not expecting to generate map info for a variable array type!"); // We need to know when we generating information for the first component - // associated with a capture, because the mapping flags depend on it. - bool IsFirstComponentList = true; - const ValueDecl *VD = Cap->capturesThis() ? nullptr : Cap->getCapturedVar()->getCanonicalDecl(); @@ -7405,19 +7680,151 @@ public: return; } + using MapData = + std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, + OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>; + SmallVector<MapData, 4> DeclComponentLists; // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 
- for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) + for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) { for (const auto &L : C->decl_component_lists(VD)) { assert(L.first == VD && "We got information for the wrong declaration??"); assert(!L.second.empty() && "Not expecting declaration with no component lists."); - generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(), - L.second, BasePointers, Pointers, Sizes, - Types, PartialStruct, IsFirstComponentList, - C->isImplicit()); - IsFirstComponentList = false; + DeclComponentLists.emplace_back(L.second, C->getMapType(), + C->getMapTypeModifiers(), + C->isImplicit()); + } + } + + // Find overlapping elements (including the offset from the base element). + llvm::SmallDenseMap< + const MapData *, + llvm::SmallVector< + OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>, + 4> + OverlappedData; + size_t Count = 0; + for (const MapData &L : DeclComponentLists) { + OMPClauseMappableExprCommon::MappableExprComponentListRef Components; + OpenMPMapClauseKind MapType; + ArrayRef<OpenMPMapModifierKind> MapModifiers; + bool IsImplicit; + std::tie(Components, MapType, MapModifiers, IsImplicit) = L; + ++Count; + for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) { + OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; + std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1; + auto CI = Components.rbegin(); + auto CE = Components.rend(); + auto SI = Components1.rbegin(); + auto SE = Components1.rend(); + for (; CI != CE && SI != SE; ++CI, ++SI) { + if (CI->getAssociatedExpression()->getStmtClass() != + SI->getAssociatedExpression()->getStmtClass()) + break; + // Are we dealing with different variables/fields? + if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration()) + break; + } + // Found overlapping if, at least for one component, reached the head of + // the components list. 
+ if (CI == CE || SI == SE) { + assert((CI != CE || SI != SE) && + "Unexpected full match of the mapping components."); + const MapData &BaseData = CI == CE ? L : L1; + OMPClauseMappableExprCommon::MappableExprComponentListRef SubData = + SI == SE ? Components : Components1; + auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData); + OverlappedElements.getSecond().push_back(SubData); + } } + } + // Sort the overlapped elements for each item. + llvm::SmallVector<const FieldDecl *, 4> Layout; + if (!OverlappedData.empty()) { + if (const auto *CRD = + VD->getType().getCanonicalType()->getAsCXXRecordDecl()) + getPlainLayout(CRD, Layout, /*AsBase=*/false); + else { + const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl(); + Layout.append(RD->field_begin(), RD->field_end()); + } + } + for (auto &Pair : OverlappedData) { + llvm::sort( + Pair.getSecond(), + [&Layout]( + OMPClauseMappableExprCommon::MappableExprComponentListRef First, + OMPClauseMappableExprCommon::MappableExprComponentListRef + Second) { + auto CI = First.rbegin(); + auto CE = First.rend(); + auto SI = Second.rbegin(); + auto SE = Second.rend(); + for (; CI != CE && SI != SE; ++CI, ++SI) { + if (CI->getAssociatedExpression()->getStmtClass() != + SI->getAssociatedExpression()->getStmtClass()) + break; + // Are we dealing with different variables/fields? + if (CI->getAssociatedDeclaration() != + SI->getAssociatedDeclaration()) + break; + } + + // Lists contain the same elements. + if (CI == CE && SI == SE) + return false; + + // List with less elements is less than list with more elements. 
+ if (CI == CE || SI == SE) + return CI == CE; + + const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration()); + const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration()); + if (FD1->getParent() == FD2->getParent()) + return FD1->getFieldIndex() < FD2->getFieldIndex(); + const auto It = + llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) { + return FD == FD1 || FD == FD2; + }); + return *It == FD1; + }); + } + + // Associated with a capture, because the mapping flags depend on it. + // Go through all of the elements with the overlapped elements. + for (const auto &Pair : OverlappedData) { + const MapData &L = *Pair.getFirst(); + OMPClauseMappableExprCommon::MappableExprComponentListRef Components; + OpenMPMapClauseKind MapType; + ArrayRef<OpenMPMapModifierKind> MapModifiers; + bool IsImplicit; + std::tie(Components, MapType, MapModifiers, IsImplicit) = L; + ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> + OverlappedComponents = Pair.getSecond(); + bool IsFirstComponentList = true; + generateInfoForComponentList(MapType, MapModifiers, Components, + BasePointers, Pointers, Sizes, Types, + PartialStruct, IsFirstComponentList, + IsImplicit, OverlappedComponents); + } + // Go through other elements without overlapped elements. 
+    bool IsFirstComponentList = OverlappedData.empty();
+    for (const MapData &L : DeclComponentLists) {
+      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
+      OpenMPMapClauseKind MapType;
+      ArrayRef<OpenMPMapModifierKind> MapModifiers;
+      bool IsImplicit;
+      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
+      auto It = OverlappedData.find(&L);
+      if (It == OverlappedData.end())
+        generateInfoForComponentList(MapType, MapModifiers, Components,
+                                     BasePointers, Pointers, Sizes, Types,
+                                     PartialStruct, IsFirstComponentList,
+                                     IsImplicit);
+      IsFirstComponentList = false;
+    }
   }
   /// Generate the base pointers, section pointers, sizes and map types
@@ -7436,12 +7843,12 @@ public:
       if (!VD)
         continue;
       llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
-          isDeclareTargetDeclaration(VD);
+          OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
       if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
         continue;
       StructRangeInfoTy PartialStruct;
       generateInfoForComponentList(
-          C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers,
+          C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
           Pointers, Sizes, Types, PartialStruct,
           /*IsFirstComponentList=*/true, C->isImplicit());
       assert(!PartialStruct.Base.isValid() &&
@@ -7658,6 +8065,183 @@ static void emitOffloadingArraysArgument(
   }
 }
+/// Checks whether the expression \p E is trivial: it has no (possible) side
+/// effects and is either evaluatable or free of non-trivial function calls.
+static bool isTrivial(ASTContext &Ctx, const Expr *E) {
+  // Constant expressions and expressions that only make trivial calls (or no
+  // calls at all) are candidates for being skipped.
+  const bool IsEvaluatableOrCallFree =
+      E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
+      !E->hasNonTrivialCall(Ctx);
+  if (!IsEvaluatableOrCallFree)
+    return false;
+  // Even such an expression is not trivial if it may have side effects.
+  return !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
+}
+
+/// Checks if the \p Body is the \a CompoundStmt and returns its child statement
+/// iff there is only one that is not evaluatable at the compile time.
+/// Returns \p Body itself when it is not a \a CompoundStmt, when every child
+/// can be ignored, or when more than one child cannot be ignored.
+static const Stmt *getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body) {
+  const auto *Compound = dyn_cast<CompoundStmt>(Body);
+  if (!Compound)
+    return Body;
+
+  // A declaration can be ignored if it is one of the listed declaration kinds,
+  // or a variable that is constexpr, or a variable of trivial/reference type
+  // whose initializer (if any) is trivial.
+  auto IsIgnorableDecl = [&Ctx](const Decl *D) {
+    if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || isa<TypeDecl>(D) ||
+        isa<PragmaCommentDecl>(D) || isa<PragmaDetectMismatchDecl>(D) ||
+        isa<UsingDecl>(D) || isa<UsingDirectiveDecl>(D) ||
+        isa<OMPDeclareReductionDecl>(D) || isa<OMPThreadPrivateDecl>(D))
+      return true;
+    const auto *VD = dyn_cast<VarDecl>(D);
+    if (!VD)
+      return false;
+    if (VD->isConstexpr())
+      return true;
+    if (!VD->getType().isTrivialType(Ctx) && !VD->getType()->isReferenceType())
+      return false;
+    return !VD->hasInit() || isTrivial(Ctx, VD->getInit());
+  };
+
+  const Stmt *OnlyChild = nullptr;
+  for (const Stmt *S : Compound->body()) {
+    // Trivial expressions do not count as children.
+    const auto *E = dyn_cast<Expr>(S);
+    if (E && isTrivial(Ctx, E))
+      continue;
+    // Neither do these statement kinds.
+    if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
+        isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
+      continue;
+    // Nor declaration statements made up of ignorable declarations only.
+    const auto *DS = dyn_cast<DeclStmt>(S);
+    if (DS && llvm::all_of(DS->decls(), IsIgnorableDecl))
+      continue;
+    // A second significant child means there is no single child to return.
+    if (OnlyChild)
+      return Body;
+    OnlyChild = S;
+  }
+  return OnlyChild ? OnlyChild : Body;
+}
+
+/// Check for inner distribute directive.
+/// Returns the directive nested in \p D when the single significant child of
+/// \p D's innermost captured region is a distribute directive (for 'target',
+/// also when it is reached through one nested 'teams'); nullptr otherwise.
+static const OMPExecutableDirective *
+getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
+  const Stmt *Body =
+      D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(
+          /*IgnoreCaptured=*/true);
+  const auto *NestedDir =
+      dyn_cast<OMPExecutableDirective>(getSingleCompoundChild(Ctx, Body));
+  if (!NestedDir)
+    return nullptr;
+
+  const OpenMPDirectiveKind NestedKind = NestedDir->getDirectiveKind();
+  switch (D.getDirectiveKind()) {
+  case OMPD_target: {
+    if (isOpenMPDistributeDirective(NestedKind))
+      return NestedDir;
+    if (NestedKind != OMPD_teams)
+      return nullptr;
+    // 'target' containing 'teams': inspect the body of the 'teams' region.
+    Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
+        /*IgnoreCaptured=*/true);
+    if (!Body)
+      return nullptr;
+    const auto *InnerDir =
+        dyn_cast<OMPExecutableDirective>(getSingleCompoundChild(Ctx, Body));
+    if (InnerDir && isOpenMPDistributeDirective(InnerDir->getDirectiveKind()))
+      return InnerDir;
+    return nullptr;
+  }
+  case OMPD_target_teams:
+    return isOpenMPDistributeDirective(NestedKind) ? NestedDir : nullptr;
+  case OMPD_target_parallel:
+  case OMPD_target_simd:
+  case OMPD_target_parallel_for:
+  case OMPD_target_parallel_for_simd:
+    return nullptr;
+  case OMPD_target_teams_distribute:
+  case OMPD_target_teams_distribute_simd:
+  case OMPD_target_teams_distribute_parallel_for:
+  case OMPD_target_teams_distribute_parallel_for_simd:
+  case OMPD_parallel:
+  case OMPD_for:
+  case OMPD_parallel_for:
+  case OMPD_parallel_sections:
+  case OMPD_for_simd:
+  case OMPD_parallel_for_simd:
+  case OMPD_cancel:
+  case OMPD_cancellation_point:
+  case OMPD_ordered:
+  case OMPD_threadprivate:
+  case OMPD_task:
+  case OMPD_simd:
+  case OMPD_sections:
+  case OMPD_section:
+  case OMPD_single:
+  case OMPD_master:
+  case OMPD_critical:
+  case OMPD_taskyield:
+  case OMPD_barrier:
+  case OMPD_taskwait:
+  case OMPD_taskgroup:
+  case OMPD_atomic:
+  case OMPD_flush:
+  case OMPD_teams:
+  case OMPD_target_data:
+  case OMPD_target_exit_data:
+  case OMPD_target_enter_data:
+  case OMPD_distribute:
+  case OMPD_distribute_simd:
+  case OMPD_distribute_parallel_for:
+  case OMPD_distribute_parallel_for_simd:
+  case OMPD_teams_distribute:
+  case OMPD_teams_distribute_simd:
+  case OMPD_teams_distribute_parallel_for:
+  case OMPD_teams_distribute_parallel_for_simd:
+  case OMPD_target_update:
+  case OMPD_declare_simd:
+  case OMPD_declare_target:
+  case OMPD_end_declare_target:
+  case OMPD_declare_reduction:
+  case OMPD_taskloop:
+  case OMPD_taskloop_simd:
+  case OMPD_requires:
+  case OMPD_unknown:
+    llvm_unreachable("Unexpected directive.");
+  }
+
+  return nullptr;
+}
+
+/// Emits a call to the __kmpc_push_target_tripcount runtime entry, passing the
+/// device id and the iteration count produced by \p SizeEmitter. Nothing is
+/// emitted when \p D is not itself a teams+distribute directive and no nested
+/// distribute directive is found.
+void CGOpenMPRuntime::emitTargetNumIterationsCall(
+    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
+    const llvm::function_ref<llvm::Value *(
+        CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
+  const OpenMPDirectiveKind Kind = D.getDirectiveKind();
+  // Use D directly only if it is both a distribute and a teams directive;
+  // otherwise look for a nested teams distribute kind directive, if any.
+  const bool IsTeamsDistribute =
+      isOpenMPDistributeDirective(Kind) && isOpenMPTeamsDirective(Kind);
+  const OMPExecutableDirective *TD =
+      IsTeamsDistribute ? &D
+                        : getNestedDistributeDirective(CGM.getContext(), D);
+  if (!TD)
+    return;
+
+  const auto *LoopDir = cast<OMPLoopDirective>(TD);
+  auto &&CodeGen = [LoopDir, &Device, &SizeEmitter,
+                    this](CodeGenFunction &CGF, PrePostActionTy &) {
+    // The trip count is emitted before the device id.
+    llvm::Value *NumIterations = SizeEmitter(CGF, *LoopDir);
+
+    // Device id: the 'device' expression cast to a signed 64-bit integer, or
+    // OMP_DEVICEID_UNDEF when no device expression was given.
+    llvm::Value *DeviceID =
+        Device ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
+                                           CGF.Int64Ty, /*isSigned=*/true)
+               : CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
+
+    llvm::Value *Args[] = {DeviceID, NumIterations};
+    CGF.EmitRuntimeCall(
+        createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
+  };
+  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
+}
+
 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                      const OMPExecutableDirective &D,
                                      llvm::Value *OutlinedFn,
@@ -7790,7 +8374,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
       CapturedVars.clear();
       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
     }
-    emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars);
+    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
     CGF.EmitBranch(OffloadContBlock);
     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
@@ -7804,7 +8388,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
       CapturedVars.clear();
       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
     }
-    emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars);
+    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
   };
   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
@@ -7818,6 +8402,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
     // Get mappable expression information.
     MappableExprsHandler MEHandler(D, CGF);
+    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
     auto RI = CS.getCapturedRecordDecl()->field_begin();
     auto CV = CapturedVars.begin();
@@ -7847,6 +8432,12 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
         if (CurBasePointers.empty())
           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                            CurPointers, CurSizes, CurMapTypes);
+        // Generate correct mapping for variables captured by reference in
+        // lambdas.
+ if (CI->capturesVariable()) + MEHandler.generateInfoForLambdaCaptures( + CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes, + CurMapTypes, LambdaPointers); } // We expect to have at least an element of information for this capture. assert(!CurBasePointers.empty() && @@ -7868,6 +8459,9 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, Sizes.append(CurSizes.begin(), CurSizes.end()); MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); } + // Adjust MEMBER_OF flags for the lambdas captures. + MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers, + Pointers, MapTypes); // Map other list items in the map clause which are not captured variables // but "declare target link" global variables. MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes, @@ -7935,7 +8529,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, unsigned DeviceID; unsigned FileID; unsigned Line; - getTargetEntryUniqueInfo(CGM.getContext(), E.getLocStart(), DeviceID, + getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, FileID, Line); // Is this a target region that should not be emitted as an entry point? If @@ -8030,6 +8624,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, case OMPD_declare_reduction: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_requires: case OMPD_unknown: llvm_unreachable("Unknown target directive for OpenMP device codegen."); } @@ -8055,19 +8650,20 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, } bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { - const auto *FD = cast<FunctionDecl>(GD.getDecl()); - // If emitting code for the host, we do not process FD here. Instead we do // the normal code generation. if (!CGM.getLangOpts().OpenMPIsDevice) return false; + const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); + StringRef Name = CGM.getMangledName(GD); // Try to detect target regions in the function. 
- scanForTargetRegionsFunctions(FD->getBody(), CGM.getMangledName(GD)); + if (const auto *FD = dyn_cast<FunctionDecl>(VD)) + scanForTargetRegionsFunctions(FD->getBody(), Name); // Do not to emit function if it is not marked as declare target. - return !isDeclareTargetDeclaration(FD) && - AlreadyEmittedTargetFunctions.count(FD->getCanonicalDecl()) == 0; + return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && + AlreadyEmittedTargetFunctions.count(Name) == 0; } bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { @@ -8093,64 +8689,105 @@ bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { // Do not to emit variable if it is not marked as declare target. llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = - isDeclareTargetDeclaration(cast<VarDecl>(GD.getDecl())); - return !Res || *Res == OMPDeclareTargetDeclAttr::MT_Link; + OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( + cast<VarDecl>(GD.getDecl())); + if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) { + DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); + return true; + } + return false; } void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr) { - if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = - isDeclareTargetDeclaration(VD)) { - OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; - StringRef VarName; - CharUnits VarSize; - llvm::GlobalValue::LinkageTypes Linkage; - switch (*Res) { - case OMPDeclareTargetDeclAttr::MT_To: - Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; - VarName = CGM.getMangledName(VD); + llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); + if (!Res) { + if (CGM.getLangOpts().OpenMPIsDevice) { + // Register non-target variables being emitted in device code (debug info + // may cause this). 
+ StringRef VarName = CGM.getMangledName(VD); + EmittedNonTargetVariables.try_emplace(VarName, Addr); + } + return; + } + // Register declare target variables. + OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; + StringRef VarName; + CharUnits VarSize; + llvm::GlobalValue::LinkageTypes Linkage; + switch (*Res) { + case OMPDeclareTargetDeclAttr::MT_To: + Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; + VarName = CGM.getMangledName(VD); + if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); - Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); - // Temp solution to prevent optimizations of the internal variables. - if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { - std::string RefName = getName({VarName, "ref"}); - if (!CGM.GetGlobalValue(RefName)) { - llvm::Constant *AddrRef = - getOrCreateInternalVariable(Addr->getType(), RefName); - auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); - GVAddrRef->setConstant(/*Val=*/true); - GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); - GVAddrRef->setInitializer(Addr); - CGM.addCompilerUsedGlobal(GVAddrRef); - } - } - break; - case OMPDeclareTargetDeclAttr::MT_Link: - Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; - if (CGM.getLangOpts().OpenMPIsDevice) { - VarName = Addr->getName(); - Addr = nullptr; - } else { - VarName = getAddrOfDeclareTargetLink(VD).getName(); - Addr = - cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer()); + assert(!VarSize.isZero() && "Expected non-zero size of the variable"); + } else { + VarSize = CharUnits::Zero(); + } + Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); + // Temp solution to prevent optimizations of the internal variables. 
+ if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { + std::string RefName = getName({VarName, "ref"}); + if (!CGM.GetGlobalValue(RefName)) { + llvm::Constant *AddrRef = + getOrCreateInternalVariable(Addr->getType(), RefName); + auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); + GVAddrRef->setConstant(/*Val=*/true); + GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); + GVAddrRef->setInitializer(Addr); + CGM.addCompilerUsedGlobal(GVAddrRef); } - VarSize = CGM.getPointerSize(); - Linkage = llvm::GlobalValue::WeakAnyLinkage; - break; } - OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( - VarName, Addr, VarSize, Flags, Linkage); + break; + case OMPDeclareTargetDeclAttr::MT_Link: + Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; + if (CGM.getLangOpts().OpenMPIsDevice) { + VarName = Addr->getName(); + Addr = nullptr; + } else { + VarName = getAddrOfDeclareTargetLink(VD).getName(); + Addr = cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer()); + } + VarSize = CGM.getPointerSize(); + Linkage = llvm::GlobalValue::WeakAnyLinkage; + break; } + OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( + VarName, Addr, VarSize, Flags, Linkage); } bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { - if (isa<FunctionDecl>(GD.getDecl())) + if (isa<FunctionDecl>(GD.getDecl()) || + isa<OMPDeclareReductionDecl>(GD.getDecl())) return emitTargetFunctions(GD); return emitTargetGlobalVariable(GD); } +void CGOpenMPRuntime::emitDeferredTargetDecls() const { + for (const VarDecl *VD : DeferredGlobalVariables) { + llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); + if (!Res) + continue; + if (*Res == OMPDeclareTargetDeclAttr::MT_To) { + CGM.EmitGlobal(VD); + } else { + assert(*Res == OMPDeclareTargetDeclAttr::MT_Link && + "Expected to or link clauses."); + (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); + } + } +} + +void 
CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( + CodeGenFunction &CGF, const OMPExecutableDirective &D) const { + assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && + " Expected target-based directive."); +} + CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( CodeGenModule &CGM) : CGM(CGM) { @@ -8169,21 +8806,20 @@ bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) return true; + StringRef Name = CGM.getMangledName(GD); const auto *D = cast<FunctionDecl>(GD.getDecl()); - const FunctionDecl *FD = D->getCanonicalDecl(); // Do not to emit function if it is marked as declare target as it was already // emitted. - if (isDeclareTargetDeclaration(D)) { - if (D->hasBody() && AlreadyEmittedTargetFunctions.count(FD) == 0) { - if (auto *F = dyn_cast_or_null<llvm::Function>( - CGM.GetGlobalValue(CGM.getMangledName(GD)))) + if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { + if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) { + if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name))) return !F->isDeclaration(); return false; } return true; } - return !AlreadyEmittedTargetFunctions.insert(FD).second; + return !AlreadyEmittedTargetFunctions.insert(Name).second; } llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { @@ -8478,6 +9114,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( case OMPD_target_parallel: case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: + case OMPD_requires: case OMPD_unknown: llvm_unreachable("Unexpected standalone target data directive."); break; @@ -8730,8 +9367,8 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, ParamAttrTy &ParamAttr = ParamAttrs[Pos]; ParamAttr.Kind = Linear; if (*SI) { - if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C, - Expr::SE_AllowSideEffects)) { + Expr::EvalResult Result; + if (!(*SI)->EvaluateAsInt(Result, C, 
Expr::SE_AllowSideEffects)) { if (const auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { @@ -8740,6 +9377,8 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, ParamPositions[StridePVD->getCanonicalDecl()]); } } + } else { + ParamAttr.StrideOrArg = Result.Val.getInt(); } } ++SI; @@ -8782,7 +9421,8 @@ public: } // namespace void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, - const OMPLoopDirective &D) { + const OMPLoopDirective &D, + ArrayRef<Expr *> NumIterations) { if (!CGF.HaveInsertPoint()) return; @@ -8805,37 +9445,50 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, } else { RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); } + llvm::APInt Size(/*numBits=*/32, NumIterations.size()); + QualType ArrayTy = + C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0); - Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims"); - CGF.EmitNullInitialization(DimsAddr, KmpDimTy); + Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); + CGF.EmitNullInitialization(DimsAddr, ArrayTy); enum { LowerFD = 0, UpperFD, StrideFD }; // Fill dims with data. 
- LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy); - // dims.upper = num_iterations; - LValue UpperLVal = - CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD)); - llvm::Value *NumIterVal = CGF.EmitScalarConversion( - CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(), - Int64Ty, D.getNumIterations()->getExprLoc()); - CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); - // dims.stride = 1; - LValue StrideLVal = - CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD)); - CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), - StrideLVal); + for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { + LValue DimsLVal = + CGF.MakeAddrLValue(CGF.Builder.CreateConstArrayGEP( + DimsAddr, I, C.getTypeSizeInChars(KmpDimTy)), + KmpDimTy); + // dims.upper = num_iterations; + LValue UpperLVal = CGF.EmitLValueForField( + DimsLVal, *std::next(RD->field_begin(), UpperFD)); + llvm::Value *NumIterVal = + CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]), + D.getNumIterations()->getType(), Int64Ty, + D.getNumIterations()->getExprLoc()); + CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); + // dims.stride = 1; + LValue StrideLVal = CGF.EmitLValueForField( + DimsLVal, *std::next(RD->field_begin(), StrideFD)); + CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), + StrideLVal); + } // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, // kmp_int32 num_dims, struct kmp_dim * dims); - llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()), - getThreadID(CGF, D.getLocStart()), - llvm::ConstantInt::getSigned(CGM.Int32Ty, 1), - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - DimsAddr.getPointer(), CGM.VoidPtrTy)}; + llvm::Value *Args[] = { + emitUpdateLocation(CGF, D.getBeginLoc()), + getThreadID(CGF, D.getBeginLoc()), + llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + 
CGF.Builder + .CreateConstArrayGEP(DimsAddr, 0, C.getTypeSizeInChars(KmpDimTy)) + .getPointer(), + CGM.VoidPtrTy)}; llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init); CGF.EmitRuntimeCall(RTLFn, Args); llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { - emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())}; + emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini); CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, llvm::makeArrayRef(FiniArgs)); @@ -8845,16 +9498,29 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C) { QualType Int64Ty = CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); - const Expr *CounterVal = C->getCounterValue(); - assert(CounterVal); - llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal), - CounterVal->getType(), Int64Ty, - CounterVal->getExprLoc()); - Address CntAddr = CGF.CreateMemTemp(Int64Ty, ".cnt.addr"); - CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty); - llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()), - getThreadID(CGF, C->getLocStart()), - CntAddr.getPointer()}; + llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); + QualType ArrayTy = CGM.getContext().getConstantArrayType( + Int64Ty, Size, ArrayType::Normal, 0); + Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); + for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { + const Expr *CounterVal = C->getLoopData(I); + assert(CounterVal); + llvm::Value *CntVal = CGF.EmitScalarConversion( + CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, + CounterVal->getExprLoc()); + CGF.EmitStoreOfScalar( + CntVal, + CGF.Builder.CreateConstArrayGEP( + CntAddr, I, CGM.getContext().getTypeSizeInChars(Int64Ty)), + /*Volatile=*/false, Int64Ty); + } + llvm::Value *Args[] = { + 
emitUpdateLocation(CGF, C->getBeginLoc()), + getThreadID(CGF, C->getBeginLoc()), + CGF.Builder + .CreateConstArrayGEP(CntAddr, 0, + CGM.getContext().getTypeSizeInChars(Int64Ty)) + .getPointer()}; llvm::Value *RTLFn; if (C->getDependencyKind() == OMPC_DEPEND_source) { RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); @@ -9169,7 +9835,8 @@ void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( } void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, - const OMPLoopDirective &D) { + const OMPLoopDirective &D, + ArrayRef<Expr *> NumIterations) { llvm_unreachable("Not supported in SIMD-only mode"); } diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h index 01ff0c20fd66..1822a6fd1974 100644 --- a/lib/CodeGen/CGOpenMPRuntime.h +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -15,12 +15,13 @@ #define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H #include "CGValue.h" +#include "clang/AST/DeclOpenMP.h" #include "clang/AST/Type.h" #include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/SourceLocation.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSet.h" #include "llvm/IR/Function.h" #include "llvm/IR/ValueHandle.h" @@ -278,12 +279,39 @@ protected: /// stored. virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc); + void setLocThreadIdInsertPt(CodeGenFunction &CGF, + bool AtCurrentPoint = false); + void clearLocThreadIdInsertPt(CodeGenFunction &CGF); + + /// Check if the default location must be constant. + /// Default is false to support OMPT/OMPD. + virtual bool isDefaultLocationConstant() const { return false; } + + /// Returns additional flags that can be stored in reserved_2 field of the + /// default location. + virtual unsigned getDefaultLocationReserved2Flags() const { return 0; } + + /// Returns default flags for the barriers depending on the directive, for + /// which this barier is going to be emitted. 
+ static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind); + + /// Get the LLVM type for the critical name. + llvm::ArrayType *getKmpCriticalNameTy() const {return KmpCriticalNameTy;} + + /// Returns corresponding lock object for the specified critical region + /// name. If the lock object does not exist it is created, otherwise the + /// reference to the existing copy is returned. + /// \param CriticalName Name of the critical region. + /// + llvm::Value *getCriticalRegionLock(StringRef CriticalName); + private: /// Default const ident_t object used for initialization of all other /// ident_t objects. llvm::Constant *DefaultOpenMPPSource = nullptr; + using FlagsTy = std::pair<unsigned, unsigned>; /// Map of flags and corresponding default locations. - typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDefaultLocMapTy; + using OpenMPDefaultLocMapTy = llvm::DenseMap<FlagsTy, llvm::Value *>; OpenMPDefaultLocMapTy OpenMPDefaultLocMap; Address getOrCreateDefaultLocation(unsigned Flags); @@ -300,6 +328,8 @@ private: struct DebugLocThreadIdTy { llvm::Value *DebugLoc; llvm::Value *ThreadID; + /// Insert point for the service instructions. + llvm::AssertingVH<llvm::Instruction> ServiceInsertPt = nullptr; }; /// Map of local debug location, ThreadId and functions. typedef llvm::DenseMap<llvm::Function *, DebugLocThreadIdTy> @@ -315,10 +345,6 @@ private: SmallVector<const OMPDeclareReductionDecl *, 4>> FunctionUDRMapTy; FunctionUDRMapTy FunctionUDRMap; - IdentifierInfo *In = nullptr; - IdentifierInfo *Out = nullptr; - IdentifierInfo *Priv = nullptr; - IdentifierInfo *Orig = nullptr; /// Type kmp_critical_name, originally defined as typedef kmp_int32 /// kmp_critical_name[8]; llvm::ArrayType *KmpCriticalNameTy; @@ -600,7 +626,15 @@ private: OffloadEntriesInfoManagerTy OffloadEntriesInfoManager; bool ShouldMarkAsGlobal = true; - llvm::SmallDenseSet<const FunctionDecl *> AlreadyEmittedTargetFunctions; + /// List of the emitted functions. 
+ llvm::StringSet<> AlreadyEmittedTargetFunctions; + /// List of the global variables with their addresses that should not be + /// emitted for the target. + llvm::StringMap<llvm::WeakTrackingVH> EmittedNonTargetVariables; + + /// List of variables that can become declare target implicitly and, thus, + /// must be emitted. + llvm::SmallDenseSet<const VarDecl *> DeferredGlobalVariables; /// Creates and registers offloading binary descriptor for the current /// compilation unit. The function that does the registration is returned. @@ -673,10 +707,10 @@ private: const llvm::Twine &Name); /// Set of threadprivate variables with the generated initializer. - llvm::SmallPtrSet<const VarDecl *, 4> ThreadPrivateWithDefinition; + llvm::StringSet<> ThreadPrivateWithDefinition; /// Set of declare target variables with the generated initializer. - llvm::SmallPtrSet<const VarDecl *, 4> DeclareTargetWithDefinition; + llvm::StringSet<> DeclareTargetWithDefinition; /// Emits initialization code for the threadprivate variables. /// \param VDAddr Address of the global variable \a VD. @@ -688,13 +722,6 @@ private: llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc); - /// Returns corresponding lock object for the specified critical region - /// name. If the lock object does not exist it is created, otherwise the - /// reference to the existing copy is returned. - /// \param CriticalName Name of the critical region. - /// - llvm::Value *getCriticalRegionLock(StringRef CriticalName); - struct TaskResultTy { llvm::Value *NewTask = nullptr; llvm::Value *TaskEntry = nullptr; @@ -884,6 +911,20 @@ public: virtual bool isStaticNonchunked(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const; + /// Check if the specified \a ScheduleKind is static chunked. + /// \param ScheduleKind Schedule kind specified in the 'schedule' clause. + /// \param Chunked True if chunk is specified in the clause. 
+ /// + virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, + bool Chunked) const; + + /// Check if the specified \a ScheduleKind is static non-chunked. + /// \param ScheduleKind Schedule kind specified in the 'dist_schedule' clause. + /// \param Chunked True if chunk is specified in the clause. + /// + virtual bool isStaticChunked(OpenMPDistScheduleClauseKind ScheduleKind, + bool Chunked) const; + /// Check if the specified \a ScheduleKind is dynamic. /// This kind of worksharing directive is emitted without outer loop. /// \param ScheduleKind Schedule Kind specified in the 'schedule' clause. @@ -1327,6 +1368,15 @@ public: bool IsOffloadEntry, const RegionCodeGenTy &CodeGen); + /// Emit code that pushes the trip count of loops associated with constructs + /// 'target teams distribute' and 'teams distribute parallel for'. + /// \param SizeEmitter Emits the int64 value for the number of iterations of + /// the associated loop. + virtual void emitTargetNumIterationsCall( + CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device, + const llvm::function_ref<llvm::Value *( + CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter); + /// Emit the target offloading code associated with \a D. The emitted /// code attempts offloading the execution to the device, an the event of /// a failure it executes the host version outlined in \a OutlinedFn. @@ -1465,8 +1515,8 @@ public: /// Emit initialization for doacross loop nesting support. /// \param D Loop-based construct used in doacross nesting construct. - virtual void emitDoacrossInit(CodeGenFunction &CGF, - const OMPLoopDirective &D); + virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, + ArrayRef<Expr *> NumIterations); /// Emit code for doacross ordered directive with 'depend' clause. /// \param C 'depend' clause with 'sink|source' dependency kind. 
@@ -1490,6 +1540,18 @@ public: const VarDecl *NativeParam, const VarDecl *TargetParam) const; + /// Choose default schedule type and chunk value for the + /// dist_schedule clause. + virtual void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF, + const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, + llvm::Value *&Chunk) const {} + + /// Choose default schedule type and chunk value for the + /// schedule clause. + virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF, + const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, + const Expr *&ChunkExpr) const {} + /// Emits call of the outlined function with the provided arguments, /// translating these arguments to correct target-specific arguments. virtual void @@ -1505,10 +1567,23 @@ public: virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD); - /// Marks the declaration as alread emitted for the device code and returns + /// Marks the declaration as already emitted for the device code and returns /// true, if it was marked already, and false, otherwise. bool markAsGlobalTarget(GlobalDecl GD); + /// Emit deferred declare target variables marked for deferred emission. + void emitDeferredTargetDecls() const; + + /// Adjust some parameters for the target-based directives, like addresses of + /// the variables captured by reference in lambdas. + virtual void + adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, + const OMPExecutableDirective &D) const; + + /// Perform check on requires decl to ensure that target architecture + /// supports unified addressing + virtual void checkArchForUnifiedAddressing(CodeGenModule &CGM, + const OMPRequiresDecl *D) const {} }; /// Class supports emissionof SIMD-only code. @@ -2051,8 +2126,8 @@ public: /// Emit initialization for doacross loop nesting support. /// \param D Loop-based construct used in doacross nesting construct. 
- void emitDoacrossInit(CodeGenFunction &CGF, - const OMPLoopDirective &D) override; + void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, + ArrayRef<Expr *> NumIterations) override; /// Emit code for doacross ordered directive with 'depend' clause. /// \param C 'depend' clause with 'sink|source' dependency kind. diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 036b5371fe0b..7046ab3aa35c 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -17,6 +17,7 @@ #include "clang/AST/DeclOpenMP.h" #include "clang/AST/StmtOpenMP.h" #include "clang/AST/StmtVisitor.h" +#include "clang/Basic/Cuda.h" #include "llvm/ADT/SmallPtrSet.h" using namespace clang; @@ -32,8 +33,8 @@ enum OpenMPRTLFunctionNVPTX { /// Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit, /// int16_t RequiresOMPRuntime, int16_t RequiresDataSharing); OMPRTL_NVPTX__kmpc_spmd_kernel_init, - /// Call to void __kmpc_spmd_kernel_deinit(); - OMPRTL_NVPTX__kmpc_spmd_kernel_deinit, + /// Call to void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime); + OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2, /// Call to void __kmpc_kernel_prepare_parallel(void /// *outlined_function, int16_t /// IsOMPRuntimeInitialized); @@ -55,37 +56,27 @@ enum OpenMPRTLFunctionNVPTX { /// Call to int64_t __kmpc_shuffle_int64(int64_t element, /// int16_t lane_offset, int16_t warp_size); OMPRTL_NVPTX__kmpc_shuffle_int64, - /// Call to __kmpc_nvptx_parallel_reduce_nowait(kmp_int32 + /// Call to __kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc, kmp_int32 /// global_tid, kmp_int32 num_vars, size_t reduce_size, void* reduce_data, /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t /// lane_offset, int16_t shortCircuit), /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num)); - OMPRTL_NVPTX__kmpc_parallel_reduce_nowait, - /// Call to __kmpc_nvptx_simd_reduce_nowait(kmp_int32 - /// global_tid, 
kmp_int32 num_vars, size_t reduce_size, void* reduce_data, - /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t - /// lane_offset, int16_t shortCircuit), - /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num)); - OMPRTL_NVPTX__kmpc_simd_reduce_nowait, - /// Call to __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid, - /// int32_t num_vars, size_t reduce_size, void *reduce_data, - /// void (*kmp_ShuffleReductFctPtr)(void *rhs, int16_t lane_id, int16_t - /// lane_offset, int16_t shortCircuit), - /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), - /// void (*kmp_CopyToScratchpadFctPtr)(void *reduce_data, void * scratchpad, - /// int32_t index, int32_t width), - /// void (*kmp_LoadReduceFctPtr)(void *reduce_data, void * scratchpad, int32_t - /// index, int32_t width, int32_t reduce)) - OMPRTL_NVPTX__kmpc_teams_reduce_nowait, + OMPRTL_NVPTX__kmpc_parallel_reduce_nowait_v2, + /// Call to __kmpc_nvptx_teams_reduce_nowait_simple(ident_t *loc, kmp_int32 + /// global_tid, kmp_critical_name *lck) + OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_simple, + /// Call to __kmpc_nvptx_teams_end_reduce_nowait_simple(ident_t *loc, + /// kmp_int32 global_tid, kmp_critical_name *lck) + OMPRTL_NVPTX__kmpc_nvptx_teams_end_reduce_nowait_simple, /// Call to __kmpc_nvptx_end_reduce_nowait(int32_t global_tid); OMPRTL_NVPTX__kmpc_end_reduce_nowait, /// Call to void __kmpc_data_sharing_init_stack(); OMPRTL_NVPTX__kmpc_data_sharing_init_stack, /// Call to void __kmpc_data_sharing_init_stack_spmd(); OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd, - /// Call to void* __kmpc_data_sharing_push_stack(size_t size, + /// Call to void* __kmpc_data_sharing_coalesced_push_stack(size_t size, /// int16_t UseSharedMemory); - OMPRTL_NVPTX__kmpc_data_sharing_push_stack, + OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack, /// Call to void __kmpc_data_sharing_pop_stack(void *a); OMPRTL_NVPTX__kmpc_data_sharing_pop_stack, /// Call to void 
__kmpc_begin_sharing_variables(void ***args, @@ -100,6 +91,17 @@ enum OpenMPRTLFunctionNVPTX { OMPRTL_NVPTX__kmpc_parallel_level, /// Call to int8_t __kmpc_is_spmd_exec_mode(); OMPRTL_NVPTX__kmpc_is_spmd_exec_mode, + /// Call to void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode, + /// const void *buf, size_t size, int16_t is_shared, const void **res); + OMPRTL_NVPTX__kmpc_get_team_static_memory, + /// Call to void __kmpc_restore_team_static_memory(int16_t + /// isSPMDExecutionMode, int16_t is_shared); + OMPRTL_NVPTX__kmpc_restore_team_static_memory, + /// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); + OMPRTL__kmpc_barrier, + /// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32 + /// global_tid); + OMPRTL__kmpc_barrier_simple_spmd, }; /// Pre(post)-action for different OpenMP constructs specialized for NVPTX. @@ -142,19 +144,35 @@ public: /// a target region. The appropriate mode (SPMD|NON-SPMD) is set on entry /// to the target region and used by containing directives such as 'parallel' /// to emit optimized code. -class ExecutionModeRAII { +class ExecutionRuntimeModesRAII { private: - CGOpenMPRuntimeNVPTX::ExecutionMode SavedMode; - CGOpenMPRuntimeNVPTX::ExecutionMode &Mode; + CGOpenMPRuntimeNVPTX::ExecutionMode SavedExecMode = + CGOpenMPRuntimeNVPTX::EM_Unknown; + CGOpenMPRuntimeNVPTX::ExecutionMode &ExecMode; + bool SavedRuntimeMode = false; + bool *RuntimeMode = nullptr; public: - ExecutionModeRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &Mode, bool IsSPMD) - : Mode(Mode) { - SavedMode = Mode; - Mode = IsSPMD ? CGOpenMPRuntimeNVPTX::EM_SPMD - : CGOpenMPRuntimeNVPTX::EM_NonSPMD; + /// Constructor for Non-SPMD mode. + ExecutionRuntimeModesRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &ExecMode) + : ExecMode(ExecMode) { + SavedExecMode = ExecMode; + ExecMode = CGOpenMPRuntimeNVPTX::EM_NonSPMD; + } + /// Constructor for SPMD mode. 
+ ExecutionRuntimeModesRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &ExecMode, + bool &RuntimeMode, bool FullRuntimeMode) + : ExecMode(ExecMode), RuntimeMode(&RuntimeMode) { + SavedExecMode = ExecMode; + SavedRuntimeMode = RuntimeMode; + ExecMode = CGOpenMPRuntimeNVPTX::EM_SPMD; + RuntimeMode = FullRuntimeMode; + } + ~ExecutionRuntimeModesRAII() { + ExecMode = SavedExecMode; + if (RuntimeMode) + *RuntimeMode = SavedRuntimeMode; } - ~ExecutionModeRAII() { Mode = SavedMode; } }; /// GPU Configuration: This information can be derived from cuda registers, @@ -169,16 +187,113 @@ enum MachineConfiguration : unsigned { LaneIDMask = WarpSize - 1, /// Global memory alignment for performance. - GlobalMemoryAlignment = 256, -}; + GlobalMemoryAlignment = 128, -enum NamedBarrier : unsigned { - /// Synchronize on this barrier #ID using a named barrier primitive. - /// Only the subset of active threads in a parallel region arrive at the - /// barrier. - NB_Parallel = 1, + /// Maximal size of the shared memory buffer. 
+ SharedMemorySize = 128, }; +static const ValueDecl *getPrivateItem(const Expr *RefExpr) { + RefExpr = RefExpr->IgnoreParens(); + if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(RefExpr)) { + const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); + while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) + Base = TempASE->getBase()->IgnoreParenImpCasts(); + RefExpr = Base; + } else if (auto *OASE = dyn_cast<OMPArraySectionExpr>(RefExpr)) { + const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); + while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) + Base = TempOASE->getBase()->IgnoreParenImpCasts(); + while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) + Base = TempASE->getBase()->IgnoreParenImpCasts(); + RefExpr = Base; + } + RefExpr = RefExpr->IgnoreParenImpCasts(); + if (const auto *DE = dyn_cast<DeclRefExpr>(RefExpr)) + return cast<ValueDecl>(DE->getDecl()->getCanonicalDecl()); + const auto *ME = cast<MemberExpr>(RefExpr); + return cast<ValueDecl>(ME->getMemberDecl()->getCanonicalDecl()); +} + +typedef std::pair<CharUnits /*Align*/, const ValueDecl *> VarsDataTy; +static bool stable_sort_comparator(const VarsDataTy P1, const VarsDataTy P2) { + return P1.first > P2.first; +} + +static RecordDecl *buildRecordForGlobalizedVars( + ASTContext &C, ArrayRef<const ValueDecl *> EscapedDecls, + ArrayRef<const ValueDecl *> EscapedDeclsForTeams, + llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> + &MappedDeclsFields) { + if (EscapedDecls.empty() && EscapedDeclsForTeams.empty()) + return nullptr; + SmallVector<VarsDataTy, 4> GlobalizedVars; + for (const ValueDecl *D : EscapedDecls) + GlobalizedVars.emplace_back( + CharUnits::fromQuantity(std::max( + C.getDeclAlign(D).getQuantity(), + static_cast<CharUnits::QuantityType>(GlobalMemoryAlignment))), + D); + for (const ValueDecl *D : EscapedDeclsForTeams) + GlobalizedVars.emplace_back(C.getDeclAlign(D), D); + std::stable_sort(GlobalizedVars.begin(), GlobalizedVars.end(), + 
stable_sort_comparator); + // Build struct _globalized_locals_ty { + // /* globalized vars */[WarSize] align (max(decl_align, + // GlobalMemoryAlignment)) + // /* globalized vars */ for EscapedDeclsForTeams + // }; + RecordDecl *GlobalizedRD = C.buildImplicitRecord("_globalized_locals_ty"); + GlobalizedRD->startDefinition(); + llvm::SmallPtrSet<const ValueDecl *, 16> SingleEscaped( + EscapedDeclsForTeams.begin(), EscapedDeclsForTeams.end()); + for (const auto &Pair : GlobalizedVars) { + const ValueDecl *VD = Pair.second; + QualType Type = VD->getType(); + if (Type->isLValueReferenceType()) + Type = C.getPointerType(Type.getNonReferenceType()); + else + Type = Type.getNonReferenceType(); + SourceLocation Loc = VD->getLocation(); + FieldDecl *Field; + if (SingleEscaped.count(VD)) { + Field = FieldDecl::Create( + C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type, + C.getTrivialTypeSourceInfo(Type, SourceLocation()), + /*BW=*/nullptr, /*Mutable=*/false, + /*InitStyle=*/ICIS_NoInit); + Field->setAccess(AS_public); + if (VD->hasAttrs()) { + for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), + E(VD->getAttrs().end()); + I != E; ++I) + Field->addAttr(*I); + } + } else { + llvm::APInt ArraySize(32, WarpSize); + Type = C.getConstantArrayType(Type, ArraySize, ArrayType::Normal, 0); + Field = FieldDecl::Create( + C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type, + C.getTrivialTypeSourceInfo(Type, SourceLocation()), + /*BW=*/nullptr, /*Mutable=*/false, + /*InitStyle=*/ICIS_NoInit); + Field->setAccess(AS_public); + llvm::APInt Align(32, std::max(C.getDeclAlign(VD).getQuantity(), + static_cast<CharUnits::QuantityType>( + GlobalMemoryAlignment))); + Field->addAttr(AlignedAttr::CreateImplicit( + C, AlignedAttr::GNU_aligned, /*IsAlignmentExpr=*/true, + IntegerLiteral::Create(C, Align, + C.getIntTypeForBitwidth(32, /*Signed=*/0), + SourceLocation()))); + } + GlobalizedRD->addDecl(Field); + MappedDeclsFields.try_emplace(VD, Field); + } + 
GlobalizedRD->completeDefinition(); + return GlobalizedRD; +} + /// Get the list of variables that can escape their declaration context. class CheckVarsEscapingDeclContext final : public ConstStmtVisitor<CheckVarsEscapingDeclContext> { @@ -191,20 +306,10 @@ class CheckVarsEscapingDeclContext final bool AllEscaped = false; bool IsForCombinedParallelRegion = false; - static llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> - isDeclareTargetDeclaration(const ValueDecl *VD) { - for (const Decl *D : VD->redecls()) { - if (!D->hasAttrs()) - continue; - if (const auto *Attr = D->getAttr<OMPDeclareTargetDeclAttr>()) - return Attr->getMapType(); - } - return llvm::None; - } - void markAsEscaped(const ValueDecl *VD) { // Do not globalize declare target variables. - if (!isa<VarDecl>(VD) || isDeclareTargetDeclaration(VD)) + if (!isa<VarDecl>(VD) || + OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) return; VD = cast<ValueDecl>(VD->getCanonicalDecl()); // Variables captured by value must be globalized. 
@@ -218,9 +323,11 @@ class CheckVarsEscapingDeclContext final const auto *Attr = FD->getAttr<OMPCaptureKindAttr>(); if (!Attr) return; - if (!isOpenMPPrivate( - static_cast<OpenMPClauseKind>(Attr->getCaptureKind())) || - Attr->getCaptureKind() == OMPC_map) + if (((Attr->getCaptureKind() != OMPC_map) && + !isOpenMPPrivate( + static_cast<OpenMPClauseKind>(Attr->getCaptureKind()))) || + ((Attr->getCaptureKind() == OMPC_map) && + !FD->getType()->isAnyPointerType())) return; } if (!FD->getType()->isReferenceType()) { @@ -302,55 +409,24 @@ class CheckVarsEscapingDeclContext final } } - typedef std::pair<CharUnits /*Align*/, const ValueDecl *> VarsDataTy; - static bool stable_sort_comparator(const VarsDataTy P1, const VarsDataTy P2) { - return P1.first > P2.first; - } - - void buildRecordForGlobalizedVars() { + void buildRecordForGlobalizedVars(bool IsInTTDRegion) { assert(!GlobalizedRD && "Record for globalized variables is built already."); - if (EscapedDecls.empty()) - return; - ASTContext &C = CGF.getContext(); - SmallVector<VarsDataTy, 4> GlobalizedVars; - for (const ValueDecl *D : EscapedDecls) - GlobalizedVars.emplace_back(C.getDeclAlign(D), D); - std::stable_sort(GlobalizedVars.begin(), GlobalizedVars.end(), - stable_sort_comparator); - // Build struct _globalized_locals_ty { - // /* globalized vars */ - // }; - GlobalizedRD = C.buildImplicitRecord("_globalized_locals_ty"); - GlobalizedRD->startDefinition(); - for (const auto &Pair : GlobalizedVars) { - const ValueDecl *VD = Pair.second; - QualType Type = VD->getType(); - if (Type->isLValueReferenceType()) - Type = C.getPointerType(Type.getNonReferenceType()); - else - Type = Type.getNonReferenceType(); - SourceLocation Loc = VD->getLocation(); - auto *Field = FieldDecl::Create( - C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type, - C.getTrivialTypeSourceInfo(Type, SourceLocation()), - /*BW=*/nullptr, /*Mutable=*/false, - /*InitStyle=*/ICIS_NoInit); - Field->setAccess(AS_public); - GlobalizedRD->addDecl(Field); 
- if (VD->hasAttrs()) { - for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), - E(VD->getAttrs().end()); - I != E; ++I) - Field->addAttr(*I); - } - MappedDeclsFields.try_emplace(VD, Field); - } - GlobalizedRD->completeDefinition(); + ArrayRef<const ValueDecl *> EscapedDeclsForParallel, EscapedDeclsForTeams; + if (IsInTTDRegion) + EscapedDeclsForTeams = EscapedDecls.getArrayRef(); + else + EscapedDeclsForParallel = EscapedDecls.getArrayRef(); + GlobalizedRD = ::buildRecordForGlobalizedVars( + CGF.getContext(), EscapedDeclsForParallel, EscapedDeclsForTeams, + MappedDeclsFields); } public: - CheckVarsEscapingDeclContext(CodeGenFunction &CGF) : CGF(CGF) {} + CheckVarsEscapingDeclContext(CodeGenFunction &CGF, + ArrayRef<const ValueDecl *> TeamsReductions) + : CGF(CGF), EscapedDecls(TeamsReductions.begin(), TeamsReductions.end()) { + } virtual ~CheckVarsEscapingDeclContext() = default; void VisitDeclStmt(const DeclStmt *S) { if (!S) @@ -492,9 +568,9 @@ public: /// Returns the record that handles all the escaped local variables and used /// instead of their original storage. - const RecordDecl *getGlobalizedRecord() { + const RecordDecl *getGlobalizedRecord(bool IsInTTDRegion) { if (!GlobalizedRD) - buildRecordForGlobalizedVars(); + buildRecordForGlobalizedVars(IsInTTDRegion); return GlobalizedRD; } @@ -568,31 +644,6 @@ static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) { "nvptx_num_threads"); } -/// Get barrier to synchronize all threads in a block. -static void getNVPTXCTABarrier(CodeGenFunction &CGF) { - CGF.EmitRuntimeCall(llvm::Intrinsic::getDeclaration( - &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0)); -} - -/// Get barrier #ID to synchronize selected (multiple of warp size) threads in -/// a CTA. 
-static void getNVPTXBarrier(CodeGenFunction &CGF, int ID, - llvm::Value *NumThreads) { - CGBuilderTy &Bld = CGF.Builder; - llvm::Value *Args[] = {Bld.getInt32(ID), NumThreads}; - CGF.EmitRuntimeCall(llvm::Intrinsic::getDeclaration( - &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier), - Args); -} - -/// Synchronize all GPU threads in a block. -static void syncCTAThreads(CodeGenFunction &CGF) { getNVPTXCTABarrier(CGF); } - -/// Synchronize worker threads in a parallel region. -static void syncParallelThreads(CodeGenFunction &CGF, llvm::Value *NumThreads) { - return getNVPTXBarrier(CGF, NB_Parallel, NumThreads); -} - /// Get the value of the thread_limit clause in the teams directive. /// For the 'generic' execution mode, the runtime encodes thread_limit in /// the launch parameters, always starting thread_limit+warpSize threads per @@ -654,12 +705,58 @@ getDataSharingMode(CodeGenModule &CGM) { : CGOpenMPRuntimeNVPTX::Generic; } +/// Checks if the expression is constant or does not have non-trivial function +/// calls. +static bool isTrivial(ASTContext &Ctx, const Expr * E) { + // We can skip constant expressions. + // We can skip expressions with trivial calls or simple expressions. + return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || + !E->hasNonTrivialCall(Ctx)) && + !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); +} + /// Checks if the \p Body is the \a CompoundStmt and returns its child statement -/// iff there is only one. -static const Stmt *getSingleCompoundChild(const Stmt *Body) { - if (const auto *C = dyn_cast<CompoundStmt>(Body)) - if (C->size() == 1) - return C->body_front(); +/// iff there is only one that is not evaluatable at the compile time. 
+static const Stmt *getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body) { + if (const auto *C = dyn_cast<CompoundStmt>(Body)) { + const Stmt *Child = nullptr; + for (const Stmt *S : C->body()) { + if (const auto *E = dyn_cast<Expr>(S)) { + if (isTrivial(Ctx, E)) + continue; + } + // Some of the statements can be ignored. + if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || + isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) + continue; + // Analyze declarations. + if (const auto *DS = dyn_cast<DeclStmt>(S)) { + if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { + if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || + isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || + isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || + isa<UsingDirectiveDecl>(D) || + isa<OMPDeclareReductionDecl>(D) || + isa<OMPThreadPrivateDecl>(D)) + return true; + const auto *VD = dyn_cast<VarDecl>(D); + if (!VD) + return false; + return VD->isConstexpr() || + ((VD->getType().isTrivialType(Ctx) || + VD->getType()->isReferenceType()) && + (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); + })) + continue; + } + // Found multiple children - cannot get the one child only. 
+ if (Child) + return Body; + Child = S; + } + if (Child) + return Child; + } return Body; } @@ -686,8 +783,9 @@ static bool hasParallelIfNumThreadsClause(ASTContext &Ctx, static bool hasNestedSPMDDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { const auto *CS = D.getInnermostCapturedStmt(); - const auto *Body = CS->getCapturedStmt()->IgnoreContainers(); - const Stmt *ChildStmt = getSingleCompoundChild(Body); + const auto *Body = + CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); + const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body); if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) { OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); @@ -696,27 +794,215 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx, if (isOpenMPParallelDirective(DKind) && !hasParallelIfNumThreadsClause(Ctx, *NestedDir)) return true; - if (DKind == OMPD_teams || DKind == OMPD_teams_distribute) { - Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(); + if (DKind == OMPD_teams) { + Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( + /*IgnoreCaptured=*/true); if (!Body) return false; - ChildStmt = getSingleCompoundChild(Body); + ChildStmt = getSingleCompoundChild(Ctx, Body); if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { DKind = NND->getDirectiveKind(); if (isOpenMPParallelDirective(DKind) && !hasParallelIfNumThreadsClause(Ctx, *NND)) return true; - if (DKind == OMPD_distribute) { - Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(); + } + } + return false; + case OMPD_target_teams: + return isOpenMPParallelDirective(DKind) && + !hasParallelIfNumThreadsClause(Ctx, *NestedDir); + case OMPD_target_simd: + case OMPD_target_parallel: + case OMPD_target_parallel_for: + case OMPD_target_parallel_for_simd: + case OMPD_target_teams_distribute: + case OMPD_target_teams_distribute_simd: + case OMPD_target_teams_distribute_parallel_for: + case 
OMPD_target_teams_distribute_parallel_for_simd: + case OMPD_parallel: + case OMPD_for: + case OMPD_parallel_for: + case OMPD_parallel_sections: + case OMPD_for_simd: + case OMPD_parallel_for_simd: + case OMPD_cancel: + case OMPD_cancellation_point: + case OMPD_ordered: + case OMPD_threadprivate: + case OMPD_task: + case OMPD_simd: + case OMPD_sections: + case OMPD_section: + case OMPD_single: + case OMPD_master: + case OMPD_critical: + case OMPD_taskyield: + case OMPD_barrier: + case OMPD_taskwait: + case OMPD_taskgroup: + case OMPD_atomic: + case OMPD_flush: + case OMPD_teams: + case OMPD_target_data: + case OMPD_target_exit_data: + case OMPD_target_enter_data: + case OMPD_distribute: + case OMPD_distribute_simd: + case OMPD_distribute_parallel_for: + case OMPD_distribute_parallel_for_simd: + case OMPD_teams_distribute: + case OMPD_teams_distribute_simd: + case OMPD_teams_distribute_parallel_for: + case OMPD_teams_distribute_parallel_for_simd: + case OMPD_target_update: + case OMPD_declare_simd: + case OMPD_declare_target: + case OMPD_end_declare_target: + case OMPD_declare_reduction: + case OMPD_taskloop: + case OMPD_taskloop_simd: + case OMPD_requires: + case OMPD_unknown: + llvm_unreachable("Unexpected directive."); + } + } + + return false; +} + +static bool supportsSPMDExecutionMode(ASTContext &Ctx, + const OMPExecutableDirective &D) { + OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); + switch (DirectiveKind) { + case OMPD_target: + case OMPD_target_teams: + return hasNestedSPMDDirective(Ctx, D); + case OMPD_target_parallel: + case OMPD_target_parallel_for: + case OMPD_target_parallel_for_simd: + case OMPD_target_teams_distribute_parallel_for: + case OMPD_target_teams_distribute_parallel_for_simd: + return !hasParallelIfNumThreadsClause(Ctx, D); + case OMPD_target_simd: + case OMPD_target_teams_distribute: + case OMPD_target_teams_distribute_simd: + return false; + case OMPD_parallel: + case OMPD_for: + case OMPD_parallel_for: + case 
OMPD_parallel_sections: + case OMPD_for_simd: + case OMPD_parallel_for_simd: + case OMPD_cancel: + case OMPD_cancellation_point: + case OMPD_ordered: + case OMPD_threadprivate: + case OMPD_task: + case OMPD_simd: + case OMPD_sections: + case OMPD_section: + case OMPD_single: + case OMPD_master: + case OMPD_critical: + case OMPD_taskyield: + case OMPD_barrier: + case OMPD_taskwait: + case OMPD_taskgroup: + case OMPD_atomic: + case OMPD_flush: + case OMPD_teams: + case OMPD_target_data: + case OMPD_target_exit_data: + case OMPD_target_enter_data: + case OMPD_distribute: + case OMPD_distribute_simd: + case OMPD_distribute_parallel_for: + case OMPD_distribute_parallel_for_simd: + case OMPD_teams_distribute: + case OMPD_teams_distribute_simd: + case OMPD_teams_distribute_parallel_for: + case OMPD_teams_distribute_parallel_for_simd: + case OMPD_target_update: + case OMPD_declare_simd: + case OMPD_declare_target: + case OMPD_end_declare_target: + case OMPD_declare_reduction: + case OMPD_taskloop: + case OMPD_taskloop_simd: + case OMPD_requires: + case OMPD_unknown: + break; + } + llvm_unreachable( + "Unknown programming model for OpenMP directive on NVPTX target."); +} + +/// Check if the directive is loops based and has schedule clause at all or has +/// static scheduling. 
+static bool hasStaticScheduling(const OMPExecutableDirective &D) { + assert(isOpenMPWorksharingDirective(D.getDirectiveKind()) && + isOpenMPLoopDirective(D.getDirectiveKind()) && + "Expected loop-based directive."); + return !D.hasClausesOfKind<OMPOrderedClause>() && + (!D.hasClausesOfKind<OMPScheduleClause>() || + llvm::any_of(D.getClausesOfKind<OMPScheduleClause>(), + [](const OMPScheduleClause *C) { + return C->getScheduleKind() == OMPC_SCHEDULE_static; + })); +} + +/// Check for inner (nested) lightweight runtime construct, if any +static bool hasNestedLightweightDirective(ASTContext &Ctx, + const OMPExecutableDirective &D) { + assert(supportsSPMDExecutionMode(Ctx, D) && "Expected SPMD mode directive."); + const auto *CS = D.getInnermostCapturedStmt(); + const auto *Body = + CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); + const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body); + + if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) { + OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); + switch (D.getDirectiveKind()) { + case OMPD_target: + if (isOpenMPParallelDirective(DKind) && + isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) && + hasStaticScheduling(*NestedDir)) + return true; + if (DKind == OMPD_parallel) { + Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( + /*IgnoreCaptured=*/true); + if (!Body) + return false; + ChildStmt = getSingleCompoundChild(Ctx, Body); + if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { + DKind = NND->getDirectiveKind(); + if (isOpenMPWorksharingDirective(DKind) && + isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND)) + return true; + } + } else if (DKind == OMPD_teams) { + Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( + /*IgnoreCaptured=*/true); + if (!Body) + return false; + ChildStmt = getSingleCompoundChild(Ctx, Body); + if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { + DKind 
= NND->getDirectiveKind(); + if (isOpenMPParallelDirective(DKind) && + isOpenMPWorksharingDirective(DKind) && + isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND)) + return true; + if (DKind == OMPD_parallel) { + Body = NND->getInnermostCapturedStmt()->IgnoreContainers( + /*IgnoreCaptured=*/true); if (!Body) return false; - ChildStmt = getSingleCompoundChild(Body); - if (!ChildStmt) - return false; + ChildStmt = getSingleCompoundChild(Ctx, Body); if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { DKind = NND->getDirectiveKind(); - return isOpenMPParallelDirective(DKind) && - !hasParallelIfNumThreadsClause(Ctx, *NND); + if (isOpenMPWorksharingDirective(DKind) && + isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND)) + return true; } } } @@ -724,25 +1010,28 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx, return false; case OMPD_target_teams: if (isOpenMPParallelDirective(DKind) && - !hasParallelIfNumThreadsClause(Ctx, *NestedDir)) + isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) && + hasStaticScheduling(*NestedDir)) return true; - if (DKind == OMPD_distribute) { - Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(); + if (DKind == OMPD_parallel) { + Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( + /*IgnoreCaptured=*/true); if (!Body) return false; - ChildStmt = getSingleCompoundChild(Body); + ChildStmt = getSingleCompoundChild(Ctx, Body); if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) { DKind = NND->getDirectiveKind(); - return isOpenMPParallelDirective(DKind) && - !hasParallelIfNumThreadsClause(Ctx, *NND); + if (isOpenMPWorksharingDirective(DKind) && + isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND)) + return true; } } return false; + case OMPD_target_parallel: + return isOpenMPWorksharingDirective(DKind) && + isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NestedDir); case OMPD_target_teams_distribute: - return isOpenMPParallelDirective(DKind) 
&& - !hasParallelIfNumThreadsClause(Ctx, *NestedDir); case OMPD_target_simd: - case OMPD_target_parallel: case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: case OMPD_target_teams_distribute_simd: @@ -790,6 +1079,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx, case OMPD_declare_reduction: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_requires: case OMPD_unknown: llvm_unreachable("Unexpected directive."); } @@ -798,21 +1088,26 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx, return false; } -static bool supportsSPMDExecutionMode(ASTContext &Ctx, - const OMPExecutableDirective &D) { +/// Checks if the construct supports lightweight runtime. It must be SPMD +/// construct + inner loop-based construct with static scheduling. +static bool supportsLightweightRuntime(ASTContext &Ctx, + const OMPExecutableDirective &D) { + if (!supportsSPMDExecutionMode(Ctx, D)) + return false; OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); switch (DirectiveKind) { case OMPD_target: case OMPD_target_teams: - case OMPD_target_teams_distribute: - return hasNestedSPMDDirective(Ctx, D); case OMPD_target_parallel: + return hasNestedLightweightDirective(Ctx, D); case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: case OMPD_target_teams_distribute_parallel_for: case OMPD_target_teams_distribute_parallel_for_simd: - return !hasParallelIfNumThreadsClause(Ctx, D); + // (Last|First)-privates must be shared in parallel region. 
+ return hasStaticScheduling(D); case OMPD_target_simd: + case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: return false; case OMPD_parallel: @@ -857,6 +1152,7 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx, case OMPD_declare_reduction: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_requires: case OMPD_unknown: break; } @@ -870,9 +1166,9 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDKernel(const OMPExecutableDirective &D, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { - ExecutionModeRAII ModeRAII(CurrentExecutionMode, /*IsSPMD=*/false); + ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode); EntryFunctionState EST; - WorkerFunctionState WST(CGM, D.getLocStart()); + WorkerFunctionState WST(CGM, D.getBeginLoc()); Work.clear(); WrapperFunctionsMap.clear(); @@ -886,17 +1182,35 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDKernel(const OMPExecutableDirective &D, CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST) : EST(EST), WST(WST) {} void Enter(CodeGenFunction &CGF) override { - static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime()) - .emitNonSPMDEntryHeader(CGF, EST, WST); + auto &RT = + static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime()); + RT.emitNonSPMDEntryHeader(CGF, EST, WST); + // Skip target region initialization. + RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true); } void Exit(CodeGenFunction &CGF) override { - static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime()) - .emitNonSPMDEntryFooter(CGF, EST); + auto &RT = + static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime()); + RT.clearLocThreadIdInsertPt(CGF); + RT.emitNonSPMDEntryFooter(CGF, EST); } } Action(EST, WST); CodeGen.setAction(Action); + IsInTTDRegion = true; + // Reserve place for the globalized memory. 
+ GlobalizedRecords.emplace_back(); + if (!KernelStaticGlobalized) { + KernelStaticGlobalized = new llvm::GlobalVariable( + CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false, + llvm::GlobalValue::InternalLinkage, + llvm::ConstantPointerNull::get(CGM.VoidPtrTy), + "_openmp_kernel_static_glob_rd$ptr", /*InsertBefore=*/nullptr, + llvm::GlobalValue::NotThreadLocal, + CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared)); + } emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); + IsInTTDRegion = false; // Now change the name of the worker function to correspond to this target // region's entry function. @@ -984,7 +1298,10 @@ void CGOpenMPRuntimeNVPTX::emitSPMDKernel(const OMPExecutableDirective &D, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { - ExecutionModeRAII ModeRAII(CurrentExecutionMode, /*IsSPMD=*/true); + ExecutionRuntimeModesRAII ModeRAII( + CurrentExecutionMode, RequiresFullRuntime, + CGM.getLangOpts().OpenMPCUDAForceFullRuntime || + !supportsLightweightRuntime(CGM.getContext(), D)); EntryFunctionState EST; // Emit target region as a standalone region. @@ -1000,14 +1317,30 @@ void CGOpenMPRuntimeNVPTX::emitSPMDKernel(const OMPExecutableDirective &D, : RT(RT), EST(EST), D(D) {} void Enter(CodeGenFunction &CGF) override { RT.emitSPMDEntryHeader(CGF, EST, D); + // Skip target region initialization. + RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true); } void Exit(CodeGenFunction &CGF) override { + RT.clearLocThreadIdInsertPt(CGF); RT.emitSPMDEntryFooter(CGF, EST); } } Action(*this, EST, D); CodeGen.setAction(Action); + IsInTTDRegion = true; + // Reserve place for the globalized memory. 
+ GlobalizedRecords.emplace_back(); + if (!KernelStaticGlobalized) { + KernelStaticGlobalized = new llvm::GlobalVariable( + CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false, + llvm::GlobalValue::InternalLinkage, + llvm::ConstantPointerNull::get(CGM.VoidPtrTy), + "_openmp_kernel_static_glob_rd$ptr", /*InsertBefore=*/nullptr, + llvm::GlobalValue::NotThreadLocal, + CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared)); + } emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); + IsInTTDRegion = false; } void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader( @@ -1019,19 +1352,18 @@ void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader( llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute"); EST.ExitBB = CGF.createBasicBlock(".exit"); - // Initialize the OMP state in the runtime; called by all active threads. - // TODO: Set RequiresOMPRuntime and RequiresDataSharing parameters - // based on code analysis of the target region. llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true), - /*RequiresOMPRuntime=*/Bld.getInt16(1), - /*RequiresDataSharing=*/Bld.getInt16(1)}; + /*RequiresOMPRuntime=*/ + Bld.getInt16(RequiresFullRuntime ? 1 : 0), + /*RequiresDataSharing=*/Bld.getInt16(0)}; CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args); - // For data sharing, we need to initialize the stack. - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction( - OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd)); + if (RequiresFullRuntime) { + // For data sharing, we need to initialize the stack. + CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd)); + } CGF.EmitBranch(ExecuteBB); @@ -1054,8 +1386,11 @@ void CGOpenMPRuntimeNVPTX::emitSPMDEntryFooter(CodeGenFunction &CGF, CGF.EmitBlock(OMPDeInitBB); // DeInitialize the OMP state in the runtime; called by all active threads. 
+ llvm::Value *Args[] = {/*RequiresOMPRuntime=*/ + CGF.Builder.getInt16(RequiresFullRuntime ? 1 : 0)}; CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_deinit), None); + createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2), Args); CGF.EmitBranch(EST.ExitBB); CGF.EmitBlock(EST.ExitBB); @@ -1142,6 +1477,8 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF, // Signal start of parallel region. CGF.EmitBlock(ExecuteBB); + // Skip initialization. + setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true); // Process work items: outlined parallel functions. for (llvm::Function *W : Work) { @@ -1202,6 +1539,8 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF, // Exit target region. CGF.EmitBlock(ExitBB); + // Skip initialization. + clearLocThreadIdInsertPt(CGF); } /// Returns specified OpenMP runtime function for the current OpenMP @@ -1238,11 +1577,12 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_init"); break; } - case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit: { - // Build void __kmpc_spmd_kernel_deinit(); + case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2: { + // Build void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime); + llvm::Type *TypeParams[] = {CGM.Int16Ty}; auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit"); + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit_v2"); break; } case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: { @@ -1307,12 +1647,12 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int64"); break; } - case OMPRTL_NVPTX__kmpc_parallel_reduce_nowait: { - // Build int32_t kmpc_nvptx_parallel_reduce_nowait(kmp_int32 
global_tid, - // kmp_int32 num_vars, size_t reduce_size, void* reduce_data, - // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t - // lane_offset, int16_t Algorithm Version), - // void (*kmp_InterWarpCopyFctPtr)(void* src, int warp_num)); + case OMPRTL_NVPTX__kmpc_parallel_reduce_nowait_v2: { + // Build int32_t kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc, + // kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void* + // reduce_data, void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t + // lane_id, int16_t lane_offset, int16_t Algorithm Version), void + // (*kmp_InterWarpCopyFctPtr)(void* src, int warp_num)); llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty, CGM.Int16Ty, CGM.Int16Ty}; auto *ShuffleReduceFnTy = @@ -1322,7 +1662,8 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { auto *InterWarpCopyFnTy = llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams, /*isVarArg=*/false); - llvm::Type *TypeParams[] = {CGM.Int32Ty, + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), + CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, CGM.VoidPtrTy, @@ -1331,86 +1672,40 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait"); + FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait_v2"); break; } - case OMPRTL_NVPTX__kmpc_simd_reduce_nowait: { - // Build int32_t kmpc_nvptx_simd_reduce_nowait(kmp_int32 global_tid, - // kmp_int32 num_vars, size_t reduce_size, void* reduce_data, - // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t - // lane_offset, int16_t Algorithm Version), - // void (*kmp_InterWarpCopyFctPtr)(void* src, int warp_num)); - llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty, - CGM.Int16Ty, CGM.Int16Ty}; - auto *ShuffleReduceFnTy = - 
llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams, - /*isVarArg=*/false); - llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty}; - auto *InterWarpCopyFnTy = - llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams, - /*isVarArg=*/false); - llvm::Type *TypeParams[] = {CGM.Int32Ty, - CGM.Int32Ty, - CGM.SizeTy, - CGM.VoidPtrTy, - ShuffleReduceFnTy->getPointerTo(), - InterWarpCopyFnTy->getPointerTo()}; + case OMPRTL_NVPTX__kmpc_end_reduce_nowait: { + // Build __kmpc_end_reduce_nowait(kmp_int32 global_tid); + llvm::Type *TypeParams[] = {CGM.Int32Ty}; auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_nvptx_simd_reduce_nowait"); + FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait"); break; } - case OMPRTL_NVPTX__kmpc_teams_reduce_nowait: { - // Build int32_t __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid, - // int32_t num_vars, size_t reduce_size, void *reduce_data, - // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t - // lane_offset, int16_t shortCircuit), - // void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), - // void (*kmp_CopyToScratchpadFctPtr)(void *reduce_data, void * scratchpad, - // int32_t index, int32_t width), - // void (*kmp_LoadReduceFctPtr)(void *reduce_data, void * scratchpad, - // int32_t index, int32_t width, int32_t reduce)) - llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty, - CGM.Int16Ty, CGM.Int16Ty}; - auto *ShuffleReduceFnTy = - llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams, - /*isVarArg=*/false); - llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty}; - auto *InterWarpCopyFnTy = - llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams, - /*isVarArg=*/false); - llvm::Type *CopyToScratchpadTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, - CGM.Int32Ty, CGM.Int32Ty}; 
- auto *CopyToScratchpadFnTy = - llvm::FunctionType::get(CGM.VoidTy, CopyToScratchpadTypeParams, - /*isVarArg=*/false); - llvm::Type *LoadReduceTypeParams[] = { - CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.Int32Ty, CGM.Int32Ty, CGM.Int32Ty}; - auto *LoadReduceFnTy = - llvm::FunctionType::get(CGM.VoidTy, LoadReduceTypeParams, - /*isVarArg=*/false); - llvm::Type *TypeParams[] = {CGM.Int32Ty, - CGM.Int32Ty, - CGM.SizeTy, - CGM.VoidPtrTy, - ShuffleReduceFnTy->getPointerTo(), - InterWarpCopyFnTy->getPointerTo(), - CopyToScratchpadFnTy->getPointerTo(), - LoadReduceFnTy->getPointerTo()}; + case OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_simple: { + // Build __kmpc_nvptx_teams_reduce_nowait_simple(ident_t *loc, kmp_int32 + // global_tid, kmp_critical_name *lck) + llvm::Type *TypeParams[] = { + getIdentTyPointerTy(), CGM.Int32Ty, + llvm::PointerType::getUnqual(getKmpCriticalNameTy())}; auto *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait"); + FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait_simple"); break; } - case OMPRTL_NVPTX__kmpc_end_reduce_nowait: { - // Build __kmpc_end_reduce_nowait(kmp_int32 global_tid); - llvm::Type *TypeParams[] = {CGM.Int32Ty}; + case OMPRTL_NVPTX__kmpc_nvptx_teams_end_reduce_nowait_simple: { + // Build __kmpc_nvptx_teams_end_reduce_nowait_simple(ident_t *loc, kmp_int32 + // global_tid, kmp_critical_name *lck) + llvm::Type *TypeParams[] = { + getIdentTyPointerTy(), CGM.Int32Ty, + llvm::PointerType::getUnqual(getKmpCriticalNameTy())}; auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait"); + FnTy, /*Name=*/"__kmpc_nvptx_teams_end_reduce_nowait_simple"); break; } case OMPRTL_NVPTX__kmpc_data_sharing_init_stack: { @@ -1424,17 +1719,18 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { /// Build void 
__kmpc_data_sharing_init_stack_spmd(); auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false); - RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack_spmd"); + RTLFn = + CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack_spmd"); break; } - case OMPRTL_NVPTX__kmpc_data_sharing_push_stack: { - // Build void *__kmpc_data_sharing_push_stack(size_t size, + case OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack: { + // Build void *__kmpc_data_sharing_coalesced_push_stack(size_t size, // int16_t UseSharedMemory); llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty}; auto *FnTy = llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); RTLFn = CGM.CreateRuntimeFunction( - FnTy, /*Name=*/"__kmpc_data_sharing_push_stack"); + FnTy, /*Name=*/"__kmpc_data_sharing_coalesced_push_stack"); break; } case OMPRTL_NVPTX__kmpc_data_sharing_pop_stack: { @@ -1484,6 +1780,46 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_is_spmd_exec_mode"); break; } + case OMPRTL_NVPTX__kmpc_get_team_static_memory: { + // Build void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode, + // const void *buf, size_t size, int16_t is_shared, const void **res); + llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.VoidPtrTy, CGM.SizeTy, + CGM.Int16Ty, CGM.VoidPtrPtrTy}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_team_static_memory"); + break; + } + case OMPRTL_NVPTX__kmpc_restore_team_static_memory: { + // Build void __kmpc_restore_team_static_memory(int16_t isSPMDExecutionMode, + // int16_t is_shared); + llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.Int16Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = + CGM.CreateRuntimeFunction(FnTy, "__kmpc_restore_team_static_memory"); + break; + } + case OMPRTL__kmpc_barrier: { 
+ // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); + cast<llvm::Function>(RTLFn)->addFnAttr(llvm::Attribute::Convergent); + break; + } + case OMPRTL__kmpc_barrier_simple_spmd: { + // Build void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32 + // global_tid); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = + CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier_simple_spmd"); + cast<llvm::Function>(RTLFn)->addFnAttr(llvm::Attribute::Convergent); + break; + } } return RTLFn; } @@ -1530,6 +1866,37 @@ void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction( setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode); } +namespace { +LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); +/// Enum for accesseing the reserved_2 field of the ident_t struct. +enum ModeFlagsTy : unsigned { + /// Bit set to 1 when in SPMD mode. + KMP_IDENT_SPMD_MODE = 0x01, + /// Bit set to 1 when a simplified runtime is used. + KMP_IDENT_SIMPLE_RT_MODE = 0x02, + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/KMP_IDENT_SIMPLE_RT_MODE) +}; + +/// Special mode Undefined. Is the combination of Non-SPMD mode + SimpleRuntime. 
+static const ModeFlagsTy UndefinedMode = + (~KMP_IDENT_SPMD_MODE) & KMP_IDENT_SIMPLE_RT_MODE; +} // anonymous namespace + +unsigned CGOpenMPRuntimeNVPTX::getDefaultLocationReserved2Flags() const { + switch (getExecutionMode()) { + case EM_SPMD: + if (requiresFullRuntime()) + return KMP_IDENT_SPMD_MODE & (~KMP_IDENT_SIMPLE_RT_MODE); + return KMP_IDENT_SPMD_MODE | KMP_IDENT_SIMPLE_RT_MODE; + case EM_NonSPMD: + assert(requiresFullRuntime() && "Expected full runtime."); + return (~KMP_IDENT_SPMD_MODE) & (~KMP_IDENT_SIMPLE_RT_MODE); + case EM_Unknown: + return UndefinedMode; + } + llvm_unreachable("Unknown flags are requested."); +} + CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM) : CGOpenMPRuntime(CGM, "_", "$") { if (!CGM.getLangOpts().OpenMPIsDevice) @@ -1581,12 +1948,15 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction( } } Action(IsInParallelRegion); CodeGen.setAction(Action); + bool PrevIsInTTDRegion = IsInTTDRegion; + IsInTTDRegion = false; bool PrevIsInTargetMasterThreadRegion = IsInTargetMasterThreadRegion; IsInTargetMasterThreadRegion = false; auto *OutlinedFun = cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction( D, ThreadIDVar, InnermostKind, CodeGen)); IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion; + IsInTTDRegion = PrevIsInTTDRegion; if (getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD && !IsInParallelRegion) { llvm::Function *WrapperFun = @@ -1597,26 +1967,106 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction( return OutlinedFun; } +/// Get list of lastprivate variables from the teams distribute ... or +/// teams {distribute ...} directives. 
+static void +getDistributeLastprivateVars(ASTContext &Ctx, const OMPExecutableDirective &D, + llvm::SmallVectorImpl<const ValueDecl *> &Vars) { + assert(isOpenMPTeamsDirective(D.getDirectiveKind()) && + "expected teams directive."); + const OMPExecutableDirective *Dir = &D; + if (!isOpenMPDistributeDirective(D.getDirectiveKind())) { + if (const Stmt *S = getSingleCompoundChild( + Ctx, + D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers( + /*IgnoreCaptured=*/true))) { + Dir = dyn_cast<OMPExecutableDirective>(S); + if (Dir && !isOpenMPDistributeDirective(Dir->getDirectiveKind())) + Dir = nullptr; + } + } + if (!Dir) + return; + for (const auto *C : Dir->getClausesOfKind<OMPLastprivateClause>()) { + for (const Expr *E : C->getVarRefs()) + Vars.push_back(getPrivateItem(E)); + } +} + +/// Get list of reduction variables from the teams ... directives. +static void +getTeamsReductionVars(ASTContext &Ctx, const OMPExecutableDirective &D, + llvm::SmallVectorImpl<const ValueDecl *> &Vars) { + assert(isOpenMPTeamsDirective(D.getDirectiveKind()) && + "expected teams directive."); + for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { + for (const Expr *E : C->privates()) + Vars.push_back(getPrivateItem(E)); + } +} + llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { - SourceLocation Loc = D.getLocStart(); + SourceLocation Loc = D.getBeginLoc(); + + const RecordDecl *GlobalizedRD = nullptr; + llvm::SmallVector<const ValueDecl *, 4> LastPrivatesReductions; + llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields; + // Globalize team reductions variable unconditionally in all modes. 
+ getTeamsReductionVars(CGM.getContext(), D, LastPrivatesReductions); + if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) { + getDistributeLastprivateVars(CGM.getContext(), D, LastPrivatesReductions); + if (!LastPrivatesReductions.empty()) { + GlobalizedRD = ::buildRecordForGlobalizedVars( + CGM.getContext(), llvm::None, LastPrivatesReductions, + MappedDeclsFields); + } + } else if (!LastPrivatesReductions.empty()) { + assert(!TeamAndReductions.first && + "Previous team declaration is not expected."); + TeamAndReductions.first = D.getCapturedStmt(OMPD_teams)->getCapturedDecl(); + std::swap(TeamAndReductions.second, LastPrivatesReductions); + } // Emit target region as a standalone region. class NVPTXPrePostActionTy : public PrePostActionTy { SourceLocation &Loc; + const RecordDecl *GlobalizedRD; + llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> + &MappedDeclsFields; public: - NVPTXPrePostActionTy(SourceLocation &Loc) : Loc(Loc) {} + NVPTXPrePostActionTy( + SourceLocation &Loc, const RecordDecl *GlobalizedRD, + llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> + &MappedDeclsFields) + : Loc(Loc), GlobalizedRD(GlobalizedRD), + MappedDeclsFields(MappedDeclsFields) {} void Enter(CodeGenFunction &CGF) override { - static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime()) - .emitGenericVarsProlog(CGF, Loc); + auto &Rt = + static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime()); + if (GlobalizedRD) { + auto I = Rt.FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first; + I->getSecond().GlobalRecord = GlobalizedRD; + I->getSecond().MappedParams = + llvm::make_unique<CodeGenFunction::OMPMapVars>(); + DeclToAddrMapTy &Data = I->getSecond().LocalVarData; + for (const auto &Pair : MappedDeclsFields) { + assert(Pair.getFirst()->isCanonicalDecl() && + "Expected canonical declaration"); + Data.insert(std::make_pair(Pair.getFirst(), + MappedVarData(Pair.getSecond(), + /*IsOnePerTeam=*/true))); + } + } + Rt.emitGenericVarsProlog(CGF, Loc); } void 
Exit(CodeGenFunction &CGF) override { static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime()) .emitGenericVarsEpilog(CGF); } - } Action(Loc); + } Action(Loc, GlobalizedRD, MappedDeclsFields); CodeGen.setAction(Action); llvm::Value *OutlinedFunVal = CGOpenMPRuntime::emitTeamsOutlinedFunction( D, ThreadIDVar, InnermostKind, CodeGen); @@ -1629,8 +2079,10 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction( } void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF, - SourceLocation Loc) { - if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic) + SourceLocation Loc, + bool WithSPMDCheck) { + if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic && + getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD) return; CGBuilderTy &Bld = CGF.Builder; @@ -1639,33 +2091,187 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF, if (I == FunctionGlobalizedDecls.end()) return; if (const RecordDecl *GlobalizedVarsRecord = I->getSecond().GlobalRecord) { - QualType RecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord); + QualType GlobalRecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord); + QualType SecGlobalRecTy; // Recover pointer to this function's global record. The runtime will // handle the specifics of the allocation of the memory. // Use actual memory size of the record including the padding // for alignment purposes. unsigned Alignment = - CGM.getContext().getTypeAlignInChars(RecTy).getQuantity(); + CGM.getContext().getTypeAlignInChars(GlobalRecTy).getQuantity(); unsigned GlobalRecordSize = - CGM.getContext().getTypeSizeInChars(RecTy).getQuantity(); + CGM.getContext().getTypeSizeInChars(GlobalRecTy).getQuantity(); GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment); - // TODO: allow the usage of shared memory to be controlled by - // the user, for now, default to global. 
- llvm::Value *GlobalRecordSizeArg[] = { - llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), - CGF.Builder.getInt16(/*UseSharedMemory=*/0)}; - llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_push_stack), - GlobalRecordSizeArg); - llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( - GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo()); + + llvm::PointerType *GlobalRecPtrTy = + CGF.ConvertTypeForMem(GlobalRecTy)->getPointerTo(); + llvm::Value *GlobalRecCastAddr; + llvm::Value *IsTTD = nullptr; + if (!IsInTTDRegion && + (WithSPMDCheck || + getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown)) { + llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit"); + llvm::BasicBlock *SPMDBB = CGF.createBasicBlock(".spmd"); + llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd"); + if (I->getSecond().SecondaryGlobalRecord.hasValue()) { + llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *ThreadID = getThreadID(CGF, Loc); + llvm::Value *PL = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level), + {RTLoc, ThreadID}); + IsTTD = Bld.CreateIsNull(PL); + } + llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode))); + Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB); + // There is no need to emit line number for unconditional branch. + (void)ApplyDebugLocation::CreateEmpty(CGF); + CGF.EmitBlock(SPMDBB); + Address RecPtr = Address(llvm::ConstantPointerNull::get(GlobalRecPtrTy), + CharUnits::fromQuantity(Alignment)); + CGF.EmitBranch(ExitBB); + // There is no need to emit line number for unconditional branch. 
+ (void)ApplyDebugLocation::CreateEmpty(CGF); + CGF.EmitBlock(NonSPMDBB); + llvm::Value *Size = llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize); + if (const RecordDecl *SecGlobalizedVarsRecord = + I->getSecond().SecondaryGlobalRecord.getValueOr(nullptr)) { + SecGlobalRecTy = + CGM.getContext().getRecordType(SecGlobalizedVarsRecord); + + // Recover pointer to this function's global record. The runtime will + // handle the specifics of the allocation of the memory. + // Use actual memory size of the record including the padding + // for alignment purposes. + unsigned Alignment = + CGM.getContext().getTypeAlignInChars(SecGlobalRecTy).getQuantity(); + unsigned GlobalRecordSize = + CGM.getContext().getTypeSizeInChars(SecGlobalRecTy).getQuantity(); + GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment); + Size = Bld.CreateSelect( + IsTTD, llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), Size); + } + // TODO: allow the usage of shared memory to be controlled by + // the user, for now, default to global. 
+ llvm::Value *GlobalRecordSizeArg[] = { + Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)}; + llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack), + GlobalRecordSizeArg); + GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( + GlobalRecValue, GlobalRecPtrTy); + CGF.EmitBlock(ExitBB); + auto *Phi = Bld.CreatePHI(GlobalRecPtrTy, + /*NumReservedValues=*/2, "_select_stack"); + Phi->addIncoming(RecPtr.getPointer(), SPMDBB); + Phi->addIncoming(GlobalRecCastAddr, NonSPMDBB); + GlobalRecCastAddr = Phi; + I->getSecond().GlobalRecordAddr = Phi; + I->getSecond().IsInSPMDModeFlag = IsSPMD; + } else if (IsInTTDRegion) { + assert(GlobalizedRecords.back().Records.size() < 2 && + "Expected less than 2 globalized records: one for target and one " + "for teams."); + unsigned Offset = 0; + for (const RecordDecl *RD : GlobalizedRecords.back().Records) { + QualType RDTy = CGM.getContext().getRecordType(RD); + unsigned Alignment = + CGM.getContext().getTypeAlignInChars(RDTy).getQuantity(); + unsigned Size = CGM.getContext().getTypeSizeInChars(RDTy).getQuantity(); + Offset = + llvm::alignTo(llvm::alignTo(Offset, Alignment) + Size, Alignment); + } + unsigned Alignment = + CGM.getContext().getTypeAlignInChars(GlobalRecTy).getQuantity(); + Offset = llvm::alignTo(Offset, Alignment); + GlobalizedRecords.back().Records.push_back(GlobalizedVarsRecord); + ++GlobalizedRecords.back().RegionCounter; + if (GlobalizedRecords.back().Records.size() == 1) { + assert(KernelStaticGlobalized && + "Kernel static pointer must be initialized already."); + auto *UseSharedMemory = new llvm::GlobalVariable( + CGM.getModule(), CGM.Int16Ty, /*isConstant=*/true, + llvm::GlobalValue::InternalLinkage, nullptr, + "_openmp_static_kernel$is_shared"); + UseSharedMemory->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + QualType Int16Ty = CGM.getContext().getIntTypeForBitwidth( + /*DestWidth=*/16, /*Signed=*/0); + 
llvm::Value *IsInSharedMemory = CGF.EmitLoadOfScalar( + Address(UseSharedMemory, + CGM.getContext().getTypeAlignInChars(Int16Ty)), + /*Volatile=*/false, Int16Ty, Loc); + auto *StaticGlobalized = new llvm::GlobalVariable( + CGM.getModule(), CGM.Int8Ty, /*isConstant=*/false, + llvm::GlobalValue::CommonLinkage, nullptr); + auto *RecSize = new llvm::GlobalVariable( + CGM.getModule(), CGM.SizeTy, /*isConstant=*/true, + llvm::GlobalValue::InternalLinkage, nullptr, + "_openmp_static_kernel$size"); + RecSize->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + llvm::Value *Ld = CGF.EmitLoadOfScalar( + Address(RecSize, CGM.getSizeAlign()), /*Volatile=*/false, + CGM.getContext().getSizeType(), Loc); + llvm::Value *ResAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( + KernelStaticGlobalized, CGM.VoidPtrPtrTy); + llvm::Value *GlobalRecordSizeArg[] = { + llvm::ConstantInt::get( + CGM.Int16Ty, + getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD ? 1 : 0), + StaticGlobalized, Ld, IsInSharedMemory, ResAddr}; + CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_get_team_static_memory), + GlobalRecordSizeArg); + GlobalizedRecords.back().Buffer = StaticGlobalized; + GlobalizedRecords.back().RecSize = RecSize; + GlobalizedRecords.back().UseSharedMemory = UseSharedMemory; + GlobalizedRecords.back().Loc = Loc; + } + assert(KernelStaticGlobalized && "Global address must be set already."); + Address FrameAddr = CGF.EmitLoadOfPointer( + Address(KernelStaticGlobalized, CGM.getPointerAlign()), + CGM.getContext() + .getPointerType(CGM.getContext().VoidPtrTy) + .castAs<PointerType>()); + llvm::Value *GlobalRecValue = + Bld.CreateConstInBoundsGEP(FrameAddr, Offset, CharUnits::One()) + .getPointer(); + I->getSecond().GlobalRecordAddr = GlobalRecValue; + I->getSecond().IsInSPMDModeFlag = nullptr; + GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( + GlobalRecValue, CGF.ConvertTypeForMem(GlobalRecTy)->getPointerTo()); + } else { + // TODO: allow the usage of 
shared memory to be controlled by + // the user, for now, default to global. + llvm::Value *GlobalRecordSizeArg[] = { + llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), + CGF.Builder.getInt16(/*UseSharedMemory=*/0)}; + llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack), + GlobalRecordSizeArg); + GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( + GlobalRecValue, GlobalRecPtrTy); + I->getSecond().GlobalRecordAddr = GlobalRecValue; + I->getSecond().IsInSPMDModeFlag = nullptr; + } LValue Base = - CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, RecTy); - I->getSecond().GlobalRecordAddr = GlobalRecValue; + CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, GlobalRecTy); // Emit the "global alloca" which is a GEP from the global declaration // record using the pointer returned by the runtime. + LValue SecBase; + decltype(I->getSecond().LocalVarData)::const_iterator SecIt; + if (IsTTD) { + SecIt = I->getSecond().SecondaryLocalVarData->begin(); + llvm::PointerType *SecGlobalRecPtrTy = + CGF.ConvertTypeForMem(SecGlobalRecTy)->getPointerTo(); + SecBase = CGF.MakeNaturalAlignPointeeAddrLValue( + Bld.CreatePointerBitCastOrAddrSpaceCast( + I->getSecond().GlobalRecordAddr, SecGlobalRecPtrTy), + SecGlobalRecTy); + } for (auto &Rec : I->getSecond().LocalVarData) { bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first); llvm::Value *ParValue; @@ -1675,14 +2281,51 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF, CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); ParValue = CGF.EmitLoadOfScalar(ParLVal, Loc); } - const FieldDecl *FD = Rec.second.first; - LValue VarAddr = CGF.EmitLValueForField(Base, FD); - Rec.second.second = VarAddr.getAddress(); + LValue VarAddr = CGF.EmitLValueForField(Base, Rec.second.FD); + // Emit VarAddr basing on lane-id if required. 
+ QualType VarTy; + if (Rec.second.IsOnePerTeam) { + VarTy = Rec.second.FD->getType(); + } else { + llvm::Value *Ptr = CGF.Builder.CreateInBoundsGEP( + VarAddr.getAddress().getPointer(), + {Bld.getInt32(0), getNVPTXLaneID(CGF)}); + VarTy = + Rec.second.FD->getType()->castAsArrayTypeUnsafe()->getElementType(); + VarAddr = CGF.MakeAddrLValue( + Address(Ptr, CGM.getContext().getDeclAlign(Rec.first)), VarTy, + AlignmentSource::Decl); + } + Rec.second.PrivateAddr = VarAddr.getAddress(); + if (!IsInTTDRegion && + (WithSPMDCheck || + getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown)) { + assert(I->getSecond().IsInSPMDModeFlag && + "Expected unknown execution mode or required SPMD check."); + if (IsTTD) { + assert(SecIt->second.IsOnePerTeam && + "Secondary glob data must be one per team."); + LValue SecVarAddr = CGF.EmitLValueForField(SecBase, SecIt->second.FD); + VarAddr.setAddress( + Address(Bld.CreateSelect(IsTTD, SecVarAddr.getPointer(), + VarAddr.getPointer()), + VarAddr.getAlignment())); + Rec.second.PrivateAddr = VarAddr.getAddress(); + } + Address GlobalPtr = Rec.second.PrivateAddr; + Address LocalAddr = CGF.CreateMemTemp(VarTy, Rec.second.FD->getName()); + Rec.second.PrivateAddr = Address( + Bld.CreateSelect(I->getSecond().IsInSPMDModeFlag, + LocalAddr.getPointer(), GlobalPtr.getPointer()), + LocalAddr.getAlignment()); + } if (EscapedParam) { const auto *VD = cast<VarDecl>(Rec.first); CGF.EmitStoreOfScalar(ParValue, VarAddr); I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.getAddress()); } + if (IsTTD) + ++SecIt; } } for (const ValueDecl *VD : I->getSecond().EscapedVariableLengthDecls) { @@ -1704,7 +2347,8 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF, llvm::Value *GlobalRecordSizeArg[] = { Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)}; llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_push_stack), + createNVPTXRuntimeFunction( + 
OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack), GlobalRecordSizeArg); llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( GlobalRecValue, CGF.ConvertTypeForMem(VD->getType())->getPointerTo()); @@ -1718,8 +2362,10 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF, I->getSecond().MappedParams->apply(CGF); } -void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF) { - if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic) +void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF, + bool WithSPMDCheck) { + if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic && + getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD) return; const auto I = FunctionGlobalizedDecls.find(CGF.CurFn); @@ -1734,9 +2380,48 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF) { Addr); } if (I->getSecond().GlobalRecordAddr) { - CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), - I->getSecond().GlobalRecordAddr); + if (!IsInTTDRegion && + (WithSPMDCheck || + getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown)) { + CGBuilderTy &Bld = CGF.Builder; + llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit"); + llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd"); + Bld.CreateCondBr(I->getSecond().IsInSPMDModeFlag, ExitBB, NonSPMDBB); + // There is no need to emit line number for unconditional branch. + (void)ApplyDebugLocation::CreateEmpty(CGF); + CGF.EmitBlock(NonSPMDBB); + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), + CGF.EmitCastToVoidPtr(I->getSecond().GlobalRecordAddr)); + CGF.EmitBlock(ExitBB); + } else if (IsInTTDRegion) { + assert(GlobalizedRecords.back().RegionCounter > 0 && + "region counter must be > 0."); + --GlobalizedRecords.back().RegionCounter; + // Emit the restore function only in the target region. 
+ if (GlobalizedRecords.back().RegionCounter == 0) { + QualType Int16Ty = CGM.getContext().getIntTypeForBitwidth( + /*DestWidth=*/16, /*Signed=*/0); + llvm::Value *IsInSharedMemory = CGF.EmitLoadOfScalar( + Address(GlobalizedRecords.back().UseSharedMemory, + CGM.getContext().getTypeAlignInChars(Int16Ty)), + /*Volatile=*/false, Int16Ty, GlobalizedRecords.back().Loc); + llvm::Value *Args[] = { + llvm::ConstantInt::get( + CGM.Int16Ty, + getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD ? 1 : 0), + IsInSharedMemory}; + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_restore_team_static_memory), + Args); + } + } else { + CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_data_sharing_pop_stack), + I->getSecond().GlobalRecordAddr); + } } } } @@ -1830,7 +2515,7 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall( // passed from the outside of the target region. CodeGenFunction::OMPPrivateScope PrivateArgScope(CGF); - // There's somehting to share. + // There's something to share. if (!CapturedVars.empty()) { // Prepare for parallel region. Indicate the outlined function. Address SharedArgs = @@ -1884,30 +2569,24 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall( Work.emplace_back(WFn); }; - auto &&LNParallelGen = [this, Loc, &SeqGen, &L0ParallelGen, &CodeGen, - &ThreadIDAddr](CodeGenFunction &CGF, - PrePostActionTy &Action) { - RegionCodeGenTy RCG(CodeGen); + auto &&LNParallelGen = [this, Loc, &SeqGen, &L0ParallelGen]( + CodeGenFunction &CGF, PrePostActionTy &Action) { if (IsInParallelRegion) { SeqGen(CGF, Action); } else if (IsInTargetMasterThreadRegion) { L0ParallelGen(CGF, Action); - } else if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_NonSPMD) { - RCG(CGF); } else { // Check for master and then parallelism: // if (__kmpc_is_spmd_exec_mode() || __kmpc_parallel_level(loc, gtid)) { - // Serialized execution. - // } else if (master) { - // Worker call. + // Serialized execution. 
// } else { - // Outlined function call. + // Worker call. // } CGBuilderTy &Bld = CGF.Builder; llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit"); llvm::BasicBlock *SeqBB = CGF.createBasicBlock(".sequential"); llvm::BasicBlock *ParallelCheckBB = CGF.createBasicBlock(".parcheck"); - llvm::BasicBlock *MasterCheckBB = CGF.createBasicBlock(".mastercheck"); + llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master"); llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode))); Bld.CreateCondBr(IsSPMD, SeqBB, ParallelCheckBB); @@ -1920,29 +2599,17 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level), {RTLoc, ThreadID}); llvm::Value *Res = Bld.CreateIsNotNull(PL); - Bld.CreateCondBr(Res, SeqBB, MasterCheckBB); + Bld.CreateCondBr(Res, SeqBB, MasterBB); CGF.EmitBlock(SeqBB); SeqGen(CGF, Action); CGF.EmitBranch(ExitBB); // There is no need to emit line number for unconditional branch. (void)ApplyDebugLocation::CreateEmpty(CGF); - CGF.EmitBlock(MasterCheckBB); - llvm::BasicBlock *MasterThenBB = CGF.createBasicBlock("master.then"); - llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); - llvm::Value *IsMaster = - Bld.CreateICmpEQ(getNVPTXThreadID(CGF), getMasterThreadID(CGF)); - Bld.CreateCondBr(IsMaster, MasterThenBB, ElseBlock); - CGF.EmitBlock(MasterThenBB); + CGF.EmitBlock(MasterBB); L0ParallelGen(CGF, Action); CGF.EmitBranch(ExitBB); // There is no need to emit line number for unconditional branch. (void)ApplyDebugLocation::CreateEmpty(CGF); - CGF.EmitBlock(ElseBlock); - // In the worker need to use the real thread id. - ThreadIDAddr = emitThreadIDAddress(CGF, Loc); - RCG(CGF); - // There is no need to emit line number for unconditional branch. - (void)ApplyDebugLocation::CreateEmpty(CGF); // Emit the continuation block for code after the if. 
CGF.EmitBlock(ExitBB, /*IsFinished=*/true); } @@ -2013,6 +2680,34 @@ void CGOpenMPRuntimeNVPTX::emitSPMDParallelCall( } } +void CGOpenMPRuntimeNVPTX::syncCTAThreads(CodeGenFunction &CGF) { + // Always emit simple barriers! + if (!CGF.HaveInsertPoint()) + return; + // Build call __kmpc_barrier_simple_spmd(nullptr, 0); + // This function does not use parameters, so we can emit just default values. + llvm::Value *Args[] = { + llvm::ConstantPointerNull::get( + cast<llvm::PointerType>(getIdentTyPointerTy())), + llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/0, /*isSigned=*/true)}; + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier_simple_spmd), Args); +} + +void CGOpenMPRuntimeNVPTX::emitBarrierCall(CodeGenFunction &CGF, + SourceLocation Loc, + OpenMPDirectiveKind Kind, bool, + bool) { + // Always emit simple barriers! + if (!CGF.HaveInsertPoint()) + return; + // Build call __kmpc_cancel_barrier(loc, thread_id); + unsigned Flags = getDefaultFlagsForBarriers(Kind); + llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), + getThreadID(CGF, Loc)}; + CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier), Args); +} + void CGOpenMPRuntimeNVPTX::emitCriticalRegion( CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, @@ -2055,14 +2750,16 @@ void CGOpenMPRuntimeNVPTX::emitCriticalRegion( CGF.EmitBlock(BodyBB); // Output the critical statement. - CriticalOpGen(CGF); + CGOpenMPRuntime::emitCriticalRegion(CGF, CriticalName, CriticalOpGen, Loc, + Hint); // After the body surrounded by the critical region, the single executing // thread will jump to the synchronisation point. // Block waits for all threads in current team to finish then increments the // counter variable and returns to the loop. 
CGF.EmitBlock(SyncBB); - getNVPTXCTABarrier(CGF); + emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false, + /*ForceSimpleCall=*/true); llvm::Value *IncCounterVal = CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1)); @@ -2184,11 +2881,12 @@ static void shuffleAndStore(CodeGenFunction &CGF, Address SrcAddr, CGF, CGF.EmitLoadOfScalar(Ptr, /*Volatile=*/false, IntType, Loc), IntType, Offset, Loc); CGF.EmitStoreOfScalar(Res, ElemPtr, /*Volatile=*/false, IntType); - Ptr = Bld.CreateConstGEP(Ptr, 1, CharUnits::fromQuantity(IntSize)); - ElemPtr = + Address LocalPtr = + Bld.CreateConstGEP(Ptr, 1, CharUnits::fromQuantity(IntSize)); + Address LocalElemPtr = Bld.CreateConstGEP(ElemPtr, 1, CharUnits::fromQuantity(IntSize)); - PhiSrc->addIncoming(Ptr.getPointer(), ThenBB); - PhiDest->addIncoming(ElemPtr.getPointer(), ThenBB); + PhiSrc->addIncoming(LocalPtr.getPointer(), ThenBB); + PhiDest->addIncoming(LocalElemPtr.getPointer(), ThenBB); CGF.EmitBranch(PreCondBB); CGF.EmitBlock(ExitBB); } else { @@ -2414,235 +3112,18 @@ static void emitReductionListCopy( } } -/// This function emits a helper that loads data from the scratchpad array -/// and (optionally) reduces it with the input operand. -/// -/// load_and_reduce(local, scratchpad, index, width, should_reduce) -/// reduce_data remote; -/// for elem in remote: -/// remote.elem = Scratchpad[elem_id][index] -/// if (should_reduce) -/// local = local @ remote -/// else -/// local = remote -static llvm::Value *emitReduceScratchpadFunction( - CodeGenModule &CGM, ArrayRef<const Expr *> Privates, - QualType ReductionArrayTy, llvm::Value *ReduceFn, SourceLocation Loc) { - ASTContext &C = CGM.getContext(); - QualType Int32Ty = C.getIntTypeForBitwidth(32, /*Signed=*/1); - - // Destination of the copy. - ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - C.VoidPtrTy, ImplicitParamDecl::Other); - // Base address of the scratchpad array, with each element storing a - // Reduce list per team. 
- ImplicitParamDecl ScratchPadArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - C.VoidPtrTy, ImplicitParamDecl::Other); - // A source index into the scratchpad array. - ImplicitParamDecl IndexArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty, - ImplicitParamDecl::Other); - // Row width of an element in the scratchpad array, typically - // the number of teams. - ImplicitParamDecl WidthArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty, - ImplicitParamDecl::Other); - // If should_reduce == 1, then it's load AND reduce, - // If should_reduce == 0 (or otherwise), then it only loads (+ copy). - // The latter case is used for initialization. - ImplicitParamDecl ShouldReduceArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - Int32Ty, ImplicitParamDecl::Other); - - FunctionArgList Args; - Args.push_back(&ReduceListArg); - Args.push_back(&ScratchPadArg); - Args.push_back(&IndexArg); - Args.push_back(&WidthArg); - Args.push_back(&ShouldReduceArg); - - const CGFunctionInfo &CGFI = - CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - auto *Fn = llvm::Function::Create( - CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, - "_omp_reduction_load_and_reduce", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); - Fn->setDoesNotRecurse(); - CodeGenFunction CGF(CGM); - CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); - - CGBuilderTy &Bld = CGF.Builder; - - // Get local Reduce list pointer. 
- Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); - Address ReduceListAddr( - Bld.CreatePointerBitCastOrAddrSpaceCast( - CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false, - C.VoidPtrTy, Loc), - CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()), - CGF.getPointerAlign()); - - Address AddrScratchPadArg = CGF.GetAddrOfLocalVar(&ScratchPadArg); - llvm::Value *ScratchPadBase = CGF.EmitLoadOfScalar( - AddrScratchPadArg, /*Volatile=*/false, C.VoidPtrTy, Loc); - - Address AddrIndexArg = CGF.GetAddrOfLocalVar(&IndexArg); - llvm::Value *IndexVal = Bld.CreateIntCast( - CGF.EmitLoadOfScalar(AddrIndexArg, /*Volatile=*/false, Int32Ty, Loc), - CGM.SizeTy, /*isSigned=*/true); - - Address AddrWidthArg = CGF.GetAddrOfLocalVar(&WidthArg); - llvm::Value *WidthVal = Bld.CreateIntCast( - CGF.EmitLoadOfScalar(AddrWidthArg, /*Volatile=*/false, Int32Ty, Loc), - CGM.SizeTy, /*isSigned=*/true); - - Address AddrShouldReduceArg = CGF.GetAddrOfLocalVar(&ShouldReduceArg); - llvm::Value *ShouldReduceVal = CGF.EmitLoadOfScalar( - AddrShouldReduceArg, /*Volatile=*/false, Int32Ty, Loc); - - // The absolute ptr address to the base addr of the next element to copy. - llvm::Value *CumulativeElemBasePtr = - Bld.CreatePtrToInt(ScratchPadBase, CGM.SizeTy); - Address SrcDataAddr(CumulativeElemBasePtr, CGF.getPointerAlign()); - - // Create a Remote Reduce list to store the elements read from the - // scratchpad array. - Address RemoteReduceList = - CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.remote_red_list"); - - // Assemble remote Reduce list from scratchpad array. 
- emitReductionListCopy(ScratchpadToThread, CGF, ReductionArrayTy, Privates, - SrcDataAddr, RemoteReduceList, - {/*RemoteLaneOffset=*/nullptr, - /*ScratchpadIndex=*/IndexVal, - /*ScratchpadWidth=*/WidthVal}); - - llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then"); - llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else"); - llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont"); - - llvm::Value *CondReduce = Bld.CreateIsNotNull(ShouldReduceVal); - Bld.CreateCondBr(CondReduce, ThenBB, ElseBB); - - CGF.EmitBlock(ThenBB); - // We should reduce with the local Reduce list. - // reduce_function(LocalReduceList, RemoteReduceList) - llvm::Value *LocalDataPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( - ReduceListAddr.getPointer(), CGF.VoidPtrTy); - llvm::Value *RemoteDataPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( - RemoteReduceList.getPointer(), CGF.VoidPtrTy); - CGM.getOpenMPRuntime().emitOutlinedFunctionCall( - CGF, Loc, ReduceFn, {LocalDataPtr, RemoteDataPtr}); - Bld.CreateBr(MergeBB); - - CGF.EmitBlock(ElseBB); - // No reduction; just copy: - // Local Reduce list = Remote Reduce list. - emitReductionListCopy(ThreadCopy, CGF, ReductionArrayTy, Privates, - RemoteReduceList, ReduceListAddr); - Bld.CreateBr(MergeBB); - - CGF.EmitBlock(MergeBB); - - CGF.FinishFunction(); - return Fn; -} - -/// This function emits a helper that stores reduced data from the team -/// master to a scratchpad array in global memory. -/// -/// for elem in Reduce List: -/// scratchpad[elem_id][index] = elem -/// -static llvm::Value *emitCopyToScratchpad(CodeGenModule &CGM, - ArrayRef<const Expr *> Privates, - QualType ReductionArrayTy, - SourceLocation Loc) { - - ASTContext &C = CGM.getContext(); - QualType Int32Ty = C.getIntTypeForBitwidth(32, /*Signed=*/1); - - // Source of the copy. 
- ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - C.VoidPtrTy, ImplicitParamDecl::Other); - // Base address of the scratchpad array, with each element storing a - // Reduce list per team. - ImplicitParamDecl ScratchPadArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - C.VoidPtrTy, ImplicitParamDecl::Other); - // A destination index into the scratchpad array, typically the team - // identifier. - ImplicitParamDecl IndexArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty, - ImplicitParamDecl::Other); - // Row width of an element in the scratchpad array, typically - // the number of teams. - ImplicitParamDecl WidthArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty, - ImplicitParamDecl::Other); - - FunctionArgList Args; - Args.push_back(&ReduceListArg); - Args.push_back(&ScratchPadArg); - Args.push_back(&IndexArg); - Args.push_back(&WidthArg); - - const CGFunctionInfo &CGFI = - CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - auto *Fn = llvm::Function::Create( - CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, - "_omp_reduction_copy_to_scratchpad", &CGM.getModule()); - CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); - Fn->setDoesNotRecurse(); - CodeGenFunction CGF(CGM); - CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); - - CGBuilderTy &Bld = CGF.Builder; - - Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg); - Address SrcDataAddr( - Bld.CreatePointerBitCastOrAddrSpaceCast( - CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false, - C.VoidPtrTy, Loc), - CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()), - CGF.getPointerAlign()); - - Address AddrScratchPadArg = CGF.GetAddrOfLocalVar(&ScratchPadArg); - llvm::Value *ScratchPadBase = CGF.EmitLoadOfScalar( - AddrScratchPadArg, /*Volatile=*/false, C.VoidPtrTy, Loc); - - Address AddrIndexArg = CGF.GetAddrOfLocalVar(&IndexArg); - llvm::Value *IndexVal = Bld.CreateIntCast( - 
CGF.EmitLoadOfScalar(AddrIndexArg, /*Volatile=*/false, Int32Ty, Loc), - CGF.SizeTy, /*isSigned=*/true); - - Address AddrWidthArg = CGF.GetAddrOfLocalVar(&WidthArg); - llvm::Value *WidthVal = - Bld.CreateIntCast(CGF.EmitLoadOfScalar(AddrWidthArg, /*Volatile=*/false, - Int32Ty, SourceLocation()), - CGF.SizeTy, /*isSigned=*/true); - - // The absolute ptr address to the base addr of the next element to copy. - llvm::Value *CumulativeElemBasePtr = - Bld.CreatePtrToInt(ScratchPadBase, CGM.SizeTy); - Address DestDataAddr(CumulativeElemBasePtr, CGF.getPointerAlign()); - - emitReductionListCopy(ThreadToScratchpad, CGF, ReductionArrayTy, Privates, - SrcDataAddr, DestDataAddr, - {/*RemoteLaneOffset=*/nullptr, - /*ScratchpadIndex=*/IndexVal, - /*ScratchpadWidth=*/WidthVal}); - - CGF.FinishFunction(); - return Fn; -} - /// This function emits a helper that gathers Reduce lists from the first /// lane of every active warp to lanes in the first warp. /// /// void inter_warp_copy_func(void* reduce_data, num_warps) /// shared smem[warp_size]; /// For all data entries D in reduce_data: +/// sync /// If (I am the first lane in each warp) /// Copy my local D to smem[warp_id] /// sync /// if (I am the first warp) /// Copy smem[thread_id] to my local D -/// sync static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, ArrayRef<const Expr *> Privates, QualType ReductionArrayTy, @@ -2688,11 +3169,10 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, llvm::GlobalVariable *TransferMedium = M.getGlobalVariable(TransferMediumName); if (!TransferMedium) { - auto *Ty = llvm::ArrayType::get(CGM.Int64Ty, WarpSize); + auto *Ty = llvm::ArrayType::get(CGM.Int32Ty, WarpSize); unsigned SharedAddressSpace = C.getTargetAddressSpace(LangAS::cuda_shared); TransferMedium = new llvm::GlobalVariable( - M, Ty, - /*isConstant=*/false, llvm::GlobalVariable::CommonLinkage, + M, Ty, /*isConstant=*/false, llvm::GlobalVariable::CommonLinkage, llvm::Constant::getNullValue(Ty), 
TransferMediumName, /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal, SharedAddressSpace); @@ -2710,7 +3190,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, Address LocalReduceList( Bld.CreatePointerBitCastOrAddrSpaceCast( CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false, - C.VoidPtrTy, SourceLocation()), + C.VoidPtrTy, Loc), CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()), CGF.getPointerAlign()); @@ -2720,121 +3200,151 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, // Warp master copies reduce element to transfer medium in __shared__ // memory. // - llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then"); - llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else"); - llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont"); - - // if (lane_id == 0) - llvm::Value *IsWarpMaster = Bld.CreateIsNull(LaneID, "warp_master"); - Bld.CreateCondBr(IsWarpMaster, ThenBB, ElseBB); - CGF.EmitBlock(ThenBB); - - // Reduce element = LocalReduceList[i] - Address ElemPtrPtrAddr = - Bld.CreateConstArrayGEP(LocalReduceList, Idx, CGF.getPointerSize()); - llvm::Value *ElemPtrPtr = CGF.EmitLoadOfScalar( - ElemPtrPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); - // elemptr = (type[i]*)(elemptrptr) - Address ElemPtr = - Address(ElemPtrPtr, C.getTypeAlignInChars(Private->getType())); - ElemPtr = Bld.CreateElementBitCast( - ElemPtr, CGF.ConvertTypeForMem(Private->getType())); - - // Get pointer to location in transfer medium. - // MediumPtr = &medium[warp_id] - llvm::Value *MediumPtrVal = Bld.CreateInBoundsGEP( - TransferMedium, {llvm::Constant::getNullValue(CGM.Int64Ty), WarpID}); - Address MediumPtr(MediumPtrVal, C.getTypeAlignInChars(Private->getType())); - // Casting to actual data type. 
- // MediumPtr = (type[i]*)MediumPtrAddr; - MediumPtr = Bld.CreateElementBitCast( - MediumPtr, CGF.ConvertTypeForMem(Private->getType())); - - // elem = *elemptr - //*MediumPtr = elem - if (Private->getType()->isScalarType()) { - llvm::Value *Elem = CGF.EmitLoadOfScalar(ElemPtr, /*Volatile=*/false, - Private->getType(), Loc); - // Store the source element value to the dest element address. - CGF.EmitStoreOfScalar(Elem, MediumPtr, /*Volatile=*/false, - Private->getType()); - } else { - CGF.EmitAggregateCopy(CGF.MakeAddrLValue(ElemPtr, Private->getType()), - CGF.MakeAddrLValue(MediumPtr, Private->getType()), - Private->getType(), AggValueSlot::DoesNotOverlap); - } - - Bld.CreateBr(MergeBB); - - CGF.EmitBlock(ElseBB); - Bld.CreateBr(MergeBB); - - CGF.EmitBlock(MergeBB); + unsigned RealTySize = + C.getTypeSizeInChars(Private->getType()) + .alignTo(C.getTypeAlignInChars(Private->getType())) + .getQuantity(); + for (unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /=2) { + unsigned NumIters = RealTySize / TySize; + if (NumIters == 0) + continue; + QualType CType = C.getIntTypeForBitwidth( + C.toBits(CharUnits::fromQuantity(TySize)), /*Signed=*/1); + llvm::Type *CopyType = CGF.ConvertTypeForMem(CType); + CharUnits Align = CharUnits::fromQuantity(TySize); + llvm::Value *Cnt = nullptr; + Address CntAddr = Address::invalid(); + llvm::BasicBlock *PrecondBB = nullptr; + llvm::BasicBlock *ExitBB = nullptr; + if (NumIters > 1) { + CntAddr = CGF.CreateMemTemp(C.IntTy, ".cnt.addr"); + CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.IntTy), CntAddr, + /*Volatile=*/false, C.IntTy); + PrecondBB = CGF.createBasicBlock("precond"); + ExitBB = CGF.createBasicBlock("exit"); + llvm::BasicBlock *BodyBB = CGF.createBasicBlock("body"); + // There is no need to emit line number for unconditional branch. 
+ (void)ApplyDebugLocation::CreateEmpty(CGF); + CGF.EmitBlock(PrecondBB); + Cnt = CGF.EmitLoadOfScalar(CntAddr, /*Volatile=*/false, C.IntTy, Loc); + llvm::Value *Cmp = + Bld.CreateICmpULT(Cnt, llvm::ConstantInt::get(CGM.IntTy, NumIters)); + Bld.CreateCondBr(Cmp, BodyBB, ExitBB); + CGF.EmitBlock(BodyBB); + } + // kmpc_barrier. + CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc, OMPD_unknown, + /*EmitChecks=*/false, + /*ForceSimpleCall=*/true); + llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then"); + llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else"); + llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont"); + + // if (lane_id == 0) + llvm::Value *IsWarpMaster = Bld.CreateIsNull(LaneID, "warp_master"); + Bld.CreateCondBr(IsWarpMaster, ThenBB, ElseBB); + CGF.EmitBlock(ThenBB); - Address AddrNumWarpsArg = CGF.GetAddrOfLocalVar(&NumWarpsArg); - llvm::Value *NumWarpsVal = CGF.EmitLoadOfScalar( - AddrNumWarpsArg, /*Volatile=*/false, C.IntTy, SourceLocation()); + // Reduce element = LocalReduceList[i] + Address ElemPtrPtrAddr = + Bld.CreateConstArrayGEP(LocalReduceList, Idx, CGF.getPointerSize()); + llvm::Value *ElemPtrPtr = CGF.EmitLoadOfScalar( + ElemPtrPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); + // elemptr = ((CopyType*)(elemptrptr)) + I + Address ElemPtr = Address(ElemPtrPtr, Align); + ElemPtr = Bld.CreateElementBitCast(ElemPtr, CopyType); + if (NumIters > 1) { + ElemPtr = Address(Bld.CreateGEP(ElemPtr.getPointer(), Cnt), + ElemPtr.getAlignment()); + } - llvm::Value *NumActiveThreads = Bld.CreateNSWMul( - NumWarpsVal, getNVPTXWarpSize(CGF), "num_active_threads"); - // named_barrier_sync(ParallelBarrierID, num_active_threads) - syncParallelThreads(CGF, NumActiveThreads); + // Get pointer to location in transfer medium. 
+ // MediumPtr = &medium[warp_id] + llvm::Value *MediumPtrVal = Bld.CreateInBoundsGEP( + TransferMedium, {llvm::Constant::getNullValue(CGM.Int64Ty), WarpID}); + Address MediumPtr(MediumPtrVal, Align); + // Casting to actual data type. + // MediumPtr = (CopyType*)MediumPtrAddr; + MediumPtr = Bld.CreateElementBitCast(MediumPtr, CopyType); + + // elem = *elemptr + //*MediumPtr = elem + llvm::Value *Elem = + CGF.EmitLoadOfScalar(ElemPtr, /*Volatile=*/false, CType, Loc); + // Store the source element value to the dest element address. + CGF.EmitStoreOfScalar(Elem, MediumPtr, /*Volatile=*/true, CType); + + Bld.CreateBr(MergeBB); + + CGF.EmitBlock(ElseBB); + Bld.CreateBr(MergeBB); + + CGF.EmitBlock(MergeBB); + + // kmpc_barrier. + CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc, OMPD_unknown, + /*EmitChecks=*/false, + /*ForceSimpleCall=*/true); + + // + // Warp 0 copies reduce element from transfer medium. + // + llvm::BasicBlock *W0ThenBB = CGF.createBasicBlock("then"); + llvm::BasicBlock *W0ElseBB = CGF.createBasicBlock("else"); + llvm::BasicBlock *W0MergeBB = CGF.createBasicBlock("ifcont"); + + Address AddrNumWarpsArg = CGF.GetAddrOfLocalVar(&NumWarpsArg); + llvm::Value *NumWarpsVal = CGF.EmitLoadOfScalar( + AddrNumWarpsArg, /*Volatile=*/false, C.IntTy, Loc); + + // Up to 32 threads in warp 0 are active. 
+ llvm::Value *IsActiveThread = + Bld.CreateICmpULT(ThreadID, NumWarpsVal, "is_active_thread"); + Bld.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB); + + CGF.EmitBlock(W0ThenBB); + + // SrcMediumPtr = &medium[tid] + llvm::Value *SrcMediumPtrVal = Bld.CreateInBoundsGEP( + TransferMedium, + {llvm::Constant::getNullValue(CGM.Int64Ty), ThreadID}); + Address SrcMediumPtr(SrcMediumPtrVal, Align); + // SrcMediumVal = *SrcMediumPtr; + SrcMediumPtr = Bld.CreateElementBitCast(SrcMediumPtr, CopyType); + + // TargetElemPtr = (CopyType*)(SrcDataAddr[i]) + I + Address TargetElemPtrPtr = + Bld.CreateConstArrayGEP(LocalReduceList, Idx, CGF.getPointerSize()); + llvm::Value *TargetElemPtrVal = CGF.EmitLoadOfScalar( + TargetElemPtrPtr, /*Volatile=*/false, C.VoidPtrTy, Loc); + Address TargetElemPtr = Address(TargetElemPtrVal, Align); + TargetElemPtr = Bld.CreateElementBitCast(TargetElemPtr, CopyType); + if (NumIters > 1) { + TargetElemPtr = Address(Bld.CreateGEP(TargetElemPtr.getPointer(), Cnt), + TargetElemPtr.getAlignment()); + } - // - // Warp 0 copies reduce element from transfer medium. - // - llvm::BasicBlock *W0ThenBB = CGF.createBasicBlock("then"); - llvm::BasicBlock *W0ElseBB = CGF.createBasicBlock("else"); - llvm::BasicBlock *W0MergeBB = CGF.createBasicBlock("ifcont"); - - // Up to 32 threads in warp 0 are active. 
- llvm::Value *IsActiveThread = - Bld.CreateICmpULT(ThreadID, NumWarpsVal, "is_active_thread"); - Bld.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB); - - CGF.EmitBlock(W0ThenBB); - - // SrcMediumPtr = &medium[tid] - llvm::Value *SrcMediumPtrVal = Bld.CreateInBoundsGEP( - TransferMedium, {llvm::Constant::getNullValue(CGM.Int64Ty), ThreadID}); - Address SrcMediumPtr(SrcMediumPtrVal, - C.getTypeAlignInChars(Private->getType())); - // SrcMediumVal = *SrcMediumPtr; - SrcMediumPtr = Bld.CreateElementBitCast( - SrcMediumPtr, CGF.ConvertTypeForMem(Private->getType())); - - // TargetElemPtr = (type[i]*)(SrcDataAddr[i]) - Address TargetElemPtrPtr = - Bld.CreateConstArrayGEP(LocalReduceList, Idx, CGF.getPointerSize()); - llvm::Value *TargetElemPtrVal = CGF.EmitLoadOfScalar( - TargetElemPtrPtr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation()); - Address TargetElemPtr = - Address(TargetElemPtrVal, C.getTypeAlignInChars(Private->getType())); - TargetElemPtr = Bld.CreateElementBitCast( - TargetElemPtr, CGF.ConvertTypeForMem(Private->getType())); - - // *TargetElemPtr = SrcMediumVal; - if (Private->getType()->isScalarType()) { - llvm::Value *SrcMediumValue = CGF.EmitLoadOfScalar( - SrcMediumPtr, /*Volatile=*/false, Private->getType(), Loc); + // *TargetElemPtr = SrcMediumVal; + llvm::Value *SrcMediumValue = + CGF.EmitLoadOfScalar(SrcMediumPtr, /*Volatile=*/true, CType, Loc); CGF.EmitStoreOfScalar(SrcMediumValue, TargetElemPtr, /*Volatile=*/false, - Private->getType()); - } else { - CGF.EmitAggregateCopy( - CGF.MakeAddrLValue(SrcMediumPtr, Private->getType()), - CGF.MakeAddrLValue(TargetElemPtr, Private->getType()), - Private->getType(), AggValueSlot::DoesNotOverlap); - } - Bld.CreateBr(W0MergeBB); + CType); + Bld.CreateBr(W0MergeBB); - CGF.EmitBlock(W0ElseBB); - Bld.CreateBr(W0MergeBB); + CGF.EmitBlock(W0ElseBB); + Bld.CreateBr(W0MergeBB); - CGF.EmitBlock(W0MergeBB); + CGF.EmitBlock(W0MergeBB); - // While warp 0 copies values from transfer medium, all other warps must - // 
wait. - syncParallelThreads(CGF, NumActiveThreads); + if (NumIters > 1) { + Cnt = Bld.CreateNSWAdd(Cnt, llvm::ConstantInt::get(CGM.IntTy, /*V=*/1)); + CGF.EmitStoreOfScalar(Cnt, CntAddr, /*Volatile=*/false, C.IntTy); + CGF.EmitBranch(PrecondBB); + (void)ApplyDebugLocation::CreateEmpty(CGF); + CGF.EmitBlock(ExitBB); + } + RealTySize %= TySize; + } ++Idx; } @@ -3103,7 +3613,7 @@ static llvm::Value *emitShuffleAndReduceFunction( /// 3. Call the OpenMP runtime on the GPU to reduce within a team /// and store the result on the team master: /// -/// __kmpc_nvptx_parallel_reduce_nowait(..., +/// __kmpc_nvptx_parallel_reduce_nowait_v2(..., /// reduceData, shuffleReduceFn, interWarpCpyFn) /// /// where: @@ -3274,7 +3784,7 @@ static llvm::Value *emitShuffleAndReduceFunction( /// Intra-Team Reduction /// /// This function, as implemented in the runtime call -/// '__kmpc_nvptx_parallel_reduce_nowait', aggregates data across OpenMP +/// '__kmpc_nvptx_parallel_reduce_nowait_v2', aggregates data across OpenMP /// threads in a team. It first reduces within a warp using the /// aforementioned algorithms. We then proceed to gather all such /// reduced values at the first warp. @@ -3297,7 +3807,7 @@ static llvm::Value *emitShuffleAndReduceFunction( /// 'loadAndReduceDataFn' to load and reduce values from the array, i.e., /// the k'th worker reduces every k'th element. /// -/// Finally, a call is made to '__kmpc_nvptx_parallel_reduce_nowait' to +/// Finally, a call is made to '__kmpc_nvptx_parallel_reduce_nowait_v2' to /// reduce across workers and compute a globally reduced value. 
/// void CGOpenMPRuntimeNVPTX::emitReduction( @@ -3308,125 +3818,116 @@ void CGOpenMPRuntimeNVPTX::emitReduction( return; bool ParallelReduction = isOpenMPParallelDirective(Options.ReductionKind); +#ifndef NDEBUG bool TeamsReduction = isOpenMPTeamsDirective(Options.ReductionKind); - bool SimdReduction = isOpenMPSimdDirective(Options.ReductionKind); - assert((TeamsReduction || ParallelReduction || SimdReduction) && - "Invalid reduction selection in emitReduction."); +#endif if (Options.SimpleReduction) { + assert(!TeamsReduction && !ParallelReduction && + "Invalid reduction selection in emitReduction."); CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, ReductionOps, Options); return; } - ASTContext &C = CGM.getContext(); - - // 1. Build a list of reduction variables. - // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; - auto Size = RHSExprs.size(); - for (const Expr *E : Privates) { - if (E->getType()->isVariablyModifiedType()) - // Reserve place for array size. - ++Size; - } - llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); - QualType ReductionArrayTy = - C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, - /*IndexTypeQuals=*/0); - Address ReductionList = - CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); - auto IPriv = Privates.begin(); - unsigned Idx = 0; - for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { - Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, - CGF.getPointerSize()); - CGF.Builder.CreateStore( - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), - Elem); - if ((*IPriv)->getType()->isVariablyModifiedType()) { - // Store array size. 
- ++Idx; - Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, - CGF.getPointerSize()); - llvm::Value *Size = CGF.Builder.CreateIntCast( - CGF.getVLASize( - CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) - .NumElts, - CGF.SizeTy, /*isSigned=*/false); - CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), - Elem); - } - } - - // 2. Emit reduce_func(). - llvm::Value *ReductionFn = emitReductionFunction( - CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), - Privates, LHSExprs, RHSExprs, ReductionOps); + assert((TeamsReduction || ParallelReduction) && + "Invalid reduction selection in emitReduction."); - // 4. Build res = __kmpc_reduce{_nowait}(<gtid>, <n>, sizeof(RedList), + // Build res = __kmpc_reduce{_nowait}(<gtid>, <n>, sizeof(RedList), // RedList, shuffle_reduce_func, interwarp_copy_func); + // or + // Build res = __kmpc_reduce_teams_nowait_simple(<loc>, <gtid>, <lck>); + llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); llvm::Value *ThreadId = getThreadID(CGF, Loc); - llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); - llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - ReductionList.getPointer(), CGF.VoidPtrTy); - - llvm::Value *ShuffleAndReduceFn = emitShuffleAndReduceFunction( - CGM, Privates, ReductionArrayTy, ReductionFn, Loc); - llvm::Value *InterWarpCopyFn = - emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy, Loc); - - llvm::Value *Args[] = {ThreadId, - CGF.Builder.getInt32(RHSExprs.size()), - ReductionArrayTySize, - RL, - ShuffleAndReduceFn, - InterWarpCopyFn}; - - llvm::Value *Res = nullptr; - if (ParallelReduction) - Res = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_reduce_nowait), - Args); - else if (SimdReduction) - Res = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_simd_reduce_nowait), - Args); - if (TeamsReduction) { - llvm::Value *ScratchPadCopyFn = - emitCopyToScratchpad(CGM, 
Privates, ReductionArrayTy, Loc); - llvm::Value *LoadAndReduceFn = emitReduceScratchpadFunction( + llvm::Value *Res; + if (ParallelReduction) { + ASTContext &C = CGM.getContext(); + // 1. Build a list of reduction variables. + // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; + auto Size = RHSExprs.size(); + for (const Expr *E : Privates) { + if (E->getType()->isVariablyModifiedType()) + // Reserve place for array size. + ++Size; + } + llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); + QualType ReductionArrayTy = + C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, + /*IndexTypeQuals=*/0); + Address ReductionList = + CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); + auto IPriv = Privates.begin(); + unsigned Idx = 0; + for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { + Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, + CGF.getPointerSize()); + CGF.Builder.CreateStore( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), + Elem); + if ((*IPriv)->getType()->isVariablyModifiedType()) { + // Store array size. 
+ ++Idx; + Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, + CGF.getPointerSize()); + llvm::Value *Size = CGF.Builder.CreateIntCast( + CGF.getVLASize( + CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) + .NumElts, + CGF.SizeTy, /*isSigned=*/false); + CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), + Elem); + } + } + + llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); + llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + ReductionList.getPointer(), CGF.VoidPtrTy); + llvm::Value *ReductionFn = emitReductionFunction( + CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), + Privates, LHSExprs, RHSExprs, ReductionOps); + llvm::Value *ShuffleAndReduceFn = emitShuffleAndReduceFunction( CGM, Privates, ReductionArrayTy, ReductionFn, Loc); + llvm::Value *InterWarpCopyFn = + emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy, Loc); - llvm::Value *Args[] = {ThreadId, + llvm::Value *Args[] = {RTLoc, + ThreadId, CGF.Builder.getInt32(RHSExprs.size()), ReductionArrayTySize, RL, ShuffleAndReduceFn, - InterWarpCopyFn, - ScratchPadCopyFn, - LoadAndReduceFn}; + InterWarpCopyFn}; + + Res = CGF.EmitRuntimeCall(createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_parallel_reduce_nowait_v2), + Args); + } else { + assert(TeamsReduction && "expected teams reduction."); + std::string Name = getName({"reduction"}); + llvm::Value *Lock = getCriticalRegionLock(Name); + llvm::Value *Args[] = {RTLoc, ThreadId, Lock}; Res = CGF.EmitRuntimeCall( - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_teams_reduce_nowait), + createNVPTXRuntimeFunction( + OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_simple), Args); } - // 5. Build switch(res) - llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); - llvm::SwitchInst *SwInst = - CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/1); + // 5. 
Build if (res == 1) + llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.reduction.done"); + llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.then"); + llvm::Value *Cond = CGF.Builder.CreateICmpEQ( + Res, llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1)); + CGF.Builder.CreateCondBr(Cond, ThenBB, ExitBB); - // 6. Build case 1: where we have reduced values in the master + // 6. Build then branch: where we have reduced values in the master // thread in each team. // __kmpc_end_reduce{_nowait}(<gtid>); // break; - llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); - SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); - CGF.EmitBlock(Case1BB); + CGF.EmitBlock(ThenBB); // Add emission of __kmpc_end_reduce{_nowait}(<gtid>); - llvm::Value *EndArgs[] = {ThreadId}; auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps, this](CodeGenFunction &CGF, PrePostActionTy &Action) { auto IPriv = Privates.begin(); @@ -3440,15 +3941,33 @@ void CGOpenMPRuntimeNVPTX::emitReduction( ++IRHS; } }; - RegionCodeGenTy RCG(CodeGen); - NVPTXActionTy Action( - nullptr, llvm::None, - createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_reduce_nowait), - EndArgs); - RCG.setAction(Action); - RCG(CGF); - CGF.EmitBranch(DefaultBB); - CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); + if (ParallelReduction) { + llvm::Value *EndArgs[] = {ThreadId}; + RegionCodeGenTy RCG(CodeGen); + NVPTXActionTy Action( + nullptr, llvm::None, + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_reduce_nowait), + EndArgs); + RCG.setAction(Action); + RCG(CGF); + } else { + assert(TeamsReduction && "expected teams reduction."); + llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); + std::string Name = getName({"reduction"}); + llvm::Value *Lock = getCriticalRegionLock(Name); + llvm::Value *EndArgs[] = {RTLoc, ThreadId, Lock}; + RegionCodeGenTy RCG(CodeGen); + NVPTXActionTy Action( + nullptr, llvm::None, + createNVPTXRuntimeFunction( + 
OMPRTL_NVPTX__kmpc_nvptx_teams_end_reduce_nowait_simple), + EndArgs); + RCG.setAction(Action); + RCG(CGF); + } + // There is no need to emit line number for unconditional branch. + (void)ApplyDebugLocation::CreateEmpty(CGF); + CGF.EmitBlock(ExitBB, /*IsFinished=*/true); } const VarDecl * @@ -3478,7 +3997,7 @@ CGOpenMPRuntimeNVPTX::translateParameter(const FieldDecl *FD, return ParmVarDecl::Create( CGM.getContext(), const_cast<DeclContext *>(NativeParam->getDeclContext()), - NativeParam->getLocStart(), NativeParam->getLocation(), + NativeParam->getBeginLoc(), NativeParam->getLocation(), NativeParam->getIdentifier(), ArgType, /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr); } @@ -3556,10 +4075,10 @@ llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper( Ctx.getIntTypeForBitwidth(/*DestWidth=*/16, /*Signed=*/false); QualType Int32QTy = Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false); - ImplicitParamDecl ParallelLevelArg(Ctx, /*DC=*/nullptr, D.getLocStart(), + ImplicitParamDecl ParallelLevelArg(Ctx, /*DC=*/nullptr, D.getBeginLoc(), /*Id=*/nullptr, Int16QTy, ImplicitParamDecl::Other); - ImplicitParamDecl WrapperArg(Ctx, /*DC=*/nullptr, D.getLocStart(), + ImplicitParamDecl WrapperArg(Ctx, /*DC=*/nullptr, D.getBeginLoc(), /*Id=*/nullptr, Int32QTy, ImplicitParamDecl::Other); WrapperArgs.emplace_back(&ParallelLevelArg); @@ -3577,7 +4096,7 @@ llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper( CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, Fn, CGFI, WrapperArgs, - D.getLocStart(), D.getLocStart()); + D.getBeginLoc(), D.getBeginLoc()); const auto *RD = CS.getCapturedRecordDecl(); auto CurField = RD->field_begin(); @@ -3662,7 +4181,7 @@ llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper( } } - emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedParallelFn, Args); + emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedParallelFn, Args); 
CGF.FinishFunction(); return Fn; } @@ -3675,6 +4194,8 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF, assert(D && "Expected function or captured|block decl."); assert(FunctionGlobalizedDecls.count(CGF.CurFn) == 0 && "Function is registered already."); + assert((!TeamAndReductions.first || TeamAndReductions.first == D) && + "Team is set but not processed."); const Stmt *Body = nullptr; bool NeedToDelayGlobalization = false; if (const auto *FD = dyn_cast<FunctionDecl>(D)) { @@ -3684,12 +4205,18 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF, } else if (const auto *CD = dyn_cast<CapturedDecl>(D)) { Body = CD->getBody(); NeedToDelayGlobalization = CGF.CapturedStmtInfo->getKind() == CR_OpenMP; + if (NeedToDelayGlobalization && + getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) + return; } if (!Body) return; - CheckVarsEscapingDeclContext VarChecker(CGF); + CheckVarsEscapingDeclContext VarChecker(CGF, TeamAndReductions.second); VarChecker.Visit(Body); - const RecordDecl *GlobalizedVarsRecord = VarChecker.getGlobalizedRecord(); + const RecordDecl *GlobalizedVarsRecord = + VarChecker.getGlobalizedRecord(IsInTTDRegion); + TeamAndReductions.first = nullptr; + TeamAndReductions.second.clear(); ArrayRef<const ValueDecl *> EscapedVariableLengthDecls = VarChecker.getEscapedVariableLengthDecls(); if (!GlobalizedVarsRecord && EscapedVariableLengthDecls.empty()) @@ -3707,16 +4234,30 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF, for (const ValueDecl *VD : VarChecker.getEscapedDecls()) { assert(VD->isCanonicalDecl() && "Expected canonical declaration"); const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD); - Data.insert(std::make_pair(VD, std::make_pair(FD, Address::invalid()))); + Data.insert(std::make_pair(VD, MappedVarData(FD, IsInTTDRegion))); + } + if (!IsInTTDRegion && !NeedToDelayGlobalization && !IsInParallelRegion) { + CheckVarsEscapingDeclContext VarChecker(CGF, llvm::None); + 
VarChecker.Visit(Body); + I->getSecond().SecondaryGlobalRecord = + VarChecker.getGlobalizedRecord(/*IsInTTDRegion=*/true); + I->getSecond().SecondaryLocalVarData.emplace(); + DeclToAddrMapTy &Data = I->getSecond().SecondaryLocalVarData.getValue(); + for (const ValueDecl *VD : VarChecker.getEscapedDecls()) { + assert(VD->isCanonicalDecl() && "Expected canonical declaration"); + const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD); + Data.insert( + std::make_pair(VD, MappedVarData(FD, /*IsInTTDRegion=*/true))); + } } if (!NeedToDelayGlobalization) { - emitGenericVarsProlog(CGF, D->getLocStart()); + emitGenericVarsProlog(CGF, D->getBeginLoc(), /*WithSPMDCheck=*/true); struct GlobalizationScope final : EHScopeStack::Cleanup { GlobalizationScope() = default; void Emit(CodeGenFunction &CGF, Flags flags) override { static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime()) - .emitGenericVarsEpilog(CGF); + .emitGenericVarsEpilog(CGF, /*WithSPMDCheck=*/true); } }; CGF.EHStack.pushCleanup<GlobalizationScope>(NormalAndEHCleanup); @@ -3734,7 +4275,7 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF, return Address::invalid(); auto VDI = I->getSecond().LocalVarData.find(VD); if (VDI != I->getSecond().LocalVarData.end()) - return VDI->second.second; + return VDI->second.PrivateAddr; if (VD->hasAttrs()) { for (specific_attr_iterator<OMPReferencedVarAttr> IT(VD->attr_begin()), E(VD->attr_end()); @@ -3743,7 +4284,7 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF, cast<VarDecl>(cast<DeclRefExpr>(IT->getRef())->getDecl()) ->getCanonicalDecl()); if (VDI != I->getSecond().LocalVarData.end()) - return VDI->second.second; + return VDI->second.PrivateAddr; } } return Address::invalid(); @@ -3753,3 +4294,311 @@ void CGOpenMPRuntimeNVPTX::functionFinished(CodeGenFunction &CGF) { FunctionGlobalizedDecls.erase(CGF.CurFn); CGOpenMPRuntime::functionFinished(CGF); } + +void 
CGOpenMPRuntimeNVPTX::getDefaultDistScheduleAndChunk( + CodeGenFunction &CGF, const OMPLoopDirective &S, + OpenMPDistScheduleClauseKind &ScheduleKind, + llvm::Value *&Chunk) const { + if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) { + ScheduleKind = OMPC_DIST_SCHEDULE_static; + Chunk = CGF.EmitScalarConversion(getNVPTXNumThreads(CGF), + CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), + S.getIterationVariable()->getType(), S.getBeginLoc()); + return; + } + CGOpenMPRuntime::getDefaultDistScheduleAndChunk( + CGF, S, ScheduleKind, Chunk); +} + +void CGOpenMPRuntimeNVPTX::getDefaultScheduleAndChunk( + CodeGenFunction &CGF, const OMPLoopDirective &S, + OpenMPScheduleClauseKind &ScheduleKind, + const Expr *&ChunkExpr) const { + ScheduleKind = OMPC_SCHEDULE_static; + // Chunk size is 1 in this case. + llvm::APInt ChunkSize(32, 1); + ChunkExpr = IntegerLiteral::Create(CGF.getContext(), ChunkSize, + CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), + SourceLocation()); +} + +void CGOpenMPRuntimeNVPTX::adjustTargetSpecificDataForLambdas( + CodeGenFunction &CGF, const OMPExecutableDirective &D) const { + assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && + " Expected target-based directive."); + const CapturedStmt *CS = D.getCapturedStmt(OMPD_target); + for (const CapturedStmt::Capture &C : CS->captures()) { + // Capture variables captured by reference in lambdas for target-based + // directives. 
+ if (!C.capturesVariable()) + continue; + const VarDecl *VD = C.getCapturedVar(); + const auto *RD = VD->getType() + .getCanonicalType() + .getNonReferenceType() + ->getAsCXXRecordDecl(); + if (!RD || !RD->isLambda()) + continue; + Address VDAddr = CGF.GetAddrOfLocalVar(VD); + LValue VDLVal; + if (VD->getType().getCanonicalType()->isReferenceType()) + VDLVal = CGF.EmitLoadOfReferenceLValue(VDAddr, VD->getType()); + else + VDLVal = CGF.MakeAddrLValue( + VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); + llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; + FieldDecl *ThisCapture = nullptr; + RD->getCaptureFields(Captures, ThisCapture); + if (ThisCapture && CGF.CapturedStmtInfo->isCXXThisExprCaptured()) { + LValue ThisLVal = + CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); + llvm::Value *CXXThis = CGF.LoadCXXThis(); + CGF.EmitStoreOfScalar(CXXThis, ThisLVal); + } + for (const LambdaCapture &LC : RD->captures()) { + if (LC.getCaptureKind() != LCK_ByRef) + continue; + const VarDecl *VD = LC.getCapturedVar(); + if (!CS->capturesVariable(VD)) + continue; + auto It = Captures.find(VD); + assert(It != Captures.end() && "Found lambda capture without field."); + LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); + Address VDAddr = CGF.GetAddrOfLocalVar(VD); + if (VD->getType().getCanonicalType()->isReferenceType()) + VDAddr = CGF.EmitLoadOfReferenceLValue(VDAddr, + VD->getType().getCanonicalType()) + .getAddress(); + CGF.EmitStoreOfScalar(VDAddr.getPointer(), VarLVal); + } + } +} + +// Get current CudaArch and ignore any unknown values +static CudaArch getCudaArch(CodeGenModule &CGM) { + if (!CGM.getTarget().hasFeature("ptx")) + return CudaArch::UNKNOWN; + llvm::StringMap<bool> Features; + CGM.getTarget().initFeatureMap(Features, CGM.getDiags(), + CGM.getTarget().getTargetOpts().CPU, + CGM.getTarget().getTargetOpts().Features); + for (const auto &Feature : Features) { + if (Feature.getValue()) { + CudaArch Arch = 
StringToCudaArch(Feature.getKey()); + if (Arch != CudaArch::UNKNOWN) + return Arch; + } + } + return CudaArch::UNKNOWN; +} + +/// Check to see if target architecture supports unified addressing which is +/// a restriction for OpenMP requires clause "unified_shared_memory". +void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing( + CodeGenModule &CGM, const OMPRequiresDecl *D) const { + for (const OMPClause *Clause : D->clauselists()) { + if (Clause->getClauseKind() == OMPC_unified_shared_memory) { + switch (getCudaArch(CGM)) { + case CudaArch::SM_20: + case CudaArch::SM_21: + case CudaArch::SM_30: + case CudaArch::SM_32: + case CudaArch::SM_35: + case CudaArch::SM_37: + case CudaArch::SM_50: + case CudaArch::SM_52: + case CudaArch::SM_53: + case CudaArch::SM_60: + case CudaArch::SM_61: + case CudaArch::SM_62: + CGM.Error(Clause->getBeginLoc(), + "Target architecture does not support unified addressing"); + return; + case CudaArch::SM_70: + case CudaArch::SM_72: + case CudaArch::SM_75: + case CudaArch::GFX600: + case CudaArch::GFX601: + case CudaArch::GFX700: + case CudaArch::GFX701: + case CudaArch::GFX702: + case CudaArch::GFX703: + case CudaArch::GFX704: + case CudaArch::GFX801: + case CudaArch::GFX802: + case CudaArch::GFX803: + case CudaArch::GFX810: + case CudaArch::GFX900: + case CudaArch::GFX902: + case CudaArch::GFX904: + case CudaArch::GFX906: + case CudaArch::GFX909: + case CudaArch::UNKNOWN: + break; + case CudaArch::LAST: + llvm_unreachable("Unexpected Cuda arch."); + } + } + } +} + +/// Get number of SMs and number of blocks per SM. 
+static std::pair<unsigned, unsigned> getSMsBlocksPerSM(CodeGenModule &CGM) { + std::pair<unsigned, unsigned> Data; + if (CGM.getLangOpts().OpenMPCUDANumSMs) + Data.first = CGM.getLangOpts().OpenMPCUDANumSMs; + if (CGM.getLangOpts().OpenMPCUDABlocksPerSM) + Data.second = CGM.getLangOpts().OpenMPCUDABlocksPerSM; + if (Data.first && Data.second) + return Data; + switch (getCudaArch(CGM)) { + case CudaArch::SM_20: + case CudaArch::SM_21: + case CudaArch::SM_30: + case CudaArch::SM_32: + case CudaArch::SM_35: + case CudaArch::SM_37: + case CudaArch::SM_50: + case CudaArch::SM_52: + case CudaArch::SM_53: + return {16, 16}; + case CudaArch::SM_60: + case CudaArch::SM_61: + case CudaArch::SM_62: + return {56, 32}; + case CudaArch::SM_70: + case CudaArch::SM_72: + case CudaArch::SM_75: + return {84, 32}; + case CudaArch::GFX600: + case CudaArch::GFX601: + case CudaArch::GFX700: + case CudaArch::GFX701: + case CudaArch::GFX702: + case CudaArch::GFX703: + case CudaArch::GFX704: + case CudaArch::GFX801: + case CudaArch::GFX802: + case CudaArch::GFX803: + case CudaArch::GFX810: + case CudaArch::GFX900: + case CudaArch::GFX902: + case CudaArch::GFX904: + case CudaArch::GFX906: + case CudaArch::GFX909: + case CudaArch::UNKNOWN: + break; + case CudaArch::LAST: + llvm_unreachable("Unexpected Cuda arch."); + } + llvm_unreachable("Unexpected NVPTX target without ptx feature."); +} + +void CGOpenMPRuntimeNVPTX::clear() { + if (!GlobalizedRecords.empty()) { + ASTContext &C = CGM.getContext(); + llvm::SmallVector<const GlobalPtrSizeRecsTy *, 4> GlobalRecs; + llvm::SmallVector<const GlobalPtrSizeRecsTy *, 4> SharedRecs; + RecordDecl *StaticRD = C.buildImplicitRecord( + "_openmp_static_memory_type_$_", RecordDecl::TagKind::TTK_Union); + StaticRD->startDefinition(); + RecordDecl *SharedStaticRD = C.buildImplicitRecord( + "_shared_openmp_static_memory_type_$_", RecordDecl::TagKind::TTK_Union); + SharedStaticRD->startDefinition(); + for (const GlobalPtrSizeRecsTy &Records : 
GlobalizedRecords) { + if (Records.Records.empty()) + continue; + unsigned Size = 0; + unsigned RecAlignment = 0; + for (const RecordDecl *RD : Records.Records) { + QualType RDTy = C.getRecordType(RD); + unsigned Alignment = C.getTypeAlignInChars(RDTy).getQuantity(); + RecAlignment = std::max(RecAlignment, Alignment); + unsigned RecSize = C.getTypeSizeInChars(RDTy).getQuantity(); + Size = + llvm::alignTo(llvm::alignTo(Size, Alignment) + RecSize, Alignment); + } + Size = llvm::alignTo(Size, RecAlignment); + llvm::APInt ArySize(/*numBits=*/64, Size); + QualType SubTy = C.getConstantArrayType( + C.CharTy, ArySize, ArrayType::Normal, /*IndexTypeQuals=*/0); + const bool UseSharedMemory = Size <= SharedMemorySize; + auto *Field = + FieldDecl::Create(C, UseSharedMemory ? SharedStaticRD : StaticRD, + SourceLocation(), SourceLocation(), nullptr, SubTy, + C.getTrivialTypeSourceInfo(SubTy, SourceLocation()), + /*BW=*/nullptr, /*Mutable=*/false, + /*InitStyle=*/ICIS_NoInit); + Field->setAccess(AS_public); + if (UseSharedMemory) { + SharedStaticRD->addDecl(Field); + SharedRecs.push_back(&Records); + } else { + StaticRD->addDecl(Field); + GlobalRecs.push_back(&Records); + } + Records.RecSize->setInitializer(llvm::ConstantInt::get(CGM.SizeTy, Size)); + Records.UseSharedMemory->setInitializer( + llvm::ConstantInt::get(CGM.Int16Ty, UseSharedMemory ? 1 : 0)); + } + // Allocate SharedMemorySize buffer for the shared memory. + // FIXME: nvlink does not handle weak linkage correctly (object with the + // different size are reported as erroneous). + // Restore this code as sson as nvlink is fixed. 
+ if (!SharedStaticRD->field_empty()) { + llvm::APInt ArySize(/*numBits=*/64, SharedMemorySize); + QualType SubTy = C.getConstantArrayType( + C.CharTy, ArySize, ArrayType::Normal, /*IndexTypeQuals=*/0); + auto *Field = FieldDecl::Create( + C, SharedStaticRD, SourceLocation(), SourceLocation(), nullptr, SubTy, + C.getTrivialTypeSourceInfo(SubTy, SourceLocation()), + /*BW=*/nullptr, /*Mutable=*/false, + /*InitStyle=*/ICIS_NoInit); + Field->setAccess(AS_public); + SharedStaticRD->addDecl(Field); + } + SharedStaticRD->completeDefinition(); + if (!SharedStaticRD->field_empty()) { + QualType StaticTy = C.getRecordType(SharedStaticRD); + llvm::Type *LLVMStaticTy = CGM.getTypes().ConvertTypeForMem(StaticTy); + auto *GV = new llvm::GlobalVariable( + CGM.getModule(), LLVMStaticTy, + /*isConstant=*/false, llvm::GlobalValue::CommonLinkage, + llvm::Constant::getNullValue(LLVMStaticTy), + "_openmp_shared_static_glob_rd_$_", /*InsertBefore=*/nullptr, + llvm::GlobalValue::NotThreadLocal, + C.getTargetAddressSpace(LangAS::cuda_shared)); + auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( + GV, CGM.VoidPtrTy); + for (const GlobalPtrSizeRecsTy *Rec : SharedRecs) { + Rec->Buffer->replaceAllUsesWith(Replacement); + Rec->Buffer->eraseFromParent(); + } + } + StaticRD->completeDefinition(); + if (!StaticRD->field_empty()) { + QualType StaticTy = C.getRecordType(StaticRD); + std::pair<unsigned, unsigned> SMsBlockPerSM = getSMsBlocksPerSM(CGM); + llvm::APInt Size1(32, SMsBlockPerSM.second); + QualType Arr1Ty = + C.getConstantArrayType(StaticTy, Size1, ArrayType::Normal, + /*IndexTypeQuals=*/0); + llvm::APInt Size2(32, SMsBlockPerSM.first); + QualType Arr2Ty = C.getConstantArrayType(Arr1Ty, Size2, ArrayType::Normal, + /*IndexTypeQuals=*/0); + llvm::Type *LLVMArr2Ty = CGM.getTypes().ConvertTypeForMem(Arr2Ty); + auto *GV = new llvm::GlobalVariable( + CGM.getModule(), LLVMArr2Ty, + /*isConstant=*/false, llvm::GlobalValue::CommonLinkage, + 
llvm::Constant::getNullValue(LLVMArr2Ty), + "_openmp_static_glob_rd_$_"); + auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( + GV, CGM.VoidPtrTy); + for (const GlobalPtrSizeRecsTy *Rec : GlobalRecs) { + Rec->Buffer->replaceAllUsesWith(Replacement); + Rec->Buffer->eraseFromParent(); + } + } + } + CGOpenMPRuntime::clear(); +} diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index f83e99f8a3b7..6091610c37e3 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -56,6 +56,11 @@ private: ExecutionMode getExecutionMode() const; + bool requiresFullRuntime() const { return RequiresFullRuntime; } + + /// Get barrier to synchronize all threads in a block. + void syncCTAThreads(CodeGenFunction &CGF); + /// Emit the worker function for the current target region. void emitWorkerFunction(WorkerFunctionState &WST); @@ -72,10 +77,11 @@ private: void emitNonSPMDEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST); /// Helper for generic variables globalization prolog. - void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc); + void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc, + bool WithSPMDCheck = false); /// Helper for generic variables globalization epilog. - void emitGenericVarsEpilog(CodeGenFunction &CGF); + void emitGenericVarsEpilog(CodeGenFunction &CGF, bool WithSPMDCheck = false); /// Helper for SPMD mode target directive's entry function. void emitSPMDEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST, @@ -179,8 +185,19 @@ protected: return "__omp_outlined__"; } + /// Check if the default location must be constant. + /// Constant for NVPTX for better optimization. + bool isDefaultLocationConstant() const override { return true; } + + /// Returns additional flags that can be stored in reserved_2 field of the + /// default location. 
+ /// For NVPTX target contains data about SPMD/Non-SPMD execution mode + + /// Full/Lightweight runtime mode. Used for better optimization. + unsigned getDefaultLocationReserved2Flags() const override; + public: explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM); + void clear() override; /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 /// global_tid, int proc_bind) to generate code for 'proc_bind' clause. @@ -260,6 +277,18 @@ public: ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) override; + /// Emit an implicit/explicit barrier for OpenMP threads. + /// \param Kind Directive for which this implicit barrier call must be + /// generated. Must be OMPD_barrier for explicit barrier generation. + /// \param EmitChecks true if need to emit checks for cancellation barriers. + /// \param ForceSimpleCall true simple barrier call must be emitted, false if + /// runtime class decides which one to emit (simple or with cancellation + /// checks). + /// + void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, + OpenMPDirectiveKind Kind, bool EmitChecks = true, + bool ForceSimpleCall = false) override; + /// Emits a critical region. /// \param CriticalName Name of the critical region. /// \param CriticalOpGen Generator for the statement associated with the given @@ -339,6 +368,26 @@ public: /// void functionFinished(CodeGenFunction &CGF) override; + /// Choose a default value for the dist_schedule clause. + void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF, + const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind, + llvm::Value *&Chunk) const override; + + /// Choose a default value for the schedule clause. + void getDefaultScheduleAndChunk(CodeGenFunction &CGF, + const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, + const Expr *&ChunkExpr) const override; + + /// Adjust some parameters for the target-based directives, like addresses of + /// the variables captured by reference in lambdas. 
+ void adjustTargetSpecificDataForLambdas( + CodeGenFunction &CGF, const OMPExecutableDirective &D) const override; + + /// Perform check on requires decl to ensure that target architecture + /// supports unified addressing + void checkArchForUnifiedAddressing(CodeGenModule &CGM, + const OMPRequiresDecl *D) const override; + private: /// Track the execution mode when codegening directives within a target /// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the @@ -346,9 +395,15 @@ private: /// to emit optimized code. ExecutionMode CurrentExecutionMode = EM_Unknown; + /// Check if the full runtime is required (default - yes). + bool RequiresFullRuntime = true; + /// true if we're emitting the code for the target region and next parallel /// region is L0 for sure. bool IsInTargetMasterThreadRegion = false; + /// true if currently emitting code for target/teams/distribute region, false + /// - otherwise. + bool IsInTTDRegion = false; /// true if we're definitely in the parallel region. bool IsInParallelRegion = false; @@ -362,23 +417,59 @@ private: llvm::Function *createParallelDataSharingWrapper( llvm::Function *OutlinedParallelFn, const OMPExecutableDirective &D); + /// The data for the single globalized variable. + struct MappedVarData { + /// Corresponding field in the global record. + const FieldDecl *FD = nullptr; + /// Corresponding address. + Address PrivateAddr = Address::invalid(); + /// true, if only one element is required (for latprivates in SPMD mode), + /// false, if need to create based on the warp-size. + bool IsOnePerTeam = false; + MappedVarData() = delete; + MappedVarData(const FieldDecl *FD, bool IsOnePerTeam = false) + : FD(FD), IsOnePerTeam(IsOnePerTeam) {} + }; /// The map of local variables to their addresses in the global memory. 
- using DeclToAddrMapTy = llvm::MapVector<const Decl *, - std::pair<const FieldDecl *, Address>>; + using DeclToAddrMapTy = llvm::MapVector<const Decl *, MappedVarData>; /// Set of the parameters passed by value escaping OpenMP context. using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>; struct FunctionData { DeclToAddrMapTy LocalVarData; + llvm::Optional<DeclToAddrMapTy> SecondaryLocalVarData = llvm::None; EscapedParamsTy EscapedParameters; llvm::SmallVector<const ValueDecl*, 4> EscapedVariableLengthDecls; llvm::SmallVector<llvm::Value *, 4> EscapedVariableLengthDeclsAddrs; const RecordDecl *GlobalRecord = nullptr; + llvm::Optional<const RecordDecl *> SecondaryGlobalRecord = llvm::None; llvm::Value *GlobalRecordAddr = nullptr; + llvm::Value *IsInSPMDModeFlag = nullptr; std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams; }; /// Maps the function to the list of the globalized variables with their /// addresses. llvm::SmallDenseMap<llvm::Function *, FunctionData> FunctionGlobalizedDecls; + /// List of records for the globalized variables in target/teams/distribute + /// contexts. Inner records are going to be joined into the single record, + /// while those resulting records are going to be joined into the single + /// union. This resulting union (one per CU) is the entry point for the static + /// memory management runtime functions. + struct GlobalPtrSizeRecsTy { + llvm::GlobalVariable *UseSharedMemory = nullptr; + llvm::GlobalVariable *RecSize = nullptr; + llvm::GlobalVariable *Buffer = nullptr; + SourceLocation Loc; + llvm::SmallVector<const RecordDecl *, 2> Records; + unsigned RegionCounter = 0; + }; + llvm::SmallVector<GlobalPtrSizeRecsTy, 8> GlobalizedRecords; + /// Shared pointer for the global memory in the global memory buffer used for + /// the given kernel. + llvm::GlobalVariable *KernelStaticGlobalized = nullptr; + /// Pair of the Non-SPMD team and all reductions variables in this team + /// region. 
+ std::pair<const Decl *, llvm::SmallVector<const ValueDecl *, 4>> + TeamAndReductions; }; } // CodeGen namespace. diff --git a/lib/CodeGen/CGRecordLayoutBuilder.cpp b/lib/CodeGen/CGRecordLayoutBuilder.cpp index 58aaae692552..c754541ac121 100644 --- a/lib/CodeGen/CGRecordLayoutBuilder.cpp +++ b/lib/CodeGen/CGRecordLayoutBuilder.cpp @@ -20,7 +20,7 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/Expr.h" #include "clang/AST/RecordLayout.h" -#include "clang/Frontend/CodeGenOptions.h" +#include "clang/Basic/CodeGenOptions.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Type.h" diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp index 79662ec0099f..0242b48659d1 100644 --- a/lib/CodeGen/CGStmt.cpp +++ b/lib/CodeGen/CGStmt.cpp @@ -19,8 +19,6 @@ #include "clang/Basic/Builtins.h" #include "clang/Basic/PrettyStackTrace.h" #include "clang/Basic/TargetInfo.h" -#include "clang/Sema/LoopHint.h" -#include "clang/Sema/SemaDiagnostic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" @@ -38,7 +36,7 @@ using namespace CodeGen; void CodeGenFunction::EmitStopPoint(const Stmt *S) { if (CGDebugInfo *DI = getDebugInfo()) { SourceLocation Loc; - Loc = S->getLocStart(); + Loc = S->getBeginLoc(); DI->EmitLocation(Builder, Loc); LastStopPoint = Loc; @@ -932,6 +930,8 @@ CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S, LexicalScope ForScope(*this, S.getSourceRange()); // Evaluate the first pieces before the loop. + if (S.getInit()) + EmitStmt(S.getInit()); EmitStmt(S.getRangeStmt()); EmitStmt(S.getBeginStmt()); EmitStmt(S.getEndStmt()); @@ -1020,7 +1020,7 @@ void CodeGenFunction::EmitReturnOfRValue(RValue RV, QualType Ty) { /// non-void. Fun stuff :). 
void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) { if (requiresReturnValueCheck()) { - llvm::Constant *SLoc = EmitCheckSourceLocation(S.getLocStart()); + llvm::Constant *SLoc = EmitCheckSourceLocation(S.getBeginLoc()); auto *SLocPtr = new llvm::GlobalVariable(CGM.getModule(), SLoc->getType(), false, llvm::GlobalVariable::PrivateLinkage, SLoc); @@ -1045,10 +1045,9 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) { // exception to our over-conservative rules about not jumping to // statements following block literals with non-trivial cleanups. RunCleanupsScope cleanupScope(*this); - if (const ExprWithCleanups *cleanups = - dyn_cast_or_null<ExprWithCleanups>(RV)) { - enterFullExpression(cleanups); - RV = cleanups->getSubExpr(); + if (const FullExpr *fe = dyn_cast_or_null<FullExpr>(RV)) { + enterFullExpression(fe); + RV = fe->getSubExpr(); } // FIXME: Clean this up by using an LValue for ReturnTemp, @@ -1821,11 +1820,14 @@ llvm::Value* CodeGenFunction::EmitAsmInput( // If this can't be a register or memory, i.e., has to be a constant // (immediate or symbolic), try to emit it as such. if (!Info.allowsRegister() && !Info.allowsMemory()) { - llvm::APSInt Result; + if (Info.requiresImmediateConstant()) { + llvm::APSInt AsmConst = InputExpr->EvaluateKnownConstInt(getContext()); + return llvm::ConstantInt::get(getLLVMContext(), AsmConst); + } + + Expr::EvalResult Result; if (InputExpr->EvaluateAsInt(Result, getContext())) - return llvm::ConstantInt::get(getLLVMContext(), Result); - assert(!Info.requiresImmediateConstant() && - "Required-immediate inlineasm arg isn't constant?"); + return llvm::ConstantInt::get(getLLVMContext(), Result.Val.getInt()); } if (Info.allowsRegister() || !Info.allowsMemory()) @@ -1848,7 +1850,7 @@ static llvm::MDNode *getAsmSrcLocInfo(const StringLiteral *Str, SmallVector<llvm::Metadata *, 8> Locs; // Add the location of the first line to the MDNode. 
Locs.push_back(llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( - CGF.Int32Ty, Str->getLocStart().getRawEncoding()))); + CGF.Int32Ty, Str->getBeginLoc().getRawEncoding()))); StringRef StrVal = Str->getString(); if (!StrVal.empty()) { const SourceManager &SM = CGF.CGM.getContext().getSourceManager(); @@ -1979,6 +1981,11 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { diag::err_asm_invalid_type_in_input) << OutExpr->getType() << OutputConstraint; } + + // Update largest vector width for any vector types. + if (auto *VT = dyn_cast<llvm::VectorType>(ResultRegTypes.back())) + LargestVectorWidth = std::max(LargestVectorWidth, + VT->getPrimitiveSizeInBits()); } else { ArgTypes.push_back(Dest.getAddress().getType()); Args.push_back(Dest.getPointer()); @@ -2000,6 +2007,10 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { Arg->getType())) Arg = Builder.CreateBitCast(Arg, AdjTy); + // Update largest vector width for any vector types. + if (auto *VT = dyn_cast<llvm::VectorType>(Arg->getType())) + LargestVectorWidth = std::max(LargestVectorWidth, + VT->getPrimitiveSizeInBits()); if (Info.allowsRegister()) InOutConstraints += llvm::utostr(i); else @@ -2080,6 +2091,11 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { CGM.getDiags().Report(S.getAsmLoc(), diag::err_asm_invalid_type_in_input) << InputExpr->getType() << InputConstraint; + // Update largest vector width for any vector types. 
+ if (auto *VT = dyn_cast<llvm::VectorType>(Arg->getType())) + LargestVectorWidth = std::max(LargestVectorWidth, + VT->getPrimitiveSizeInBits()); + ArgTypes.push_back(Arg->getType()); Args.push_back(Arg); Constraints += InputConstraint; @@ -2272,7 +2288,7 @@ CodeGenFunction::GenerateCapturedStmtFunction(const CapturedStmt &S) { "CapturedStmtInfo should be set when generating the captured function"); const CapturedDecl *CD = S.getCapturedDecl(); const RecordDecl *RD = S.getCapturedRecordDecl(); - SourceLocation Loc = S.getLocStart(); + SourceLocation Loc = S.getBeginLoc(); assert(CD->hasBody() && "missing CapturedDecl body"); // Build the argument list. @@ -2293,9 +2309,8 @@ CodeGenFunction::GenerateCapturedStmtFunction(const CapturedStmt &S) { F->addFnAttr(llvm::Attribute::NoUnwind); // Generate the function. - StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, - CD->getLocation(), - CD->getBody()->getLocStart()); + StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(), + CD->getBody()->getBeginLoc()); // Set the context parameter in CapturedStmtInfo. 
Address DeclPtr = GetAddrOfLocalVar(CD->getContextParam()); CapturedStmtInfo->setContextValue(Builder.CreateLoad(DeclPtr)); @@ -2305,8 +2320,9 @@ CodeGenFunction::GenerateCapturedStmtFunction(const CapturedStmt &S) { Ctx.getTagDeclType(RD)); for (auto *FD : RD->fields()) { if (FD->hasCapturedVLAType()) { - auto *ExprArg = EmitLoadOfLValue(EmitLValueForField(Base, FD), - S.getLocStart()).getScalarVal(); + auto *ExprArg = + EmitLoadOfLValue(EmitLValueForField(Base, FD), S.getBeginLoc()) + .getScalarVal(); auto VAT = FD->getCapturedVLAType(); VLASizeMap[VAT->getSizeExpr()] = ExprArg; } diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index 0d343f84c71f..eb1304d89345 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -73,7 +73,7 @@ public: assert(VD == VD->getCanonicalDecl() && "Canonical decl must be captured."); DeclRefExpr DRE( - const_cast<VarDecl *>(VD), + CGF.getContext(), const_cast<VarDecl *>(VD), isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo && InlinedShareds.isGlobalVarCaptured(VD)), VD->getType().getNonReferenceType(), VK_LValue, C.getLocation()); @@ -191,7 +191,7 @@ public: auto *VD = C.getCapturedVar(); assert(VD == VD->getCanonicalDecl() && "Canonical decl must be captured."); - DeclRefExpr DRE(const_cast<VarDecl *>(VD), + DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo && InlinedShareds.isGlobalVarCaptured(VD)), @@ -222,7 +222,7 @@ LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) { LambdaCaptureFields.lookup(OrigVD) || (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) || (CurCodeDecl && isa<BlockDecl>(CurCodeDecl)); - DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), IsCaptured, + DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured, OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc()); return EmitLValue(&DRE); } @@ -385,12 +385,12 @@ static llvm::Function *emitOutlinedFunctionPrologue( FunctionDecl 
*DebugFunctionDecl = nullptr; if (!FO.UIntPtrCastRequired) { FunctionProtoType::ExtProtoInfo EPI; + QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI); DebugFunctionDecl = FunctionDecl::Create( - Ctx, Ctx.getTranslationUnitDecl(), FO.S->getLocStart(), - SourceLocation(), DeclarationName(), Ctx.VoidTy, - Ctx.getTrivialTypeSourceInfo( - Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI)), - SC_Static, /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false); + Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(), + SourceLocation(), DeclarationName(), FunctionTy, + Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static, + /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false); } for (const FieldDecl *FD : RD->fields()) { QualType ArgType = FD->getType(); @@ -422,7 +422,7 @@ static llvm::Function *emitOutlinedFunctionPrologue( if (DebugFunctionDecl && (CapVar || I->capturesThis())) { Arg = ParmVarDecl::Create( Ctx, DebugFunctionDecl, - CapVar ? CapVar->getLocStart() : FD->getLocStart(), + CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(), CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType, /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr); } else { @@ -459,7 +459,7 @@ static llvm::Function *emitOutlinedFunctionPrologue( // Generate the function. CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs, - FO.S->getLocStart(), CD->getBody()->getLocStart()); + FO.S->getBeginLoc(), CD->getBody()->getBeginLoc()); unsigned Cnt = CD->getContextParamPosition(); I = FO.S->captures().begin(); for (const FieldDecl *FD : RD->fields()) { @@ -602,7 +602,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { I->second.second, I->second.first ? 
I->second.first->getType() : Arg->getType(), AlignmentSource::Decl); - CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getLocStart()); + CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc()); } else { auto EI = VLASizes.find(Arg); if (EI != VLASizes.end()) { @@ -611,12 +611,12 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg), Arg->getType(), AlignmentSource::Decl); - CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getLocStart()); + CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc()); } } CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType())); } - CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(), + CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getBeginLoc(), F, CallArgs); WrapperCGF.FinishFunction(); return WrapperF; @@ -763,7 +763,7 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, const auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl()); bool IsRegistered; - DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), + DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), /*RefersToEnclosingVariableOrCapture=*/FD != nullptr, (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); LValue OriginalLVal = EmitLValue(&DRE); @@ -878,8 +878,8 @@ bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { getContext().getTargetInfo().isTLSSupported()) { assert(CapturedStmtInfo->lookup(VD) && "Copyin threadprivates should have been captured!"); - DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(), - VK_LValue, (*IRef)->getExprLoc()); + DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true, + (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); MasterAddr = EmitLValue(&DRE).getAddress(); LocalDeclMap.erase(VD); } else { @@ -953,11 +953,10 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit( const auto *DestVD = 
cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() { - DeclRefExpr DRE( - const_cast<VarDecl *>(OrigVD), - /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup( - OrigVD) != nullptr, - (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); + DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), + /*RefersToEnclosingVariableOrCapture=*/ + CapturedStmtInfo->lookup(OrigVD) != nullptr, + (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); return EmitLValue(&DRE).getAddress(); }); // Check if the variable is also a firstprivate: in this case IInit is @@ -1183,7 +1182,7 @@ void CodeGenFunction::EmitOMPReductionClauseFinal( // Emit nowait reduction if nowait clause is present or directive is a // parallel directive (it always has implicit barrier). CGM.getOpenMPRuntime().emitReduction( - *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps, + *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps, {WithNowait, SimpleReduction, ReductionKind}); } } @@ -1237,12 +1236,12 @@ static void emitCommonOMPParallelDirective( CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); CGF.CGM.getOpenMPRuntime().emitNumThreadsClause( - CGF, NumThreads, NumThreadsClause->getLocStart()); + CGF, NumThreads, NumThreadsClause->getBeginLoc()); } if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) { CodeGenFunction::RunCleanupsScope ProcBindScope(CGF); CGF.CGM.getOpenMPRuntime().emitProcBindClause( - CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart()); + CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc()); } const Expr *IfCond = nullptr; for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { @@ -1261,7 +1260,7 @@ static void emitCommonOMPParallelDirective( // parameters when necessary CodeGenBoundParameters(CGF, S, CapturedVars); CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); - 
CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn, + CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn, CapturedVars, IfCond); } @@ -1281,7 +1280,7 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { // propagation master's thread values of threadprivate variables to local // instances of that variables of all other implicit threads. CGF.CGM.getOpenMPRuntime().emitBarrierCall( - CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, + CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, /*ForceSimpleCall=*/true); } CGF.EmitOMPPrivateClause(S, PrivateScope); @@ -1384,7 +1383,7 @@ bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) { AutoVarEmission Emission = EmitAutoVarAlloca(*VD); const auto *OrigVD = cast<VarDecl>(Ref->getDecl()); - DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), + DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), CapturedStmtInfo->lookup(OrigVD) != nullptr, VD->getInit()->getType(), VK_LValue, VD->getInit()->getExprLoc()); @@ -1429,7 +1428,7 @@ void CodeGenFunction::EmitOMPLinearClauseFinal( } } const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl()); - DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), + DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), CapturedStmtInfo->lookup(OrigVD) != nullptr, (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); Address OrigAddr = EmitLValue(&DRE).getAddress(); @@ -1473,7 +1472,8 @@ static void emitAlignedClause(CodeGenFunction &CGF, "alignment is not power of 2"); if (Alignment != 0) { llvm::Value *PtrValue = CGF.EmitScalarExpr(E); - CGF.EmitAlignmentAssumption(PtrValue, Alignment); + CGF.EmitAlignmentAssumption( + PtrValue, E, /*No second loc needed*/ SourceLocation(), Alignment); } } } @@ -1497,7 +1497,7 @@ void CodeGenFunction::EmitOMPPrivateLoopCounters( if (LocalDeclMap.count(VD) || 
CapturedStmtInfo->lookup(VD) || VD->hasGlobalStorage()) { (void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() { - DeclRefExpr DRE(const_cast<VarDecl *>(VD), + DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD), E->getType(), VK_LValue, E->getExprLoc()); return EmitLValue(&DRE).getAddress(); @@ -1509,6 +1509,23 @@ void CodeGenFunction::EmitOMPPrivateLoopCounters( } ++I; } + // Privatize extra loop counters used in loops for ordered(n) clauses. + for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) { + if (!C->getNumForLoops()) + continue; + for (unsigned I = S.getCollapsedNumber(), + E = C->getLoopNumIterations().size(); + I < E; ++I) { + const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I)); + const auto *VD = cast<VarDecl>(DRE->getDecl()); + // Override only those variables that are really emitted already. + if (LocalDeclMap.count(VD)) { + (void)LoopScope.addPrivate(VD, [this, DRE, VD]() { + return CreateMemTemp(DRE->getType(), VD->getName()); + }); + } + } + } } static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, @@ -1627,7 +1644,7 @@ void CodeGenFunction::EmitOMPSimdFinal( if (CED) { OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(); } else { - DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD), + DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD), /*RefersToEnclosingVariableOrCapture=*/false, (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc()); OrigAddr = EmitLValue(&DRE).getAddress(); @@ -1721,6 +1738,8 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, CGF.EmitOMPReductionClauseInit(S, LoopScope); bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); (void)LoopScope.Privatize(); + if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) + CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), 
S.getInc(), [&S](CodeGenFunction &CGF) { @@ -1785,7 +1804,7 @@ void CodeGenFunction::EmitOMPOuterLoop( BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond); } else { BoolCondVal = - RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL, + RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB, LoopArgs.ST); } @@ -1819,7 +1838,7 @@ void CodeGenFunction::EmitOMPOuterLoop( else EmitOMPSimdInit(S, IsMonotonic); - SourceLocation Loc = S.getLocStart(); + SourceLocation Loc = S.getBeginLoc(); // when 'distribute' is not combined with a 'for': // while (idx <= UB) { BODY; ++idx; } @@ -1851,7 +1870,7 @@ void CodeGenFunction::EmitOMPOuterLoop( // Tell the runtime we are done. auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) { if (!DynamicOrOrdered) - CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(), + CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), S.getDirectiveKind()); }; OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); @@ -1934,13 +1953,13 @@ void CodeGenFunction::EmitOMPForOuterLoop( llvm::Value *UBVal = DispatchBounds.second; CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal, LoopArgs.Chunk}; - RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize, + RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize, IVSigned, Ordered, DipatchRTInputValues); } else { CGOpenMPRuntime::StaticRTInput StaticInit( IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk); - RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(), + RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit); } @@ -1986,10 +2005,10 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop( CGOpenMPRuntime::StaticRTInput StaticInit( IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk); - 
RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit); + RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit); // for combined 'distribute' and 'for' the increment expression of distribute - // is store in DistInc. For 'distribute' alone, it is in Inc. + // is stored in DistInc. For 'distribute' alone, it is in Inc. Expr *IncExpr; if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) IncExpr = S.getDistInc(); @@ -2082,9 +2101,9 @@ emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF, // distribute chunk QualType IteratorTy = IVExpr->getType(); llvm::Value *LBVal = - CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getLocStart()); + CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc()); llvm::Value *UBVal = - CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getLocStart()); + CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc()); return {LBVal, UBVal}; } @@ -2244,7 +2263,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( bool Ordered = false; if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { if (OrderedClause->getNumForLoops()) - RT.emitDoacrossInit(*this, S); + RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations()); else Ordered = true; } @@ -2270,7 +2289,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( // initialization of firstprivate variables and post-update of // lastprivate variables. 
CGM.getOpenMPRuntime().emitBarrierCall( - *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, + *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, /*ForceSimpleCall=*/true); } EmitOMPPrivateClause(S, LoopScope); @@ -2279,19 +2298,33 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( EmitOMPPrivateLoopCounters(S, LoopScope); EmitOMPLinearClause(S, LoopScope); (void)LoopScope.Privatize(); + if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) + CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S); // Detect the loop schedule kind and chunk. - llvm::Value *Chunk = nullptr; + const Expr *ChunkExpr = nullptr; OpenMPScheduleTy ScheduleKind; if (const auto *C = S.getSingleClause<OMPScheduleClause>()) { ScheduleKind.Schedule = C->getScheduleKind(); ScheduleKind.M1 = C->getFirstScheduleModifier(); ScheduleKind.M2 = C->getSecondScheduleModifier(); - if (const Expr *Ch = C->getChunkSize()) { - Chunk = EmitScalarExpr(Ch); - Chunk = EmitScalarConversion(Chunk, Ch->getType(), - S.getIterationVariable()->getType(), - S.getLocStart()); + ChunkExpr = C->getChunkSize(); + } else { + // Default behaviour for schedule clause. 
+ CGM.getOpenMPRuntime().getDefaultScheduleAndChunk( + *this, S, ScheduleKind.Schedule, ChunkExpr); + } + bool HasChunkSizeOne = false; + llvm::Value *Chunk = nullptr; + if (ChunkExpr) { + Chunk = EmitScalarExpr(ChunkExpr); + Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(), + S.getIterationVariable()->getType(), + S.getBeginLoc()); + Expr::EvalResult Result; + if (ChunkExpr->EvaluateAsInt(Result, getContext())) { + llvm::APSInt EvaluatedChunk = Result.Val.getInt(); + HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1); } } const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); @@ -2300,8 +2333,12 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( // If the static schedule kind is specified or if the ordered clause is // specified, and if no monotonic modifier is specified, the effect will // be as if the monotonic modifier was specified. - if (RT.isStaticNonchunked(ScheduleKind.Schedule, - /* Chunked */ Chunk != nullptr) && + bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule, + /* Chunked */ Chunk != nullptr) && HasChunkSizeOne && + isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); + if ((RT.isStaticNonchunked(ScheduleKind.Schedule, + /* Chunked */ Chunk != nullptr) || + StaticChunkedOne) && !Ordered) { if (isOpenMPSimdDirective(S.getDirectiveKind())) EmitOMPSimdInit(S, /*IsMonotonic=*/true); @@ -2312,27 +2349,42 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( // unspecified in this case. CGOpenMPRuntime::StaticRTInput StaticInit( IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(), - UB.getAddress(), ST.getAddress()); - RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(), + UB.getAddress(), ST.getAddress(), + StaticChunkedOne ? 
Chunk : nullptr); + RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit); JumpDest LoopExit = getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); // UB = min(UB, GlobalUB); - EmitIgnoredExpr(S.getEnsureUpperBound()); + if (!StaticChunkedOne) + EmitIgnoredExpr(S.getEnsureUpperBound()); // IV = LB; EmitIgnoredExpr(S.getInit()); - // while (idx <= UB) { BODY; ++idx; } - EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), - S.getInc(), - [&S, LoopExit](CodeGenFunction &CGF) { - CGF.EmitOMPLoopBody(S, LoopExit); - CGF.EmitStopPoint(&S); - }, - [](CodeGenFunction &) {}); + // For unchunked static schedule generate: + // + // while (idx <= UB) { + // BODY; + // ++idx; + // } + // + // For static schedule with chunk one: + // + // while (IV <= PrevUB) { + // BODY; + // IV += ST; + // } + EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), + StaticChunkedOne ? S.getCombinedParForInDistCond() : S.getCond(), + StaticChunkedOne ? S.getDistInc() : S.getInc(), + [&S, LoopExit](CodeGenFunction &CGF) { + CGF.EmitOMPLoopBody(S, LoopExit); + CGF.EmitStopPoint(&S); + }, + [](CodeGenFunction &) {}); EmitBlock(LoopExit.getBlock()); // Tell the runtime we are done. 
auto &&CodeGen = [&S](CodeGenFunction &CGF) { - CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(), + CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), S.getDirectiveKind()); }; OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); @@ -2351,11 +2403,10 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( LoopArguments, CGDispatchBounds); } if (isOpenMPSimdDirective(S.getDirectiveKind())) { - EmitOMPSimdFinal(S, - [IL, &S](CodeGenFunction &CGF) { - return CGF.Builder.CreateIsNotNull( - CGF.EmitLoadOfScalar(IL, S.getLocStart())); - }); + EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) { + return CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); + }); } EmitOMPReductionClauseFinal( S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind()) @@ -2365,17 +2416,17 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( emitPostUpdateForReductionClause( *this, S, [IL, &S](CodeGenFunction &CGF) { return CGF.Builder.CreateIsNotNull( - CGF.EmitLoadOfScalar(IL, S.getLocStart())); + CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); }); // Emit final copy of the lastprivate variables if IsLastIter != 0. if (HasLastprivateClause) EmitOMPLastprivateClauseFinal( S, isOpenMPSimdDirective(S.getDirectiveKind()), - Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); + Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc()))); } EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) { return CGF.Builder.CreateIsNotNull( - CGF.EmitLoadOfScalar(IL, S.getLocStart())); + CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); }); DoacrossCleanupScope.ForceCleanup(); // We're now done with the loop, so jump to the continuation block. @@ -2432,7 +2483,7 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { // Emit an implicit barrier at the end. 
if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) - CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); + CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); } void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { @@ -2450,7 +2501,7 @@ void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { // Emit an implicit barrier at the end. if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) - CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for); + CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); } static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, @@ -2485,16 +2536,16 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { CGF.Builder.getInt32(0)); // Loop counter. LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv."); - OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); + OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); - OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); + OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); // Generate condition for loop. BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, - OK_Ordinary, S.getLocStart(), FPOptions()); + OK_Ordinary, S.getBeginLoc(), FPOptions()); // Increment for loop counter. 
UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, - S.getLocStart(), true); + S.getBeginLoc(), true); auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) { // Iterate through all sections and emit a switch construct: // switch (IV) { @@ -2509,7 +2560,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { // .omp.sections.exit: llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); llvm::SwitchInst *SwitchStmt = - CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getLocStart()), + CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()), ExitBB, CS == nullptr ? 1 : CS->size()); if (CS) { unsigned CaseNumber = 0; @@ -2537,13 +2588,15 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { // initialization of firstprivate variables and post-update of lastprivate // variables. CGF.CGM.getOpenMPRuntime().emitBarrierCall( - CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, + CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, /*ForceSimpleCall=*/true); } CGF.EmitOMPPrivateClause(S, LoopScope); HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); CGF.EmitOMPReductionClauseInit(S, LoopScope); (void)LoopScope.Privatize(); + if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) + CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); // Emit static non-chunked loop. 
OpenMPScheduleTy ScheduleKind; @@ -2552,20 +2605,20 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(), UB.getAddress(), ST.getAddress()); CGF.CGM.getOpenMPRuntime().emitForStaticInit( - CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit); + CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit); // UB = min(UB, GlobalUB); - llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart()); + llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc()); llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect( CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); CGF.EmitStoreOfScalar(MinUBGlobalUB, UB); // IV = LB; - CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV); + CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV); // while (idx <= UB) { BODY; ++idx; } CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen, [](CodeGenFunction &) {}); // Tell the runtime we are done. auto &&CodeGen = [&S](CodeGenFunction &CGF) { - CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(), + CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), S.getDirectiveKind()); }; CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen); @@ -2573,7 +2626,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { // Emit post-update of the reduction variables if IsLastIter != 0. emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) { return CGF.Builder.CreateIsNotNull( - CGF.EmitLoadOfScalar(IL, S.getLocStart())); + CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); }); // Emit final copy of the lastprivate variables if IsLastIter != 0. 
@@ -2581,7 +2634,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { CGF.EmitOMPLastprivateClauseFinal( S, /*NoFinals=*/false, CGF.Builder.CreateIsNotNull( - CGF.EmitLoadOfScalar(IL, S.getLocStart()))); + CGF.EmitLoadOfScalar(IL, S.getBeginLoc()))); }; bool HasCancel = false; @@ -2598,7 +2651,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) { // Emit implicit barrier to synchronize threads and avoid data races on // initialization of firstprivate variables. - CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), + CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_unknown); } } @@ -2610,7 +2663,7 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { } // Emit an implicit barrier at the end. if (!S.getSingleClause<OMPNowaitClause>()) { - CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), + CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_sections); } } @@ -2652,7 +2705,7 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { }; { OMPLexicalScope Scope(*this, S, OMPD_unknown); - CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), + CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(), CopyprivateVars, DestExprs, SrcExprs, AssignmentOps); } @@ -2660,7 +2713,7 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { // init or if no 'nowait' clause was specified and no 'copyprivate' clause). if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { CGM.getOpenMPRuntime().emitBarrierCall( - *this, S.getLocStart(), + *this, S.getBeginLoc(), S.getSingleClause<OMPNowaitClause>() ? 
OMPD_unknown : OMPD_single); } } @@ -2671,7 +2724,7 @@ void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); }; OMPLexicalScope Scope(*this, S, OMPD_unknown); - CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); + CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc()); } void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { @@ -2685,7 +2738,7 @@ void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { OMPLexicalScope Scope(*this, S, OMPD_unknown); CGM.getOpenMPRuntime().emitCriticalRegion(*this, S.getDirectiveName().getAsString(), - CodeGen, S.getLocStart(), Hint); + CodeGen, S.getBeginLoc(), Hint); } void CodeGenFunction::EmitOMPParallelForDirective( @@ -2828,7 +2881,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( } } Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( - *this, S.getLocStart(), LHSs, RHSs, Data); + *this, S.getBeginLoc(), LHSs, RHSs, Data); // Build list of dependences. 
for (const auto *C : S.getClausesOfKind<OMPDependClause>()) for (const Expr *IRef : C->varlists()) @@ -2872,15 +2925,15 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( PrivatePtrs.emplace_back(VD, PrivatePtr); CallArgs.push_back(PrivatePtr.getPointer()); } - CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(), + CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(), CopyFn, CallArgs); for (const auto &Pair : LastprivateDstsOrigs) { const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl()); - DeclRefExpr DRE( - const_cast<VarDecl *>(OrigVD), - /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup( - OrigVD) != nullptr, - Pair.second->getType(), VK_LValue, Pair.second->getExprLoc()); + DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD), + /*RefersToEnclosingVariableOrCapture=*/ + CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr, + Pair.second->getType(), VK_LValue, + Pair.second->getExprLoc()); Scope.addPrivate(Pair.first, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); }); @@ -2902,11 +2955,11 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( RedCG.emitAggregateType(CGF, Cnt); // FIXME: This must removed once the runtime library is fixed. // Emit required threadprivate variables for - // initilizer/combiner/finalizer. - CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(), + // initializer/combiner/finalizer. 
+ CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(), RedCG, Cnt); Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( - CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); + CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); Replacement = Address(CGF.EmitScalarConversion( Replacement.getPointer(), CGF.getContext().VoidPtrTy, @@ -2948,17 +3001,17 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( RedCG.emitSharedLValue(CGF, Cnt); RedCG.emitAggregateType(CGF, Cnt); // The taskgroup descriptor variable is always implicit firstprivate and - // privatized already during procoessing of the firstprivates. + // privatized already during processing of the firstprivates. // FIXME: This must removed once the runtime library is fixed. // Emit required threadprivate variables for - // initilizer/combiner/finalizer. - CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(), + // initializer/combiner/finalizer. + CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(), RedCG, Cnt); llvm::Value *ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]), TaskgroupDescriptors[Cnt]->getExprLoc()); Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( - CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); + CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); Replacement = Address( CGF.EmitScalarConversion( Replacement.getPointer(), CGF.getContext().VoidPtrTy, @@ -3049,14 +3102,14 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( getContext().VoidPtrTy, ArrSize, ArrayType::Normal, /*IndexTypeQuals=*/0); BPVD = createImplicitFirstprivateForType( - getContext(), Data, BaseAndPointersType, CD, S.getLocStart()); + getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc()); PVD = createImplicitFirstprivateForType( - getContext(), Data, BaseAndPointersType, CD, S.getLocStart()); + getContext(), Data, 
BaseAndPointersType, CD, S.getBeginLoc()); QualType SizesType = getContext().getConstantArrayType( getContext().getSizeType(), ArrSize, ArrayType::Normal, /*IndexTypeQuals=*/0); SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD, - S.getLocStart()); + S.getBeginLoc()); TargetScope.addPrivate( BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; }); TargetScope.addPrivate(PVD, @@ -3091,7 +3144,7 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( PrivatePtrs.emplace_back(VD, PrivatePtr); CallArgs.push_back(PrivatePtr.getPointer()); } - CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(), + CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(), CopyFn, CallArgs); for (const auto &Pair : PrivatePtrs) { Address Replacement(CGF.Builder.CreateLoad(Pair.second), @@ -3122,7 +3175,7 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( getContext().getIntTypeForBitwidth(32, /*Signed=*/0), SourceLocation()); - CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), S, OutlinedFn, + CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn, SharedsTy, CapturedStruct, &IfCond, Data); } @@ -3149,7 +3202,7 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { auto &&TaskGen = [&S, SharedsTy, CapturedStruct, IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, const OMPTaskDataTy &Data) { - CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn, + CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn, SharedsTy, CapturedStruct, IfCond, Data); }; @@ -3158,15 +3211,15 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { void CodeGenFunction::EmitOMPTaskyieldDirective( const OMPTaskyieldDirective &S) { - CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart()); + CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc()); } void CodeGenFunction::EmitOMPBarrierDirective(const 
OMPBarrierDirective &S) { - CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier); + CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier); } void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { - CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart()); + CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc()); } void CodeGenFunction::EmitOMPTaskgroupDirective( @@ -3195,7 +3248,7 @@ void CodeGenFunction::EmitOMPTaskgroupDirective( } } llvm::Value *ReductionDesc = - CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getLocStart(), + CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(), LHSs, RHSs, Data); const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); CGF.EmitVarDecl(*VD); @@ -3205,7 +3258,7 @@ void CodeGenFunction::EmitOMPTaskgroupDirective( CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); }; OMPLexicalScope Scope(*this, S, OMPD_unknown); - CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart()); + CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc()); } void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { @@ -3217,7 +3270,7 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { FlushClause->varlist_end()); return llvm::None; }(), - S.getLocStart()); + S.getBeginLoc()); } void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, @@ -3286,7 +3339,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, // on initialization of firstprivate variables and post-update of // lastprivate variables. 
CGM.getOpenMPRuntime().emitBarrierCall( - *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, + *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, /*ForceSimpleCall=*/true); } EmitOMPPrivateClause(S, LoopScope); @@ -3297,6 +3350,8 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); EmitOMPPrivateLoopCounters(S, LoopScope); (void)LoopScope.Privatize(); + if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) + CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S); // Detect the distribute schedule kind and chunk. llvm::Value *Chunk = nullptr; @@ -3307,8 +3362,12 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, Chunk = EmitScalarExpr(Ch); Chunk = EmitScalarConversion(Chunk, Ch->getType(), S.getIterationVariable()->getType(), - S.getLocStart()); + S.getBeginLoc()); } + } else { + // Default behaviour for dist_schedule clause. + CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk( + *this, S, ScheduleKind, Chunk); } const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); @@ -3321,14 +3380,19 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, // iteration space is divided into chunks that are approximately equal // in size, and at most one chunk is distributed to each team of the // league. The size of the chunks is unspecified in this case. 
+ bool StaticChunked = RT.isStaticChunked( + ScheduleKind, /* Chunked */ Chunk != nullptr) && + isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); if (RT.isStaticNonchunked(ScheduleKind, - /* Chunked */ Chunk != nullptr)) { + /* Chunked */ Chunk != nullptr) || + StaticChunked) { if (isOpenMPSimdDirective(S.getDirectiveKind())) EmitOMPSimdInit(S, /*IsMonotonic=*/true); CGOpenMPRuntime::StaticRTInput StaticInit( IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(), - LB.getAddress(), UB.getAddress(), ST.getAddress()); - RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, + LB.getAddress(), UB.getAddress(), ST.getAddress(), + StaticChunked ? Chunk : nullptr); + RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit); JumpDest LoopExit = getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); @@ -3346,18 +3410,48 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, ? S.getCombinedCond() : S.getCond(); - // for distribute alone, codegen - // while (idx <= UB) { BODY; ++idx; } - // when combined with 'for' (e.g. as in 'distribute parallel for') - // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } + if (StaticChunked) + Cond = S.getCombinedDistCond(); + + // For static unchunked schedules generate: + // + // 1. For distribute alone, codegen + // while (idx <= UB) { + // BODY; + // ++idx; + // } + // + // 2. When combined with 'for' (e.g. 
as in 'distribute parallel for') + // while (idx <= UB) { + // <CodeGen rest of pragma>(LB, UB); + // idx += ST; + // } + // + // For static chunk one schedule generate: + // + // while (IV <= GlobalUB) { + // <CodeGen rest of pragma>(LB, UB); + // LB += ST; + // UB += ST; + // UB = min(UB, GlobalUB); + // IV = LB; + // } + // EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr, [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { CodeGenLoop(CGF, S, LoopExit); }, - [](CodeGenFunction &) {}); + [&S, StaticChunked](CodeGenFunction &CGF) { + if (StaticChunked) { + CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound()); + CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound()); + CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound()); + CGF.EmitIgnoredExpr(S.getCombinedInit()); + } + }); EmitBlock(LoopExit.getBlock()); // Tell the runtime we are done. - RT.emitForStaticFinish(*this, S.getLocStart(), S.getDirectiveKind()); + RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind()); } else { // Emit the outer loop, which requests its work chunk [LB..UB] from // runtime and runs the inner loop to process it. 
@@ -3370,38 +3464,25 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, if (isOpenMPSimdDirective(S.getDirectiveKind())) { EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) { return CGF.Builder.CreateIsNotNull( - CGF.EmitLoadOfScalar(IL, S.getLocStart())); + CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); }); } if (isOpenMPSimdDirective(S.getDirectiveKind()) && !isOpenMPParallelDirective(S.getDirectiveKind()) && !isOpenMPTeamsDirective(S.getDirectiveKind())) { - OpenMPDirectiveKind ReductionKind = OMPD_unknown; - if (isOpenMPParallelDirective(S.getDirectiveKind()) && - isOpenMPSimdDirective(S.getDirectiveKind())) { - ReductionKind = OMPD_parallel_for_simd; - } else if (isOpenMPParallelDirective(S.getDirectiveKind())) { - ReductionKind = OMPD_parallel_for; - } else if (isOpenMPSimdDirective(S.getDirectiveKind())) { - ReductionKind = OMPD_simd; - } else if (!isOpenMPTeamsDirective(S.getDirectiveKind()) && - S.hasClausesOfKind<OMPReductionClause>()) { - llvm_unreachable( - "No reduction clauses is allowed in distribute directive."); - } - EmitOMPReductionClauseFinal(S, ReductionKind); + EmitOMPReductionClauseFinal(S, OMPD_simd); // Emit post-update of the reduction variables if IsLastIter != 0. emitPostUpdateForReductionClause( *this, S, [IL, &S](CodeGenFunction &CGF) { return CGF.Builder.CreateIsNotNull( - CGF.EmitLoadOfScalar(IL, S.getLocStart())); + CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); }); } // Emit final copy of the lastprivate variables if IsLastIter != 0. 
if (HasLastprivateClause) { EmitOMPLastprivateClauseFinal( S, /*NoFinals=*/false, - Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); + Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc()))); } } @@ -3448,7 +3529,7 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { llvm::SmallVector<llvm::Value *, 16> CapturedVars; CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); - CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(), + CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(), OutlinedFn, CapturedVars); } else { Action.Enter(CGF); @@ -3456,7 +3537,7 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { } }; OMPLexicalScope Scope(*this, S, OMPD_unknown); - CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C); + CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C); } static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, @@ -3887,6 +3968,11 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_from: case OMPC_use_device_ptr: case OMPC_is_device_ptr: + case OMPC_unified_address: + case OMPC_unified_shared_memory: + case OMPC_reverse_offload: + case OMPC_dynamic_allocators: + case OMPC_atomic_default_mem_order: llvm_unreachable("Clause is not allowed in 'omp atomic'."); } } @@ -3903,13 +3989,13 @@ void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { } const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers(); - if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) - enterFullExpression(EWC); + if (const auto *FE = dyn_cast<FullExpr>(CS)) + enterFullExpression(FE); // Processing for statements under 'atomic capture'. 
if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) { for (const Stmt *C : Compound->body()) { - if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) - enterFullExpression(EWC); + if (const auto *FE = dyn_cast<FullExpr>(C)) + enterFullExpression(FE); } } @@ -3918,7 +4004,7 @@ void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { CGF.EmitStopPoint(CS); emitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), S.getV(), S.getExpr(), S.getUpdateExpr(), - S.isXLHSInRHSPart(), S.getLocStart()); + S.isXLHSInRHSPart(), S.getBeginLoc()); }; OMPLexicalScope Scope(*this, S, OMPD_unknown); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); @@ -3986,6 +4072,16 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, IsOffloadEntry, CodeGen); OMPLexicalScope Scope(CGF, S, OMPD_task); + auto &&SizeEmitter = [](CodeGenFunction &CGF, const OMPLoopDirective &D) { + OMPLoopScope(CGF, D); + // Emit calculation of the iterations count. + llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations()); + NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty, + /*IsSigned=*/false); + return NumIterations; + }; + CGM.getOpenMPRuntime().emitTargetNumIterationsCall(CGF, S, Device, + SizeEmitter); CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device); } @@ -3996,6 +4092,8 @@ static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); CGF.EmitOMPPrivateClause(S, PrivateScope); (void)PrivateScope.Privatize(); + if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) + CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt()); } @@ -4037,13 +4135,13 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, const Expr *ThreadLimit = TL ? 
TL->getThreadLimit() : nullptr; CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit, - S.getLocStart()); + S.getBeginLoc()); } OMPTeamsScope Scope(CGF, S); llvm::SmallVector<llvm::Value *, 16> CapturedVars; CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); - CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn, + CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn, CapturedVars); } @@ -4076,6 +4174,8 @@ static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, CGF.EmitOMPPrivateClause(S, PrivateScope); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); + if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) + CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); CGF.EmitStmt(CS->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); }; @@ -4394,7 +4494,7 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( void CodeGenFunction::EmitOMPCancellationPointDirective( const OMPCancellationPointDirective &S) { - CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(), + CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(), S.getCancelRegion()); } @@ -4407,7 +4507,7 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { break; } } - CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond, + CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond, S.getCancelRegion()); } @@ -4634,6 +4734,8 @@ static void emitTargetParallelRegion(CodeGenFunction &CGF, CGF.EmitOMPPrivateClause(S, PrivateScope); CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); + if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) + CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); // TODO: Add support for clauses. 
CGF.EmitStmt(CS->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); @@ -4864,7 +4966,7 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { S, isOpenMPSimdDirective(S.getDirectiveKind()), CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar( CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, - (*LIP)->getType(), S.getLocStart()))); + (*LIP)->getType(), S.getBeginLoc()))); } }; auto &&TaskGen = [&S, SharedsTy, CapturedStruct, @@ -4873,7 +4975,7 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond, &Data](CodeGenFunction &CGF, PrePostActionTy &) { OMPLoopScope PreInitScope(CGF, S); - CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S, + CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S, OutlinedFn, SharedsTy, CapturedStruct, IfCond, Data); }; @@ -4891,7 +4993,7 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data); }, - S.getLocStart()); + S.getBeginLoc()); } } @@ -4934,16 +5036,37 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective( if (isOpenMPSimdDirective(D.getDirectiveKind())) { emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action); } else { + OMPPrivateScope LoopGlobals(CGF); if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) { for (const Expr *E : LD->counters()) { - if (const auto *VD = dyn_cast<OMPCapturedExprDecl>( - cast<DeclRefExpr>(E)->getDecl())) { + const auto *VD = dyn_cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) { + LValue GlobLVal = CGF.EmitLValue(E); + LoopGlobals.addPrivate( + VD, [&GlobLVal]() { return GlobLVal.getAddress(); }); + } + if (isa<OMPCapturedExprDecl>(VD)) { // Emit only those that were not explicitly referenced in clauses. 
if (!CGF.LocalDeclMap.count(VD)) CGF.EmitVarDecl(*VD); } } + for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) { + if (!C->getNumForLoops()) + continue; + for (unsigned I = LD->getCollapsedNumber(), + E = C->getLoopNumIterations().size(); + I < E; ++I) { + if (const auto *VD = dyn_cast<OMPCapturedExprDecl>( + cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) { + // Emit only those that were not explicitly referenced in clauses. + if (!CGF.LocalDeclMap.count(VD)) + CGF.EmitVarDecl(*VD); + } + } + } } + LoopGlobals.Privatize(); CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt()); } }; diff --git a/lib/CodeGen/CGVTT.cpp b/lib/CodeGen/CGVTT.cpp index b0a3a0bffa2e..fbd8146702a9 100644 --- a/lib/CodeGen/CGVTT.cpp +++ b/lib/CodeGen/CGVTT.cpp @@ -119,10 +119,10 @@ llvm::GlobalVariable *CodeGenVTables::GetAddrOfVTT(const CXXRecordDecl *RD) { llvm::ArrayType *ArrayType = llvm::ArrayType::get(CGM.Int8PtrTy, Builder.getVTTComponents().size()); + unsigned Align = CGM.getDataLayout().getABITypeAlignment(CGM.Int8PtrTy); - llvm::GlobalVariable *GV = - CGM.CreateOrReplaceCXXRuntimeVariable(Name, ArrayType, - llvm::GlobalValue::ExternalLinkage); + llvm::GlobalVariable *GV = CGM.CreateOrReplaceCXXRuntimeVariable( + Name, ArrayType, llvm::GlobalValue::ExternalLinkage, Align); GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); return GV; } diff --git a/lib/CodeGen/CGVTables.cpp b/lib/CodeGen/CGVTables.cpp index cc334637a831..bfb089ff908e 100644 --- a/lib/CodeGen/CGVTables.cpp +++ b/lib/CodeGen/CGVTables.cpp @@ -16,9 +16,9 @@ #include "CodeGenModule.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/RecordLayout.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "clang/CodeGen/ConstantInitBuilder.h" -#include "clang/Frontend/CodeGenOptions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Format.h" #include "llvm/Transforms/Utils/Cloning.h" @@ -128,7 +128,7 @@ static void 
resolveTopLevelMetadata(llvm::Function *Fn, // they are referencing. for (auto &BB : Fn->getBasicBlockList()) { for (auto &I : BB) { - if (auto *DII = dyn_cast<llvm::DbgInfoIntrinsic>(&I)) { + if (auto *DII = dyn_cast<llvm::DbgVariableIntrinsic>(&I)) { auto *DILocal = DII->getVariable(); if (!DILocal->isResolved()) DILocal->resolve(); @@ -231,7 +231,7 @@ void CodeGenFunction::StartThunk(llvm::Function *Fn, GlobalDecl GD, // Build FunctionArgs. const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl()); - QualType ThisType = MD->getThisType(getContext()); + QualType ThisType = MD->getThisType(); const FunctionProtoType *FPT = MD->getType()->getAs<FunctionProtoType>(); QualType ResultType; if (IsUnprototyped) @@ -304,13 +304,13 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr, CGM.ErrorUnsupported( MD, "non-trivial argument copy for return-adjusting thunk"); } - EmitMustTailThunk(MD, AdjustedThisPtr, CalleePtr); + EmitMustTailThunk(CurGD, AdjustedThisPtr, CalleePtr); return; } // Start building CallArgs. CallArgList CallArgs; - QualType ThisType = MD->getThisType(getContext()); + QualType ThisType = MD->getThisType(); CallArgs.add(RValue::get(AdjustedThisPtr), ThisType); if (isa<CXXDestructorDecl>(MD)) @@ -350,13 +350,12 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr, : FPT->getReturnType(); ReturnValueSlot Slot; if (!ResultType->isVoidType() && - CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect && - !hasScalarEvaluationKind(CurFnInfo->getReturnType())) + CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect) Slot = ReturnValueSlot(ReturnValue, ResultType.isVolatileQualified()); // Now emit our call. llvm::Instruction *CallOrInvoke; - CGCallee Callee = CGCallee::forDirect(CalleePtr, MD); + CGCallee Callee = CGCallee::forDirect(CalleePtr, CurGD); RValue RV = EmitCall(*CurFnInfo, Callee, Slot, CallArgs, &CallOrInvoke); // Consider return adjustment if we have ThunkInfo. 
@@ -375,7 +374,7 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr, FinishThunk(); } -void CodeGenFunction::EmitMustTailThunk(const CXXMethodDecl *MD, +void CodeGenFunction::EmitMustTailThunk(GlobalDecl GD, llvm::Value *AdjustedThisPtr, llvm::Value *CalleePtr) { // Emitting a musttail call thunk doesn't use any of the CGCall.cpp machinery @@ -412,7 +411,7 @@ void CodeGenFunction::EmitMustTailThunk(const CXXMethodDecl *MD, // Apply the standard set of call attributes. unsigned CallingConv; llvm::AttributeList Attrs; - CGM.ConstructAttributeList(CalleePtr->getName(), *CurFnInfo, MD, Attrs, + CGM.ConstructAttributeList(CalleePtr->getName(), *CurFnInfo, GD, Attrs, CallingConv, /*AttrOnCallSite=*/true); Call->setAttributes(Attrs); Call->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); @@ -756,9 +755,11 @@ CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD, if (Linkage == llvm::GlobalVariable::AvailableExternallyLinkage) Linkage = llvm::GlobalVariable::InternalLinkage; + unsigned Align = CGM.getDataLayout().getABITypeAlignment(VTType); + // Create the variable that will hold the construction vtable. llvm::GlobalVariable *VTable = - CGM.CreateOrReplaceCXXRuntimeVariable(Name, VTType, Linkage); + CGM.CreateOrReplaceCXXRuntimeVariable(Name, VTType, Linkage, Align); CGM.setGVProperties(VTable, RD); // V-tables are always unnamed_addr. @@ -1020,8 +1021,8 @@ void CodeGenModule::EmitVTableTypeMetadata(llvm::GlobalVariable *VTable, AP.second.AddressPointIndex)); // Sort the address points for determinism. 
- llvm::sort(AddressPoints.begin(), AddressPoints.end(), - [this](const AddressPoint &AP1, const AddressPoint &AP2) { + llvm::sort(AddressPoints, [this](const AddressPoint &AP1, + const AddressPoint &AP2) { if (&AP1 == &AP2) return false; diff --git a/lib/CodeGen/CGValue.h b/lib/CodeGen/CGValue.h index 0dcbea423ad7..da8a8efb840b 100644 --- a/lib/CodeGen/CGValue.h +++ b/lib/CodeGen/CGValue.h @@ -562,7 +562,10 @@ public: } void setVolatile(bool flag) { - Quals.setVolatile(flag); + if (flag) + Quals.addVolatile(); + else + Quals.removeVolatile(); } Qualifiers::ObjCLifetime getObjCLifetime() const { diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 2a0f4f0e83ec..29c6793c601e 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -102,4 +102,5 @@ add_clang_library(clangCodeGen clangBasic clangFrontend clangLex + clangSerialization ) diff --git a/lib/CodeGen/CodeGenABITypes.cpp b/lib/CodeGen/CodeGenABITypes.cpp index c152291b15b9..27f5d53ffe11 100644 --- a/lib/CodeGen/CodeGenABITypes.cpp +++ b/lib/CodeGen/CodeGenABITypes.cpp @@ -20,7 +20,6 @@ #include "CGRecordLayout.h" #include "CodeGenModule.h" #include "clang/CodeGen/CGFunctionInfo.h" -#include "clang/Frontend/CodeGenOptions.h" #include "clang/Lex/HeaderSearchOptions.h" #include "clang/Lex/PreprocessorOptions.h" diff --git a/lib/CodeGen/CodeGenAction.cpp b/lib/CodeGen/CodeGenAction.cpp index d499364002f0..fd4506f2d197 100644 --- a/lib/CodeGen/CodeGenAction.cpp +++ b/lib/CodeGen/CodeGenAction.cpp @@ -127,6 +127,7 @@ namespace clang { CodeGenOpts, C, CoverageInfo)), LinkModules(std::move(LinkModules)) { FrontendTimesIsEnabled = TimePasses; + llvm::TimePassesIsEnabled = TimePasses; } llvm::Module *getModule() const { return Gen->GetModule(); } std::unique_ptr<llvm::Module> takeModule() { @@ -548,12 +549,16 @@ const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc( SourceLocation DILoc; if (D.isLocationAvailable()) { - D.getLocation(&Filename, &Line, &Column); - 
const FileEntry *FE = FileMgr.getFile(Filename); - if (FE && Line > 0) { - // If -gcolumn-info was not used, Column will be 0. This upsets the - // source manager, so pass 1 if Column is not set. - DILoc = SourceMgr.translateFileLineCol(FE, Line, Column ? Column : 1); + D.getLocation(Filename, Line, Column); + if (Line > 0) { + const FileEntry *FE = FileMgr.getFile(Filename); + if (!FE) + FE = FileMgr.getFile(D.getAbsolutePath()); + if (FE) { + // If -gcolumn-info was not used, Column will be 0. This upsets the + // source manager, so pass 1 if Column is not set. + DILoc = SourceMgr.translateFileLineCol(FE, Line, Column ? Column : 1); + } } BadDebugInfo = DILoc.isInvalid(); } diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp index 3c582688e91e..1713e40c312b 100644 --- a/lib/CodeGen/CodeGenFunction.cpp +++ b/lib/CodeGen/CodeGenFunction.cpp @@ -28,10 +28,10 @@ #include "clang/AST/StmtCXX.h" #include "clang/AST/StmtObjC.h" #include "clang/Basic/Builtins.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" -#include "clang/Frontend/CodeGenOptions.h" -#include "clang/Sema/SemaDiagnostic.h" +#include "clang/Frontend/FrontendDiagnostic.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Intrinsics.h" @@ -430,10 +430,25 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { NormalCleanupDest = Address::invalid(); } - // Add the required-vector-width attribute. - if (LargestVectorWidth != 0) - CurFn->addFnAttr("min-legal-vector-width", - llvm::utostr(LargestVectorWidth)); + // Scan function arguments for vector width. + for (llvm::Argument &A : CurFn->args()) + if (auto *VT = dyn_cast<llvm::VectorType>(A.getType())) + LargestVectorWidth = std::max(LargestVectorWidth, + VT->getPrimitiveSizeInBits()); + + // Update vector width based on return type. 
+ if (auto *VT = dyn_cast<llvm::VectorType>(CurFn->getReturnType())) + LargestVectorWidth = std::max(LargestVectorWidth, + VT->getPrimitiveSizeInBits()); + + // Add the required-vector-width attribute. This contains the max width from: + // 1. min-vector-width attribute used in the source program. + // 2. Any builtins used that have a vector width specified. + // 3. Values passed in and out of inline assembly. + // 4. Width of vector arguments and return types for this function. + // 5. Width of vector aguments and return types for functions called by this + // function. + CurFn->addFnAttr("min-legal-vector-width", llvm::utostr(LargestVectorWidth)); } /// ShouldInstrumentFunction - Return true if the current function should be @@ -772,9 +787,11 @@ static bool endsWithReturn(const Decl* F) { return false; } -static void markAsIgnoreThreadCheckingAtRuntime(llvm::Function *Fn) { - Fn->addFnAttr("sanitize_thread_no_checking_at_run_time"); - Fn->removeFnAttr(llvm::Attribute::SanitizeThread); +void CodeGenFunction::markAsIgnoreThreadCheckingAtRuntime(llvm::Function *Fn) { + if (SanOpts.has(SanitizerKind::Thread)) { + Fn->addFnAttr("sanitize_thread_no_checking_at_run_time"); + Fn->removeFnAttr(llvm::Attribute::SanitizeThread); + } } static bool matchesStlAllocatorFn(const Decl *D, const ASTContext &Ctx) { @@ -866,7 +883,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, Fn->addFnAttr(llvm::Attribute::SanitizeHWAddress); if (SanOpts.has(SanitizerKind::Thread)) Fn->addFnAttr(llvm::Attribute::SanitizeThread); - if (SanOpts.has(SanitizerKind::Memory)) + if (SanOpts.hasOneOf(SanitizerKind::Memory | SanitizerKind::KernelMemory)) Fn->addFnAttr(llvm::Attribute::SanitizeMemory); if (SanOpts.has(SanitizerKind::SafeStack)) Fn->addFnAttr(llvm::Attribute::SafeStack); @@ -887,10 +904,6 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, (OMD->getSelector().isUnarySelector() && II->isStr(".cxx_destruct"))) { markAsIgnoreThreadCheckingAtRuntime(Fn); } - } else if (const auto *FD = 
dyn_cast_or_null<FunctionDecl>(D)) { - IdentifierInfo *II = FD->getIdentifier(); - if (II && II->isStr("__destroy_helper_block_")) - markAsIgnoreThreadCheckingAtRuntime(Fn); } } @@ -903,21 +916,21 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, } // Apply xray attributes to the function (as a string, for now) - bool InstrumentXray = ShouldXRayInstrumentFunction() && - CGM.getCodeGenOpts().XRayInstrumentationBundle.has( - XRayInstrKind::Function); - if (D && InstrumentXray) { + if (D) { if (const auto *XRayAttr = D->getAttr<XRayInstrumentAttr>()) { - if (XRayAttr->alwaysXRayInstrument()) - Fn->addFnAttr("function-instrument", "xray-always"); - if (XRayAttr->neverXRayInstrument()) - Fn->addFnAttr("function-instrument", "xray-never"); - if (const auto *LogArgs = D->getAttr<XRayLogArgsAttr>()) { - Fn->addFnAttr("xray-log-args", - llvm::utostr(LogArgs->getArgumentCount())); + if (CGM.getCodeGenOpts().XRayInstrumentationBundle.has( + XRayInstrKind::Function)) { + if (XRayAttr->alwaysXRayInstrument() && ShouldXRayInstrumentFunction()) + Fn->addFnAttr("function-instrument", "xray-always"); + if (XRayAttr->neverXRayInstrument()) + Fn->addFnAttr("function-instrument", "xray-never"); + if (const auto *LogArgs = D->getAttr<XRayLogArgsAttr>()) + if (ShouldXRayInstrumentFunction()) + Fn->addFnAttr("xray-log-args", + llvm::utostr(LogArgs->getArgumentCount())); } } else { - if (!CGM.imbueXRayAttrs(Fn, Loc)) + if (ShouldXRayInstrumentFunction() && !CGM.imbueXRayAttrs(Fn, Loc)) Fn->addFnAttr( "xray-instruction-threshold", llvm::itostr(CGM.getCodeGenOpts().XRayInstructionThreshold)); @@ -981,6 +994,13 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, if (FD->isMain()) Fn->addFnAttr(llvm::Attribute::NoRecurse); + // If a custom alignment is used, force realigning to this alignment on + // any main function which certainly will need it. 
+ if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) + if ((FD->isMain() || FD->isMSVCRTEntryPoint()) && + CGM.getCodeGenOpts().StackAlignment) + Fn->addFnAttr("stackrealign"); + llvm::BasicBlock *EntryBB = createBasicBlock("entry", CurFn); // Create a marker to make it easy to insert allocas into the entryblock @@ -1053,9 +1073,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, // Count the implicit return. if (!endsWithReturn(D)) ++NumReturnExprs; - } else if (CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect && - !hasScalarEvaluationKind(CurFnInfo->getReturnType())) { - // Indirect aggregate return; emit returned value directly into sret slot. + } else if (CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect) { + // Indirect return; emit returned value directly into sret slot. // This reduces code size, and affects correctness in C++. auto AI = CurFn->arg_begin(); if (CurFnInfo->getReturnInfo().isSRetAfterThis()) @@ -1137,7 +1156,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, if (CXXABIThisValue) { SanitizerSet SkippedChecks; SkippedChecks.set(SanitizerKind::ObjectSize, true); - QualType ThisTy = MD->getThisType(getContext()); + QualType ThisTy = MD->getThisType(); // If this is the call operator of a lambda with no capture-default, it // may have a static invoker function, which may call this operator with @@ -1183,8 +1202,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, LargestVectorWidth = VecWidth->getVectorWidth(); } -void CodeGenFunction::EmitFunctionBody(FunctionArgList &Args, - const Stmt *Body) { +void CodeGenFunction::EmitFunctionBody(const Stmt *Body) { incrementProfileCounter(Body); if (const CompoundStmt *S = dyn_cast<CompoundStmt>(Body)) EmitCompoundStmtWithoutScope(*S); @@ -1238,7 +1256,7 @@ QualType CodeGenFunction::BuildFunctionArgList(GlobalDecl GD, const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD); if (MD && MD->isInstance()) { if (CGM.getCXXABI().HasThisReturn(GD)) - ResTy = 
MD->getThisType(getContext()); + ResTy = MD->getThisType(); else if (CGM.getCXXABI().hasMostDerivedReturn(GD)) ResTy = CGM.getContext().VoidPtrTy; CGM.getCXXABI().buildThisParam(*this, Args); @@ -1352,7 +1370,7 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, // copy-constructors. emitImplicitAssignmentOperatorBody(Args); } else if (Body) { - EmitFunctionBody(Args, Body); + EmitFunctionBody(Body); } else llvm_unreachable("no definition for emitted function"); @@ -1493,10 +1511,11 @@ bool CodeGenFunction::ConstantFoldsToSimpleInteger(const Expr *Cond, bool AllowLabels) { // FIXME: Rename and handle conversion of other evaluatable things // to bool. - llvm::APSInt Int; - if (!Cond->EvaluateAsInt(Int, getContext())) + Expr::EvalResult Result; + if (!Cond->EvaluateAsInt(Result, getContext())) return false; // Not foldable, not integer or not fully evaluatable. + llvm::APSInt Int = Result.Val.getInt(); if (!AllowLabels && CodeGenFunction::ContainsLabel(Cond)) return false; // Contains a label. @@ -1681,7 +1700,7 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, // create metadata that specifies that the branch is unpredictable. // Don't bother if not optimizing because that metadata would not be used. 
llvm::MDNode *Unpredictable = nullptr; - auto *Call = dyn_cast<CallExpr>(Cond); + auto *Call = dyn_cast<CallExpr>(Cond->IgnoreImpCasts()); if (Call && CGM.getCodeGenOpts().OptimizationLevel != 0) { auto *FD = dyn_cast_or_null<FunctionDecl>(Call->getCalleeDecl()); if (FD && FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) { @@ -2089,9 +2108,8 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) { SanitizerScope SanScope(this); llvm::Value *Zero = llvm::Constant::getNullValue(Size->getType()); llvm::Constant *StaticArgs[] = { - EmitCheckSourceLocation(size->getLocStart()), - EmitCheckTypeDescriptor(size->getType()) - }; + EmitCheckSourceLocation(size->getBeginLoc()), + EmitCheckTypeDescriptor(size->getType())}; EmitCheck(std::make_pair(Builder.CreateICmpSGT(Size, Zero), SanitizerKind::VLABound), SanitizerHandler::VLABoundNotPositive, StaticArgs, Size); @@ -2189,6 +2207,49 @@ void CodeGenFunction::unprotectFromPeepholes(PeepholeProtection protection) { protection.Inst->eraseFromParent(); } +void CodeGenFunction::EmitAlignmentAssumption(llvm::Value *PtrValue, + QualType Ty, SourceLocation Loc, + SourceLocation AssumptionLoc, + llvm::Value *Alignment, + llvm::Value *OffsetValue) { + llvm::Value *TheCheck; + llvm::Instruction *Assumption = Builder.CreateAlignmentAssumption( + CGM.getDataLayout(), PtrValue, Alignment, OffsetValue, &TheCheck); + if (SanOpts.has(SanitizerKind::Alignment)) { + EmitAlignmentAssumptionCheck(PtrValue, Ty, Loc, AssumptionLoc, Alignment, + OffsetValue, TheCheck, Assumption); + } +} + +void CodeGenFunction::EmitAlignmentAssumption(llvm::Value *PtrValue, + QualType Ty, SourceLocation Loc, + SourceLocation AssumptionLoc, + unsigned Alignment, + llvm::Value *OffsetValue) { + llvm::Value *TheCheck; + llvm::Instruction *Assumption = Builder.CreateAlignmentAssumption( + CGM.getDataLayout(), PtrValue, Alignment, OffsetValue, &TheCheck); + if (SanOpts.has(SanitizerKind::Alignment)) { + llvm::Value *AlignmentVal = 
llvm::ConstantInt::get(IntPtrTy, Alignment); + EmitAlignmentAssumptionCheck(PtrValue, Ty, Loc, AssumptionLoc, AlignmentVal, + OffsetValue, TheCheck, Assumption); + } +} + +void CodeGenFunction::EmitAlignmentAssumption(llvm::Value *PtrValue, + const Expr *E, + SourceLocation AssumptionLoc, + unsigned Alignment, + llvm::Value *OffsetValue) { + if (auto *CE = dyn_cast<CastExpr>(E)) + E = CE->getSubExprAsWritten(); + QualType Ty = E->getType(); + SourceLocation Loc = E->getExprLoc(); + + EmitAlignmentAssumption(PtrValue, Ty, Loc, AssumptionLoc, Alignment, + OffsetValue); +} + llvm::Value *CodeGenFunction::EmitAnnotationCall(llvm::Value *AnnotationFn, llvm::Value *AnnotatedVal, StringRef AnnotationStr, @@ -2225,7 +2286,7 @@ Address CodeGenFunction::EmitFieldAnnotations(const FieldDecl *D, // annotation on the first field of a struct and annotation on the struct // itself. if (VTy != CGM.Int8PtrTy) - V = Builder.Insert(new llvm::BitCastInst(V, CGM.Int8PtrTy)); + V = Builder.CreateBitCast(V, CGM.Int8PtrTy); V = EmitAnnotationCall(F, V, I->getAnnotation(), D->getLocation()); V = Builder.CreateBitCast(V, VTy); } @@ -2272,7 +2333,7 @@ static bool hasRequiredFeatures(const SmallVectorImpl<StringRef> &ReqFeatures, // Now build up the set of caller features and verify that all the required // features are there. llvm::StringMap<bool> CallerFeatureMap; - CGM.getFunctionFeatureMap(CallerFeatureMap, FD); + CGM.getFunctionFeatureMap(CallerFeatureMap, GlobalDecl().getWithDecl(FD)); // If we have at least one of the features in the feature list return // true, otherwise return false. 
@@ -2280,14 +2341,13 @@ static bool hasRequiredFeatures(const SmallVectorImpl<StringRef> &ReqFeatures, ReqFeatures.begin(), ReqFeatures.end(), [&](StringRef Feature) { SmallVector<StringRef, 1> OrFeatures; Feature.split(OrFeatures, '|'); - return std::any_of(OrFeatures.begin(), OrFeatures.end(), - [&](StringRef Feature) { - if (!CallerFeatureMap.lookup(Feature)) { - FirstMissing = Feature.str(); - return false; - } - return true; - }); + return llvm::any_of(OrFeatures, [&](StringRef Feature) { + if (!CallerFeatureMap.lookup(Feature)) { + FirstMissing = Feature.str(); + return false; + } + return true; + }); }); } @@ -2319,7 +2379,7 @@ void CodeGenFunction::checkTargetFeatures(const CallExpr *E, return; StringRef(FeatureList).split(ReqFeatures, ','); if (!hasRequiredFeatures(ReqFeatures, CGM, FD, MissingFeature)) - CGM.getDiags().Report(E->getLocStart(), diag::err_builtin_needs_feature) + CGM.getDiags().Report(E->getBeginLoc(), diag::err_builtin_needs_feature) << TargetDecl->getDeclName() << CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID); @@ -2345,7 +2405,7 @@ void CodeGenFunction::checkTargetFeatures(const CallExpr *E, ReqFeatures.push_back(F.getKey()); } if (!hasRequiredFeatures(ReqFeatures, CGM, FD, MissingFeature)) - CGM.getDiags().Report(E->getLocStart(), diag::err_function_needs_feature) + CGM.getDiags().Report(E->getBeginLoc(), diag::err_function_needs_feature) << FD->getDeclName() << TargetDecl->getDeclName() << MissingFeature; } } @@ -2359,91 +2419,81 @@ void CodeGenFunction::EmitSanitizerStatReport(llvm::SanitizerStatKind SSK) { CGM.getSanStats().create(IRB, SSK); } -llvm::Value *CodeGenFunction::FormResolverCondition( - const TargetMultiVersionResolverOption &RO) { - llvm::Value *TrueCondition = nullptr; - if (!RO.ParsedAttribute.Architecture.empty()) - TrueCondition = EmitX86CpuIs(RO.ParsedAttribute.Architecture); +llvm::Value * +CodeGenFunction::FormResolverCondition(const MultiVersionResolverOption &RO) { + llvm::Value *Condition = 
nullptr; + + if (!RO.Conditions.Architecture.empty()) + Condition = EmitX86CpuIs(RO.Conditions.Architecture); - if (!RO.ParsedAttribute.Features.empty()) { - SmallVector<StringRef, 8> FeatureList; - llvm::for_each(RO.ParsedAttribute.Features, - [&FeatureList](const std::string &Feature) { - FeatureList.push_back(StringRef{Feature}.substr(1)); - }); - llvm::Value *FeatureCmp = EmitX86CpuSupports(FeatureList); - TrueCondition = TrueCondition ? Builder.CreateAnd(TrueCondition, FeatureCmp) - : FeatureCmp; + if (!RO.Conditions.Features.empty()) { + llvm::Value *FeatureCond = EmitX86CpuSupports(RO.Conditions.Features); + Condition = + Condition ? Builder.CreateAnd(Condition, FeatureCond) : FeatureCond; } - return TrueCondition; + return Condition; } -void CodeGenFunction::EmitTargetMultiVersionResolver( - llvm::Function *Resolver, - ArrayRef<TargetMultiVersionResolverOption> Options) { - assert((getContext().getTargetInfo().getTriple().getArch() == - llvm::Triple::x86 || - getContext().getTargetInfo().getTriple().getArch() == - llvm::Triple::x86_64) && - "Only implemented for x86 targets"); - - // Main function's basic block. 
- llvm::BasicBlock *CurBlock = createBasicBlock("entry", Resolver); - Builder.SetInsertPoint(CurBlock); - EmitX86CpuInit(); +static void CreateMultiVersionResolverReturn(CodeGenModule &CGM, + llvm::Function *Resolver, + CGBuilderTy &Builder, + llvm::Function *FuncToReturn, + bool SupportsIFunc) { + if (SupportsIFunc) { + Builder.CreateRet(FuncToReturn); + return; + } - llvm::Function *DefaultFunc = nullptr; - for (const TargetMultiVersionResolverOption &RO : Options) { - Builder.SetInsertPoint(CurBlock); - llvm::Value *TrueCondition = FormResolverCondition(RO); + llvm::SmallVector<llvm::Value *, 10> Args; + llvm::for_each(Resolver->args(), + [&](llvm::Argument &Arg) { Args.push_back(&Arg); }); - if (!TrueCondition) { - DefaultFunc = RO.Function; - } else { - llvm::BasicBlock *RetBlock = createBasicBlock("ro_ret", Resolver); - llvm::IRBuilder<> RetBuilder(RetBlock); - RetBuilder.CreateRet(RO.Function); - CurBlock = createBasicBlock("ro_else", Resolver); - Builder.CreateCondBr(TrueCondition, RetBlock, CurBlock); - } - } + llvm::CallInst *Result = Builder.CreateCall(FuncToReturn, Args); + Result->setTailCallKind(llvm::CallInst::TCK_MustTail); - assert(DefaultFunc && "No default version?"); - // Emit return from the 'else-ist' block. 
- Builder.SetInsertPoint(CurBlock); - Builder.CreateRet(DefaultFunc); + if (Resolver->getReturnType()->isVoidTy()) + Builder.CreateRetVoid(); + else + Builder.CreateRet(Result); } -void CodeGenFunction::EmitCPUDispatchMultiVersionResolver( - llvm::Function *Resolver, - ArrayRef<CPUDispatchMultiVersionResolverOption> Options) { +void CodeGenFunction::EmitMultiVersionResolver( + llvm::Function *Resolver, ArrayRef<MultiVersionResolverOption> Options) { assert((getContext().getTargetInfo().getTriple().getArch() == llvm::Triple::x86 || getContext().getTargetInfo().getTriple().getArch() == llvm::Triple::x86_64) && "Only implemented for x86 targets"); + bool SupportsIFunc = getContext().getTargetInfo().supportsIFunc(); + // Main function's basic block. llvm::BasicBlock *CurBlock = createBasicBlock("resolver_entry", Resolver); Builder.SetInsertPoint(CurBlock); EmitX86CpuInit(); - for (const CPUDispatchMultiVersionResolverOption &RO : Options) { + for (const MultiVersionResolverOption &RO : Options) { Builder.SetInsertPoint(CurBlock); - - // "generic" case should catch-all. - if (RO.FeatureMask == 0) { - Builder.CreateRet(RO.Function); + llvm::Value *Condition = FormResolverCondition(RO); + + // The 'default' or 'generic' case. 
+ if (!Condition) { + assert(&RO == Options.end() - 1 && + "Default or Generic case must be last"); + CreateMultiVersionResolverReturn(CGM, Resolver, Builder, RO.Function, + SupportsIFunc); return; } + llvm::BasicBlock *RetBlock = createBasicBlock("resolver_return", Resolver); - llvm::IRBuilder<> RetBuilder(RetBlock); - RetBuilder.CreateRet(RO.Function); + CGBuilderTy RetBuilder(*this, RetBlock); + CreateMultiVersionResolverReturn(CGM, Resolver, RetBuilder, RO.Function, + SupportsIFunc); CurBlock = createBasicBlock("resolver_else", Resolver); - llvm::Value *TrueCondition = EmitX86CpuSupports(RO.FeatureMask); - Builder.CreateCondBr(TrueCondition, RetBlock, CurBlock); + Builder.CreateCondBr(Condition, RetBlock, CurBlock); } + // If no generic/default, emit an unreachable. Builder.SetInsertPoint(CurBlock); llvm::CallInst *TrapCall = EmitTrapCall(llvm::Intrinsic::trap); TrapCall->setDoesNotReturn(); @@ -2452,6 +2502,61 @@ void CodeGenFunction::EmitCPUDispatchMultiVersionResolver( Builder.ClearInsertionPoint(); } +// Loc - where the diagnostic will point, where in the source code this +// alignment has failed. +// SecondaryLoc - if present (will be present if sufficiently different from +// Loc), the diagnostic will additionally point a "Note:" to this location. +// It should be the location where the __attribute__((assume_aligned)) +// was written e.g. 
+void CodeGenFunction::EmitAlignmentAssumptionCheck( + llvm::Value *Ptr, QualType Ty, SourceLocation Loc, + SourceLocation SecondaryLoc, llvm::Value *Alignment, + llvm::Value *OffsetValue, llvm::Value *TheCheck, + llvm::Instruction *Assumption) { + assert(Assumption && isa<llvm::CallInst>(Assumption) && + cast<llvm::CallInst>(Assumption)->getCalledValue() == + llvm::Intrinsic::getDeclaration( + Builder.GetInsertBlock()->getParent()->getParent(), + llvm::Intrinsic::assume) && + "Assumption should be a call to llvm.assume()."); + assert(&(Builder.GetInsertBlock()->back()) == Assumption && + "Assumption should be the last instruction of the basic block, " + "since the basic block is still being generated."); + + if (!SanOpts.has(SanitizerKind::Alignment)) + return; + + // Don't check pointers to volatile data. The behavior here is implementation- + // defined. + if (Ty->getPointeeType().isVolatileQualified()) + return; + + // We need to temorairly remove the assumption so we can insert the + // sanitizer check before it, else the check will be dropped by optimizations. + Assumption->removeFromParent(); + + { + SanitizerScope SanScope(this); + + if (!OffsetValue) + OffsetValue = Builder.getInt1(0); // no offset. + + llvm::Constant *StaticData[] = {EmitCheckSourceLocation(Loc), + EmitCheckSourceLocation(SecondaryLoc), + EmitCheckTypeDescriptor(Ty)}; + llvm::Value *DynamicData[] = {EmitCheckValue(Ptr), + EmitCheckValue(Alignment), + EmitCheckValue(OffsetValue)}; + EmitCheck({std::make_pair(TheCheck, SanitizerKind::Alignment)}, + SanitizerHandler::AlignmentAssumption, StaticData, DynamicData); + } + + // We are now in the (new, empty) "cont" basic block. + // Reintroduce the assumption. + Builder.Insert(Assumption); + // FIXME: Assumption still has it's original basic block as it's Parent. 
+} + llvm::DebugLoc CodeGenFunction::SourceLocToDebugLoc(SourceLocation Location) { if (CGDebugInfo *DI = getDebugInfo()) return DI->SourceLocToDebugLoc(Location); diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index f9e284232972..89cb850ab1b1 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -29,9 +29,9 @@ #include "clang/AST/Type.h" #include "clang/Basic/ABI.h" #include "clang/Basic/CapturedStmt.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/TargetInfo.h" -#include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" @@ -131,6 +131,7 @@ enum TypeEvaluationKind { SANITIZER_CHECK(ShiftOutOfBounds, shift_out_of_bounds, 0) \ SANITIZER_CHECK(SubOverflow, sub_overflow, 0) \ SANITIZER_CHECK(TypeMismatch, type_mismatch, 1) \ + SANITIZER_CHECK(AlignmentAssumption, alignment_assumption, 0) \ SANITIZER_CHECK(VLABoundNotPositive, vla_bound_not_positive, 0) enum SanitizerHandler { @@ -470,7 +471,7 @@ public: /// potentially set the return value. bool SawAsmBlock = false; - const FunctionDecl *CurSEHParent = nullptr; + const NamedDecl *CurSEHParent = nullptr; /// True if the current function is an outlined SEH helper. This can be a /// finally block or filter expression. @@ -1197,6 +1198,8 @@ public: private: CGDebugInfo *DebugInfo; + /// Used to create unique names for artificial VLA size debug info variables. + unsigned VLAExprCounter = 0; bool DisableDebugInfo = false; /// DidCallStackSave - Whether llvm.stacksave has been called. Used to avoid @@ -1746,6 +1749,9 @@ public: bool IsLambdaConversionToBlock, bool BuildGlobalBlock); + /// Check if \p T is a C++ class that has a destructor that can throw. 
+ static bool cxxDestructorCanThrow(QualType T); + llvm::Constant *GenerateCopyHelperFunction(const CGBlockInfo &blockInfo); llvm::Constant *GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo); llvm::Constant *GenerateObjCAtomicSetterCopyHelperFunction( @@ -1754,7 +1760,8 @@ public: const ObjCPropertyImplDecl *PID); llvm::Value *EmitBlockCopyAndAutorelease(llvm::Value *Block, QualType Ty); - void BuildBlockRelease(llvm::Value *DeclPtr, BlockFieldFlags flags); + void BuildBlockRelease(llvm::Value *DeclPtr, BlockFieldFlags flags, + bool CanThrow); class AutoVarEmission; @@ -1777,13 +1784,13 @@ public: /// \param LoadBlockVarAddr Indicates whether we need to emit a load from /// \p Addr to get the address of the __block structure. void enterByrefCleanup(CleanupKind Kind, Address Addr, BlockFieldFlags Flags, - bool LoadBlockVarAddr); + bool LoadBlockVarAddr, bool CanThrow); void setBlockContextParameter(const ImplicitParamDecl *D, unsigned argNum, llvm::Value *ptr); Address LoadBlockStruct(); - Address GetAddrOfBlockDecl(const VarDecl *var, bool ByRef); + Address GetAddrOfBlockDecl(const VarDecl *var); /// BuildBlockByrefAddress - Computes the location of the /// data in a variable which is declared as __block. @@ -1800,6 +1807,11 @@ public: void GenerateCode(GlobalDecl GD, llvm::Function *Fn, const CGFunctionInfo &FnInfo); + + /// Annotate the function with an attribute that disables TSan checking at + /// runtime. + void markAsIgnoreThreadCheckingAtRuntime(llvm::Function *Fn); + /// Emit code for the start of a function. /// \param Loc The location to be associated with the function. /// \param StartLoc The location of the function body. 
@@ -1816,7 +1828,7 @@ public: void EmitConstructorBody(FunctionArgList &Args); void EmitDestructorBody(FunctionArgList &Args); void emitImplicitAssignmentOperatorBody(FunctionArgList &Args); - void EmitFunctionBody(FunctionArgList &Args, const Stmt *Body); + void EmitFunctionBody(const Stmt *Body); void EmitBlockWithFallThrough(llvm::BasicBlock *BB, const Stmt *S); void EmitForwardingCallToLambda(const CXXMethodDecl *LambdaCallOperator, @@ -1845,7 +1857,7 @@ public: void FinishThunk(); /// Emit a musttail call for a thunk with a potentially adjusted this pointer. - void EmitMustTailThunk(const CXXMethodDecl *MD, llvm::Value *AdjustedThisPtr, + void EmitMustTailThunk(GlobalDecl GD, llvm::Value *AdjustedThisPtr, llvm::Value *Callee); /// Generate a thunk for the given method. @@ -2622,12 +2634,6 @@ public: ComplexPairTy EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV, bool isInc, bool isPre); - void EmitAlignmentAssumption(llvm::Value *PtrValue, unsigned Alignment, - llvm::Value *OffsetValue = nullptr) { - Builder.CreateAlignmentAssumption(CGM.getDataLayout(), PtrValue, Alignment, - OffsetValue); - } - /// Converts Location to a DebugLoc, if debug information is enabled. llvm::DebugLoc SourceLocToDebugLoc(SourceLocation Location); @@ -2674,8 +2680,9 @@ public: llvm::Value *NRVOFlag; - /// True if the variable is a __block variable. - bool IsByRef; + /// True if the variable is a __block variable that is captured by an + /// escaping block. + bool IsEscapingByRef; /// True if the variable is of aggregate type and has a constant /// initializer. 
@@ -2695,7 +2702,7 @@ public: AutoVarEmission(const VarDecl &variable) : Variable(&variable), Addr(Address::invalid()), NRVOFlag(nullptr), - IsByRef(false), IsConstantAggregate(false), + IsEscapingByRef(false), IsConstantAggregate(false), SizeForLifetimeMarkers(nullptr), AllocaAddr(Address::invalid()) {} bool wasEmittedAsGlobal() const { return !Addr.isValid(); } @@ -2725,7 +2732,7 @@ public: /// Note that this does not chase the forwarding pointer for /// __block decls. Address getObjectAddress(CodeGenFunction &CGF) const { - if (!IsByRef) return Addr; + if (!IsEscapingByRef) return Addr; return CGF.emitBlockByrefAddress(Addr, Variable, /*forward*/ false); } @@ -2790,11 +2797,27 @@ public: PeepholeProtection protectFromPeepholes(RValue rvalue); void unprotectFromPeepholes(PeepholeProtection protection); - void EmitAlignmentAssumption(llvm::Value *PtrValue, llvm::Value *Alignment, - llvm::Value *OffsetValue = nullptr) { - Builder.CreateAlignmentAssumption(CGM.getDataLayout(), PtrValue, Alignment, - OffsetValue); - } + void EmitAlignmentAssumptionCheck(llvm::Value *Ptr, QualType Ty, + SourceLocation Loc, + SourceLocation AssumptionLoc, + llvm::Value *Alignment, + llvm::Value *OffsetValue, + llvm::Value *TheCheck, + llvm::Instruction *Assumption); + + void EmitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty, + SourceLocation Loc, SourceLocation AssumptionLoc, + llvm::Value *Alignment, + llvm::Value *OffsetValue = nullptr); + + void EmitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty, + SourceLocation Loc, SourceLocation AssumptionLoc, + unsigned Alignment, + llvm::Value *OffsetValue = nullptr); + + void EmitAlignmentAssumption(llvm::Value *PtrValue, const Expr *E, + SourceLocation AssumptionLoc, unsigned Alignment, + llvm::Value *OffsetValue = nullptr); //===--------------------------------------------------------------------===// // Statement Emission @@ -2878,6 +2901,8 @@ public: void EnterSEHTryStmt(const SEHTryStmt &S); void ExitSEHTryStmt(const 
SEHTryStmt &S); + void pushSEHCleanup(CleanupKind kind, + llvm::Function *FinallyFunc); void startOutlinedSEHHelper(CodeGenFunction &ParentCGF, bool IsFilter, const Stmt *OutlinedStmt); @@ -3512,6 +3537,7 @@ public: ConstantEmission tryEmitAsConstant(DeclRefExpr *refExpr); ConstantEmission tryEmitAsConstant(const MemberExpr *ME); + llvm::Value *emitScalarConstant(const ConstantEmission &Constant, Expr *E); RValue EmitPseudoObjectRValue(const PseudoObjectExpr *e, AggValueSlot slot = AggValueSlot::ignored()); @@ -3603,6 +3629,19 @@ public: CXXDtorType Type, const CXXRecordDecl *RD); + // Return the copy constructor name with the prefix "__copy_constructor_" + // removed. + static std::string getNonTrivialCopyConstructorStr(QualType QT, + CharUnits Alignment, + bool IsVolatile, + ASTContext &Ctx); + + // Return the destructor name with the prefix "__destructor_" removed. + static std::string getNonTrivialDestructorStr(QualType QT, + CharUnits Alignment, + bool IsVolatile, + ASTContext &Ctx); + // These functions emit calls to the special functions of non-trivial C // structs. void defaultInitNonTrivialCStructVar(LValue Dst); @@ -3653,9 +3692,10 @@ public: RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue); - RValue EmitBuiltinExpr(const FunctionDecl *FD, - unsigned BuiltinID, const CallExpr *E, - ReturnValueSlot ReturnValue); + RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, + const CallExpr *E, ReturnValueSlot ReturnValue); + + RValue emitRotate(const CallExpr *E, bool IsRotateRight); /// Emit IR for __builtin_os_log_format. 
RValue emitBuiltinOSLogFormat(const CallExpr &E); @@ -3769,6 +3809,11 @@ public: llvm::Value *EmitARCRetainAutoreleasedReturnValue(llvm::Value *value); llvm::Value *EmitARCUnsafeClaimAutoreleasedReturnValue(llvm::Value *value); + llvm::Value *EmitObjCAutorelease(llvm::Value *value, llvm::Type *returnType); + llvm::Value *EmitObjCRetainNonBlock(llvm::Value *value, + llvm::Type *returnType); + void EmitObjCRelease(llvm::Value *value, ARCPreciseLifetime_t precise); + std::pair<LValue,llvm::Value*> EmitARCStoreAutoreleasing(const BinaryOperator *e); std::pair<LValue,llvm::Value*> @@ -3776,6 +3821,10 @@ public: std::pair<LValue,llvm::Value*> EmitARCStoreUnsafeUnretained(const BinaryOperator *e, bool ignored); + llvm::Value *EmitObjCAlloc(llvm::Value *value, + llvm::Type *returnType); + llvm::Value *EmitObjCAllocWithZone(llvm::Value *value, + llvm::Type *returnType); llvm::Value *EmitObjCThrowOperand(const Expr *expr); llvm::Value *EmitObjCConsumeObject(QualType T, llvm::Value *Ptr); llvm::Value *EmitObjCExtendObjectLifetime(QualType T, llvm::Value *Ptr); @@ -3865,6 +3914,8 @@ public: AddInitializerToStaticVarDecl(const VarDecl &D, llvm::GlobalVariable *GV); + // Emit an @llvm.invariant.start call for the given memory region. + void EmitInvariantStart(llvm::Constant *Addr, CharUnits Size); /// EmitCXXGlobalVarDeclInit - Create the initializer for a C++ /// variable with global storage. @@ -3900,9 +3951,10 @@ public: /// GenerateCXXGlobalInitFunc - Generates code for initializing global /// variables. - void GenerateCXXGlobalInitFunc(llvm::Function *Fn, - ArrayRef<llvm::Function *> CXXThreadLocals, - Address Guard = Address::invalid()); + void + GenerateCXXGlobalInitFunc(llvm::Function *Fn, + ArrayRef<llvm::Function *> CXXThreadLocals, + ConstantAddress Guard = ConstantAddress::invalid()); /// GenerateCXXGlobalDtorsFunc - Generates code for destroying global /// variables. 
@@ -3920,11 +3972,13 @@ public: void EmitSynthesizedCXXCopyCtor(Address Dest, Address Src, const Expr *Exp); - void enterFullExpression(const ExprWithCleanups *E) { - if (E->getNumObjects() == 0) return; + void enterFullExpression(const FullExpr *E) { + if (const auto *EWC = dyn_cast<ExprWithCleanups>(E)) + if (EWC->getNumObjects() == 0) + return; enterNonTrivialFullExpression(E); } - void enterNonTrivialFullExpression(const ExprWithCleanups *E); + void enterNonTrivialFullExpression(const FullExpr *E); void EmitCXXThrowExpr(const CXXThrowExpr *E, bool KeepInsertionPoint = true); @@ -4245,47 +4299,29 @@ public: void EmitSanitizerStatReport(llvm::SanitizerStatKind SSK); - struct TargetMultiVersionResolverOption { + struct MultiVersionResolverOption { llvm::Function *Function; - TargetAttr::ParsedTargetAttr ParsedAttribute; - unsigned Priority; - TargetMultiVersionResolverOption( - const TargetInfo &TargInfo, llvm::Function *F, - const clang::TargetAttr::ParsedTargetAttr &PT) - : Function(F), ParsedAttribute(PT), Priority(0u) { - for (StringRef Feat : PT.Features) - Priority = std::max(Priority, - TargInfo.multiVersionSortPriority(Feat.substr(1))); - - if (!PT.Architecture.empty()) - Priority = std::max(Priority, - TargInfo.multiVersionSortPriority(PT.Architecture)); - } - - bool operator>(const TargetMultiVersionResolverOption &Other) const { - return Priority > Other.Priority; - } + FunctionDecl *FD; + struct Conds { + StringRef Architecture; + llvm::SmallVector<StringRef, 8> Features; + + Conds(StringRef Arch, ArrayRef<StringRef> Feats) + : Architecture(Arch), Features(Feats.begin(), Feats.end()) {} + } Conditions; + + MultiVersionResolverOption(llvm::Function *F, StringRef Arch, + ArrayRef<StringRef> Feats) + : Function(F), Conditions(Arch, Feats) {} }; - void EmitTargetMultiVersionResolver( - llvm::Function *Resolver, - ArrayRef<TargetMultiVersionResolverOption> Options); - struct CPUDispatchMultiVersionResolverOption { - llvm::Function *Function; - // Note: 
EmitX86CPUSupports only has 32 bits available, so we store the mask - // as 32 bits here. When 64-bit support is added to __builtin_cpu_supports, - // this can be extended to 64 bits. - uint32_t FeatureMask; - CPUDispatchMultiVersionResolverOption(llvm::Function *F, uint64_t Mask) - : Function(F), FeatureMask(static_cast<uint32_t>(Mask)) {} - bool operator>(const CPUDispatchMultiVersionResolverOption &Other) const { - return FeatureMask > Other.FeatureMask; - } - }; - void EmitCPUDispatchMultiVersionResolver( - llvm::Function *Resolver, - ArrayRef<CPUDispatchMultiVersionResolverOption> Options); - static uint32_t GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs); + // Emits the body of a multiversion function's resolver. Assumes that the + // options are already sorted in the proper order, with the 'default' option + // last (if it exists). + void EmitMultiVersionResolver(llvm::Function *Resolver, + ArrayRef<MultiVersionResolverOption> Options); + + static uint64_t GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs); private: QualType getVarArgType(const Expr *Arg); @@ -4302,10 +4338,9 @@ private: llvm::Value *EmitX86CpuIs(StringRef CPUStr); llvm::Value *EmitX86CpuSupports(const CallExpr *E); llvm::Value *EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs); - llvm::Value *EmitX86CpuSupports(uint32_t Mask); + llvm::Value *EmitX86CpuSupports(uint64_t Mask); llvm::Value *EmitX86CpuInit(); - llvm::Value * - FormResolverCondition(const TargetMultiVersionResolverOption &RO); + llvm::Value *FormResolverCondition(const MultiVersionResolverOption &RO); }; inline DominatingLLVMValue::saved_type diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index 8c5e0df0969b..244738042cef 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -36,14 +36,15 @@ #include "clang/AST/RecursiveASTVisitor.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/CharInfo.h" +#include "clang/Basic/CodeGenOptions.h" #include 
"clang/Basic/Diagnostic.h" #include "clang/Basic/Module.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/Version.h" #include "clang/CodeGen/ConstantInitBuilder.h" -#include "clang/Frontend/CodeGenOptions.h" -#include "clang/Sema/SemaDiagnostic.h" +#include "clang/Frontend/FrontendDiagnostic.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallSite.h" @@ -53,6 +54,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/ProfileData/InstrProfReader.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MD5.h" @@ -124,7 +126,7 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO, RuntimeCC = getTargetCodeGenInfo().getABIInfo().getRuntimeCC(); - if (LangOpts.ObjC1) + if (LangOpts.ObjC) createObjCRuntime(); if (LangOpts.OpenCL) createOpenCLRuntime(); @@ -147,12 +149,12 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO, Block.GlobalUniqueCount = 0; - if (C.getLangOpts().ObjC1) + if (C.getLangOpts().ObjC) ObjCData.reset(new ObjCEntrypoints()); if (CodeGenOpts.hasProfileClangUse()) { auto ReaderOrErr = llvm::IndexedInstrProfReader::create( - CodeGenOpts.ProfileInstrumentUsePath); + CodeGenOpts.ProfileInstrumentUsePath, CodeGenOpts.ProfileRemappingFile); if (auto E = ReaderOrErr.takeError()) { unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, "Could not read profile %0: %1"); @@ -320,8 +322,6 @@ void CodeGenModule::checkAliases() { assert(FTy); if (!FTy->getReturnType()->isPointerTy()) Diags.Report(Location, diag::err_ifunc_resolver_return); - if (FTy->getNumParams()) - Diags.Report(Location, diag::err_ifunc_resolver_params); } llvm::Constant *Aliasee = Alias->getIndirectSymbol(); @@ -458,9 +458,12 @@ void CodeGenModule::Release() { // Indicate that we want CodeView in 
the metadata. getModule().addModuleFlag(llvm::Module::Warning, "CodeView", 1); } + if (CodeGenOpts.CodeViewGHash) { + getModule().addModuleFlag(llvm::Module::Warning, "CodeViewGHash", 1); + } if (CodeGenOpts.ControlFlowGuard) { // We want function ID tables for Control Flow Guard. - getModule().addModuleFlag(llvm::Module::Warning, "cfguard", 1); + getModule().addModuleFlag(llvm::Module::Warning, "cfguardtable", 1); } if (CodeGenOpts.OptimizationLevel > 0 && CodeGenOpts.StrictVTablePointers) { // We don't support LTO with 2 with different StrictVTablePointers @@ -556,6 +559,20 @@ void CodeGenModule::Release() { getModule().setPIELevel(static_cast<llvm::PIELevel::Level>(PLevel)); } + if (getCodeGenOpts().CodeModel.size() > 0) { + unsigned CM = llvm::StringSwitch<unsigned>(getCodeGenOpts().CodeModel) + .Case("tiny", llvm::CodeModel::Tiny) + .Case("small", llvm::CodeModel::Small) + .Case("kernel", llvm::CodeModel::Kernel) + .Case("medium", llvm::CodeModel::Medium) + .Case("large", llvm::CodeModel::Large) + .Default(~0u); + if (CM != ~0u) { + llvm::CodeModel::Model codeModel = static_cast<llvm::CodeModel::Model>(CM); + getModule().setCodeModel(codeModel); + } + } + if (CodeGenOpts.NoPLT) getModule().setRtLibUseGOT(); @@ -573,6 +590,9 @@ void CodeGenModule::Release() { if (getCodeGenOpts().EmitVersionIdentMetadata) EmitVersionIdentMetadata(); + if (!getCodeGenOpts().RecordCommandLine.empty()) + EmitCommandLineMetadata(); + EmitTargetMetadata(); } @@ -683,8 +703,8 @@ void CodeGenModule::ErrorUnsupported(const Stmt *S, const char *Type) { unsigned DiagID = getDiags().getCustomDiagID(DiagnosticsEngine::Error, "cannot compile this %0 yet"); std::string Msg = Type; - getDiags().Report(Context.getFullLoc(S->getLocStart()), DiagID) - << Msg << S->getSourceRange(); + getDiags().Report(Context.getFullLoc(S->getBeginLoc()), DiagID) + << Msg << S->getSourceRange(); } /// ErrorUnsupported - Print out an error that codegen doesn't support the @@ -730,6 +750,14 @@ static bool 
shouldAssumeDSOLocal(const CodeGenModule &CGM, return false; const llvm::Triple &TT = CGM.getTriple(); + if (TT.isWindowsGNUEnvironment()) { + // In MinGW, variables without DLLImport can still be automatically + // imported from a DLL by the linker; don't mark variables that + // potentially could come from another DLL as DSO local. + if (GV->isDeclarationForLinker() && isa<llvm::GlobalVariable>(GV) && + !GV->isThreadLocal()) + return false; + } // Every other GV is local on COFF. // Make an exception for windows OS in the triple: Some firmware builds use // *-win32-macho triples. This (accidentally?) produced windows relocations @@ -869,11 +897,13 @@ static std::string getCPUSpecificMangling(const CodeGenModule &CGM, static void AppendCPUSpecificCPUDispatchMangling(const CodeGenModule &CGM, const CPUSpecificAttr *Attr, + unsigned CPUIndex, raw_ostream &Out) { - // cpu_specific gets the current name, dispatch gets the resolver. + // cpu_specific gets the current name, dispatch gets the resolver if IFunc is + // supported. 
if (Attr) - Out << getCPUSpecificMangling(CGM, Attr->getCurCPUName()->getName()); - else + Out << getCPUSpecificMangling(CGM, Attr->getCPUName(CPUIndex)->getName()); + else if (CGM.getTarget().supportsIFunc()) Out << ".resolver"; } @@ -939,11 +969,19 @@ static std::string getMangledNameImpl(const CodeGenModule &CGM, GlobalDecl GD, if (const auto *FD = dyn_cast<FunctionDecl>(ND)) if (FD->isMultiVersion() && !OmitMultiVersionMangling) { - if (FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion()) - AppendCPUSpecificCPUDispatchMangling( - CGM, FD->getAttr<CPUSpecificAttr>(), Out); - else + switch (FD->getMultiVersionKind()) { + case MultiVersionKind::CPUDispatch: + case MultiVersionKind::CPUSpecific: + AppendCPUSpecificCPUDispatchMangling(CGM, + FD->getAttr<CPUSpecificAttr>(), + GD.getMultiVersionIndex(), Out); + break; + case MultiVersionKind::Target: AppendTargetMangling(CGM, FD->getAttr<TargetAttr>(), Out); + break; + case MultiVersionKind::None: + llvm_unreachable("None multiversion type isn't valid here"); + } } return Out.str(); @@ -968,8 +1006,10 @@ void CodeGenModule::UpdateMultiVersionNames(GlobalDecl GD, "Other GD should now be a multiversioned function"); // OtherFD is the version of this function that was mangled BEFORE // becoming a MultiVersion function. It potentially needs to be updated. - const FunctionDecl *OtherFD = - OtherGD.getCanonicalDecl().getDecl()->getAsFunction(); + const FunctionDecl *OtherFD = OtherGD.getCanonicalDecl() + .getDecl() + ->getAsFunction() + ->getMostRecentDecl(); std::string OtherName = getMangledNameImpl(*this, OtherGD, OtherFD); // This is so that if the initial version was already the 'default' // version, we don't try to update it. @@ -1001,26 +1041,6 @@ StringRef CodeGenModule::getMangledName(GlobalDecl GD) { } } - const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()); - // Since CPUSpecific can require multiple emits per decl, store the manglings - // separately. 
- if (FD && - (FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion())) { - const auto *SD = FD->getAttr<CPUSpecificAttr>(); - - std::pair<GlobalDecl, unsigned> SpecCanonicalGD{ - CanonicalGD, - SD ? SD->ActiveArgIndex : std::numeric_limits<unsigned>::max()}; - - auto FoundName = CPUSpecificMangledDeclNames.find(SpecCanonicalGD); - if (FoundName != CPUSpecificMangledDeclNames.end()) - return FoundName->second; - - auto Result = CPUSpecificManglings.insert( - std::make_pair(getMangledNameImpl(*this, GD, FD), SpecCanonicalGD)); - return CPUSpecificMangledDeclNames[SpecCanonicalGD] = Result.first->first(); - } - auto FoundName = MangledDeclNames.find(CanonicalGD); if (FoundName != MangledDeclNames.end()) return FoundName->second; @@ -1082,11 +1102,12 @@ void CodeGenModule::EmitCtorList(CtorList &Fns, const char *GlobalName) { // Ctor function type is void()*. llvm::FunctionType* CtorFTy = llvm::FunctionType::get(VoidTy, false); - llvm::Type *CtorPFTy = llvm::PointerType::getUnqual(CtorFTy); + llvm::Type *CtorPFTy = llvm::PointerType::get(CtorFTy, + TheModule.getDataLayout().getProgramAddressSpace()); // Get the type of a ctor entry, { i32, void ()*, i8* }. llvm::StructType *CtorStructTy = llvm::StructType::get( - Int32Ty, llvm::PointerType::getUnqual(CtorFTy), VoidPtrTy); + Int32Ty, CtorPFTy, VoidPtrTy); // Construct the constructor and destructor arrays. 
ConstantInitBuilder builder(*this); @@ -1142,12 +1163,12 @@ llvm::ConstantInt *CodeGenModule::CreateCrossDsoCfiTypeId(llvm::Metadata *MD) { return llvm::ConstantInt::get(Int64Ty, llvm::MD5Hash(MDS->getString())); } -void CodeGenModule::SetLLVMFunctionAttributes(const Decl *D, +void CodeGenModule::SetLLVMFunctionAttributes(GlobalDecl GD, const CGFunctionInfo &Info, llvm::Function *F) { unsigned CallingConv; llvm::AttributeList PAL; - ConstructAttributeList(F->getName(), Info, D, PAL, CallingConv, false); + ConstructAttributeList(F->getName(), Info, GD, PAL, CallingConv, false); F->setAttributes(PAL); F->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); } @@ -1277,9 +1298,19 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, // Otherwise, propagate the inline hint attribute and potentially use its // absence to mark things as noinline. if (auto *FD = dyn_cast<FunctionDecl>(D)) { - if (any_of(FD->redecls(), [&](const FunctionDecl *Redecl) { - return Redecl->isInlineSpecified(); - })) { + // Search function and template pattern redeclarations for inline. 
+ auto CheckForInline = [](const FunctionDecl *FD) { + auto CheckRedeclForInline = [](const FunctionDecl *Redecl) { + return Redecl->isInlineSpecified(); + }; + if (any_of(FD->redecls(), CheckRedeclForInline)) + return true; + const FunctionDecl *Pattern = FD->getTemplateInstantiationPattern(); + if (!Pattern) + return false; + return any_of(Pattern->redecls(), CheckRedeclForInline); + }; + if (CheckForInline(FD)) { B.addAttribute(llvm::Attribute::InlineHint); } else if (CodeGenOpts.getInlining() == CodeGenOptions::OnlyHintInlining && @@ -1350,23 +1381,30 @@ void CodeGenModule::SetCommonAttributes(GlobalDecl GD, llvm::GlobalValue *GV) { if (D && D->hasAttr<UsedAttr>()) addUsedGlobal(GV); + + if (CodeGenOpts.KeepStaticConsts && D && isa<VarDecl>(D)) { + const auto *VD = cast<VarDecl>(D); + if (VD->getType().isConstQualified() && + VD->getStorageDuration() == SD_Static) + addUsedGlobal(GV); + } } -bool CodeGenModule::GetCPUAndFeaturesAttributes(const Decl *D, +bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD, llvm::AttrBuilder &Attrs) { // Add target-cpu and target-features attributes to functions. If // we have a decl for the function and it has a target attribute then // parse that and add it to the feature set. StringRef TargetCPU = getTarget().getTargetOpts().CPU; std::vector<std::string> Features; - const auto *FD = dyn_cast_or_null<FunctionDecl>(D); + const auto *FD = dyn_cast_or_null<FunctionDecl>(GD.getDecl()); FD = FD ? FD->getMostRecentDecl() : FD; const auto *TD = FD ? FD->getAttr<TargetAttr>() : nullptr; const auto *SD = FD ? FD->getAttr<CPUSpecificAttr>() : nullptr; bool AddedAttr = false; if (TD || SD) { llvm::StringMap<bool> FeatureMap; - getFunctionFeatureMap(FeatureMap, FD); + getFunctionFeatureMap(FeatureMap, GD); // Produce the canonical string for this set of features. 
for (const llvm::StringMap<bool>::value_type &Entry : FeatureMap) @@ -1393,7 +1431,7 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(const Decl *D, AddedAttr = true; } if (!Features.empty()) { - llvm::sort(Features.begin(), Features.end()); + llvm::sort(Features); Attrs.addAttribute("target-features", llvm::join(Features, ",")); AddedAttr = true; } @@ -1422,7 +1460,7 @@ void CodeGenModule::setNonAliasAttributes(GlobalDecl GD, F->addFnAttr("implicit-section-name", SA->getName()); llvm::AttrBuilder Attrs; - if (GetCPUAndFeaturesAttributes(D, Attrs)) { + if (GetCPUAndFeaturesAttributes(GD, Attrs)) { // We know that GetCPUAndFeaturesAttributes will always have the // newest set, since it has the newest possible FunctionDecl, so the // new ones should replace the old. @@ -1445,7 +1483,7 @@ void CodeGenModule::SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI) { const Decl *D = GD.getDecl(); - SetLLVMFunctionAttributes(D, FI, F); + SetLLVMFunctionAttributes(GD, FI, F); SetLLVMFunctionAttributesForDefinition(D, F); F->setLinkage(llvm::Function::InternalLinkage); @@ -1507,7 +1545,7 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, const auto *FD = cast<FunctionDecl>(GD.getDecl()); if (!IsIncompleteFunction) { - SetLLVMFunctionAttributes(FD, getTypes().arrangeGlobalDeclaration(GD), F); + SetLLVMFunctionAttributes(GD, getTypes().arrangeGlobalDeclaration(GD), F); // Setup target-specific attributes. if (F->isDeclaration()) getTargetCodeGenInfo().setTargetAttributes(FD, F, *this); @@ -1654,6 +1692,8 @@ static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod, // Add linker options to link against the libraries/frameworks // described by this module. llvm::LLVMContext &Context = CGM.getLLVMContext(); + bool IsELF = CGM.getTarget().getTriple().isOSBinFormatELF(); + bool IsPS4 = CGM.getTarget().getTriple().isPS4(); // For modules that use export_as for linking, use that module // name instead. 
@@ -1673,11 +1713,19 @@ static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod, } // Link against a library. - llvm::SmallString<24> Opt; - CGM.getTargetCodeGenInfo().getDependentLibraryOption( - Mod->LinkLibraries[I-1].Library, Opt); - auto *OptString = llvm::MDString::get(Context, Opt); - Metadata.push_back(llvm::MDNode::get(Context, OptString)); + if (IsELF && !IsPS4) { + llvm::Metadata *Args[2] = { + llvm::MDString::get(Context, "lib"), + llvm::MDString::get(Context, Mod->LinkLibraries[I - 1].Library), + }; + Metadata.push_back(llvm::MDNode::get(Context, Args)); + } else { + llvm::SmallString<24> Opt; + CGM.getTargetCodeGenInfo().getDependentLibraryOption( + Mod->LinkLibraries[I - 1].Library, Opt); + auto *OptString = llvm::MDString::get(Context, Opt); + Metadata.push_back(llvm::MDNode::get(Context, OptString)); + } } } @@ -1708,16 +1756,14 @@ void CodeGenModule::EmitModuleLinkOptions() { bool AnyChildren = false; // Visit the submodules of this module. - for (clang::Module::submodule_iterator Sub = Mod->submodule_begin(), - SubEnd = Mod->submodule_end(); - Sub != SubEnd; ++Sub) { + for (const auto &SM : Mod->submodules()) { // Skip explicit children; they need to be explicitly imported to be // linked against. - if ((*Sub)->IsExplicit) + if (SM->IsExplicit) continue; - if (Visited.insert(*Sub).second) { - Stack.push_back(*Sub); + if (Visited.insert(SM).second) { + Stack.push_back(SM); AnyChildren = true; } } @@ -1747,6 +1793,10 @@ void CodeGenModule::EmitModuleLinkOptions() { } void CodeGenModule::EmitDeferred() { + // Emit deferred declare target declarations. + if (getLangOpts().OpenMP && !getLangOpts().OpenMPSimd) + getOpenMPRuntime().emitDeferredTargetDecls(); + // Emit code for any potentially referenced deferred decls. Since a // previously unused static decl may become used during the generation of code // for a static function, iterate until no changes are made. 
@@ -1949,9 +1999,6 @@ bool CodeGenModule::isInSanitizerBlacklist(llvm::GlobalVariable *GV, bool CodeGenModule::imbueXRayAttrs(llvm::Function *Fn, SourceLocation Loc, StringRef Category) const { - if (!LangOpts.XRayInstrument) - return false; - const auto &XRayFilter = getContext().getXRayFilter(); using ImbueAttr = XRayFunctionFilter::ImbueAttribute; auto Attr = ImbueAttr::NONE; @@ -1981,6 +2028,13 @@ bool CodeGenModule::MustBeEmitted(const ValueDecl *Global) { if (LangOpts.EmitAllDecls) return true; + if (CodeGenOpts.KeepStaticConsts) { + const auto *VD = dyn_cast<VarDecl>(Global); + if (VD && VD->getType().isConstQualified() && + VD->getStorageDuration() == SD_Static) + return true; + } + return getContext().DeclMustBeEmitted(Global); } @@ -2000,7 +2054,8 @@ bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) { // codegen for global variables, because they may be marked as threadprivate. if (LangOpts.OpenMP && LangOpts.OpenMPUseTLS && getContext().getTargetInfo().isTLSSupported() && isa<VarDecl>(Global) && - !isTypeConstant(Global->getType(), false)) + !isTypeConstant(Global->getType(), false) && + !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(Global)) return false; return true; @@ -2141,16 +2196,22 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { } else { const auto *VD = cast<VarDecl>(Global); assert(VD->isFileVarDecl() && "Cannot emit local var decl as global."); - // We need to emit device-side global CUDA variables even if a - // variable does not have a definition -- we still need to define - // host-side shadow for it. 
- bool MustEmitForCuda = LangOpts.CUDA && !LangOpts.CUDAIsDevice && - !VD->hasDefinition() && - (VD->hasAttr<CUDAConstantAttr>() || - VD->hasAttr<CUDADeviceAttr>()); - if (!MustEmitForCuda && - VD->isThisDeclarationADefinition() != VarDecl::Definition && + if (VD->isThisDeclarationADefinition() != VarDecl::Definition && !Context.isMSStaticDataMemberInlineDefinition(VD)) { + if (LangOpts.OpenMP) { + // Emit declaration of the must-be-emitted declare target variable. + if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { + if (*Res == OMPDeclareTargetDeclAttr::MT_To) { + (void)GetAddrOfGlobalVar(VD); + } else { + assert(*Res == OMPDeclareTargetDeclAttr::MT_Link && + "link claue expected."); + (void)getOpenMPRuntime().getAddrOfDeclareTargetLink(VD); + } + return; + } + } // If this declaration may have caused an inline variable definition to // change linkage, make sure that it's emitted. if (Context.getInlineVariableDefinitionKind(VD) == @@ -2360,6 +2421,19 @@ bool CodeGenModule::shouldOpportunisticallyEmitVTables() { return CodeGenOpts.OptimizationLevel > 0; } +void CodeGenModule::EmitMultiVersionFunctionDefinition(GlobalDecl GD, + llvm::GlobalValue *GV) { + const auto *FD = cast<FunctionDecl>(GD.getDecl()); + + if (FD->isCPUSpecificMultiVersion()) { + auto *Spec = FD->getAttr<CPUSpecificAttr>(); + for (unsigned I = 0; I < Spec->cpus_size(); ++I) + EmitGlobalFunctionDefinition(GD.getWithMultiVersionIndex(I), nullptr); + // Requires multiple emits. 
+ } else + EmitGlobalFunctionDefinition(GD, GV); +} + void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { const auto *D = cast<ValueDecl>(GD.getDecl()); @@ -2367,7 +2441,7 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { Context.getSourceManager(), "Generating code for declaration"); - if (isa<FunctionDecl>(D)) { + if (const auto *FD = dyn_cast<FunctionDecl>(D)) { // At -O0, don't generate IR for functions with available_externally // linkage. if (!shouldEmitFunction(GD)) @@ -2380,6 +2454,8 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { ABI->emitCXXStructor(CD, getFromCtorType(GD.getCtorType())); else if (const auto *DD = dyn_cast<CXXDestructorDecl>(Method)) ABI->emitCXXStructor(DD, getFromDtorType(GD.getDtorType())); + else if (FD->isMultiVersion()) + EmitMultiVersionFunctionDefinition(GD, GV); else EmitGlobalFunctionDefinition(GD, GV); @@ -2389,6 +2465,8 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { return; } + if (FD->isMultiVersion()) + return EmitMultiVersionFunctionDefinition(GD, GV); return EmitGlobalFunctionDefinition(GD, GV); } @@ -2401,9 +2479,22 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old, llvm::Function *NewFn); +static unsigned +TargetMVPriority(const TargetInfo &TI, + const CodeGenFunction::MultiVersionResolverOption &RO) { + unsigned Priority = 0; + for (StringRef Feat : RO.Conditions.Features) + Priority = std::max(Priority, TI.multiVersionSortPriority(Feat)); + + if (!RO.Conditions.Architecture.empty()) + Priority = std::max( + Priority, TI.multiVersionSortPriority(RO.Conditions.Architecture)); + return Priority; +} + void CodeGenModule::emitMultiVersionFunctions() { for (GlobalDecl GD : MultiVersionFuncs) { - SmallVector<CodeGenFunction::TargetMultiVersionResolverOption, 10> Options; + 
SmallVector<CodeGenFunction::MultiVersionResolverOption, 10> Options; const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl()); getContext().forEachMultiversionedFunctionVersion( FD, [this, &GD, &Options](const FunctionDecl *CurFD) { @@ -2424,20 +2515,36 @@ void CodeGenModule::emitMultiVersionFunctions() { } assert(Func && "This should have just been created"); } - Options.emplace_back(getTarget(), cast<llvm::Function>(Func), - CurFD->getAttr<TargetAttr>()->parse()); + + const auto *TA = CurFD->getAttr<TargetAttr>(); + llvm::SmallVector<StringRef, 8> Feats; + TA->getAddedFeatures(Feats); + + Options.emplace_back(cast<llvm::Function>(Func), + TA->getArchitecture(), Feats); }); - llvm::Function *ResolverFunc = cast<llvm::Function>( - GetGlobalValue((getMangledName(GD) + ".resolver").str())); + llvm::Function *ResolverFunc; + const TargetInfo &TI = getTarget(); + + if (TI.supportsIFunc() || FD->isTargetMultiVersion()) + ResolverFunc = cast<llvm::Function>( + GetGlobalValue((getMangledName(GD) + ".resolver").str())); + else + ResolverFunc = cast<llvm::Function>(GetGlobalValue(getMangledName(GD))); + if (supportsCOMDAT()) ResolverFunc->setComdat( getModule().getOrInsertComdat(ResolverFunc->getName())); + std::stable_sort( Options.begin(), Options.end(), - std::greater<CodeGenFunction::TargetMultiVersionResolverOption>()); + [&TI](const CodeGenFunction::MultiVersionResolverOption &LHS, + const CodeGenFunction::MultiVersionResolverOption &RHS) { + return TargetMVPriority(TI, LHS) > TargetMVPriority(TI, RHS); + }); CodeGenFunction CGF(*this); - CGF.EmitTargetMultiVersionResolver(ResolverFunc, Options); + CGF.EmitMultiVersionResolver(ResolverFunc, Options); } } @@ -2446,27 +2553,58 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) { assert(FD && "Not a FunctionDecl?"); const auto *DD = FD->getAttr<CPUDispatchAttr>(); assert(DD && "Not a cpu_dispatch Function?"); - llvm::Type *DeclTy = getTypes().ConvertTypeForMem(FD->getType()); + QualType CanonTy = 
Context.getCanonicalType(FD->getType()); + llvm::Type *DeclTy = getTypes().ConvertFunctionType(CanonTy, FD); + + if (const auto *CXXFD = dyn_cast<CXXMethodDecl>(FD)) { + const CGFunctionInfo &FInfo = getTypes().arrangeCXXMethodDeclaration(CXXFD); + DeclTy = getTypes().GetFunctionType(FInfo); + } StringRef ResolverName = getMangledName(GD); - llvm::Type *ResolverType = llvm::FunctionType::get( - llvm::PointerType::get(DeclTy, - Context.getTargetAddressSpace(FD->getType())), - false); - auto *ResolverFunc = cast<llvm::Function>( - GetOrCreateLLVMFunction(ResolverName, ResolverType, GlobalDecl{}, - /*ForVTable=*/false)); - - SmallVector<CodeGenFunction::CPUDispatchMultiVersionResolverOption, 10> - Options; + + llvm::Type *ResolverType; + GlobalDecl ResolverGD; + if (getTarget().supportsIFunc()) + ResolverType = llvm::FunctionType::get( + llvm::PointerType::get(DeclTy, + Context.getTargetAddressSpace(FD->getType())), + false); + else { + ResolverType = DeclTy; + ResolverGD = GD; + } + + auto *ResolverFunc = cast<llvm::Function>(GetOrCreateLLVMFunction( + ResolverName, ResolverType, ResolverGD, /*ForVTable=*/false)); + + SmallVector<CodeGenFunction::MultiVersionResolverOption, 10> Options; const TargetInfo &Target = getTarget(); + unsigned Index = 0; for (const IdentifierInfo *II : DD->cpus()) { // Get the name of the target function so we can look it up/create it. 
std::string MangledName = getMangledNameImpl(*this, GD, FD, true) + getCPUSpecificMangling(*this, II->getName()); - llvm::Constant *Func = GetOrCreateLLVMFunction( - MangledName, DeclTy, GD, /*ForVTable=*/false, /*DontDefer=*/false, - /*IsThunk=*/false, llvm::AttributeList(), ForDefinition); + + llvm::Constant *Func = GetGlobalValue(MangledName); + + if (!Func) { + GlobalDecl ExistingDecl = Manglings.lookup(MangledName); + if (ExistingDecl.getDecl() && + ExistingDecl.getDecl()->getAsFunction()->isDefined()) { + EmitGlobalFunctionDefinition(ExistingDecl, nullptr); + Func = GetGlobalValue(MangledName); + } else { + if (!ExistingDecl.getDecl()) + ExistingDecl = GD.getWithMultiVersionIndex(Index); + + Func = GetOrCreateLLVMFunction( + MangledName, DeclTy, ExistingDecl, + /*ForVTable=*/false, /*DontDefer=*/true, + /*IsThunk=*/false, llvm::AttributeList(), ForDefinition); + } + } + llvm::SmallVector<StringRef, 32> Features; Target.getCPUSpecificCPUDispatchFeatures(II->getName(), Features); llvm::transform(Features, Features.begin(), @@ -2475,27 +2613,54 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) { Features.begin(), Features.end(), [&Target](StringRef Feat) { return !Target.validateCpuSupports(Feat); }), Features.end()); - Options.emplace_back(cast<llvm::Function>(Func), - CodeGenFunction::GetX86CpuSupportsMask(Features)); + Options.emplace_back(cast<llvm::Function>(Func), StringRef{}, Features); + ++Index; } llvm::sort( - Options.begin(), Options.end(), - std::greater<CodeGenFunction::CPUDispatchMultiVersionResolverOption>()); + Options, [](const CodeGenFunction::MultiVersionResolverOption &LHS, + const CodeGenFunction::MultiVersionResolverOption &RHS) { + return CodeGenFunction::GetX86CpuSupportsMask(LHS.Conditions.Features) > + CodeGenFunction::GetX86CpuSupportsMask(RHS.Conditions.Features); + }); + + // If the list contains multiple 'default' versions, such as when it contains + // 'pentium' and 'generic', don't emit the call to the generic one 
(since we + // always run on at least a 'pentium'). We do this by deleting the 'least + // advanced' (read, lowest mangling letter). + while (Options.size() > 1 && + CodeGenFunction::GetX86CpuSupportsMask( + (Options.end() - 2)->Conditions.Features) == 0) { + StringRef LHSName = (Options.end() - 2)->Function->getName(); + StringRef RHSName = (Options.end() - 1)->Function->getName(); + if (LHSName.compare(RHSName) < 0) + Options.erase(Options.end() - 2); + else + Options.erase(Options.end() - 1); + } + CodeGenFunction CGF(*this); - CGF.EmitCPUDispatchMultiVersionResolver(ResolverFunc, Options); + CGF.EmitMultiVersionResolver(ResolverFunc, Options); } -/// If an ifunc for the specified mangled name is not in the module, create and -/// return an llvm IFunc Function with the specified type. -llvm::Constant * -CodeGenModule::GetOrCreateMultiVersionIFunc(GlobalDecl GD, llvm::Type *DeclTy, - const FunctionDecl *FD) { +/// If a dispatcher for the specified mangled name is not in the module, create +/// and return an llvm Function with the specified type. +llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver( + GlobalDecl GD, llvm::Type *DeclTy, const FunctionDecl *FD) { std::string MangledName = getMangledNameImpl(*this, GD, FD, /*OmitMultiVersionMangling=*/true); - std::string IFuncName = MangledName + ".ifunc"; - if (llvm::GlobalValue *IFuncGV = GetGlobalValue(IFuncName)) - return IFuncGV; + + // Holds the name of the resolver, in ifunc mode this is the ifunc (which has + // a separate resolver). + std::string ResolverName = MangledName; + if (getTarget().supportsIFunc()) + ResolverName += ".ifunc"; + else if (FD->isTargetMultiVersion()) + ResolverName += ".resolver"; + + // If this already exists, just return that one. 
+ if (llvm::GlobalValue *ResolverGV = GetGlobalValue(ResolverName)) + return ResolverGV; // Since this is the first time we've created this IFunc, make sure // that we put this multiversioned function into the list to be @@ -2503,20 +2668,28 @@ CodeGenModule::GetOrCreateMultiVersionIFunc(GlobalDecl GD, llvm::Type *DeclTy, if (!FD->isCPUDispatchMultiVersion() && !FD->isCPUSpecificMultiVersion()) MultiVersionFuncs.push_back(GD); - std::string ResolverName = MangledName + ".resolver"; - llvm::Type *ResolverType = llvm::FunctionType::get( - llvm::PointerType::get(DeclTy, - Context.getTargetAddressSpace(FD->getType())), - false); - llvm::Constant *Resolver = - GetOrCreateLLVMFunction(ResolverName, ResolverType, GlobalDecl{}, - /*ForVTable=*/false); - llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create( - DeclTy, 0, llvm::Function::ExternalLinkage, "", Resolver, &getModule()); - GIF->setName(IFuncName); - SetCommonAttributes(FD, GIF); + if (getTarget().supportsIFunc()) { + llvm::Type *ResolverType = llvm::FunctionType::get( + llvm::PointerType::get( + DeclTy, getContext().getTargetAddressSpace(FD->getType())), + false); + llvm::Constant *Resolver = GetOrCreateLLVMFunction( + MangledName + ".resolver", ResolverType, GlobalDecl{}, + /*ForVTable=*/false); + llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create( + DeclTy, 0, llvm::Function::ExternalLinkage, "", Resolver, &getModule()); + GIF->setName(ResolverName); + SetCommonAttributes(FD, GIF); - return GIF; + return GIF; + } + + llvm::Constant *Resolver = GetOrCreateLLVMFunction( + ResolverName, DeclTy, GlobalDecl{}, /*ForVTable=*/false); + assert(isa<llvm::GlobalValue>(Resolver) && + "Resolver should be created for the first time"); + SetCommonAttributes(FD, cast<llvm::GlobalValue>(Resolver)); + return Resolver; } /// GetOrCreateLLVMFunction - If the specified mangled name is not in the @@ -2539,15 +2712,16 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( if (getLangOpts().OpenMPIsDevice && OpenMPRuntime && 
!OpenMPRuntime->markAsGlobalTarget(GD) && FD->isDefined() && !DontDefer && !IsForDefinition) { - const FunctionDecl *FDDef = FD->getDefinition(); - GlobalDecl GDDef; - if (const auto *CD = dyn_cast<CXXConstructorDecl>(FDDef)) - GDDef = GlobalDecl(CD, GD.getCtorType()); - else if (const auto *DD = dyn_cast<CXXDestructorDecl>(FDDef)) - GDDef = GlobalDecl(DD, GD.getDtorType()); - else - GDDef = GlobalDecl(FDDef); - addDeferredDeclToEmit(GDDef); + if (const FunctionDecl *FDDef = FD->getDefinition()) { + GlobalDecl GDDef; + if (const auto *CD = dyn_cast<CXXConstructorDecl>(FDDef)) + GDDef = GlobalDecl(CD, GD.getCtorType()); + else if (const auto *DD = dyn_cast<CXXDestructorDecl>(FDDef)) + GDDef = GlobalDecl(DD, GD.getDtorType()); + else + GDDef = GlobalDecl(FDDef); + EmitGlobal(GDDef); + } } if (FD->isMultiVersion()) { @@ -2555,7 +2729,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( if (TA && TA->isDefaultVersion()) UpdateMultiVersionNames(GD, FD); if (!IsForDefinition) - return GetOrCreateMultiVersionIFunc(GD, Ty, FD); + return GetOrCreateMultiVersionResolver(GD, Ty, FD); } } @@ -3058,10 +3232,9 @@ CodeGenModule::GetAddrOfGlobal(GlobalDecl GD, IsForDefinition); } -llvm::GlobalVariable * -CodeGenModule::CreateOrReplaceCXXRuntimeVariable(StringRef Name, - llvm::Type *Ty, - llvm::GlobalValue::LinkageTypes Linkage) { +llvm::GlobalVariable *CodeGenModule::CreateOrReplaceCXXRuntimeVariable( + StringRef Name, llvm::Type *Ty, llvm::GlobalValue::LinkageTypes Linkage, + unsigned Alignment) { llvm::GlobalVariable *GV = getModule().getNamedGlobal(Name); llvm::GlobalVariable *OldGV = nullptr; @@ -3097,6 +3270,8 @@ CodeGenModule::CreateOrReplaceCXXRuntimeVariable(StringRef Name, !GV->hasAvailableExternallyLinkage()) GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); + GV->setAlignment(Alignment); + return GV; } @@ -3313,8 +3488,15 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, // CUDA E.2.4.1 "__shared__ variables cannot have an initialization 
// as part of their declaration." Sema has already checked for // error cases, so we just need to set Init to UndefValue. - if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice && - D->hasAttr<CUDASharedAttr>()) + bool IsCUDASharedVar = + getLangOpts().CUDAIsDevice && D->hasAttr<CUDASharedAttr>(); + // Shadows of initialized device-side global variables are also left + // undefined. + bool IsCUDAShadowVar = + !getLangOpts().CUDAIsDevice && + (D->hasAttr<CUDAConstantAttr>() || D->hasAttr<CUDADeviceAttr>() || + D->hasAttr<CUDASharedAttr>()); + if (getLangOpts().CUDA && (IsCUDASharedVar || IsCUDAShadowVar)) Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy)); else if (!InitExpr) { // This is a tentative definition; tentative definitions are @@ -3434,7 +3616,10 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, Flags |= CGCUDARuntime::ExternDeviceVar; if (D->hasAttr<CUDAConstantAttr>()) Flags |= CGCUDARuntime::ConstantDeviceVar; - getCUDARuntime().registerDeviceVar(*GV, Flags); + // Extern global variables will be registered in the TU where they are + // defined. + if (!D->hasExternalStorage()) + getCUDARuntime().registerDeviceVar(*GV, Flags); } else if (D->hasAttr<CUDASharedAttr>()) // __shared__ variables are odd. Shadows do get created, but // they are not registered with the CUDA runtime, so they @@ -3577,6 +3762,15 @@ static bool isVarDeclStrongDefinition(const ASTContext &Context, } } + // Microsoft's link.exe doesn't support alignments greater than 32 for common + // symbols, so symbols with greater alignment requirements cannot be common. + // Other COFF linkers (ld.bfd and LLD) support arbitrary power-of-two + // alignments for common symbols via the aligncomm directive, so this + // restriction only applies to MSVC environments. 
+ if (Context.getTargetInfo().getTriple().isKnownWindowsMSVCEnvironment() && + Context.getTypeAlignIfKnown(D->getType()) > 32) + return true; + return false; } @@ -3592,6 +3786,10 @@ llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator( return llvm::GlobalVariable::WeakAnyLinkage; } + if (const auto *FD = D->getAsFunction()) + if (FD->isMultiVersion() && Linkage == GVA_AvailableExternally) + return llvm::GlobalVariable::LinkOnceAnyLinkage; + // We are guaranteed to have a strong definition somewhere else, // so we can use available_externally linkage. if (Linkage == GVA_AvailableExternally) @@ -3828,15 +4026,6 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD, AddGlobalDtor(Fn, DA->getPriority()); if (D->hasAttr<AnnotateAttr>()) AddGlobalAnnotations(D, Fn); - - if (D->isCPUSpecificMultiVersion()) { - auto *Spec = D->getAttr<CPUSpecificAttr>(); - // If there is another specific version we need to emit, do so here. - if (Spec->ActiveArgIndex + 1 < Spec->cpus_size()) { - ++Spec->ActiveArgIndex; - EmitGlobalFunctionDefinition(GD, nullptr); - } - } } void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) { @@ -4030,39 +4219,81 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { llvm::Constant *Zero = llvm::Constant::getNullValue(Int32Ty); llvm::Constant *Zeros[] = { Zero, Zero }; + const ASTContext &Context = getContext(); + const llvm::Triple &Triple = getTriple(); + + const auto CFRuntime = getLangOpts().CFRuntime; + const bool IsSwiftABI = + static_cast<unsigned>(CFRuntime) >= + static_cast<unsigned>(LangOptions::CoreFoundationABI::Swift); + const bool IsSwift4_1 = CFRuntime == LangOptions::CoreFoundationABI::Swift4_1; + // If we don't already have it, get __CFConstantStringClassReference. 
if (!CFConstantStringClassRef) { + const char *CFConstantStringClassName = "__CFConstantStringClassReference"; llvm::Type *Ty = getTypes().ConvertType(getContext().IntTy); Ty = llvm::ArrayType::get(Ty, 0); - llvm::GlobalValue *GV = cast<llvm::GlobalValue>( - CreateRuntimeVariable(Ty, "__CFConstantStringClassReference")); - - if (getTriple().isOSBinFormatCOFF()) { - IdentifierInfo &II = getContext().Idents.get(GV->getName()); - TranslationUnitDecl *TUDecl = getContext().getTranslationUnitDecl(); - DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); - - const VarDecl *VD = nullptr; - for (const auto &Result : DC->lookup(&II)) - if ((VD = dyn_cast<VarDecl>(Result))) - break; - - if (!VD || !VD->hasAttr<DLLExportAttr>()) { - GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); - GV->setLinkage(llvm::GlobalValue::ExternalLinkage); - } else { - GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); - GV->setLinkage(llvm::GlobalValue::ExternalLinkage); + + switch (CFRuntime) { + default: break; + case LangOptions::CoreFoundationABI::Swift: LLVM_FALLTHROUGH; + case LangOptions::CoreFoundationABI::Swift5_0: + CFConstantStringClassName = + Triple.isOSDarwin() ? "$s15SwiftFoundation19_NSCFConstantStringCN" + : "$s10Foundation19_NSCFConstantStringCN"; + Ty = IntPtrTy; + break; + case LangOptions::CoreFoundationABI::Swift4_2: + CFConstantStringClassName = + Triple.isOSDarwin() ? "$S15SwiftFoundation19_NSCFConstantStringCN" + : "$S10Foundation19_NSCFConstantStringCN"; + Ty = IntPtrTy; + break; + case LangOptions::CoreFoundationABI::Swift4_1: + CFConstantStringClassName = + Triple.isOSDarwin() ? 
"__T015SwiftFoundation19_NSCFConstantStringCN" + : "__T010Foundation19_NSCFConstantStringCN"; + Ty = IntPtrTy; + break; + } + + llvm::Constant *C = CreateRuntimeVariable(Ty, CFConstantStringClassName); + + if (Triple.isOSBinFormatELF() || Triple.isOSBinFormatCOFF()) { + llvm::GlobalValue *GV = nullptr; + + if ((GV = dyn_cast<llvm::GlobalValue>(C))) { + IdentifierInfo &II = Context.Idents.get(GV->getName()); + TranslationUnitDecl *TUDecl = Context.getTranslationUnitDecl(); + DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); + + const VarDecl *VD = nullptr; + for (const auto &Result : DC->lookup(&II)) + if ((VD = dyn_cast<VarDecl>(Result))) + break; + + if (Triple.isOSBinFormatELF()) { + if (!VD) + GV->setLinkage(llvm::GlobalValue::ExternalLinkage); + } else { + GV->setLinkage(llvm::GlobalValue::ExternalLinkage); + if (!VD || !VD->hasAttr<DLLExportAttr>()) + GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + else + GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); + } + + setDSOLocal(GV); } } - setDSOLocal(GV); // Decay array -> ptr CFConstantStringClassRef = - llvm::ConstantExpr::getGetElementPtr(Ty, GV, Zeros); + IsSwiftABI ? llvm::ConstantExpr::getPtrToInt(C, Ty) + : llvm::ConstantExpr::getGetElementPtr(Ty, C, Zeros); } - QualType CFTy = getContext().getCFConstantStringType(); + QualType CFTy = Context.getCFConstantStringType(); auto *STy = cast<llvm::StructType>(getTypes().ConvertType(CFTy)); @@ -4073,7 +4304,12 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { Fields.add(cast<llvm::ConstantExpr>(CFConstantStringClassRef)); // Flags. - Fields.addInt(IntTy, isUTF16 ? 0x07d0 : 0x07C8); + if (IsSwiftABI) { + Fields.addInt(IntPtrTy, IsSwift4_1 ? 0x05 : 0x01); + Fields.addInt(Int64Ty, isUTF16 ? 0x07d0 : 0x07c8); + } else { + Fields.addInt(IntTy, isUTF16 ? 0x07d0 : 0x07C8); + } // String pointer. 
llvm::Constant *C = nullptr; @@ -4094,17 +4330,20 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); // Don't enforce the target's minimum global alignment, since the only use // of the string is via this class initializer. - CharUnits Align = isUTF16 - ? getContext().getTypeAlignInChars(getContext().ShortTy) - : getContext().getTypeAlignInChars(getContext().CharTy); + CharUnits Align = isUTF16 ? Context.getTypeAlignInChars(Context.ShortTy) + : Context.getTypeAlignInChars(Context.CharTy); GV->setAlignment(Align.getQuantity()); // FIXME: We set the section explicitly to avoid a bug in ld64 224.1. // Without it LLVM can merge the string with a non unnamed_addr one during // LTO. Doing that changes the section it ends in, which surprises ld64. - if (getTriple().isOSBinFormatMachO()) + if (Triple.isOSBinFormatMachO()) GV->setSection(isUTF16 ? "__TEXT,__ustring" : "__TEXT,__cstring,cstring_literals"); + // Make sure the literal ends up in .rodata to allow for safe ICF and for + // the static linker to adjust permissions to read-only later on. + else if (Triple.isOSBinFormatELF()) + GV->setSection(".rodata"); // String. llvm::Constant *Str = @@ -4116,8 +4355,17 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { Fields.add(Str); // String length. 
- auto Ty = getTypes().ConvertType(getContext().LongTy); - Fields.addInt(cast<llvm::IntegerType>(Ty), StringLength); + llvm::IntegerType *LengthTy = + llvm::IntegerType::get(getModule().getContext(), + Context.getTargetInfo().getLongWidth()); + if (IsSwiftABI) { + if (CFRuntime == LangOptions::CoreFoundationABI::Swift4_1 || + CFRuntime == LangOptions::CoreFoundationABI::Swift4_2) + LengthTy = Int32Ty; + else + LengthTy = IntPtrTy; + } + Fields.addInt(LengthTy, StringLength); CharUnits Alignment = getPointerAlign(); @@ -4125,7 +4373,7 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { GV = Fields.finishAndCreateGlobal("_unnamed_cfstring_", Alignment, /*isConstant=*/false, llvm::GlobalVariable::PrivateLinkage); - switch (getTriple().getObjectFormat()) { + switch (Triple.getObjectFormat()) { case llvm::Triple::UnknownObjectFormat: llvm_unreachable("unknown file format"); case llvm::Triple::COFF: @@ -4264,15 +4512,13 @@ CodeGenModule::GetAddrOfConstantStringFromLiteral(const StringLiteral *S, StringRef GlobalVariableName; llvm::GlobalValue::LinkageTypes LT; - // Mangle the string literal if the ABI allows for it. However, we cannot - // do this if we are compiling with ASan or -fwritable-strings because they - // rely on strings having normal linkage. - if (!LangOpts.WritableStrings && - !LangOpts.Sanitize.has(SanitizerKind::Address) && - getCXXABI().getMangleContext().shouldMangleStringLiteral(S)) { + // Mangle the string literal if that's how the ABI merges duplicate strings. + // Don't do it if they are writable, since we don't want writes in one TU to + // affect strings in another. 
+ if (getCXXABI().getMangleContext().shouldMangleStringLiteral(S) && + !LangOpts.WritableStrings) { llvm::raw_svector_ostream Out(MangledNameBuffer); getCXXABI().getMangleContext().mangleStringLiteral(S, Out); - LT = llvm::GlobalValue::LinkOnceODRLinkage; GlobalVariableName = MangledNameBuffer; } else { @@ -4620,6 +4866,7 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { case Decl::TypeAliasTemplate: case Decl::Block: case Decl::Empty: + case Decl::Binding: break; case Decl::Using: // using X; [C++] if (CGDebugInfo *DI = getModuleDebugInfo()) @@ -4787,6 +5034,10 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { EmitOMPDeclareReduction(cast<OMPDeclareReductionDecl>(D)); break; + case Decl::OMPRequires: + EmitOMPRequiresDecl(cast<OMPRequiresDecl>(D)); + break; + default: // Make sure we handled everything we should, every other kind is a // non-top-level decl. FIXME: Would be nice to have an isTopLevelDeclKind @@ -4810,7 +5061,7 @@ void CodeGenModule::AddDeferredUnusedCoverageMapping(Decl *D) { if (!cast<FunctionDecl>(D)->doesThisDeclarationHaveABody()) return; SourceManager &SM = getContext().getSourceManager(); - if (LimitedCoverage && SM.getMainFileID() != SM.getFileID(D->getLocStart())) + if (LimitedCoverage && SM.getMainFileID() != SM.getFileID(D->getBeginLoc())) return; auto I = DeferredEmptyCoverageMappingDecls.find(D); if (I == DeferredEmptyCoverageMappingDecls.end()) @@ -4981,6 +5232,16 @@ void CodeGenModule::EmitVersionIdentMetadata() { IdentMetadata->addOperand(llvm::MDNode::get(Ctx, IdentNode)); } +void CodeGenModule::EmitCommandLineMetadata() { + llvm::NamedMDNode *CommandLineMetadata = + TheModule.getOrInsertNamedMetadata("llvm.commandline"); + std::string CommandLine = getCodeGenOpts().RecordCommandLine; + llvm::LLVMContext &Ctx = TheModule.getContext(); + + llvm::Metadata *CommandLineNode[] = {llvm::MDString::get(Ctx, CommandLine)}; + CommandLineMetadata->addOperand(llvm::MDNode::get(Ctx, CommandLineNode)); +} + void 
CodeGenModule::EmitTargetMetadata() { // Warning, new MangledDeclNames may be appended within this loop. // We rely on MapVector insertions adding new elements to the end @@ -5073,7 +5334,7 @@ void CodeGenModule::EmitOMPThreadPrivateDecl(const OMPThreadPrivateDecl *D) { Address Addr(GetAddrOfGlobalVar(VD), getContext().getDeclAlign(VD)); if (auto InitFunction = getOpenMPRuntime().emitThreadPrivateVarDefinition( - VD, Addr, RefExpr->getLocStart(), PerformInit)) + VD, Addr, RefExpr->getBeginLoc(), PerformInit)) CXXGlobalInits.push_back(InitFunction); } } @@ -5196,8 +5457,9 @@ TargetAttr::ParsedTargetAttr CodeGenModule::filterFunctionTargetAttrs(const Targ // Fills in the supplied string map with the set of target features for the // passed in function. void CodeGenModule::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap, - const FunctionDecl *FD) { + GlobalDecl GD) { StringRef TargetCPU = Target.getTargetOpts().CPU; + const FunctionDecl *FD = GD.getDecl()->getAsFunction(); if (const auto *TD = FD->getAttr<TargetAttr>()) { TargetAttr::ParsedTargetAttr ParsedAttr = filterFunctionTargetAttrs(TD); @@ -5219,8 +5481,8 @@ void CodeGenModule::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap, ParsedAttr.Features); } else if (const auto *SD = FD->getAttr<CPUSpecificAttr>()) { llvm::SmallVector<StringRef, 32> FeaturesTmp; - Target.getCPUSpecificCPUDispatchFeatures(SD->getCurCPUName()->getName(), - FeaturesTmp); + Target.getCPUSpecificCPUDispatchFeatures( + SD->getCPUName(GD.getMultiVersionIndex())->getName(), FeaturesTmp); std::vector<std::string> Features(FeaturesTmp.begin(), FeaturesTmp.end()); Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, Features); } else { diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h index 91f3d94330f1..75679d11c13c 100644 --- a/lib/CodeGen/CodeGenModule.h +++ b/lib/CodeGen/CodeGenModule.h @@ -119,15 +119,29 @@ struct OrderGlobalInits { struct ObjCEntrypoints { ObjCEntrypoints() { memset(this, 0, 
sizeof(*this)); } - /// void objc_autoreleasePoolPop(void*); + /// void objc_alloc(id); + llvm::Constant *objc_alloc; + + /// void objc_allocWithZone(id); + llvm::Constant *objc_allocWithZone; + + /// void objc_autoreleasePoolPop(void*); llvm::Constant *objc_autoreleasePoolPop; + /// void objc_autoreleasePoolPop(void*); + /// Note this method is used when we are using exception handling + llvm::Constant *objc_autoreleasePoolPopInvoke; + /// void *objc_autoreleasePoolPush(void); llvm::Constant *objc_autoreleasePoolPush; /// id objc_autorelease(id); llvm::Constant *objc_autorelease; + /// id objc_autorelease(id); + /// Note this is the runtime method not the intrinsic. + llvm::Constant *objc_autoreleaseRuntimeFunction; + /// id objc_autoreleaseReturnValue(id); llvm::Constant *objc_autoreleaseReturnValue; @@ -152,6 +166,10 @@ struct ObjCEntrypoints { /// id objc_retain(id); llvm::Constant *objc_retain; + /// id objc_retain(id); + /// Note this is the runtime method not the intrinsic. + llvm::Constant *objc_retainRuntimeFunction; + /// id objc_retainAutorelease(id); llvm::Constant *objc_retainAutorelease; @@ -167,6 +185,10 @@ struct ObjCEntrypoints { /// void objc_release(id); llvm::Constant *objc_release; + /// void objc_release(id); + /// Note this is the runtime method not the intrinsic. + llvm::Constant *objc_releaseRuntimeFunction; + /// void objc_storeStrong(id*, id); llvm::Constant *objc_storeStrong; @@ -764,7 +786,8 @@ public: /// bitcast to the new variable. llvm::GlobalVariable * CreateOrReplaceCXXRuntimeVariable(StringRef Name, llvm::Type *Ty, - llvm::GlobalValue::LinkageTypes Linkage); + llvm::GlobalValue::LinkageTypes Linkage, + unsigned Alignment); llvm::Function * CreateGlobalInitOrDestructFunction(llvm::FunctionType *ty, const Twine &name, @@ -1042,8 +1065,7 @@ public: const CGFunctionInfo &FI); /// Set the LLVM function attributes (sext, zext, etc). 
- void SetLLVMFunctionAttributes(const Decl *D, - const CGFunctionInfo &Info, + void SetLLVMFunctionAttributes(GlobalDecl GD, const CGFunctionInfo &Info, llvm::Function *F); /// Set the LLVM function attributes which only apply to a function @@ -1103,8 +1125,7 @@ public: // Fills in the supplied string map with the set of target features for the // passed in function. - void getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap, - const FunctionDecl *FD); + void getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap, GlobalDecl GD); StringRef getMangledName(GlobalDecl GD); StringRef getBlockMangledName(GlobalDecl GD, const BlockDecl *BD); @@ -1223,6 +1244,10 @@ public: void EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D, CodeGenFunction *CGF = nullptr); + /// Emit a code for requires directive. + /// \param D Requires declaration + void EmitOMPRequiresDecl(const OMPRequiresDecl *D); + /// Returns whether the given record has hidden LTO visibility and therefore /// may participate in (single-module) CFI and whole-program vtable /// optimization. 
@@ -1288,9 +1313,9 @@ private: llvm::AttributeList ExtraAttrs = llvm::AttributeList(), ForDefinition_t IsForDefinition = NotForDefinition); - llvm::Constant *GetOrCreateMultiVersionIFunc(GlobalDecl GD, - llvm::Type *DeclTy, - const FunctionDecl *FD); + llvm::Constant *GetOrCreateMultiVersionResolver(GlobalDecl GD, + llvm::Type *DeclTy, + const FunctionDecl *FD); void UpdateMultiVersionNames(GlobalDecl GD, const FunctionDecl *FD); llvm::Constant *GetOrCreateLLVMGlobal(StringRef MangledName, @@ -1299,7 +1324,7 @@ private: ForDefinition_t IsForDefinition = NotForDefinition); - bool GetCPUAndFeaturesAttributes(const Decl *D, + bool GetCPUAndFeaturesAttributes(GlobalDecl GD, llvm::AttrBuilder &AttrBuilder); void setNonAliasAttributes(GlobalDecl GD, llvm::GlobalObject *GO); @@ -1310,6 +1335,8 @@ private: void EmitGlobalDefinition(GlobalDecl D, llvm::GlobalValue *GV = nullptr); void EmitGlobalFunctionDefinition(GlobalDecl GD, llvm::GlobalValue *GV); + void EmitMultiVersionFunctionDefinition(GlobalDecl GD, llvm::GlobalValue *GV); + void EmitGlobalVarDefinition(const VarDecl *D, bool IsTentative = false); void EmitAliasDefinition(GlobalDecl GD); void emitIFuncDefinition(GlobalDecl GD); @@ -1397,6 +1424,9 @@ private: /// Emit the Clang version as llvm.ident metadata. void EmitVersionIdentMetadata(); + /// Emit the Clang commandline as llvm.commandline metadata. + void EmitCommandLineMetadata(); + /// Emits target specific Metadata for global declarations. void EmitTargetMetadata(); diff --git a/lib/CodeGen/CodeGenPGO.cpp b/lib/CodeGen/CodeGenPGO.cpp index c8c2a1b956b8..776060743a63 100644 --- a/lib/CodeGen/CodeGenPGO.cpp +++ b/lib/CodeGen/CodeGenPGO.cpp @@ -165,7 +165,12 @@ struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> { // Blocks and lambdas are handled as separate functions, so we need not // traverse them in the parent context. 
bool TraverseBlockExpr(BlockExpr *BE) { return true; } - bool TraverseLambdaBody(LambdaExpr *LE) { return true; } + bool TraverseLambdaExpr(LambdaExpr *LE) { + // Traverse the captures, but not the body. + for (const auto &C : zip(LE->captures(), LE->capture_inits())) + TraverseLambdaCapture(LE, &std::get<0>(C), std::get<1>(C)); + return true; + } bool TraverseCapturedStmt(CapturedStmt *CS) { return true; } bool VisitDecl(const Decl *D) { @@ -544,6 +549,8 @@ struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> { void VisitCXXForRangeStmt(const CXXForRangeStmt *S) { RecordStmtCount(S); + if (S->getInit()) + Visit(S->getInit()); Visit(S->getLoopVarStmt()); Visit(S->getRangeStmt()); Visit(S->getBeginStmt()); @@ -815,7 +822,7 @@ bool CodeGenPGO::skipRegionMappingForDecl(const Decl *D) { // Don't map the functions in system headers. const auto &SM = CGM.getContext().getSourceManager(); - auto Loc = D->getBody()->getLocStart(); + auto Loc = D->getBody()->getBeginLoc(); return SM.isInSystemHeader(Loc); } diff --git a/lib/CodeGen/CodeGenPGO.h b/lib/CodeGen/CodeGenPGO.h index 0759e65388b8..120ab651a4a8 100644 --- a/lib/CodeGen/CodeGenPGO.h +++ b/lib/CodeGen/CodeGenPGO.h @@ -17,7 +17,6 @@ #include "CGBuilder.h" #include "CodeGenModule.h" #include "CodeGenTypes.h" -#include "clang/Frontend/CodeGenOptions.h" #include "llvm/ProfileData/InstrProfReader.h" #include <array> #include <memory> diff --git a/lib/CodeGen/CodeGenTBAA.cpp b/lib/CodeGen/CodeGenTBAA.cpp index ec48231e5247..27d39716d22f 100644 --- a/lib/CodeGen/CodeGenTBAA.cpp +++ b/lib/CodeGen/CodeGenTBAA.cpp @@ -20,7 +20,7 @@ #include "clang/AST/Attr.h" #include "clang/AST/Mangle.h" #include "clang/AST/RecordLayout.h" -#include "clang/Frontend/CodeGenOptions.h" +#include "clang/Basic/CodeGenOptions.h" #include "llvm/ADT/SmallSet.h" #include "llvm/IR/Constants.h" #include "llvm/IR/LLVMContext.h" diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp index 1a1395e6ae74..2acf1ac16180 
100644 --- a/lib/CodeGen/CodeGenTypes.cpp +++ b/lib/CodeGen/CodeGenTypes.cpp @@ -503,6 +503,9 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ case BuiltinType::Id: #include "clang/Basic/OpenCLImageTypes.def" +#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ + case BuiltinType::Id: +#include "clang/Basic/OpenCLExtensionTypes.def" case BuiltinType::OCLSampler: case BuiltinType::OCLEvent: case BuiltinType::OCLClkEvent: diff --git a/lib/CodeGen/CodeGenTypes.h b/lib/CodeGen/CodeGenTypes.h index 626869f00021..8e344e91b8cd 100644 --- a/lib/CodeGen/CodeGenTypes.h +++ b/lib/CodeGen/CodeGenTypes.h @@ -17,7 +17,6 @@ #include "CGCall.h" #include "clang/Basic/ABI.h" #include "clang/CodeGen/CGFunctionInfo.h" -#include "clang/Sema/Sema.h" #include "llvm/ADT/DenseMap.h" #include "llvm/IR/Module.h" diff --git a/lib/CodeGen/ConstantEmitter.h b/lib/CodeGen/ConstantEmitter.h index b4d1b65743c7..7ad8e5d37cd1 100644 --- a/lib/CodeGen/ConstantEmitter.h +++ b/lib/CodeGen/ConstantEmitter.h @@ -38,6 +38,9 @@ private: /// Whether the constant-emission failed. bool Failed = false; + /// Whether we're in a constant context. + bool InConstantContext = false; + /// The AST address space where this (non-abstract) initializer is going. /// Used for generating appropriate placeholders. 
LangAS DestAddressSpace; diff --git a/lib/CodeGen/CoverageMappingGen.cpp b/lib/CodeGen/CoverageMappingGen.cpp index 2d8446463594..35962c73d9a8 100644 --- a/lib/CodeGen/CoverageMappingGen.cpp +++ b/lib/CodeGen/CoverageMappingGen.cpp @@ -67,7 +67,7 @@ public: void setStartLoc(SourceLocation Loc) { LocStart = Loc; } - SourceLocation getStartLoc() const { + SourceLocation getBeginLoc() const { assert(LocStart && "Region has no start location"); return *LocStart; } @@ -116,7 +116,7 @@ struct SpellingRegion { } SpellingRegion(SourceManager &SM, SourceMappingRegion &R) - : SpellingRegion(SM, R.getStartLoc(), R.getEndLoc()) {} + : SpellingRegion(SM, R.getBeginLoc(), R.getEndLoc()) {} /// Check if the start and end locations appear in source order, i.e /// top->bottom, left->right. @@ -204,7 +204,7 @@ public: /// Get the start of \c S ignoring macro arguments and builtin macros. SourceLocation getStart(const Stmt *S) { - SourceLocation Loc = S->getLocStart(); + SourceLocation Loc = S->getBeginLoc(); while (SM.isMacroArgExpansion(Loc) || isInBuiltin(Loc)) Loc = SM.getImmediateExpansionRange(Loc).getBegin(); return Loc; @@ -212,7 +212,7 @@ public: /// Get the end of \c S ignoring macro arguments and builtin macros. 
SourceLocation getEnd(const Stmt *S) { - SourceLocation Loc = S->getLocEnd(); + SourceLocation Loc = S->getEndLoc(); while (SM.isMacroArgExpansion(Loc) || isInBuiltin(Loc)) Loc = SM.getImmediateExpansionRange(Loc).getBegin(); return getPreciseTokenLocEnd(Loc); @@ -229,7 +229,7 @@ public: llvm::SmallSet<FileID, 8> Visited; SmallVector<std::pair<SourceLocation, unsigned>, 8> FileLocs; for (const auto &Region : SourceRegions) { - SourceLocation Loc = Region.getStartLoc(); + SourceLocation Loc = Region.getBeginLoc(); FileID File = SM.getFileID(Loc); if (!Visited.insert(File).second) continue; @@ -311,7 +311,7 @@ public: for (const auto &Region : SourceRegions) { assert(Region.hasEndLoc() && "incomplete region"); - SourceLocation LocStart = Region.getStartLoc(); + SourceLocation LocStart = Region.getBeginLoc(); assert(SM.getFileID(LocStart).isValid() && "region in invalid file"); // Ignore regions from system headers. @@ -502,7 +502,7 @@ struct CounterCoverageMappingBuilder DeferredRegion = None; // If the region ends in an expansion, find the expansion site. - FileID StartFile = SM.getFileID(DR.getStartLoc()); + FileID StartFile = SM.getFileID(DR.getBeginLoc()); if (SM.getFileID(DeferredEndLoc) != StartFile) { if (isNestedIn(DeferredEndLoc, StartFile)) { do { @@ -515,12 +515,12 @@ struct CounterCoverageMappingBuilder // The parent of this deferred region ends where the containing decl ends, // so the region isn't useful. - if (DR.getStartLoc() == DeferredEndLoc) + if (DR.getBeginLoc() == DeferredEndLoc) return Index; // If we're visiting statements in non-source order (e.g switch cases or // a loop condition) we can't construct a sensible deferred region. 
- if (!SpellingRegion(SM, DR.getStartLoc(), DeferredEndLoc).isInSourceOrder()) + if (!SpellingRegion(SM, DR.getBeginLoc(), DeferredEndLoc).isInSourceOrder()) return Index; DR.setGap(true); @@ -552,6 +552,15 @@ struct CounterCoverageMappingBuilder completeDeferred(Count, DeferredEndLoc); } + size_t locationDepth(SourceLocation Loc) { + size_t Depth = 0; + while (Loc.isValid()) { + Loc = getIncludeOrExpansionLoc(Loc); + Depth++; + } + return Depth; + } + /// Pop regions from the stack into the function's list of regions. /// /// Adds all regions from \c ParentIndex to the top of the stack to the @@ -562,23 +571,45 @@ struct CounterCoverageMappingBuilder while (RegionStack.size() > ParentIndex) { SourceMappingRegion &Region = RegionStack.back(); if (Region.hasStartLoc()) { - SourceLocation StartLoc = Region.getStartLoc(); + SourceLocation StartLoc = Region.getBeginLoc(); SourceLocation EndLoc = Region.hasEndLoc() ? Region.getEndLoc() : RegionStack[ParentIndex].getEndLoc(); + size_t StartDepth = locationDepth(StartLoc); + size_t EndDepth = locationDepth(EndLoc); while (!SM.isWrittenInSameFile(StartLoc, EndLoc)) { - // The region ends in a nested file or macro expansion. Create a - // separate region for each expansion. - SourceLocation NestedLoc = getStartOfFileOrMacro(EndLoc); - assert(SM.isWrittenInSameFile(NestedLoc, EndLoc)); - - if (!isRegionAlreadyAdded(NestedLoc, EndLoc)) - SourceRegions.emplace_back(Region.getCounter(), NestedLoc, EndLoc); - - EndLoc = getPreciseTokenLocEnd(getIncludeOrExpansionLoc(EndLoc)); - if (EndLoc.isInvalid()) - llvm::report_fatal_error("File exit not handled before popRegions"); + bool UnnestStart = StartDepth >= EndDepth; + bool UnnestEnd = EndDepth >= StartDepth; + if (UnnestEnd) { + // The region ends in a nested file or macro expansion. Create a + // separate region for each expansion. 
+ SourceLocation NestedLoc = getStartOfFileOrMacro(EndLoc); + assert(SM.isWrittenInSameFile(NestedLoc, EndLoc)); + + if (!isRegionAlreadyAdded(NestedLoc, EndLoc)) + SourceRegions.emplace_back(Region.getCounter(), NestedLoc, EndLoc); + + EndLoc = getPreciseTokenLocEnd(getIncludeOrExpansionLoc(EndLoc)); + if (EndLoc.isInvalid()) + llvm::report_fatal_error("File exit not handled before popRegions"); + EndDepth--; + } + if (UnnestStart) { + // The region begins in a nested file or macro expansion. Create a + // separate region for each expansion. + SourceLocation NestedLoc = getEndOfFileOrMacro(StartLoc); + assert(SM.isWrittenInSameFile(StartLoc, NestedLoc)); + + if (!isRegionAlreadyAdded(StartLoc, NestedLoc)) + SourceRegions.emplace_back(Region.getCounter(), StartLoc, NestedLoc); + + StartLoc = getIncludeOrExpansionLoc(StartLoc); + if (StartLoc.isInvalid()) + llvm::report_fatal_error("File exit not handled before popRegions"); + StartDepth--; + } } + Region.setStartLoc(StartLoc); Region.setEndLoc(EndLoc); MostRecentLocation = EndLoc; @@ -588,7 +619,7 @@ struct CounterCoverageMappingBuilder EndLoc == getEndOfFileOrMacro(EndLoc)) MostRecentLocation = getIncludeOrExpansionLoc(EndLoc); - assert(SM.isWrittenInSameFile(Region.getStartLoc(), EndLoc)); + assert(SM.isWrittenInSameFile(Region.getBeginLoc(), EndLoc)); assert(SpellingRegion(SM, Region).isInSourceOrder()); SourceRegions.push_back(Region); @@ -625,18 +656,21 @@ struct CounterCoverageMappingBuilder return RegionStack.back(); } - /// Propagate counts through the children of \c S. - Counter propagateCounts(Counter TopCount, const Stmt *S) { + /// Propagate counts through the children of \p S if \p VisitChildren is true. + /// Otherwise, only emit a count for \p S itself. 
+ Counter propagateCounts(Counter TopCount, const Stmt *S, + bool VisitChildren = true) { SourceLocation StartLoc = getStart(S); SourceLocation EndLoc = getEnd(S); size_t Index = pushRegion(TopCount, StartLoc, EndLoc); - Visit(S); + if (VisitChildren) + Visit(S); Counter ExitCount = getRegion().getCounter(); popRegions(Index); // The statement may be spanned by an expansion. Make sure we handle a file // exit out of this expansion before moving to the next statement. - if (SM.isBeforeInTranslationUnit(StartLoc, S->getLocStart())) + if (SM.isBeforeInTranslationUnit(StartLoc, S->getBeginLoc())) MostRecentLocation = EndLoc; return ExitCount; @@ -648,7 +682,7 @@ struct CounterCoverageMappingBuilder return SourceRegions.rend() != std::find_if(SourceRegions.rbegin(), SourceRegions.rend(), [&](const SourceMappingRegion &Region) { - return Region.getStartLoc() == StartLoc && + return Region.getBeginLoc() == StartLoc && Region.getEndLoc() == EndLoc; }); } @@ -700,7 +734,7 @@ struct CounterCoverageMappingBuilder for (SourceMappingRegion &I : llvm::reverse(RegionStack)) { if (!I.hasStartLoc()) continue; - SourceLocation Loc = I.getStartLoc(); + SourceLocation Loc = I.getBeginLoc(); if (!isNestedIn(Loc, ParentFile)) { ParentCounter = I.getCounter(); break; @@ -826,7 +860,7 @@ struct CounterCoverageMappingBuilder } void VisitStmt(const Stmt *S) { - if (S->getLocStart().isValid()) + if (S->getBeginLoc().isValid()) extendRegion(S); for (const Stmt *Child : S->children()) if (Child) @@ -843,7 +877,16 @@ struct CounterCoverageMappingBuilder if (Body && SM.isInSystemHeader(SM.getSpellingLoc(getStart(Body)))) return; - propagateCounts(getRegionCounter(Body), Body); + // Do not visit the artificial children nodes of defaulted methods. The + // lexer may not be able to report back precise token end locations for + // these children nodes (llvm.org/PR39822), and moreover users will not be + // able to see coverage for them. 
+ bool Defaulted = false; + if (auto *Method = dyn_cast<CXXMethodDecl>(D)) + Defaulted = Method->isDefaulted(); + + propagateCounts(getRegionCounter(Body), Body, + /*VisitChildren=*/!Defaulted); assert(RegionStack.empty() && "Regions entered but never exited"); // Discard the last uncompleted deferred region in a decl, if one exists. @@ -1004,6 +1047,8 @@ struct CounterCoverageMappingBuilder void VisitCXXForRangeStmt(const CXXForRangeStmt *S) { extendRegion(S); + if (S->getInit()) + Visit(S->getInit()); Visit(S->getLoopVarStmt()); Visit(S->getRangeStmt()); @@ -1109,7 +1154,7 @@ struct CounterCoverageMappingBuilder Counter Count = addCounters(Parent.getCounter(), getRegionCounter(S)); // Reuse the existing region if it starts at our label. This is typical of // the first case in a switch. - if (Parent.hasStartLoc() && Parent.getStartLoc() == getStart(S)) + if (Parent.hasStartLoc() && Parent.getBeginLoc() == getStart(S)) Parent.setCounter(Count); else pushRegion(Count, getStart(S)); diff --git a/lib/CodeGen/CoverageMappingGen.h b/lib/CodeGen/CoverageMappingGen.h index b08ad896d7a5..c62db096952a 100644 --- a/lib/CodeGen/CoverageMappingGen.h +++ b/lib/CodeGen/CoverageMappingGen.h @@ -16,7 +16,6 @@ #include "clang/Basic/LLVM.h" #include "clang/Basic/SourceLocation.h" -#include "clang/Frontend/CodeGenOptions.h" #include "clang/Lex/PPCallbacks.h" #include "llvm/ADT/DenseMap.h" #include "llvm/IR/GlobalValue.h" diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp index 00fff144b597..b53304528c3d 100644 --- a/lib/CodeGen/ItaniumCXXABI.cpp +++ b/lib/CodeGen/ItaniumCXXABI.cpp @@ -287,6 +287,7 @@ public: void emitVirtualInheritanceTables(const CXXRecordDecl *RD) override; bool canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const override; + bool canSpeculativelyEmitVTableAsBaseClass(const CXXRecordDecl *RD) const; void setThunkLinkage(llvm::Function *Thunk, bool ForVTable, GlobalDecl GD, bool ReturnAdjustment) override { @@ -634,7 +635,7 @@ CGCallee 
ItaniumCXXABI::EmitLoadOfMemberFunctionPointer( if (ShouldEmitCFICheck) { CodeGenFunction::SanitizerScope SanScope(&CGF); - CheckSourceLocation = CGF.EmitCheckSourceLocation(E->getLocStart()); + CheckSourceLocation = CGF.EmitCheckSourceLocation(E->getBeginLoc()); CheckTypeDesc = CGF.EmitCheckTypeDescriptor(QualType(MPT, 0)); llvm::Constant *StaticData[] = { llvm::ConstantInt::get(CGF.Int8Ty, CodeGenFunction::CFITCK_VMFCall), @@ -1562,9 +1563,8 @@ void ItaniumCXXABI::EmitDestructorCall(CodeGenFunction &CGF, Type != Dtor_Base && DD->isVirtual()) Callee = CGF.BuildAppleKextVirtualDestructorCall(DD, Type, DD->getParent()); else - Callee = - CGCallee::forDirect(CGM.getAddrOfCXXStructor(DD, getFromDtorType(Type)), - DD); + Callee = CGCallee::forDirect( + CGM.getAddrOfCXXStructor(DD, getFromDtorType(Type)), GD); CGF.EmitCXXMemberOrOperatorCall(DD, Callee, ReturnValueSlot(), This.getPointer(), VTT, VTTTy, @@ -1598,12 +1598,6 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, // Set the right visibility. CGM.setGVProperties(VTable, RD); - // Use pointer alignment for the vtable. Otherwise we would align them based - // on the size of the initializer which doesn't make sense as only single - // values are read. - unsigned PAlign = CGM.getTarget().getPointerAlign(0); - VTable->setAlignment(getContext().toCharUnitsFromBits(PAlign).getQuantity()); - // If this is the magic class __cxxabiv1::__fundamental_type_info, // we will emit the typeinfo for the fundamental types. This is the // same behaviour as GCC. @@ -1703,8 +1697,14 @@ llvm::GlobalVariable *ItaniumCXXABI::getAddrOfVTable(const CXXRecordDecl *RD, CGM.getItaniumVTableContext().getVTableLayout(RD); llvm::Type *VTableType = CGM.getVTables().getVTableType(VTLayout); + // Use pointer alignment for the vtable. Otherwise we would align them based + // on the size of the initializer which doesn't make sense as only single + // values are read. 
+ unsigned PAlign = CGM.getTarget().getPointerAlign(0); + VTable = CGM.CreateOrReplaceCXXRuntimeVariable( - Name, VTableType, llvm::GlobalValue::ExternalLinkage); + Name, VTableType, llvm::GlobalValue::ExternalLinkage, + getContext().toCharUnitsFromBits(PAlign).getQuantity()); VTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); CGM.setGVProperties(VTable, RD); @@ -1750,7 +1750,7 @@ CGCallee ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF, VFunc = VFuncLoad; } - CGCallee Callee(MethodDecl->getCanonicalDecl(), VFunc); + CGCallee Callee(GD, VFunc); return Callee; } @@ -1778,7 +1778,8 @@ void ItaniumCXXABI::emitVirtualInheritanceTables(const CXXRecordDecl *RD) { VTables.EmitVTTDefinition(VTT, CGM.getVTableLinkage(RD), RD); } -bool ItaniumCXXABI::canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const { +bool ItaniumCXXABI::canSpeculativelyEmitVTableAsBaseClass( + const CXXRecordDecl *RD) const { // We don't emit available_externally vtables if we are in -fapple-kext mode // because kext mode does not permit devirtualization. if (CGM.getLangOpts().AppleKext) @@ -1796,7 +1797,43 @@ bool ItaniumCXXABI::canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const { // to emit an available_externally copy of vtable. // FIXME we can still emit a copy of the vtable if we // can emit definition of the inline functions. - return !hasAnyUnusedVirtualInlineFunction(RD); + if (hasAnyUnusedVirtualInlineFunction(RD)) + return false; + + // For a class with virtual bases, we must also be able to speculatively + // emit the VTT, because CodeGen doesn't have separate notions of "can emit + // the vtable" and "can emit the VTT". For a base subobject, this means we + // need to be able to emit non-virtual base vtables. 
+ if (RD->getNumVBases()) { + for (const auto &B : RD->bases()) { + auto *BRD = B.getType()->getAsCXXRecordDecl(); + assert(BRD && "no class for base specifier"); + if (B.isVirtual() || !BRD->isDynamicClass()) + continue; + if (!canSpeculativelyEmitVTableAsBaseClass(BRD)) + return false; + } + } + + return true; +} + +bool ItaniumCXXABI::canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const { + if (!canSpeculativelyEmitVTableAsBaseClass(RD)) + return false; + + // For a complete-object vtable (or more specifically, for the VTT), we need + // to be able to speculatively emit the vtables of all dynamic virtual bases. + for (const auto &B : RD->vbases()) { + auto *BRD = B.getType()->getAsCXXRecordDecl(); + assert(BRD && "no class for base specifier"); + if (!BRD->isDynamicClass()) + continue; + if (!canSpeculativelyEmitVTableAsBaseClass(BRD)) + return false; + } + + return true; } static llvm::Value *performTypeAdjustment(CodeGenFunction &CGF, Address InitialPtr, @@ -1916,7 +1953,7 @@ Address ItaniumCXXABI::InitializeArrayCookie(CodeGenFunction &CGF, // Handle the array cookie specially in ASan. if (CGM.getLangOpts().Sanitize.has(SanitizerKind::Address) && AS == 0 && (expr->getOperatorNew()->isReplaceableGlobalAllocationFunction() || - CGM.getCodeGenOpts().SanitizeAddressPoisonClassMemberArrayNewCookie)) { + CGM.getCodeGenOpts().SanitizeAddressPoisonCustomArrayCookie)) { // The store to the CookiePtr does not need to be instrumented. 
CGM.getSanitizerMetadata()->disableSanitizerForInstruction(SI); llvm::FunctionType *FTy = @@ -2315,11 +2352,13 @@ void CodeGenModule::registerGlobalDtorsWithAtExit() { FTy, GlobalInitFnName, getTypes().arrangeNullaryFunction(), SourceLocation()); ASTContext &Ctx = getContext(); + QualType ReturnTy = Ctx.VoidTy; + QualType FunctionTy = Ctx.getFunctionType(ReturnTy, llvm::None, {}); FunctionDecl *FD = FunctionDecl::Create( Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), - &Ctx.Idents.get(GlobalInitFnName), Ctx.VoidTy, nullptr, SC_Static, + &Ctx.Idents.get(GlobalInitFnName), FunctionTy, nullptr, SC_Static, false, false); - CGF.StartFunction(GlobalDecl(FD), getContext().VoidTy, GlobalInitFn, + CGF.StartFunction(GlobalDecl(FD), ReturnTy, GlobalInitFn, getTypes().arrangeNullaryFunction(), FunctionArgList(), SourceLocation(), SourceLocation()); @@ -2342,6 +2381,9 @@ void ItaniumCXXABI::registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D, llvm::Constant *dtor, llvm::Constant *addr) { + if (D.isNoDestroy(CGM.getContext())) + return; + // Use __cxa_atexit if available. 
if (CGM.getCodeGenOpts().CXAAtExit) return emitGlobalDtorWithCXAAtExit(CGF, dtor, addr, D.getTLSKind()); @@ -2415,7 +2457,7 @@ ItaniumCXXABI::getOrCreateThreadLocalWrapper(const VarDecl *VD, llvm::Function::Create(FnTy, getThreadLocalWrapperLinkage(VD, CGM), WrapperName.str(), &CGM.getModule()); - CGM.SetLLVMFunctionAttributes(nullptr, FI, Wrapper); + CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Wrapper); if (VD->hasDefinition()) CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Wrapper); @@ -2469,8 +2511,8 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( CharUnits GuardAlign = CharUnits::One(); Guard->setAlignment(GuardAlign.getQuantity()); - CodeGenFunction(CGM).GenerateCXXGlobalInitFunc(InitFunc, OrderedInits, - Address(Guard, GuardAlign)); + CodeGenFunction(CGM).GenerateCXXGlobalInitFunc( + InitFunc, OrderedInits, ConstantAddress(Guard, GuardAlign)); // On Darwin platforms, use CXX_FAST_TLS calling convention. if (CGM.getTarget().getTriple().isOSDarwin()) { InitFunc->setCallingConv(llvm::CallingConv::CXX_FAST_TLS); @@ -2522,7 +2564,8 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( llvm::GlobalVariable::ExternalWeakLinkage, InitFnName.str(), &CGM.getModule()); const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); - CGM.SetLLVMFunctionAttributes(nullptr, FI, cast<llvm::Function>(Init)); + CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, + cast<llvm::Function>(Init)); } if (Init) { @@ -2722,9 +2765,10 @@ llvm::GlobalVariable *ItaniumRTTIBuilder::GetAddrOfTypeName( // get the mangled name of the type. 
llvm::Constant *Init = llvm::ConstantDataArray::getString(VMContext, Name.substr(4)); + auto Align = CGM.getContext().getTypeAlignInChars(CGM.getContext().CharTy); - llvm::GlobalVariable *GV = - CGM.CreateOrReplaceCXXRuntimeVariable(Name, Init->getType(), Linkage); + llvm::GlobalVariable *GV = CGM.CreateOrReplaceCXXRuntimeVariable( + Name, Init->getType(), Linkage, Align.getQuantity()); GV->setInitializer(Init); @@ -2808,6 +2852,9 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) { #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ case BuiltinType::Id: #include "clang/Basic/OpenCLImageTypes.def" +#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ + case BuiltinType::Id: +#include "clang/Basic/OpenCLExtensionTypes.def" case BuiltinType::OCLSampler: case BuiltinType::OCLEvent: case BuiltinType::OCLClkEvent: @@ -3084,7 +3131,7 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) { } assert(isa<ObjCInterfaceType>(Ty)); - // Fall through. + LLVM_FALLTHROUGH; case Type::ObjCInterface: if (cast<ObjCInterfaceType>(Ty)->getDecl()->getSuperClass()) { @@ -3363,6 +3410,10 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo( if (CGM.supportsCOMDAT() && GV->isWeakForLinker()) GV->setComdat(M.getOrInsertComdat(GV->getName())); + CharUnits Align = + CGM.getContext().toCharUnitsFromBits(CGM.getTarget().getPointerAlign(0)); + GV->setAlignment(Align.getQuantity()); + // The Itanium ABI specifies that type_info objects must be globally // unique, with one exception: if the type is an incomplete class // type or a (possibly indirect) pointer to one. 
That exception @@ -4017,7 +4068,7 @@ static void InitCatchParam(CodeGenFunction &CGF, switch (CatchType.getQualifiers().getObjCLifetime()) { case Qualifiers::OCL_Strong: CastExn = CGF.EmitARCRetainNonBlock(CastExn); - // fallthrough + LLVM_FALLTHROUGH; case Qualifiers::OCL_None: case Qualifiers::OCL_ExplicitNone: @@ -4146,7 +4197,7 @@ void ItaniumCXXABI::emitBeginCatch(CodeGenFunction &CGF, // Emit the local. CodeGenFunction::AutoVarEmission var = CGF.EmitAutoVarAlloca(*CatchParam); - InitCatchParam(CGF, *CatchParam, var.getObjectAddress(CGF), S->getLocStart()); + InitCatchParam(CGF, *CatchParam, var.getObjectAddress(CGF), S->getBeginLoc()); CGF.EmitAutoVarCleanups(var); } diff --git a/lib/CodeGen/MacroPPCallbacks.cpp b/lib/CodeGen/MacroPPCallbacks.cpp index 48dea7d54b1e..013ca15e2391 100644 --- a/lib/CodeGen/MacroPPCallbacks.cpp +++ b/lib/CodeGen/MacroPPCallbacks.cpp @@ -14,7 +14,8 @@ #include "MacroPPCallbacks.h" #include "CGDebugInfo.h" #include "clang/CodeGen/ModuleBuilder.h" -#include "clang/Parse/Parser.h" +#include "clang/Lex/MacroInfo.h" +#include "clang/Lex/Preprocessor.h" using namespace clang; @@ -88,16 +89,6 @@ SourceLocation MacroPPCallbacks::getCorrectLocation(SourceLocation Loc) { return SourceLocation(); } -static bool isBuiltinFile(SourceManager &SM, SourceLocation Loc) { - StringRef Filename(SM.getPresumedLoc(Loc).getFilename()); - return Filename.equals("<built-in>"); -} - -static bool isCommandLineFile(SourceManager &SM, SourceLocation Loc) { - StringRef Filename(SM.getPresumedLoc(Loc).getFilename()); - return Filename.equals("<command line>"); -} - void MacroPPCallbacks::updateStatusToNextScope() { switch (Status) { case NoScope: @@ -127,7 +118,7 @@ void MacroPPCallbacks::FileEntered(SourceLocation Loc) { updateStatusToNextScope(); return; case BuiltinScope: - if (isCommandLineFile(PP.getSourceManager(), Loc)) + if (PP.getSourceManager().isWrittenInCommandLineFile(Loc)) return; updateStatusToNextScope(); LLVM_FALLTHROUGH; @@ -147,7 +138,7 @@ 
void MacroPPCallbacks::FileExited(SourceLocation Loc) { default: llvm_unreachable("Do not expect to exit a file from current scope"); case BuiltinScope: - if (!isBuiltinFile(PP.getSourceManager(), Loc)) + if (!PP.getSourceManager().isWrittenInBuiltinFile(Loc)) // Skip next scope and change status to MainFileScope. Status = MainFileScope; return; diff --git a/lib/CodeGen/MacroPPCallbacks.h b/lib/CodeGen/MacroPPCallbacks.h index 48c67e2d36ad..b87a4005d481 100644 --- a/lib/CodeGen/MacroPPCallbacks.h +++ b/lib/CodeGen/MacroPPCallbacks.h @@ -11,6 +11,9 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_LIB_CODEGEN_MACROPPCALLBACKS_H +#define LLVM_CLANG_LIB_CODEGEN_MACROPPCALLBACKS_H + #include "clang/Lex/PPCallbacks.h" namespace llvm { @@ -116,3 +119,5 @@ public: }; } // end namespace clang + +#endif diff --git a/lib/CodeGen/MicrosoftCXXABI.cpp b/lib/CodeGen/MicrosoftCXXABI.cpp index 059adb78ca30..5545bc6647e6 100644 --- a/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/lib/CodeGen/MicrosoftCXXABI.cpp @@ -1552,9 +1552,9 @@ void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF, if (Type == Dtor_Complete && DD->getParent()->getNumVBases() == 0) Type = Dtor_Base; - CGCallee Callee = CGCallee::forDirect( - CGM.getAddrOfCXXStructor(DD, getFromDtorType(Type)), - DD); + CGCallee Callee = + CGCallee::forDirect(CGM.getAddrOfCXXStructor(DD, getFromDtorType(Type)), + GlobalDecl(DD, Type)); if (DD->isVirtual()) { assert(Type != CXXDtorType::Dtor_Deleting && @@ -1872,7 +1872,7 @@ CGCallee MicrosoftCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF, VFunc = Builder.CreateAlignedLoad(VFuncPtr, CGF.getPointerAlign()); } - CGCallee Callee(MethodDecl->getCanonicalDecl(), VFunc); + CGCallee Callee(GD, VFunc); return Callee; } @@ -2024,8 +2024,10 @@ MicrosoftCXXABI::getAddrOfVBTable(const VPtrInfo &VBT, const CXXRecordDecl *RD, assert(!CGM.getModule().getNamedGlobal(Name) && "vbtable with this name already exists: mangling bug?"); - 
llvm::GlobalVariable *GV = - CGM.CreateOrReplaceCXXRuntimeVariable(Name, VBTableType, Linkage); + CharUnits Alignment = + CGM.getContext().getTypeAlignInChars(CGM.getContext().IntTy); + llvm::GlobalVariable *GV = CGM.CreateOrReplaceCXXRuntimeVariable( + Name, VBTableType, Linkage, Alignment.getQuantity()); GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); if (RD->hasAttr<DLLImportAttr>()) @@ -2240,6 +2242,9 @@ static void emitGlobalDtorWithTLRegDtor(CodeGenFunction &CGF, const VarDecl &VD, void MicrosoftCXXABI::registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D, llvm::Constant *Dtor, llvm::Constant *Addr) { + if (D.isNoDestroy(CGM.getContext())) + return; + if (D.getTLSKind()) return emitGlobalDtorWithTLRegDtor(CGF, D, Dtor, Addr); @@ -3924,7 +3929,7 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD, CallArgList Args; // Push the this ptr. - Args.add(RValue::get(This), CD->getThisType(getContext())); + Args.add(RValue::get(This), CD->getThisType()); // Push the src ptr. if (SrcVal) @@ -3951,7 +3956,8 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD, // Call the destructor with our arguments. 
llvm::Constant *CalleePtr = CGM.getAddrOfCXXStructor(CD, StructorType::Complete); - CGCallee Callee = CGCallee::forDirect(CalleePtr, CD); + CGCallee Callee = + CGCallee::forDirect(CalleePtr, GlobalDecl(CD, Ctor_Complete)); const CGFunctionInfo &CalleeInfo = CGM.getTypes().arrangeCXXConstructorCall( Args, CD, Ctor_Complete, ExtraArgs.Prefix, ExtraArgs.Suffix); CGF.EmitCall(CalleeInfo, Callee, ReturnValueSlot(), Args); diff --git a/lib/CodeGen/ModuleBuilder.cpp b/lib/CodeGen/ModuleBuilder.cpp index 8aa9bfb421b4..c0a37698e762 100644 --- a/lib/CodeGen/ModuleBuilder.cpp +++ b/lib/CodeGen/ModuleBuilder.cpp @@ -17,9 +17,9 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/Expr.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/TargetInfo.h" -#include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/LLVMContext.h" @@ -64,7 +64,7 @@ namespace { std::unique_ptr<CodeGen::CodeGenModule> Builder; private: - SmallVector<CXXMethodDecl *, 8> DeferredInlineMethodDefinitions; + SmallVector<FunctionDecl *, 8> DeferredInlineMemberFuncDefs; public: CodeGeneratorImpl(DiagnosticsEngine &diags, llvm::StringRef ModuleName, @@ -80,7 +80,7 @@ namespace { ~CodeGeneratorImpl() override { // There should normally not be any leftover inline method definitions. 
- assert(DeferredInlineMethodDefinitions.empty() || + assert(DeferredInlineMemberFuncDefs.empty() || Diags.hasErrorOccurred()); } @@ -132,6 +132,9 @@ namespace { M->setTargetTriple(Ctx->getTargetInfo().getTriple().getTriple()); M->setDataLayout(Ctx->getTargetInfo().getDataLayout()); + const auto &SDKVersion = Ctx->getTargetInfo().getSDKVersion(); + if (!SDKVersion.empty()) + M->setSDKVersion(SDKVersion); Builder.reset(new CodeGen::CodeGenModule(Context, HeaderSearchOpts, PreprocessorOpts, CodeGenOpts, *M, Diags, CoverageInfo)); @@ -163,16 +166,16 @@ namespace { } void EmitDeferredDecls() { - if (DeferredInlineMethodDefinitions.empty()) + if (DeferredInlineMemberFuncDefs.empty()) return; // Emit any deferred inline method definitions. Note that more deferred // methods may be added during this loop, since ASTConsumer callbacks // can be invoked if AST inspection results in declarations being added. HandlingTopLevelDeclRAII HandlingDecl(*this); - for (unsigned I = 0; I != DeferredInlineMethodDefinitions.size(); ++I) - Builder->EmitTopLevelDecl(DeferredInlineMethodDefinitions[I]); - DeferredInlineMethodDefinitions.clear(); + for (unsigned I = 0; I != DeferredInlineMemberFuncDefs.size(); ++I) + Builder->EmitTopLevelDecl(DeferredInlineMemberFuncDefs[I]); + DeferredInlineMemberFuncDefs.clear(); } void HandleInlineFunctionDefinition(FunctionDecl *D) override { @@ -181,17 +184,6 @@ namespace { assert(D->doesThisDeclarationHaveABody()); - // Handle friend functions. - if (D->isInIdentifierNamespace(Decl::IDNS_OrdinaryFriend)) { - if (Ctx->getTargetInfo().getCXXABI().isMicrosoft() - && !D->getLexicalDeclContext()->isDependentContext()) - Builder->EmitTopLevelDecl(D); - return; - } - - // Otherwise, must be a method. - auto MD = cast<CXXMethodDecl>(D); - // We may want to emit this definition. 
However, that decision might be // based on computing the linkage, and we have to defer that in case we // are inside of something that will change the method's final linkage, @@ -200,13 +192,13 @@ namespace { // void bar(); // void foo() { bar(); } // } A; - DeferredInlineMethodDefinitions.push_back(MD); + DeferredInlineMemberFuncDefs.push_back(D); // Provide some coverage mapping even for methods that aren't emitted. // Don't do this for templated classes though, as they may not be // instantiable. - if (!MD->getParent()->isDependentContext()) - Builder->AddDeferredUnusedCoverageMapping(MD); + if (!D->getLexicalDeclContext()->isDependentContext()) + Builder->AddDeferredUnusedCoverageMapping(D); } /// HandleTagDeclDefinition - This callback is invoked each time a TagDecl diff --git a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp index c164cec5d942..6f00c836f93d 100644 --- a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp +++ b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp @@ -14,14 +14,13 @@ #include "clang/AST/DeclObjC.h" #include "clang/AST/Expr.h" #include "clang/AST/RecursiveASTVisitor.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/BackendUtil.h" -#include "clang/Frontend/CodeGenOptions.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/Preprocessor.h" -#include "clang/Serialization/ASTWriter.h" #include "llvm/ADT/StringRef.h" #include "llvm/Bitcode/BitstreamReader.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" @@ -156,6 +155,8 @@ public: LangOpts.CurrentModule.empty() ? 
MainFileName : LangOpts.CurrentModule; CodeGenOpts.setDebugInfo(codegenoptions::FullDebugInfo); CodeGenOpts.setDebuggerTuning(CI.getCodeGenOpts().getDebuggerTuning()); + CodeGenOpts.DebugPrefixMap = + CI.getInvocation().getCodeGenOpts().DebugPrefixMap; } ~PCHContainerGenerator() override = default; diff --git a/lib/CodeGen/SwiftCallingConv.cpp b/lib/CodeGen/SwiftCallingConv.cpp index b411a501ea81..75a0fa5ce189 100644 --- a/lib/CodeGen/SwiftCallingConv.cpp +++ b/lib/CodeGen/SwiftCallingConv.cpp @@ -415,6 +415,40 @@ static bool areBytesInSameUnit(CharUnits first, CharUnits second, == getOffsetAtStartOfUnit(second, chunkSize); } +static bool isMergeableEntryType(llvm::Type *type) { + // Opaquely-typed memory is always mergeable. + if (type == nullptr) return true; + + // Pointers and integers are always mergeable. In theory we should not + // merge pointers, but (1) it doesn't currently matter in practice because + // the chunk size is never greater than the size of a pointer and (2) + // Swift IRGen uses integer types for a lot of things that are "really" + // just storing pointers (like Optional<SomePointer>). If we ever have a + // target that would otherwise combine pointers, we should put some effort + // into fixing those cases in Swift IRGen and then call out pointer types + // here. + + // Floating-point and vector types should never be merged. + // Most such types are too large and highly-aligned to ever trigger merging + // in practice, but it's important for the rule to cover at least 'half' + // and 'float', as well as things like small vectors of 'i1' or 'i8'. + return (!type->isFloatingPointTy() && !type->isVectorTy()); +} + +bool SwiftAggLowering::shouldMergeEntries(const StorageEntry &first, + const StorageEntry &second, + CharUnits chunkSize) { + // Only merge entries that overlap the same chunk. We test this first + // despite being a bit more expensive because this is the condition that + // tends to prevent merging. 
+ if (!areBytesInSameUnit(first.End - CharUnits::One(), second.Begin, + chunkSize)) + return false; + + return (isMergeableEntryType(first.Type) && + isMergeableEntryType(second.Type)); +} + void SwiftAggLowering::finish() { if (Entries.empty()) { Finished = true; @@ -425,12 +459,12 @@ void SwiftAggLowering::finish() { // which is generally the size of a pointer. const CharUnits chunkSize = getMaximumVoluntaryIntegerSize(CGM); - // First pass: if two entries share a chunk, make them both opaque + // First pass: if two entries should be merged, make them both opaque // and stretch one to meet the next. + // Also, remember if there are any opaque entries. bool hasOpaqueEntries = (Entries[0].Type == nullptr); for (size_t i = 1, e = Entries.size(); i != e; ++i) { - if (areBytesInSameUnit(Entries[i - 1].End - CharUnits::One(), - Entries[i].Begin, chunkSize)) { + if (shouldMergeEntries(Entries[i - 1], Entries[i], chunkSize)) { Entries[i - 1].Type = nullptr; Entries[i].Type = nullptr; Entries[i - 1].End = Entries[i].Begin; diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index 6f6c5f50c2e7..89ec73670a73 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -19,9 +19,9 @@ #include "CGValue.h" #include "CodeGenFunction.h" #include "clang/AST/RecordLayout.h" +#include "clang/Basic/CodeGenOptions.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "clang/CodeGen/SwiftCallingConv.h" -#include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" @@ -720,10 +720,12 @@ ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const { // This is a very simple ABI that relies a lot on DefaultABIInfo. 
//===----------------------------------------------------------------------===// -class WebAssemblyABIInfo final : public DefaultABIInfo { +class WebAssemblyABIInfo final : public SwiftABIInfo { + DefaultABIInfo defaultInfo; + public: explicit WebAssemblyABIInfo(CodeGen::CodeGenTypes &CGT) - : DefaultABIInfo(CGT) {} + : SwiftABIInfo(CGT), defaultInfo(CGT) {} private: ABIArgInfo classifyReturnType(QualType RetTy) const; @@ -741,6 +743,15 @@ private: Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; + + bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars, + bool asReturnValue) const override { + return occupiesMoreThan(CGT, scalars, /*total*/ 4); + } + + bool isSwiftErrorInRegister() const override { + return false; + } }; class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo { @@ -778,7 +789,7 @@ ABIArgInfo WebAssemblyABIInfo::classifyArgumentType(QualType Ty) const { } // Otherwise just do the default thing. - return DefaultABIInfo::classifyArgumentType(Ty); + return defaultInfo.classifyArgumentType(Ty); } ABIArgInfo WebAssemblyABIInfo::classifyReturnType(QualType RetTy) const { @@ -798,7 +809,7 @@ ABIArgInfo WebAssemblyABIInfo::classifyReturnType(QualType RetTy) const { } // Otherwise just do the default thing. - return DefaultABIInfo::classifyReturnType(RetTy); + return defaultInfo.classifyReturnType(RetTy); } Address WebAssemblyABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, @@ -2337,7 +2348,7 @@ static std::string qualifyWindowsLibrary(llvm::StringRef Lib) { bool Quote = (Lib.find(" ") != StringRef::npos); std::string ArgStr = Quote ? "\"" : ""; ArgStr += Lib; - if (!Lib.endswith_lower(".lib")) + if (!Lib.endswith_lower(".lib") && !Lib.endswith_lower(".a")) ArgStr += ".lib"; ArgStr += Quote ? 
"\"" : ""; return ArgStr; @@ -3944,18 +3955,39 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Width)); } - // Bool type is always extended to the ABI, other builtin types are not - // extended. - const BuiltinType *BT = Ty->getAs<BuiltinType>(); - if (BT && BT->getKind() == BuiltinType::Bool) - return ABIArgInfo::getExtend(Ty); + if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { + switch (BT->getKind()) { + case BuiltinType::Bool: + // Bool type is always extended to the ABI, other builtin types are not + // extended. + return ABIArgInfo::getExtend(Ty); - // Mingw64 GCC uses the old 80 bit extended precision floating point unit. It - // passes them indirectly through memory. - if (IsMingw64 && BT && BT->getKind() == BuiltinType::LongDouble) { - const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); - if (LDF == &llvm::APFloat::x87DoubleExtended()) - return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); + case BuiltinType::LongDouble: + // Mingw64 GCC uses the old 80 bit extended precision floating point + // unit. It passes them indirectly through memory. + if (IsMingw64) { + const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); + if (LDF == &llvm::APFloat::x87DoubleExtended()) + return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); + } + break; + + case BuiltinType::Int128: + case BuiltinType::UInt128: + // If it's a parameter type, the normal ABI rule is that arguments larger + // than 8 bytes are passed indirectly. GCC follows it. We follow it too, + // even though it isn't particularly efficient. + if (!IsReturnType) + return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); + + // Mingw64 GCC returns i128 in XMM0. Coerce to v2i64 to handle that. + // Clang matches them for compatibility. 
+ return ABIArgInfo::getDirect( + llvm::VectorType::get(llvm::Type::getInt64Ty(getVMContext()), 2)); + + default: + break; + } } return ABIArgInfo::getDirect(); @@ -4969,6 +5001,31 @@ public: } bool doesReturnSlotInterfereWithArgs() const override { return false; } + + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const override { + const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D); + if (!FD) + return; + llvm::Function *Fn = cast<llvm::Function>(GV); + + auto Kind = CGM.getCodeGenOpts().getSignReturnAddress(); + if (Kind != CodeGenOptions::SignReturnAddressScope::None) { + Fn->addFnAttr("sign-return-address", + Kind == CodeGenOptions::SignReturnAddressScope::All + ? "all" + : "non-leaf"); + + auto Key = CGM.getCodeGenOpts().getSignReturnAddressKey(); + Fn->addFnAttr("sign-return-address-key", + Key == CodeGenOptions::SignReturnAddressKeyValue::AKey + ? "a_key" + : "b_key"); + } + + if (CGM.getCodeGenOpts().BranchTargetEnforcement) + Fn->addFnAttr("branch-target-enforcement"); + } }; class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo { @@ -4976,6 +5033,9 @@ public: WindowsAArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIInfo::ABIKind K) : AArch64TargetCodeGenInfo(CGT, K) {} + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const override; + void getDependentLibraryOption(llvm::StringRef Lib, llvm::SmallString<24> &Opt) const override { Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib); @@ -4986,6 +5046,14 @@ public: Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; } }; + +void WindowsAArch64TargetCodeGenInfo::setTargetAttributes( + const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { + AArch64TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); + if (GV->isDeclaration()) + return; + addStackProbeTargetAttributes(D, GV, CGM); +} } ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { 
@@ -5532,6 +5600,9 @@ public: private: ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic) const; ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic) const; + ABIArgInfo classifyHomogeneousAggregate(QualType Ty, const Type *Base, + uint64_t Members) const; + ABIArgInfo coerceIllegalVector(QualType Ty) const; bool isIllegalVectorType(QualType Ty) const; bool isHomogeneousAggregateBaseType(QualType Ty) const override; @@ -5706,6 +5777,41 @@ void ARMABIInfo::setCCs() { RuntimeCC = abiCC; } +ABIArgInfo ARMABIInfo::coerceIllegalVector(QualType Ty) const { + uint64_t Size = getContext().getTypeSize(Ty); + if (Size <= 32) { + llvm::Type *ResType = + llvm::Type::getInt32Ty(getVMContext()); + return ABIArgInfo::getDirect(ResType); + } + if (Size == 64 || Size == 128) { + llvm::Type *ResType = llvm::VectorType::get( + llvm::Type::getInt32Ty(getVMContext()), Size / 32); + return ABIArgInfo::getDirect(ResType); + } + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); +} + +ABIArgInfo ARMABIInfo::classifyHomogeneousAggregate(QualType Ty, + const Type *Base, + uint64_t Members) const { + assert(Base && "Base class should be set for homogeneous aggregate"); + // Base can be a floating-point or a vector. 
+ if (const VectorType *VT = Base->getAs<VectorType>()) { + // FP16 vectors should be converted to integer vectors + if (!getTarget().hasLegalHalfType() && + (VT->getElementType()->isFloat16Type() || + VT->getElementType()->isHalfType())) { + uint64_t Size = getContext().getTypeSize(VT); + llvm::Type *NewVecTy = llvm::VectorType::get( + llvm::Type::getInt32Ty(getVMContext()), Size / 32); + llvm::Type *Ty = llvm::ArrayType::get(NewVecTy, Members); + return ABIArgInfo::getDirect(Ty, 0, nullptr, false); + } + } + return ABIArgInfo::getDirect(nullptr, 0, nullptr, false); +} + ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic) const { // 6.1.2.1 The following argument types are VFP CPRCs: @@ -5720,25 +5826,8 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, Ty = useFirstFieldIfTransparentUnion(Ty); // Handle illegal vector types here. - if (isIllegalVectorType(Ty)) { - uint64_t Size = getContext().getTypeSize(Ty); - if (Size <= 32) { - llvm::Type *ResType = - llvm::Type::getInt32Ty(getVMContext()); - return ABIArgInfo::getDirect(ResType); - } - if (Size == 64) { - llvm::Type *ResType = llvm::VectorType::get( - llvm::Type::getInt32Ty(getVMContext()), 2); - return ABIArgInfo::getDirect(ResType); - } - if (Size == 128) { - llvm::Type *ResType = llvm::VectorType::get( - llvm::Type::getInt32Ty(getVMContext()), 4); - return ABIArgInfo::getDirect(ResType); - } - return getNaturalAlignIndirect(Ty, /*ByVal=*/false); - } + if (isIllegalVectorType(Ty)) + return coerceIllegalVector(Ty); // _Float16 and __fp16 get passed as if it were an int or float, but with // the top 16 bits unspecified. This is not done for OpenCL as it handles the @@ -5774,11 +5863,8 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, // into VFP registers. 
const Type *Base = nullptr; uint64_t Members = 0; - if (isHomogeneousAggregate(Ty, Base, Members)) { - assert(Base && "Base class should be set for homogeneous aggregate"); - // Base can be a floating-point or a vector. - return ABIArgInfo::getDirect(nullptr, 0, nullptr, false); - } + if (isHomogeneousAggregate(Ty, Base, Members)) + return classifyHomogeneousAggregate(Ty, Base, Members); } else if (getABIKind() == ARMABIInfo::AAPCS16_VFP) { // WatchOS does have homogeneous aggregates. Note that we intentionally use // this convention even for a variadic function: the backend will use GPRs @@ -5937,9 +6023,15 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); - // Large vector types should be returned via memory. - if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128) { - return getNaturalAlignIndirect(RetTy); + if (const VectorType *VT = RetTy->getAs<VectorType>()) { + // Large vector types should be returned via memory. + if (getContext().getTypeSize(RetTy) > 128) + return getNaturalAlignIndirect(RetTy); + // FP16 vectors should be converted to integer vectors + if (!getTarget().hasLegalHalfType() && + (VT->getElementType()->isFloat16Type() || + VT->getElementType()->isHalfType())) + return coerceIllegalVector(RetTy); } // _Float16 and __fp16 get returned as if it were an int or float, but with @@ -5999,11 +6091,8 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, if (IsEffectivelyAAPCS_VFP) { const Type *Base = nullptr; uint64_t Members = 0; - if (isHomogeneousAggregate(RetTy, Base, Members)) { - assert(Base && "Base class should be set for homogeneous aggregate"); - // Homogeneous Aggregates are returned directly. 
- return ABIArgInfo::getDirect(nullptr, 0, nullptr, false); - } + if (isHomogeneousAggregate(RetTy, Base, Members)) + return classifyHomogeneousAggregate(RetTy, Base, Members); } // Aggregates <= 4 bytes are returned in r0; other aggregates @@ -6038,6 +6127,13 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, /// isIllegalVector - check whether Ty is an illegal vector type. bool ARMABIInfo::isIllegalVectorType(QualType Ty) const { if (const VectorType *VT = Ty->getAs<VectorType> ()) { + // On targets that don't support FP16, FP16 is expanded into float, and we + // don't want the ABI to depend on whether or not FP16 is supported in + // hardware. Thus return true to coerce FP16 vectors into integer vectors. + if (!getTarget().hasLegalHalfType() && + (VT->getElementType()->isFloat16Type() || + VT->getElementType()->isHalfType())) + return true; if (isAndroid()) { // Android shipped using Clang 3.1, which supported a slightly different // vector ABI. The primary differences were that 3-element vector types @@ -8164,6 +8260,137 @@ SparcV9TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, return false; } +// ARC ABI implementation. +namespace { + +class ARCABIInfo : public DefaultABIInfo { +public: + using DefaultABIInfo::DefaultABIInfo; + +private: + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; + + void updateState(const ABIArgInfo &Info, QualType Ty, CCState &State) const { + if (!State.FreeRegs) + return; + if (Info.isIndirect() && Info.getInReg()) + State.FreeRegs--; + else if (Info.isDirect() && Info.getInReg()) { + unsigned sz = (getContext().getTypeSize(Ty) + 31) / 32; + if (sz < State.FreeRegs) + State.FreeRegs -= sz; + else + State.FreeRegs = 0; + } + } + + void computeInfo(CGFunctionInfo &FI) const override { + CCState State(FI.getCallingConvention()); + // ARC uses 8 registers to pass arguments.
+ State.FreeRegs = 8; + + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + updateState(FI.getReturnInfo(), FI.getReturnType(), State); + for (auto &I : FI.arguments()) { + I.info = classifyArgumentType(I.type, State.FreeRegs); + updateState(I.info, I.type, State); + } + } + + ABIArgInfo getIndirectByRef(QualType Ty, bool HasFreeRegs) const; + ABIArgInfo getIndirectByValue(QualType Ty) const; + ABIArgInfo classifyArgumentType(QualType Ty, uint8_t FreeRegs) const; + ABIArgInfo classifyReturnType(QualType RetTy) const; +}; + +class ARCTargetCodeGenInfo : public TargetCodeGenInfo { +public: + ARCTargetCodeGenInfo(CodeGenTypes &CGT) + : TargetCodeGenInfo(new ARCABIInfo(CGT)) {} +}; + + +ABIArgInfo ARCABIInfo::getIndirectByRef(QualType Ty, bool HasFreeRegs) const { + return HasFreeRegs ? getNaturalAlignIndirectInReg(Ty) : + getNaturalAlignIndirect(Ty, false); +} + +ABIArgInfo ARCABIInfo::getIndirectByValue(QualType Ty) const { + // Compute the byval alignment. + const unsigned MinABIStackAlignInBytes = 4; + unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8; + return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true, + TypeAlign > MinABIStackAlignInBytes); +} + +Address ARCABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false, + getContext().getTypeInfoInChars(Ty), + CharUnits::fromQuantity(4), true); +} + +ABIArgInfo ARCABIInfo::classifyArgumentType(QualType Ty, + uint8_t FreeRegs) const { + // Handle the generic C++ ABI. + const RecordType *RT = Ty->getAs<RecordType>(); + if (RT) { + CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()); + if (RAA == CGCXXABI::RAA_Indirect) + return getIndirectByRef(Ty, FreeRegs > 0); + + if (RAA == CGCXXABI::RAA_DirectInMemory) + return getIndirectByValue(Ty); + } + + // Treat an enum type as its underlying type. 
+ if (const EnumType *EnumTy = Ty->getAs<EnumType>()) + Ty = EnumTy->getDecl()->getIntegerType(); + + auto SizeInRegs = llvm::alignTo(getContext().getTypeSize(Ty), 32) / 32; + + if (isAggregateTypeForABI(Ty)) { + // Structures with flexible arrays are always indirect. + if (RT && RT->getDecl()->hasFlexibleArrayMember()) + return getIndirectByValue(Ty); + + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), Ty, true)) + return ABIArgInfo::getIgnore(); + + llvm::LLVMContext &LLVMContext = getVMContext(); + + llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext); + SmallVector<llvm::Type *, 3> Elements(SizeInRegs, Int32); + llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements); + + return FreeRegs >= SizeInRegs ? + ABIArgInfo::getDirectInReg(Result) : + ABIArgInfo::getDirect(Result, 0, nullptr, false); + } + + return Ty->isPromotableIntegerType() ? + (FreeRegs >= SizeInRegs ? ABIArgInfo::getExtendInReg(Ty) : + ABIArgInfo::getExtend(Ty)) : + (FreeRegs >= SizeInRegs ? ABIArgInfo::getDirectInReg() : + ABIArgInfo::getDirect()); +} + +ABIArgInfo ARCABIInfo::classifyReturnType(QualType RetTy) const { + if (RetTy->isAnyComplexType()) + return ABIArgInfo::getDirectInReg(); + + // Return values of size > 4 registers are indirect. + auto RetSize = llvm::alignTo(getContext().getTypeSize(RetTy), 32) / 32; + if (RetSize > 4) + return getIndirectByRef(RetTy, /*HasFreeRegs*/ true); + + return DefaultABIInfo::classifyReturnType(RetTy); +} + +} // End anonymous namespace. //===----------------------------------------------------------------------===// // XCore ABI Implementation @@ -8553,7 +8780,7 @@ static bool appendRecordType(SmallStringEnc &Enc, const RecordType *RT, // The ABI requires unions to be sorted but not structures. // See FieldEncoding::operator< for sort algorithm. if (RT->isUnionType()) - llvm::sort(FE.begin(), FE.end()); + llvm::sort(FE); // We can now complete the TypeString.
unsigned E = FE.size(); for (unsigned I = 0; I != E; ++I) { @@ -8597,7 +8824,7 @@ static bool appendEnumType(SmallStringEnc &Enc, const EnumType *ET, EnumEnc += '}'; FE.push_back(FieldEncoding(!I->getName().empty(), EnumEnc)); } - llvm::sort(FE.begin(), FE.end()); + llvm::sort(FE); unsigned E = FE.size(); for (unsigned I = 0; I != E; ++I) { if (I) @@ -9185,6 +9412,8 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { return SetCGInfo(new SparcV9TargetCodeGenInfo(Types)); case llvm::Triple::xcore: return SetCGInfo(new XCoreTargetCodeGenInfo(Types)); + case llvm::Triple::arc: + return SetCGInfo(new ARCTargetCodeGenInfo(Types)); case llvm::Triple::spir: case llvm::Triple::spir64: return SetCGInfo(new SPIRTargetCodeGenInfo(Types)); diff --git a/lib/CodeGen/VarBypassDetector.cpp b/lib/CodeGen/VarBypassDetector.cpp index 2f8a591a3e7f..859cdd4282cc 100644 --- a/lib/CodeGen/VarBypassDetector.cpp +++ b/lib/CodeGen/VarBypassDetector.cpp @@ -78,7 +78,7 @@ bool VarBypassDetector::BuildScopeInformation(const Stmt *S, return false; ++StmtsToSkip; } - // Fall through + LLVM_FALLTHROUGH; case Stmt::GotoStmtClass: FromScopes.push_back({S, ParentScope}); diff --git a/lib/CodeGen/VarBypassDetector.h b/lib/CodeGen/VarBypassDetector.h index f50baf4bab9f..47fe13cfacd6 100644 --- a/lib/CodeGen/VarBypassDetector.h +++ b/lib/CodeGen/VarBypassDetector.h @@ -15,6 +15,7 @@ #ifndef LLVM_CLANG_LIB_CODEGEN_VARBYPASSDETECTOR_H #define LLVM_CLANG_LIB_CODEGEN_VARBYPASSDETECTOR_H +#include "clang/AST/Decl.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" |