Diffstat (limited to 'lib/CodeGen')
-rw-r--r--  lib/CodeGen/BackendUtil.cpp  129
-rw-r--r--  lib/CodeGen/CGAtomic.cpp  28
-rw-r--r--  lib/CodeGen/CGBlocks.cpp  886
-rw-r--r--  lib/CodeGen/CGBlocks.h  10
-rw-r--r--  lib/CodeGen/CGBuiltin.cpp  1787
-rw-r--r--  lib/CodeGen/CGCUDANV.cpp  33
-rw-r--r--  lib/CodeGen/CGCXX.cpp  4
-rw-r--r--  lib/CodeGen/CGCXXABI.cpp  2
-rw-r--r--  lib/CodeGen/CGCall.cpp  128
-rw-r--r--  lib/CodeGen/CGCall.h  14
-rw-r--r--  lib/CodeGen/CGClass.cpp  33
-rw-r--r--  lib/CodeGen/CGCleanup.cpp  4
-rw-r--r--  lib/CodeGen/CGCoroutine.cpp  14
-rw-r--r--  lib/CodeGen/CGDebugInfo.cpp  505
-rw-r--r--  lib/CodeGen/CGDebugInfo.h  64
-rw-r--r--  lib/CodeGen/CGDecl.cpp  568
-rw-r--r--  lib/CodeGen/CGDeclCXX.cpp  103
-rw-r--r--  lib/CodeGen/CGException.cpp  45
-rw-r--r--  lib/CodeGen/CGExpr.cpp  159
-rw-r--r--  lib/CodeGen/CGExprAgg.cpp  9
-rw-r--r--  lib/CodeGen/CGExprCXX.cpp  36
-rw-r--r--  lib/CodeGen/CGExprComplex.cpp  8
-rw-r--r--  lib/CodeGen/CGExprConstant.cpp  46
-rw-r--r--  lib/CodeGen/CGExprScalar.cpp  455
-rw-r--r--  lib/CodeGen/CGLoopInfo.cpp  129
-rw-r--r--  lib/CodeGen/CGLoopInfo.h  33
-rw-r--r--  lib/CodeGen/CGNonTrivialStruct.cpp  31
-rw-r--r--  lib/CodeGen/CGObjC.cpp  511
-rw-r--r--  lib/CodeGen/CGObjCGNU.cpp  335
-rw-r--r--  lib/CodeGen/CGObjCMac.cpp  84
-rw-r--r--  lib/CodeGen/CGObjCRuntime.cpp  60
-rw-r--r--  lib/CodeGen/CGObjCRuntime.h  7
-rw-r--r--  lib/CodeGen/CGOpenCLRuntime.cpp  40
-rw-r--r--  lib/CodeGen/CGOpenCLRuntime.h  5
-rw-r--r--  lib/CodeGen/CGOpenMPRuntime.cpp  1079
-rw-r--r--  lib/CodeGen/CGOpenMPRuntime.h  117
-rw-r--r--  lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp  2371
-rw-r--r--  lib/CodeGen/CGOpenMPRuntimeNVPTX.h  99
-rw-r--r--  lib/CodeGen/CGRecordLayoutBuilder.cpp  2
-rw-r--r--  lib/CodeGen/CGStmt.cpp  54
-rw-r--r--  lib/CodeGen/CGStmtOpenMP.cpp  443
-rw-r--r--  lib/CodeGen/CGVTT.cpp  6
-rw-r--r--  lib/CodeGen/CGVTables.cpp  27
-rw-r--r--  lib/CodeGen/CGValue.h  5
-rw-r--r--  lib/CodeGen/CMakeLists.txt  1
-rw-r--r--  lib/CodeGen/CodeGenABITypes.cpp  1
-rw-r--r--  lib/CodeGen/CodeGenAction.cpp  17
-rw-r--r--  lib/CodeGen/CodeGenFunction.cpp  325
-rw-r--r--  lib/CodeGen/CodeGenFunction.h  179
-rw-r--r--  lib/CodeGen/CodeGenModule.cpp  648
-rw-r--r--  lib/CodeGen/CodeGenModule.h  50
-rw-r--r--  lib/CodeGen/CodeGenPGO.cpp  11
-rw-r--r--  lib/CodeGen/CodeGenPGO.h  1
-rw-r--r--  lib/CodeGen/CodeGenTBAA.cpp  2
-rw-r--r--  lib/CodeGen/CodeGenTypes.cpp  3
-rw-r--r--  lib/CodeGen/CodeGenTypes.h  1
-rw-r--r--  lib/CodeGen/ConstantEmitter.h  3
-rw-r--r--  lib/CodeGen/CoverageMappingGen.cpp  107
-rw-r--r--  lib/CodeGen/CoverageMappingGen.h  1
-rw-r--r--  lib/CodeGen/ItaniumCXXABI.cpp  103
-rw-r--r--  lib/CodeGen/MacroPPCallbacks.cpp  17
-rw-r--r--  lib/CodeGen/MacroPPCallbacks.h  5
-rw-r--r--  lib/CodeGen/MicrosoftCXXABI.cpp  22
-rw-r--r--  lib/CodeGen/ModuleBuilder.cpp  34
-rw-r--r--  lib/CodeGen/ObjectFilePCHContainerOperations.cpp  5
-rw-r--r--  lib/CodeGen/SwiftCallingConv.cpp  40
-rw-r--r--  lib/CodeGen/TargetInfo.cpp  331
-rw-r--r--  lib/CodeGen/VarBypassDetector.cpp  2
-rw-r--r--  lib/CodeGen/VarBypassDetector.h  1
69 files changed, 9104 insertions, 3314 deletions
diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp
index 415bd9626220..b927acabac59 100644
--- a/lib/CodeGen/BackendUtil.cpp
+++ b/lib/CodeGen/BackendUtil.cpp
@@ -8,10 +8,10 @@
//===----------------------------------------------------------------------===//
#include "clang/CodeGen/BackendUtil.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/TargetOptions.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "clang/Frontend/FrontendDiagnostic.h"
#include "clang/Frontend/Utils.h"
#include "clang/Lex/HeaderSearchOptions.h"
@@ -37,6 +37,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Support/BuryPointer.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
@@ -54,10 +55,13 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Instrumentation/BoundsChecking.h"
#include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
+#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
+#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
#include "llvm/Transforms/ObjCARC.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
#include <memory>
@@ -235,11 +239,12 @@ static void addAddressSanitizerPasses(const PassManagerBuilder &Builder,
const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts();
bool Recover = CGOpts.SanitizeRecover.has(SanitizerKind::Address);
bool UseAfterScope = CGOpts.SanitizeAddressUseAfterScope;
+ bool UseOdrIndicator = CGOpts.SanitizeAddressUseOdrIndicator;
bool UseGlobalsGC = asanUseGlobalsGC(T, CGOpts);
PM.add(createAddressSanitizerFunctionPass(/*CompileKernel*/ false, Recover,
UseAfterScope));
PM.add(createAddressSanitizerModulePass(/*CompileKernel*/ false, Recover,
- UseGlobalsGC));
+ UseGlobalsGC, UseOdrIndicator));
}
static void addKernelAddressSanitizerPasses(const PassManagerBuilder &Builder,
@@ -247,7 +252,8 @@ static void addKernelAddressSanitizerPasses(const PassManagerBuilder &Builder,
PM.add(createAddressSanitizerFunctionPass(
/*CompileKernel*/ true, /*Recover*/ true, /*UseAfterScope*/ false));
PM.add(createAddressSanitizerModulePass(
- /*CompileKernel*/ true, /*Recover*/ true));
+ /*CompileKernel*/ true, /*Recover*/ true, /*UseGlobalsGC*/ true,
+ /*UseOdrIndicator*/ false));
}
static void addHWAddressSanitizerPasses(const PassManagerBuilder &Builder,
@@ -265,14 +271,15 @@ static void addKernelHWAddressSanitizerPasses(const PassManagerBuilder &Builder,
/*CompileKernel*/ true, /*Recover*/ true));
}
-static void addMemorySanitizerPass(const PassManagerBuilder &Builder,
- legacy::PassManagerBase &PM) {
+static void addGeneralOptsForMemorySanitizer(const PassManagerBuilder &Builder,
+ legacy::PassManagerBase &PM,
+ bool CompileKernel) {
const PassManagerBuilderWrapper &BuilderWrapper =
static_cast<const PassManagerBuilderWrapper&>(Builder);
const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts();
int TrackOrigins = CGOpts.SanitizeMemoryTrackOrigins;
bool Recover = CGOpts.SanitizeRecover.has(SanitizerKind::Memory);
- PM.add(createMemorySanitizerPass(TrackOrigins, Recover));
+ PM.add(createMemorySanitizerLegacyPassPass(TrackOrigins, Recover, CompileKernel));
// MemorySanitizer inserts complex instrumentation that mostly follows
// the logic of the original code, but operates on "shadow" values.
@@ -287,9 +294,19 @@ static void addMemorySanitizerPass(const PassManagerBuilder &Builder,
}
}
+static void addMemorySanitizerPass(const PassManagerBuilder &Builder,
+ legacy::PassManagerBase &PM) {
+ addGeneralOptsForMemorySanitizer(Builder, PM, /*CompileKernel*/ false);
+}
+
+static void addKernelMemorySanitizerPass(const PassManagerBuilder &Builder,
+ legacy::PassManagerBase &PM) {
+ addGeneralOptsForMemorySanitizer(Builder, PM, /*CompileKernel*/ true);
+}
+
static void addThreadSanitizerPass(const PassManagerBuilder &Builder,
legacy::PassManagerBase &PM) {
- PM.add(createThreadSanitizerPass());
+ PM.add(createThreadSanitizerLegacyPassPass());
}
static void addDataFlowSanitizerPass(const PassManagerBuilder &Builder,
@@ -368,6 +385,7 @@ static CodeGenOpt::Level getCGOptLevel(const CodeGenOptions &CodeGenOpts) {
static Optional<llvm::CodeModel::Model>
getCodeModel(const CodeGenOptions &CodeGenOpts) {
unsigned CodeModel = llvm::StringSwitch<unsigned>(CodeGenOpts.CodeModel)
+ .Case("tiny", llvm::CodeModel::Tiny)
.Case("small", llvm::CodeModel::Small)
.Case("kernel", llvm::CodeModel::Kernel)
.Case("medium", llvm::CodeModel::Medium)
@@ -416,7 +434,7 @@ static void initTargetOptions(llvm::TargetOptions &Options,
switch (LangOpts.getDefaultFPContractMode()) {
case LangOptions::FPC_Off:
// Preserve any contraction performed by the front-end. (Strict performs
- // splitting of the muladd instrinsic in the backend.)
+ // splitting of the muladd intrinsic in the backend.)
Options.AllowFPOpFusion = llvm::FPOpFusion::Standard;
break;
case LangOptions::FPC_On:
@@ -456,7 +474,7 @@ static void initTargetOptions(llvm::TargetOptions &Options,
Options.EmitStackSizeSection = CodeGenOpts.StackSizeSection;
Options.EmitAddrsig = CodeGenOpts.Addrsig;
- if (CodeGenOpts.EnableSplitDwarf)
+ if (CodeGenOpts.getSplitDwarfMode() != CodeGenOptions::NoFission)
Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile;
Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll;
Options.MCOptions.MCSaveTempLabels = CodeGenOpts.SaveTempLabels;
@@ -491,6 +509,8 @@ static Optional<GCOVOptions> getGCOVOptions(const CodeGenOptions &CodeGenOpts) {
Options.UseCfgChecksum = CodeGenOpts.CoverageExtraChecksum;
Options.NoRedZone = CodeGenOpts.DisableRedZone;
Options.FunctionNamesInData = !CodeGenOpts.CoverageNoFunctionNamesInData;
+ Options.Filter = CodeGenOpts.ProfileFilterFiles;
+ Options.Exclude = CodeGenOpts.ProfileExcludeFiles;
Options.ExitBlockBeforeBody = CodeGenOpts.CoverageExitBlockBeforeBody;
return Options;
}
@@ -613,6 +633,13 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
addMemorySanitizerPass);
}
+ if (LangOpts.Sanitize.has(SanitizerKind::KernelMemory)) {
+ PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast,
+ addKernelMemorySanitizerPass);
+ PMBuilder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0,
+ addKernelMemorySanitizerPass);
+ }
+
if (LangOpts.Sanitize.has(SanitizerKind::Thread)) {
PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast,
addThreadSanitizerPass);
@@ -653,6 +680,11 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
InstrProfOptions Options;
Options.NoRedZone = CodeGenOpts.DisableRedZone;
Options.InstrProfileOutput = CodeGenOpts.InstrProfileOutput;
+
+ // TODO: Surface the option to emit atomic profile counter increments at
+ // the driver level.
+ Options.Atomic = LangOpts.Sanitize.has(SanitizerKind::Thread);
+
MPM.add(createInstrProfilingLegacyPass(Options));
}
if (CodeGenOpts.hasProfileIRInstr()) {
@@ -777,12 +809,14 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
break;
case Backend_EmitBC:
- if (CodeGenOpts.PrepareForThinLTO) {
+ if (CodeGenOpts.PrepareForThinLTO && !CodeGenOpts.DisableLLVMPasses) {
if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) {
ThinLinkOS = openOutputFile(CodeGenOpts.ThinLinkBitcodeFile);
if (!ThinLinkOS)
return;
}
+ TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit",
+ CodeGenOpts.EnableSplitLTOUnit);
PerModulePasses.add(createWriteThinLTOBitcodePass(
*OS, ThinLinkOS ? &ThinLinkOS->os() : nullptr));
} else {
@@ -790,14 +824,18 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
// targets
bool EmitLTOSummary =
(CodeGenOpts.PrepareForLTO &&
+ !CodeGenOpts.DisableLLVMPasses &&
llvm::Triple(TheModule->getTargetTriple()).getVendor() !=
llvm::Triple::Apple);
- if (EmitLTOSummary && !TheModule->getModuleFlag("ThinLTO"))
- TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
+ if (EmitLTOSummary) {
+ if (!TheModule->getModuleFlag("ThinLTO"))
+ TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
+ TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit",
+ CodeGenOpts.EnableSplitLTOUnit);
+ }
- PerModulePasses.add(
- createBitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists,
- EmitLTOSummary));
+ PerModulePasses.add(createBitcodeWriterPass(
+ *OS, CodeGenOpts.EmitLLVMUseLists, EmitLTOSummary));
}
break;
@@ -807,7 +845,8 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
break;
default:
- if (!CodeGenOpts.SplitDwarfFile.empty()) {
+ if (!CodeGenOpts.SplitDwarfFile.empty() &&
+ (CodeGenOpts.getSplitDwarfMode() == CodeGenOptions::SplitFileFission)) {
DwoOS = openOutputFile(CodeGenOpts.SplitDwarfFile);
if (!DwoOS)
return;
@@ -905,18 +944,21 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
PGOOpt = PGOOptions(CodeGenOpts.InstrProfileOutput.empty()
? DefaultProfileGenName
: CodeGenOpts.InstrProfileOutput,
- "", "", true, CodeGenOpts.DebugInfoForProfiling);
+ "", "", "", true,
+ CodeGenOpts.DebugInfoForProfiling);
else if (CodeGenOpts.hasProfileIRUse())
// -fprofile-use.
- PGOOpt = PGOOptions("", CodeGenOpts.ProfileInstrumentUsePath, "", false,
+ PGOOpt = PGOOptions("", CodeGenOpts.ProfileInstrumentUsePath, "",
+ CodeGenOpts.ProfileRemappingFile, false,
CodeGenOpts.DebugInfoForProfiling);
else if (!CodeGenOpts.SampleProfileFile.empty())
// -fprofile-sample-use
- PGOOpt = PGOOptions("", "", CodeGenOpts.SampleProfileFile, false,
+ PGOOpt = PGOOptions("", "", CodeGenOpts.SampleProfileFile,
+ CodeGenOpts.ProfileRemappingFile, false,
CodeGenOpts.DebugInfoForProfiling);
else if (CodeGenOpts.DebugInfoForProfiling)
// -fdebug-info-for-profiling
- PGOOpt = PGOOptions("", "", "", false, true);
+ PGOOpt = PGOOptions("", "", "", "", false, true);
PassBuilder PB(TM.get(), PGOOpt);
@@ -961,9 +1003,11 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds))
MPM.addPass(createModuleToFunctionPassAdaptor(BoundsCheckingPass()));
- // Lastly, add a semantically necessary pass for LTO.
- if (IsLTO || IsThinLTO)
+ // Lastly, add semantically necessary passes for LTO.
+ if (IsLTO || IsThinLTO) {
+ MPM.addPass(CanonicalizeAliasesPass());
MPM.addPass(NameAnonGlobalPass());
+ }
} else {
// Map our optimization levels into one of the distinct levels used to
// configure the pipeline.
@@ -984,10 +1028,12 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
if (IsThinLTO) {
MPM = PB.buildThinLTOPreLinkDefaultPipeline(
Level, CodeGenOpts.DebugPassManager);
+ MPM.addPass(CanonicalizeAliasesPass());
MPM.addPass(NameAnonGlobalPass());
} else if (IsLTO) {
MPM = PB.buildLTOPreLinkDefaultPipeline(Level,
CodeGenOpts.DebugPassManager);
+ MPM.addPass(CanonicalizeAliasesPass());
MPM.addPass(NameAnonGlobalPass());
} else {
MPM = PB.buildPerModuleDefaultPipeline(Level,
@@ -1008,12 +1054,14 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
break;
case Backend_EmitBC:
- if (CodeGenOpts.PrepareForThinLTO) {
+ if (CodeGenOpts.PrepareForThinLTO && !CodeGenOpts.DisableLLVMPasses) {
if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) {
ThinLinkOS = openOutputFile(CodeGenOpts.ThinLinkBitcodeFile);
if (!ThinLinkOS)
return;
}
+ TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit",
+ CodeGenOpts.EnableSplitLTOUnit);
MPM.addPass(ThinLTOBitcodeWriterPass(*OS, ThinLinkOS ? &ThinLinkOS->os()
: nullptr));
} else {
@@ -1021,13 +1069,17 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
// targets
bool EmitLTOSummary =
(CodeGenOpts.PrepareForLTO &&
+ !CodeGenOpts.DisableLLVMPasses &&
llvm::Triple(TheModule->getTargetTriple()).getVendor() !=
llvm::Triple::Apple);
- if (EmitLTOSummary && !TheModule->getModuleFlag("ThinLTO"))
- TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
-
- MPM.addPass(BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists,
- EmitLTOSummary));
+ if (EmitLTOSummary) {
+ if (!TheModule->getModuleFlag("ThinLTO"))
+ TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
+ TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit",
+ CodeGenOpts.EnableSplitLTOUnit);
+ }
+ MPM.addPass(
+ BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists, EmitLTOSummary));
}
break;
@@ -1104,6 +1156,7 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
const LangOptions &LOpts,
std::unique_ptr<raw_pwrite_stream> OS,
std::string SampleProfile,
+ std::string ProfileRemapping,
BackendAction Action) {
StringMap<DenseMap<GlobalValue::GUID, GlobalValueSummary *>>
ModuleToDefinedGVSummaries;
@@ -1121,15 +1174,14 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
continue;
auto GUID = GlobalList.first;
- assert(GlobalList.second.SummaryList.size() == 1 &&
- "Expected individual combined index to have one summary per GUID");
- auto &Summary = GlobalList.second.SummaryList[0];
- // Skip the summaries for the importing module. These are included to
- // e.g. record required linkage changes.
- if (Summary->modulePath() == M->getModuleIdentifier())
- continue;
- // Add an entry to provoke importing by thinBackend.
- ImportList[Summary->modulePath()].insert(GUID);
+ for (auto &Summary : GlobalList.second.SummaryList) {
+ // Skip the summaries for the importing module. These are included to
+ // e.g. record required linkage changes.
+ if (Summary->modulePath() == M->getModuleIdentifier())
+ continue;
+ // Add an entry to provoke importing by thinBackend.
+ ImportList[Summary->modulePath()].insert(GUID);
+ }
}
std::vector<std::unique_ptr<llvm::MemoryBuffer>> OwnedImports;
@@ -1176,6 +1228,7 @@ static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
Conf.CGOptLevel = getCGOptLevel(CGOpts);
initTargetOptions(Conf.Options, CGOpts, TOpts, LOpts, HeaderOpts);
Conf.SampleProfile = std::move(SampleProfile);
+ Conf.ProfileRemapping = std::move(ProfileRemapping);
Conf.UseNewPM = CGOpts.ExperimentalNewPassManager;
Conf.DebugPassManager = CGOpts.DebugPassManager;
Conf.RemarksWithHotness = CGOpts.DiagnosticsWithHotness;
@@ -1242,7 +1295,7 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags,
if (!CombinedIndex->skipModuleByDistributedBackend()) {
runThinLTOBackend(CombinedIndex.get(), M, HeaderOpts, CGOpts, TOpts,
LOpts, std::move(OS), CGOpts.SampleProfileFile,
- Action);
+ CGOpts.ProfileRemappingFile, Action);
return;
}
// Distributed indexing detected that nothing from the module is needed
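
Note on the BackendUtil.cpp hunks above: the new KernelMemorySanitizer support is registered through the legacy PassManagerBuilder extension points, once at EP_OptimizerLast (so the instrumentation runs after the optimization pipeline) and once at EP_EnabledOnOptLevel0 (so it still runs at -O0). A minimal sketch of that registration pattern follows; createMyInstrumentationLegacyPass is a hypothetical stand-in for a real pass constructor such as createMemorySanitizerLegacyPassPass.

// Sketch only: hooking an instrumentation pass into the legacy pass manager
// the same way addKernelMemorySanitizerPass is wired up in the diff above.
static void addMyInstrumentationPass(const llvm::PassManagerBuilder &Builder,
                                     llvm::legacy::PassManagerBase &PM) {
  PM.add(createMyInstrumentationLegacyPass()); // hypothetical pass constructor
}

void registerInstrumentation(llvm::PassManagerBuilder &PMBuilder) {
  // Runs at the end of the optimization pipeline when optimizing...
  PMBuilder.addExtension(llvm::PassManagerBuilder::EP_OptimizerLast,
                         addMyInstrumentationPass);
  // ...and via this second extension point when compiling at -O0.
  PMBuilder.addExtension(llvm::PassManagerBuilder::EP_EnabledOnOptLevel0,
                         addMyInstrumentationPass);
}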
diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp
index b34bcdc1fc38..24056a449def 100644
--- a/lib/CodeGen/CGAtomic.cpp
+++ b/lib/CodeGen/CGAtomic.cpp
@@ -18,7 +18,7 @@
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/CodeGen/CGFunctionInfo.h"
-#include "clang/Sema/SemaDiagnostic.h"
+#include "clang/Frontend/FrontendDiagnostic.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Intrinsics.h"
@@ -765,11 +765,15 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
std::tie(sizeChars, alignChars) = getContext().getTypeInfoInChars(AtomicTy);
uint64_t Size = sizeChars.getQuantity();
unsigned MaxInlineWidthInBits = getTarget().getMaxAtomicInlineWidth();
- bool UseLibcall = ((Ptr.getAlignment() % sizeChars) != 0 ||
- getContext().toBits(sizeChars) > MaxInlineWidthInBits);
- if (UseLibcall)
- CGM.getDiags().Report(E->getLocStart(), diag::warn_atomic_op_misaligned);
+ bool Oversized = getContext().toBits(sizeChars) > MaxInlineWidthInBits;
+ bool Misaligned = (Ptr.getAlignment() % sizeChars) != 0;
+ bool UseLibcall = Misaligned | Oversized;
+
+ if (UseLibcall) {
+ CGM.getDiags().Report(E->getBeginLoc(), diag::warn_atomic_op_misaligned)
+ << !Oversized;
+ }
llvm::Value *Order = EmitScalarExpr(E->getOrder());
llvm::Value *Scope =
@@ -923,6 +927,15 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
UseOptimizedLibcall = true;
break;
+ case AtomicExpr::AO__atomic_load:
+ case AtomicExpr::AO__atomic_store:
+ case AtomicExpr::AO__atomic_exchange:
+ case AtomicExpr::AO__atomic_compare_exchange:
+ // Use the generic version if we don't know that the operand will be
+ // suitably aligned for the optimized version.
+ if (Misaligned)
+ break;
+ LLVM_FALLTHROUGH;
case AtomicExpr::AO__c11_atomic_load:
case AtomicExpr::AO__c11_atomic_store:
case AtomicExpr::AO__c11_atomic_exchange:
@@ -934,14 +947,11 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
case AtomicExpr::AO__atomic_load_n:
- case AtomicExpr::AO__atomic_load:
case AtomicExpr::AO__atomic_store_n:
- case AtomicExpr::AO__atomic_store:
case AtomicExpr::AO__atomic_exchange_n:
- case AtomicExpr::AO__atomic_exchange:
case AtomicExpr::AO__atomic_compare_exchange_n:
- case AtomicExpr::AO__atomic_compare_exchange:
// Only use optimized library calls for sizes for which they exist.
+ // FIXME: Size == 16 optimized library functions exist too.
if (Size == 1 || Size == 2 || Size == 4 || Size == 8)
UseOptimizedLibcall = true;
break;
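
Note on the CGAtomic.cpp hunks above: the libcall decision is split into two separate reasons, Misaligned and Oversized, and the generic __atomic_load/store/exchange/compare_exchange builtins now use the unoptimized library call whenever the operand may be under-aligned. A small illustrative example (type and function names are made up) that would take the misaligned path and trigger warn_atomic_op_misaligned on a typical 64-bit target:

// Sketch: 'v' inside a packed struct is 1-byte aligned but 8 bytes wide, so
// EmitAtomicExpr sees Misaligned == true, emits a __atomic_load libcall
// instead of an inline atomic instruction, and warns about the misalignment.
struct __attribute__((packed)) Pair {
  char tag;
  long long v;
};

long long read_v(Pair *p) {
  long long out;
  __atomic_load(&p->v, &out, __ATOMIC_SEQ_CST); // generic, non-_n form
  return out;
}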
diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp
index 8269b5b229a2..fa3c3ee8610c 100644
--- a/lib/CodeGen/CGBlocks.cpp
+++ b/lib/CodeGen/CGBlocks.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "CGBlocks.h"
+#include "CGCXXABI.h"
#include "CGDebugInfo.h"
#include "CGObjCRuntime.h"
#include "CGOpenCLRuntime.h"
@@ -25,6 +26,7 @@
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/ScopedPrinter.h"
#include <algorithm>
#include <cstdio>
@@ -34,8 +36,8 @@ using namespace CodeGen;
CGBlockInfo::CGBlockInfo(const BlockDecl *block, StringRef name)
: Name(name), CXXThisIndex(0), CanBeGlobal(false), NeedsCopyDispose(false),
HasCXXObject(false), UsesStret(false), HasCapturedVariableLayout(false),
- LocalAddress(Address::invalid()), StructureType(nullptr), Block(block),
- DominatingIP(nullptr) {
+ CapturesNonExternalType(false), LocalAddress(Address::invalid()),
+ StructureType(nullptr), Block(block), DominatingIP(nullptr) {
// Skip asm prefix, if any. 'name' is usually taken directly from
// the mangled name of the enclosing function.
@@ -63,6 +65,110 @@ static llvm::Constant *buildDisposeHelper(CodeGenModule &CGM,
return CodeGenFunction(CGM).GenerateDestroyHelperFunction(blockInfo);
}
+namespace {
+
+/// Represents a type of copy/destroy operation that should be performed for an
+/// entity that's captured by a block.
+enum class BlockCaptureEntityKind {
+ CXXRecord, // Copy or destroy
+ ARCWeak,
+ ARCStrong,
+ NonTrivialCStruct,
+ BlockObject, // Assign or release
+ None
+};
+
+/// Represents a captured entity that requires extra operations in order for
+/// this entity to be copied or destroyed correctly.
+struct BlockCaptureManagedEntity {
+ BlockCaptureEntityKind CopyKind, DisposeKind;
+ BlockFieldFlags CopyFlags, DisposeFlags;
+ const BlockDecl::Capture *CI;
+ const CGBlockInfo::Capture *Capture;
+
+ BlockCaptureManagedEntity(BlockCaptureEntityKind CopyType,
+ BlockCaptureEntityKind DisposeType,
+ BlockFieldFlags CopyFlags,
+ BlockFieldFlags DisposeFlags,
+ const BlockDecl::Capture &CI,
+ const CGBlockInfo::Capture &Capture)
+ : CopyKind(CopyType), DisposeKind(DisposeType), CopyFlags(CopyFlags),
+ DisposeFlags(DisposeFlags), CI(&CI), Capture(&Capture) {}
+
+ bool operator<(const BlockCaptureManagedEntity &Other) const {
+ return Capture->getOffset() < Other.Capture->getOffset();
+ }
+};
+
+enum class CaptureStrKind {
+ // String for the copy helper.
+ CopyHelper,
+ // String for the dispose helper.
+ DisposeHelper,
+ // Merge the strings for the copy helper and dispose helper.
+ Merged
+};
+
+} // end anonymous namespace
+
+static void findBlockCapturedManagedEntities(
+ const CGBlockInfo &BlockInfo, const LangOptions &LangOpts,
+ SmallVectorImpl<BlockCaptureManagedEntity> &ManagedCaptures);
+
+static std::string getBlockCaptureStr(const BlockCaptureManagedEntity &E,
+ CaptureStrKind StrKind,
+ CharUnits BlockAlignment,
+ CodeGenModule &CGM);
+
+static std::string getBlockDescriptorName(const CGBlockInfo &BlockInfo,
+ CodeGenModule &CGM) {
+ std::string Name = "__block_descriptor_";
+ Name += llvm::to_string(BlockInfo.BlockSize.getQuantity()) + "_";
+
+ if (BlockInfo.needsCopyDisposeHelpers()) {
+ if (CGM.getLangOpts().Exceptions)
+ Name += "e";
+ if (CGM.getCodeGenOpts().ObjCAutoRefCountExceptions)
+ Name += "a";
+ Name += llvm::to_string(BlockInfo.BlockAlign.getQuantity()) + "_";
+
+ SmallVector<BlockCaptureManagedEntity, 4> ManagedCaptures;
+ findBlockCapturedManagedEntities(BlockInfo, CGM.getContext().getLangOpts(),
+ ManagedCaptures);
+
+ for (const BlockCaptureManagedEntity &E : ManagedCaptures) {
+ Name += llvm::to_string(E.Capture->getOffset().getQuantity());
+
+ if (E.CopyKind == E.DisposeKind) {
+ // If CopyKind and DisposeKind are the same, merge the capture
+ // information.
+ assert(E.CopyKind != BlockCaptureEntityKind::None &&
+ "shouldn't see BlockCaptureManagedEntity that is None");
+ Name += getBlockCaptureStr(E, CaptureStrKind::Merged,
+ BlockInfo.BlockAlign, CGM);
+ } else {
+ // If CopyKind and DisposeKind are not the same, which can happen when
+ // either Kind is None or the captured object is a __strong block,
+ // concatenate the copy and dispose strings.
+ Name += getBlockCaptureStr(E, CaptureStrKind::CopyHelper,
+ BlockInfo.BlockAlign, CGM);
+ Name += getBlockCaptureStr(E, CaptureStrKind::DisposeHelper,
+ BlockInfo.BlockAlign, CGM);
+ }
+ }
+ Name += "_";
+ }
+
+ std::string TypeAtEncoding =
+ CGM.getContext().getObjCEncodingForBlock(BlockInfo.getBlockExpr());
+ /// Replace occurrences of '@' with '\1'. '@' is reserved on ELF platforms as
+ /// a separator between symbol name and symbol version.
+ std::replace(TypeAtEncoding.begin(), TypeAtEncoding.end(), '@', '\1');
+ Name += "e" + llvm::to_string(TypeAtEncoding.size()) + "_" + TypeAtEncoding;
+ Name += "l" + CGM.getObjCRuntime().getRCBlockLayoutStr(CGM, BlockInfo);
+ return Name;
+}
+
/// buildBlockDescriptor - Build the block descriptor meta-data for a block.
/// buildBlockDescriptor is accessed from 5th field of the Block_literal
/// meta-data and contains stationary information about the block literal.
@@ -72,7 +178,7 @@ static llvm::Constant *buildDisposeHelper(CodeGenModule &CGM,
/// unsigned long reserved;
/// unsigned long size; // size of Block_literal metadata in bytes.
/// void *copy_func_helper_decl; // optional copy helper.
-/// void *destroy_func_decl; // optioanl destructor helper.
+/// void *destroy_func_decl; // optional destructor helper.
/// void *block_method_encoding_address; // @encode for block literal signature.
/// void *block_layout_info; // encoding of captured block variables.
/// };
@@ -91,6 +197,19 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM,
else
i8p = CGM.VoidPtrTy;
+ std::string descName;
+
+ // If an equivalent block descriptor global variable exists, return it.
+ if (C.getLangOpts().ObjC &&
+ CGM.getLangOpts().getGC() == LangOptions::NonGC) {
+ descName = getBlockDescriptorName(blockInfo, CGM);
+ if (llvm::GlobalValue *desc = CGM.getModule().getNamedValue(descName))
+ return llvm::ConstantExpr::getBitCast(desc,
+ CGM.getBlockDescriptorType());
+ }
+
+ // If there isn't an equivalent block descriptor global variable, create a new
+ // one.
ConstantInitBuilder builder(CGM);
auto elements = builder.beginStruct();
@@ -104,12 +223,20 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM,
elements.addInt(ulong, blockInfo.BlockSize.getQuantity());
// Optional copy/dispose helpers.
+ bool hasInternalHelper = false;
if (blockInfo.needsCopyDisposeHelpers()) {
// copy_func_helper_decl
- elements.add(buildCopyHelper(CGM, blockInfo));
+ llvm::Constant *copyHelper = buildCopyHelper(CGM, blockInfo);
+ elements.add(copyHelper);
// destroy_func_decl
- elements.add(buildDisposeHelper(CGM, blockInfo));
+ llvm::Constant *disposeHelper = buildDisposeHelper(CGM, blockInfo);
+ elements.add(disposeHelper);
+
+ if (cast<llvm::Function>(copyHelper->getOperand(0))->hasInternalLinkage() ||
+ cast<llvm::Function>(disposeHelper->getOperand(0))
+ ->hasInternalLinkage())
+ hasInternalHelper = true;
}
// Signature. Mandatory ObjC-style method descriptor @encode sequence.
@@ -119,7 +246,7 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM,
CGM.GetAddrOfConstantCString(typeAtEncoding).getPointer(), i8p));
// GC layout.
- if (C.getLangOpts().ObjC1) {
+ if (C.getLangOpts().ObjC) {
if (CGM.getLangOpts().getGC() != LangOptions::NonGC)
elements.add(CGM.getObjCRuntime().BuildGCBlockLayout(CGM, blockInfo));
else
@@ -132,12 +259,26 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM,
if (C.getLangOpts().OpenCL)
AddrSpace = C.getTargetAddressSpace(LangAS::opencl_constant);
+ llvm::GlobalValue::LinkageTypes linkage;
+ if (descName.empty()) {
+ linkage = llvm::GlobalValue::InternalLinkage;
+ descName = "__block_descriptor_tmp";
+ } else if (hasInternalHelper) {
+ // If either the copy helper or the dispose helper has internal linkage,
+ // the block descriptor must have internal linkage too.
+ linkage = llvm::GlobalValue::InternalLinkage;
+ } else {
+ linkage = llvm::GlobalValue::LinkOnceODRLinkage;
+ }
+
llvm::GlobalVariable *global =
- elements.finishAndCreateGlobal("__block_descriptor_tmp",
- CGM.getPointerAlign(),
- /*constant*/ true,
- llvm::GlobalValue::InternalLinkage,
- AddrSpace);
+ elements.finishAndCreateGlobal(descName, CGM.getPointerAlign(),
+ /*constant*/ true, linkage, AddrSpace);
+
+ if (linkage == llvm::GlobalValue::LinkOnceODRLinkage) {
+ global->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ global->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+ }
return llvm::ConstantExpr::getBitCast(global, CGM.getBlockDescriptorType());
}
@@ -308,12 +449,25 @@ static void initializeForBlockHeader(CodeGenModule &CGM, CGBlockInfo &info,
assert(elementTypes.empty());
if (CGM.getLangOpts().OpenCL) {
- // The header is basically 'struct { int; int;
+ // The header is basically 'struct { int; int; generic void *;
// custom_fields; }'. Assert that struct is packed.
+ auto GenericAS =
+ CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic);
+ auto GenPtrAlign =
+ CharUnits::fromQuantity(CGM.getTarget().getPointerAlign(GenericAS) / 8);
+ auto GenPtrSize =
+ CharUnits::fromQuantity(CGM.getTarget().getPointerWidth(GenericAS) / 8);
+ assert(CGM.getIntSize() <= GenPtrSize);
+ assert(CGM.getIntAlign() <= GenPtrAlign);
+ assert((2 * CGM.getIntSize()).isMultipleOf(GenPtrAlign));
elementTypes.push_back(CGM.IntTy); /* total size */
elementTypes.push_back(CGM.IntTy); /* align */
- unsigned Offset = 2 * CGM.getIntSize().getQuantity();
- unsigned BlockAlign = CGM.getIntAlign().getQuantity();
+ elementTypes.push_back(
+ CGM.getOpenCLRuntime()
+ .getGenericVoidPointerType()); /* invoke function */
+ unsigned Offset =
+ 2 * CGM.getIntSize().getQuantity() + GenPtrSize.getQuantity();
+ unsigned BlockAlign = GenPtrAlign.getQuantity();
if (auto *Helper =
CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
for (auto I : Helper->getCustomFieldTypes()) /* custom fields */ {
@@ -355,7 +509,11 @@ static QualType getCaptureFieldType(const CodeGenFunction &CGF,
return CGF.BlockInfo->getCapture(VD).fieldType();
if (auto *FD = CGF.LambdaCaptureFields.lookup(VD))
return FD->getType();
- return VD->getType();
+ // If the captured variable is a non-escaping __block variable, the field
+ // type is the reference type. If the variable is a __block variable that
+ // already has a reference type, the field type is the variable's type.
+ return VD->isNonEscapingByref() ?
+ CGF.getContext().getLValueReferenceType(VD->getType()) : VD->getType();
}
/// Compute the layout of the given block. Attempts to lay the block
@@ -378,7 +536,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF,
info.CanBeGlobal = true;
return;
}
- else if (C.getLangOpts().ObjC1 &&
+ else if (C.getLangOpts().ObjC &&
CGM.getLangOpts().getGC() == LangOptions::NonGC)
info.HasCapturedVariableLayout = true;
@@ -393,7 +551,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF,
if (block->capturesCXXThis()) {
assert(CGF && CGF->CurFuncDecl && isa<CXXMethodDecl>(CGF->CurFuncDecl) &&
"Can't capture 'this' outside a method");
- QualType thisType = cast<CXXMethodDecl>(CGF->CurFuncDecl)->getThisType(C);
+ QualType thisType = cast<CXXMethodDecl>(CGF->CurFuncDecl)->getThisType();
// Theoretically, this could be in a different address space, so
// don't assume standard pointer size/align.
@@ -411,7 +569,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF,
for (const auto &CI : block->captures()) {
const VarDecl *variable = CI.getVariable();
- if (CI.isByRef()) {
+ if (CI.isEscapingByref()) {
// We have to copy/dispose of the __block reference.
info.NeedsCopyDispose = true;
@@ -419,6 +577,10 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF,
CharUnits align = CGM.getPointerAlign();
maxFieldAlign = std::max(maxFieldAlign, align);
+ // Since a __block variable cannot be captured by lambdas, its type and
+ // the capture field type should always match.
+ assert(getCaptureFieldType(*CGF, CI) == variable->getType() &&
+ "capture type differs from the variable type");
layout.push_back(BlockLayoutChunk(align, CGM.getPointerSize(),
Qualifiers::OCL_None, &CI,
CGM.VoidPtrTy, variable->getType()));
@@ -432,10 +594,11 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF,
continue;
}
+ QualType VT = getCaptureFieldType(*CGF, CI);
+
// If we have a lifetime qualifier, honor it for capture purposes.
// That includes *not* copying it if it's __unsafe_unretained.
- Qualifiers::ObjCLifetime lifetime =
- variable->getType().getObjCLifetime();
+ Qualifiers::ObjCLifetime lifetime = VT.getObjCLifetime();
if (lifetime) {
switch (lifetime) {
case Qualifiers::OCL_None: llvm_unreachable("impossible");
@@ -449,10 +612,10 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF,
}
// Block pointers require copy/dispose. So do Objective-C pointers.
- } else if (variable->getType()->isObjCRetainableType()) {
+ } else if (VT->isObjCRetainableType()) {
// But honor the inert __unsafe_unretained qualifier, which doesn't
// actually make it into the type system.
- if (variable->getType()->isObjCInertUnsafeUnretainedType()) {
+ if (VT->isObjCInertUnsafeUnretainedType()) {
lifetime = Qualifiers::OCL_ExplicitNone;
} else {
info.NeedsCopyDispose = true;
@@ -464,27 +627,27 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF,
} else if (CI.hasCopyExpr()) {
info.NeedsCopyDispose = true;
info.HasCXXObject = true;
+ if (!VT->getAsCXXRecordDecl()->isExternallyVisible())
+ info.CapturesNonExternalType = true;
// So do C structs that require non-trivial copy construction or
// destruction.
- } else if (variable->getType().isNonTrivialToPrimitiveCopy() ==
- QualType::PCK_Struct ||
- variable->getType().isDestructedType() ==
- QualType::DK_nontrivial_c_struct) {
+ } else if (VT.isNonTrivialToPrimitiveCopy() == QualType::PCK_Struct ||
+ VT.isDestructedType() == QualType::DK_nontrivial_c_struct) {
info.NeedsCopyDispose = true;
// And so do types with destructors.
} else if (CGM.getLangOpts().CPlusPlus) {
- if (const CXXRecordDecl *record =
- variable->getType()->getAsCXXRecordDecl()) {
+ if (const CXXRecordDecl *record = VT->getAsCXXRecordDecl()) {
if (!record->hasTrivialDestructor()) {
info.HasCXXObject = true;
info.NeedsCopyDispose = true;
+ if (!record->isExternallyVisible())
+ info.CapturesNonExternalType = true;
}
}
}
- QualType VT = getCaptureFieldType(*CGF, CI);
CharUnits size = C.getTypeSizeInChars(VT);
CharUnits align = C.getDeclAlign(variable);
@@ -699,10 +862,12 @@ static void enterBlockScope(CodeGenFunction &CGF, BlockDecl *block) {
/// Enter a full-expression with a non-trivial number of objects to
/// clean up. This is in this file because, at the moment, the only
/// kind of cleanup object is a BlockDecl*.
-void CodeGenFunction::enterNonTrivialFullExpression(const ExprWithCleanups *E) {
- assert(E->getNumObjects() != 0);
- for (const ExprWithCleanups::CleanupObject &C : E->getObjects())
- enterBlockScope(*this, C);
+void CodeGenFunction::enterNonTrivialFullExpression(const FullExpr *E) {
+ if (const auto EWC = dyn_cast<ExprWithCleanups>(E)) {
+ assert(EWC->getNumObjects() != 0);
+ for (const ExprWithCleanups::CleanupObject &C : EWC->getObjects())
+ enterBlockScope(*this, C);
+ }
}
/// Find the layout for the given block in a linked list and remove it.
@@ -759,12 +924,20 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr) {
llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL;
+ auto GenVoidPtrTy =
+ IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy;
+ LangAS GenVoidPtrAddr = IsOpenCL ? LangAS::opencl_generic : LangAS::Default;
+ auto GenVoidPtrSize = CharUnits::fromQuantity(
+ CGM.getTarget().getPointerWidth(
+ CGM.getContext().getTargetAddressSpace(GenVoidPtrAddr)) /
+ 8);
// Using the computed layout, generate the actual block function.
bool isLambdaConv = blockInfo.getBlockDecl()->isConversionFromLambda();
CodeGenFunction BlockCGF{CGM, true};
BlockCGF.SanOpts = SanOpts;
auto *InvokeFn = BlockCGF.GenerateBlockFunction(
CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal);
+ auto *blockFn = llvm::ConstantExpr::getPointerCast(InvokeFn, GenVoidPtrTy);
// If there is nothing to capture, we can emit this as a global block.
if (blockInfo.CanBeGlobal)
@@ -840,12 +1013,11 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
llvm::ConstantInt::get(IntTy, blockInfo.BlockAlign.getQuantity()),
getIntSize(), "block.align");
}
- if (!IsOpenCL) {
- addHeaderField(llvm::ConstantExpr::getBitCast(InvokeFn, VoidPtrTy),
- getPointerSize(), "block.invoke");
+ addHeaderField(blockFn, GenVoidPtrSize, "block.invoke");
+ if (!IsOpenCL)
addHeaderField(descriptor, getPointerSize(), "block.descriptor");
- } else if (auto *Helper =
- CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
+ else if (auto *Helper =
+ CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
for (auto I : Helper->getCustomFieldValues(*this, blockInfo)) {
addHeaderField(
I.first,
@@ -889,7 +1061,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
// The lambda capture in a lambda's conversion-to-block-pointer is
// special; we'll simply emit it directly.
src = Address::invalid();
- } else if (CI.isByRef()) {
+ } else if (CI.isEscapingByref()) {
if (BlockInfo && CI.isNested()) {
// We need to use the capture from the enclosing block.
const CGBlockInfo::Capture &enclosingCapture =
@@ -906,7 +1078,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
src = I->second;
}
} else {
- DeclRefExpr declRef(const_cast<VarDecl *>(variable),
+ DeclRefExpr declRef(getContext(), const_cast<VarDecl *>(variable),
/*RefersToEnclosingVariableOrCapture*/ CI.isNested(),
type.getNonReferenceType(), VK_LValue,
SourceLocation());
@@ -917,7 +1089,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
// the block field. There's no need to chase the forwarding
// pointer at this point, since we're building something that will
// live a shorter life than the stack byref anyway.
- if (CI.isByRef()) {
+ if (CI.isEscapingByref()) {
// Get a void* that points to the byref struct.
llvm::Value *byrefPointer;
if (CI.isNested())
@@ -980,7 +1152,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
// We use one of these or the other depending on whether the
// reference is nested.
- DeclRefExpr declRef(const_cast<VarDecl *>(variable),
+ DeclRefExpr declRef(getContext(), const_cast<VarDecl *>(variable),
/*RefersToEnclosingVariableOrCapture*/ CI.isNested(),
type, VK_LValue, SourceLocation());
@@ -1049,23 +1221,38 @@ llvm::Type *CodeGenModule::getBlockDescriptorType() {
}
llvm::Type *CodeGenModule::getGenericBlockLiteralType() {
- assert(!getLangOpts().OpenCL && "OpenCL does not need this");
-
if (GenericBlockLiteralType)
return GenericBlockLiteralType;
llvm::Type *BlockDescPtrTy = getBlockDescriptorType();
- // struct __block_literal_generic {
- // void *__isa;
- // int __flags;
- // int __reserved;
- // void (*__invoke)(void *);
- // struct __block_descriptor *__descriptor;
- // };
- GenericBlockLiteralType =
- llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy,
- IntTy, IntTy, VoidPtrTy, BlockDescPtrTy);
+ if (getLangOpts().OpenCL) {
+ // struct __opencl_block_literal_generic {
+ // int __size;
+ // int __align;
+ // __generic void *__invoke;
+ // /* custom fields */
+ // };
+ SmallVector<llvm::Type *, 8> StructFields(
+ {IntTy, IntTy, getOpenCLRuntime().getGenericVoidPointerType()});
+ if (auto *Helper = getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
+ for (auto I : Helper->getCustomFieldTypes())
+ StructFields.push_back(I);
+ }
+ GenericBlockLiteralType = llvm::StructType::create(
+ StructFields, "struct.__opencl_block_literal_generic");
+ } else {
+ // struct __block_literal_generic {
+ // void *__isa;
+ // int __flags;
+ // int __reserved;
+ // void (*__invoke)(void *);
+ // struct __block_descriptor *__descriptor;
+ // };
+ GenericBlockLiteralType =
+ llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy,
+ IntTy, IntTy, VoidPtrTy, BlockDescPtrTy);
+ }
return GenericBlockLiteralType;
}
@@ -1076,21 +1263,27 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
E->getCallee()->getType()->getAs<BlockPointerType>();
llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee());
- llvm::Value *FuncPtr;
- if (!CGM.getLangOpts().OpenCL) {
- // Get a pointer to the generic block literal.
- llvm::Type *BlockLiteralTy =
- llvm::PointerType::get(CGM.getGenericBlockLiteralType(), 0);
+ // Get a pointer to the generic block literal.
+ // For OpenCL we generate generic AS void ptr to be able to reuse the same
+ // block definition for blocks with captures generated as private AS local
+ // variables and without captures generated as global AS program scope
+ // variables.
+ unsigned AddrSpace = 0;
+ if (getLangOpts().OpenCL)
+ AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_generic);
- // Bitcast the callee to a block literal.
- BlockPtr =
- Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal");
+ llvm::Type *BlockLiteralTy =
+ llvm::PointerType::get(CGM.getGenericBlockLiteralType(), AddrSpace);
- // Get the function pointer from the literal.
- FuncPtr =
- Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, 3);
- }
+ // Bitcast the callee to a block literal.
+ BlockPtr =
+ Builder.CreatePointerCast(BlockPtr, BlockLiteralTy, "block.literal");
+
+ // Get the function pointer from the literal.
+ llvm::Value *FuncPtr =
+ Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr,
+ CGM.getLangOpts().OpenCL ? 2 : 3);
// Add the block literal.
CallArgList Args;
@@ -1113,11 +1306,7 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
EmitCallArgs(Args, FnType->getAs<FunctionProtoType>(), E->arguments());
// Load the function.
- llvm::Value *Func;
- if (CGM.getLangOpts().OpenCL)
- Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee());
- else
- Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
+ llvm::Value *Func = Builder.CreateAlignedLoad(FuncPtr, getPointerAlign());
const FunctionType *FuncTy = FnType->castAs<FunctionType>();
const CGFunctionInfo &FnInfo =
@@ -1136,8 +1325,7 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
return EmitCall(FnInfo, Callee, ReturnValue, Args);
}
-Address CodeGenFunction::GetAddrOfBlockDecl(const VarDecl *variable,
- bool isByRef) {
+Address CodeGenFunction::GetAddrOfBlockDecl(const VarDecl *variable) {
assert(BlockInfo && "evaluating block ref without block information?");
const CGBlockInfo::Capture &capture = BlockInfo->getCapture(variable);
@@ -1148,7 +1336,7 @@ Address CodeGenFunction::GetAddrOfBlockDecl(const VarDecl *variable,
Builder.CreateStructGEP(LoadBlockStruct(), capture.getIndex(),
capture.getOffset(), "block.capture.addr");
- if (isByRef) {
+ if (variable->isEscapingByref()) {
// addr should be a void** right now. Load, then cast the result
// to byref*.
@@ -1162,6 +1350,10 @@ Address CodeGenFunction::GetAddrOfBlockDecl(const VarDecl *variable,
variable->getName());
}
+ assert((!variable->isNonEscapingByref() ||
+ capture.fieldType()->isReferenceType()) &&
+ "the capture field of a non-escaping variable should have a "
+ "reference type");
if (capture.fieldType()->isReferenceType())
addr = EmitLoadOfReference(MakeAddrLValue(addr, capture.fieldType()));
@@ -1213,9 +1405,13 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
auto fields = builder.beginStruct();
bool IsOpenCL = CGM.getLangOpts().OpenCL;
+ bool IsWindows = CGM.getTarget().getTriple().isOSWindows();
if (!IsOpenCL) {
// isa
- fields.add(CGM.getNSConcreteGlobalBlock());
+ if (IsWindows)
+ fields.addNullPointer(CGM.Int8PtrPtrTy);
+ else
+ fields.add(CGM.getNSConcreteGlobalBlock());
// __flags
BlockFlags flags = BLOCK_IS_GLOBAL | BLOCK_HAS_SIGNATURE;
@@ -1226,14 +1422,14 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
// Reserved
fields.addInt(CGM.IntTy, 0);
-
- // Function
- fields.add(blockFn);
} else {
fields.addInt(CGM.IntTy, blockInfo.BlockSize.getQuantity());
fields.addInt(CGM.IntTy, blockInfo.BlockAlign.getQuantity());
}
+ // Function
+ fields.add(blockFn);
+
if (!IsOpenCL) {
// Descriptor
fields.add(buildBlockDescriptor(CGM, blockInfo));
@@ -1250,7 +1446,27 @@ static llvm::Constant *buildGlobalBlock(CodeGenModule &CGM,
llvm::Constant *literal = fields.finishAndCreateGlobal(
"__block_literal_global", blockInfo.BlockAlign,
- /*constant*/ true, llvm::GlobalVariable::InternalLinkage, AddrSpace);
+ /*constant*/ !IsWindows, llvm::GlobalVariable::InternalLinkage, AddrSpace);
+
+ // Windows does not allow globals to be initialised to point to globals in
+ // different DLLs. Any such variables must run code to initialise them.
+ if (IsWindows) {
+ auto *Init = llvm::Function::Create(llvm::FunctionType::get(CGM.VoidTy,
+ {}), llvm::GlobalValue::InternalLinkage, ".block_isa_init",
+ &CGM.getModule());
+ llvm::IRBuilder<> b(llvm::BasicBlock::Create(CGM.getLLVMContext(), "entry",
+ Init));
+ b.CreateAlignedStore(CGM.getNSConcreteGlobalBlock(),
+ b.CreateStructGEP(literal, 0), CGM.getPointerAlign().getQuantity());
+ b.CreateRetVoid();
+ // We can't use the normal LLVM global initialisation array, because we
+ // need to specify that this runs early in library initialisation.
+ auto *InitVar = new llvm::GlobalVariable(CGM.getModule(), Init->getType(),
+ /*isConstant*/true, llvm::GlobalValue::InternalLinkage,
+ Init, ".block_isa_init_ptr");
+ InitVar->setSection(".CRT$XCLa");
+ CGM.addUsedGlobal(InitVar);
+ }
// Return a constant of the appropriately-casted type.
llvm::Type *RequiredType =
@@ -1284,7 +1500,7 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D,
}
}
- SourceLocation StartLoc = BlockInfo->getBlockExpr()->getBody()->getLocStart();
+ SourceLocation StartLoc = BlockInfo->getBlockExpr()->getBody()->getBeginLoc();
ApplyDebugLocation Scope(*this, StartLoc);
// Instead of messing around with LocalDeclMap, just set the value
@@ -1314,7 +1530,7 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD,
CurGD = GD;
- CurEHLocation = blockInfo.getBlockExpr()->getLocEnd();
+ CurEHLocation = blockInfo.getBlockExpr()->getEndLoc();
BlockInfo = &blockInfo;
@@ -1379,7 +1595,7 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD,
// Begin generating the function.
StartFunction(blockDecl, fnType->getReturnType(), fn, fnInfo, args,
blockDecl->getLocation(),
- blockInfo.getBlockExpr()->getBody()->getLocStart());
+ blockInfo.getBlockExpr()->getBody()->getBeginLoc());
// Okay. Undo some of what StartFunction did.
@@ -1480,35 +1696,6 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD,
return fn;
}
-namespace {
-
-/// Represents a type of copy/destroy operation that should be performed for an
-/// entity that's captured by a block.
-enum class BlockCaptureEntityKind {
- CXXRecord, // Copy or destroy
- ARCWeak,
- ARCStrong,
- NonTrivialCStruct,
- BlockObject, // Assign or release
- None
-};
-
-/// Represents a captured entity that requires extra operations in order for
-/// this entity to be copied or destroyed correctly.
-struct BlockCaptureManagedEntity {
- BlockCaptureEntityKind Kind;
- BlockFieldFlags Flags;
- const BlockDecl::Capture &CI;
- const CGBlockInfo::Capture &Capture;
-
- BlockCaptureManagedEntity(BlockCaptureEntityKind Type, BlockFieldFlags Flags,
- const BlockDecl::Capture &CI,
- const CGBlockInfo::Capture &Capture)
- : Kind(Type), Flags(Flags), CI(CI), Capture(Capture) {}
-};
-
-} // end anonymous namespace
-
static std::pair<BlockCaptureEntityKind, BlockFieldFlags>
computeCopyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T,
const LangOptions &LangOpts) {
@@ -1518,7 +1705,7 @@ computeCopyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T,
return std::make_pair(BlockCaptureEntityKind::CXXRecord, BlockFieldFlags());
}
BlockFieldFlags Flags;
- if (CI.isByRef()) {
+ if (CI.isEscapingByref()) {
Flags = BLOCK_FIELD_IS_BYREF;
if (T.isObjCGCWeak())
Flags |= BLOCK_FIELD_IS_WEAK;
@@ -1566,23 +1753,32 @@ computeCopyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T,
llvm_unreachable("after exhaustive PrimitiveCopyKind switch");
}
+static std::pair<BlockCaptureEntityKind, BlockFieldFlags>
+computeDestroyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T,
+ const LangOptions &LangOpts);
+
/// Find the set of block captures that need to be explicitly copied or destroy.
static void findBlockCapturedManagedEntities(
const CGBlockInfo &BlockInfo, const LangOptions &LangOpts,
- SmallVectorImpl<BlockCaptureManagedEntity> &ManagedCaptures,
- llvm::function_ref<std::pair<BlockCaptureEntityKind, BlockFieldFlags>(
- const BlockDecl::Capture &, QualType, const LangOptions &)>
- Predicate) {
+ SmallVectorImpl<BlockCaptureManagedEntity> &ManagedCaptures) {
for (const auto &CI : BlockInfo.getBlockDecl()->captures()) {
const VarDecl *Variable = CI.getVariable();
const CGBlockInfo::Capture &Capture = BlockInfo.getCapture(Variable);
if (Capture.isConstant())
continue;
- auto Info = Predicate(CI, Variable->getType(), LangOpts);
- if (Info.first != BlockCaptureEntityKind::None)
- ManagedCaptures.emplace_back(Info.first, Info.second, CI, Capture);
+ QualType VT = Capture.fieldType();
+ auto CopyInfo = computeCopyInfoForBlockCapture(CI, VT, LangOpts);
+ auto DisposeInfo = computeDestroyInfoForBlockCapture(CI, VT, LangOpts);
+ if (CopyInfo.first != BlockCaptureEntityKind::None ||
+ DisposeInfo.first != BlockCaptureEntityKind::None)
+ ManagedCaptures.emplace_back(CopyInfo.first, DisposeInfo.first,
+ CopyInfo.second, DisposeInfo.second, CI,
+ Capture);
}
+
+ // Sort the captures by offset.
+ llvm::sort(ManagedCaptures);
}
namespace {
@@ -1590,10 +1786,12 @@ namespace {
struct CallBlockRelease final : EHScopeStack::Cleanup {
Address Addr;
BlockFieldFlags FieldFlags;
- bool LoadBlockVarAddr;
+ bool LoadBlockVarAddr, CanThrow;
- CallBlockRelease(Address Addr, BlockFieldFlags Flags, bool LoadValue)
- : Addr(Addr), FieldFlags(Flags), LoadBlockVarAddr(LoadValue) {}
+ CallBlockRelease(Address Addr, BlockFieldFlags Flags, bool LoadValue,
+ bool CT)
+ : Addr(Addr), FieldFlags(Flags), LoadBlockVarAddr(LoadValue),
+ CanThrow(CT) {}
void Emit(CodeGenFunction &CGF, Flags flags) override {
llvm::Value *BlockVarAddr;
@@ -1604,15 +1802,145 @@ struct CallBlockRelease final : EHScopeStack::Cleanup {
BlockVarAddr = Addr.getPointer();
}
- CGF.BuildBlockRelease(BlockVarAddr, FieldFlags);
+ CGF.BuildBlockRelease(BlockVarAddr, FieldFlags, CanThrow);
}
};
} // end anonymous namespace
+/// Check if \p T is a C++ class that has a destructor that can throw.
+bool CodeGenFunction::cxxDestructorCanThrow(QualType T) {
+ if (const auto *RD = T->getAsCXXRecordDecl())
+ if (const CXXDestructorDecl *DD = RD->getDestructor())
+ return DD->getType()->getAs<FunctionProtoType>()->canThrow();
+ return false;
+}
+
+// Return a string that has the information about a capture.
+static std::string getBlockCaptureStr(const BlockCaptureManagedEntity &E,
+ CaptureStrKind StrKind,
+ CharUnits BlockAlignment,
+ CodeGenModule &CGM) {
+ std::string Str;
+ ASTContext &Ctx = CGM.getContext();
+ const BlockDecl::Capture &CI = *E.CI;
+ QualType CaptureTy = CI.getVariable()->getType();
+
+ BlockCaptureEntityKind Kind;
+ BlockFieldFlags Flags;
+
+ // CaptureStrKind::Merged should be passed only when the operations and the
+ // flags are the same for copy and dispose.
+ assert((StrKind != CaptureStrKind::Merged ||
+ (E.CopyKind == E.DisposeKind && E.CopyFlags == E.DisposeFlags)) &&
+ "different operations and flags");
+
+ if (StrKind == CaptureStrKind::DisposeHelper) {
+ Kind = E.DisposeKind;
+ Flags = E.DisposeFlags;
+ } else {
+ Kind = E.CopyKind;
+ Flags = E.CopyFlags;
+ }
+
+ switch (Kind) {
+ case BlockCaptureEntityKind::CXXRecord: {
+ Str += "c";
+ SmallString<256> TyStr;
+ llvm::raw_svector_ostream Out(TyStr);
+ CGM.getCXXABI().getMangleContext().mangleTypeName(CaptureTy, Out);
+ Str += llvm::to_string(TyStr.size()) + TyStr.c_str();
+ break;
+ }
+ case BlockCaptureEntityKind::ARCWeak:
+ Str += "w";
+ break;
+ case BlockCaptureEntityKind::ARCStrong:
+ Str += "s";
+ break;
+ case BlockCaptureEntityKind::BlockObject: {
+ const VarDecl *Var = CI.getVariable();
+ unsigned F = Flags.getBitMask();
+ if (F & BLOCK_FIELD_IS_BYREF) {
+ Str += "r";
+ if (F & BLOCK_FIELD_IS_WEAK)
+ Str += "w";
+ else {
+ // If CaptureStrKind::Merged is passed, check both the copy expression
+ // and the destructor.
+ if (StrKind != CaptureStrKind::DisposeHelper) {
+ if (Ctx.getBlockVarCopyInit(Var).canThrow())
+ Str += "c";
+ }
+ if (StrKind != CaptureStrKind::CopyHelper) {
+ if (CodeGenFunction::cxxDestructorCanThrow(CaptureTy))
+ Str += "d";
+ }
+ }
+ } else {
+ assert((F & BLOCK_FIELD_IS_OBJECT) && "unexpected flag value");
+ if (F == BLOCK_FIELD_IS_BLOCK)
+ Str += "b";
+ else
+ Str += "o";
+ }
+ break;
+ }
+ case BlockCaptureEntityKind::NonTrivialCStruct: {
+ bool IsVolatile = CaptureTy.isVolatileQualified();
+ CharUnits Alignment =
+ BlockAlignment.alignmentAtOffset(E.Capture->getOffset());
+
+ Str += "n";
+ std::string FuncStr;
+ if (StrKind == CaptureStrKind::DisposeHelper)
+ FuncStr = CodeGenFunction::getNonTrivialDestructorStr(
+ CaptureTy, Alignment, IsVolatile, Ctx);
+ else
+ // If CaptureStrKind::Merged is passed, use the copy constructor string.
+ // It has all the information that the destructor string has.
+ FuncStr = CodeGenFunction::getNonTrivialCopyConstructorStr(
+ CaptureTy, Alignment, IsVolatile, Ctx);
+ // The underscore is necessary here because non-trivial copy constructor
+ // and destructor strings can start with a number.
+ Str += llvm::to_string(FuncStr.size()) + "_" + FuncStr;
+ break;
+ }
+ case BlockCaptureEntityKind::None:
+ break;
+ }
+
+ return Str;
+}
+
+static std::string getCopyDestroyHelperFuncName(
+ const SmallVectorImpl<BlockCaptureManagedEntity> &Captures,
+ CharUnits BlockAlignment, CaptureStrKind StrKind, CodeGenModule &CGM) {
+ assert((StrKind == CaptureStrKind::CopyHelper ||
+ StrKind == CaptureStrKind::DisposeHelper) &&
+ "unexpected CaptureStrKind");
+ std::string Name = StrKind == CaptureStrKind::CopyHelper
+ ? "__copy_helper_block_"
+ : "__destroy_helper_block_";
+ if (CGM.getLangOpts().Exceptions)
+ Name += "e";
+ if (CGM.getCodeGenOpts().ObjCAutoRefCountExceptions)
+ Name += "a";
+ Name += llvm::to_string(BlockAlignment.getQuantity()) + "_";
+
+ for (const BlockCaptureManagedEntity &E : Captures) {
+ Name += llvm::to_string(E.Capture->getOffset().getQuantity());
+ Name += getBlockCaptureStr(E, StrKind, BlockAlignment, CGM);
+ }
+
+ return Name;
+}
+
static void pushCaptureCleanup(BlockCaptureEntityKind CaptureKind,
Address Field, QualType CaptureType,
- BlockFieldFlags Flags, bool EHOnly,
- CodeGenFunction &CGF) {
+ BlockFieldFlags Flags, bool ForCopyHelper,
+ VarDecl *Var, CodeGenFunction &CGF) {
+ bool EHOnly = ForCopyHelper;
+
switch (CaptureKind) {
case BlockCaptureEntityKind::CXXRecord:
case BlockCaptureEntityKind::ARCWeak:
@@ -1634,15 +1962,34 @@ static void pushCaptureCleanup(BlockCaptureEntityKind CaptureKind,
case BlockCaptureEntityKind::BlockObject: {
if (!EHOnly || CGF.getLangOpts().Exceptions) {
CleanupKind Kind = EHOnly ? EHCleanup : NormalAndEHCleanup;
- CGF.enterByrefCleanup(Kind, Field, Flags, /*LoadBlockVarAddr*/ true);
+ // Calls to _Block_object_dispose along the EH path in the copy helper
+ // function don't throw as newly-copied __block variables always have a
+ // reference count of 2.
+ bool CanThrow =
+ !ForCopyHelper && CGF.cxxDestructorCanThrow(CaptureType);
+ CGF.enterByrefCleanup(Kind, Field, Flags, /*LoadBlockVarAddr*/ true,
+ CanThrow);
}
break;
}
case BlockCaptureEntityKind::None:
- llvm_unreachable("unexpected BlockCaptureEntityKind");
+ break;
}
}
+static void setBlockHelperAttributesVisibility(bool CapturesNonExternalType,
+ llvm::Function *Fn,
+ const CGFunctionInfo &FI,
+ CodeGenModule &CGM) {
+ if (CapturesNonExternalType) {
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
+ } else {
+ Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ Fn->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+ CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn);
+ CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
+ }
+}
/// Generate the copy-helper function for a block closure object:
/// static void block_copy_helper(block_t *dst, block_t *src);
/// The runtime will have previously initialized 'dst' by doing a
@@ -1653,42 +2000,51 @@ static void pushCaptureCleanup(BlockCaptureEntityKind CaptureKind,
/// the contents of an individual __block variable to the heap.
llvm::Constant *
CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
+ SmallVector<BlockCaptureManagedEntity, 4> CopiedCaptures;
+ findBlockCapturedManagedEntities(blockInfo, getLangOpts(), CopiedCaptures);
+ std::string FuncName =
+ getCopyDestroyHelperFuncName(CopiedCaptures, blockInfo.BlockAlign,
+ CaptureStrKind::CopyHelper, CGM);
+
+ if (llvm::GlobalValue *Func = CGM.getModule().getNamedValue(FuncName))
+ return llvm::ConstantExpr::getBitCast(Func, VoidPtrTy);
+
ASTContext &C = getContext();
+ QualType ReturnTy = C.VoidTy;
+
FunctionArgList args;
- ImplicitParamDecl DstDecl(getContext(), C.VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamDecl DstDecl(C, C.VoidPtrTy, ImplicitParamDecl::Other);
args.push_back(&DstDecl);
- ImplicitParamDecl SrcDecl(getContext(), C.VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamDecl SrcDecl(C, C.VoidPtrTy, ImplicitParamDecl::Other);
args.push_back(&SrcDecl);
const CGFunctionInfo &FI =
- CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args);
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, args);
// FIXME: it would be nice if these were mergeable with things with
// identical semantics.
llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI);
llvm::Function *Fn =
- llvm::Function::Create(LTy, llvm::GlobalValue::InternalLinkage,
- "__copy_helper_block_", &CGM.getModule());
+ llvm::Function::Create(LTy, llvm::GlobalValue::LinkOnceODRLinkage,
+ FuncName, &CGM.getModule());
- IdentifierInfo *II
- = &CGM.getContext().Idents.get("__copy_helper_block_");
+ IdentifierInfo *II = &C.Idents.get(FuncName);
- FunctionDecl *FD = FunctionDecl::Create(C,
- C.getTranslationUnitDecl(),
- SourceLocation(),
- SourceLocation(), II, C.VoidTy,
- nullptr, SC_Static,
- false,
- false);
+ SmallVector<QualType, 2> ArgTys;
+ ArgTys.push_back(C.VoidPtrTy);
+ ArgTys.push_back(C.VoidPtrTy);
+ QualType FunctionTy = C.getFunctionType(ReturnTy, ArgTys, {});
- CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
+ FunctionDecl *FD = FunctionDecl::Create(
+ C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
+ FunctionTy, nullptr, SC_Static, false, false);
- StartFunction(FD, C.VoidTy, Fn, FI, args);
- ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getLocStart()};
+ setBlockHelperAttributesVisibility(blockInfo.CapturesNonExternalType, Fn, FI,
+ CGM);
+ StartFunction(FD, ReturnTy, Fn, FI, args);
+ ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getBeginLoc()};
llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo();
Address src = GetAddrOfLocalVar(&SrcDecl);
@@ -1699,88 +2055,81 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) {
dst = Address(Builder.CreateLoad(dst), blockInfo.BlockAlign);
dst = Builder.CreateBitCast(dst, structPtrTy, "block.dest");
- SmallVector<BlockCaptureManagedEntity, 4> CopiedCaptures;
- findBlockCapturedManagedEntities(blockInfo, getLangOpts(), CopiedCaptures,
- computeCopyInfoForBlockCapture);
-
for (const auto &CopiedCapture : CopiedCaptures) {
- const BlockDecl::Capture &CI = CopiedCapture.CI;
- const CGBlockInfo::Capture &capture = CopiedCapture.Capture;
+ const BlockDecl::Capture &CI = *CopiedCapture.CI;
+ const CGBlockInfo::Capture &capture = *CopiedCapture.Capture;
QualType captureType = CI.getVariable()->getType();
- BlockFieldFlags flags = CopiedCapture.Flags;
+ BlockFieldFlags flags = CopiedCapture.CopyFlags;
unsigned index = capture.getIndex();
Address srcField = Builder.CreateStructGEP(src, index, capture.getOffset());
Address dstField = Builder.CreateStructGEP(dst, index, capture.getOffset());
- // If there's an explicit copy expression, we do that.
- if (CI.getCopyExpr()) {
- assert(CopiedCapture.Kind == BlockCaptureEntityKind::CXXRecord);
+ switch (CopiedCapture.CopyKind) {
+ case BlockCaptureEntityKind::CXXRecord:
+ // If there's an explicit copy expression, we do that.
+ assert(CI.getCopyExpr() && "copy expression for variable is missing");
EmitSynthesizedCXXCopyCtor(dstField, srcField, CI.getCopyExpr());
- } else if (CopiedCapture.Kind == BlockCaptureEntityKind::ARCWeak) {
+ break;
+ case BlockCaptureEntityKind::ARCWeak:
EmitARCCopyWeak(dstField, srcField);
- // If this is a C struct that requires non-trivial copy construction, emit a
- // call to its copy constructor.
- } else if (CopiedCapture.Kind ==
- BlockCaptureEntityKind::NonTrivialCStruct) {
+ break;
+ case BlockCaptureEntityKind::NonTrivialCStruct: {
+ // If this is a C struct that requires non-trivial copy construction,
+ // emit a call to its copy constructor.
QualType varType = CI.getVariable()->getType();
callCStructCopyConstructor(MakeAddrLValue(dstField, varType),
MakeAddrLValue(srcField, varType));
- } else {
+ break;
+ }
+ case BlockCaptureEntityKind::ARCStrong: {
llvm::Value *srcValue = Builder.CreateLoad(srcField, "blockcopy.src");
- if (CopiedCapture.Kind == BlockCaptureEntityKind::ARCStrong) {
- // At -O0, store null into the destination field (so that the
- // storeStrong doesn't over-release) and then call storeStrong.
- // This is a workaround to not having an initStrong call.
- if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
- auto *ty = cast<llvm::PointerType>(srcValue->getType());
- llvm::Value *null = llvm::ConstantPointerNull::get(ty);
- Builder.CreateStore(null, dstField);
- EmitARCStoreStrongCall(dstField, srcValue, true);
-
- // With optimization enabled, take advantage of the fact that
- // the blocks runtime guarantees a memcpy of the block data, and
- // just emit a retain of the src field.
- } else {
- EmitARCRetainNonBlock(srcValue);
-
- // Unless EH cleanup is required, we don't need this anymore, so kill
- // it. It's not quite worth the annoyance to avoid creating it in the
- // first place.
- if (!needsEHCleanup(captureType.isDestructedType()))
- cast<llvm::Instruction>(dstField.getPointer())->eraseFromParent();
- }
+ // At -O0, store null into the destination field (so that the
+ // storeStrong doesn't over-release) and then call storeStrong.
+ // This is a workaround to not having an initStrong call.
+ if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
+ auto *ty = cast<llvm::PointerType>(srcValue->getType());
+ llvm::Value *null = llvm::ConstantPointerNull::get(ty);
+ Builder.CreateStore(null, dstField);
+ EmitARCStoreStrongCall(dstField, srcValue, true);
+
+ // With optimization enabled, take advantage of the fact that
+ // the blocks runtime guarantees a memcpy of the block data, and
+ // just emit a retain of the src field.
} else {
- assert(CopiedCapture.Kind == BlockCaptureEntityKind::BlockObject);
- srcValue = Builder.CreateBitCast(srcValue, VoidPtrTy);
- llvm::Value *dstAddr =
- Builder.CreateBitCast(dstField.getPointer(), VoidPtrTy);
- llvm::Value *args[] = {
- dstAddr, srcValue, llvm::ConstantInt::get(Int32Ty, flags.getBitMask())
- };
-
- const VarDecl *variable = CI.getVariable();
- bool copyCanThrow = false;
- if (CI.isByRef() && variable->getType()->getAsCXXRecordDecl()) {
- const Expr *copyExpr =
- CGM.getContext().getBlockVarCopyInits(variable);
- if (copyExpr) {
- copyCanThrow = true; // FIXME: reuse the noexcept logic
- }
- }
+ EmitARCRetainNonBlock(srcValue);
- if (copyCanThrow) {
- EmitRuntimeCallOrInvoke(CGM.getBlockObjectAssign(), args);
- } else {
- EmitNounwindRuntimeCall(CGM.getBlockObjectAssign(), args);
- }
+ // Unless EH cleanup is required, we don't need this anymore, so kill
+ // it. It's not quite worth the annoyance to avoid creating it in the
+ // first place.
+ if (!needsEHCleanup(captureType.isDestructedType()))
+ cast<llvm::Instruction>(dstField.getPointer())->eraseFromParent();
}
+ break;
+ }
+ case BlockCaptureEntityKind::BlockObject: {
+ llvm::Value *srcValue = Builder.CreateLoad(srcField, "blockcopy.src");
+ srcValue = Builder.CreateBitCast(srcValue, VoidPtrTy);
+ llvm::Value *dstAddr =
+ Builder.CreateBitCast(dstField.getPointer(), VoidPtrTy);
+ llvm::Value *args[] = {
+ dstAddr, srcValue, llvm::ConstantInt::get(Int32Ty, flags.getBitMask())
+ };
+
+ if (CI.isByRef() && C.getBlockVarCopyInit(CI.getVariable()).canThrow())
+ EmitRuntimeCallOrInvoke(CGM.getBlockObjectAssign(), args);
+ else
+ EmitNounwindRuntimeCall(CGM.getBlockObjectAssign(), args);
+ break;
+ }
+ case BlockCaptureEntityKind::None:
+ continue;
}
// Ensure that we destroy the copied object if an exception is thrown later
// in the helper function.
- pushCaptureCleanup(CopiedCapture.Kind, dstField, captureType, flags, /*EHOnly*/ true,
- *this);
+ pushCaptureCleanup(CopiedCapture.CopyKind, dstField, captureType, flags,
+ /*ForCopyHelper*/ true, CI.getVariable(), *this);
}
FinishFunction();
@@ -1800,7 +2149,7 @@ getBlockFieldFlagsForObjCObjectPointer(const BlockDecl::Capture &CI,
static std::pair<BlockCaptureEntityKind, BlockFieldFlags>
computeDestroyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T,
const LangOptions &LangOpts) {
- if (CI.isByRef()) {
+ if (CI.isEscapingByref()) {
BlockFieldFlags Flags = BLOCK_FIELD_IS_BYREF;
if (T.isObjCGCWeak())
Flags |= BLOCK_FIELD_IS_WEAK;
@@ -1844,37 +2193,50 @@ computeDestroyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T,
/// variable.
llvm::Constant *
CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) {
+ SmallVector<BlockCaptureManagedEntity, 4> DestroyedCaptures;
+ findBlockCapturedManagedEntities(blockInfo, getLangOpts(), DestroyedCaptures);
+ std::string FuncName =
+ getCopyDestroyHelperFuncName(DestroyedCaptures, blockInfo.BlockAlign,
+ CaptureStrKind::DisposeHelper, CGM);
+
+ if (llvm::GlobalValue *Func = CGM.getModule().getNamedValue(FuncName))
+ return llvm::ConstantExpr::getBitCast(Func, VoidPtrTy);
+
ASTContext &C = getContext();
+ QualType ReturnTy = C.VoidTy;
+
FunctionArgList args;
- ImplicitParamDecl SrcDecl(getContext(), C.VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamDecl SrcDecl(C, C.VoidPtrTy, ImplicitParamDecl::Other);
args.push_back(&SrcDecl);
const CGFunctionInfo &FI =
- CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args);
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, args);
// FIXME: We'd like to put these into a mergable by content, with
// internal linkage.
llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI);
llvm::Function *Fn =
- llvm::Function::Create(LTy, llvm::GlobalValue::InternalLinkage,
- "__destroy_helper_block_", &CGM.getModule());
+ llvm::Function::Create(LTy, llvm::GlobalValue::LinkOnceODRLinkage,
+ FuncName, &CGM.getModule());
- IdentifierInfo *II
- = &CGM.getContext().Idents.get("__destroy_helper_block_");
+ IdentifierInfo *II = &C.Idents.get(FuncName);
+
+ SmallVector<QualType, 1> ArgTys;
+ ArgTys.push_back(C.VoidPtrTy);
+ QualType FunctionTy = C.getFunctionType(ReturnTy, ArgTys, {});
- FunctionDecl *FD = FunctionDecl::Create(C, C.getTranslationUnitDecl(),
- SourceLocation(),
- SourceLocation(), II, C.VoidTy,
- nullptr, SC_Static,
- false, false);
+ FunctionDecl *FD = FunctionDecl::Create(
+ C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
+ FunctionTy, nullptr, SC_Static, false, false);
- CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
+ setBlockHelperAttributesVisibility(blockInfo.CapturesNonExternalType, Fn, FI,
+ CGM);
+ StartFunction(FD, ReturnTy, Fn, FI, args);
+ markAsIgnoreThreadCheckingAtRuntime(Fn);
- StartFunction(FD, C.VoidTy, Fn, FI, args);
- ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getLocStart()};
+ ApplyDebugLocation NL{*this, blockInfo.getBlockExpr()->getBeginLoc()};
llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo();
@@ -1884,20 +2246,17 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) {
CodeGenFunction::RunCleanupsScope cleanups(*this);
- SmallVector<BlockCaptureManagedEntity, 4> DestroyedCaptures;
- findBlockCapturedManagedEntities(blockInfo, getLangOpts(), DestroyedCaptures,
- computeDestroyInfoForBlockCapture);
-
for (const auto &DestroyedCapture : DestroyedCaptures) {
- const BlockDecl::Capture &CI = DestroyedCapture.CI;
- const CGBlockInfo::Capture &capture = DestroyedCapture.Capture;
- BlockFieldFlags flags = DestroyedCapture.Flags;
+ const BlockDecl::Capture &CI = *DestroyedCapture.CI;
+ const CGBlockInfo::Capture &capture = *DestroyedCapture.Capture;
+ BlockFieldFlags flags = DestroyedCapture.DisposeFlags;
Address srcField =
Builder.CreateStructGEP(src, capture.getIndex(), capture.getOffset());
- pushCaptureCleanup(DestroyedCapture.Kind, srcField,
- CI.getVariable()->getType(), flags, /*EHOnly*/ false, *this);
+ pushCaptureCleanup(DestroyedCapture.DisposeKind, srcField,
+ CI.getVariable()->getType(), flags,
+ /*ForCopyHelper*/ false, CI.getVariable(), *this);
}
cleanups.ForceCleanup();
@@ -1937,7 +2296,7 @@ public:
field = CGF.Builder.CreateBitCast(field, CGF.Int8PtrTy->getPointerTo(0));
llvm::Value *value = CGF.Builder.CreateLoad(field);
- CGF.BuildBlockRelease(value, Flags | BLOCK_BYREF_CALLER);
+ CGF.BuildBlockRelease(value, Flags | BLOCK_BYREF_CALLER, false);
}
void profileImpl(llvm::FoldingSetNodeID &id) const override {
@@ -2093,19 +2452,17 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo,
BlockByrefHelpers &generator) {
ASTContext &Context = CGF.getContext();
- QualType R = Context.VoidTy;
+ QualType ReturnTy = Context.VoidTy;
FunctionArgList args;
- ImplicitParamDecl Dst(CGF.getContext(), Context.VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamDecl Dst(Context, Context.VoidPtrTy, ImplicitParamDecl::Other);
args.push_back(&Dst);
- ImplicitParamDecl Src(CGF.getContext(), Context.VoidPtrTy,
- ImplicitParamDecl::Other);
+ ImplicitParamDecl Src(Context, Context.VoidPtrTy, ImplicitParamDecl::Other);
args.push_back(&Src);
const CGFunctionInfo &FI =
- CGF.CGM.getTypes().arrangeBuiltinFunctionDeclaration(R, args);
+ CGF.CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, args);
llvm::FunctionType *LTy = CGF.CGM.getTypes().GetFunctionType(FI);
@@ -2118,16 +2475,18 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo,
IdentifierInfo *II
= &Context.Idents.get("__Block_byref_object_copy_");
- FunctionDecl *FD = FunctionDecl::Create(Context,
- Context.getTranslationUnitDecl(),
- SourceLocation(),
- SourceLocation(), II, R, nullptr,
- SC_Static,
- false, false);
+ SmallVector<QualType, 2> ArgTys;
+ ArgTys.push_back(Context.VoidPtrTy);
+ ArgTys.push_back(Context.VoidPtrTy);
+ QualType FunctionTy = Context.getFunctionType(ReturnTy, ArgTys, {});
+
+ FunctionDecl *FD = FunctionDecl::Create(
+ Context, Context.getTranslationUnitDecl(), SourceLocation(),
+ SourceLocation(), II, FunctionTy, nullptr, SC_Static, false, false);
CGF.CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
- CGF.StartFunction(FD, R, Fn, FI, args);
+ CGF.StartFunction(FD, ReturnTy, Fn, FI, args);
if (generator.needsCopy()) {
llvm::Type *byrefPtrType = byrefInfo.Type->getPointerTo(0);
@@ -2192,12 +2551,13 @@ generateByrefDisposeHelper(CodeGenFunction &CGF,
IdentifierInfo *II
= &Context.Idents.get("__Block_byref_object_dispose_");
- FunctionDecl *FD = FunctionDecl::Create(Context,
- Context.getTranslationUnitDecl(),
- SourceLocation(),
- SourceLocation(), II, R, nullptr,
- SC_Static,
- false, false);
+ SmallVector<QualType, 1> ArgTys;
+ ArgTys.push_back(Context.VoidPtrTy);
+ QualType FunctionTy = Context.getFunctionType(R, ArgTys, {});
+
+ FunctionDecl *FD = FunctionDecl::Create(
+ Context, Context.getTranslationUnitDecl(), SourceLocation(),
+ SourceLocation(), II, FunctionTy, nullptr, SC_Static, false, false);
CGF.CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
@@ -2254,6 +2614,9 @@ BlockByrefHelpers *
CodeGenFunction::buildByrefHelpers(llvm::StructType &byrefType,
const AutoVarEmission &emission) {
const VarDecl &var = *emission.Variable;
+ assert(var.isEscapingByref() &&
+ "only escaping __block variables need byref helpers");
+
QualType type = var.getType();
auto &byrefInfo = getBlockByrefInfo(&var);
@@ -2264,7 +2627,8 @@ CodeGenFunction::buildByrefHelpers(llvm::StructType &byrefType,
byrefInfo.ByrefAlignment.alignmentAtOffset(byrefInfo.FieldOffset);
if (const CXXRecordDecl *record = type->getAsCXXRecordDecl()) {
- const Expr *copyExpr = CGM.getContext().getBlockVarCopyInits(&var);
+ const Expr *copyExpr =
+ CGM.getContext().getBlockVarCopyInit(&var).getCopyExpr();
if (!copyExpr && record->hasTrivialDestructor()) return nullptr;
return ::buildByrefHelpers(
@@ -2567,19 +2931,25 @@ void CodeGenFunction::emitByrefStructureInit(const AutoVarEmission &emission) {
}
}
-void CodeGenFunction::BuildBlockRelease(llvm::Value *V, BlockFieldFlags flags) {
+void CodeGenFunction::BuildBlockRelease(llvm::Value *V, BlockFieldFlags flags,
+ bool CanThrow) {
llvm::Value *F = CGM.getBlockObjectDispose();
llvm::Value *args[] = {
Builder.CreateBitCast(V, Int8PtrTy),
llvm::ConstantInt::get(Int32Ty, flags.getBitMask())
};
- EmitNounwindRuntimeCall(F, args); // FIXME: throwing destructors?
+
+ if (CanThrow)
+ EmitRuntimeCallOrInvoke(F, args);
+ else
+ EmitNounwindRuntimeCall(F, args);
}
void CodeGenFunction::enterByrefCleanup(CleanupKind Kind, Address Addr,
BlockFieldFlags Flags,
- bool LoadBlockVarAddr) {
- EHStack.pushCleanup<CallBlockRelease>(Kind, Addr, Flags, LoadBlockVarAddr);
+ bool LoadBlockVarAddr, bool CanThrow) {
+ EHStack.pushCleanup<CallBlockRelease>(Kind, Addr, Flags, LoadBlockVarAddr,
+ CanThrow);
}
/// Adjust the declaration of something from the blocks API.
diff --git a/lib/CodeGen/CGBlocks.h b/lib/CodeGen/CGBlocks.h
index 5abf82b3f6e1..3f9fc16d9b10 100644
--- a/lib/CodeGen/CGBlocks.h
+++ b/lib/CodeGen/CGBlocks.h
@@ -60,7 +60,7 @@ enum BlockLiteralFlags {
BLOCK_IS_GLOBAL = (1 << 28),
BLOCK_USE_STRET = (1 << 29),
BLOCK_HAS_SIGNATURE = (1 << 30),
- BLOCK_HAS_EXTENDED_LAYOUT = (1 << 31)
+ BLOCK_HAS_EXTENDED_LAYOUT = (1u << 31)
};
class BlockFlags {
uint32_t flags;
@@ -132,6 +132,9 @@ public:
friend bool operator&(BlockFieldFlags l, BlockFieldFlags r) {
return (l.flags & r.flags);
}
+ bool operator==(BlockFieldFlags Other) const {
+ return flags == Other.flags;
+ }
};
inline BlockFieldFlags operator|(BlockFieldFlag_t l, BlockFieldFlag_t r) {
return BlockFieldFlags(l) | BlockFieldFlags(r);
@@ -231,6 +234,11 @@ public:
/// and their layout meta-data has been generated.
bool HasCapturedVariableLayout : 1;
+ /// Indicates whether an object of a non-external C++ class is captured. This
+ /// bit is used to determine the linkage of the block copy/destroy helper
+ /// functions.
+ bool CapturesNonExternalType : 1;
+
/// The mapping of allocated indexes within the block.
llvm::DenseMap<const VarDecl*, Capture> Captures;
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index e99121c46d9b..a718f2f19aa6 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -21,10 +21,11 @@
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
-#include "clang/Analysis/Analyses/OSLog.h"
+#include "clang/AST/OSLog.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
@@ -93,11 +94,11 @@ static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
return V;
}
-/// Utility to insert an atomic instruction based on Instrinsic::ID
+/// Utility to insert an atomic instruction based on Intrinsic::ID
/// and the expression node.
-static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
- llvm::AtomicRMWInst::BinOp Kind,
- const CallExpr *E) {
+static Value *MakeBinaryAtomicValue(
+ CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
+ AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
QualType T = E->getType();
assert(E->getArg(0)->getType()->isPointerType());
assert(CGF.getContext().hasSameUnqualifiedType(T,
@@ -119,7 +120,7 @@ static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
Args[1] = EmitToInt(CGF, Args[1], T, IntType);
llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
- Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
+ Kind, Args[0], Args[1], Ordering);
return EmitFromInt(CGF, Result, T, ValueType);
}
@@ -151,7 +152,7 @@ static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
}
-/// Utility to insert an atomic instruction based Instrinsic::ID and
+/// Utility to insert an atomic instruction based on Intrinsic::ID and
/// the expression node, where the return value is the result of the
/// operation.
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
@@ -200,6 +201,9 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
/// cmpxchg result or the old value.
///
/// @returns result of cmpxchg, according to ReturnBool
+///
+/// Note: In order to lower Microsoft's _InterlockedCompareExchange* intrinsics,
+/// invoke the function EmitAtomicCmpXchgForMSIntrin instead.
static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
bool ReturnBool) {
QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
@@ -230,6 +234,72 @@ static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
ValueType);
}
+/// This function should be invoked to emit atomic cmpxchg for Microsoft's
+/// _InterlockedCompareExchange* intrinsics, which have the following signature:
+/// T _InterlockedCompareExchange(T volatile *Destination,
+/// T Exchange,
+/// T Comparand);
+///
+/// Whereas the llvm 'cmpxchg' instruction has the following syntax:
+/// cmpxchg *Destination, Comparand, Exchange.
+/// So we need to swap Comparand and Exchange when invoking
+/// CreateAtomicCmpXchg. This is also why we cannot reuse the utility function
+/// MakeAtomicCmpXchgValue above: it expects the arguments to already be
+/// swapped.
+static
+Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
+ AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
+ assert(E->getArg(0)->getType()->isPointerType());
+ assert(CGF.getContext().hasSameUnqualifiedType(
+ E->getType(), E->getArg(0)->getType()->getPointeeType()));
+ assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
+ E->getArg(1)->getType()));
+ assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
+ E->getArg(2)->getType()));
+
+ auto *Destination = CGF.EmitScalarExpr(E->getArg(0));
+ auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
+ auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));
+
+ // For Release ordering, the failure ordering should be Monotonic.
+ auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
+ AtomicOrdering::Monotonic :
+ SuccessOrdering;
+
+ auto *Result = CGF.Builder.CreateAtomicCmpXchg(
+ Destination, Comparand, Exchange,
+ SuccessOrdering, FailureOrdering);
+ Result->setVolatile(true);
+ return CGF.Builder.CreateExtractValue(Result, 0);
+}
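A minimal caller sketch (an illustration under assumed names, not part of this patch)
showing why the operands are swapped: in the source-level intrinsic the exchange value
comes second and the comparand third, while cmpxchg wants the comparand first.

    #include <intrin.h>

    long try_claim(long volatile *Flag) {
      // Stores 1 into *Flag only if it currently holds 0, and returns the
      // value *Flag held before the operation.
      return _InterlockedCompareExchange(Flag, /*Exchange=*/1, /*Comparand=*/0);
    }

With the default SequentiallyConsistent ordering this lowers to roughly
"cmpxchg volatile i32* %Flag, i32 0, i32 1 seq_cst seq_cst" followed by an
extractvalue of element 0, i.e. the previous value.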
+
+static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
+ AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
+ assert(E->getArg(0)->getType()->isPointerType());
+
+ auto *IntTy = CGF.ConvertType(E->getType());
+ auto *Result = CGF.Builder.CreateAtomicRMW(
+ AtomicRMWInst::Add,
+ CGF.EmitScalarExpr(E->getArg(0)),
+ ConstantInt::get(IntTy, 1),
+ Ordering);
+ return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
+}
+
+static Value *EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E,
+ AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
+ assert(E->getArg(0)->getType()->isPointerType());
+
+ auto *IntTy = CGF.ConvertType(E->getType());
+ auto *Result = CGF.Builder.CreateAtomicRMW(
+ AtomicRMWInst::Sub,
+ CGF.EmitScalarExpr(E->getArg(0)),
+ ConstantInt::get(IntTy, 1),
+ Ordering);
+ return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
+}
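The trailing CreateAdd/CreateSub of 1 in the two helpers above is needed because
atomicrmw yields the value the location held before the operation, while the Microsoft
intrinsics return the value after it. A hypothetical caller (illustration only):

    #include <intrin.h>

    long bump(long volatile *Counter) {
      // Expected to lower to "atomicrmw add ... 1" plus an add of 1, so the
      // function returns the incremented value, matching MSVC semantics.
      return _InterlockedIncrement(Counter);
    }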
+
// Emit a simple mangled intrinsic that has 1 argument and a return type
// matching the argument type.
static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
@@ -316,7 +386,7 @@ static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
const CallExpr *E, llvm::Constant *calleeValue) {
- CGCallee callee = CGCallee::forDirect(calleeValue, FD);
+ CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
}
@@ -461,7 +531,7 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
assert(DIter != LocalDeclMap.end());
return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
- getContext().getSizeType(), E->getLocStart());
+ getContext().getSizeType(), E->getBeginLoc());
}
}
@@ -485,7 +555,7 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
}
namespace {
-/// A struct to generically desribe a bit test intrinsic.
+/// A struct to generically describe a bit test intrinsic.
struct BitTest {
enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
enum InterlockingKind : uint8_t {
@@ -711,8 +781,11 @@ static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
} else {
Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
Arg1Ty = CGF.Int8PtrTy;
- Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::frameaddress),
- llvm::ConstantInt::get(CGF.Int32Ty, 0));
+ if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
+ Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::sponentry));
+ } else
+ Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::frameaddress),
+ llvm::ConstantInt::get(CGF.Int32Ty, 0));
}
// Mark the call site and declaration with ReturnsTwice.
@@ -745,6 +818,30 @@ enum class CodeGenFunction::MSVCIntrin {
_InterlockedIncrement,
_InterlockedOr,
_InterlockedXor,
+ _InterlockedExchangeAdd_acq,
+ _InterlockedExchangeAdd_rel,
+ _InterlockedExchangeAdd_nf,
+ _InterlockedExchange_acq,
+ _InterlockedExchange_rel,
+ _InterlockedExchange_nf,
+ _InterlockedCompareExchange_acq,
+ _InterlockedCompareExchange_rel,
+ _InterlockedCompareExchange_nf,
+ _InterlockedOr_acq,
+ _InterlockedOr_rel,
+ _InterlockedOr_nf,
+ _InterlockedXor_acq,
+ _InterlockedXor_rel,
+ _InterlockedXor_nf,
+ _InterlockedAnd_acq,
+ _InterlockedAnd_rel,
+ _InterlockedAnd_nf,
+ _InterlockedIncrement_acq,
+ _InterlockedIncrement_rel,
+ _InterlockedIncrement_nf,
+ _InterlockedDecrement_acq,
+ _InterlockedDecrement_rel,
+ _InterlockedDecrement_nf,
__fastfail,
};
@@ -811,25 +908,74 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
case MSVCIntrin::_InterlockedXor:
return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
-
- case MSVCIntrin::_InterlockedDecrement: {
- llvm::Type *IntTy = ConvertType(E->getType());
- AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
- AtomicRMWInst::Sub,
- EmitScalarExpr(E->getArg(0)),
- ConstantInt::get(IntTy, 1),
- llvm::AtomicOrdering::SequentiallyConsistent);
- return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1));
- }
- case MSVCIntrin::_InterlockedIncrement: {
- llvm::Type *IntTy = ConvertType(E->getType());
- AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
- AtomicRMWInst::Add,
- EmitScalarExpr(E->getArg(0)),
- ConstantInt::get(IntTy, 1),
- llvm::AtomicOrdering::SequentiallyConsistent);
- return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1));
- }
+ case MSVCIntrin::_InterlockedExchangeAdd_acq:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
+ AtomicOrdering::Acquire);
+ case MSVCIntrin::_InterlockedExchangeAdd_rel:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
+ AtomicOrdering::Release);
+ case MSVCIntrin::_InterlockedExchangeAdd_nf:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
+ AtomicOrdering::Monotonic);
+ case MSVCIntrin::_InterlockedExchange_acq:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
+ AtomicOrdering::Acquire);
+ case MSVCIntrin::_InterlockedExchange_rel:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
+ AtomicOrdering::Release);
+ case MSVCIntrin::_InterlockedExchange_nf:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
+ AtomicOrdering::Monotonic);
+ case MSVCIntrin::_InterlockedCompareExchange_acq:
+ return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
+ case MSVCIntrin::_InterlockedCompareExchange_rel:
+ return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
+ case MSVCIntrin::_InterlockedCompareExchange_nf:
+ return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
+ case MSVCIntrin::_InterlockedOr_acq:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
+ AtomicOrdering::Acquire);
+ case MSVCIntrin::_InterlockedOr_rel:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
+ AtomicOrdering::Release);
+ case MSVCIntrin::_InterlockedOr_nf:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
+ AtomicOrdering::Monotonic);
+ case MSVCIntrin::_InterlockedXor_acq:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
+ AtomicOrdering::Acquire);
+ case MSVCIntrin::_InterlockedXor_rel:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
+ AtomicOrdering::Release);
+ case MSVCIntrin::_InterlockedXor_nf:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
+ AtomicOrdering::Monotonic);
+ case MSVCIntrin::_InterlockedAnd_acq:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
+ AtomicOrdering::Acquire);
+ case MSVCIntrin::_InterlockedAnd_rel:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
+ AtomicOrdering::Release);
+ case MSVCIntrin::_InterlockedAnd_nf:
+ return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
+ AtomicOrdering::Monotonic);
+ case MSVCIntrin::_InterlockedIncrement_acq:
+ return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
+ case MSVCIntrin::_InterlockedIncrement_rel:
+ return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
+ case MSVCIntrin::_InterlockedIncrement_nf:
+ return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
+ case MSVCIntrin::_InterlockedDecrement_acq:
+ return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
+ case MSVCIntrin::_InterlockedDecrement_rel:
+ return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
+ case MSVCIntrin::_InterlockedDecrement_nf:
+ return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
+
+ case MSVCIntrin::_InterlockedDecrement:
+ return EmitAtomicDecrementValue(*this, E);
+ case MSVCIntrin::_InterlockedIncrement:
+ return EmitAtomicIncrementValue(*this, E);
case MSVCIntrin::__fastfail: {
// Request immediate process termination from the kernel. The instruction
@@ -923,35 +1069,42 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
if (llvm::Function *F = CGM.getModule().getFunction(Name))
return F;
+ llvm::SmallVector<QualType, 4> ArgTys;
llvm::SmallVector<ImplicitParamDecl, 4> Params;
Params.emplace_back(Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"),
Ctx.VoidPtrTy, ImplicitParamDecl::Other);
+ ArgTys.emplace_back(Ctx.VoidPtrTy);
for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
char Size = Layout.Items[I].getSizeByte();
if (!Size)
continue;
+ QualType ArgTy = getOSLogArgType(Ctx, Size);
Params.emplace_back(
Ctx, nullptr, SourceLocation(),
- &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)),
- getOSLogArgType(Ctx, Size), ImplicitParamDecl::Other);
+ &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
+ ImplicitParamDecl::Other);
+ ArgTys.emplace_back(ArgTy);
}
FunctionArgList Args;
for (auto &P : Params)
Args.push_back(&P);
+ QualType ReturnTy = Ctx.VoidTy;
+ QualType FunctionTy = Ctx.getFunctionType(ReturnTy, ArgTys, {});
+
// The helper function has linkonce_odr linkage to enable the linker to merge
// identical functions. To ensure the merging always happens, 'noinline' is
// attached to the function when compiling with -Oz.
const CGFunctionInfo &FI =
- CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args);
llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
llvm::Function *Fn = llvm::Function::Create(
FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
- CGM.SetLLVMFunctionAttributes(nullptr, FI, Fn);
+ CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn);
CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
// Attach 'noinline' at -Oz.
@@ -962,9 +1115,9 @@ llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
IdentifierInfo *II = &Ctx.Idents.get(Name);
FunctionDecl *FD = FunctionDecl::Create(
Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
- Ctx.VoidTy, nullptr, SC_PrivateExtern, false, false);
+ FunctionTy, nullptr, SC_PrivateExtern, false, false);
- StartFunction(FD, Ctx.VoidTy, Fn, FI, Args);
+ StartFunction(FD, ReturnTy, Fn, FI, Args);
// Create a scope with an artificial location for the body of this function.
auto AL = ApplyDebugLocation::CreateArtificial(*this);
@@ -1024,7 +1177,12 @@ RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
llvm::Value *ArgVal;
- if (const Expr *TheExpr = Item.getExpr()) {
+ if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
+ uint64_t Val = 0;
+ for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
+ Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
+ ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
+ } else if (const Expr *TheExpr = Item.getExpr()) {
ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
// Check if this is a retainable type.
@@ -1077,7 +1235,7 @@ static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
WidthAndSignedness Op2Info,
WidthAndSignedness ResultInfo) {
return BuiltinID == Builtin::BI__builtin_mul_overflow &&
- Op1Info.Width == Op2Info.Width && Op1Info.Width >= ResultInfo.Width &&
+ std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
Op1Info.Signed != Op2Info.Signed;
}
@@ -1098,11 +1256,20 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
+ unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
+ unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
+
+ // One of the operands may be smaller than the other. If so, [s|z]ext it.
+ if (SignedOpWidth < UnsignedOpWidth)
+ Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
+ if (UnsignedOpWidth < SignedOpWidth)
+ Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");
llvm::Type *OpTy = Signed->getType();
llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
llvm::Type *ResTy = ResultPtr.getElementType();
+ unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
// Take the absolute value of the signed operand.
llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
@@ -1120,8 +1287,8 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
if (ResultInfo.Signed) {
// Signed overflow occurs if the result is greater than INT_MAX or lesser
// than INT_MIN, i.e when |Result| > (INT_MAX + IsNegative).
- auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width)
- .zextOrSelf(Op1Info.Width);
+ auto IntMax =
+ llvm::APInt::getSignedMaxValue(ResultInfo.Width).zextOrSelf(OpWidth);
llvm::Value *MaxResult =
CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
CGF.Builder.CreateZExt(IsNegative, OpTy));
@@ -1139,9 +1306,9 @@ EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
llvm::Value *Underflow = CGF.Builder.CreateAnd(
IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
- if (ResultInfo.Width < Op1Info.Width) {
+ if (ResultInfo.Width < OpWidth) {
auto IntMax =
- llvm::APInt::getMaxValue(ResultInfo.Width).zext(Op1Info.Width);
+ llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
@@ -1252,9 +1419,61 @@ static llvm::Value *dumpRecord(CodeGenFunction &CGF, QualType RType,
return Res;
}
-RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
- unsigned BuiltinID, const CallExpr *E,
+static bool
+TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty,
+ llvm::SmallPtrSetImpl<const Decl *> &Seen) {
+ if (const auto *Arr = Ctx.getAsArrayType(Ty))
+ Ty = Ctx.getBaseElementType(Arr);
+
+ const auto *Record = Ty->getAsCXXRecordDecl();
+ if (!Record)
+ return false;
+
+ // We've already checked this type, or are in the process of checking it.
+ if (!Seen.insert(Record).second)
+ return false;
+
+ assert(Record->hasDefinition() &&
+ "Incomplete types should already be diagnosed");
+
+ if (Record->isDynamicClass())
+ return true;
+
+ for (FieldDecl *F : Record->fields()) {
+ if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
+ return true;
+ }
+ return false;
+}
+
+/// Determine if the specified type requires laundering by checking if it is a
+/// dynamic class type or contains a subobject which is a dynamic class type.
+static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) {
+ if (!CGM.getCodeGenOpts().StrictVTablePointers)
+ return false;
+ llvm::SmallPtrSet<const Decl *, 16> Seen;
+ return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
+}
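A sketch of the case this helper distinguishes (illustrative code, not part of the
patch): with -fstrict-vtable-pointers, laundering a pointer to a dynamic class (or to
an aggregate containing a dynamic-class subobject) is emitted through
llvm.launder.invariant.group; for any other type, the __builtin_launder handling
further down simply returns the pointer unchanged.

    struct Dyn { virtual void f(); int x; };

    Dyn *relook(Dyn *p) {
      // Under -fstrict-vtable-pointers this becomes a call to
      // llvm.launder.invariant.group; otherwise it is a no-op.
      return __builtin_launder(p);
    }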
+
+RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
+ llvm::Value *Src = EmitScalarExpr(E->getArg(0));
+ llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
+
+ // The builtin's shift arg may have a different type than the source arg and
+ // result, but the LLVM intrinsic uses the same type for all values.
+ llvm::Type *Ty = Src->getType();
+ ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);
+
+ // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same.
+ unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
+ Value *F = CGM.getIntrinsic(IID, Ty);
+ return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
+}
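For reference, a rotate written with the builtin (illustrative caller, not part of the
patch) now maps onto a funnel shift whose first two operands are the same value:

    unsigned rol32(unsigned X, unsigned N) {
      // Expected to lower to: call i32 @llvm.fshl.i32(i32 %X, i32 %X, i32 %N)
      return __builtin_rotateleft32(X, N);
    }

The Microsoft _rotl/_rotr family handled in the switch below takes the same path, which
is why the hand-rolled shift-and-or sequences are removed later in this patch.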
+
+RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
+ const CallExpr *E,
ReturnValueSlot ReturnValue) {
+ const FunctionDecl *FD = GD.getDecl()->getAsFunction();
// See if we can constant fold this builtin. If so, don't emit it at all.
Expr::EvalResult Result;
if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
@@ -1537,6 +1756,26 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
return RValue::get(ComplexVal.second);
}
+ case Builtin::BI__builtin_clrsb:
+ case Builtin::BI__builtin_clrsbl:
+ case Builtin::BI__builtin_clrsbll: {
+ // clrsb(x) -> clz(x < 0 ? ~x : x) - 1
+ Value *ArgValue = EmitScalarExpr(E->getArg(0));
+
+ llvm::Type *ArgType = ArgValue->getType();
+ Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
+
+ llvm::Type *ResultType = ConvertType(E->getType());
+ Value *Zero = llvm::Constant::getNullValue(ArgType);
+ Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
+ Value *Inverse = Builder.CreateNot(ArgValue, "not");
+ Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
+ Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
+ Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
+ Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
+ "cast");
+ return RValue::get(Result);
+ }
case Builtin::BI__builtin_ctzs:
case Builtin::BI__builtin_ctz:
case Builtin::BI__builtin_ctzl:
@@ -1609,6 +1848,21 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
"cast");
return RValue::get(Result);
}
+ case Builtin::BI__lzcnt16:
+ case Builtin::BI__lzcnt:
+ case Builtin::BI__lzcnt64: {
+ Value *ArgValue = EmitScalarExpr(E->getArg(0));
+
+ llvm::Type *ArgType = ArgValue->getType();
+ Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
+
+ llvm::Type *ResultType = ConvertType(E->getType());
+ Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
+ if (Result->getType() != ResultType)
+ Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
+ "cast");
+ return RValue::get(Result);
+ }
case Builtin::BI__popcnt16:
case Builtin::BI__popcnt:
case Builtin::BI__popcnt64:
@@ -1627,46 +1881,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
"cast");
return RValue::get(Result);
}
- case Builtin::BI_rotr8:
- case Builtin::BI_rotr16:
- case Builtin::BI_rotr:
- case Builtin::BI_lrotr:
- case Builtin::BI_rotr64: {
- Value *Val = EmitScalarExpr(E->getArg(0));
- Value *Shift = EmitScalarExpr(E->getArg(1));
-
- llvm::Type *ArgType = Val->getType();
- Shift = Builder.CreateIntCast(Shift, ArgType, false);
- unsigned ArgWidth = ArgType->getIntegerBitWidth();
- Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
-
- Value *RightShiftAmt = Builder.CreateAnd(Shift, Mask);
- Value *RightShifted = Builder.CreateLShr(Val, RightShiftAmt);
- Value *LeftShiftAmt = Builder.CreateAnd(Builder.CreateNeg(Shift), Mask);
- Value *LeftShifted = Builder.CreateShl(Val, LeftShiftAmt);
- Value *Result = Builder.CreateOr(LeftShifted, RightShifted);
- return RValue::get(Result);
- }
- case Builtin::BI_rotl8:
- case Builtin::BI_rotl16:
- case Builtin::BI_rotl:
- case Builtin::BI_lrotl:
- case Builtin::BI_rotl64: {
- Value *Val = EmitScalarExpr(E->getArg(0));
- Value *Shift = EmitScalarExpr(E->getArg(1));
-
- llvm::Type *ArgType = Val->getType();
- Shift = Builder.CreateIntCast(Shift, ArgType, false);
- unsigned ArgWidth = ArgType->getIntegerBitWidth();
- Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
-
- Value *LeftShiftAmt = Builder.CreateAnd(Shift, Mask);
- Value *LeftShifted = Builder.CreateShl(Val, LeftShiftAmt);
- Value *RightShiftAmt = Builder.CreateAnd(Builder.CreateNeg(Shift), Mask);
- Value *RightShifted = Builder.CreateLShr(Val, RightShiftAmt);
- Value *Result = Builder.CreateOr(LeftShifted, RightShifted);
- return RValue::get(Result);
- }
case Builtin::BI__builtin_unpredictable: {
// Always return the argument of __builtin_unpredictable. LLVM does not
// handle this builtin. Metadata for this builtin should be added directly
@@ -1690,15 +1904,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
return RValue::get(Result);
}
case Builtin::BI__builtin_assume_aligned: {
- Value *PtrValue = EmitScalarExpr(E->getArg(0));
+ const Expr *Ptr = E->getArg(0);
+ Value *PtrValue = EmitScalarExpr(Ptr);
Value *OffsetValue =
(E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
- unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
+ unsigned Alignment = (unsigned)AlignmentCI->getZExtValue();
- EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
+ EmitAlignmentAssumption(PtrValue, Ptr, /*The expr loc is sufficient.*/ SourceLocation(),
+ Alignment, OffsetValue);
return RValue::get(PtrValue);
}
case Builtin::BI__assume:
@@ -1721,6 +1937,48 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__builtin_bitreverse64: {
return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
}
+ case Builtin::BI__builtin_rotateleft8:
+ case Builtin::BI__builtin_rotateleft16:
+ case Builtin::BI__builtin_rotateleft32:
+ case Builtin::BI__builtin_rotateleft64:
+ case Builtin::BI_rotl8: // Microsoft variants of rotate left
+ case Builtin::BI_rotl16:
+ case Builtin::BI_rotl:
+ case Builtin::BI_lrotl:
+ case Builtin::BI_rotl64:
+ return emitRotate(E, false);
+
+ case Builtin::BI__builtin_rotateright8:
+ case Builtin::BI__builtin_rotateright16:
+ case Builtin::BI__builtin_rotateright32:
+ case Builtin::BI__builtin_rotateright64:
+ case Builtin::BI_rotr8: // Microsoft variants of rotate right
+ case Builtin::BI_rotr16:
+ case Builtin::BI_rotr:
+ case Builtin::BI_lrotr:
+ case Builtin::BI_rotr64:
+ return emitRotate(E, true);
+
+ case Builtin::BI__builtin_constant_p: {
+ llvm::Type *ResultType = ConvertType(E->getType());
+ if (CGM.getCodeGenOpts().OptimizationLevel == 0)
+ // At -O0, we don't perform inlining, so we don't need to delay the
+ // processing.
+ return RValue::get(ConstantInt::get(ResultType, 0));
+
+ const Expr *Arg = E->getArg(0);
+ QualType ArgType = Arg->getType();
+ if (!hasScalarEvaluationKind(ArgType) || ArgType->isFunctionType())
+ // We can only reason about scalar types.
+ return RValue::get(ConstantInt::get(ResultType, 0));
+
+ Value *ArgValue = EmitScalarExpr(Arg);
+ Value *F = CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
+ Value *Result = Builder.CreateCall(F, ArgValue);
+ if (Result->getType() != ResultType)
+ Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
+ return RValue::get(Result);
+ }
case Builtin::BI__builtin_object_size: {
unsigned Type =
E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
@@ -1985,10 +2243,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__builtin___memcpy_chk: {
// fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
- llvm::APSInt Size, DstSize;
- if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
- !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
+ Expr::EvalResult SizeResult, DstSizeResult;
+ if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
+ !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
break;
+ llvm::APSInt Size = SizeResult.Val.getInt();
+ llvm::APSInt DstSize = DstSizeResult.Val.getInt();
if (Size.ugt(DstSize))
break;
Address Dest = EmitPointerWithAlignment(E->getArg(0));
@@ -2009,10 +2269,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__builtin___memmove_chk: {
// fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
- llvm::APSInt Size, DstSize;
- if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
- !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
+ Expr::EvalResult SizeResult, DstSizeResult;
+ if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
+ !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
break;
+ llvm::APSInt Size = SizeResult.Val.getInt();
+ llvm::APSInt DstSize = DstSizeResult.Val.getInt();
if (Size.ugt(DstSize))
break;
Address Dest = EmitPointerWithAlignment(E->getArg(0));
@@ -2047,10 +2309,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
}
case Builtin::BI__builtin___memset_chk: {
// fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
- llvm::APSInt Size, DstSize;
- if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
- !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
+ Expr::EvalResult SizeResult, DstSizeResult;
+ if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
+ !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
break;
+ llvm::APSInt Size = SizeResult.Val.getInt();
+ llvm::APSInt DstSize = DstSizeResult.Val.getInt();
if (Size.ugt(DstSize))
break;
Address Dest = EmitPointerWithAlignment(E->getArg(0));
@@ -2258,6 +2522,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
return RValue::get(nullptr);
}
+ case Builtin::BI__builtin_launder: {
+ const Expr *Arg = E->getArg(0);
+ QualType ArgTy = Arg->getType()->getPointeeType();
+ Value *Ptr = EmitScalarExpr(Arg);
+ if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
+ Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
+
+ return RValue::get(Ptr);
+ }
case Builtin::BI__sync_fetch_and_add:
case Builtin::BI__sync_fetch_and_sub:
case Builtin::BI__sync_fetch_and_or:
@@ -2952,7 +3225,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI_InterlockedExchangePointer:
return RValue::get(
EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
- case Builtin::BI_InterlockedCompareExchangePointer: {
+ case Builtin::BI_InterlockedCompareExchangePointer:
+ case Builtin::BI_InterlockedCompareExchangePointer_nf: {
llvm::Type *RTy;
llvm::IntegerType *IntType =
IntegerType::get(getLLVMContext(),
@@ -2969,10 +3243,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
llvm::Value *Comparand =
Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
- auto Result =
- Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
- AtomicOrdering::SequentiallyConsistent,
- AtomicOrdering::SequentiallyConsistent);
+ auto Ordering =
+ BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ?
+ AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent;
+
+ auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
+ Ordering, Ordering);
Result->setVolatile(true);
return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
@@ -2982,16 +3258,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI_InterlockedCompareExchange8:
case Builtin::BI_InterlockedCompareExchange16:
case Builtin::BI_InterlockedCompareExchange:
- case Builtin::BI_InterlockedCompareExchange64: {
- AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
- EmitScalarExpr(E->getArg(0)),
- EmitScalarExpr(E->getArg(2)),
- EmitScalarExpr(E->getArg(1)),
- AtomicOrdering::SequentiallyConsistent,
- AtomicOrdering::SequentiallyConsistent);
- CXI->setVolatile(true);
- return RValue::get(Builder.CreateExtractValue(CXI, 0));
- }
+ case Builtin::BI_InterlockedCompareExchange64:
+ return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E));
case Builtin::BI_InterlockedIncrement16:
case Builtin::BI_InterlockedIncrement:
return RValue::get(
@@ -3337,24 +3605,31 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
// Create a temporary array to hold the sizes of local pointer arguments
// for the block. \p First is the position of the first size argument.
- auto CreateArrayForSizeVar = [=](unsigned First) {
- auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First);
- auto *Arr = Builder.CreateAlloca(AT);
- llvm::Value *Ptr;
+ auto CreateArrayForSizeVar = [=](unsigned First)
+ -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
+ llvm::APInt ArraySize(32, NumArgs - First);
+ QualType SizeArrayTy = getContext().getConstantArrayType(
+ getContext().getSizeType(), ArraySize, ArrayType::Normal,
+ /*IndexTypeQuals=*/0);
+ auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
+ llvm::Value *TmpPtr = Tmp.getPointer();
+ llvm::Value *TmpSize = EmitLifetimeStart(
+ CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
+ llvm::Value *ElemPtr;
// Each of the following arguments specifies the size of the corresponding
// argument passed to the enqueued block.
auto *Zero = llvm::ConstantInt::get(IntTy, 0);
for (unsigned I = First; I < NumArgs; ++I) {
auto *Index = llvm::ConstantInt::get(IntTy, I - First);
- auto *GEP = Builder.CreateGEP(Arr, {Zero, Index});
+ auto *GEP = Builder.CreateGEP(TmpPtr, {Zero, Index});
if (I == First)
- Ptr = GEP;
+ ElemPtr = GEP;
auto *V =
Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
Builder.CreateAlignedStore(
V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy));
}
- return Ptr;
+ return std::tie(ElemPtr, TmpSize, TmpPtr);
};
// Could have events and/or varargs.
@@ -3366,24 +3641,27 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
llvm::Value *Kernel =
Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
- auto *PtrToSizeArray = CreateArrayForSizeVar(4);
+ llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
+ std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
// Create a vector of the arguments, as well as a constant value to
// express to the runtime the number of variadic arguments.
std::vector<llvm::Value *> Args = {
Queue, Flags, Range,
Kernel, Block, ConstantInt::get(IntTy, NumArgs - 4),
- PtrToSizeArray};
+ ElemPtr};
std::vector<llvm::Type *> ArgTys = {
- QueueTy, IntTy, RangeTy,
- GenericVoidPtrTy, GenericVoidPtrTy, IntTy,
- PtrToSizeArray->getType()};
+ QueueTy, IntTy, RangeTy, GenericVoidPtrTy,
+ GenericVoidPtrTy, IntTy, ElemPtr->getType()};
llvm::FunctionType *FTy = llvm::FunctionType::get(
Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
- return RValue::get(
- Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
- llvm::ArrayRef<llvm::Value *>(Args)));
+ auto Call =
+ RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
+ llvm::ArrayRef<llvm::Value *>(Args)));
+ if (TmpSize)
+ EmitLifetimeEnd(TmpSize, TmpPtr);
+ return Call;
}
// Any calls now have event arguments passed.
if (NumArgs >= 7) {
@@ -3400,7 +3678,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
// Convert to generic address space.
EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
- ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy);
+ ClkEvent = ClkEvent->getType()->isIntegerTy()
+ ? Builder.CreateBitOrPointerCast(ClkEvent, EventPtrTy)
+ : Builder.CreatePointerCast(ClkEvent, EventPtrTy);
auto Info =
CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
llvm::Value *Kernel =
@@ -3430,15 +3710,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
ArgTys.push_back(Int32Ty);
Name = "__enqueue_kernel_events_varargs";
- auto *PtrToSizeArray = CreateArrayForSizeVar(7);
- Args.push_back(PtrToSizeArray);
- ArgTys.push_back(PtrToSizeArray->getType());
+ llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
+ std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
+ Args.push_back(ElemPtr);
+ ArgTys.push_back(ElemPtr->getType());
llvm::FunctionType *FTy = llvm::FunctionType::get(
Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
- return RValue::get(
- Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
- llvm::ArrayRef<llvm::Value *>(Args)));
+ auto Call =
+ RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
+ llvm::ArrayRef<llvm::Value *>(Args)));
+ if (TmpSize)
+ EmitLifetimeEnd(TmpSize, TmpPtr);
+ return Call;
}
LLVM_FALLTHROUGH;
}
@@ -3530,13 +3814,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
case Builtin::BI__builtin_os_log_format:
return emitBuiltinOSLogFormat(*E);
- case Builtin::BI__builtin_os_log_format_buffer_size: {
- analyze_os_log::OSLogBufferLayout Layout;
- analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
- return RValue::get(ConstantInt::get(ConvertType(E->getType()),
- Layout.size().getQuantity()));
- }
-
case Builtin::BI__xray_customevent: {
if (!ShouldXRayInstrumentFunction())
return RValue::getIgnored();
@@ -3703,6 +3980,16 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
// we need to do a bit cast.
llvm::Type *PTy = FTy->getParamType(i);
if (PTy != ArgValue->getType()) {
+ // XXX - vector of pointers?
+ if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
+ if (PtrTy->getAddressSpace() !=
+ ArgValue->getType()->getPointerAddressSpace()) {
+ ArgValue = Builder.CreateAddrSpaceCast(
+ ArgValue,
+ ArgValue->getType()->getPointerTo(PtrTy->getAddressSpace()));
+ }
+ }
+
assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
"Must be able to losslessly bit cast to param");
ArgValue = Builder.CreateBitCast(ArgValue, PTy);
@@ -3719,6 +4006,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
RetTy = ConvertType(BuiltinRetType);
if (RetTy != V->getType()) {
+ // XXX - vector of pointers?
+ if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
+ if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
+ V = Builder.CreateAddrSpaceCast(
+ V, V->getType()->getPointerTo(PtrTy->getAddressSpace()));
+ }
+ }
+
assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
"Must be able to losslessly bit cast result type");
V = Builder.CreateBitCast(V, RetTy);
@@ -4286,6 +4581,14 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP0(vextq_v),
NEONMAP0(vfma_v),
NEONMAP0(vfmaq_v),
+ NEONMAP1(vfmlal_high_v, aarch64_neon_fmlal2, 0),
+ NEONMAP1(vfmlal_low_v, aarch64_neon_fmlal, 0),
+ NEONMAP1(vfmlalq_high_v, aarch64_neon_fmlal2, 0),
+ NEONMAP1(vfmlalq_low_v, aarch64_neon_fmlal, 0),
+ NEONMAP1(vfmlsl_high_v, aarch64_neon_fmlsl2, 0),
+ NEONMAP1(vfmlsl_low_v, aarch64_neon_fmlsl, 0),
+ NEONMAP1(vfmlslq_high_v, aarch64_neon_fmlsl2, 0),
+ NEONMAP1(vfmlslq_low_v, aarch64_neon_fmlsl, 0),
NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
@@ -5259,6 +5562,34 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
}
+ case NEON::BI__builtin_neon_vfmlal_low_v:
+ case NEON::BI__builtin_neon_vfmlalq_low_v: {
+ llvm::Type *InputTy =
+ llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
+ llvm::Type *Tys[2] = { Ty, InputTy };
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
+ }
+ case NEON::BI__builtin_neon_vfmlsl_low_v:
+ case NEON::BI__builtin_neon_vfmlslq_low_v: {
+ llvm::Type *InputTy =
+ llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
+ llvm::Type *Tys[2] = { Ty, InputTy };
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
+ }
+ case NEON::BI__builtin_neon_vfmlal_high_v:
+ case NEON::BI__builtin_neon_vfmlalq_high_v: {
+ llvm::Type *InputTy =
+ llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
+ llvm::Type *Tys[2] = { Ty, InputTy };
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
+ }
+ case NEON::BI__builtin_neon_vfmlsl_high_v:
+ case NEON::BI__builtin_neon_vfmlslq_high_v: {
+ llvm::Type *InputTy =
+ llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
+ llvm::Type *Tys[2] = { Ty, InputTy };
+ return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
+ }
}
assert(Int && "Expected valid intrinsic number");
@@ -5506,10 +5837,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
llvm::FunctionType *FTy =
llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
- APSInt Value;
- if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
+ Expr::EvalResult Result;
+ if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
llvm_unreachable("Sema will ensure that the parameter is constant");
+ llvm::APSInt Value = Result.Val.getInt();
uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
llvm::InlineAsm *Emit =
@@ -5991,6 +6323,120 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
case ARM::BI_InterlockedIncrement64:
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
+ case ARM::BI_InterlockedExchangeAdd8_acq:
+ case ARM::BI_InterlockedExchangeAdd16_acq:
+ case ARM::BI_InterlockedExchangeAdd_acq:
+ case ARM::BI_InterlockedExchangeAdd64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E);
+ case ARM::BI_InterlockedExchangeAdd8_rel:
+ case ARM::BI_InterlockedExchangeAdd16_rel:
+ case ARM::BI_InterlockedExchangeAdd_rel:
+ case ARM::BI_InterlockedExchangeAdd64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E);
+ case ARM::BI_InterlockedExchangeAdd8_nf:
+ case ARM::BI_InterlockedExchangeAdd16_nf:
+ case ARM::BI_InterlockedExchangeAdd_nf:
+ case ARM::BI_InterlockedExchangeAdd64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E);
+ case ARM::BI_InterlockedExchange8_acq:
+ case ARM::BI_InterlockedExchange16_acq:
+ case ARM::BI_InterlockedExchange_acq:
+ case ARM::BI_InterlockedExchange64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E);
+ case ARM::BI_InterlockedExchange8_rel:
+ case ARM::BI_InterlockedExchange16_rel:
+ case ARM::BI_InterlockedExchange_rel:
+ case ARM::BI_InterlockedExchange64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E);
+ case ARM::BI_InterlockedExchange8_nf:
+ case ARM::BI_InterlockedExchange16_nf:
+ case ARM::BI_InterlockedExchange_nf:
+ case ARM::BI_InterlockedExchange64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E);
+ case ARM::BI_InterlockedCompareExchange8_acq:
+ case ARM::BI_InterlockedCompareExchange16_acq:
+ case ARM::BI_InterlockedCompareExchange_acq:
+ case ARM::BI_InterlockedCompareExchange64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E);
+ case ARM::BI_InterlockedCompareExchange8_rel:
+ case ARM::BI_InterlockedCompareExchange16_rel:
+ case ARM::BI_InterlockedCompareExchange_rel:
+ case ARM::BI_InterlockedCompareExchange64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E);
+ case ARM::BI_InterlockedCompareExchange8_nf:
+ case ARM::BI_InterlockedCompareExchange16_nf:
+ case ARM::BI_InterlockedCompareExchange_nf:
+ case ARM::BI_InterlockedCompareExchange64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E);
+ case ARM::BI_InterlockedOr8_acq:
+ case ARM::BI_InterlockedOr16_acq:
+ case ARM::BI_InterlockedOr_acq:
+ case ARM::BI_InterlockedOr64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E);
+ case ARM::BI_InterlockedOr8_rel:
+ case ARM::BI_InterlockedOr16_rel:
+ case ARM::BI_InterlockedOr_rel:
+ case ARM::BI_InterlockedOr64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E);
+ case ARM::BI_InterlockedOr8_nf:
+ case ARM::BI_InterlockedOr16_nf:
+ case ARM::BI_InterlockedOr_nf:
+ case ARM::BI_InterlockedOr64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E);
+ case ARM::BI_InterlockedXor8_acq:
+ case ARM::BI_InterlockedXor16_acq:
+ case ARM::BI_InterlockedXor_acq:
+ case ARM::BI_InterlockedXor64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E);
+ case ARM::BI_InterlockedXor8_rel:
+ case ARM::BI_InterlockedXor16_rel:
+ case ARM::BI_InterlockedXor_rel:
+ case ARM::BI_InterlockedXor64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E);
+ case ARM::BI_InterlockedXor8_nf:
+ case ARM::BI_InterlockedXor16_nf:
+ case ARM::BI_InterlockedXor_nf:
+ case ARM::BI_InterlockedXor64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E);
+ case ARM::BI_InterlockedAnd8_acq:
+ case ARM::BI_InterlockedAnd16_acq:
+ case ARM::BI_InterlockedAnd_acq:
+ case ARM::BI_InterlockedAnd64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E);
+ case ARM::BI_InterlockedAnd8_rel:
+ case ARM::BI_InterlockedAnd16_rel:
+ case ARM::BI_InterlockedAnd_rel:
+ case ARM::BI_InterlockedAnd64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E);
+ case ARM::BI_InterlockedAnd8_nf:
+ case ARM::BI_InterlockedAnd16_nf:
+ case ARM::BI_InterlockedAnd_nf:
+ case ARM::BI_InterlockedAnd64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E);
+ case ARM::BI_InterlockedIncrement16_acq:
+ case ARM::BI_InterlockedIncrement_acq:
+ case ARM::BI_InterlockedIncrement64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E);
+ case ARM::BI_InterlockedIncrement16_rel:
+ case ARM::BI_InterlockedIncrement_rel:
+ case ARM::BI_InterlockedIncrement64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E);
+ case ARM::BI_InterlockedIncrement16_nf:
+ case ARM::BI_InterlockedIncrement_nf:
+ case ARM::BI_InterlockedIncrement64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E);
+ case ARM::BI_InterlockedDecrement16_acq:
+ case ARM::BI_InterlockedDecrement_acq:
+ case ARM::BI_InterlockedDecrement64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E);
+ case ARM::BI_InterlockedDecrement16_rel:
+ case ARM::BI_InterlockedDecrement_rel:
+ case ARM::BI_InterlockedDecrement64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E);
+ case ARM::BI_InterlockedDecrement16_nf:
+ case ARM::BI_InterlockedDecrement_nf:
+ case ARM::BI_InterlockedDecrement64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E);
}
// Get the last argument, which specifies the vector type.
@@ -6497,11 +6943,33 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
}
+ if (BuiltinID == AArch64::BI__getReg) {
+ Expr::EvalResult Result;
+ if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
+ llvm_unreachable("Sema will ensure that the parameter is constant");
+
+ llvm::APSInt Value = Result.Val.getInt();
+ LLVMContext &Context = CGM.getLLVMContext();
+ std::string Reg = Value == 31 ? "sp" : "x" + Value.toString(10);
+
+ llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
+ llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
+ llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
+
+ llvm::Value *F =
+ CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
+ return Builder.CreateCall(F, Metadata);
+ }
+
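
For orientation, a usage sketch of the builtin handled just above, assuming a compiler and target (e.g. AArch64 Windows) where the MSVC-style __getReg is available; register number 31 selects "sp" and any other N selects "xN", exactly as the metadata string is built.

unsigned long long readFpAndSpSketch() {
  unsigned long long Fp = __getReg(29); // x29, the AArch64 frame pointer
  unsigned long long Sp = __getReg(31); // 31 maps to sp rather than x31
  return Fp ^ Sp;
}
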
if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
return Builder.CreateCall(F);
}
+ if (BuiltinID == AArch64::BI_ReadWriteBarrier)
+ return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
+ llvm::SyncScope::SingleThread);
+
// CRC32
Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
switch (BuiltinID) {
@@ -6564,6 +7032,48 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
}
+ if (BuiltinID == AArch64::BI_ReadStatusReg ||
+ BuiltinID == AArch64::BI_WriteStatusReg) {
+ LLVMContext &Context = CGM.getLLVMContext();
+
+ unsigned SysReg =
+ E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
+
+ std::string SysRegStr;
+ llvm::raw_string_ostream(SysRegStr) <<
+ ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
+ ((SysReg >> 11) & 7) << ":" <<
+ ((SysReg >> 7) & 15) << ":" <<
+ ((SysReg >> 3) & 15) << ":" <<
+ ( SysReg & 7);
+
+ llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
+ llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
+ llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
+
+ llvm::Type *RegisterType = Int64Ty;
+ llvm::Type *ValueType = Int32Ty;
+ llvm::Type *Types[] = { RegisterType };
+
+ if (BuiltinID == AArch64::BI_ReadStatusReg) {
+ llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
+ llvm::Value *Call = Builder.CreateCall(F, Metadata);
+
+ return Builder.CreateTrunc(Call, ValueType);
+ }
+
+ llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
+ llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
+ ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
+
+ return Builder.CreateCall(F, { Metadata, ArgValue });
+ }
+
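
The decode above unpacks the MSVC ARM64_SYSREG-style immediate into the "op0:op1:CRn:CRm:op2" string that llvm.read_register/llvm.write_register expect. A standalone sketch of the same decode (function name hypothetical), handy for checking an encoding by hand:

#include <cstdio>
#include <string>

std::string decodeSysRegSketch(unsigned SysReg) {
  char Buf[32];
  std::snprintf(Buf, sizeof(Buf), "%u:%u:%u:%u:%u",
                (1u << 1) | ((SysReg >> 14) & 1), // op0 (always 2 or 3)
                (SysReg >> 11) & 7,               // op1
                (SysReg >> 7) & 15,               // CRn
                (SysReg >> 3) & 15,               // CRm
                SysReg & 7);                      // op2
  return Buf;
}
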
+ if (BuiltinID == AArch64::BI_AddressOfReturnAddress) {
+ llvm::Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
+ return Builder.CreateCall(F);
+ }
+
// Find out if any arguments are required to be integer constant
// expressions.
unsigned ICEArguments = 0;
@@ -6659,7 +7169,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcvth_f16_u32:
case NEON::BI__builtin_neon_vcvth_f16_u64:
usgn = true;
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
case NEON::BI__builtin_neon_vcvth_f16_s16:
case NEON::BI__builtin_neon_vcvth_f16_s32:
case NEON::BI__builtin_neon_vcvth_f16_s64: {
@@ -6679,7 +7189,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vcvth_u16_f16:
usgn = true;
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
case NEON::BI__builtin_neon_vcvth_s16_f16: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
@@ -6689,7 +7199,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vcvth_u32_f16:
usgn = true;
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
case NEON::BI__builtin_neon_vcvth_s32_f16: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
@@ -6699,7 +7209,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case NEON::BI__builtin_neon_vcvth_u64_f16:
usgn = true;
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
case NEON::BI__builtin_neon_vcvth_s64_f16: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
@@ -8414,6 +8924,129 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
case AArch64::BI_InterlockedIncrement64:
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
+ case AArch64::BI_InterlockedExchangeAdd8_acq:
+ case AArch64::BI_InterlockedExchangeAdd16_acq:
+ case AArch64::BI_InterlockedExchangeAdd_acq:
+ case AArch64::BI_InterlockedExchangeAdd64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E);
+ case AArch64::BI_InterlockedExchangeAdd8_rel:
+ case AArch64::BI_InterlockedExchangeAdd16_rel:
+ case AArch64::BI_InterlockedExchangeAdd_rel:
+ case AArch64::BI_InterlockedExchangeAdd64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E);
+ case AArch64::BI_InterlockedExchangeAdd8_nf:
+ case AArch64::BI_InterlockedExchangeAdd16_nf:
+ case AArch64::BI_InterlockedExchangeAdd_nf:
+ case AArch64::BI_InterlockedExchangeAdd64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E);
+ case AArch64::BI_InterlockedExchange8_acq:
+ case AArch64::BI_InterlockedExchange16_acq:
+ case AArch64::BI_InterlockedExchange_acq:
+ case AArch64::BI_InterlockedExchange64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E);
+ case AArch64::BI_InterlockedExchange8_rel:
+ case AArch64::BI_InterlockedExchange16_rel:
+ case AArch64::BI_InterlockedExchange_rel:
+ case AArch64::BI_InterlockedExchange64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E);
+ case AArch64::BI_InterlockedExchange8_nf:
+ case AArch64::BI_InterlockedExchange16_nf:
+ case AArch64::BI_InterlockedExchange_nf:
+ case AArch64::BI_InterlockedExchange64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E);
+ case AArch64::BI_InterlockedCompareExchange8_acq:
+ case AArch64::BI_InterlockedCompareExchange16_acq:
+ case AArch64::BI_InterlockedCompareExchange_acq:
+ case AArch64::BI_InterlockedCompareExchange64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E);
+ case AArch64::BI_InterlockedCompareExchange8_rel:
+ case AArch64::BI_InterlockedCompareExchange16_rel:
+ case AArch64::BI_InterlockedCompareExchange_rel:
+ case AArch64::BI_InterlockedCompareExchange64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E);
+ case AArch64::BI_InterlockedCompareExchange8_nf:
+ case AArch64::BI_InterlockedCompareExchange16_nf:
+ case AArch64::BI_InterlockedCompareExchange_nf:
+ case AArch64::BI_InterlockedCompareExchange64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E);
+ case AArch64::BI_InterlockedOr8_acq:
+ case AArch64::BI_InterlockedOr16_acq:
+ case AArch64::BI_InterlockedOr_acq:
+ case AArch64::BI_InterlockedOr64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E);
+ case AArch64::BI_InterlockedOr8_rel:
+ case AArch64::BI_InterlockedOr16_rel:
+ case AArch64::BI_InterlockedOr_rel:
+ case AArch64::BI_InterlockedOr64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E);
+ case AArch64::BI_InterlockedOr8_nf:
+ case AArch64::BI_InterlockedOr16_nf:
+ case AArch64::BI_InterlockedOr_nf:
+ case AArch64::BI_InterlockedOr64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E);
+ case AArch64::BI_InterlockedXor8_acq:
+ case AArch64::BI_InterlockedXor16_acq:
+ case AArch64::BI_InterlockedXor_acq:
+ case AArch64::BI_InterlockedXor64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E);
+ case AArch64::BI_InterlockedXor8_rel:
+ case AArch64::BI_InterlockedXor16_rel:
+ case AArch64::BI_InterlockedXor_rel:
+ case AArch64::BI_InterlockedXor64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E);
+ case AArch64::BI_InterlockedXor8_nf:
+ case AArch64::BI_InterlockedXor16_nf:
+ case AArch64::BI_InterlockedXor_nf:
+ case AArch64::BI_InterlockedXor64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E);
+ case AArch64::BI_InterlockedAnd8_acq:
+ case AArch64::BI_InterlockedAnd16_acq:
+ case AArch64::BI_InterlockedAnd_acq:
+ case AArch64::BI_InterlockedAnd64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E);
+ case AArch64::BI_InterlockedAnd8_rel:
+ case AArch64::BI_InterlockedAnd16_rel:
+ case AArch64::BI_InterlockedAnd_rel:
+ case AArch64::BI_InterlockedAnd64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E);
+ case AArch64::BI_InterlockedAnd8_nf:
+ case AArch64::BI_InterlockedAnd16_nf:
+ case AArch64::BI_InterlockedAnd_nf:
+ case AArch64::BI_InterlockedAnd64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E);
+ case AArch64::BI_InterlockedIncrement16_acq:
+ case AArch64::BI_InterlockedIncrement_acq:
+ case AArch64::BI_InterlockedIncrement64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E);
+ case AArch64::BI_InterlockedIncrement16_rel:
+ case AArch64::BI_InterlockedIncrement_rel:
+ case AArch64::BI_InterlockedIncrement64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E);
+ case AArch64::BI_InterlockedIncrement16_nf:
+ case AArch64::BI_InterlockedIncrement_nf:
+ case AArch64::BI_InterlockedIncrement64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E);
+ case AArch64::BI_InterlockedDecrement16_acq:
+ case AArch64::BI_InterlockedDecrement_acq:
+ case AArch64::BI_InterlockedDecrement64_acq:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E);
+ case AArch64::BI_InterlockedDecrement16_rel:
+ case AArch64::BI_InterlockedDecrement_rel:
+ case AArch64::BI_InterlockedDecrement64_rel:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E);
+ case AArch64::BI_InterlockedDecrement16_nf:
+ case AArch64::BI_InterlockedDecrement_nf:
+ case AArch64::BI_InterlockedDecrement64_nf:
+ return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E);
+
+ case AArch64::BI_InterlockedAdd: {
+ Value *Arg0 = EmitScalarExpr(E->getArg(0));
+ Value *Arg1 = EmitScalarExpr(E->getArg(1));
+ AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
+ AtomicRMWInst::Add, Arg0, Arg1,
+ llvm::AtomicOrdering::SequentiallyConsistent);
+ return Builder.CreateAdd(RMWI, Arg1);
+ }
}
}
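
On the _InterlockedAdd case just above: an atomicrmw add yields the value the memory held before the operation, while MSVC documents _InterlockedAdd as returning the value after it, hence the trailing CreateAdd. A minimal sketch of the same semantics in portable C++, assuming <atomic>:

#include <atomic>

long interlockedAddSketch(std::atomic<long> &Target, long Value) {
  long Old = Target.fetch_add(Value, std::memory_order_seq_cst); // old value
  return Old + Value;                                            // new value
}
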
@@ -8524,8 +9157,9 @@ static Value *EmitX86CompressStore(CodeGenFunction &CGF,
}
static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
- unsigned NumElts, ArrayRef<Value *> Ops,
+ ArrayRef<Value *> Ops,
bool InvertLHS = false) {
+ unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
@@ -8533,7 +9167,25 @@ static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
LHS = CGF.Builder.CreateNot(LHS);
return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
- CGF.Builder.getIntNTy(std::max(NumElts, 8U)));
+ Ops[0]->getType());
+}
+
+static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
+ Value *Amt, bool IsRight) {
+ llvm::Type *Ty = Op0->getType();
+
+ // The shift amount may be a scalar immediate, in which case create a splat
+ // vector. Funnel shift amounts are treated as modulo, and the types are all
+ // powers of two, so we only care about the lowest log2 bits anyway.
+ if (Amt->getType() != Ty) {
+ unsigned NumElts = Ty->getVectorNumElements();
+ Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
+ Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
+ }
+
+ unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
+ Value *F = CGF.CGM.getIntrinsic(IID, Ty);
+ return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
}
static Value *EmitX86Select(CodeGenFunction &CGF,
@@ -8855,6 +9507,17 @@ static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
}
+// Emit addition or subtraction with signed/unsigned saturation.
+static Value *EmitX86AddSubSatExpr(CodeGenFunction &CGF,
+ ArrayRef<Value *> Ops, bool IsSigned,
+ bool IsAddition) {
+ Intrinsic::ID IID =
+ IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
+ : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
+ llvm::Function *F = CGF.CGM.getIntrinsic(IID, Ops[0]->getType());
+ return CGF.Builder.CreateCall(F, {Ops[0], Ops[1]});
+}
+
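
EmitX86AddSubSatExpr maps the packed saturating builtins onto the generic llvm.[su]add.sat/llvm.[su]sub.sat intrinsics, which clamp to the element type's range instead of wrapping. A byte-sized sketch of the unsigned-add case:

#include <algorithm>
#include <cstdint>

uint8_t uaddSat8Sketch(uint8_t A, uint8_t B) {
  unsigned Sum = unsigned(A) + unsigned(B);
  return uint8_t(std::min(Sum, 255u)); // clamp to the type's maximum
}
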
Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
@@ -8876,6 +9539,7 @@ Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
// Grab the global __cpu_model.
llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
+ cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
// Calculate the index needed to access the correct field based on the
// range. Also adjust the expected value.
@@ -8911,17 +9575,17 @@ Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
return EmitX86CpuSupports(FeatureStr);
}
-uint32_t
+uint64_t
CodeGenFunction::GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs) {
// Processor features and mapping to processor feature value.
- uint32_t FeaturesMask = 0;
+ uint64_t FeaturesMask = 0;
for (const StringRef &FeatureStr : FeatureStrs) {
unsigned Feature =
StringSwitch<unsigned>(FeatureStr)
#define X86_FEATURE_COMPAT(VAL, ENUM, STR) .Case(STR, VAL)
#include "llvm/Support/X86TargetParser.def"
;
- FeaturesMask |= (1U << Feature);
+ FeaturesMask |= (1ULL << Feature);
}
return FeaturesMask;
}
@@ -8930,37 +9594,66 @@ Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
return EmitX86CpuSupports(GetX86CpuSupportsMask(FeatureStrs));
}
-llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint32_t FeaturesMask) {
- // Matching the struct layout from the compiler-rt/libgcc structure that is
- // filled in:
- // unsigned int __cpu_vendor;
- // unsigned int __cpu_type;
- // unsigned int __cpu_subtype;
- // unsigned int __cpu_features[1];
- llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
- llvm::ArrayType::get(Int32Ty, 1));
+llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) {
+ uint32_t Features1 = Lo_32(FeaturesMask);
+ uint32_t Features2 = Hi_32(FeaturesMask);
- // Grab the global __cpu_model.
- llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
+ Value *Result = Builder.getTrue();
+
+ if (Features1 != 0) {
+ // Matching the struct layout from the compiler-rt/libgcc structure that is
+ // filled in:
+ // unsigned int __cpu_vendor;
+ // unsigned int __cpu_type;
+ // unsigned int __cpu_subtype;
+ // unsigned int __cpu_features[1];
+ llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
+ llvm::ArrayType::get(Int32Ty, 1));
+
+ // Grab the global __cpu_model.
+ llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
+ cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
+
+ // Grab the first (0th) element from the field __cpu_features off of the
+ // global in the struct STy.
+ Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
+ Builder.getInt32(0)};
+ Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
+ Value *Features =
+ Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4));
+
+ // Check the value of the bit corresponding to the feature requested.
+ Value *Mask = Builder.getInt32(Features1);
+ Value *Bitset = Builder.CreateAnd(Features, Mask);
+ Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
+ Result = Builder.CreateAnd(Result, Cmp);
+ }
- // Grab the first (0th) element from the field __cpu_features off of the
- // global in the struct STy.
- Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 3),
- ConstantInt::get(Int32Ty, 0)};
- Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
- Value *Features =
- Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4));
-
- // Check the value of the bit corresponding to the feature requested.
- Value *Bitset = Builder.CreateAnd(
- Features, llvm::ConstantInt::get(Int32Ty, FeaturesMask));
- return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
+ if (Features2 != 0) {
+ llvm::Constant *CpuFeatures2 = CGM.CreateRuntimeVariable(Int32Ty,
+ "__cpu_features2");
+ cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
+
+ Value *Features =
+ Builder.CreateAlignedLoad(CpuFeatures2, CharUnits::fromQuantity(4));
+
+ // Check the value of the bit corresponding to the feature requested.
+ Value *Mask = Builder.getInt32(Features2);
+ Value *Bitset = Builder.CreateAnd(Features, Mask);
+ Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
+ Result = Builder.CreateAnd(Result, Cmp);
+ }
+
+ return Result;
}
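
The split above checks the low 32 feature bits against __cpu_model.__cpu_features[0] and any higher bits against the separate __cpu_features2 word. For orientation, a rough C++ sketch of the runtime-side view this code assumes (struct name hypothetical; field names mirror the comment above, and linking would require the real libgcc/compiler-rt symbols):

#include <cstdint>

struct CpuModelSketch {
  unsigned __cpu_vendor;
  unsigned __cpu_type;
  unsigned __cpu_subtype;
  unsigned __cpu_features[1]; // feature bits 0..31
};
extern "C" CpuModelSketch __cpu_model;
extern "C" unsigned __cpu_features2; // feature bits 32..63, as used here

bool cpuSupportsSketch(uint64_t FeaturesMask) {
  unsigned Lo = (unsigned)FeaturesMask;
  unsigned Hi = (unsigned)(FeaturesMask >> 32);
  bool Ok = true;
  if (Lo) Ok &= (__cpu_model.__cpu_features[0] & Lo) == Lo; // all bits set
  if (Hi) Ok &= (__cpu_features2 & Hi) == Hi;
  return Ok;
}
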
Value *CodeGenFunction::EmitX86CpuInit() {
llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
/*Variadic*/ false);
llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
+ cast<llvm::GlobalValue>(Func)->setDSOLocal(true);
+ cast<llvm::GlobalValue>(Func)->setDLLStorageClass(
+ llvm::GlobalValue::DefaultStorageClass);
return Builder.CreateCall(Func);
}
@@ -9051,6 +9744,24 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__rdtsc: {
return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
}
+ case X86::BI__builtin_ia32_rdtscp: {
+ Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
+ Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
+ Ops[0]);
+ return Builder.CreateExtractValue(Call, 0);
+ }
+ case X86::BI__builtin_ia32_lzcnt_u16:
+ case X86::BI__builtin_ia32_lzcnt_u32:
+ case X86::BI__builtin_ia32_lzcnt_u64: {
+ Value *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
+ return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
+ }
+ case X86::BI__builtin_ia32_tzcnt_u16:
+ case X86::BI__builtin_ia32_tzcnt_u32:
+ case X86::BI__builtin_ia32_tzcnt_u64: {
+ Value *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
+ return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
+ }
case X86::BI__builtin_ia32_undef128:
case X86::BI__builtin_ia32_undef256:
case X86::BI__builtin_ia32_undef512:
@@ -9822,6 +10533,50 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
"psrldq");
return Builder.CreateBitCast(SV, ResultType, "cast");
}
+ case X86::BI__builtin_ia32_kshiftliqi:
+ case X86::BI__builtin_ia32_kshiftlihi:
+ case X86::BI__builtin_ia32_kshiftlisi:
+ case X86::BI__builtin_ia32_kshiftlidi: {
+ unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
+ unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
+
+ if (ShiftVal >= NumElts)
+ return llvm::Constant::getNullValue(Ops[0]->getType());
+
+ Value *In = getMaskVecValue(*this, Ops[0], NumElts);
+
+ uint32_t Indices[64];
+ for (unsigned i = 0; i != NumElts; ++i)
+ Indices[i] = NumElts + i - ShiftVal;
+
+ Value *Zero = llvm::Constant::getNullValue(In->getType());
+ Value *SV = Builder.CreateShuffleVector(Zero, In,
+ makeArrayRef(Indices, NumElts),
+ "kshiftl");
+ return Builder.CreateBitCast(SV, Ops[0]->getType());
+ }
+ case X86::BI__builtin_ia32_kshiftriqi:
+ case X86::BI__builtin_ia32_kshiftrihi:
+ case X86::BI__builtin_ia32_kshiftrisi:
+ case X86::BI__builtin_ia32_kshiftridi: {
+ unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
+ unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
+
+ if (ShiftVal >= NumElts)
+ return llvm::Constant::getNullValue(Ops[0]->getType());
+
+ Value *In = getMaskVecValue(*this, Ops[0], NumElts);
+
+ uint32_t Indices[64];
+ for (unsigned i = 0; i != NumElts; ++i)
+ Indices[i] = i + ShiftVal;
+
+ Value *Zero = llvm::Constant::getNullValue(In->getType());
+ Value *SV = Builder.CreateShuffleVector(In, Zero,
+ makeArrayRef(Indices, NumElts),
+ "kshiftr");
+ return Builder.CreateBitCast(SV, Ops[0]->getType());
+ }
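
At the bit level, kshiftl/kshiftr on a mask are ordinary integer shifts with zero fill; the shuffle-with-a-zero-vector form above is the vXi1 equivalent, and shift counts at or beyond the mask width produce zero. A scalar sketch for the 8-bit left-shift case (the right shift is analogous with >>):

#include <cstdint>

uint8_t kshiftliSketch(uint8_t Mask, unsigned Imm) {
  return Imm >= 8 ? 0 : (uint8_t)(Mask << Imm); // shifts >= width give 0
}
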
case X86::BI__builtin_ia32_movnti:
case X86::BI__builtin_ia32_movnti64:
case X86::BI__builtin_ia32_movntsd:
@@ -9847,7 +10602,41 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
SI->setAlignment(1);
return SI;
}
-
+ // Rotate is a special case of funnel shift - the first two arguments are the same.
+ case X86::BI__builtin_ia32_vprotb:
+ case X86::BI__builtin_ia32_vprotw:
+ case X86::BI__builtin_ia32_vprotd:
+ case X86::BI__builtin_ia32_vprotq:
+ case X86::BI__builtin_ia32_vprotbi:
+ case X86::BI__builtin_ia32_vprotwi:
+ case X86::BI__builtin_ia32_vprotdi:
+ case X86::BI__builtin_ia32_vprotqi:
+ case X86::BI__builtin_ia32_prold128:
+ case X86::BI__builtin_ia32_prold256:
+ case X86::BI__builtin_ia32_prold512:
+ case X86::BI__builtin_ia32_prolq128:
+ case X86::BI__builtin_ia32_prolq256:
+ case X86::BI__builtin_ia32_prolq512:
+ case X86::BI__builtin_ia32_prolvd128:
+ case X86::BI__builtin_ia32_prolvd256:
+ case X86::BI__builtin_ia32_prolvd512:
+ case X86::BI__builtin_ia32_prolvq128:
+ case X86::BI__builtin_ia32_prolvq256:
+ case X86::BI__builtin_ia32_prolvq512:
+ return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
+ case X86::BI__builtin_ia32_prord128:
+ case X86::BI__builtin_ia32_prord256:
+ case X86::BI__builtin_ia32_prord512:
+ case X86::BI__builtin_ia32_prorq128:
+ case X86::BI__builtin_ia32_prorq256:
+ case X86::BI__builtin_ia32_prorq512:
+ case X86::BI__builtin_ia32_prorvd128:
+ case X86::BI__builtin_ia32_prorvd256:
+ case X86::BI__builtin_ia32_prorvd512:
+ case X86::BI__builtin_ia32_prorvq128:
+ case X86::BI__builtin_ia32_prorvq256:
+ case X86::BI__builtin_ia32_prorvq512:
+ return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
case X86::BI__builtin_ia32_selectb_128:
case X86::BI__builtin_ia32_selectb_256:
case X86::BI__builtin_ia32_selectb_512:
@@ -9905,38 +10694,147 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return EmitX86MaskedCompare(*this, CC, false, Ops);
}
+ case X86::BI__builtin_ia32_kortestcqi:
case X86::BI__builtin_ia32_kortestchi:
- case X86::BI__builtin_ia32_kortestzhi: {
- Value *Or = EmitX86MaskLogic(*this, Instruction::Or, 16, Ops);
- Value *C;
- if (BuiltinID == X86::BI__builtin_ia32_kortestchi)
- C = llvm::Constant::getAllOnesValue(Builder.getInt16Ty());
- else
- C = llvm::Constant::getNullValue(Builder.getInt16Ty());
+ case X86::BI__builtin_ia32_kortestcsi:
+ case X86::BI__builtin_ia32_kortestcdi: {
+ Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
+ Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
Value *Cmp = Builder.CreateICmpEQ(Or, C);
return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
}
+ case X86::BI__builtin_ia32_kortestzqi:
+ case X86::BI__builtin_ia32_kortestzhi:
+ case X86::BI__builtin_ia32_kortestzsi:
+ case X86::BI__builtin_ia32_kortestzdi: {
+ Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
+ Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
+ Value *Cmp = Builder.CreateICmpEQ(Or, C);
+ return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
+ }
+
+ case X86::BI__builtin_ia32_ktestcqi:
+ case X86::BI__builtin_ia32_ktestzqi:
+ case X86::BI__builtin_ia32_ktestchi:
+ case X86::BI__builtin_ia32_ktestzhi:
+ case X86::BI__builtin_ia32_ktestcsi:
+ case X86::BI__builtin_ia32_ktestzsi:
+ case X86::BI__builtin_ia32_ktestcdi:
+ case X86::BI__builtin_ia32_ktestzdi: {
+ Intrinsic::ID IID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_ktestcqi:
+ IID = Intrinsic::x86_avx512_ktestc_b;
+ break;
+ case X86::BI__builtin_ia32_ktestzqi:
+ IID = Intrinsic::x86_avx512_ktestz_b;
+ break;
+ case X86::BI__builtin_ia32_ktestchi:
+ IID = Intrinsic::x86_avx512_ktestc_w;
+ break;
+ case X86::BI__builtin_ia32_ktestzhi:
+ IID = Intrinsic::x86_avx512_ktestz_w;
+ break;
+ case X86::BI__builtin_ia32_ktestcsi:
+ IID = Intrinsic::x86_avx512_ktestc_d;
+ break;
+ case X86::BI__builtin_ia32_ktestzsi:
+ IID = Intrinsic::x86_avx512_ktestz_d;
+ break;
+ case X86::BI__builtin_ia32_ktestcdi:
+ IID = Intrinsic::x86_avx512_ktestc_q;
+ break;
+ case X86::BI__builtin_ia32_ktestzdi:
+ IID = Intrinsic::x86_avx512_ktestz_q;
+ break;
+ }
+
+ unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
+ Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
+ Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
+ Function *Intr = CGM.getIntrinsic(IID);
+ return Builder.CreateCall(Intr, {LHS, RHS});
+ }
+ case X86::BI__builtin_ia32_kaddqi:
+ case X86::BI__builtin_ia32_kaddhi:
+ case X86::BI__builtin_ia32_kaddsi:
+ case X86::BI__builtin_ia32_kadddi: {
+ Intrinsic::ID IID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_kaddqi:
+ IID = Intrinsic::x86_avx512_kadd_b;
+ break;
+ case X86::BI__builtin_ia32_kaddhi:
+ IID = Intrinsic::x86_avx512_kadd_w;
+ break;
+ case X86::BI__builtin_ia32_kaddsi:
+ IID = Intrinsic::x86_avx512_kadd_d;
+ break;
+ case X86::BI__builtin_ia32_kadddi:
+ IID = Intrinsic::x86_avx512_kadd_q;
+ break;
+ }
+
+ unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
+ Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
+ Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
+ Function *Intr = CGM.getIntrinsic(IID);
+ Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
+ return Builder.CreateBitCast(Res, Ops[0]->getType());
+ }
+ case X86::BI__builtin_ia32_kandqi:
case X86::BI__builtin_ia32_kandhi:
- return EmitX86MaskLogic(*this, Instruction::And, 16, Ops);
+ case X86::BI__builtin_ia32_kandsi:
+ case X86::BI__builtin_ia32_kanddi:
+ return EmitX86MaskLogic(*this, Instruction::And, Ops);
+ case X86::BI__builtin_ia32_kandnqi:
case X86::BI__builtin_ia32_kandnhi:
- return EmitX86MaskLogic(*this, Instruction::And, 16, Ops, true);
+ case X86::BI__builtin_ia32_kandnsi:
+ case X86::BI__builtin_ia32_kandndi:
+ return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
+ case X86::BI__builtin_ia32_korqi:
case X86::BI__builtin_ia32_korhi:
- return EmitX86MaskLogic(*this, Instruction::Or, 16, Ops);
+ case X86::BI__builtin_ia32_korsi:
+ case X86::BI__builtin_ia32_kordi:
+ return EmitX86MaskLogic(*this, Instruction::Or, Ops);
+ case X86::BI__builtin_ia32_kxnorqi:
case X86::BI__builtin_ia32_kxnorhi:
- return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops, true);
+ case X86::BI__builtin_ia32_kxnorsi:
+ case X86::BI__builtin_ia32_kxnordi:
+ return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
+ case X86::BI__builtin_ia32_kxorqi:
case X86::BI__builtin_ia32_kxorhi:
- return EmitX86MaskLogic(*this, Instruction::Xor, 16, Ops);
- case X86::BI__builtin_ia32_knothi: {
- Ops[0] = getMaskVecValue(*this, Ops[0], 16);
- return Builder.CreateBitCast(Builder.CreateNot(Ops[0]),
- Builder.getInt16Ty());
+ case X86::BI__builtin_ia32_kxorsi:
+ case X86::BI__builtin_ia32_kxordi:
+ return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
+ case X86::BI__builtin_ia32_knotqi:
+ case X86::BI__builtin_ia32_knothi:
+ case X86::BI__builtin_ia32_knotsi:
+ case X86::BI__builtin_ia32_knotdi: {
+ unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
+ Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
+ return Builder.CreateBitCast(Builder.CreateNot(Res),
+ Ops[0]->getType());
+ }
+ case X86::BI__builtin_ia32_kmovb:
+ case X86::BI__builtin_ia32_kmovw:
+ case X86::BI__builtin_ia32_kmovd:
+ case X86::BI__builtin_ia32_kmovq: {
+ // Bitcast to vXi1 type and then back to integer. This gets the mask
+ // register type into the IR, but might be optimized out depending on
+ // what's around it.
+ unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
+ Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
+ return Builder.CreateBitCast(Res, Ops[0]->getType());
}
case X86::BI__builtin_ia32_kunpckdi:
case X86::BI__builtin_ia32_kunpcksi:
case X86::BI__builtin_ia32_kunpckhi: {
- unsigned NumElts = Ops[0]->getType()->getScalarSizeInBits();
+ unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
uint32_t Indices[64];
@@ -10103,6 +11001,52 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_pternlogq256_maskz:
return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
+ case X86::BI__builtin_ia32_vpshldd128:
+ case X86::BI__builtin_ia32_vpshldd256:
+ case X86::BI__builtin_ia32_vpshldd512:
+ case X86::BI__builtin_ia32_vpshldq128:
+ case X86::BI__builtin_ia32_vpshldq256:
+ case X86::BI__builtin_ia32_vpshldq512:
+ case X86::BI__builtin_ia32_vpshldw128:
+ case X86::BI__builtin_ia32_vpshldw256:
+ case X86::BI__builtin_ia32_vpshldw512:
+ return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
+
+ case X86::BI__builtin_ia32_vpshrdd128:
+ case X86::BI__builtin_ia32_vpshrdd256:
+ case X86::BI__builtin_ia32_vpshrdd512:
+ case X86::BI__builtin_ia32_vpshrdq128:
+ case X86::BI__builtin_ia32_vpshrdq256:
+ case X86::BI__builtin_ia32_vpshrdq512:
+ case X86::BI__builtin_ia32_vpshrdw128:
+ case X86::BI__builtin_ia32_vpshrdw256:
+ case X86::BI__builtin_ia32_vpshrdw512:
+ // Ops 0 and 1 are swapped.
+ return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
+
+ case X86::BI__builtin_ia32_vpshldvd128:
+ case X86::BI__builtin_ia32_vpshldvd256:
+ case X86::BI__builtin_ia32_vpshldvd512:
+ case X86::BI__builtin_ia32_vpshldvq128:
+ case X86::BI__builtin_ia32_vpshldvq256:
+ case X86::BI__builtin_ia32_vpshldvq512:
+ case X86::BI__builtin_ia32_vpshldvw128:
+ case X86::BI__builtin_ia32_vpshldvw256:
+ case X86::BI__builtin_ia32_vpshldvw512:
+ return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
+
+ case X86::BI__builtin_ia32_vpshrdvd128:
+ case X86::BI__builtin_ia32_vpshrdvd256:
+ case X86::BI__builtin_ia32_vpshrdvd512:
+ case X86::BI__builtin_ia32_vpshrdvq128:
+ case X86::BI__builtin_ia32_vpshrdvq256:
+ case X86::BI__builtin_ia32_vpshrdvq512:
+ case X86::BI__builtin_ia32_vpshrdvw128:
+ case X86::BI__builtin_ia32_vpshrdvw256:
+ case X86::BI__builtin_ia32_vpshrdvw512:
+ // Ops 0 and 1 are swapped.
+ return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
+
// 3DNow!
case X86::BI__builtin_ia32_pswapdsf:
case X86::BI__builtin_ia32_pswapdsi: {
@@ -10145,6 +11089,33 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Ops[0]);
return Builder.CreateExtractValue(Call, 1);
}
+ case X86::BI__builtin_ia32_addcarryx_u32:
+ case X86::BI__builtin_ia32_addcarryx_u64:
+ case X86::BI__builtin_ia32_subborrow_u32:
+ case X86::BI__builtin_ia32_subborrow_u64: {
+ Intrinsic::ID IID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_addcarryx_u32:
+ IID = Intrinsic::x86_addcarry_32;
+ break;
+ case X86::BI__builtin_ia32_addcarryx_u64:
+ IID = Intrinsic::x86_addcarry_64;
+ break;
+ case X86::BI__builtin_ia32_subborrow_u32:
+ IID = Intrinsic::x86_subborrow_32;
+ break;
+ case X86::BI__builtin_ia32_subborrow_u64:
+ IID = Intrinsic::x86_subborrow_64;
+ break;
+ }
+
+ Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
+ { Ops[0], Ops[1], Ops[2] });
+ Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
+ Ops[3]);
+ return Builder.CreateExtractValue(Call, 0);
+ }
case X86::BI__builtin_ia32_fpclassps128_mask:
case X86::BI__builtin_ia32_fpclassps256_mask:
@@ -10183,6 +11154,51 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
}
+ case X86::BI__builtin_ia32_vpmultishiftqb128:
+ case X86::BI__builtin_ia32_vpmultishiftqb256:
+ case X86::BI__builtin_ia32_vpmultishiftqb512: {
+ Intrinsic::ID ID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_vpmultishiftqb128:
+ ID = Intrinsic::x86_avx512_pmultishift_qb_128;
+ break;
+ case X86::BI__builtin_ia32_vpmultishiftqb256:
+ ID = Intrinsic::x86_avx512_pmultishift_qb_256;
+ break;
+ case X86::BI__builtin_ia32_vpmultishiftqb512:
+ ID = Intrinsic::x86_avx512_pmultishift_qb_512;
+ break;
+ }
+
+ return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
+ }
+
+ case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
+ case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
+ case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
+ unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ Value *MaskIn = Ops[2];
+ Ops.erase(&Ops[2]);
+
+ Intrinsic::ID ID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
+ ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
+ break;
+ case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
+ ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
+ break;
+ case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
+ ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
+ break;
+ }
+
+ Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
+ return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
+ }
+
// packed comparison intrinsics
case X86::BI__builtin_ia32_cmpeqps:
case X86::BI__builtin_ia32_cmpeqpd:
@@ -10361,6 +11377,27 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
llvm::SyncScope::System);
}
+ case X86::BI__shiftleft128:
+ case X86::BI__shiftright128: {
+ // FIXME: Once fshl/fshr no longer add an unneeded 'and' and 'cmov', do this:
+ // llvm::Function *F = CGM.getIntrinsic(
+ // BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
+ // Int64Ty);
+ // Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
+ // return Builder.CreateCall(F, Ops);
+ llvm::Type *Int128Ty = Builder.getInt128Ty();
+ Value *Val = Builder.CreateOr(
+ Builder.CreateShl(Builder.CreateZExt(Ops[1], Int128Ty), 64),
+ Builder.CreateZExt(Ops[0], Int128Ty));
+ Value *Amt = Builder.CreateAnd(Builder.CreateZExt(Ops[2], Int128Ty),
+ llvm::ConstantInt::get(Int128Ty, 0x3f));
+ Value *Res;
+ if (BuiltinID == X86::BI__shiftleft128)
+ Res = Builder.CreateLShr(Builder.CreateShl(Val, Amt), 64);
+ else
+ Res = Builder.CreateLShr(Val, Amt);
+ return Builder.CreateTrunc(Res, Int64Ty);
+ }
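
The expansion above concatenates the two 64-bit halves into an i128, shifts, and then takes the relevant half, with the shift amount masked to six bits. A sketch of the same semantics using the non-portable __int128 extension (function names hypothetical):

#include <cstdint>

uint64_t shiftleft128Sketch(uint64_t Lo, uint64_t Hi, unsigned char Amt) {
  unsigned __int128 Val = ((unsigned __int128)Hi << 64) | Lo;
  Amt &= 0x3f;                           // only the low 6 bits are used
  return (uint64_t)((Val << Amt) >> 64); // high half after the shift
}

uint64_t shiftright128Sketch(uint64_t Lo, uint64_t Hi, unsigned char Amt) {
  unsigned __int128 Val = ((unsigned __int128)Hi << 64) | Lo;
  Amt &= 0x3f;
  return (uint64_t)(Val >> Amt);         // low half after the shift
}
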
case X86::BI_ReadWriteBarrier:
case X86::BI_ReadBarrier:
case X86::BI_WriteBarrier: {
@@ -10401,14 +11438,11 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
llvm::Type *Int128PtrTy = Int128Ty->getPointerTo();
Value *Destination =
- Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PtrTy);
- Value *ExchangeHigh128 =
- Builder.CreateZExt(EmitScalarExpr(E->getArg(1)), Int128Ty);
- Value *ExchangeLow128 =
- Builder.CreateZExt(EmitScalarExpr(E->getArg(2)), Int128Ty);
- Address ComparandResult(
- Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int128PtrTy),
- getContext().toCharUnitsFromBits(128));
+ Builder.CreateBitCast(Ops[0], Int128PtrTy);
+ Value *ExchangeHigh128 = Builder.CreateZExt(Ops[1], Int128Ty);
+ Value *ExchangeLow128 = Builder.CreateZExt(Ops[2], Int128Ty);
+ Address ComparandResult(Builder.CreateBitCast(Ops[3], Int128PtrTy),
+ getContext().toCharUnitsFromBits(128));
Value *Exchange = Builder.CreateOr(
Builder.CreateShl(ExchangeHigh128, 64, "", false, false),
@@ -10459,8 +11493,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__readfsdword:
case X86::BI__readfsqword: {
llvm::Type *IntTy = ConvertType(E->getType());
- Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
- llvm::PointerType::get(IntTy, 257));
+ Value *Ptr =
+ Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 257));
LoadInst *Load = Builder.CreateAlignedLoad(
IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
Load->setVolatile(true);
@@ -10471,17 +11505,44 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__readgsdword:
case X86::BI__readgsqword: {
llvm::Type *IntTy = ConvertType(E->getType());
- Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
- llvm::PointerType::get(IntTy, 256));
+ Value *Ptr =
+ Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 256));
LoadInst *Load = Builder.CreateAlignedLoad(
IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
Load->setVolatile(true);
return Load;
}
+ case X86::BI__builtin_ia32_paddsb512:
+ case X86::BI__builtin_ia32_paddsw512:
+ case X86::BI__builtin_ia32_paddsb256:
+ case X86::BI__builtin_ia32_paddsw256:
+ case X86::BI__builtin_ia32_paddsb128:
+ case X86::BI__builtin_ia32_paddsw128:
+ return EmitX86AddSubSatExpr(*this, Ops, true, true);
+ case X86::BI__builtin_ia32_paddusb512:
+ case X86::BI__builtin_ia32_paddusw512:
+ case X86::BI__builtin_ia32_paddusb256:
+ case X86::BI__builtin_ia32_paddusw256:
+ case X86::BI__builtin_ia32_paddusb128:
+ case X86::BI__builtin_ia32_paddusw128:
+ return EmitX86AddSubSatExpr(*this, Ops, false, true);
+ case X86::BI__builtin_ia32_psubsb512:
+ case X86::BI__builtin_ia32_psubsw512:
+ case X86::BI__builtin_ia32_psubsb256:
+ case X86::BI__builtin_ia32_psubsw256:
+ case X86::BI__builtin_ia32_psubsb128:
+ case X86::BI__builtin_ia32_psubsw128:
+ return EmitX86AddSubSatExpr(*this, Ops, true, false);
+ case X86::BI__builtin_ia32_psubusb512:
+ case X86::BI__builtin_ia32_psubusw512:
+ case X86::BI__builtin_ia32_psubusb256:
+ case X86::BI__builtin_ia32_psubusw256:
+ case X86::BI__builtin_ia32_psubusb128:
+ case X86::BI__builtin_ia32_psubusw128:
+ return EmitX86AddSubSatExpr(*this, Ops, false, false);
}
}
-
Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
SmallVector<Value*, 4> Ops;
@@ -10901,6 +11962,28 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
auto RetTy = ConvertType(BIRetType);
return Builder.CreateBitCast(ShuffleCall, RetTy);
}
+
+ case PPC::BI__builtin_pack_vector_int128: {
+ bool isLittleEndian = getTarget().isLittleEndian();
+ Value *UndefValue =
+ llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), 2));
+ Value *Res = Builder.CreateInsertElement(
+ UndefValue, Ops[0], (uint64_t)(isLittleEndian ? 1 : 0));
+ Res = Builder.CreateInsertElement(Res, Ops[1],
+ (uint64_t)(isLittleEndian ? 0 : 1));
+ return Builder.CreateBitCast(Res, ConvertType(E->getType()));
+ }
+
+ case PPC::BI__builtin_unpack_vector_int128: {
+ ConstantInt *Index = cast<ConstantInt>(Ops[1]);
+ Value *Unpacked = Builder.CreateBitCast(
+ Ops[0], llvm::VectorType::get(ConvertType(E->getType()), 2));
+
+ if (getTarget().isLittleEndian())
+ Index = ConstantInt::get(Index->getType(), 1 - Index->getZExtValue());
+
+ return Builder.CreateExtractElement(Unpacked, Index);
+ }
}
}
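
For the two PPC cases just above: element 0 of a v2i64 holds the least-significant doubleword on little-endian targets but the most-significant one on big-endian targets, so the insertion and extraction indices are flipped for little-endian. A sketch of the pack semantics, assuming the __int128 extension:

#include <cstdint>

// Bit-for-bit equivalent of __builtin_pack_vector_int128(A, B) as lowered
// above: A supplies the most-significant doubleword, B the least-significant.
unsigned __int128 packVectorInt128Sketch(uint64_t A, uint64_t B) {
  return ((unsigned __int128)A << 64) | B;
}
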
@@ -10948,12 +12031,16 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
- case AMDGPU::BI__builtin_amdgcn_mov_dpp: {
- llvm::SmallVector<llvm::Value *, 5> Args;
- for (unsigned I = 0; I != 5; ++I)
+ case AMDGPU::BI__builtin_amdgcn_mov_dpp:
+ case AMDGPU::BI__builtin_amdgcn_update_dpp: {
+ llvm::SmallVector<llvm::Value *, 6> Args;
+ for (unsigned I = 0; I != E->getNumArgs(); ++I)
Args.push_back(EmitScalarExpr(E->getArg(I)));
- Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp,
- Args[0]->getType());
+ assert(Args.size() == 5 || Args.size() == 6);
+ if (Args.size() == 5)
+ Args.insert(Args.begin(), llvm::UndefValue::get(Args[0]->getType()));
+ Value *F =
+ CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
return Builder.CreateCall(F, Args);
}
case AMDGPU::BI__builtin_amdgcn_div_fixup:
@@ -11039,50 +12126,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
CI->setConvergent();
return CI;
}
- case AMDGPU::BI__builtin_amdgcn_ds_faddf:
- case AMDGPU::BI__builtin_amdgcn_ds_fminf:
- case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
- llvm::SmallVector<llvm::Value *, 5> Args;
- for (unsigned I = 0; I != 5; ++I)
- Args.push_back(EmitScalarExpr(E->getArg(I)));
- const llvm::Type *PtrTy = Args[0]->getType();
- // check pointer parameter
- if (!PtrTy->isPointerTy() ||
- E->getArg(0)
- ->getType()
- ->getPointeeType()
- .getQualifiers()
- .getAddressSpace() != LangAS::opencl_local ||
- !PtrTy->getPointerElementType()->isFloatTy()) {
- CGM.Error(E->getArg(0)->getLocStart(),
- "parameter should have type \"local float*\"");
- return nullptr;
- }
- // check float parameter
- if (!Args[1]->getType()->isFloatTy()) {
- CGM.Error(E->getArg(1)->getLocStart(),
- "parameter should have type \"float\"");
- return nullptr;
- }
-
- Intrinsic::ID ID;
- switch (BuiltinID) {
- case AMDGPU::BI__builtin_amdgcn_ds_faddf:
- ID = Intrinsic::amdgcn_ds_fadd;
- break;
- case AMDGPU::BI__builtin_amdgcn_ds_fminf:
- ID = Intrinsic::amdgcn_ds_fmin;
- break;
- case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
- ID = Intrinsic::amdgcn_ds_fmax;
- break;
- default:
- llvm_unreachable("Unknown BuiltinID");
- }
- Value *F = CGM.getIntrinsic(ID);
- return Builder.CreateCall(F, Args);
- }
-
// amdgcn workitem
case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
@@ -11363,7 +12406,7 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F, {X, Y, M4Value});
}
- // Vector intrisincs that output the post-instruction CC value.
+ // Vector intrinsics that output the post-instruction CC value.
#define INTRINSIC_WITH_CC(NAME) \
case SystemZ::BI__builtin_##NAME: \
@@ -11823,7 +12866,7 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
bool isColMajor = isColMajorArg.getSExtValue();
unsigned IID;
unsigned NumResults = 8;
- // PTX Instructions (and LLVM instrinsics) are defined for slice _d_, yet
+ // PTX Instructions (and LLVM intrinsics) are defined for slice _d_, yet
// for some reason nvcc builtins use _c_.
switch (BuiltinID) {
case NVPTX::BI__hmma_m16n16k16_st_c_f16:
@@ -12046,31 +13089,6 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
return Builder.CreateCall(Callee, Args);
}
- case WebAssembly::BI__builtin_wasm_mem_size: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *I = EmitScalarExpr(E->getArg(0));
- Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_mem_size, ResultType);
- return Builder.CreateCall(Callee, I);
- }
- case WebAssembly::BI__builtin_wasm_mem_grow: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *Args[] = {
- EmitScalarExpr(E->getArg(0)),
- EmitScalarExpr(E->getArg(1))
- };
- Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_mem_grow, ResultType);
- return Builder.CreateCall(Callee, Args);
- }
- case WebAssembly::BI__builtin_wasm_current_memory: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
- return Builder.CreateCall(Callee);
- }
- case WebAssembly::BI__builtin_wasm_grow_memory: {
- Value *X = EmitScalarExpr(E->getArg(0));
- Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
- return Builder.CreateCall(Callee, X);
- }
case WebAssembly::BI__builtin_wasm_throw: {
Value *Tag = EmitScalarExpr(E->getArg(0));
Value *Obj = EmitScalarExpr(E->getArg(1));
@@ -12081,6 +13099,211 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
return Builder.CreateCall(Callee);
}
+ case WebAssembly::BI__builtin_wasm_atomic_wait_i32: {
+ Value *Addr = EmitScalarExpr(E->getArg(0));
+ Value *Expected = EmitScalarExpr(E->getArg(1));
+ Value *Timeout = EmitScalarExpr(E->getArg(2));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i32);
+ return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
+ }
+ case WebAssembly::BI__builtin_wasm_atomic_wait_i64: {
+ Value *Addr = EmitScalarExpr(E->getArg(0));
+ Value *Expected = EmitScalarExpr(E->getArg(1));
+ Value *Timeout = EmitScalarExpr(E->getArg(2));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i64);
+ return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
+ }
+ case WebAssembly::BI__builtin_wasm_atomic_notify: {
+ Value *Addr = EmitScalarExpr(E->getArg(0));
+ Value *Count = EmitScalarExpr(E->getArg(1));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_notify);
+ return Builder.CreateCall(Callee, {Addr, Count});
+ }
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64x2_f64x2: {
+ Value *Src = EmitScalarExpr(E->getArg(0));
+ llvm::Type *ResT = ConvertType(E->getType());
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_signed,
+ {ResT, Src->getType()});
+ return Builder.CreateCall(Callee, {Src});
+ }
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4:
+ case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64x2_f64x2: {
+ Value *Src = EmitScalarExpr(E->getArg(0));
+ llvm::Type *ResT = ConvertType(E->getType());
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_unsigned,
+ {ResT, Src->getType()});
+ return Builder.CreateCall(Callee, {Src});
+ }
+ case WebAssembly::BI__builtin_wasm_min_f32:
+ case WebAssembly::BI__builtin_wasm_min_f64:
+ case WebAssembly::BI__builtin_wasm_min_f32x4:
+ case WebAssembly::BI__builtin_wasm_min_f64x2: {
+ Value *LHS = EmitScalarExpr(E->getArg(0));
+ Value *RHS = EmitScalarExpr(E->getArg(1));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::minimum,
+ ConvertType(E->getType()));
+ return Builder.CreateCall(Callee, {LHS, RHS});
+ }
+ case WebAssembly::BI__builtin_wasm_max_f32:
+ case WebAssembly::BI__builtin_wasm_max_f64:
+ case WebAssembly::BI__builtin_wasm_max_f32x4:
+ case WebAssembly::BI__builtin_wasm_max_f64x2: {
+ Value *LHS = EmitScalarExpr(E->getArg(0));
+ Value *RHS = EmitScalarExpr(E->getArg(1));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::maximum,
+ ConvertType(E->getType()));
+ return Builder.CreateCall(Callee, {LHS, RHS});
+ }
+ case WebAssembly::BI__builtin_wasm_extract_lane_s_i8x16:
+ case WebAssembly::BI__builtin_wasm_extract_lane_u_i8x16:
+ case WebAssembly::BI__builtin_wasm_extract_lane_s_i16x8:
+ case WebAssembly::BI__builtin_wasm_extract_lane_u_i16x8:
+ case WebAssembly::BI__builtin_wasm_extract_lane_i32x4:
+ case WebAssembly::BI__builtin_wasm_extract_lane_i64x2:
+ case WebAssembly::BI__builtin_wasm_extract_lane_f32x4:
+ case WebAssembly::BI__builtin_wasm_extract_lane_f64x2: {
+ llvm::APSInt LaneConst;
+ if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext()))
+ llvm_unreachable("Constant arg isn't actually constant?");
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst);
+ Value *Extract = Builder.CreateExtractElement(Vec, Lane);
+ switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_extract_lane_s_i8x16:
+ case WebAssembly::BI__builtin_wasm_extract_lane_s_i16x8:
+ return Builder.CreateSExt(Extract, ConvertType(E->getType()));
+ case WebAssembly::BI__builtin_wasm_extract_lane_u_i8x16:
+ case WebAssembly::BI__builtin_wasm_extract_lane_u_i16x8:
+ return Builder.CreateZExt(Extract, ConvertType(E->getType()));
+ case WebAssembly::BI__builtin_wasm_extract_lane_i32x4:
+ case WebAssembly::BI__builtin_wasm_extract_lane_i64x2:
+ case WebAssembly::BI__builtin_wasm_extract_lane_f32x4:
+ case WebAssembly::BI__builtin_wasm_extract_lane_f64x2:
+ return Extract;
+ default:
+ llvm_unreachable("unexpected builtin ID");
+ }
+ }
+ case WebAssembly::BI__builtin_wasm_replace_lane_i8x16:
+ case WebAssembly::BI__builtin_wasm_replace_lane_i16x8:
+ case WebAssembly::BI__builtin_wasm_replace_lane_i32x4:
+ case WebAssembly::BI__builtin_wasm_replace_lane_i64x2:
+ case WebAssembly::BI__builtin_wasm_replace_lane_f32x4:
+ case WebAssembly::BI__builtin_wasm_replace_lane_f64x2: {
+ llvm::APSInt LaneConst;
+ if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext()))
+ llvm_unreachable("Constant arg isn't actually constant?");
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst);
+ Value *Val = EmitScalarExpr(E->getArg(2));
+ switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_replace_lane_i8x16:
+ case WebAssembly::BI__builtin_wasm_replace_lane_i16x8: {
+ llvm::Type *ElemType = ConvertType(E->getType())->getVectorElementType();
+ Value *Trunc = Builder.CreateTrunc(Val, ElemType);
+ return Builder.CreateInsertElement(Vec, Trunc, Lane);
+ }
+ case WebAssembly::BI__builtin_wasm_replace_lane_i32x4:
+ case WebAssembly::BI__builtin_wasm_replace_lane_i64x2:
+ case WebAssembly::BI__builtin_wasm_replace_lane_f32x4:
+ case WebAssembly::BI__builtin_wasm_replace_lane_f64x2:
+ return Builder.CreateInsertElement(Vec, Val, Lane);
+ default:
+ llvm_unreachable("unexpected builtin ID");
+ }
+ }
+ case WebAssembly::BI__builtin_wasm_add_saturate_s_i8x16:
+ case WebAssembly::BI__builtin_wasm_add_saturate_u_i8x16:
+ case WebAssembly::BI__builtin_wasm_add_saturate_s_i16x8:
+ case WebAssembly::BI__builtin_wasm_add_saturate_u_i16x8:
+ case WebAssembly::BI__builtin_wasm_sub_saturate_s_i8x16:
+ case WebAssembly::BI__builtin_wasm_sub_saturate_u_i8x16:
+ case WebAssembly::BI__builtin_wasm_sub_saturate_s_i16x8:
+ case WebAssembly::BI__builtin_wasm_sub_saturate_u_i16x8: {
+ unsigned IntNo;
+ switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_add_saturate_s_i8x16:
+ case WebAssembly::BI__builtin_wasm_add_saturate_s_i16x8:
+ IntNo = Intrinsic::sadd_sat;
+ break;
+ case WebAssembly::BI__builtin_wasm_add_saturate_u_i8x16:
+ case WebAssembly::BI__builtin_wasm_add_saturate_u_i16x8:
+ IntNo = Intrinsic::uadd_sat;
+ break;
+ case WebAssembly::BI__builtin_wasm_sub_saturate_s_i8x16:
+ case WebAssembly::BI__builtin_wasm_sub_saturate_s_i16x8:
+ IntNo = Intrinsic::wasm_sub_saturate_signed;
+ break;
+ case WebAssembly::BI__builtin_wasm_sub_saturate_u_i8x16:
+ case WebAssembly::BI__builtin_wasm_sub_saturate_u_i16x8:
+ IntNo = Intrinsic::wasm_sub_saturate_unsigned;
+ break;
+ default:
+ llvm_unreachable("unexpected builtin ID");
+ }
+ Value *LHS = EmitScalarExpr(E->getArg(0));
+ Value *RHS = EmitScalarExpr(E->getArg(1));
+ Value *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
+ return Builder.CreateCall(Callee, {LHS, RHS});
+ }
+ case WebAssembly::BI__builtin_wasm_bitselect: {
+ Value *V1 = EmitScalarExpr(E->getArg(0));
+ Value *V2 = EmitScalarExpr(E->getArg(1));
+ Value *C = EmitScalarExpr(E->getArg(2));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_bitselect,
+ ConvertType(E->getType()));
+ return Builder.CreateCall(Callee, {V1, V2, C});
+ }
+ case WebAssembly::BI__builtin_wasm_any_true_i8x16:
+ case WebAssembly::BI__builtin_wasm_any_true_i16x8:
+ case WebAssembly::BI__builtin_wasm_any_true_i32x4:
+ case WebAssembly::BI__builtin_wasm_any_true_i64x2:
+ case WebAssembly::BI__builtin_wasm_all_true_i8x16:
+ case WebAssembly::BI__builtin_wasm_all_true_i16x8:
+ case WebAssembly::BI__builtin_wasm_all_true_i32x4:
+ case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
+ unsigned IntNo;
+ switch (BuiltinID) {
+ case WebAssembly::BI__builtin_wasm_any_true_i8x16:
+ case WebAssembly::BI__builtin_wasm_any_true_i16x8:
+ case WebAssembly::BI__builtin_wasm_any_true_i32x4:
+ case WebAssembly::BI__builtin_wasm_any_true_i64x2:
+ IntNo = Intrinsic::wasm_anytrue;
+ break;
+ case WebAssembly::BI__builtin_wasm_all_true_i8x16:
+ case WebAssembly::BI__builtin_wasm_all_true_i16x8:
+ case WebAssembly::BI__builtin_wasm_all_true_i32x4:
+ case WebAssembly::BI__builtin_wasm_all_true_i64x2:
+ IntNo = Intrinsic::wasm_alltrue;
+ break;
+ default:
+ llvm_unreachable("unexpected builtin ID");
+ }
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ Value *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
+ return Builder.CreateCall(Callee, {Vec});
+ }
+ case WebAssembly::BI__builtin_wasm_abs_f32x4:
+ case WebAssembly::BI__builtin_wasm_abs_f64x2: {
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
+ return Builder.CreateCall(Callee, {Vec});
+ }
+ case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
+ case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
+ Value *Vec = EmitScalarExpr(E->getArg(0));
+ Value *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
+ return Builder.CreateCall(Callee, {Vec});
+ }
default:
return nullptr;
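The cases above lower the WebAssembly SIMD builtins almost one-to-one onto LLVM intrinsics (llvm.minimum/llvm.maximum for min/max, the generic saturating-add intrinsics, and plain extract/insertelement for lane access). As a rough source-level illustration, assuming a wasm32 target with -msimd128 and with the vector typedefs being purely illustrative:

typedef float f32x4 __attribute__((vector_size(16)));
typedef int   i32x4 __attribute__((vector_size(16)));

f32x4 lane_min(f32x4 a, f32x4 b) {
  // Lowers to llvm.minimum, which propagates NaNs, per the handling above.
  return __builtin_wasm_min_f32x4(a, b);
}

int lane0(i32x4 v) {
  // The lane index must be an integer constant expression; the extract_lane
  // cases above assert on that.
  return __builtin_wasm_extract_lane_i32x4(v, 0);
}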
diff --git a/lib/CodeGen/CGCUDANV.cpp b/lib/CodeGen/CGCUDANV.cpp
index 5fcc9e011bcb..1c578bd151bd 100644
--- a/lib/CodeGen/CGCUDANV.cpp
+++ b/lib/CodeGen/CGCUDANV.cpp
@@ -137,7 +137,7 @@ CGNVCUDARuntime::addUnderscoredPrefixToName(StringRef FuncName) const {
CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
: CGCUDARuntime(CGM), Context(CGM.getLLVMContext()),
TheModule(CGM.getModule()),
- RelocatableDeviceCode(CGM.getLangOpts().CUDARelocatableDeviceCode) {
+ RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode) {
CodeGen::CodeGenTypes &Types = CGM.getTypes();
ASTContext &Ctx = CGM.getContext();
@@ -353,8 +353,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// global variable and save a reference in GpuBinaryHandle to be cleaned up
// in destructor on exit. Then associate all known kernels with the GPU binary
// handle so CUDA runtime can figure out what to call on the GPU side.
- std::unique_ptr<llvm::MemoryBuffer> CudaGpuBinary;
- if (!IsHIP) {
+ std::unique_ptr<llvm::MemoryBuffer> CudaGpuBinary = nullptr;
+ if (!CudaGpuBinaryFileName.empty()) {
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> CudaGpuBinaryOrErr =
llvm::MemoryBuffer::getFileOrSTDIN(CudaGpuBinaryFileName);
if (std::error_code EC = CudaGpuBinaryOrErr.getError()) {
@@ -388,15 +388,23 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
ModuleIDSectionName = "__hip_module_id";
ModuleIDPrefix = "__hip_";
- // For HIP, create an external symbol __hip_fatbin in section .hip_fatbin.
- // The external symbol is supposed to contain the fat binary but will be
- // populated somewhere else, e.g. by lld through link script.
- FatBinStr = new llvm::GlobalVariable(
+ if (CudaGpuBinary) {
+ // If fatbin is available from early finalization, create a string
+ // literal containing the fat binary loaded from the given file.
+ FatBinStr = makeConstantString(CudaGpuBinary->getBuffer(), "",
+ FatbinConstantName, 8);
+ } else {
+ // If fatbin is not available, create an external symbol
+ // __hip_fatbin in section .hip_fatbin. The external symbol is supposed
+ // to contain the fat binary but will be populated somewhere else,
+      // e.g. by lld through a linker script.
+ FatBinStr = new llvm::GlobalVariable(
CGM.getModule(), CGM.Int8Ty,
/*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, nullptr,
"__hip_fatbin", nullptr,
llvm::GlobalVariable::NotThreadLocal);
- cast<llvm::GlobalVariable>(FatBinStr)->setSection(FatbinConstantName);
+ cast<llvm::GlobalVariable>(FatBinStr)->setSection(FatbinConstantName);
+ }
FatMagic = HIPFatMagic;
} else {
@@ -447,6 +455,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// thread safety of the loaded program. Therefore we can assume sequential
// execution of constructor functions here.
if (IsHIP) {
+ auto Linkage = CudaGpuBinary ? llvm::GlobalValue::InternalLinkage :
+ llvm::GlobalValue::LinkOnceAnyLinkage;
llvm::BasicBlock *IfBlock =
llvm::BasicBlock::Create(Context, "if", ModuleCtorFunc);
llvm::BasicBlock *ExitBlock =
@@ -455,10 +465,13 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// of HIP ABI.
GpuBinaryHandle = new llvm::GlobalVariable(
TheModule, VoidPtrPtrTy, /*isConstant=*/false,
- llvm::GlobalValue::LinkOnceAnyLinkage,
+ Linkage,
/*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy),
"__hip_gpubin_handle");
GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getQuantity());
+    // Prevent the weak symbol in different shared libraries from being merged.
+ if (Linkage != llvm::GlobalValue::InternalLinkage)
+ GpuBinaryHandle->setVisibility(llvm::GlobalValue::HiddenVisibility);
Address GpuBinaryAddr(
GpuBinaryHandle,
CharUnits::fromQuantity(GpuBinaryHandle->getAlignment()));
@@ -507,7 +520,7 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// Generate a unique module ID.
SmallString<64> ModuleID;
llvm::raw_svector_ostream OS(ModuleID);
- OS << ModuleIDPrefix << llvm::format("%x", FatbinWrapper->getGUID());
+ OS << ModuleIDPrefix << llvm::format("%" PRIx64, FatbinWrapper->getGUID());
llvm::Constant *ModuleIDConstant =
makeConstantString(ModuleID.str(), "", ModuleIDSectionName, 32);
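The HIP path above chooses internal linkage for the handle when the fat binary could be embedded from a file, and otherwise keeps a linkonce_any definition with hidden visibility so that every shared library retains its own handle. A condensed sketch of just that decision; the helper name is made up and the surrounding constructor logic is omitted:

#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"

static llvm::GlobalVariable *makeGpuBinHandle(llvm::Module &M,
                                              llvm::PointerType *VoidPtrPtrTy,
                                              bool HasEmbeddedFatbin) {
  auto Linkage = HasEmbeddedFatbin ? llvm::GlobalValue::InternalLinkage
                                   : llvm::GlobalValue::LinkOnceAnyLinkage;
  auto *GV = new llvm::GlobalVariable(
      M, VoidPtrPtrTy, /*isConstant=*/false, Linkage,
      llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__hip_gpubin_handle");
  // Hidden visibility keeps the weak handle from being merged across DSOs.
  if (Linkage != llvm::GlobalValue::InternalLinkage)
    GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
  return GV;
}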
diff --git a/lib/CodeGen/CGCXX.cpp b/lib/CodeGen/CGCXX.cpp
index d5945be43458..8b0733fbec3e 100644
--- a/lib/CodeGen/CGCXX.cpp
+++ b/lib/CodeGen/CGCXX.cpp
@@ -23,7 +23,7 @@
#include "clang/AST/Mangle.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/StmtCXX.h"
-#include "clang/Frontend/CodeGenOptions.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "llvm/ADT/StringExtras.h"
using namespace clang;
using namespace CodeGen;
@@ -276,7 +276,7 @@ static CGCallee BuildAppleKextVirtualCall(CodeGenFunction &CGF,
CGF.Builder.CreateConstInBoundsGEP1_64(VTable, VTableIndex, "vfnkxt");
llvm::Value *VFunc =
CGF.Builder.CreateAlignedLoad(VFuncPtr, CGF.PointerAlignInBytes);
- CGCallee Callee(GD.getDecl()->getCanonicalDecl(), VFunc);
+ CGCallee Callee(GD, VFunc);
return Callee;
}
diff --git a/lib/CodeGen/CGCXXABI.cpp b/lib/CodeGen/CGCXXABI.cpp
index 3b1b47cdfe07..ed168b1ce72d 100644
--- a/lib/CodeGen/CGCXXABI.cpp
+++ b/lib/CodeGen/CGCXXABI.cpp
@@ -132,7 +132,7 @@ void CGCXXABI::buildThisParam(CodeGenFunction &CGF, FunctionArgList &params) {
// generation. Maybe we can come up with a better way?
auto *ThisDecl = ImplicitParamDecl::Create(
CGM.getContext(), nullptr, MD->getLocation(),
- &CGM.getContext().Idents.get("this"), MD->getThisType(CGM.getContext()),
+ &CGM.getContext().Idents.get("this"), MD->getThisType(),
ImplicitParamDecl::CXXThis);
params.push_back(ThisDecl);
CGF.CXXABIThisDecl = ThisDecl;
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp
index fa51dc30c58b..7d494bb1f1c7 100644
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -23,11 +23,11 @@
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclObjC.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/CodeGen/SwiftCallingConv.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -59,6 +59,7 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) {
case CC_X86Pascal: return llvm::CallingConv::C;
// TODO: Add support for __vectorcall to LLVM.
case CC_X86VectorCall: return llvm::CallingConv::X86_VectorCall;
+ case CC_AArch64VectorCall: return llvm::CallingConv::AArch64_VectorCall;
case CC_SpirFunction: return llvm::CallingConv::SPIR_FUNC;
case CC_OpenCLKernel: return CGM.getTargetCodeGenInfo().getOpenCLKernelCallingConv();
case CC_PreserveMost: return llvm::CallingConv::PreserveMost;
@@ -67,11 +68,13 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) {
}
}
-/// Derives the 'this' type for codegen purposes, i.e. ignoring method
+/// Derives the 'this' type for codegen purposes, i.e. ignoring method CVR
/// qualification.
-/// FIXME: address space qualification?
-static CanQualType GetThisType(ASTContext &Context, const CXXRecordDecl *RD) {
+static CanQualType GetThisType(ASTContext &Context, const CXXRecordDecl *RD,
+ const CXXMethodDecl *MD) {
QualType RecTy = Context.getTagDeclType(RD)->getCanonicalTypeInternal();
+ if (MD)
+ RecTy = Context.getAddrSpaceQualType(RecTy, MD->getTypeQualifiers().getAddressSpace());
return Context.getPointerType(CanQualType::CreateUnsafe(RecTy));
}
@@ -214,6 +217,9 @@ static CallingConv getCallingConventionForDecl(const Decl *D, bool IsWindows) {
if (PcsAttr *PCS = D->getAttr<PcsAttr>())
return (PCS->getPCS() == PcsAttr::AAPCS ? CC_AAPCS : CC_AAPCS_VFP);
+ if (D->hasAttr<AArch64VectorPcsAttr>())
+ return CC_AArch64VectorCall;
+
if (D->hasAttr<IntelOclBiccAttr>())
return CC_IntelOclBicc;
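With this mapping, a declaration carrying the AArch64 vector-PCS attribute is given CC_AArch64VectorCall and ultimately LLVM's AArch64_VectorCall calling convention. An illustrative declaration using the GNU attribute spelling; it is not part of this patch:

void saxpy_step(float *x, const float *y, float a)
    __attribute__((aarch64_vector_pcs));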
@@ -246,7 +252,7 @@ CodeGenTypes::arrangeCXXMethodType(const CXXRecordDecl *RD,
// Add the 'this' pointer.
if (RD)
- argTypes.push_back(GetThisType(Context, RD));
+ argTypes.push_back(GetThisType(Context, RD, MD));
else
argTypes.push_back(Context.VoidPtrTy);
@@ -302,7 +308,7 @@ CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD,
SmallVector<CanQualType, 16> argTypes;
SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos;
- argTypes.push_back(GetThisType(Context, MD->getParent()));
+ argTypes.push_back(GetThisType(Context, MD->getParent(), MD));
bool PassParams = true;
@@ -529,7 +535,7 @@ const CGFunctionInfo &
CodeGenTypes::arrangeUnprototypedMustTailThunk(const CXXMethodDecl *MD) {
assert(MD->isVirtual() && "only methods have thunks");
CanQual<FunctionProtoType> FTP = GetFormalType(MD);
- CanQualType ArgTys[] = { GetThisType(Context, MD->getParent()) };
+ CanQualType ArgTys[] = { GetThisType(Context, MD->getParent(), MD) };
return arrangeLLVMFunctionInfo(Context.VoidTy, /*instanceMethod=*/false,
/*chainCall=*/false, ArgTys,
FTP->getExtInfo(), {}, RequiredArgs(1));
@@ -543,7 +549,7 @@ CodeGenTypes::arrangeMSCtorClosure(const CXXConstructorDecl *CD,
CanQual<FunctionProtoType> FTP = GetFormalType(CD);
SmallVector<CanQualType, 2> ArgTys;
const CXXRecordDecl *RD = CD->getParent();
- ArgTys.push_back(GetThisType(Context, RD));
+ ArgTys.push_back(GetThisType(Context, RD, CD));
if (CT == Ctor_CopyingClosure)
ArgTys.push_back(*FTP->param_type_begin());
if (RD->getNumVBases() > 0)
@@ -741,8 +747,8 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType,
FunctionType::ExtInfo info,
ArrayRef<FunctionProtoType::ExtParameterInfo> paramInfos,
RequiredArgs required) {
- assert(std::all_of(argTypes.begin(), argTypes.end(),
- [](CanQualType T) { return T.isCanonicalAsParam(); }));
+ assert(llvm::all_of(argTypes,
+ [](CanQualType T) { return T.isCanonicalAsParam(); }));
// Lookup or create unique function info.
llvm::FoldingSetNodeID ID;
@@ -1253,8 +1259,8 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
// Otherwise do coercion through memory. This is stupid, but simple.
Address Tmp = CreateTempAllocaForCoercion(CGF, Ty, Src.getAlignment());
- Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.AllocaInt8PtrTy);
- Address SrcCasted = CGF.Builder.CreateBitCast(Src, CGF.AllocaInt8PtrTy);
+  Address Casted = CGF.Builder.CreateElementBitCast(Tmp, CGF.Int8Ty);
+  Address SrcCasted = CGF.Builder.CreateElementBitCast(Src, CGF.Int8Ty);
CGF.Builder.CreateMemCpy(Casted, SrcCasted,
llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize),
false);
@@ -1335,8 +1341,8 @@ static void CreateCoercedStore(llvm::Value *Src,
// to that information.
Address Tmp = CreateTempAllocaForCoercion(CGF, SrcTy, Dst.getAlignment());
CGF.Builder.CreateStore(Src, Tmp);
- Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.AllocaInt8PtrTy);
- Address DstCasted = CGF.Builder.CreateBitCast(Dst, CGF.AllocaInt8PtrTy);
+  Address Casted = CGF.Builder.CreateElementBitCast(Tmp, CGF.Int8Ty);
+  Address DstCasted = CGF.Builder.CreateElementBitCast(Dst, CGF.Int8Ty);
CGF.Builder.CreateMemCpy(DstCasted, Casted,
llvm::ConstantInt::get(CGF.IntPtrTy, DstSize),
false);
@@ -1709,6 +1715,8 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
if (CodeGenOpts.DisableRedZone)
FuncAttrs.addAttribute(llvm::Attribute::NoRedZone);
+ if (CodeGenOpts.IndirectTlsSegRefs)
+ FuncAttrs.addAttribute("indirect-tls-seg-refs");
if (CodeGenOpts.NoImplicitFloat)
FuncAttrs.addAttribute(llvm::Attribute::NoImplicitFloat);
@@ -1784,6 +1792,11 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
FuncAttrs.addAttribute("stackrealign");
if (CodeGenOpts.Backchain)
FuncAttrs.addAttribute("backchain");
+
+ // FIXME: The interaction of this attribute with the SLH command line flag
+ // has not been determined.
+ if (CodeGenOpts.SpeculativeLoadHardening)
+ FuncAttrs.addAttribute(llvm::Attribute::SpeculativeLoadHardening);
}
if (getLangOpts().assumeFunctionsAreConvergent()) {
@@ -1803,6 +1816,12 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
if (CodeGenOpts.FlushDenorm)
FuncAttrs.addAttribute("nvptx-f32ftz", "true");
}
+
+ for (StringRef Attr : CodeGenOpts.DefaultFunctionAttrs) {
+ StringRef Var, Value;
+ std::tie(Var, Value) = Attr.split('=');
+ FuncAttrs.addAttribute(Var, Value);
+ }
}
void CodeGenModule::AddDefaultFnAttrs(llvm::Function &F) {
@@ -1828,7 +1847,7 @@ void CodeGenModule::ConstructAttributeList(
AddAttributesFromFunctionProtoType(getContext(), FuncAttrs,
CalleeInfo.getCalleeFunctionProtoType());
- const Decl *TargetDecl = CalleeInfo.getCalleeDecl();
+ const Decl *TargetDecl = CalleeInfo.getCalleeDecl().getDecl();
bool HasOptnone = false;
// FIXME: handle sseregparm someday...
@@ -1845,6 +1864,8 @@ void CodeGenModule::ConstructAttributeList(
FuncAttrs.addAttribute(llvm::Attribute::NoDuplicate);
if (TargetDecl->hasAttr<ConvergentAttr>())
FuncAttrs.addAttribute(llvm::Attribute::Convergent);
+ if (TargetDecl->hasAttr<SpeculativeLoadHardeningAttr>())
+ FuncAttrs.addAttribute(llvm::Attribute::SpeculativeLoadHardening);
if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(TargetDecl)) {
AddAttributesFromFunctionProtoType(
@@ -1936,7 +1957,7 @@ void CodeGenModule::ConstructAttributeList(
FuncAttrs.addAttribute("disable-tail-calls",
llvm::toStringRef(DisableTailCalls));
- GetCPUAndFeaturesAttributes(TargetDecl, FuncAttrs);
+ GetCPUAndFeaturesAttributes(CalleeInfo.getCalleeDecl(), FuncAttrs);
}
ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI);
@@ -2327,7 +2348,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
} else {
// Load scalar value from indirect argument.
llvm::Value *V =
- EmitLoadOfScalar(ParamAddr, false, Ty, Arg->getLocStart());
+ EmitLoadOfScalar(ParamAddr, false, Ty, Arg->getBeginLoc());
if (isPromoted)
V = emitArgumentDemotion(*this, Arg, V);
@@ -2389,7 +2410,10 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
if (!AVAttr)
if (const auto *TOTy = dyn_cast<TypedefType>(OTy))
AVAttr = TOTy->getDecl()->getAttr<AlignValueAttr>();
- if (AVAttr) {
+ if (AVAttr && !SanOpts.has(SanitizerKind::Alignment)) {
+      // If the alignment-assumption sanitizer is enabled, we do *not* add the
+      // alignment attribute here; instead we emit a normal alignment
+      // assumption so that the UBSan check can still fire.
llvm::Value *AlignmentValue =
EmitScalarExpr(AVAttr->getAlignment());
llvm::ConstantInt *AlignmentCI =
@@ -2490,7 +2514,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
// Match to what EmitParmDecl is expecting for this type.
if (CodeGenFunction::hasScalarEvaluationKind(Ty)) {
llvm::Value *V =
- EmitLoadOfScalar(Alloca, false, Ty, Arg->getLocStart());
+ EmitLoadOfScalar(Alloca, false, Ty, Arg->getBeginLoc());
if (isPromoted)
V = emitArgumentDemotion(*this, Arg, V);
ArgVals.push_back(ParamValue::forDirect(V));
@@ -3063,8 +3087,9 @@ void CodeGenFunction::EmitDelegateCallArg(CallArgList &args,
QualType type = param->getType();
- assert(!isInAllocaArgument(CGM.getCXXABI(), type) &&
- "cannot emit delegate call arguments for inalloca arguments!");
+ if (isInAllocaArgument(CGM.getCXXABI(), type)) {
+ CGM.ErrorUnsupported(param, "forwarded non-trivially copyable parameter");
+ }
// GetAddrOfLocalVar returns a pointer-to-pointer for references,
// but the argument needs to be the original pointer.
@@ -3945,15 +3970,28 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
} else if (I->hasLValue()) {
auto LV = I->getKnownLValue();
auto AS = LV.getAddressSpace();
+
if ((!ArgInfo.getIndirectByVal() &&
(LV.getAlignment() >=
- getContext().getTypeAlignInChars(I->Ty))) ||
- (ArgInfo.getIndirectByVal() &&
- ((AS != LangAS::Default && AS != LangAS::opencl_private &&
- AS != CGM.getASTAllocaAddressSpace())))) {
+ getContext().getTypeAlignInChars(I->Ty)))) {
+ NeedCopy = true;
+ }
+ if (!getLangOpts().OpenCL) {
+ if ((ArgInfo.getIndirectByVal() &&
+ (AS != LangAS::Default &&
+ AS != CGM.getASTAllocaAddressSpace()))) {
+ NeedCopy = true;
+ }
+ }
+        // For OpenCL, even if RV is located in the default or alloca address
+        // space, we don't want to perform an address space cast for it.
+ else if ((ArgInfo.getIndirectByVal() &&
+ Addr.getType()->getAddressSpace() != IRFuncTy->
+ getParamType(FirstIRArg)->getPointerAddressSpace())) {
NeedCopy = true;
}
}
+
if (NeedCopy) {
// Create an aligned temporary, and copy to it.
Address AI = CreateMemTempWithoutCast(
@@ -4235,6 +4273,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
}
#endif
+ // Update the largest vector width if any arguments have vector types.
+ for (unsigned i = 0; i < IRCallArgs.size(); ++i) {
+ if (auto *VT = dyn_cast<llvm::VectorType>(IRCallArgs[i]->getType()))
+ LargestVectorWidth = std::max(LargestVectorWidth,
+ VT->getPrimitiveSizeInBits());
+ }
+
// Compute the calling convention and attributes.
unsigned CallingConv;
llvm::AttributeList Attrs;
@@ -4248,8 +4293,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// Apply always_inline to all calls within flatten functions.
// FIXME: should this really take priority over __try, below?
if (CurCodeDecl && CurCodeDecl->hasAttr<FlattenAttr>() &&
- !(Callee.getAbstractInfo().getCalleeDecl() &&
- Callee.getAbstractInfo().getCalleeDecl()->hasAttr<NoInlineAttr>())) {
+ !(Callee.getAbstractInfo().getCalleeDecl().getDecl() &&
+ Callee.getAbstractInfo()
+ .getCalleeDecl()
+ .getDecl()
+ ->hasAttr<NoInlineAttr>())) {
Attrs =
Attrs.addAttribute(getLLVMContext(), llvm::AttributeList::FunctionIndex,
llvm::Attribute::AlwaysInline);
@@ -4315,6 +4363,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
if (!CI->getType()->isVoidTy())
CI->setName("call");
+ // Update largest vector width from the return type.
+ if (auto *VT = dyn_cast<llvm::VectorType>(CI->getType()))
+ LargestVectorWidth = std::max(LargestVectorWidth,
+ VT->getPrimitiveSizeInBits());
+
// Insert instrumentation or attach profile metadata at indirect call sites.
// For more details, see the comment before the definition of
// IPVK_IndirectCallTarget in InstrProfData.inc.
@@ -4329,7 +4382,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
// Suppress tail calls if requested.
if (llvm::CallInst *Call = dyn_cast<llvm::CallInst>(CI)) {
- const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl();
+ const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl().getDecl();
if (TargetDecl && TargetDecl->hasAttr<NotTailCalledAttr>())
Call->setTailCallKind(llvm::CallInst::TCK_NoTail);
}
@@ -4476,7 +4529,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
} ();
// Emit the assume_aligned check on the return value.
- const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl();
+ const Decl *TargetDecl = Callee.getAbstractInfo().getCalleeDecl().getDecl();
if (Ret.isScalar() && TargetDecl) {
if (const auto *AA = TargetDecl->getAttr<AssumeAlignedAttr>()) {
llvm::Value *OffsetValue = nullptr;
@@ -4485,13 +4538,14 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
llvm::Value *Alignment = EmitScalarExpr(AA->getAlignment());
llvm::ConstantInt *AlignmentCI = cast<llvm::ConstantInt>(Alignment);
- EmitAlignmentAssumption(Ret.getScalarVal(), AlignmentCI->getZExtValue(),
- OffsetValue);
+ EmitAlignmentAssumption(Ret.getScalarVal(), RetTy, Loc, AA->getLocation(),
+ AlignmentCI->getZExtValue(), OffsetValue);
} else if (const auto *AA = TargetDecl->getAttr<AllocAlignAttr>()) {
- llvm::Value *ParamVal =
- CallArgs[AA->getParamIndex().getLLVMIndex()].getRValue(
- *this).getScalarVal();
- EmitAlignmentAssumption(Ret.getScalarVal(), ParamVal);
+ llvm::Value *AlignmentVal = CallArgs[AA->getParamIndex().getLLVMIndex()]
+ .getRValue(*this)
+ .getScalarVal();
+ EmitAlignmentAssumption(Ret.getScalarVal(), RetTy, Loc, AA->getLocation(),
+ AlignmentVal);
}
}
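The two branches above handle the assume_aligned and alloc_align return-value attributes; the emitted alignment assumption now also carries the return type, call location, and attribute location so the alignment sanitizer can point back at the attribute. Illustrative declarations, not part of the patch, that exercise both paths:

// assume_aligned: the returned pointer is promised to be 64-byte aligned.
float *get_buffer(void) __attribute__((assume_aligned(64)));

// alloc_align: the returned pointer's alignment is given by parameter 1.
void *aligned_alloc_wrapper(unsigned align) __attribute__((alloc_align(1)));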
@@ -4502,8 +4556,8 @@ CGCallee CGCallee::prepareConcreteCallee(CodeGenFunction &CGF) const {
if (isVirtual()) {
const CallExpr *CE = getVirtualCallExpr();
return CGF.CGM.getCXXABI().getVirtualFunctionPointer(
- CGF, getVirtualMethodDecl(), getThisAddress(),
- getFunctionType(), CE ? CE->getLocStart() : SourceLocation());
+ CGF, getVirtualMethodDecl(), getThisAddress(), getFunctionType(),
+ CE ? CE->getBeginLoc() : SourceLocation());
}
return *this;
diff --git a/lib/CodeGen/CGCall.h b/lib/CodeGen/CGCall.h
index 99a36e4e12f1..c300808bea28 100644
--- a/lib/CodeGen/CGCall.h
+++ b/lib/CodeGen/CGCall.h
@@ -46,21 +46,21 @@ class CGCalleeInfo {
/// The function prototype of the callee.
const FunctionProtoType *CalleeProtoTy;
/// The function declaration of the callee.
- const Decl *CalleeDecl;
+ GlobalDecl CalleeDecl;
public:
- explicit CGCalleeInfo() : CalleeProtoTy(nullptr), CalleeDecl(nullptr) {}
- CGCalleeInfo(const FunctionProtoType *calleeProtoTy, const Decl *calleeDecl)
+ explicit CGCalleeInfo() : CalleeProtoTy(nullptr), CalleeDecl() {}
+ CGCalleeInfo(const FunctionProtoType *calleeProtoTy, GlobalDecl calleeDecl)
: CalleeProtoTy(calleeProtoTy), CalleeDecl(calleeDecl) {}
CGCalleeInfo(const FunctionProtoType *calleeProtoTy)
- : CalleeProtoTy(calleeProtoTy), CalleeDecl(nullptr) {}
- CGCalleeInfo(const Decl *calleeDecl)
+ : CalleeProtoTy(calleeProtoTy), CalleeDecl() {}
+ CGCalleeInfo(GlobalDecl calleeDecl)
: CalleeProtoTy(nullptr), CalleeDecl(calleeDecl) {}
const FunctionProtoType *getCalleeFunctionProtoType() const {
return CalleeProtoTy;
}
- const Decl *getCalleeDecl() const { return CalleeDecl; }
+ const GlobalDecl getCalleeDecl() const { return CalleeDecl; }
};
/// All available information about a concrete callee.
@@ -171,7 +171,7 @@ public:
}
CGCalleeInfo getAbstractInfo() const {
if (isVirtual())
- return VirtualInfo.MD.getDecl();
+ return VirtualInfo.MD;
assert(isOrdinary());
return AbstractInfo;
}
diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp
index ec4eb000a3b9..ee150a792b76 100644
--- a/lib/CodeGen/CGClass.cpp
+++ b/lib/CodeGen/CGClass.cpp
@@ -16,14 +16,15 @@
#include "CGDebugInfo.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
+#include "TargetInfo.h"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/EvaluatedExprVisitor.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/StmtCXX.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/CodeGen/CGFunctionInfo.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Transforms/Utils/SanitizerStats.h"
@@ -829,7 +830,7 @@ void CodeGenFunction::EmitConstructorBody(FunctionArgList &Args) {
// delegation optimization.
if (CtorType == Ctor_Complete && IsConstructorDelegationValid(Ctor) &&
CGM.getTarget().getCXXABI().hasConstructorVariants()) {
- EmitDelegateCXXConstructorCall(Ctor, Ctor_Base, Args, Ctor->getLocEnd());
+ EmitDelegateCXXConstructorCall(Ctor, Ctor_Base, Args, Ctor->getEndLoc());
return;
}
@@ -2012,8 +2013,19 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
bool NewPointerIsChecked) {
CallArgList Args;
+ LangAS SlotAS = E->getType().getAddressSpace();
+ QualType ThisType = D->getThisType();
+ LangAS ThisAS = ThisType.getTypePtr()->getPointeeType().getAddressSpace();
+ llvm::Value *ThisPtr = This.getPointer();
+ if (SlotAS != ThisAS) {
+ unsigned TargetThisAS = getContext().getTargetAddressSpace(ThisAS);
+ llvm::Type *NewType =
+ ThisPtr->getType()->getPointerElementType()->getPointerTo(TargetThisAS);
+ ThisPtr = getTargetHooks().performAddrSpaceCast(*this, This.getPointer(),
+ ThisAS, SlotAS, NewType);
+ }
// Push the this ptr.
- Args.add(RValue::get(This.getPointer()), D->getThisType(getContext()));
+ Args.add(RValue::get(ThisPtr), D->getThisType());
// If this is a trivial constructor, emit a memcpy now before we lose
// the alignment information on the argument.
@@ -2122,7 +2134,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
CGM.getAddrOfCXXStructor(D, getFromCtorType(Type));
const CGFunctionInfo &Info = CGM.getTypes().arrangeCXXConstructorCall(
Args, D, Type, ExtraArgs.Prefix, ExtraArgs.Suffix, PassPrototypeArgs);
- CGCallee Callee = CGCallee::forDirect(CalleePtr, D);
+ CGCallee Callee = CGCallee::forDirect(CalleePtr, GlobalDecl(D, Type));
EmitCall(Info, Callee, ReturnValueSlot(), Args);
// Generate vtable assumptions if we're constructing a complete object
@@ -2147,7 +2159,7 @@ void CodeGenFunction::EmitInheritedCXXConstructorCall(
const CXXConstructorDecl *D, bool ForVirtualBase, Address This,
bool InheritedFromVBase, const CXXInheritedCtorInitExpr *E) {
CallArgList Args;
- CallArg ThisArg(RValue::get(This.getPointer()), D->getThisType(getContext()));
+ CallArg ThisArg(RValue::get(This.getPointer()), D->getThisType());
// Forward the parameters.
if (InheritedFromVBase &&
@@ -2196,6 +2208,7 @@ void CodeGenFunction::EmitInlinedInheritingCXXConstructorCall(
GlobalDecl GD(Ctor, CtorType);
InlinedInheritingConstructorScope Scope(*this, GD);
ApplyInlineDebugLocation DebugScope(*this, GD);
+ RunCleanupsScope RunCleanups(*this);
// Save the arguments to be passed to the inherited constructor.
CXXInheritedCtorInitExprArgs = Args;
@@ -2271,7 +2284,7 @@ CodeGenFunction::EmitSynthesizedCXXCopyCtorCall(const CXXConstructorDecl *D,
CallArgList Args;
// Push the this ptr.
- Args.add(RValue::get(This.getPointer()), D->getThisType(getContext()));
+ Args.add(RValue::get(This.getPointer()), D->getThisType());
// Push the src ptr.
QualType QT = *(FPT->param_type_begin());
@@ -2808,7 +2821,7 @@ void CodeGenFunction::EmitForwardingCallToLambda(
// variadic arguments.
// Now emit our call.
- auto callee = CGCallee::forDirect(calleePtr, callOperator);
+ auto callee = CGCallee::forDirect(calleePtr, GlobalDecl(callOperator));
RValue RV = EmitCall(calleeFnInfo, callee, returnSlot, callArgs);
// If necessary, copy the returned value into the slot.
@@ -2839,12 +2852,12 @@ void CodeGenFunction::EmitLambdaBlockInvokeBody() {
CallArgList CallArgs;
QualType ThisType = getContext().getPointerType(getContext().getRecordType(Lambda));
- Address ThisPtr = GetAddrOfBlockDecl(variable, false);
+ Address ThisPtr = GetAddrOfBlockDecl(variable);
CallArgs.add(RValue::get(ThisPtr.getPointer()), ThisType);
// Add the rest of the parameters.
for (auto param : BD->parameters())
- EmitDelegateCallArg(CallArgs, param, param->getLocStart());
+ EmitDelegateCallArg(CallArgs, param, param->getBeginLoc());
assert(!Lambda->isGenericLambda() &&
"generic lambda interconversion to block not implemented");
@@ -2863,7 +2876,7 @@ void CodeGenFunction::EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD) {
// Add the rest of the parameters.
for (auto Param : MD->parameters())
- EmitDelegateCallArg(CallArgs, Param, Param->getLocStart());
+ EmitDelegateCallArg(CallArgs, Param, Param->getBeginLoc());
const CXXMethodDecl *CallOp = Lambda->getLambdaCallOperator();
// For a generic lambda, find the corresponding call operator specialization
diff --git a/lib/CodeGen/CGCleanup.cpp b/lib/CodeGen/CGCleanup.cpp
index 0a766d176200..3743d24f11fc 100644
--- a/lib/CodeGen/CGCleanup.cpp
+++ b/lib/CodeGen/CGCleanup.cpp
@@ -366,7 +366,7 @@ static llvm::SwitchInst *TransitionToCleanupSwitch(CodeGenFunction &CGF,
llvm::BasicBlock *Block) {
// If it's a branch, turn it into a switch whose default
// destination is its original target.
- llvm::TerminatorInst *Term = Block->getTerminator();
+ llvm::Instruction *Term = Block->getTerminator();
assert(Term && "can't transition block without terminator");
if (llvm::BranchInst *Br = dyn_cast<llvm::BranchInst>(Term)) {
@@ -589,7 +589,7 @@ static void ForwardPrebranchedFallthrough(llvm::BasicBlock *Exit,
llvm::BasicBlock *To) {
// Exit is the exit block of a cleanup, so it always terminates in
// an unconditional branch or a switch.
- llvm::TerminatorInst *Term = Exit->getTerminator();
+ llvm::Instruction *Term = Exit->getTerminator();
if (llvm::BranchInst *Br = dyn_cast<llvm::BranchInst>(Term)) {
assert(Br->isUnconditional() && Br->getSuccessor(0) == From);
diff --git a/lib/CodeGen/CGCoroutine.cpp b/lib/CodeGen/CGCoroutine.cpp
index 4f525c8aac85..80fa7c873631 100644
--- a/lib/CodeGen/CGCoroutine.cpp
+++ b/lib/CodeGen/CGCoroutine.cpp
@@ -93,10 +93,10 @@ static void createCoroData(CodeGenFunction &CGF,
CallExpr const *CoroIdExpr = nullptr) {
if (CurCoro.Data) {
if (CurCoro.Data->CoroIdExpr)
- CGF.CGM.Error(CoroIdExpr->getLocStart(),
+ CGF.CGM.Error(CoroIdExpr->getBeginLoc(),
"only one __builtin_coro_id can be used in a function");
else if (CoroIdExpr)
- CGF.CGM.Error(CoroIdExpr->getLocStart(),
+ CGF.CGM.Error(CoroIdExpr->getBeginLoc(),
"__builtin_coro_id shall not be used in a C++ coroutine");
else
llvm_unreachable("EmitCoroutineBodyStatement called twice?");
@@ -444,7 +444,7 @@ struct CallCoroDelete final : public EHScopeStack::Cleanup {
// We should have captured coro.free from the emission of deallocate.
auto *CoroFree = CGF.CurCoro.Data->LastCoroFree;
if (!CoroFree) {
- CGF.CGM.Error(Deallocate->getLocStart(),
+ CGF.CGM.Error(Deallocate->getBeginLoc(),
"Deallocation expressoin does not refer to coro.free");
return;
}
@@ -654,7 +654,7 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {
EmitBlock(BodyBB);
}
- auto Loc = S.getLocStart();
+ auto Loc = S.getBeginLoc();
CXXCatchStmt Catch(Loc, /*exDecl=*/nullptr,
CurCoro.Data->ExceptionHandler);
auto *TryStmt =
@@ -707,8 +707,8 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E,
if (CurCoro.Data && CurCoro.Data->CoroBegin) {
return RValue::get(CurCoro.Data->CoroBegin);
}
- CGM.Error(E->getLocStart(), "this builtin expect that __builtin_coro_begin "
- "has been used earlier in this function");
+ CGM.Error(E->getBeginLoc(), "this builtin expect that __builtin_coro_begin "
+ "has been used earlier in this function");
auto NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy());
return RValue::get(NullPtr);
}
@@ -722,7 +722,7 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E,
Args.push_back(CurCoro.Data->CoroId);
break;
}
- CGM.Error(E->getLocStart(), "this builtin expect that __builtin_coro_id has"
+ CGM.Error(E->getBeginLoc(), "this builtin expect that __builtin_coro_id has"
" been used earlier in this function");
// Fallthrough to the next case to add TokenNone as the first argument.
LLVM_FALLTHROUGH;
diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp
index 5be6fb3e4245..41f8721468a3 100644
--- a/lib/CodeGen/CGDebugInfo.cpp
+++ b/lib/CodeGen/CGDebugInfo.cpp
@@ -25,10 +25,10 @@
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/RecordLayout.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Version.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "clang/Frontend/FrontendOptions.h"
#include "clang/Lex/HeaderSearchOptions.h"
#include "clang/Lex/ModuleMap.h"
@@ -41,6 +41,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MD5.h"
@@ -180,8 +181,7 @@ void CGDebugInfo::setLocation(SourceLocation Loc) {
SourceManager &SM = CGM.getContext().getSourceManager();
auto *Scope = cast<llvm::DIScope>(LexicalBlockStack.back());
PresumedLoc PCLoc = SM.getPresumedLoc(CurLoc);
-
- if (PCLoc.isInvalid() || Scope->getFilename() == PCLoc.getFilename())
+ if (PCLoc.isInvalid() || Scope->getFile() == getOrCreateFile(CurLoc))
return;
if (auto *LBF = dyn_cast<llvm::DILexicalBlockFile>(Scope)) {
@@ -220,7 +220,7 @@ llvm::DIScope *CGDebugInfo::getContextDescriptor(const Decl *Context,
if (const auto *RDecl = dyn_cast<RecordDecl>(Context))
if (!RDecl->isDependentType())
return getOrCreateType(CGM.getContext().getTypeDeclType(RDecl),
- getOrCreateMainFile());
+ TheCU->getFile());
return Default;
}
@@ -234,6 +234,9 @@ PrintingPolicy CGDebugInfo::getPrintingPolicy() const {
if (CGM.getCodeGenOpts().EmitCodeView)
PP.MSVCFormatting = true;
+ // Apply -fdebug-prefix-map.
+ PP.RemapFilePaths = true;
+ PP.remapPath = [this](StringRef Path) { return remapDIPath(Path); };
return PP;
}
@@ -401,19 +404,18 @@ Optional<StringRef> CGDebugInfo::getSource(const SourceManager &SM,
llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) {
if (!Loc.isValid())
// If Location is not valid then use main input file.
- return getOrCreateMainFile();
+ return TheCU->getFile();
SourceManager &SM = CGM.getContext().getSourceManager();
PresumedLoc PLoc = SM.getPresumedLoc(Loc);
- if (PLoc.isInvalid() || StringRef(PLoc.getFilename()).empty())
+ StringRef FileName = PLoc.getFilename();
+ if (PLoc.isInvalid() || FileName.empty())
// If the location is not valid then use main input file.
- return getOrCreateMainFile();
+ return TheCU->getFile();
// Cache the results.
- const char *fname = PLoc.getFilename();
- auto It = DIFileCache.find(fname);
-
+ auto It = DIFileCache.find(FileName.data());
if (It != DIFileCache.end()) {
// Verify that the information still exists.
if (llvm::Metadata *V = It->second)
@@ -426,22 +428,48 @@ llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) {
Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo;
if (CSKind)
CSInfo.emplace(*CSKind, Checksum);
-
- llvm::DIFile *F = DBuilder.createFile(
- remapDIPath(PLoc.getFilename()), remapDIPath(getCurrentDirname()), CSInfo,
- getSource(SM, SM.getFileID(Loc)));
-
- DIFileCache[fname].reset(F);
+ return createFile(FileName, CSInfo, getSource(SM, SM.getFileID(Loc)));
+}
+
+llvm::DIFile *
+CGDebugInfo::createFile(StringRef FileName,
+ Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo,
+ Optional<StringRef> Source) {
+ StringRef Dir;
+ StringRef File;
+ std::string RemappedFile = remapDIPath(FileName);
+ std::string CurDir = remapDIPath(getCurrentDirname());
+ SmallString<128> DirBuf;
+ SmallString<128> FileBuf;
+ if (llvm::sys::path::is_absolute(RemappedFile)) {
+ // Strip the common prefix (if it is more than just "/") from current
+ // directory and FileName for a more space-efficient encoding.
+ auto FileIt = llvm::sys::path::begin(RemappedFile);
+ auto FileE = llvm::sys::path::end(RemappedFile);
+ auto CurDirIt = llvm::sys::path::begin(CurDir);
+ auto CurDirE = llvm::sys::path::end(CurDir);
+ for (; CurDirIt != CurDirE && *CurDirIt == *FileIt; ++CurDirIt, ++FileIt)
+ llvm::sys::path::append(DirBuf, *CurDirIt);
+ if (std::distance(llvm::sys::path::begin(CurDir), CurDirIt) == 1) {
+      // The common prefix is only the root; stripping it would cause
+ // LLVM diagnostic locations to be more confusing.
+ Dir = {};
+ File = RemappedFile;
+ } else {
+ for (; FileIt != FileE; ++FileIt)
+ llvm::sys::path::append(FileBuf, *FileIt);
+ Dir = DirBuf;
+ File = FileBuf;
+ }
+ } else {
+ Dir = CurDir;
+ File = RemappedFile;
+ }
+ llvm::DIFile *F = DBuilder.createFile(File, Dir, CSInfo, Source);
+ DIFileCache[FileName.data()].reset(F);
return F;
}
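createFile splits an absolute file name against the compilation directory, keeping the shared prefix as the DIFile directory and the remainder as the file name, unless the only shared component is the root. A standalone sketch of that split, assuming both inputs are absolute paths; the function name is made up:

#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Path.h"
#include <string>
#include <utility>

// "/usr/src/lib/Foo.cpp" vs. "/usr/src/build" -> {"/usr/src", "lib/Foo.cpp"}.
std::pair<std::string, std::string>
splitByCommonPrefix(llvm::StringRef File, llvm::StringRef CurDir) {
  llvm::SmallString<128> Dir, Rest;
  auto FI = llvm::sys::path::begin(File), FE = llvm::sys::path::end(File);
  auto DI = llvm::sys::path::begin(CurDir), DE = llvm::sys::path::end(CurDir);
  for (; DI != DE && FI != FE && *DI == *FI; ++DI, ++FI)
    llvm::sys::path::append(Dir, *DI);
  if (std::distance(llvm::sys::path::begin(CurDir), DI) == 1)
    return {std::string(), File.str()}; // only the root is shared; keep whole
  for (; FI != FE; ++FI)
    llvm::sys::path::append(Rest, *FI);
  return {Dir.str().str(), Rest.str().str()};
}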
-llvm::DIFile *CGDebugInfo::getOrCreateMainFile() {
- return DBuilder.createFile(
- remapDIPath(TheCU->getFilename()), remapDIPath(TheCU->getDirectory()),
- TheCU->getFile()->getChecksum(),
- CGM.getCodeGenOpts().EmbedSource ? TheCU->getSource() : None);
-}
-
std::string CGDebugInfo::remapDIPath(StringRef Path) const {
for (const auto &Entry : DebugPrefixMap)
if (Path.startswith(Entry.first))
@@ -527,11 +555,11 @@ void CGDebugInfo::CreateCompileUnit() {
llvm::dwarf::SourceLanguage LangTag;
const LangOptions &LO = CGM.getLangOpts();
if (LO.CPlusPlus) {
- if (LO.ObjC1)
+ if (LO.ObjC)
LangTag = llvm::dwarf::DW_LANG_ObjC_plus_plus;
else
LangTag = llvm::dwarf::DW_LANG_C_plus_plus;
- } else if (LO.ObjC1) {
+ } else if (LO.ObjC) {
LangTag = llvm::dwarf::DW_LANG_ObjC;
} else if (LO.RenderScript) {
LangTag = llvm::dwarf::DW_LANG_GOOGLE_RenderScript;
@@ -545,7 +573,7 @@ void CGDebugInfo::CreateCompileUnit() {
// Figure out which version of the ObjC runtime we have.
unsigned RuntimeVers = 0;
- if (LO.ObjC1)
+ if (LO.ObjC)
RuntimeVers = LO.ObjCRuntime.isNonFragile() ? 2 : 1;
llvm::DICompileUnit::DebugEmissionKind EmissionKind;
@@ -557,29 +585,42 @@ void CGDebugInfo::CreateCompileUnit() {
case codegenoptions::DebugLineTablesOnly:
EmissionKind = llvm::DICompileUnit::LineTablesOnly;
break;
+ case codegenoptions::DebugDirectivesOnly:
+ EmissionKind = llvm::DICompileUnit::DebugDirectivesOnly;
+ break;
case codegenoptions::LimitedDebugInfo:
case codegenoptions::FullDebugInfo:
EmissionKind = llvm::DICompileUnit::FullDebug;
break;
}
+ uint64_t DwoId = 0;
+ auto &CGOpts = CGM.getCodeGenOpts();
+ // The DIFile used by the CU is distinct from the main source
+ // file. Its directory part specifies what becomes the
+ // DW_AT_comp_dir (the compilation directory), even if the source
+ // file was specified with an absolute path.
if (CSKind)
CSInfo.emplace(*CSKind, Checksum);
+ llvm::DIFile *CUFile = DBuilder.createFile(
+ remapDIPath(MainFileName), remapDIPath(getCurrentDirname()), CSInfo,
+ getSource(SM, SM.getMainFileID()));
// Create new compile unit.
- // FIXME - Eliminate TheCU.
- auto &CGOpts = CGM.getCodeGenOpts();
TheCU = DBuilder.createCompileUnit(
- LangTag,
- DBuilder.createFile(remapDIPath(MainFileName),
- remapDIPath(getCurrentDirname()), CSInfo,
- getSource(SM, SM.getMainFileID())),
- CGOpts.EmitVersionIdentMetadata ? Producer : "",
+ LangTag, CUFile, CGOpts.EmitVersionIdentMetadata ? Producer : "",
LO.Optimize || CGOpts.PrepareForLTO || CGOpts.PrepareForThinLTO,
CGOpts.DwarfDebugFlags, RuntimeVers,
- CGOpts.EnableSplitDwarf ? "" : CGOpts.SplitDwarfFile, EmissionKind,
- 0 /* DWOid */, CGOpts.SplitDwarfInlining, CGOpts.DebugInfoForProfiling,
- CGOpts.GnuPubnames);
+ (CGOpts.getSplitDwarfMode() != CodeGenOptions::NoFission)
+ ? ""
+ : CGOpts.SplitDwarfFile,
+ EmissionKind, DwoId, CGOpts.SplitDwarfInlining,
+ CGOpts.DebugInfoForProfiling,
+ CGM.getTarget().getTriple().isNVPTX()
+ ? llvm::DICompileUnit::DebugNameTableKind::None
+ : static_cast<llvm::DICompileUnit::DebugNameTableKind>(
+ CGOpts.DebugNameTable),
+ CGOpts.DebugRangesBaseAddress);
}
llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
@@ -597,9 +638,9 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
return nullptr;
case BuiltinType::ObjCClass:
if (!ClassTy)
- ClassTy = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type,
- "objc_class", TheCU,
- getOrCreateMainFile(), 0);
+ ClassTy =
+ DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type,
+ "objc_class", TheCU, TheCU->getFile(), 0);
return ClassTy;
case BuiltinType::ObjCId: {
// typedef struct objc_class *Class;
@@ -611,21 +652,21 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
return ObjTy;
if (!ClassTy)
- ClassTy = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type,
- "objc_class", TheCU,
- getOrCreateMainFile(), 0);
+ ClassTy =
+ DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type,
+ "objc_class", TheCU, TheCU->getFile(), 0);
unsigned Size = CGM.getContext().getTypeSize(CGM.getContext().VoidPtrTy);
auto *ISATy = DBuilder.createPointerType(ClassTy, Size);
- ObjTy = DBuilder.createStructType(
- TheCU, "objc_object", getOrCreateMainFile(), 0, 0, 0,
- llvm::DINode::FlagZero, nullptr, llvm::DINodeArray());
+ ObjTy = DBuilder.createStructType(TheCU, "objc_object", TheCU->getFile(), 0,
+ 0, 0, llvm::DINode::FlagZero, nullptr,
+ llvm::DINodeArray());
DBuilder.replaceArrays(
ObjTy, DBuilder.getOrCreateArray(&*DBuilder.createMemberType(
- ObjTy, "isa", getOrCreateMainFile(), 0, Size, 0, 0,
+ ObjTy, "isa", TheCU->getFile(), 0, Size, 0, 0,
llvm::DINode::FlagZero, ISATy)));
return ObjTy;
}
@@ -633,7 +674,7 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
if (!SelTy)
SelTy = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type,
"objc_selector", TheCU,
- getOrCreateMainFile(), 0);
+ TheCU->getFile(), 0);
return SelTy;
}
@@ -652,6 +693,10 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
return getOrCreateStructPtrType("opencl_queue_t", OCLQueueDITy);
case BuiltinType::OCLReserveID:
return getOrCreateStructPtrType("opencl_reserve_id_t", OCLReserveIDDITy);
+#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
+ case BuiltinType::Id: \
+ return getOrCreateStructPtrType("opencl_" #ExtType, Id##Ty);
+#include "clang/Basic/OpenCLExtensionTypes.def"
case BuiltinType::UChar:
case BuiltinType::Char_U:
@@ -825,31 +870,45 @@ static bool hasCXXMangling(const TagDecl *TD, llvm::DICompileUnit *TheCU) {
}
}
-// Determines if the tag declaration will require a type identifier.
+// Determines if the debug info for this tag declaration needs a type
+// identifier. The purpose of the unique identifier is to deduplicate type
+// information for identical types across TUs. Because of the C++ one definition
+// rule (ODR), it is valid to assume that the type is defined the same way in
+// every TU and its debug info is equivalent.
+//
+// C does not have the ODR, and it is common for codebases to contain multiple
+// different definitions of a struct with the same name in different TUs.
+// Therefore, if the type doesn't have a C++ mangling, don't give it an
+// identifier. Type information in C is smaller and simpler than C++ type
+// information, so the increase in debug info size is negligible.
+//
+// If the type is not externally visible, it should be unique to the current TU,
+// and should not need an identifier to participate in type deduplication.
+// However, when emitting CodeView, the format internally uses these
+// unique type name identifiers for references between debug info. For example,
+// the method of a class in an anonymous namespace uses the identifier to refer
+// to its parent class. The Microsoft C++ ABI attempts to provide unique names
+// for such types, so when emitting CodeView, always use identifiers for C++
+// types. This may create problems when attempting to emit CodeView when the MS
+// C++ ABI is not in use.
static bool needsTypeIdentifier(const TagDecl *TD, CodeGenModule &CGM,
llvm::DICompileUnit *TheCU) {
// We only add a type identifier for types with C++ name mangling.
if (!hasCXXMangling(TD, TheCU))
return false;
- // CodeView types with C++ mangling need a type identifier.
- if (CGM.getCodeGenOpts().EmitCodeView)
- return true;
-
// Externally visible types with C++ mangling need a type identifier.
if (TD->isExternallyVisible())
return true;
+ // CodeView types with C++ mangling need a type identifier.
+ if (CGM.getCodeGenOpts().EmitCodeView)
+ return true;
+
return false;
}
-// When emitting CodeView debug information we need to produce a type
-// identifier for all types which have a C++ mangling. Until a GUID is added
-// to the identifier (not currently implemented) the result will not be unique
-// across compilation units.
-// When emitting DWARF debug information, we need to produce a type identifier
-// for all externally visible types with C++ name mangling. This identifier
-// should be unique across ODR-compliant compilation units.
+// Returns a unique type identifier string if one exists, or an empty string.
static SmallString<256> getTypeIdentifier(const TagType *Ty, CodeGenModule &CGM,
llvm::DICompileUnit *TheCU) {
SmallString<256> Identifier;
@@ -936,18 +995,53 @@ llvm::DIType *CGDebugInfo::getOrCreateStructPtrType(StringRef Name,
if (Cache)
return Cache;
Cache = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, Name,
- TheCU, getOrCreateMainFile(), 0);
+ TheCU, TheCU->getFile(), 0);
unsigned Size = CGM.getContext().getTypeSize(CGM.getContext().VoidPtrTy);
Cache = DBuilder.createPointerType(Cache, Size);
return Cache;
}
+uint64_t CGDebugInfo::collectDefaultElementTypesForBlockPointer(
+ const BlockPointerType *Ty, llvm::DIFile *Unit, llvm::DIDerivedType *DescTy,
+ unsigned LineNo, SmallVectorImpl<llvm::Metadata *> &EltTys) {
+ QualType FType;
+
+ // Advanced by calls to CreateMemberType in increments of FType, then
+ // returned as the overall size of the default elements.
+ uint64_t FieldOffset = 0;
+
+ // Blocks in OpenCL have unique constraints which make the standard fields
+ // redundant while requiring size and align fields for enqueue_kernel. See
+ // initializeForBlockHeader in CGBlocks.cpp
+ if (CGM.getLangOpts().OpenCL) {
+ FType = CGM.getContext().IntTy;
+ EltTys.push_back(CreateMemberType(Unit, FType, "__size", &FieldOffset));
+ EltTys.push_back(CreateMemberType(Unit, FType, "__align", &FieldOffset));
+ } else {
+ FType = CGM.getContext().getPointerType(CGM.getContext().VoidTy);
+ EltTys.push_back(CreateMemberType(Unit, FType, "__isa", &FieldOffset));
+ FType = CGM.getContext().IntTy;
+ EltTys.push_back(CreateMemberType(Unit, FType, "__flags", &FieldOffset));
+ EltTys.push_back(CreateMemberType(Unit, FType, "__reserved", &FieldOffset));
+ FType = CGM.getContext().getPointerType(Ty->getPointeeType());
+ EltTys.push_back(CreateMemberType(Unit, FType, "__FuncPtr", &FieldOffset));
+ FType = CGM.getContext().getPointerType(CGM.getContext().VoidTy);
+ uint64_t FieldSize = CGM.getContext().getTypeSize(Ty);
+ uint32_t FieldAlign = CGM.getContext().getTypeAlign(Ty);
+ EltTys.push_back(DBuilder.createMemberType(
+ Unit, "__descriptor", nullptr, LineNo, FieldSize, FieldAlign,
+ FieldOffset, llvm::DINode::FlagZero, DescTy));
+ FieldOffset += FieldSize;
+ }
+
+ return FieldOffset;
+}
+
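For reference, these are roughly the two header shapes that collectDefaultElementTypesForBlockPointer describes to the debugger: the OpenCL form with only size and align, and the default Apple-blocks form. The struct names are illustrative; the field names match those emitted above, and the authoritative layout lives in CGBlocks.cpp:

struct OpenCLBlockHeader {
  int __size;
  int __align;
};

struct DefaultBlockHeader {
  void *__isa;
  int __flags;
  int __reserved;
  void *__FuncPtr;                      // really the block's invoke function
  struct BlockDescriptor *__descriptor; // the pointer type built as DescTy
};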
llvm::DIType *CGDebugInfo::CreateType(const BlockPointerType *Ty,
llvm::DIFile *Unit) {
SmallVector<llvm::Metadata *, 8> EltTys;
QualType FType;
- uint64_t FieldSize, FieldOffset;
- uint32_t FieldAlign;
+ uint64_t FieldOffset;
llvm::DINodeArray Elements;
FieldOffset = 0;
@@ -959,10 +1053,9 @@ llvm::DIType *CGDebugInfo::CreateType(const BlockPointerType *Ty,
EltTys.clear();
llvm::DINode::DIFlags Flags = llvm::DINode::FlagAppleBlock;
- unsigned LineNo = 0;
auto *EltTy =
- DBuilder.createStructType(Unit, "__block_descriptor", nullptr, LineNo,
+ DBuilder.createStructType(Unit, "__block_descriptor", nullptr, 0,
FieldOffset, 0, Flags, nullptr, Elements);
// Bit size, align and offset of the type.
@@ -970,27 +1063,8 @@ llvm::DIType *CGDebugInfo::CreateType(const BlockPointerType *Ty,
auto *DescTy = DBuilder.createPointerType(EltTy, Size);
- FieldOffset = 0;
- if (CGM.getLangOpts().OpenCL) {
- FType = CGM.getContext().IntTy;
- EltTys.push_back(CreateMemberType(Unit, FType, "__size", &FieldOffset));
- EltTys.push_back(CreateMemberType(Unit, FType, "__align", &FieldOffset));
- } else {
- FType = CGM.getContext().getPointerType(CGM.getContext().VoidTy);
- EltTys.push_back(CreateMemberType(Unit, FType, "__isa", &FieldOffset));
- FType = CGM.getContext().IntTy;
- EltTys.push_back(CreateMemberType(Unit, FType, "__flags", &FieldOffset));
- EltTys.push_back(CreateMemberType(Unit, FType, "__reserved", &FieldOffset));
- FType = CGM.getContext().getPointerType(Ty->getPointeeType());
- EltTys.push_back(CreateMemberType(Unit, FType, "__FuncPtr", &FieldOffset));
- FType = CGM.getContext().getPointerType(CGM.getContext().VoidTy);
- FieldSize = CGM.getContext().getTypeSize(Ty);
- FieldAlign = CGM.getContext().getTypeAlign(Ty);
- EltTys.push_back(DBuilder.createMemberType(
- Unit, "__descriptor", nullptr, LineNo, FieldSize, FieldAlign, FieldOffset,
- llvm::DINode::FlagZero, DescTy));
- FieldOffset += FieldSize;
- }
+ FieldOffset = collectDefaultElementTypesForBlockPointer(Ty, Unit, DescTy,
+ 0, EltTys);
Elements = DBuilder.getOrCreateArray(EltTys);
@@ -998,7 +1072,7 @@ llvm::DIType *CGDebugInfo::CreateType(const BlockPointerType *Ty,
// DW_AT_APPLE_BLOCK attribute and are an implementation detail only
// the debugger needs to know about. To allow type uniquing, emit
// them without a name or a location.
- EltTy = DBuilder.createStructType(Unit, "", nullptr, LineNo, FieldOffset, 0,
+ EltTy = DBuilder.createStructType(Unit, "", nullptr, 0, FieldOffset, 0,
Flags, nullptr, Elements);
return DBuilder.createPointerType(EltTy, Size);
@@ -1058,6 +1132,7 @@ static unsigned getDwarfCC(CallingConv CC) {
case CC_X86_64SysV:
return llvm::dwarf::DW_CC_LLVM_X86_64SysV;
case CC_AAPCS:
+ case CC_AArch64VectorCall:
return llvm::dwarf::DW_CC_LLVM_AAPCS;
case CC_AAPCS_VFP:
return llvm::dwarf::DW_CC_LLVM_AAPCS_VFP;
@@ -1353,8 +1428,7 @@ CGDebugInfo::getOrCreateMethodType(const CXXMethodDecl *Method,
if (Method->isStatic())
return cast_or_null<llvm::DISubroutineType>(
getOrCreateType(QualType(Func, 0), Unit));
- return getOrCreateInstanceMethodType(Method->getThisType(CGM.getContext()),
- Func, Unit);
+ return getOrCreateInstanceMethodType(Method->getThisType(), Func, Unit);
}
llvm::DISubroutineType *CGDebugInfo::getOrCreateInstanceMethodType(
@@ -1450,16 +1524,16 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction(
// Collect virtual method info.
llvm::DIType *ContainingType = nullptr;
- unsigned Virtuality = 0;
unsigned VIndex = 0;
llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero;
+ llvm::DISubprogram::DISPFlags SPFlags = llvm::DISubprogram::SPFlagZero;
int ThisAdjustment = 0;
if (Method->isVirtual()) {
if (Method->isPure())
- Virtuality = llvm::dwarf::DW_VIRTUALITY_pure_virtual;
+ SPFlags |= llvm::DISubprogram::SPFlagPureVirtual;
else
- Virtuality = llvm::dwarf::DW_VIRTUALITY_virtual;
+ SPFlags |= llvm::DISubprogram::SPFlagVirtual;
if (CGM.getTarget().getCXXABI().isItaniumFamily()) {
// It doesn't make sense to give a virtual destructor a vtable index,
@@ -1511,12 +1585,13 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction(
Flags |= llvm::DINode::FlagLValueReference;
if (Method->getRefQualifier() == RQ_RValue)
Flags |= llvm::DINode::FlagRValueReference;
+ if (CGM.getLangOpts().Optimize)
+ SPFlags |= llvm::DISubprogram::SPFlagOptimized;
llvm::DINodeArray TParamsArray = CollectFunctionTemplateParams(Method, Unit);
llvm::DISubprogram *SP = DBuilder.createMethod(
RecordTy, MethodName, MethodLinkageName, MethodDefUnit, MethodLine,
- MethodTy, /*isLocalToUnit=*/false, /*isDefinition=*/false, Virtuality,
- VIndex, ThisAdjustment, ContainingType, Flags, CGM.getLangOpts().Optimize,
+ MethodTy, VIndex, ThisAdjustment, ContainingType, Flags, SPFlags,
TParamsArray.get());
SPCache[Method->getCanonicalDecl()].reset(SP);
@@ -1741,6 +1816,29 @@ CGDebugInfo::CollectFunctionTemplateParams(const FunctionDecl *FD,
return llvm::DINodeArray();
}
+llvm::DINodeArray CGDebugInfo::CollectVarTemplateParams(const VarDecl *VL,
+ llvm::DIFile *Unit) {
+ if (auto *TS = dyn_cast<VarTemplateSpecializationDecl>(VL)) {
+ auto T = TS->getSpecializedTemplateOrPartial();
+ auto TA = TS->getTemplateArgs().asArray();
+ // Collect parameters for a partial specialization
+ if (T.is<VarTemplatePartialSpecializationDecl *>()) {
+ const TemplateParameterList *TList =
+ T.get<VarTemplatePartialSpecializationDecl *>()
+ ->getTemplateParameters();
+ return CollectTemplateParams(TList, TA, Unit);
+ }
+
+ // Collect parameters for an explicit specialization
+ if (T.is<VarTemplateDecl *>()) {
+ const TemplateParameterList *TList = T.get<VarTemplateDecl *>()
+ ->getTemplateParameters();
+ return CollectTemplateParams(TList, TA, Unit);
+ }
+ }
+ return llvm::DINodeArray();
+}
+
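CollectVarTemplateParams mirrors the existing collectors for function and class templates; debug info for variable template specializations now records their template parameters. An illustrative source example, not taken from the patch:

template <typename T> T zero = T();
template <> int zero<int> = 0;                // explicit specialization
template <typename T> T *zero<T *> = nullptr; // partial specialization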
llvm::DINodeArray CGDebugInfo::CollectCXXTemplateParams(
const ClassTemplateSpecializationDecl *TSpecial, llvm::DIFile *Unit) {
// Always get the full list of parameters, not just the ones from
@@ -1896,8 +1994,17 @@ static bool isDefinedInClangModule(const RecordDecl *RD) {
if (auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD)) {
if (!CXXDecl->isCompleteDefinition())
return false;
+    // Check whether RD is a template.
auto TemplateKind = CXXDecl->getTemplateSpecializationKind();
if (TemplateKind != TSK_Undeclared) {
+ // Unfortunately getOwningModule() isn't accurate enough to find the
+ // owning module of a ClassTemplateSpecializationDecl that is inside a
+ // namespace spanning multiple modules.
+ bool Explicit = false;
+ if (auto *TD = dyn_cast<ClassTemplateSpecializationDecl>(CXXDecl))
+ Explicit = TD->isExplicitInstantiationOrSpecialization();
+ if (!Explicit && CXXDecl->getEnclosingNamespaceContext())
+ return false;
// This is a template, check the origin of the first member.
if (CXXDecl->field_begin() == CXXDecl->field_end())
return TemplateKind == TSK_ExplicitInstantiationDeclaration;
@@ -2445,9 +2552,9 @@ llvm::DIType *CGDebugInfo::CreateType(const ArrayType *Ty, llvm::DIFile *Unit) {
Count = CAT->getSize().getZExtValue();
else if (const auto *VAT = dyn_cast<VariableArrayType>(Ty)) {
if (Expr *Size = VAT->getSizeExpr()) {
- llvm::APSInt V;
- if (Size->EvaluateAsInt(V, CGM.getContext()))
- Count = V.getExtValue();
+ Expr::EvalResult Result;
+ if (Size->EvaluateAsInt(Result, CGM.getContext()))
+ Count = Result.Val.getInt().getExtValue();
}
}
@@ -2513,9 +2620,9 @@ llvm::DIType *CGDebugInfo::CreateType(const MemberPointerType *Ty,
const FunctionProtoType *FPT =
Ty->getPointeeType()->getAs<FunctionProtoType>();
return DBuilder.createMemberPointerType(
- getOrCreateInstanceMethodType(CGM.getContext().getPointerType(QualType(
- Ty->getClass(), FPT->getTypeQuals())),
- FPT, U),
+ getOrCreateInstanceMethodType(
+ CXXMethodDecl::getThisType(FPT, Ty->getMostRecentCXXRecordDecl()),
+ FPT, U),
ClassType, Size, /*Align=*/0, Flags);
}
@@ -2603,7 +2710,7 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const EnumType *Ty) {
llvm::DIType *ClassTy = getOrCreateType(ED->getIntegerType(), DefUnit);
return DBuilder.createEnumerationType(EnumContext, ED->getName(), DefUnit,
Line, Size, Align, EltArray, ClassTy,
- Identifier, ED->isFixed());
+ Identifier, ED->isScoped());
}
llvm::DIMacro *CGDebugInfo::CreateMacro(llvm::DIMacroFile *Parent,
@@ -3035,6 +3142,7 @@ void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit,
void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit,
unsigned &LineNo, QualType &T,
StringRef &Name, StringRef &LinkageName,
+ llvm::MDTuple *&TemplateParameters,
llvm::DIScope *&VDContext) {
Unit = getOrCreateFile(VD->getLocation());
LineNo = getLineNumber(VD->getLocation());
@@ -3058,6 +3166,13 @@ void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit,
if (LinkageName == Name)
LinkageName = StringRef();
+ if (isa<VarTemplateSpecializationDecl>(VD)) {
+ llvm::DINodeArray parameterNodes = CollectVarTemplateParams(VD, &*Unit);
+ TemplateParameters = parameterNodes.get();
+ } else {
+ TemplateParameters = nullptr;
+ }
+
// Since we emit declarations (DW_AT_members) for static members, place the
// definition of those static members in the namespace they were declared in
// in the source code (the lexical decl context).
@@ -3084,6 +3199,7 @@ llvm::DISubprogram *CGDebugInfo::getFunctionFwdDeclOrStub(GlobalDecl GD,
llvm::DINodeArray TParamsArray;
StringRef Name, LinkageName;
llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero;
+ llvm::DISubprogram::DISPFlags SPFlags = llvm::DISubprogram::SPFlagZero;
SourceLocation Loc = GD.getDecl()->getLocation();
llvm::DIFile *Unit = getOrCreateFile(Loc);
llvm::DIScope *DContext = Unit;
@@ -3100,20 +3216,23 @@ llvm::DISubprogram *CGDebugInfo::getFunctionFwdDeclOrStub(GlobalDecl GD,
CallingConv CC = FD->getType()->castAs<FunctionType>()->getCallConv();
QualType FnType = CGM.getContext().getFunctionType(
FD->getReturnType(), ArgTypes, FunctionProtoType::ExtProtoInfo(CC));
+ if (!FD->isExternallyVisible())
+ SPFlags |= llvm::DISubprogram::SPFlagLocalToUnit;
+ if (CGM.getLangOpts().Optimize)
+ SPFlags |= llvm::DISubprogram::SPFlagOptimized;
+
if (Stub) {
+ Flags |= getCallSiteRelatedAttrs();
+ SPFlags |= llvm::DISubprogram::SPFlagDefinition;
return DBuilder.createFunction(
DContext, Name, LinkageName, Unit, Line,
- getOrCreateFunctionType(GD.getDecl(), FnType, Unit),
- !FD->isExternallyVisible(),
- /* isDefinition = */ true, 0, Flags, CGM.getLangOpts().Optimize,
+ getOrCreateFunctionType(GD.getDecl(), FnType, Unit), 0, Flags, SPFlags,
TParamsArray.get(), getFunctionDeclaration(FD));
}
llvm::DISubprogram *SP = DBuilder.createTempFunctionFwdDecl(
DContext, Name, LinkageName, Unit, Line,
- getOrCreateFunctionType(GD.getDecl(), FnType, Unit),
- !FD->isExternallyVisible(),
- /* isDefinition = */ false, 0, Flags, CGM.getLangOpts().Optimize,
+ getOrCreateFunctionType(GD.getDecl(), FnType, Unit), 0, Flags, SPFlags,
TParamsArray.get(), getFunctionDeclaration(FD));
const FunctionDecl *CanonDecl = FD->getCanonicalDecl();
FwdDeclReplaceMap.emplace_back(std::piecewise_construct,
@@ -3138,12 +3257,14 @@ CGDebugInfo::getGlobalVariableForwardDeclaration(const VarDecl *VD) {
llvm::DIFile *Unit = getOrCreateFile(Loc);
llvm::DIScope *DContext = Unit;
unsigned Line = getLineNumber(Loc);
+ llvm::MDTuple *TemplateParameters = nullptr;
- collectVarDeclProps(VD, Unit, Line, T, Name, LinkageName, DContext);
+ collectVarDeclProps(VD, Unit, Line, T, Name, LinkageName, TemplateParameters,
+ DContext);
auto Align = getDeclAlignIfRequired(VD, CGM.getContext());
auto *GV = DBuilder.createTempGlobalVariableFwdDecl(
DContext, Name, LinkageName, Unit, Line, getOrCreateType(T, Unit),
- !VD->isExternallyVisible(), nullptr, Align);
+ !VD->isExternallyVisible(), nullptr, TemplateParameters, Align);
FwdDeclReplaceMap.emplace_back(
std::piecewise_construct,
std::make_tuple(cast<VarDecl>(VD->getCanonicalDecl())),
@@ -3299,6 +3420,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc,
bool HasDecl = (D != nullptr);
llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero;
+ llvm::DISubprogram::DISPFlags SPFlags = llvm::DISubprogram::SPFlagZero;
llvm::DIFile *Unit = getOrCreateFile(Loc);
llvm::DIScope *FDContext = Unit;
llvm::DINodeArray TParamsArray;
@@ -3338,6 +3460,15 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc,
if (CurFuncIsThunk)
Flags |= llvm::DINode::FlagThunk;
+ if (Fn->hasLocalLinkage())
+ SPFlags |= llvm::DISubprogram::SPFlagLocalToUnit;
+ if (CGM.getLangOpts().Optimize)
+ SPFlags |= llvm::DISubprogram::SPFlagOptimized;
+
+ llvm::DINode::DIFlags FlagsForDef = Flags | getCallSiteRelatedAttrs();
+ llvm::DISubprogram::DISPFlags SPFlagsForDef =
+ SPFlags | llvm::DISubprogram::SPFlagDefinition;
+
unsigned LineNo = getLineNumber(Loc);
unsigned ScopeLine = getLineNumber(ScopeLoc);
@@ -3348,9 +3479,8 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc,
// are emitted as CU level entities by the backend.
llvm::DISubprogram *SP = DBuilder.createFunction(
FDContext, Name, LinkageName, Unit, LineNo,
- getOrCreateFunctionType(D, FnType, Unit), Fn->hasLocalLinkage(),
- true /*definition*/, ScopeLine, Flags, CGM.getLangOpts().Optimize,
- TParamsArray.get(), getFunctionDeclaration(D));
+ getOrCreateFunctionType(D, FnType, Unit), ScopeLine, FlagsForDef,
+ SPFlagsForDef, TParamsArray.get(), getFunctionDeclaration(D));
Fn->setSubprogram(SP);
// We might get here with a VarDecl in the case we're generating
// code for the initialization of globals. Do not record these decls
@@ -3370,8 +3500,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc,
cast<llvm::DICompositeType>(It->second);
llvm::DISubprogram *FD = DBuilder.createFunction(
InterfaceDecl, Name, LinkageName, Unit, LineNo,
- getOrCreateFunctionType(D, FnType, Unit), Fn->hasLocalLinkage(),
- false /*definition*/, ScopeLine, Flags, CGM.getLangOpts().Optimize,
+ getOrCreateFunctionType(D, FnType, Unit), ScopeLine, Flags, SPFlags,
TParamsArray.get());
DBuilder.finalizeSubprogram(FD);
ObjCMethodCache[ID].push_back(FD);
@@ -3420,11 +3549,13 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc,
}
unsigned LineNo = getLineNumber(Loc);
unsigned ScopeLine = 0;
+ llvm::DISubprogram::DISPFlags SPFlags = llvm::DISubprogram::SPFlagZero;
+ if (CGM.getLangOpts().Optimize)
+ SPFlags |= llvm::DISubprogram::SPFlagOptimized;
DBuilder.retainType(DBuilder.createFunction(
FDContext, Name, LinkageName, Unit, LineNo,
- getOrCreateFunctionType(D, FnType, Unit), false /*internalLinkage*/,
- false /*definition*/, ScopeLine, Flags, CGM.getLangOpts().Optimize,
+ getOrCreateFunctionType(D, FnType, Unit), ScopeLine, Flags, SPFlags,
TParamsArray.get(), getFunctionDeclaration(D)));
}
@@ -3453,7 +3584,7 @@ void CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc) {
// Update our current location
setLocation(Loc);
- if (CurLoc.isInvalid() || CurLoc.isMacroID())
+ if (CurLoc.isInvalid() || CurLoc.isMacroID() || LexicalBlockStack.empty())
return;
llvm::MDNode *Scope = LexicalBlockStack.back();
@@ -3530,9 +3661,9 @@ void CGDebugInfo::EmitFunctionEnd(CGBuilderTy &Builder, llvm::Function *Fn) {
DBuilder.finalizeSubprogram(Fn->getSubprogram());
}
-llvm::DIType *CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
- uint64_t *XOffset) {
-
+CGDebugInfo::BlockByRefType
+CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
+ uint64_t *XOffset) {
SmallVector<llvm::Metadata *, 5> EltTys;
QualType FType;
uint64_t FieldSize, FieldOffset;
@@ -3584,23 +3715,21 @@ llvm::DIType *CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
}
FType = Type;
- llvm::DIType *FieldTy = getOrCreateType(FType, Unit);
+ llvm::DIType *WrappedTy = getOrCreateType(FType, Unit);
FieldSize = CGM.getContext().getTypeSize(FType);
FieldAlign = CGM.getContext().toBits(Align);
*XOffset = FieldOffset;
- FieldTy = DBuilder.createMemberType(Unit, VD->getName(), Unit, 0, FieldSize,
- FieldAlign, FieldOffset,
- llvm::DINode::FlagZero, FieldTy);
+ llvm::DIType *FieldTy = DBuilder.createMemberType(
+ Unit, VD->getName(), Unit, 0, FieldSize, FieldAlign, FieldOffset,
+ llvm::DINode::FlagZero, WrappedTy);
EltTys.push_back(FieldTy);
FieldOffset += FieldSize;
llvm::DINodeArray Elements = DBuilder.getOrCreateArray(EltTys);
-
- llvm::DINode::DIFlags Flags = llvm::DINode::FlagBlockByrefStruct;
-
- return DBuilder.createStructType(Unit, "", Unit, 0, FieldOffset, 0, Flags,
- nullptr, Elements);
+ return {DBuilder.createStructType(Unit, "", Unit, 0, FieldOffset, 0,
+ llvm::DINode::FlagZero, nullptr, Elements),
+ WrappedTy};
}
llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD,
@@ -3621,7 +3750,7 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD,
llvm::DIType *Ty;
uint64_t XOffset = 0;
if (VD->hasAttr<BlocksAttr>())
- Ty = EmitTypeForVarWithBlocksAttr(VD, &XOffset);
+ Ty = EmitTypeForVarWithBlocksAttr(VD, &XOffset).WrappedType;
else
Ty = getOrCreateType(VD->getType(), Unit);
@@ -3759,7 +3888,7 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable(
llvm::DIFile *Unit = getOrCreateFile(VD->getLocation());
llvm::DIType *Ty;
if (isByRef)
- Ty = EmitTypeForVarWithBlocksAttr(VD, &XOffset);
+ Ty = EmitTypeForVarWithBlocksAttr(VD, &XOffset).WrappedType;
else
Ty = getOrCreateType(VD->getType(), Unit);
@@ -3830,6 +3959,44 @@ bool operator<(const BlockLayoutChunk &l, const BlockLayoutChunk &r) {
}
} // namespace
+void CGDebugInfo::collectDefaultFieldsForBlockLiteralDeclare(
+ const CGBlockInfo &Block, const ASTContext &Context, SourceLocation Loc,
+ const llvm::StructLayout &BlockLayout, llvm::DIFile *Unit,
+ SmallVectorImpl<llvm::Metadata *> &Fields) {
+ // Blocks in OpenCL have unique constraints which make the standard fields
+ // redundant while requiring size and align fields for enqueue_kernel. See
+ // initializeForBlockHeader in CGBlocks.cpp
+ if (CGM.getLangOpts().OpenCL) {
+ Fields.push_back(createFieldType("__size", Context.IntTy, Loc, AS_public,
+ BlockLayout.getElementOffsetInBits(0),
+ Unit, Unit));
+ Fields.push_back(createFieldType("__align", Context.IntTy, Loc, AS_public,
+ BlockLayout.getElementOffsetInBits(1),
+ Unit, Unit));
+ } else {
+ Fields.push_back(createFieldType("__isa", Context.VoidPtrTy, Loc, AS_public,
+ BlockLayout.getElementOffsetInBits(0),
+ Unit, Unit));
+ Fields.push_back(createFieldType("__flags", Context.IntTy, Loc, AS_public,
+ BlockLayout.getElementOffsetInBits(1),
+ Unit, Unit));
+ Fields.push_back(
+ createFieldType("__reserved", Context.IntTy, Loc, AS_public,
+ BlockLayout.getElementOffsetInBits(2), Unit, Unit));
+ auto *FnTy = Block.getBlockExpr()->getFunctionType();
+ auto FnPtrType = CGM.getContext().getPointerType(FnTy->desugar());
+ Fields.push_back(createFieldType("__FuncPtr", FnPtrType, Loc, AS_public,
+ BlockLayout.getElementOffsetInBits(3),
+ Unit, Unit));
+ Fields.push_back(createFieldType(
+ "__descriptor",
+ Context.getPointerType(Block.NeedsCopyDispose
+ ? Context.getBlockDescriptorExtendedType()
+ : Context.getBlockDescriptorType()),
+ Loc, AS_public, BlockLayout.getElementOffsetInBits(4), Unit, Unit));
+ }
+}
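For orientation, the non-OpenCL block literal header that these debug-info fields describe corresponds to a layout roughly like the struct below. The struct name and member types are an assumption based on the field names above, not code from the patch; in the OpenCL case only __size and __align are emitted, as the comment notes.

    struct BlockLiteralHeader {
      void *__isa;
      int __flags;
      int __reserved;
      void (*__FuncPtr)();   // actual signature depends on the block
      void *__descriptor;    // BlockDescriptor or BlockDescriptorExtended
    };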
+
void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block,
StringRef Name,
unsigned ArgNo,
@@ -3852,35 +4019,8 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block,
CGM.getDataLayout().getStructLayout(block.StructureType);
SmallVector<llvm::Metadata *, 16> fields;
- if (CGM.getLangOpts().OpenCL) {
- fields.push_back(createFieldType("__size", C.IntTy, loc, AS_public,
- blockLayout->getElementOffsetInBits(0),
- tunit, tunit));
- fields.push_back(createFieldType("__align", C.IntTy, loc, AS_public,
- blockLayout->getElementOffsetInBits(1),
- tunit, tunit));
- } else {
- fields.push_back(createFieldType("__isa", C.VoidPtrTy, loc, AS_public,
- blockLayout->getElementOffsetInBits(0),
- tunit, tunit));
- fields.push_back(createFieldType("__flags", C.IntTy, loc, AS_public,
- blockLayout->getElementOffsetInBits(1),
- tunit, tunit));
- fields.push_back(createFieldType("__reserved", C.IntTy, loc, AS_public,
- blockLayout->getElementOffsetInBits(2),
- tunit, tunit));
- auto *FnTy = block.getBlockExpr()->getFunctionType();
- auto FnPtrType = CGM.getContext().getPointerType(FnTy->desugar());
- fields.push_back(createFieldType("__FuncPtr", FnPtrType, loc, AS_public,
- blockLayout->getElementOffsetInBits(3),
- tunit, tunit));
- fields.push_back(createFieldType(
- "__descriptor",
- C.getPointerType(block.NeedsCopyDispose
- ? C.getBlockDescriptorExtendedType()
- : C.getBlockDescriptorType()),
- loc, AS_public, blockLayout->getElementOffsetInBits(4), tunit, tunit));
- }
+ collectDefaultFieldsForBlockLiteralDeclare(block, C, loc, *blockLayout, tunit,
+ fields);
// We want to sort the captures by offset, not because DWARF
// requires this, but because we're paranoid about debuggers.
@@ -3923,7 +4063,7 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block,
QualType type;
if (auto *Method =
cast_or_null<CXXMethodDecl>(blockDecl->getNonClosureContext()))
- type = Method->getThisType(C);
+ type = Method->getThisType();
else if (auto *RDecl = dyn_cast<CXXRecordDecl>(blockDecl->getParent()))
type = QualType(RDecl->getTypeForDecl(), 0);
else
@@ -3941,10 +4081,10 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block,
if (capture->isByRef()) {
TypeInfo PtrInfo = C.getTypeInfo(C.VoidPtrTy);
auto Align = PtrInfo.AlignIsRequired ? PtrInfo.Align : 0;
-
- // FIXME: this creates a second copy of this type!
+ // FIXME: This recomputes the layout of the BlockByRefWrapper.
uint64_t xoffset;
- fieldType = EmitTypeForVarWithBlocksAttr(variable, &xoffset);
+ fieldType =
+ EmitTypeForVarWithBlocksAttr(variable, &xoffset).BlockByRefWrapper;
fieldType = DBuilder.createPointerType(fieldType, PtrInfo.Width);
fieldType = DBuilder.createMemberType(tunit, name, tunit, line,
PtrInfo.Width, Align, offsetInBits,
@@ -4045,7 +4185,9 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var,
unsigned LineNo;
StringRef DeclName, LinkageName;
QualType T;
- collectVarDeclProps(D, Unit, LineNo, T, DeclName, LinkageName, DContext);
+ llvm::MDTuple *TemplateParameters = nullptr;
+ collectVarDeclProps(D, Unit, LineNo, T, DeclName, LinkageName,
+ TemplateParameters, DContext);
// Attempt to store one global variable for the declaration - even if we
// emit a lot of fields.
@@ -4071,7 +4213,8 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var,
DContext, DeclName, LinkageName, Unit, LineNo, getOrCreateType(T, Unit),
Var->hasLocalLinkage(),
Expr.empty() ? nullptr : DBuilder.createExpression(Expr),
- getOrCreateStaticDataMemberDeclarationOrNull(D), Align);
+ getOrCreateStaticDataMemberDeclarationOrNull(D), TemplateParameters,
+ Align);
Var->addDebugInfo(GVE);
}
DeclCache[D->getCanonicalDecl()].reset(GVE);
@@ -4128,10 +4271,19 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, const APValue &Init) {
InitExpr = DBuilder.createConstantValueExpression(
Init.getFloat().bitcastToAPInt().getZExtValue());
}
+
+ llvm::MDTuple *TemplateParameters = nullptr;
+
+ if (isa<VarTemplateSpecializationDecl>(VD))
+ if (VarD) {
+ llvm::DINodeArray parameterNodes = CollectVarTemplateParams(VarD, &*Unit);
+ TemplateParameters = parameterNodes.get();
+ }
+
GV.reset(DBuilder.createGlobalVariableExpression(
DContext, Name, StringRef(), Unit, getLineNumber(VD->getLocation()), Ty,
true, InitExpr, getOrCreateStaticDataMemberDeclarationOrNull(VarD),
- Align));
+ TemplateParameters, Align));
}
llvm::DIScope *CGDebugInfo::getCurrentContextDescriptor(const Decl *D) {
@@ -4320,7 +4472,7 @@ void CGDebugInfo::EmitExplicitCastType(QualType Ty) {
if (CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo)
return;
- if (auto *DieTy = getOrCreateType(Ty, getOrCreateMainFile()))
+ if (auto *DieTy = getOrCreateType(Ty, TheCU->getFile()))
// Don't ignore in case of explicit cast where it is referenced indirectly.
DBuilder.retainType(DieTy);
}
@@ -4332,3 +4484,22 @@ llvm::DebugLoc CGDebugInfo::SourceLocToDebugLoc(SourceLocation Loc) {
llvm::MDNode *Scope = LexicalBlockStack.back();
return llvm::DebugLoc::get(getLineNumber(Loc), getColumnNumber(Loc), Scope);
}
+
+llvm::DINode::DIFlags CGDebugInfo::getCallSiteRelatedAttrs() const {
+ // Call site-related attributes are only useful in optimized programs, and
+ // when there's a possibility of debugging backtraces.
+ if (!CGM.getLangOpts().Optimize || DebugKind == codegenoptions::NoDebugInfo ||
+ DebugKind == codegenoptions::LocTrackingOnly)
+ return llvm::DINode::FlagZero;
+
+ // Call site-related attributes are available in DWARF v5. Some debuggers,
+ // while not fully DWARF v5-compliant, may accept these attributes as if they
+ // were part of DWARF v4.
+ bool SupportsDWARFv4Ext =
+ CGM.getCodeGenOpts().DwarfVersion == 4 &&
+ CGM.getCodeGenOpts().getDebuggerTuning() == llvm::DebuggerKind::LLDB;
+ if (!SupportsDWARFv4Ext && CGM.getCodeGenOpts().DwarfVersion < 5)
+ return llvm::DINode::FlagZero;
+
+ return llvm::DINode::FlagAllCallsDescribed;
+}
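Condensed restatement of the predicate above (illustrative only; the function and parameter names are stand-ins, not patch code): call-site attributes are emitted when optimizing, with real debug info, and with either DWARF v5 or the DWARF v4 plus LLDB-tuning extension.

    static bool emitsCallSiteAttrs(bool Optimize, bool RealDebugInfo,
                                   unsigned DwarfVersion, bool TuningIsLLDB) {
      if (!Optimize || !RealDebugInfo)
        return false;
      return DwarfVersion >= 5 || (DwarfVersion == 4 && TuningIsLLDB);
    }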
diff --git a/lib/CodeGen/CGDebugInfo.h b/lib/CodeGen/CGDebugInfo.h
index e632806138f0..031e40b9dde9 100644
--- a/lib/CodeGen/CGDebugInfo.h
+++ b/lib/CodeGen/CGDebugInfo.h
@@ -20,8 +20,8 @@
#include "clang/AST/ExternalASTSource.h"
#include "clang/AST/Type.h"
#include "clang/AST/TypeOrdering.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/SourceLocation.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
@@ -76,6 +76,9 @@ class CGDebugInfo {
llvm::DIType *OCLQueueDITy = nullptr;
llvm::DIType *OCLNDRangeDITy = nullptr;
llvm::DIType *OCLReserveIDDITy = nullptr;
+#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
+ llvm::DIType *Id##Ty = nullptr;
+#include "clang/Basic/OpenCLExtensionTypes.def"
/// Cache of previously constructed Types.
llvm::DenseMap<const void *, llvm::TrackingMDRef> TypeCache;
@@ -248,6 +251,11 @@ class CGDebugInfo {
llvm::DINodeArray CollectFunctionTemplateParams(const FunctionDecl *FD,
llvm::DIFile *Unit);
+ /// A helper function to collect debug info for variable template
+ /// parameters.
+ llvm::DINodeArray CollectVarTemplateParams(const VarDecl *VD,
+ llvm::DIFile *Unit);
+
/// A helper function to collect debug info for template
/// parameters.
llvm::DINodeArray
@@ -311,12 +319,31 @@ class CGDebugInfo {
void AppendAddressSpaceXDeref(unsigned AddressSpace,
SmallVectorImpl<int64_t> &Expr) const;
+ /// A helper function to collect debug info for the default elements of a
+ /// block.
+ ///
+ /// \returns The next available field offset after the default elements.
+ uint64_t collectDefaultElementTypesForBlockPointer(
+ const BlockPointerType *Ty, llvm::DIFile *Unit,
+ llvm::DIDerivedType *DescTy, unsigned LineNo,
+ SmallVectorImpl<llvm::Metadata *> &EltTys);
+
+ /// A helper function to collect debug info for the default fields of a
+ /// block.
+ void collectDefaultFieldsForBlockLiteralDeclare(
+ const CGBlockInfo &Block, const ASTContext &Context, SourceLocation Loc,
+ const llvm::StructLayout &BlockLayout, llvm::DIFile *Unit,
+ SmallVectorImpl<llvm::Metadata *> &Fields);
+
public:
CGDebugInfo(CodeGenModule &CGM);
~CGDebugInfo();
void finalize();
+ /// Remap a given path with the current debug prefix map
+ std::string remapDIPath(StringRef) const;
+
/// Register VLA size expression debug node with the qualified type.
void registerVLASizeExpression(QualType Ty, llvm::Metadata *SizeExpr) {
SizeExprCache[Ty] = SizeExpr;
@@ -475,9 +502,16 @@ private:
llvm::Optional<unsigned> ArgNo,
CGBuilderTy &Builder);
+ struct BlockByRefType {
+ /// The wrapper struct used inside the __block_literal struct.
+ llvm::DIType *BlockByRefWrapper;
+ /// The type as it appears in the source code.
+ llvm::DIType *WrappedType;
+ };
+
/// Build up structure info for the byref. See \a BuildByRefType.
- llvm::DIType *EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
- uint64_t *OffSet);
+ BlockByRefType EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
+ uint64_t *OffSet);
/// Get context info for the DeclContext of \p Decl.
llvm::DIScope *getDeclContextDescriptor(const Decl *D);
@@ -497,9 +531,6 @@ private:
/// Create new compile unit.
void CreateCompileUnit();
- /// Remap a given path with the current debug prefix map
- std::string remapDIPath(StringRef) const;
-
/// Compute the file checksum debug info for input file ID.
Optional<llvm::DIFile::ChecksumKind>
computeChecksum(FileID FID, SmallString<32> &Checksum) const;
@@ -507,11 +538,15 @@ private:
/// Get the source of the given file ID.
Optional<StringRef> getSource(const SourceManager &SM, FileID FID);
- /// Get the file debug info descriptor for the input location.
+ /// Convenience function to get the file debug info descriptor for the input
+ /// location.
llvm::DIFile *getOrCreateFile(SourceLocation Loc);
- /// Get the file info for main compile unit.
- llvm::DIFile *getOrCreateMainFile();
+ /// Create a file debug info descriptor for a source file.
+ llvm::DIFile *
+ createFile(StringRef FileName,
+ Optional<llvm::DIFile::ChecksumInfo<StringRef>> CSInfo,
+ Optional<StringRef> Source);
/// Get the type from the cache or create a new type if necessary.
llvm::DIType *getOrCreateType(QualType Ty, llvm::DIFile *Fg);
@@ -580,6 +615,11 @@ private:
unsigned LineNo, StringRef LinkageName,
llvm::GlobalVariable *Var, llvm::DIScope *DContext);
+
+ /// Return flags which enable debug info emission for call sites, provided
+ /// that it is supported and enabled.
+ llvm::DINode::DIFlags getCallSiteRelatedAttrs() const;
+
/// Get the printing policy for producing names for debug info.
PrintingPolicy getPrintingPolicy() const;
@@ -622,7 +662,9 @@ private:
/// Collect various properties of a VarDecl.
void collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit,
unsigned &LineNo, QualType &T, StringRef &Name,
- StringRef &LinkageName, llvm::DIScope *&VDContext);
+ StringRef &LinkageName,
+ llvm::MDTuple *&TemplateParameters,
+ llvm::DIScope *&VDContext);
/// Allocate a copy of \p A using the DebugInfoNames allocator
/// and return a reference to it. If multiple arguments are given the strings
@@ -702,7 +744,7 @@ public:
/// function \p InlinedFn. The current debug location becomes the inlined call
/// site of the inlined function.
ApplyInlineDebugLocation(CodeGenFunction &CGF, GlobalDecl InlinedFn);
- /// Restore everything back to the orginial state.
+ /// Restore everything back to the original state.
~ApplyInlineDebugLocation();
};
diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp
index 57b2fbadbeec..5959d889b455 100644
--- a/lib/CodeGen/CGDecl.cpp
+++ b/lib/CodeGen/CGDecl.cpp
@@ -26,10 +26,11 @@
#include "clang/AST/Decl.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclOpenMP.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
-#include "clang/Frontend/CodeGenOptions.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Intrinsics.h"
@@ -104,6 +105,7 @@ void CodeGenFunction::EmitDecl(const Decl &D) {
case Decl::Import:
case Decl::OMPThreadPrivate:
case Decl::OMPCapturedExpr:
+ case Decl::OMPRequires:
case Decl::Empty:
// None of these decls require codegen support.
return;
@@ -545,7 +547,7 @@ namespace {
void Emit(CodeGenFunction &CGF, Flags flags) override {
// Compute the address of the local variable, in case it's a
// byref or something.
- DeclRefExpr DRE(const_cast<VarDecl*>(&Var), false,
+ DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(&Var), false,
Var.getType(), VK_LValue, SourceLocation());
llvm::Value *value = CGF.EmitLoadOfScalar(CGF.EmitDeclRefLValue(&DRE),
SourceLocation());
@@ -563,7 +565,7 @@ namespace {
: CleanupFn(CleanupFn), FnInfo(*Info), Var(*Var) {}
void Emit(CodeGenFunction &CGF, Flags flags) override {
- DeclRefExpr DRE(const_cast<VarDecl*>(&Var), false,
+ DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(&Var), false,
Var.getType(), VK_LValue, SourceLocation());
// Compute the address of the local variable, in case it's a byref
// or something.
@@ -752,9 +754,9 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D,
// If we're emitting a value with lifetime, we have to do the
// initialization *before* we leave the cleanup scopes.
- if (const ExprWithCleanups *ewc = dyn_cast<ExprWithCleanups>(init)) {
- enterFullExpression(ewc);
- init = ewc->getSubExpr();
+ if (const FullExpr *fe = dyn_cast<FullExpr>(init)) {
+ enterFullExpression(fe);
+ init = fe->getSubExpr();
}
CodeGenFunction::RunCleanupsScope Scope(*this);
@@ -795,15 +797,21 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D,
case Qualifiers::OCL_None:
llvm_unreachable("present but none");
+ case Qualifiers::OCL_Strong: {
+ if (!D || !isa<VarDecl>(D) || !cast<VarDecl>(D)->isARCPseudoStrong()) {
+ value = EmitARCRetainScalarExpr(init);
+ break;
+ }
+ // If D is pseudo-strong, treat it like __unsafe_unretained here. This means
+ // we omit the retain, which causes non-autoreleased return values to be
+ // released immediately.
+ LLVM_FALLTHROUGH;
+ }
+
case Qualifiers::OCL_ExplicitNone:
value = EmitARCUnsafeUnretainedScalarExpr(init);
break;
- case Qualifiers::OCL_Strong: {
- value = EmitARCRetainScalarExpr(init);
- break;
- }
-
case Qualifiers::OCL_Weak: {
// If it's not accessed by the initializer, try to emit the
// initialization with a copy or move.
@@ -948,111 +956,242 @@ static bool shouldUseBZeroPlusStoresToInitialize(llvm::Constant *Init,
canEmitInitWithFewStoresAfterBZero(Init, StoreBudget);
}
-/// A byte pattern.
-///
-/// Can be "any" pattern if the value was padding or known to be undef.
-/// Can be "none" pattern if a sequence doesn't exist.
-class BytePattern {
- uint8_t Val;
- enum class ValueType : uint8_t { Specific, Any, None } Type;
- BytePattern(ValueType Type) : Type(Type) {}
-
-public:
- BytePattern(uint8_t Value) : Val(Value), Type(ValueType::Specific) {}
- static BytePattern Any() { return BytePattern(ValueType::Any); }
- static BytePattern None() { return BytePattern(ValueType::None); }
- bool isAny() const { return Type == ValueType::Any; }
- bool isNone() const { return Type == ValueType::None; }
- bool isValued() const { return Type == ValueType::Specific; }
- uint8_t getValue() const {
- assert(isValued());
- return Val;
- }
- BytePattern merge(const BytePattern Other) const {
- if (isNone() || Other.isNone())
- return None();
- if (isAny())
- return Other;
- if (Other.isAny())
- return *this;
- if (getValue() == Other.getValue())
- return *this;
- return None();
- }
-};
-
-/// Figures out whether the constant can be initialized with memset.
-static BytePattern constantIsRepeatedBytePattern(llvm::Constant *C) {
- if (isa<llvm::ConstantAggregateZero>(C) || isa<llvm::ConstantPointerNull>(C))
- return BytePattern(0x00);
- if (isa<llvm::UndefValue>(C))
- return BytePattern::Any();
-
- if (isa<llvm::ConstantInt>(C)) {
- auto *Int = cast<llvm::ConstantInt>(C);
- if (Int->getBitWidth() % 8 != 0)
- return BytePattern::None();
- const llvm::APInt &Value = Int->getValue();
- if (Value.isSplat(8))
- return BytePattern(Value.getLoBits(8).getLimitedValue());
- return BytePattern::None();
- }
-
- if (isa<llvm::ConstantFP>(C)) {
- auto *FP = cast<llvm::ConstantFP>(C);
- llvm::APInt Bits = FP->getValueAPF().bitcastToAPInt();
- if (Bits.getBitWidth() % 8 != 0)
- return BytePattern::None();
- if (!Bits.isSplat(8))
- return BytePattern::None();
- return BytePattern(Bits.getLimitedValue() & 0xFF);
- }
-
- if (isa<llvm::ConstantVector>(C)) {
- llvm::Constant *Splat = cast<llvm::ConstantVector>(C)->getSplatValue();
- if (Splat)
- return constantIsRepeatedBytePattern(Splat);
- return BytePattern::None();
- }
-
- if (isa<llvm::ConstantArray>(C) || isa<llvm::ConstantStruct>(C)) {
- BytePattern Pattern(BytePattern::Any());
- for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I) {
- llvm::Constant *Elt = cast<llvm::Constant>(C->getOperand(I));
- Pattern = Pattern.merge(constantIsRepeatedBytePattern(Elt));
- if (Pattern.isNone())
- return Pattern;
+/// Decide whether we should use memset to initialize a local variable instead
+/// of using a memcpy from a constant global. Assumes we've already decided to
+/// not use bzero.
+/// FIXME We could be more clever, as we are for bzero above, and generate
+/// memset followed by stores. It's unclear that's worth the effort.
+static llvm::Value *shouldUseMemSetToInitialize(llvm::Constant *Init,
+ uint64_t GlobalSize) {
+ uint64_t SizeLimit = 32;
+ if (GlobalSize <= SizeLimit)
+ return nullptr;
+ return llvm::isBytewiseValue(Init);
+}
+
+static llvm::Constant *patternFor(CodeGenModule &CGM, llvm::Type *Ty) {
+ // The following value is a guaranteed unmappable pointer value and has a
+ // repeated byte-pattern which makes it easier to synthesize. We use it for
+ // pointers as well as integers so that aggregates are likely to be
+ // initialized with this repeated value.
+ constexpr uint64_t LargeValue = 0xAAAAAAAAAAAAAAAAull;
+ // For 32-bit platforms it's a bit trickier because, across systems, only the
+ // zero page can reasonably be expected to be unmapped, and even then we need
+ // a very low address. We use a smaller value, and that value sadly doesn't
+ // have a repeated byte-pattern. We don't use it for integers.
+ constexpr uint32_t SmallValue = 0x000000AA;
+ // Floating-point values are initialized as NaNs because they propagate. Using
+ // a repeated byte pattern means that it will be easier to initialize
+ // all-floating-point aggregates and arrays with memset. Further, aggregates
+ // which mix integral and a few floats might also initialize with memset
+ // followed by a handful of stores for the floats. Using fairly unique NaNs
+ // also means they'll be easier to distinguish in a crash.
+ constexpr bool NegativeNaN = true;
+ constexpr uint64_t NaNPayload = 0xFFFFFFFFFFFFFFFFull;
+ if (Ty->isIntOrIntVectorTy()) {
+ unsigned BitWidth = cast<llvm::IntegerType>(
+ Ty->isVectorTy() ? Ty->getVectorElementType() : Ty)
+ ->getBitWidth();
+ if (BitWidth <= 64)
+ return llvm::ConstantInt::get(Ty, LargeValue);
+ return llvm::ConstantInt::get(
+ Ty, llvm::APInt::getSplat(BitWidth, llvm::APInt(64, LargeValue)));
+ }
+ if (Ty->isPtrOrPtrVectorTy()) {
+ auto *PtrTy = cast<llvm::PointerType>(
+ Ty->isVectorTy() ? Ty->getVectorElementType() : Ty);
+ unsigned PtrWidth = CGM.getContext().getTargetInfo().getPointerWidth(
+ PtrTy->getAddressSpace());
+ llvm::Type *IntTy = llvm::IntegerType::get(CGM.getLLVMContext(), PtrWidth);
+ uint64_t IntValue;
+ switch (PtrWidth) {
+ default:
+ llvm_unreachable("pattern initialization of unsupported pointer width");
+ case 64:
+ IntValue = LargeValue;
+ break;
+ case 32:
+ IntValue = SmallValue;
+ break;
}
- return Pattern;
+ auto *Int = llvm::ConstantInt::get(IntTy, IntValue);
+ return llvm::ConstantExpr::getIntToPtr(Int, PtrTy);
+ }
+ if (Ty->isFPOrFPVectorTy()) {
+ unsigned BitWidth = llvm::APFloat::semanticsSizeInBits(
+ (Ty->isVectorTy() ? Ty->getVectorElementType() : Ty)
+ ->getFltSemantics());
+ llvm::APInt Payload(64, NaNPayload);
+ if (BitWidth >= 64)
+ Payload = llvm::APInt::getSplat(BitWidth, Payload);
+ return llvm::ConstantFP::getQNaN(Ty, NegativeNaN, &Payload);
+ }
+ if (Ty->isArrayTy()) {
+ // Note: this doesn't touch tail padding (at the end of an object, before
+ // the next array object). It is instead handled by replaceUndef.
+ auto *ArrTy = cast<llvm::ArrayType>(Ty);
+ llvm::SmallVector<llvm::Constant *, 8> Element(
+ ArrTy->getNumElements(), patternFor(CGM, ArrTy->getElementType()));
+ return llvm::ConstantArray::get(ArrTy, Element);
+ }
+
+ // Note: this doesn't touch struct padding. It will initialize as much union
+ // padding as is required for the largest type in the union. Padding is
+ // instead handled by replaceUndef. Stores to structs with volatile members
+ // don't have a volatile qualifier when initialized according to C++. This is
+ // fine because stack-based volatiles don't really have volatile semantics
+ // anyways, and the initialization shouldn't be observable.
+ auto *StructTy = cast<llvm::StructType>(Ty);
+ llvm::SmallVector<llvm::Constant *, 8> Struct(StructTy->getNumElements());
+ for (unsigned El = 0; El != Struct.size(); ++El)
+ Struct[El] = patternFor(CGM, StructTy->getElementType(El));
+ return llvm::ConstantStruct::get(StructTy, Struct);
+}
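A standalone toy, using only the C++ standard library (no LLVM headers), showing the concrete bit patterns patternFor chooses; handy when recognizing them in a debugger or a crash dump:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      uint64_t IntOr64BitPtr = 0xAAAAAAAAAAAAAAAAull; // integers and 64-bit pointers
      uint32_t Ptr32 = 0x000000AAu;                   // 32-bit pointers: low, unmappable
      uint64_t NaNBits = 0xFFFFFFFFFFFFFFFFull;       // negative QNaN with all-ones payload
      double F;
      std::memcpy(&F, &NaNBits, sizeof F);            // doubles come out as this NaN
      std::printf("%016llx %08x %f\n",
                  (unsigned long long)IntOr64BitPtr, Ptr32, F);
      return 0;
    }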
+
+static Address createUnnamedGlobalFrom(CodeGenModule &CGM, const VarDecl &D,
+ CGBuilderTy &Builder,
+ llvm::Constant *Constant,
+ CharUnits Align) {
+ auto FunctionName = [&](const DeclContext *DC) -> std::string {
+ if (const auto *FD = dyn_cast<FunctionDecl>(DC)) {
+ if (const auto *CC = dyn_cast<CXXConstructorDecl>(FD))
+ return CC->getNameAsString();
+ if (const auto *CD = dyn_cast<CXXDestructorDecl>(FD))
+ return CD->getNameAsString();
+ return CGM.getMangledName(FD);
+ } else if (const auto *OM = dyn_cast<ObjCMethodDecl>(DC)) {
+ return OM->getNameAsString();
+ } else if (isa<BlockDecl>(DC)) {
+ return "<block>";
+ } else if (isa<CapturedDecl>(DC)) {
+ return "<captured>";
+ } else {
+ llvm::llvm_unreachable_internal("expected a function or method");
+ }
+ };
+
+ auto *Ty = Constant->getType();
+ bool isConstant = true;
+ llvm::GlobalVariable *InsertBefore = nullptr;
+ unsigned AS = CGM.getContext().getTargetAddressSpace(
+ CGM.getStringLiteralAddressSpace());
+ llvm::GlobalVariable *GV = new llvm::GlobalVariable(
+ CGM.getModule(), Ty, isConstant, llvm::GlobalValue::PrivateLinkage,
+ Constant,
+ "__const." + FunctionName(D.getParentFunctionOrMethod()) + "." +
+ D.getName(),
+ InsertBefore, llvm::GlobalValue::NotThreadLocal, AS);
+ GV->setAlignment(Align.getQuantity());
+ GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+
+ Address SrcPtr = Address(GV, Align);
+ llvm::Type *BP = llvm::PointerType::getInt8PtrTy(CGM.getLLVMContext(), AS);
+ if (SrcPtr.getType() != BP)
+ SrcPtr = Builder.CreateBitCast(SrcPtr, BP);
+ return SrcPtr;
+}
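Illustration (the function and variable names below are made up): for a local aggregate initialized from a constant, createUnnamedGlobalFrom emits a private unnamed_addr constant global named "__const." + the enclosing function's name from FunctionName() + "." + the variable name, and emitStoresForConstant copies from it when neither the bzero nor the memset path applies.

    void fill() {
      int lut[8] = {1, 2, 3, 4, 5, 6, 7, 8}; // memcpy'd from something like @__const._Z4fillv.lut
      (void)lut;
    }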
+
+static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D,
+ Address Loc, bool isVolatile,
+ CGBuilderTy &Builder,
+ llvm::Constant *constant) {
+ auto *Ty = constant->getType();
+ bool isScalar = Ty->isIntOrIntVectorTy() || Ty->isPtrOrPtrVectorTy() ||
+ Ty->isFPOrFPVectorTy();
+ if (isScalar) {
+ Builder.CreateStore(constant, Loc, isVolatile);
+ return;
}
- if (llvm::ConstantDataSequential *CDS =
- dyn_cast<llvm::ConstantDataSequential>(C)) {
- BytePattern Pattern(BytePattern::Any());
- for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
- llvm::Constant *Elt = CDS->getElementAsConstant(I);
- Pattern = Pattern.merge(constantIsRepeatedBytePattern(Elt));
- if (Pattern.isNone())
- return Pattern;
+ auto *Int8Ty = llvm::IntegerType::getInt8Ty(CGM.getLLVMContext());
+ auto *IntPtrTy = CGM.getDataLayout().getIntPtrType(CGM.getLLVMContext());
+
+ // If the initializer is all or mostly the same, codegen with bzero / memset
+ // then do a few stores afterward.
+ uint64_t ConstantSize = CGM.getDataLayout().getTypeAllocSize(Ty);
+ auto *SizeVal = llvm::ConstantInt::get(IntPtrTy, ConstantSize);
+ if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) {
+ Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, 0), SizeVal,
+ isVolatile);
+
+ bool valueAlreadyCorrect =
+ constant->isNullValue() || isa<llvm::UndefValue>(constant);
+ if (!valueAlreadyCorrect) {
+ Loc = Builder.CreateBitCast(Loc, Ty->getPointerTo(Loc.getAddressSpace()));
+ emitStoresForInitAfterBZero(CGM, constant, Loc, isVolatile, Builder);
+ }
+ return;
+ }
+
+ llvm::Value *Pattern = shouldUseMemSetToInitialize(constant, ConstantSize);
+ if (Pattern) {
+ uint64_t Value = 0x00;
+ if (!isa<llvm::UndefValue>(Pattern)) {
+ const llvm::APInt &AP = cast<llvm::ConstantInt>(Pattern)->getValue();
+ assert(AP.getBitWidth() <= 8);
+ Value = AP.getLimitedValue();
}
- return Pattern;
+ Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, Value), SizeVal,
+ isVolatile);
+ return;
}
- // BlockAddress, ConstantExpr, and everything else is scary.
- return BytePattern::None();
+ Builder.CreateMemCpy(
+ Loc,
+ createUnnamedGlobalFrom(CGM, D, Builder, constant, Loc.getAlignment()),
+ SizeVal, isVolatile);
}
-/// Decide whether we should use memset to initialize a local variable instead
-/// of using a memcpy from a constant global. Assumes we've already decided to
-/// not user bzero.
-/// FIXME We could be more clever, as we are for bzero above, and generate
-/// memset followed by stores. It's unclear that's worth the effort.
-static BytePattern shouldUseMemSetToInitialize(llvm::Constant *Init,
- uint64_t GlobalSize) {
- uint64_t SizeLimit = 32;
- if (GlobalSize <= SizeLimit)
- return BytePattern::None();
- return constantIsRepeatedBytePattern(Init);
+static void emitStoresForZeroInit(CodeGenModule &CGM, const VarDecl &D,
+ Address Loc, bool isVolatile,
+ CGBuilderTy &Builder) {
+ llvm::Type *ElTy = Loc.getElementType();
+ llvm::Constant *constant = llvm::Constant::getNullValue(ElTy);
+ emitStoresForConstant(CGM, D, Loc, isVolatile, Builder, constant);
+}
+
+static void emitStoresForPatternInit(CodeGenModule &CGM, const VarDecl &D,
+ Address Loc, bool isVolatile,
+ CGBuilderTy &Builder) {
+ llvm::Type *ElTy = Loc.getElementType();
+ llvm::Constant *constant = patternFor(CGM, ElTy);
+ assert(!isa<llvm::UndefValue>(constant));
+ emitStoresForConstant(CGM, D, Loc, isVolatile, Builder, constant);
+}
+
+static bool containsUndef(llvm::Constant *constant) {
+ auto *Ty = constant->getType();
+ if (isa<llvm::UndefValue>(constant))
+ return true;
+ if (Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy())
+ for (llvm::Use &Op : constant->operands())
+ if (containsUndef(cast<llvm::Constant>(Op)))
+ return true;
+ return false;
+}
+
+static llvm::Constant *replaceUndef(llvm::Constant *constant) {
+ // FIXME: when doing pattern initialization, replace undef with 0xAA instead.
+ // FIXME: also replace padding between values by creating a new struct type
+ // which has no padding.
+ auto *Ty = constant->getType();
+ if (isa<llvm::UndefValue>(constant))
+ return llvm::Constant::getNullValue(Ty);
+ if (!(Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()))
+ return constant;
+ if (!containsUndef(constant))
+ return constant;
+ llvm::SmallVector<llvm::Constant *, 8> Values(constant->getNumOperands());
+ for (unsigned Op = 0, NumOp = constant->getNumOperands(); Op != NumOp; ++Op) {
+ auto *OpValue = cast<llvm::Constant>(constant->getOperand(Op));
+ Values[Op] = replaceUndef(OpValue);
+ }
+ if (Ty->isStructTy())
+ return llvm::ConstantStruct::get(cast<llvm::StructType>(Ty), Values);
+ if (Ty->isArrayTy())
+ return llvm::ConstantArray::get(cast<llvm::ArrayType>(Ty), Values);
+ assert(Ty->isVectorTy());
+ return llvm::ConstantVector::get(Values);
}
/// EmitAutoVarDecl - Emit code and set up an entry in LocalDeclMap for a
@@ -1098,6 +1237,7 @@ void CodeGenFunction::EmitAndRegisterVariableArrayDimensions(
// For each dimension stores its QualType and corresponding
// size-expression Value.
SmallVector<CodeGenFunction::VlaSizePair, 4> Dimensions;
+ SmallVector<IdentifierInfo *, 4> VLAExprNames;
// Break down the array into individual dimensions.
QualType Type1D = D.getType();
@@ -1106,8 +1246,14 @@ void CodeGenFunction::EmitAndRegisterVariableArrayDimensions(
if (auto *C = dyn_cast<llvm::ConstantInt>(VlaSize.NumElts))
Dimensions.emplace_back(C, Type1D.getUnqualifiedType());
else {
- auto SizeExprAddr = CreateDefaultAlignTempAlloca(
- VlaSize.NumElts->getType(), "__vla_expr");
+ // Generate a locally unique name for the size expression.
+ Twine Name = Twine("__vla_expr") + Twine(VLAExprCounter++);
+ SmallString<12> Buffer;
+ StringRef NameRef = Name.toStringRef(Buffer);
+ auto &Ident = getContext().Idents.getOwn(NameRef);
+ VLAExprNames.push_back(&Ident);
+ auto SizeExprAddr =
+ CreateDefaultAlignTempAlloca(VlaSize.NumElts->getType(), NameRef);
Builder.CreateStore(VlaSize.NumElts, SizeExprAddr);
Dimensions.emplace_back(SizeExprAddr.getPointer(),
Type1D.getUnqualifiedType());
@@ -1121,20 +1267,20 @@ void CodeGenFunction::EmitAndRegisterVariableArrayDimensions(
// Register each dimension's size-expression with a DILocalVariable,
// so that it can be used by CGDebugInfo when instantiating a DISubrange
// to describe this array.
+ unsigned NameIdx = 0;
for (auto &VlaSize : Dimensions) {
llvm::Metadata *MD;
if (auto *C = dyn_cast<llvm::ConstantInt>(VlaSize.NumElts))
MD = llvm::ConstantAsMetadata::get(C);
else {
// Create an artificial VarDecl to generate debug info for.
- IdentifierInfo &NameIdent = getContext().Idents.getOwn(
- cast<llvm::AllocaInst>(VlaSize.NumElts)->getName());
+ IdentifierInfo *NameIdent = VLAExprNames[NameIdx++];
auto VlaExprTy = VlaSize.NumElts->getType()->getPointerElementType();
auto QT = getContext().getIntTypeForBitwidth(
VlaExprTy->getScalarSizeInBits(), false);
auto *ArtificialDecl = VarDecl::Create(
getContext(), const_cast<DeclContext *>(D.getDeclContext()),
- D.getLocation(), D.getLocation(), &NameIdent, QT,
+ D.getLocation(), D.getLocation(), NameIdent, QT,
getContext().CreateTypeSourceInfo(QT), SC_Auto);
ArtificialDecl->setImplicit();
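For example (illustrative source, not part of the patch; Clang accepts VLAs in C++ as an extension), a multidimensional VLA now gets one uniquely named size temporary per dimension, so each DISubrange can reference its own artificial variable:

    void g(int n, int m) {
      int a[n][m];   // size temporaries: __vla_expr0 (for n) and __vla_expr1 (for m)
      a[0][0] = 0;
    }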
@@ -1157,8 +1303,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
AutoVarEmission emission(D);
- bool isByRef = D.hasAttr<BlocksAttr>();
- emission.IsByRef = isByRef;
+ bool isEscapingByRef = D.isEscapingByref();
+ emission.IsEscapingByRef = isEscapingByRef;
CharUnits alignment = getContext().getDeclAlign(&D);
@@ -1197,8 +1343,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
// in OpenCL.
if ((!getLangOpts().OpenCL ||
Ty.getAddressSpace() == LangAS::opencl_constant) &&
- (CGM.getCodeGenOpts().MergeAllConstants && !NRVO && !isByRef &&
- CGM.isTypeConstant(Ty, true))) {
+ (CGM.getCodeGenOpts().MergeAllConstants && !NRVO &&
+ !isEscapingByRef && CGM.isTypeConstant(Ty, true))) {
EmitStaticVarDecl(D, llvm::GlobalValue::InternalLinkage);
// Signal this condition to later callbacks.
@@ -1250,7 +1396,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
} else {
CharUnits allocaAlignment;
llvm::Type *allocaTy;
- if (isByRef) {
+ if (isEscapingByRef) {
auto &byrefInfo = getBlockByrefInfo(&D);
allocaTy = byrefInfo.Type;
allocaAlignment = byrefInfo.ByrefAlignment;
@@ -1439,6 +1585,8 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) {
auto DL = ApplyDebugLocation::CreateDefaultArtificial(*this, D.getLocation());
QualType type = D.getType();
+ bool isVolatile = type.isVolatileQualified();
+
// If this local has an initializer, emit it now.
const Expr *Init = D.getInit();
@@ -1450,7 +1598,7 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) {
}
// Initialize the structure of a __block variable.
- if (emission.IsByRef)
+ if (emission.IsEscapingByRef)
emitByrefStructureInit(emission);
// Initialize the variable here if it doesn't have a initializer and it is a
@@ -1460,30 +1608,126 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) {
type.isNonTrivialToPrimitiveDefaultInitialize() ==
QualType::PDIK_Struct) {
LValue Dst = MakeAddrLValue(emission.getAllocatedAddress(), type);
- if (emission.IsByRef)
+ if (emission.IsEscapingByRef)
drillIntoBlockVariable(*this, Dst, &D);
defaultInitNonTrivialCStructVar(Dst);
return;
}
- if (isTrivialInitializer(Init))
- return;
-
// Check whether this is a byref variable that's potentially
// captured and moved by its own initializer. If so, we'll need to
// emit the initializer first, then copy into the variable.
- bool capturedByInit = emission.IsByRef && isCapturedBy(D, Init);
+ bool capturedByInit =
+ Init && emission.IsEscapingByRef && isCapturedBy(D, Init);
Address Loc =
- capturedByInit ? emission.Addr : emission.getObjectAddress(*this);
+ capturedByInit ? emission.Addr : emission.getObjectAddress(*this);
+
+ // Note: constexpr already initializes everything correctly.
+ LangOptions::TrivialAutoVarInitKind trivialAutoVarInit =
+ (D.isConstexpr()
+ ? LangOptions::TrivialAutoVarInitKind::Uninitialized
+ : (D.getAttr<UninitializedAttr>()
+ ? LangOptions::TrivialAutoVarInitKind::Uninitialized
+ : getContext().getLangOpts().getTrivialAutoVarInit()));
+
+ auto initializeWhatIsTechnicallyUninitialized = [&]() {
+ if (trivialAutoVarInit ==
+ LangOptions::TrivialAutoVarInitKind::Uninitialized)
+ return;
+
+ CharUnits Size = getContext().getTypeSizeInChars(type);
+ if (!Size.isZero()) {
+ switch (trivialAutoVarInit) {
+ case LangOptions::TrivialAutoVarInitKind::Uninitialized:
+ llvm_unreachable("Uninitialized handled above");
+ case LangOptions::TrivialAutoVarInitKind::Zero:
+ emitStoresForZeroInit(CGM, D, Loc, isVolatile, Builder);
+ break;
+ case LangOptions::TrivialAutoVarInitKind::Pattern:
+ emitStoresForPatternInit(CGM, D, Loc, isVolatile, Builder);
+ break;
+ }
+ return;
+ }
+
+ // VLAs look zero-sized to getTypeInfo. We can't emit constant stores to
+ // them, so emit a memcpy with the VLA size to initialize each element.
+ // Technically zero-sized or negative-sized VLAs are undefined, and UBSan
+ // will catch that code, but there exists code which generates zero-sized
+ // VLAs. Be nice and initialize whatever they requested.
+ const VariableArrayType *VlaType =
+ dyn_cast_or_null<VariableArrayType>(getContext().getAsArrayType(type));
+ if (!VlaType)
+ return;
+ auto VlaSize = getVLASize(VlaType);
+ auto SizeVal = VlaSize.NumElts;
+ CharUnits EltSize = getContext().getTypeSizeInChars(VlaSize.Type);
+ switch (trivialAutoVarInit) {
+ case LangOptions::TrivialAutoVarInitKind::Uninitialized:
+ llvm_unreachable("Uninitialized handled above");
+
+ case LangOptions::TrivialAutoVarInitKind::Zero:
+ if (!EltSize.isOne())
+ SizeVal = Builder.CreateNUWMul(SizeVal, CGM.getSize(EltSize));
+ Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, 0), SizeVal,
+ isVolatile);
+ break;
+
+ case LangOptions::TrivialAutoVarInitKind::Pattern: {
+ llvm::Type *ElTy = Loc.getElementType();
+ llvm::Constant *Constant = patternFor(CGM, ElTy);
+ CharUnits ConstantAlign = getContext().getTypeAlignInChars(VlaSize.Type);
+ llvm::BasicBlock *SetupBB = createBasicBlock("vla-setup.loop");
+ llvm::BasicBlock *LoopBB = createBasicBlock("vla-init.loop");
+ llvm::BasicBlock *ContBB = createBasicBlock("vla-init.cont");
+ llvm::Value *IsZeroSizedVLA = Builder.CreateICmpEQ(
+ SizeVal, llvm::ConstantInt::get(SizeVal->getType(), 0),
+ "vla.iszerosized");
+ Builder.CreateCondBr(IsZeroSizedVLA, ContBB, SetupBB);
+ EmitBlock(SetupBB);
+ if (!EltSize.isOne())
+ SizeVal = Builder.CreateNUWMul(SizeVal, CGM.getSize(EltSize));
+ llvm::Value *BaseSizeInChars =
+ llvm::ConstantInt::get(IntPtrTy, EltSize.getQuantity());
+ Address Begin = Builder.CreateElementBitCast(Loc, Int8Ty, "vla.begin");
+ llvm::Value *End =
+ Builder.CreateInBoundsGEP(Begin.getPointer(), SizeVal, "vla.end");
+ llvm::BasicBlock *OriginBB = Builder.GetInsertBlock();
+ EmitBlock(LoopBB);
+ llvm::PHINode *Cur = Builder.CreatePHI(Begin.getType(), 2, "vla.cur");
+ Cur->addIncoming(Begin.getPointer(), OriginBB);
+ CharUnits CurAlign = Loc.getAlignment().alignmentOfArrayElement(EltSize);
+ Builder.CreateMemCpy(
+ Address(Cur, CurAlign),
+ createUnnamedGlobalFrom(CGM, D, Builder, Constant, ConstantAlign),
+ BaseSizeInChars, isVolatile);
+ llvm::Value *Next =
+ Builder.CreateInBoundsGEP(Int8Ty, Cur, BaseSizeInChars, "vla.next");
+ llvm::Value *Done = Builder.CreateICmpEQ(Next, End, "vla-init.isdone");
+ Builder.CreateCondBr(Done, ContBB, LoopBB);
+ Cur->addIncoming(Next, LoopBB);
+ EmitBlock(ContBB);
+ } break;
+ }
+ };
+
+ if (isTrivialInitializer(Init)) {
+ initializeWhatIsTechnicallyUninitialized();
+ return;
+ }
llvm::Constant *constant = nullptr;
if (emission.IsConstantAggregate || D.isConstexpr()) {
assert(!capturedByInit && "constant init contains a capturing block?");
constant = ConstantEmitter(*this).tryEmitAbstractForInitializer(D);
+ if (constant && trivialAutoVarInit !=
+ LangOptions::TrivialAutoVarInitKind::Uninitialized)
+ constant = replaceUndef(constant);
}
if (!constant) {
+ initializeWhatIsTechnicallyUninitialized();
LValue lv = MakeAddrLValue(Loc, type);
lv.setNonGC(true);
return EmitExprAsInit(Init, &D, lv, capturedByInit);
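A small usage sketch of the behavior added above (the names consume, f, and scratch are made up, and the __attribute__((uninitialized)) spelling is an assumption about the attribute behind UninitializedAttr): with zero or pattern trivial-auto-var-init selected in the LangOptions, x is filled before the branch, while scratch explicitly opts out; constexpr locals and variables with a real initializer are left to their normal initialization.

    int consume(char *);
    int f(bool b) {
      int x;                                            // zero- or pattern-filled first
      __attribute__((uninitialized)) char scratch[256]; // opted out via UninitializedAttr
      if (b)
        x = 1;                                          // ordinary init still happens
      return consume(scratch) + x;
    }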
@@ -1496,61 +1740,11 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) {
return EmitStoreThroughLValue(RValue::get(constant), lv, true);
}
- // If this is a simple aggregate initialization, we can optimize it
- // in various ways.
- bool isVolatile = type.isVolatileQualified();
-
- llvm::Value *SizeVal =
- llvm::ConstantInt::get(IntPtrTy,
- getContext().getTypeSizeInChars(type).getQuantity());
-
llvm::Type *BP = CGM.Int8Ty->getPointerTo(Loc.getAddressSpace());
if (Loc.getType() != BP)
Loc = Builder.CreateBitCast(Loc, BP);
- // If the initializer is all or mostly the same, codegen with bzero / memset
- // then do a few stores afterward.
- uint64_t ConstantSize =
- CGM.getDataLayout().getTypeAllocSize(constant->getType());
- if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) {
- Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, 0), SizeVal,
- isVolatile);
- // Zero and undef don't require a stores.
- if (!constant->isNullValue() && !isa<llvm::UndefValue>(constant)) {
- Loc = Builder.CreateBitCast(Loc,
- constant->getType()->getPointerTo(Loc.getAddressSpace()));
- emitStoresForInitAfterBZero(CGM, constant, Loc, isVolatile, Builder);
- }
- return;
- }
-
- BytePattern Pattern = shouldUseMemSetToInitialize(constant, ConstantSize);
- if (!Pattern.isNone()) {
- uint8_t Value = Pattern.isAny() ? 0x00 : Pattern.getValue();
- Builder.CreateMemSet(Loc, llvm::ConstantInt::get(Int8Ty, Value), SizeVal,
- isVolatile);
- return;
- }
-
- // Otherwise, create a temporary global with the initializer then
- // memcpy from the global to the alloca.
- std::string Name = getStaticDeclName(CGM, D);
- unsigned AS = CGM.getContext().getTargetAddressSpace(
- CGM.getStringLiteralAddressSpace());
- BP = llvm::PointerType::getInt8PtrTy(getLLVMContext(), AS);
-
- llvm::GlobalVariable *GV = new llvm::GlobalVariable(
- CGM.getModule(), constant->getType(), true,
- llvm::GlobalValue::PrivateLinkage, constant, Name, nullptr,
- llvm::GlobalValue::NotThreadLocal, AS);
- GV->setAlignment(Loc.getAlignment().getQuantity());
- GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
-
- Address SrcPtr = Address(GV, Loc.getAlignment());
- if (SrcPtr.getType() != BP)
- SrcPtr = Builder.CreateBitCast(SrcPtr, BP);
-
- Builder.CreateMemCpy(Loc, SrcPtr, SizeVal, isVolatile);
+ emitStoresForConstant(CGM, D, Loc, isVolatile, Builder, constant);
}
/// Emit an expression as an initializer for an object (variable, field, etc.)
@@ -1712,12 +1906,14 @@ void CodeGenFunction::EmitAutoVarCleanups(const AutoVarEmission &emission) {
// If this is a block variable, call _Block_object_destroy
// (on the unforwarded address). Don't enter this cleanup if we're in pure-GC
// mode.
- if (emission.IsByRef && CGM.getLangOpts().getGC() != LangOptions::GCOnly) {
+ if (emission.IsEscapingByRef &&
+ CGM.getLangOpts().getGC() != LangOptions::GCOnly) {
BlockFieldFlags Flags = BLOCK_FIELD_IS_BYREF;
if (emission.Variable->getType().isObjCGCWeak())
Flags |= BLOCK_FIELD_IS_WEAK;
enterByrefCleanup(NormalAndEHCleanup, emission.Addr, Flags,
- /*LoadBlockVarAddr*/ false);
+ /*LoadBlockVarAddr*/ false,
+ cxxDestructorCanThrow(emission.Variable->getType()));
}
}
@@ -2134,15 +2330,11 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg,
// cleanup to do the release at the end of the function.
bool isConsumed = D.hasAttr<NSConsumedAttr>();
- // 'self' is always formally __strong, but if this is not an
- // init method then we don't want to retain it.
+ // If a parameter is pseudo-strong then we can omit the implicit retain.
if (D.isARCPseudoStrong()) {
- const ObjCMethodDecl *method = cast<ObjCMethodDecl>(CurCodeDecl);
- assert(&D == method->getSelfDecl());
- assert(lt == Qualifiers::OCL_Strong);
- assert(qs.hasConst());
- assert(method->getMethodFamily() != OMF_init);
- (void) method;
+ assert(lt == Qualifiers::OCL_Strong &&
+ "pseudo-strong variable isn't strong?");
+ assert(qs.hasConst() && "pseudo-strong variable should be const!");
lt = Qualifiers::OCL_ExplicitNone;
}
@@ -2224,3 +2416,7 @@ void CodeGenModule::EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D,
return;
getOpenMPRuntime().emitUserDefinedReduction(CGF, D);
}
+
+void CodeGenModule::EmitOMPRequiresDecl(const OMPRequiresDecl *D) {
+ getOpenMPRuntime().checkArchForUnifiedAddressing(*this, D);
+}
diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp
index 510863f68eff..9aa31f181e99 100644
--- a/lib/CodeGen/CGDeclCXX.cpp
+++ b/lib/CodeGen/CGDeclCXX.cpp
@@ -15,7 +15,7 @@
#include "CGCXXABI.h"
#include "CGObjCRuntime.h"
#include "CGOpenMPRuntime.h"
-#include "clang/Frontend/CodeGenOptions.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
@@ -26,7 +26,10 @@ using namespace CodeGen;
static void EmitDeclInit(CodeGenFunction &CGF, const VarDecl &D,
ConstantAddress DeclPtr) {
- assert(D.hasGlobalStorage() && "VarDecl must have global storage!");
+ assert(
+ (D.hasGlobalStorage() ||
+ (D.hasLocalStorage() && CGF.getContext().getLangOpts().OpenCLCPlusPlus)) &&
+ "VarDecl must have global or local (in the case of OpenCL) storage!");
assert(!D.getType()->isReferenceType() &&
"Should not call EmitDeclInit on a reference!");
@@ -63,15 +66,24 @@ static void EmitDeclInit(CodeGenFunction &CGF, const VarDecl &D,
/// Emit code to cause the destruction of the given variable with
/// static storage duration.
static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D,
- ConstantAddress addr) {
+ ConstantAddress Addr) {
+ // Honor __attribute__((no_destroy)) and bail instead of attempting
+ // to emit a reference to a possibly nonexistent destructor, which
+ // in turn can cause a crash. This will result in a global constructor
+ // that isn't balanced out by a destructor call as intended by the
+ // attribute. This also checks for -fno-c++-static-destructors and
+ // bails even if the attribute is not present.
+ if (D.isNoDestroy(CGF.getContext()))
+ return;
+
CodeGenModule &CGM = CGF.CGM;
// FIXME: __attribute__((cleanup)) ?
- QualType type = D.getType();
- QualType::DestructionKind dtorKind = type.isDestructedType();
+ QualType Type = D.getType();
+ QualType::DestructionKind DtorKind = Type.isDestructedType();
- switch (dtorKind) {
+ switch (DtorKind) {
case QualType::DK_none:
return;
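Illustration of what the new early return honors (the Logger/Log names are made up, and the [[clang::no_destroy]] spelling is an assumption about the attribute behind isNoDestroy): no destructor registration is emitted for Log, so the "global constructor that isn't balanced out by a destructor call" caveat from the comment above applies.

    struct Logger {
      Logger();
      ~Logger();
    };
    [[clang::no_destroy]] Logger Log; // constructed at startup, never destroyed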
@@ -86,13 +98,14 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D,
return;
}
- llvm::Constant *function;
- llvm::Constant *argument;
+ llvm::Constant *Func;
+ llvm::Constant *Argument;
// Special-case non-array C++ destructors, if they have the right signature.
// Under some ABIs, destructors return this instead of void, and cannot be
- // passed directly to __cxa_atexit if the target does not allow this mismatch.
- const CXXRecordDecl *Record = type->getAsCXXRecordDecl();
+ // passed directly to __cxa_atexit if the target does not allow this
+ // mismatch.
+ const CXXRecordDecl *Record = Type->getAsCXXRecordDecl();
bool CanRegisterDestructor =
Record && (!CGM.getCXXABI().HasThisReturn(
GlobalDecl(Record->getDestructor(), Dtor_Complete)) ||
@@ -103,43 +116,47 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D,
bool UsingExternalHelper = !CGM.getCodeGenOpts().CXAAtExit;
if (Record && (CanRegisterDestructor || UsingExternalHelper)) {
assert(!Record->hasTrivialDestructor());
- CXXDestructorDecl *dtor = Record->getDestructor();
+ CXXDestructorDecl *Dtor = Record->getDestructor();
- function = CGM.getAddrOfCXXStructor(dtor, StructorType::Complete);
- argument = llvm::ConstantExpr::getBitCast(
- addr.getPointer(), CGF.getTypes().ConvertType(type)->getPointerTo());
+ Func = CGM.getAddrOfCXXStructor(Dtor, StructorType::Complete);
+ Argument = llvm::ConstantExpr::getBitCast(
+ Addr.getPointer(), CGF.getTypes().ConvertType(Type)->getPointerTo());
// Otherwise, the standard logic requires a helper function.
} else {
- function = CodeGenFunction(CGM)
- .generateDestroyHelper(addr, type, CGF.getDestroyer(dtorKind),
- CGF.needsEHCleanup(dtorKind), &D);
- argument = llvm::Constant::getNullValue(CGF.Int8PtrTy);
+ Func = CodeGenFunction(CGM)
+ .generateDestroyHelper(Addr, Type, CGF.getDestroyer(DtorKind),
+ CGF.needsEHCleanup(DtorKind), &D);
+ Argument = llvm::Constant::getNullValue(CGF.Int8PtrTy);
}
- CGM.getCXXABI().registerGlobalDtor(CGF, D, function, argument);
+ CGM.getCXXABI().registerGlobalDtor(CGF, D, Func, Argument);
}
/// Emit code to cause the variable at the given address to be considered as
/// constant from this point onwards.
static void EmitDeclInvariant(CodeGenFunction &CGF, const VarDecl &D,
llvm::Constant *Addr) {
+ return CGF.EmitInvariantStart(
+ Addr, CGF.getContext().getTypeSizeInChars(D.getType()));
+}
+
+void CodeGenFunction::EmitInvariantStart(llvm::Constant *Addr, CharUnits Size) {
// Do not emit the intrinsic if we're not optimizing.
- if (!CGF.CGM.getCodeGenOpts().OptimizationLevel)
+ if (!CGM.getCodeGenOpts().OptimizationLevel)
return;
// Grab the llvm.invariant.start intrinsic.
llvm::Intrinsic::ID InvStartID = llvm::Intrinsic::invariant_start;
// Overloaded address space type.
- llvm::Type *ObjectPtr[1] = {CGF.Int8PtrTy};
- llvm::Constant *InvariantStart = CGF.CGM.getIntrinsic(InvStartID, ObjectPtr);
+ llvm::Type *ObjectPtr[1] = {Int8PtrTy};
+ llvm::Constant *InvariantStart = CGM.getIntrinsic(InvStartID, ObjectPtr);
// Emit a call with the size in bytes of the object.
- CharUnits WidthChars = CGF.getContext().getTypeSizeInChars(D.getType());
- uint64_t Width = WidthChars.getQuantity();
- llvm::Value *Args[2] = { llvm::ConstantInt::getSigned(CGF.Int64Ty, Width),
- llvm::ConstantExpr::getBitCast(Addr, CGF.Int8PtrTy)};
- CGF.Builder.CreateCall(InvariantStart, Args);
+ uint64_t Width = Size.getQuantity();
+ llvm::Value *Args[2] = { llvm::ConstantInt::getSigned(Int64Ty, Width),
+ llvm::ConstantExpr::getBitCast(Addr, Int8PtrTy)};
+ Builder.CreateCall(InvariantStart, Args);
}
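
A minimal sketch of when EmitInvariantStart fires, assuming optimization is enabled and a namespace-scope constant with a dynamic initializer (names illustrative); the emitted call uses the llvm.invariant.start intrinsic referenced above with the object's size in bytes.

    // Hypothetical: after the store of f()'s result into x, an
    // llvm.invariant.start call covering sizeof(int) bytes of x is emitted,
    // telling the optimizer the value can no longer change.
    extern int f();
    const int x = f();
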
void CodeGenFunction::EmitCXXGlobalVarDeclInit(const VarDecl &D,
@@ -347,6 +364,10 @@ llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction(
!isInSanitizerBlacklist(SanitizerKind::Memory, Fn, Loc))
Fn->addFnAttr(llvm::Attribute::SanitizeMemory);
+ if (getLangOpts().Sanitize.has(SanitizerKind::KernelMemory) &&
+ !isInSanitizerBlacklist(SanitizerKind::KernelMemory, Fn, Loc))
+ Fn->addFnAttr(llvm::Attribute::SanitizeMemory);
+
if (getLangOpts().Sanitize.has(SanitizerKind::SafeStack) &&
!isInSanitizerBlacklist(SanitizerKind::SafeStack, Fn, Loc))
Fn->addFnAttr(llvm::Attribute::SafeStack);
@@ -355,6 +376,22 @@ llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction(
!isInSanitizerBlacklist(SanitizerKind::ShadowCallStack, Fn, Loc))
Fn->addFnAttr(llvm::Attribute::ShadowCallStack);
+ auto RASignKind = getCodeGenOpts().getSignReturnAddress();
+ if (RASignKind != CodeGenOptions::SignReturnAddressScope::None) {
+ Fn->addFnAttr("sign-return-address",
+ RASignKind == CodeGenOptions::SignReturnAddressScope::All
+ ? "all"
+ : "non-leaf");
+ auto RASignKey = getCodeGenOpts().getSignReturnAddressKey();
+ Fn->addFnAttr("sign-return-address-key",
+ RASignKey == CodeGenOptions::SignReturnAddressKeyValue::AKey
+ ? "a_key"
+ : "b_key");
+ }
+
+ if (getCodeGenOpts().BranchTargetEnforcement)
+ Fn->addFnAttr("branch-target-enforcement");
+
return Fn;
}
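
For the return-address-signing and branch-target-enforcement handling added above, a rough sketch of what lands on a generated global-init function, assuming non-leaf signing with the A key is requested (function name illustrative):

    // _GLOBAL__sub_I_example.cpp would carry the string attributes
    //   "sign-return-address"="non-leaf"
    //   "sign-return-address-key"="a_key"
    //   "branch-target-enforcement"
    // mirroring the Fn->addFnAttr calls in the hunk above.
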
@@ -565,7 +602,7 @@ void CodeGenFunction::GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn,
if (D->hasAttr<NoDebugAttr>())
DebugInfo = nullptr; // disable debug info indefinitely for this function
- CurEHLocation = D->getLocStart();
+ CurEHLocation = D->getBeginLoc();
StartFunction(GlobalDecl(D), getContext().VoidTy, Fn,
getTypes().arrangeNullaryFunction(),
@@ -587,7 +624,7 @@ void CodeGenFunction::GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn,
void
CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn,
ArrayRef<llvm::Function *> Decls,
- Address Guard) {
+ ConstantAddress Guard) {
{
auto NL = ApplyDebugLocation::CreateEmpty(*this);
StartFunction(GlobalDecl(), getContext().VoidTy, Fn,
@@ -611,6 +648,12 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn,
// initializers use previously-initialized thread_local vars, that's
// probably supposed to be OK, but the standard doesn't say.
Builder.CreateStore(llvm::ConstantInt::get(GuardVal->getType(),1), Guard);
+
+ // The guard variable can't ever change again.
+ EmitInvariantStart(
+ Guard.getPointer(),
+ CharUnits::fromQuantity(
+ CGM.getDataLayout().getTypeAllocSize(GuardVal->getType())));
}
RunCleanupsScope Scope(*this);
@@ -679,7 +722,7 @@ llvm::Function *CodeGenFunction::generateDestroyHelper(
llvm::Function *fn = CGM.CreateGlobalInitOrDestructFunction(
FTy, "__cxx_global_array_dtor", FI, VD->getLocation());
- CurEHLocation = VD->getLocStart();
+ CurEHLocation = VD->getBeginLoc();
StartFunction(VD, getContext().VoidTy, fn, FI, args);
diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp
index a2ff102e1ab4..5756e13d2623 100644
--- a/lib/CodeGen/CGException.cpp
+++ b/lib/CodeGen/CGException.cpp
@@ -66,7 +66,7 @@ llvm::Constant *CodeGenModule::getTerminateFn() {
name = "__std_terminate";
else
name = "?terminate@@YAXXZ";
- } else if (getLangOpts().ObjC1 &&
+ } else if (getLangOpts().ObjC &&
getLangOpts().ObjCRuntime.hasTerminate())
name = "objc_terminate";
else
@@ -224,7 +224,7 @@ const EHPersonality &EHPersonality::get(CodeGenModule &CGM,
if (FD && FD->usesSEHTry())
return getSEHPersonalityMSVC(T);
- if (L.ObjC1)
+ if (L.ObjC)
return L.CPlusPlus ? getObjCXXPersonality(Target, L)
: getObjCPersonality(Target, L);
return L.CPlusPlus ? getCXXPersonality(Target, L)
@@ -250,7 +250,11 @@ static llvm::Constant *getPersonalityFn(CodeGenModule &CGM,
static llvm::Constant *getOpaquePersonalityFn(CodeGenModule &CGM,
const EHPersonality &Personality) {
llvm::Constant *Fn = getPersonalityFn(CGM, Personality);
- return llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
+ llvm::PointerType* Int8PtrTy = llvm::PointerType::get(
+ llvm::Type::getInt8Ty(CGM.getLLVMContext()),
+ CGM.getDataLayout().getProgramAddressSpace());
+
+ return llvm::ConstantExpr::getBitCast(Fn, Int8PtrTy);
}
/// Check whether a landingpad instruction only uses C++ features.
@@ -315,7 +319,7 @@ static bool PersonalityHasOnlyCXXUses(llvm::Constant *Fn) {
/// when it really needs it.
void CodeGenModule::SimplifyPersonality() {
// If we're not in ObjC++ -fexceptions, there's nothing to do.
- if (!LangOpts.CPlusPlus || !LangOpts.ObjC1 || !LangOpts.Exceptions)
+ if (!LangOpts.CPlusPlus || !LangOpts.ObjC || !LangOpts.Exceptions)
return;
// Both the problem this endeavors to fix and the way the logic
@@ -1248,7 +1252,7 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) {
// we follow the false destination for each of the cond branches to reach
// the rethrow block.
llvm::BasicBlock *RethrowBlock = WasmCatchStartBlock;
- while (llvm::TerminatorInst *TI = RethrowBlock->getTerminator()) {
+ while (llvm::Instruction *TI = RethrowBlock->getTerminator()) {
auto *BI = cast<llvm::BranchInst>(TI);
assert(BI->isConditional());
RethrowBlock = BI->getSuccessor(1);
@@ -1623,8 +1627,16 @@ struct PerformSEHFinally final : EHScopeStack::Cleanup {
// Compute the two argument values.
QualType ArgTys[2] = {Context.UnsignedCharTy, Context.VoidPtrTy};
- llvm::Value *LocalAddrFn = CGM.getIntrinsic(llvm::Intrinsic::localaddress);
- llvm::Value *FP = CGF.Builder.CreateCall(LocalAddrFn);
+ llvm::Value *FP = nullptr;
+    // If CGF.IsOutlinedSEHHelper is true, then we are within a finally block.
+ if (CGF.IsOutlinedSEHHelper) {
+ FP = &CGF.CurFn->arg_begin()[1];
+ } else {
+ llvm::Value *LocalAddrFn =
+ CGM.getIntrinsic(llvm::Intrinsic::localaddress);
+ FP = CGF.Builder.CreateCall(LocalAddrFn);
+ }
+
llvm::Value *IsForEH =
llvm::ConstantInt::get(CGF.ConvertType(ArgTys[0]), F.isForEHCleanup());
Args.add(RValue::get(IsForEH), ArgTys[0]);
@@ -1777,7 +1789,7 @@ void CodeGenFunction::EmitCapturedLocals(CodeGenFunction &ParentCGF,
// frame pointer of the parent function. We only need to do this in filters,
// since finally funclets recover the parent FP for us.
llvm::Function *RecoverFPIntrin =
- CGM.getIntrinsic(llvm::Intrinsic::x86_seh_recoverfp);
+ CGM.getIntrinsic(llvm::Intrinsic::eh_recoverfp);
llvm::Constant *ParentI8Fn =
llvm::ConstantExpr::getBitCast(ParentCGF.CurFn, Int8PtrTy);
ParentFP = Builder.CreateCall(RecoverFPIntrin, {ParentI8Fn, EntryFP});
@@ -1823,13 +1835,13 @@ void CodeGenFunction::EmitCapturedLocals(CodeGenFunction &ParentCGF,
void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF,
bool IsFilter,
const Stmt *OutlinedStmt) {
- SourceLocation StartLoc = OutlinedStmt->getLocStart();
+ SourceLocation StartLoc = OutlinedStmt->getBeginLoc();
// Get the mangled function name.
SmallString<128> Name;
{
llvm::raw_svector_ostream OS(Name);
- const FunctionDecl *ParentSEHFn = ParentCGF.CurSEHParent;
+ const NamedDecl *ParentSEHFn = ParentCGF.CurSEHParent;
assert(ParentSEHFn && "No CurSEHParent!");
MangleContext &Mangler = CGM.getCXXABI().getMangleContext();
if (IsFilter)
@@ -1871,10 +1883,10 @@ void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF,
IsOutlinedSEHHelper = true;
StartFunction(GlobalDecl(), RetTy, Fn, FnInfo, Args,
- OutlinedStmt->getLocStart(), OutlinedStmt->getLocStart());
+ OutlinedStmt->getBeginLoc(), OutlinedStmt->getBeginLoc());
CurSEHParent = ParentCGF.CurSEHParent;
- CGM.SetLLVMFunctionAttributes(nullptr, FnInfo, CurFn);
+ CGM.SetLLVMFunctionAttributes(GlobalDecl(), FnInfo, CurFn);
EmitCapturedLocals(ParentCGF, OutlinedStmt, IsFilter);
}
@@ -1893,7 +1905,7 @@ CodeGenFunction::GenerateSEHFilterFunction(CodeGenFunction &ParentCGF,
FilterExpr->getType()->isSignedIntegerType());
Builder.CreateStore(R, ReturnValue);
- FinishFunction(FilterExpr->getLocEnd());
+ FinishFunction(FilterExpr->getEndLoc());
return CurFn;
}
@@ -1907,7 +1919,7 @@ CodeGenFunction::GenerateSEHFinallyFunction(CodeGenFunction &ParentCGF,
// Emit the original filter expression, convert to i32, and return.
EmitStmt(FinallyBlock);
- FinishFunction(FinallyBlock->getLocEnd());
+ FinishFunction(FinallyBlock->getEndLoc());
return CurFn;
}
@@ -1972,6 +1984,11 @@ llvm::Value *CodeGenFunction::EmitSEHAbnormalTermination() {
return Builder.CreateZExt(&*AI, Int32Ty);
}
+void CodeGenFunction::pushSEHCleanup(CleanupKind Kind,
+ llvm::Function *FinallyFunc) {
+ EHStack.pushCleanup<PerformSEHFinally>(Kind, FinallyFunc);
+}
+
void CodeGenFunction::EnterSEHTryStmt(const SEHTryStmt &S) {
CodeGenFunction HelperCGF(CGM, /*suppressNewContext=*/true);
if (const SEHFinallyStmt *Finally = S.getFinallyHandler()) {
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index f168dd02ead1..34a921e2dc00 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -26,7 +26,7 @@
#include "clang/AST/Attr.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/NSAPI.h"
-#include "clang/Frontend/CodeGenOptions.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/DataLayout.h"
@@ -419,8 +419,12 @@ LValue CodeGenFunction::
EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) {
const Expr *E = M->GetTemporaryExpr();
- // FIXME: ideally this would use EmitAnyExprToMem, however, we cannot do so
- // as that will cause the lifetime adjustment to be lost for ARC
+ assert((!M->getExtendingDecl() || !isa<VarDecl>(M->getExtendingDecl()) ||
+ !cast<VarDecl>(M->getExtendingDecl())->isARCPseudoStrong()) &&
+ "Reference should never be pseudo-strong!");
+
+ // FIXME: ideally this would use EmitAnyExprToMem, however, we cannot do so
+ // as that will cause the lifetime adjustment to be lost for ARC
auto ownership = M->getType().getObjCLifetime();
if (ownership != Qualifiers::OCL_None &&
ownership != Qualifiers::OCL_ExplicitNone) {
@@ -498,18 +502,51 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) {
} else {
switch (M->getStorageDuration()) {
case SD_Automatic:
- case SD_FullExpression:
if (auto *Size = EmitLifetimeStart(
CGM.getDataLayout().getTypeAllocSize(Alloca.getElementType()),
Alloca.getPointer())) {
- if (M->getStorageDuration() == SD_Automatic)
- pushCleanupAfterFullExpr<CallLifetimeEnd>(NormalEHLifetimeMarker,
- Alloca, Size);
- else
- pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, Alloca,
- Size);
+ pushCleanupAfterFullExpr<CallLifetimeEnd>(NormalEHLifetimeMarker,
+ Alloca, Size);
+ }
+ break;
+
+ case SD_FullExpression: {
+ if (!ShouldEmitLifetimeMarkers)
+ break;
+
+ // Avoid creating a conditional cleanup just to hold an llvm.lifetime.end
+ // marker. Instead, start the lifetime of a conditional temporary earlier
+ // so that it's unconditional. Don't do this in ASan's use-after-scope
+ // mode so that it gets the more precise lifetime marks. If the type has
+ // a non-trivial destructor, we'll have a cleanup block for it anyway,
+ // so this typically doesn't help; skip it in that case.
+ ConditionalEvaluation *OldConditional = nullptr;
+ CGBuilderTy::InsertPoint OldIP;
+ if (isInConditionalBranch() && !E->getType().isDestructedType() &&
+ !CGM.getCodeGenOpts().SanitizeAddressUseAfterScope) {
+ OldConditional = OutermostConditional;
+ OutermostConditional = nullptr;
+
+ OldIP = Builder.saveIP();
+ llvm::BasicBlock *Block = OldConditional->getStartingBlock();
+ Builder.restoreIP(CGBuilderTy::InsertPoint(
+ Block, llvm::BasicBlock::iterator(Block->back())));
+ }
+
+ if (auto *Size = EmitLifetimeStart(
+ CGM.getDataLayout().getTypeAllocSize(Alloca.getElementType()),
+ Alloca.getPointer())) {
+ pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, Alloca,
+ Size);
+ }
+
+ if (OldConditional) {
+ OutermostConditional = OldConditional;
+ Builder.restoreIP(OldIP);
}
break;
+ }
+
default:
break;
}
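
A hedged example of the SD_FullExpression case above: when a temporary is materialized on only one arm of a conditional, its lifetime.start would otherwise sit in a conditional block, so the start marker is hoisted to the block that opened the conditional evaluation (types and names below are illustrative).

    struct T { int x = 0; };
    int use(const T &t) { return t.x; }
    int f(int *p) {
      // T() is only constructed when p is non-null; without the hoisting
      // above, the llvm.lifetime.start for its alloca would be emitted
      // under the branch.
      return p ? use(T()) : 0;
    }
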
@@ -1043,7 +1080,7 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E,
EmitVTablePtrCheckForCast(PT->getPointeeType(), Addr.getPointer(),
/*MayBeNull=*/true,
CodeGenFunction::CFITCK_UnrelatedCast,
- CE->getLocStart());
+ CE->getBeginLoc());
}
return CE->getCastKind() != CK_AddressSpaceConversion
? Builder.CreateBitCast(Addr, ConvertType(E->getType()))
@@ -1227,6 +1264,8 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) {
return EmitVAArgExprLValue(cast<VAArgExpr>(E));
case Expr::DeclRefExprClass:
return EmitDeclRefLValue(cast<DeclRefExpr>(E));
+ case Expr::ConstantExprClass:
+ return EmitLValue(cast<ConstantExpr>(E)->getSubExpr());
case Expr::ParenExprClass:
return EmitLValue(cast<ParenExpr>(E)->getSubExpr());
case Expr::GenericSelectionExprClass:
@@ -1458,6 +1497,16 @@ CodeGenFunction::tryEmitAsConstant(const MemberExpr *ME) {
return ConstantEmission();
}
+llvm::Value *CodeGenFunction::emitScalarConstant(
+ const CodeGenFunction::ConstantEmission &Constant, Expr *E) {
+ assert(Constant && "not a constant");
+ if (Constant.isReference())
+ return EmitLoadOfLValue(Constant.getReferenceLValue(*this, E),
+ E->getExprLoc())
+ .getScalarVal();
+ return Constant.getValue();
+}
+
llvm::Value *CodeGenFunction::EmitLoadOfScalar(LValue lvalue,
SourceLocation Loc) {
return EmitLoadOfScalar(lvalue.getAddress(), lvalue.isVolatile(),
@@ -2237,18 +2286,14 @@ static LValue EmitThreadPrivateVarDeclLValue(
static Address emitDeclTargetLinkVarDeclLValue(CodeGenFunction &CGF,
const VarDecl *VD, QualType T) {
- for (const auto *D : VD->redecls()) {
- if (!VD->hasAttrs())
- continue;
- if (const auto *Attr = D->getAttr<OMPDeclareTargetDeclAttr>())
- if (Attr->getMapType() == OMPDeclareTargetDeclAttr::MT_Link) {
- QualType PtrTy = CGF.getContext().getPointerType(VD->getType());
- Address Addr =
- CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
- return CGF.EmitLoadOfPointer(Addr, PtrTy->castAs<PointerType>());
- }
- }
- return Address::invalid();
+ llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
+ OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
+ if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_To)
+ return Address::invalid();
+ assert(*Res == OMPDeclareTargetDeclAttr::MT_Link && "Expected link clause");
+ QualType PtrTy = CGF.getContext().getPointerType(VD->getType());
+ Address Addr = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
+ return CGF.EmitLoadOfPointer(Addr, PtrTy->castAs<PointerType>());
}
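
The rewritten lookup above keys off the OpenMP declare target 'link' map type; a minimal sketch of a declaration that takes this path (OpenMP 4.5 directive form, variable name illustrative):

    int BigTable[1024];
    #pragma omp declare target link(BigTable)
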
Address
@@ -2408,6 +2453,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
// A DeclRefExpr for a reference initialized by a constant expression can
// appear without being odr-used. Directly emit the constant initializer.
const Expr *Init = VD->getAnyInitializer(VD);
+ const auto *BD = dyn_cast_or_null<BlockDecl>(CurCodeDecl);
if (Init && !isa<ParmVarDecl>(VD) && VD->getType()->isReferenceType() &&
VD->isUsableInConstantExpressions(getContext()) &&
VD->checkInitIsICE() &&
@@ -2417,7 +2463,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
(LocalDeclMap.count(VD->getCanonicalDecl()) ||
CapturedStmtInfo->lookup(VD->getCanonicalDecl()))) ||
LambdaCaptureFields.lookup(VD->getCanonicalDecl()) ||
- isa<BlockDecl>(CurCodeDecl)))) {
+ (BD && BD->capturesVariable(VD))))) {
llvm::Constant *Val =
ConstantEmitter(*this).emitAbstract(E->getLocation(),
*VD->evaluateValue(),
@@ -2456,7 +2502,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
}
assert(isa<BlockDecl>(CurCodeDecl));
- Address addr = GetAddrOfBlockDecl(VD, VD->hasAttr<BlocksAttr>());
+ Address addr = GetAddrOfBlockDecl(VD);
return MakeAddrLValue(addr, T, AlignmentSource::Decl);
}
}
@@ -2508,7 +2554,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
}
// Drill into block byref variables.
- bool isBlockByref = VD->hasAttr<BlocksAttr>();
+ bool isBlockByref = VD->isEscapingByref();
if (isBlockByref) {
addr = emitBlockByrefAddress(addr, VD);
}
@@ -2571,7 +2617,7 @@ LValue CodeGenFunction::EmitUnaryOpLValue(const UnaryOperator *E) {
// of a pointer to object; as in void foo (__weak id *param); *param = 0;
// But, we continue to generate __strong write barrier on indirect write
// into a pointer to object.
- if (getLangOpts().ObjC1 &&
+ if (getLangOpts().ObjC &&
getLangOpts().getGC() != LangOptions::NonGC &&
LV.isObjCWeak())
LV.setNonGC(!E->isOBJCGCCandidate(getContext()));
@@ -2632,7 +2678,7 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) {
if (FnName.startswith("\01"))
FnName = FnName.substr(1);
StringRef NameItems[] = {
- PredefinedExpr::getIdentTypeName(E->getIdentType()), FnName};
+ PredefinedExpr::getIdentKindName(E->getIdentKind()), FnName};
std::string GVName = llvm::join(NameItems, NameItems + 2, ".");
if (auto *BD = dyn_cast_or_null<BlockDecl>(CurCodeDecl)) {
std::string Name = SL->getString();
@@ -2837,6 +2883,11 @@ static void emitCheckHandlerCall(CodeGenFunction &CGF,
CheckRecoverableKind RecoverKind, bool IsFatal,
llvm::BasicBlock *ContBB) {
assert(IsFatal || RecoverKind != CheckRecoverableKind::Unrecoverable);
+ Optional<ApplyDebugLocation> DL;
+ if (!CGF.Builder.getCurrentDebugLocation()) {
+ // Ensure that the call has at least an artificial debug location.
+ DL.emplace(CGF, SourceLocation());
+ }
bool NeedsAbortSuffix =
IsFatal && RecoverKind != CheckRecoverableKind::Unrecoverable;
bool MinimalRuntime = CGF.CGM.getCodeGenOpts().SanitizeMinimalRuntime;
@@ -3448,7 +3499,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
LValue LV = MakeAddrLValue(Addr, E->getType(), EltBaseInfo, EltTBAAInfo);
- if (getLangOpts().ObjC1 &&
+ if (getLangOpts().ObjC &&
getLangOpts().getGC() != LangOptions::NonGC) {
LV.setNonGC(!E->isOBJCGCCandidate(getContext()));
setObjCGCLValueClass(getContext(), E, LV);
@@ -3901,7 +3952,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
LValue RefLVal = MakeAddrLValue(addr, FieldType, FieldBaseInfo,
FieldTBAAInfo);
if (RecordCVR & Qualifiers::Volatile)
- RefLVal.getQuals().setVolatile(true);
+ RefLVal.getQuals().addVolatile();
addr = EmitLoadOfReference(RefLVal, &FieldBaseInfo, &FieldTBAAInfo);
// Qualifiers on the struct don't apply to the referencee.
@@ -4121,8 +4172,9 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
case CK_ARCReclaimReturnedObject:
case CK_ARCExtendBlockObject:
case CK_CopyAndAutoreleaseBlockObject:
- case CK_AddressSpaceConversion:
case CK_IntToOCLSampler:
+ case CK_FixedPointCast:
+ case CK_FixedPointToBoolean:
return EmitUnsupportedLValue(E, "unexpected cast lvalue");
case CK_Dependent:
@@ -4193,8 +4245,8 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
if (SanOpts.has(SanitizerKind::CFIDerivedCast))
EmitVTablePtrCheckForCast(E->getType(), Derived.getPointer(),
- /*MayBeNull=*/false,
- CFITCK_DerivedCast, E->getLocStart());
+ /*MayBeNull=*/false, CFITCK_DerivedCast,
+ E->getBeginLoc());
return MakeAddrLValue(Derived, E->getType(), LV.getBaseInfo(),
CGM.getTBAAInfoForSubobject(LV, E->getType()));
@@ -4210,12 +4262,21 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
if (SanOpts.has(SanitizerKind::CFIUnrelatedCast))
EmitVTablePtrCheckForCast(E->getType(), V.getPointer(),
- /*MayBeNull=*/false,
- CFITCK_UnrelatedCast, E->getLocStart());
+ /*MayBeNull=*/false, CFITCK_UnrelatedCast,
+ E->getBeginLoc());
return MakeAddrLValue(V, E->getType(), LV.getBaseInfo(),
CGM.getTBAAInfoForSubobject(LV, E->getType()));
}
+ case CK_AddressSpaceConversion: {
+ LValue LV = EmitLValue(E->getSubExpr());
+ QualType DestTy = getContext().getPointerType(E->getType());
+ llvm::Value *V = getTargetHooks().performAddrSpaceCast(
+ *this, LV.getPointer(), E->getSubExpr()->getType().getAddressSpace(),
+ E->getType().getAddressSpace(), ConvertType(DestTy));
+ return MakeAddrLValue(Address(V, LV.getAddress().getAlignment()),
+ E->getType(), LV.getBaseInfo(), LV.getTBAAInfo());
+ }
case CK_ObjCObjectLValueCast: {
LValue LV = EmitLValue(E->getSubExpr());
Address V = Builder.CreateElementBitCast(LV.getAddress(),
@@ -4223,10 +4284,8 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
return MakeAddrLValue(V, E->getType(), LV.getBaseInfo(),
CGM.getTBAAInfoForSubobject(LV, E->getType()));
}
- case CK_ZeroToOCLQueue:
- llvm_unreachable("NULL to OpenCL queue lvalue cast is not valid");
- case CK_ZeroToOCLEvent:
- llvm_unreachable("NULL to OpenCL event lvalue cast is not valid");
+ case CK_ZeroToOCLOpaqueType:
+ llvm_unreachable("NULL to OpenCL opaque type lvalue cast is not valid");
}
llvm_unreachable("Unhandled lvalue cast kind?");
@@ -4333,7 +4392,7 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, const FunctionDecl *FD) {
}
llvm::Constant *calleePtr = EmitFunctionDeclPointer(CGF.CGM, FD);
- return CGCallee::forDirect(calleePtr, FD);
+ return CGCallee::forDirect(calleePtr, GlobalDecl(FD));
}
CGCallee CodeGenFunction::EmitCallee(const Expr *E) {
@@ -4377,8 +4436,13 @@ CGCallee CodeGenFunction::EmitCallee(const Expr *E) {
calleePtr = EmitLValue(E).getPointer();
}
assert(functionType->isFunctionType());
- CGCalleeInfo calleeInfo(functionType->getAs<FunctionProtoType>(),
- E->getReferencedDeclOfCallee());
+
+ GlobalDecl GD;
+ if (const auto *VD =
+ dyn_cast_or_null<VarDecl>(E->getReferencedDeclOfCallee()))
+ GD = GlobalDecl(VD);
+
+ CGCalleeInfo calleeInfo(functionType->getAs<FunctionProtoType>(), GD);
CGCallee callee(calleeInfo, calleePtr);
return callee;
}
@@ -4563,7 +4627,8 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
assert(CalleeType->isFunctionPointerType() &&
"Call must have function pointer type!");
- const Decl *TargetDecl = OrigCallee.getAbstractInfo().getCalleeDecl();
+ const Decl *TargetDecl =
+ OrigCallee.getAbstractInfo().getCalleeDecl().getDecl();
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl))
// We can only guarantee that a function is called from the correct
@@ -4620,10 +4685,8 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
DecodeAddrUsedInPrologue(CalleePtr, CalleeRTTIEncoded);
llvm::Value *CalleeRTTIMatch =
Builder.CreateICmpEQ(CalleeRTTI, FTRTTIConst);
- llvm::Constant *StaticData[] = {
- EmitCheckSourceLocation(E->getLocStart()),
- EmitCheckTypeDescriptor(CalleeType)
- };
+ llvm::Constant *StaticData[] = {EmitCheckSourceLocation(E->getBeginLoc()),
+ EmitCheckTypeDescriptor(CalleeType)};
EmitCheck(std::make_pair(CalleeRTTIMatch, SanitizerKind::Function),
SanitizerHandler::FunctionTypeMismatch, StaticData, CalleePtr);
@@ -4657,7 +4720,7 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
auto CrossDsoTypeId = CGM.CreateCrossDsoCfiTypeId(MD);
llvm::Constant *StaticData[] = {
llvm::ConstantInt::get(Int8Ty, CFITCK_ICall),
- EmitCheckSourceLocation(E->getLocStart()),
+ EmitCheckSourceLocation(E->getBeginLoc()),
EmitCheckTypeDescriptor(QualType(FnType, 0)),
};
if (CGM.getCodeGenOpts().SanitizeCfiCrossDso && CrossDsoTypeId) {
diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp
index 62641102861c..db49b3f28a59 100644
--- a/lib/CodeGen/CGExprAgg.cpp
+++ b/lib/CodeGen/CGExprAgg.cpp
@@ -125,6 +125,10 @@ public:
return Visit(E->getReplacement());
}
+ void VisitConstantExpr(ConstantExpr *E) {
+ return Visit(E->getSubExpr());
+ }
+
// l-values.
void VisitDeclRefExpr(DeclRefExpr *E) { EmitAggLoadOfLValue(E); }
void VisitMemberExpr(MemberExpr *ME) { EmitAggLoadOfLValue(ME); }
@@ -847,10 +851,11 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) {
case CK_ARCExtendBlockObject:
case CK_CopyAndAutoreleaseBlockObject:
case CK_BuiltinFnToFnPtr:
- case CK_ZeroToOCLEvent:
- case CK_ZeroToOCLQueue:
+ case CK_ZeroToOCLOpaqueType:
case CK_AddressSpaceConversion:
case CK_IntToOCLSampler:
+ case CK_FixedPointCast:
+ case CK_FixedPointToBoolean:
llvm_unreachable("cast kind invalid for aggregate types");
}
}
diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp
index f29ef754c03f..884ce96859c5 100644
--- a/lib/CodeGen/CGExprCXX.cpp
+++ b/lib/CodeGen/CGExprCXX.cpp
@@ -17,8 +17,8 @@
#include "CGDebugInfo.h"
#include "CGObjCRuntime.h"
#include "ConstantEmitter.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/CodeGen/CGFunctionInfo.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Intrinsics.h"
@@ -177,7 +177,8 @@ RValue CodeGenFunction::EmitCXXMemberCallExpr(const CXXMemberCallExpr *CE,
if (MD->isStatic()) {
// The method is static, emit it as we would a regular call.
- CGCallee callee = CGCallee::forDirect(CGM.GetAddrOfFunction(MD), MD);
+ CGCallee callee =
+ CGCallee::forDirect(CGM.GetAddrOfFunction(MD), GlobalDecl(MD));
return EmitCall(getContext().getPointerType(MD->getType()), callee, CE,
ReturnValue);
}
@@ -353,13 +354,13 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
else if (!DevirtualizedMethod)
Callee = CGCallee::forDirect(
CGM.getAddrOfCXXStructor(Dtor, StructorType::Complete, FInfo, Ty),
- Dtor);
+ GlobalDecl(Dtor, Dtor_Complete));
else {
const CXXDestructorDecl *DDtor =
cast<CXXDestructorDecl>(DevirtualizedMethod);
Callee = CGCallee::forDirect(
- CGM.GetAddrOfFunction(GlobalDecl(DDtor, Dtor_Complete), Ty),
- DDtor);
+ CGM.GetAddrOfFunction(GlobalDecl(DDtor, Dtor_Complete), Ty),
+ GlobalDecl(DDtor, Dtor_Complete));
}
EmitCXXMemberOrOperatorCall(
CalleeDecl, Callee, ReturnValue, This.getPointer(),
@@ -371,8 +372,8 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
CGCallee Callee;
if (const CXXConstructorDecl *Ctor = dyn_cast<CXXConstructorDecl>(MD)) {
Callee = CGCallee::forDirect(
- CGM.GetAddrOfFunction(GlobalDecl(Ctor, Ctor_Complete), Ty),
- Ctor);
+ CGM.GetAddrOfFunction(GlobalDecl(Ctor, Ctor_Complete), Ty),
+ GlobalDecl(Ctor, Ctor_Complete));
} else if (UseVirtualCall) {
Callee = CGCallee::forVirtual(CE, MD, This.getAddress(), Ty);
} else {
@@ -383,17 +384,18 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
std::tie(VTable, RD) =
CGM.getCXXABI().LoadVTablePtr(*this, This.getAddress(),
MD->getParent());
- EmitVTablePtrCheckForCall(RD, VTable, CFITCK_NVCall, CE->getLocStart());
+ EmitVTablePtrCheckForCall(RD, VTable, CFITCK_NVCall, CE->getBeginLoc());
}
if (getLangOpts().AppleKext && MD->isVirtual() && HasQualifier)
Callee = BuildAppleKextVirtualCall(MD, Qualifier, Ty);
else if (!DevirtualizedMethod)
- Callee = CGCallee::forDirect(CGM.GetAddrOfFunction(MD, Ty), MD);
+ Callee =
+ CGCallee::forDirect(CGM.GetAddrOfFunction(MD, Ty), GlobalDecl(MD));
else {
- Callee = CGCallee::forDirect(
- CGM.GetAddrOfFunction(DevirtualizedMethod, Ty),
- DevirtualizedMethod);
+ Callee =
+ CGCallee::forDirect(CGM.GetAddrOfFunction(DevirtualizedMethod, Ty),
+ GlobalDecl(DevirtualizedMethod));
}
}
@@ -1293,7 +1295,7 @@ static RValue EmitNewDeleteCall(CodeGenFunction &CGF,
const CallArgList &Args) {
llvm::Instruction *CallOrInvoke;
llvm::Constant *CalleePtr = CGF.CGM.GetAddrOfFunction(CalleeDecl);
- CGCallee Callee = CGCallee::forDirect(CalleePtr, CalleeDecl);
+ CGCallee Callee = CGCallee::forDirect(CalleePtr, GlobalDecl(CalleeDecl));
RValue RV =
CGF.EmitCall(CGF.CGM.getTypes().arrangeFreeFunctionCall(
Args, CalleeType, /*chainCall=*/false),
@@ -1654,9 +1656,10 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) {
// Emit a null check on the allocation result if the allocation
// function is allowed to return null (because it has a non-throwing
// exception spec or is the reserved placement new) and we have an
- // interesting initializer.
- bool nullCheck = E->shouldNullCheckAllocation(getContext()) &&
- (!allocType.isPODType(getContext()) || E->hasInitializer());
+  // interesting initializer or will be running sanitizers on the initialization.
+ bool nullCheck = E->shouldNullCheckAllocation() &&
+ (!allocType.isPODType(getContext()) || E->hasInitializer() ||
+ sanitizePerformTypeCheck());
llvm::BasicBlock *nullCheckBB = nullptr;
llvm::BasicBlock *contBB = nullptr;
@@ -2252,7 +2255,6 @@ llvm::Value *CodeGenFunction::EmitDynamicCast(Address ThisAddr,
}
void CodeGenFunction::EmitLambdaExpr(const LambdaExpr *E, AggValueSlot Slot) {
- RunCleanupsScope Scope(*this);
LValue SlotLV = MakeAddrLValue(Slot.getAddress(), E->getType());
CXXRecordDecl::field_iterator CurField = E->getLambdaClass()->field_begin();
diff --git a/lib/CodeGen/CGExprComplex.cpp b/lib/CodeGen/CGExprComplex.cpp
index fb176093a741..2db693b44c90 100644
--- a/lib/CodeGen/CGExprComplex.cpp
+++ b/lib/CodeGen/CGExprComplex.cpp
@@ -101,6 +101,9 @@ public:
llvm_unreachable("Stmt can't have complex result type!");
}
ComplexPairTy VisitExpr(Expr *S);
+ ComplexPairTy VisitConstantExpr(ConstantExpr *E) {
+ return Visit(E->getSubExpr());
+ }
ComplexPairTy VisitParenExpr(ParenExpr *PE) { return Visit(PE->getSubExpr());}
ComplexPairTy VisitGenericSelectionExpr(GenericSelectionExpr *GE) {
return Visit(GE->getResultExpr());
@@ -505,10 +508,11 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastKind CK, Expr *Op,
case CK_ARCExtendBlockObject:
case CK_CopyAndAutoreleaseBlockObject:
case CK_BuiltinFnToFnPtr:
- case CK_ZeroToOCLEvent:
- case CK_ZeroToOCLQueue:
+ case CK_ZeroToOCLOpaqueType:
case CK_AddressSpaceConversion:
case CK_IntToOCLSampler:
+ case CK_FixedPointCast:
+ case CK_FixedPointToBoolean:
llvm_unreachable("invalid cast kind for complex value");
case CK_FloatingRealToComplex:
diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp
index 68766479a539..c9475840aeeb 100644
--- a/lib/CodeGen/CGExprConstant.cpp
+++ b/lib/CodeGen/CGExprConstant.cpp
@@ -47,7 +47,7 @@ class ConstStructBuilder {
public:
static llvm::Constant *BuildStruct(ConstantEmitter &Emitter,
ConstExprEmitter *ExprEmitter,
- llvm::ConstantStruct *Base,
+ llvm::Constant *Base,
InitListExpr *Updater,
QualType ValTy);
static llvm::Constant *BuildStruct(ConstantEmitter &Emitter,
@@ -76,7 +76,7 @@ private:
void ConvertStructToPacked();
bool Build(InitListExpr *ILE);
- bool Build(ConstExprEmitter *Emitter, llvm::ConstantStruct *Base,
+ bool Build(ConstExprEmitter *Emitter, llvm::Constant *Base,
InitListExpr *Updater);
bool Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase,
const CXXRecordDecl *VTableClass, CharUnits BaseOffset);
@@ -566,7 +566,7 @@ llvm::Constant *ConstStructBuilder::Finalize(QualType Ty) {
llvm::Constant *ConstStructBuilder::BuildStruct(ConstantEmitter &Emitter,
ConstExprEmitter *ExprEmitter,
- llvm::ConstantStruct *Base,
+ llvm::Constant *Base,
InitListExpr *Updater,
QualType ValTy) {
ConstStructBuilder Builder(Emitter);
@@ -723,6 +723,10 @@ public:
return nullptr;
}
+ llvm::Constant *VisitConstantExpr(ConstantExpr *CE, QualType T) {
+ return Visit(CE->getSubExpr(), T);
+ }
+
llvm::Constant *VisitParenExpr(ParenExpr *PE, QualType T) {
return Visit(PE->getSubExpr(), T);
}
@@ -869,8 +873,9 @@ public:
case CK_FloatingToIntegral:
case CK_FloatingToBoolean:
case CK_FloatingCast:
- case CK_ZeroToOCLEvent:
- case CK_ZeroToOCLQueue:
+ case CK_FixedPointCast:
+ case CK_FixedPointToBoolean:
+ case CK_ZeroToOCLOpaqueType:
return nullptr;
}
llvm_unreachable("Invalid CastKind");
@@ -1026,8 +1031,8 @@ public:
}
if (destType->isRecordType())
- return ConstStructBuilder::BuildStruct(Emitter, this,
- dyn_cast<llvm::ConstantStruct>(Base), Updater, destType);
+ return ConstStructBuilder::BuildStruct(Emitter, this, Base, Updater,
+ destType);
return nullptr;
}
@@ -1102,7 +1107,7 @@ public:
} // end anonymous namespace.
bool ConstStructBuilder::Build(ConstExprEmitter *ExprEmitter,
- llvm::ConstantStruct *Base,
+ llvm::Constant *Base,
InitListExpr *Updater) {
assert(Base && "base expression should not be empty");
@@ -1110,7 +1115,7 @@ bool ConstStructBuilder::Build(ConstExprEmitter *ExprEmitter,
RecordDecl *RD = ExprType->getAs<RecordType>()->getDecl();
const ASTRecordLayout &Layout = CGM.getContext().getASTRecordLayout(RD);
const llvm::StructLayout *BaseLayout = CGM.getDataLayout().getStructLayout(
- Base->getType());
+ cast<llvm::StructType>(Base->getType()));
unsigned FieldNo = -1;
unsigned ElementNo = 0;
@@ -1131,7 +1136,7 @@ bool ConstStructBuilder::Build(ConstExprEmitter *ExprEmitter,
if (Field->isUnnamedBitfield())
continue;
- llvm::Constant *EltInit = Base->getOperand(ElementNo);
+ llvm::Constant *EltInit = Base->getAggregateElement(ElementNo);
// Bail out if the type of the ConstantStruct does not have the same layout
// as the type of the InitListExpr.
@@ -1450,6 +1455,7 @@ llvm::Constant *ConstantEmitter::tryEmitPrivateForVarInit(const VarDecl &D) {
if (CD->isTrivial() && CD->isDefaultConstructor())
return CGM.EmitNullConstant(D.getType());
}
+ InConstantContext = true;
}
QualType destType = D.getType();
@@ -1547,7 +1553,7 @@ llvm::Constant *ConstantEmitter::tryEmitPrivate(const Expr *E,
if (destType->isReferenceType())
Success = E->EvaluateAsLValue(Result, CGM.getContext());
else
- Success = E->EvaluateAsRValue(Result, CGM.getContext());
+ Success = E->EvaluateAsRValue(Result, CGM.getContext(), InConstantContext);
llvm::Constant *C;
if (Success && !Result.HasSideEffects)
@@ -1600,6 +1606,7 @@ private:
ConstantLValue tryEmitBase(const APValue::LValueBase &base);
ConstantLValue VisitStmt(const Stmt *S) { return nullptr; }
+ ConstantLValue VisitConstantExpr(const ConstantExpr *E);
ConstantLValue VisitCompoundLiteralExpr(const CompoundLiteralExpr *E);
ConstantLValue VisitStringLiteral(const StringLiteral *E);
ConstantLValue VisitObjCEncodeExpr(const ObjCEncodeExpr *E);
@@ -1755,6 +1762,11 @@ ConstantLValueEmitter::tryEmitBase(const APValue::LValueBase &base) {
}
ConstantLValue
+ConstantLValueEmitter::VisitConstantExpr(const ConstantExpr *E) {
+ return Visit(E->getSubExpr());
+}
+
+ConstantLValue
ConstantLValueEmitter::VisitCompoundLiteralExpr(const CompoundLiteralExpr *E) {
return tryEmitGlobalCompoundLiteral(CGM, Emitter.CGF, E);
}
@@ -1782,7 +1794,7 @@ ConstantLValueEmitter::VisitPredefinedExpr(const PredefinedExpr *E) {
return cast<ConstantAddress>(Res.getAddress());
}
- auto kind = E->getIdentType();
+ auto kind = E->getIdentKind();
if (kind == PredefinedExpr::PrettyFunction) {
return CGM.GetAddrOfConstantCString("top level", ".tmp");
}
@@ -1968,6 +1980,16 @@ llvm::Constant *ConstantEmitter::tryEmitPrivate(const APValue &Value,
Elts.push_back(C);
}
+ // This means that the array type is probably "IncompleteType" or some
+ // type that is not ConstantArray.
+ if (CAT == nullptr && CommonElementType == nullptr && !NumInitElts) {
+ const ArrayType *AT = CGM.getContext().getAsArrayType(DestType);
+ CommonElementType = CGM.getTypes().ConvertType(AT->getElementType());
+ llvm::ArrayType *AType = llvm::ArrayType::get(CommonElementType,
+ NumElements);
+ return llvm::ConstantAggregateZero::get(AType);
+ }
+
return EmitArrayConstant(CGM, CAT, CommonElementType, NumElements, Elts,
Filler);
}
diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp
index c62588c68272..1c14d4c99a23 100644
--- a/lib/CodeGen/CGExprScalar.cpp
+++ b/lib/CodeGen/CGExprScalar.cpp
@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#include "CodeGenFunction.h"
-#include "CGCleanup.h"
#include "CGCXXABI.h"
+#include "CGCleanup.h"
#include "CGDebugInfo.h"
#include "CGObjCRuntime.h"
+#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
@@ -23,8 +23,9 @@
#include "clang/AST/Expr.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/StmtVisitor.h"
+#include "clang/Basic/CodeGenOptions.h"
+#include "clang/Basic/FixedPoint.h"
#include "clang/Basic/TargetInfo.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/Optional.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@@ -257,8 +258,11 @@ public:
AVAttr = TTy->getDecl()->getAttr<AlignValueAttr>();
} else {
// Assumptions for function parameters are emitted at the start of the
- // function, so there is no need to repeat that here.
- if (isa<ParmVarDecl>(VD))
+ // function, so there is no need to repeat that here,
+      // unless the alignment-assumption sanitizer is enabled, in which case
+      // we prefer the assumption over the alignment attribute on the IR
+      // function parameter.
+ if (isa<ParmVarDecl>(VD) && !CGF.SanOpts.has(SanitizerKind::Alignment))
return;
AVAttr = VD->getAttr<AlignValueAttr>();
@@ -275,7 +279,8 @@ public:
Value *AlignmentValue = CGF.EmitScalarExpr(AVAttr->getAlignment());
llvm::ConstantInt *AlignmentCI = cast<llvm::ConstantInt>(AlignmentValue);
- CGF.EmitAlignmentAssumption(V, AlignmentCI->getZExtValue());
+ CGF.EmitAlignmentAssumption(V, E, AVAttr->getLocation(),
+ AlignmentCI->getZExtValue());
}
/// EmitLoadOfLValue - Given an expression with complex type that represents a
@@ -302,7 +307,11 @@ public:
/// Known implicit conversion check kinds.
/// Keep in sync with the enum of the same name in ubsan_handlers.h
enum ImplicitConversionCheckKind : unsigned char {
- ICCK_IntegerTruncation = 0,
+ ICCK_IntegerTruncation = 0, // Legacy, was only used by clang 7.
+ ICCK_UnsignedIntegerTruncation = 1,
+ ICCK_SignedIntegerTruncation = 2,
+ ICCK_IntegerSignChange = 3,
+ ICCK_SignedIntegerTruncationOrSignChange = 4,
};
/// Emit a check that an [implicit] truncation of an integer does not
@@ -310,21 +319,39 @@ public:
void EmitIntegerTruncationCheck(Value *Src, QualType SrcType, Value *Dst,
QualType DstType, SourceLocation Loc);
+ /// Emit a check that an [implicit] conversion of an integer does not change
+ /// the sign of the value. It is not UB, so we use the value after conversion.
+ /// NOTE: Src and Dst may be the exact same value! (point to the same thing)
+ void EmitIntegerSignChangeCheck(Value *Src, QualType SrcType, Value *Dst,
+ QualType DstType, SourceLocation Loc);
+
/// Emit a conversion from the specified type to the specified destination
/// type, both of which are LLVM scalar types.
struct ScalarConversionOpts {
bool TreatBooleanAsSigned;
bool EmitImplicitIntegerTruncationChecks;
+ bool EmitImplicitIntegerSignChangeChecks;
ScalarConversionOpts()
: TreatBooleanAsSigned(false),
- EmitImplicitIntegerTruncationChecks(false) {}
+ EmitImplicitIntegerTruncationChecks(false),
+ EmitImplicitIntegerSignChangeChecks(false) {}
+
+ ScalarConversionOpts(clang::SanitizerSet SanOpts)
+ : TreatBooleanAsSigned(false),
+ EmitImplicitIntegerTruncationChecks(
+ SanOpts.hasOneOf(SanitizerKind::ImplicitIntegerTruncation)),
+ EmitImplicitIntegerSignChangeChecks(
+ SanOpts.has(SanitizerKind::ImplicitIntegerSignChange)) {}
};
Value *
EmitScalarConversion(Value *Src, QualType SrcTy, QualType DstTy,
SourceLocation Loc,
ScalarConversionOpts Opts = ScalarConversionOpts());
+ Value *EmitFixedPointConversion(Value *Src, QualType SrcTy, QualType DstTy,
+ SourceLocation Loc);
+
/// Emit a conversion from the specified complex type to the specified
/// destination type, where the destination type is an LLVM scalar type.
Value *EmitComplexToScalarConversion(CodeGenFunction::ComplexPairTy Src,
@@ -382,6 +409,9 @@ public:
}
Value *VisitExpr(Expr *S);
+ Value *VisitConstantExpr(ConstantExpr *E) {
+ return Visit(E->getSubExpr());
+ }
Value *VisitParenExpr(ParenExpr *PE) {
return Visit(PE->getSubExpr());
}
@@ -450,19 +480,10 @@ public:
return CGF.getOrCreateOpaqueRValueMapping(E).getScalarVal();
}
- Value *emitConstant(const CodeGenFunction::ConstantEmission &Constant,
- Expr *E) {
- assert(Constant && "not a constant");
- if (Constant.isReference())
- return EmitLoadOfLValue(Constant.getReferenceLValue(CGF, E),
- E->getExprLoc());
- return Constant.getValue();
- }
-
// l-values.
Value *VisitDeclRefExpr(DeclRefExpr *E) {
if (CodeGenFunction::ConstantEmission Constant = CGF.tryEmitAsConstant(E))
- return emitConstant(Constant, E);
+ return CGF.emitScalarConstant(Constant, E);
return EmitLoadOfLValue(E);
}
@@ -664,7 +685,7 @@ public:
case LangOptions::SOB_Undefined:
if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow))
return Builder.CreateNSWMul(Ops.LHS, Ops.RHS, "mul");
- // Fall through.
+ LLVM_FALLTHROUGH;
case LangOptions::SOB_Trapping:
if (CanElideOverflowCheck(CGF.getContext(), Ops))
return Builder.CreateNSWMul(Ops.LHS, Ops.RHS, "mul");
@@ -941,48 +962,233 @@ void ScalarExprEmitter::EmitFloatConversionCheck(
SanitizerHandler::FloatCastOverflow, StaticArgs, OrigSrc);
}
+// Should be called within CodeGenFunction::SanitizerScope RAII scope.
+// Returns 'i1 false' when the truncation Src -> Dst was lossy.
+static std::pair<ScalarExprEmitter::ImplicitConversionCheckKind,
+ std::pair<llvm::Value *, SanitizerMask>>
+EmitIntegerTruncationCheckHelper(Value *Src, QualType SrcType, Value *Dst,
+ QualType DstType, CGBuilderTy &Builder) {
+ llvm::Type *SrcTy = Src->getType();
+ llvm::Type *DstTy = Dst->getType();
+ (void)DstTy; // Only used in assert()
+
+ // This should be truncation of integral types.
+ assert(Src != Dst);
+ assert(SrcTy->getScalarSizeInBits() > Dst->getType()->getScalarSizeInBits());
+ assert(isa<llvm::IntegerType>(SrcTy) && isa<llvm::IntegerType>(DstTy) &&
+ "non-integer llvm type");
+
+ bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType();
+ bool DstSigned = DstType->isSignedIntegerOrEnumerationType();
+
+ // If both (src and dst) types are unsigned, then it's an unsigned truncation.
+ // Else, it is a signed truncation.
+ ScalarExprEmitter::ImplicitConversionCheckKind Kind;
+ SanitizerMask Mask;
+ if (!SrcSigned && !DstSigned) {
+ Kind = ScalarExprEmitter::ICCK_UnsignedIntegerTruncation;
+ Mask = SanitizerKind::ImplicitUnsignedIntegerTruncation;
+ } else {
+ Kind = ScalarExprEmitter::ICCK_SignedIntegerTruncation;
+ Mask = SanitizerKind::ImplicitSignedIntegerTruncation;
+ }
+
+ llvm::Value *Check = nullptr;
+ // 1. Extend the truncated value back to the same width as the Src.
+ Check = Builder.CreateIntCast(Dst, SrcTy, DstSigned, "anyext");
+ // 2. Equality-compare with the original source value
+ Check = Builder.CreateICmpEQ(Check, Src, "truncheck");
+ // If the comparison result is 'i1 false', then the truncation was lossy.
+ return std::make_pair(Kind, std::make_pair(Check, Mask));
+}
+
void ScalarExprEmitter::EmitIntegerTruncationCheck(Value *Src, QualType SrcType,
Value *Dst, QualType DstType,
SourceLocation Loc) {
- if (!CGF.SanOpts.has(SanitizerKind::ImplicitIntegerTruncation))
+ if (!CGF.SanOpts.hasOneOf(SanitizerKind::ImplicitIntegerTruncation))
return;
- llvm::Type *SrcTy = Src->getType();
- llvm::Type *DstTy = Dst->getType();
-
// We only care about int->int conversions here.
// We ignore conversions to/from pointer and/or bool.
if (!(SrcType->isIntegerType() && DstType->isIntegerType()))
return;
- assert(isa<llvm::IntegerType>(SrcTy) && isa<llvm::IntegerType>(DstTy) &&
- "clang integer type lowered to non-integer llvm type");
-
- unsigned SrcBits = SrcTy->getScalarSizeInBits();
- unsigned DstBits = DstTy->getScalarSizeInBits();
+ unsigned SrcBits = Src->getType()->getScalarSizeInBits();
+ unsigned DstBits = Dst->getType()->getScalarSizeInBits();
// This must be truncation. Else we do not care.
if (SrcBits <= DstBits)
return;
assert(!DstType->isBooleanType() && "we should not get here with booleans.");
+ // If the integer sign change sanitizer is enabled,
+ // and we are truncating from larger unsigned type to smaller signed type,
+ // let that next sanitizer deal with it.
+ bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType();
+ bool DstSigned = DstType->isSignedIntegerOrEnumerationType();
+ if (CGF.SanOpts.has(SanitizerKind::ImplicitIntegerSignChange) &&
+ (!SrcSigned && DstSigned))
+ return;
+
CodeGenFunction::SanitizerScope SanScope(&CGF);
+ std::pair<ScalarExprEmitter::ImplicitConversionCheckKind,
+ std::pair<llvm::Value *, SanitizerMask>>
+ Check =
+ EmitIntegerTruncationCheckHelper(Src, SrcType, Dst, DstType, Builder);
+ // If the comparison result is 'i1 false', then the truncation was lossy.
+
+ // Do we care about this type of truncation?
+ if (!CGF.SanOpts.has(Check.second.second))
+ return;
+
+ llvm::Constant *StaticArgs[] = {
+ CGF.EmitCheckSourceLocation(Loc), CGF.EmitCheckTypeDescriptor(SrcType),
+ CGF.EmitCheckTypeDescriptor(DstType),
+ llvm::ConstantInt::get(Builder.getInt8Ty(), Check.first)};
+ CGF.EmitCheck(Check.second, SanitizerHandler::ImplicitConversion, StaticArgs,
+ {Src, Dst});
+}
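
For context, a small example of the implicit conversion these grouped truncation checks flag (sketch; assumes the corresponding implicit-integer-truncation sanitizers are enabled):

    unsigned char narrow(unsigned int x) {
      // For x == 0x12345 the result is 0x45; re-extending 0x45 to 32 bits no
      // longer compares equal to x, so the "truncheck" comparison is false
      // and the ImplicitConversion handler runs.
      return x;
    }
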
+
+// Should be called within CodeGenFunction::SanitizerScope RAII scope.
+// Returns 'i1 false' when the conversion Src -> Dst changed the sign.
+static std::pair<ScalarExprEmitter::ImplicitConversionCheckKind,
+ std::pair<llvm::Value *, SanitizerMask>>
+EmitIntegerSignChangeCheckHelper(Value *Src, QualType SrcType, Value *Dst,
+ QualType DstType, CGBuilderTy &Builder) {
+ llvm::Type *SrcTy = Src->getType();
+ llvm::Type *DstTy = Dst->getType();
+
+ assert(isa<llvm::IntegerType>(SrcTy) && isa<llvm::IntegerType>(DstTy) &&
+ "non-integer llvm type");
+
+ bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType();
+ bool DstSigned = DstType->isSignedIntegerOrEnumerationType();
+ (void)SrcSigned; // Only used in assert()
+ (void)DstSigned; // Only used in assert()
+ unsigned SrcBits = SrcTy->getScalarSizeInBits();
+ unsigned DstBits = DstTy->getScalarSizeInBits();
+ (void)SrcBits; // Only used in assert()
+ (void)DstBits; // Only used in assert()
+
+ assert(((SrcBits != DstBits) || (SrcSigned != DstSigned)) &&
+ "either the widths should be different, or the signednesses.");
+
+ // NOTE: zero value is considered to be non-negative.
+ auto EmitIsNegativeTest = [&Builder](Value *V, QualType VType,
+ const char *Name) -> Value * {
+ // Is this value a signed type?
+ bool VSigned = VType->isSignedIntegerOrEnumerationType();
+ llvm::Type *VTy = V->getType();
+ if (!VSigned) {
+ // If the value is unsigned, then it is never negative.
+ // FIXME: can we encounter non-scalar VTy here?
+ return llvm::ConstantInt::getFalse(VTy->getContext());
+ }
+ // Get the zero of the same type with which we will be comparing.
+ llvm::Constant *Zero = llvm::ConstantInt::get(VTy, 0);
+ // %V.isnegative = icmp slt %V, 0
+ // I.e is %V *strictly* less than zero, does it have negative value?
+ return Builder.CreateICmp(llvm::ICmpInst::ICMP_SLT, V, Zero,
+ llvm::Twine(Name) + "." + V->getName() +
+ ".negativitycheck");
+ };
+
+ // 1. Was the old Value negative?
+ llvm::Value *SrcIsNegative = EmitIsNegativeTest(Src, SrcType, "src");
+ // 2. Is the new Value negative?
+ llvm::Value *DstIsNegative = EmitIsNegativeTest(Dst, DstType, "dst");
+ // 3. Now, was the 'negativity status' preserved during the conversion?
+ // NOTE: conversion from negative to zero is considered to change the sign.
+ // (We want to get 'false' when the conversion changed the sign)
+ // So we should just equality-compare the negativity statuses.
llvm::Value *Check = nullptr;
+ Check = Builder.CreateICmpEQ(SrcIsNegative, DstIsNegative, "signchangecheck");
+ // If the comparison result is 'false', then the conversion changed the sign.
+ return std::make_pair(
+ ScalarExprEmitter::ICCK_IntegerSignChange,
+ std::make_pair(Check, SanitizerKind::ImplicitIntegerSignChange));
+}
- // 1. Extend the truncated value back to the same width as the Src.
- bool InputSigned = DstType->isSignedIntegerOrEnumerationType();
- Check = Builder.CreateIntCast(Dst, SrcTy, InputSigned, "anyext");
- // 2. Equality-compare with the original source value
- Check = Builder.CreateICmpEQ(Check, Src, "truncheck");
- // If the comparison result is 'i1 false', then the truncation was lossy.
+void ScalarExprEmitter::EmitIntegerSignChangeCheck(Value *Src, QualType SrcType,
+ Value *Dst, QualType DstType,
+ SourceLocation Loc) {
+ if (!CGF.SanOpts.has(SanitizerKind::ImplicitIntegerSignChange))
+ return;
+
+ llvm::Type *SrcTy = Src->getType();
+ llvm::Type *DstTy = Dst->getType();
+
+ // We only care about int->int conversions here.
+ // We ignore conversions to/from pointer and/or bool.
+ if (!(SrcType->isIntegerType() && DstType->isIntegerType()))
+ return;
+
+ bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType();
+ bool DstSigned = DstType->isSignedIntegerOrEnumerationType();
+ unsigned SrcBits = SrcTy->getScalarSizeInBits();
+ unsigned DstBits = DstTy->getScalarSizeInBits();
+
+ // Now, we do not need to emit the check in *all* of the cases.
+ // We can avoid emitting it in some obvious cases where it would have been
+  // dropped by the optimization passes (instcombine) anyway.
+ // If it's a cast between effectively the same type, no check.
+ // NOTE: this is *not* equivalent to checking the canonical types.
+ if (SrcSigned == DstSigned && SrcBits == DstBits)
+ return;
+ // At least one of the values needs to have signed type.
+ // If both are unsigned, then obviously, neither of them can be negative.
+ if (!SrcSigned && !DstSigned)
+ return;
+ // If the conversion is to *larger* *signed* type, then no check is needed.
+ // Because either sign-extension happens (so the sign will remain),
+ // or zero-extension will happen (the sign bit will be zero.)
+ if ((DstBits > SrcBits) && DstSigned)
+ return;
+ if (CGF.SanOpts.has(SanitizerKind::ImplicitSignedIntegerTruncation) &&
+ (SrcBits > DstBits) && SrcSigned) {
+ // If the signed integer truncation sanitizer is enabled,
+ // and this is a truncation from signed type, then no check is needed.
+ // Because here sign change check is interchangeable with truncation check.
+ return;
+ }
+ // That's it. We can't rule out any more cases with the data we have.
+
+ CodeGenFunction::SanitizerScope SanScope(&CGF);
+
+ std::pair<ScalarExprEmitter::ImplicitConversionCheckKind,
+ std::pair<llvm::Value *, SanitizerMask>>
+ Check;
+
+ // Each of these checks needs to return 'false' when an issue was detected.
+ ImplicitConversionCheckKind CheckKind;
+ llvm::SmallVector<std::pair<llvm::Value *, SanitizerMask>, 2> Checks;
+ // So we can 'and' all the checks together, and still get 'false',
+ // if at least one of the checks detected an issue.
+
+ Check = EmitIntegerSignChangeCheckHelper(Src, SrcType, Dst, DstType, Builder);
+ CheckKind = Check.first;
+ Checks.emplace_back(Check.second);
+
+ if (CGF.SanOpts.has(SanitizerKind::ImplicitSignedIntegerTruncation) &&
+ (SrcBits > DstBits) && !SrcSigned && DstSigned) {
+ // If the signed integer truncation sanitizer was enabled,
+ // and we are truncating from larger unsigned type to smaller signed type,
+ // let's handle the case we skipped in that check.
+ Check =
+ EmitIntegerTruncationCheckHelper(Src, SrcType, Dst, DstType, Builder);
+ CheckKind = ICCK_SignedIntegerTruncationOrSignChange;
+ Checks.emplace_back(Check.second);
+ // If the comparison result is 'i1 false', then the truncation was lossy.
+ }
llvm::Constant *StaticArgs[] = {
CGF.EmitCheckSourceLocation(Loc), CGF.EmitCheckTypeDescriptor(SrcType),
CGF.EmitCheckTypeDescriptor(DstType),
- llvm::ConstantInt::get(Builder.getInt8Ty(), ICCK_IntegerTruncation)};
- CGF.EmitCheck(std::make_pair(Check, SanitizerKind::ImplicitIntegerTruncation),
- SanitizerHandler::ImplicitConversion, StaticArgs, {Src, Dst});
+ llvm::ConstantInt::get(Builder.getInt8Ty(), CheckKind)};
+ // EmitCheck() will 'and' all the checks together.
+ CGF.EmitCheck(Checks, SanitizerHandler::ImplicitConversion, StaticArgs,
+ {Src, Dst});
}
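
Similarly, a hedged example of a conversion the new sign-change check catches, where the negativity of the source and of the converted result differ (assumes 32-bit int):

    unsigned int widen(int x) {
      // For x == -1 the source is negative but the unsigned result 4294967295
      // is not, so "signchangecheck" is false and the runtime handler fires.
      return x;
    }
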
/// Emit a conversion from the specified type to the specified destination type,
@@ -991,6 +1197,27 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType,
QualType DstType,
SourceLocation Loc,
ScalarConversionOpts Opts) {
+ // All conversions involving fixed point types should be handled by the
+ // EmitFixedPoint family functions. This is done to prevent bloating up this
+ // function more, and although fixed point numbers are represented by
+ // integers, we do not want to follow any logic that assumes they should be
+ // treated as integers.
+ // TODO(leonardchan): When necessary, add another if statement checking for
+ // conversions to fixed point types from other types.
+ if (SrcType->isFixedPointType()) {
+ if (DstType->isFixedPointType()) {
+ return EmitFixedPointConversion(Src, SrcType, DstType, Loc);
+ } else if (DstType->isBooleanType()) {
+ // We do not need to check the padding bit on unsigned types if unsigned
+ // padding is enabled because overflow into this bit is undefined
+ // behavior.
+ return Builder.CreateIsNotNull(Src, "tobool");
+ }
+
+ llvm_unreachable(
+ "Unhandled scalar conversion involving a fixed point type.");
+ }
+
QualType NoncanonicalSrcType = SrcType;
QualType NoncanonicalDstType = DstType;
@@ -1036,8 +1263,13 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType,
}
// Ignore conversions like int -> uint.
- if (SrcTy == DstTy)
+ if (SrcTy == DstTy) {
+ if (Opts.EmitImplicitIntegerSignChangeChecks)
+ EmitIntegerSignChangeCheck(Src, NoncanonicalSrcType, Src,
+ NoncanonicalDstType, Loc);
+
return Src;
+ }
// Handle pointer conversions next: pointers can only be converted to/from
// other pointers and integers. Check for pointer types in terms of LLVM, as
@@ -1181,9 +1413,91 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType,
EmitIntegerTruncationCheck(Src, NoncanonicalSrcType, Res,
NoncanonicalDstType, Loc);
+ if (Opts.EmitImplicitIntegerSignChangeChecks)
+ EmitIntegerSignChangeCheck(Src, NoncanonicalSrcType, Res,
+ NoncanonicalDstType, Loc);
+
return Res;
}
+Value *ScalarExprEmitter::EmitFixedPointConversion(Value *Src, QualType SrcTy,
+ QualType DstTy,
+ SourceLocation Loc) {
+ using llvm::APInt;
+ using llvm::ConstantInt;
+ using llvm::Value;
+
+ assert(SrcTy->isFixedPointType());
+ assert(DstTy->isFixedPointType());
+
+ FixedPointSemantics SrcFPSema =
+ CGF.getContext().getFixedPointSemantics(SrcTy);
+ FixedPointSemantics DstFPSema =
+ CGF.getContext().getFixedPointSemantics(DstTy);
+ unsigned SrcWidth = SrcFPSema.getWidth();
+ unsigned DstWidth = DstFPSema.getWidth();
+ unsigned SrcScale = SrcFPSema.getScale();
+ unsigned DstScale = DstFPSema.getScale();
+ bool SrcIsSigned = SrcFPSema.isSigned();
+ bool DstIsSigned = DstFPSema.isSigned();
+
+ llvm::Type *DstIntTy = Builder.getIntNTy(DstWidth);
+
+ Value *Result = Src;
+ unsigned ResultWidth = SrcWidth;
+
+ if (!DstFPSema.isSaturated()) {
+ // Downscale.
+ if (DstScale < SrcScale)
+ Result = SrcIsSigned ?
+ Builder.CreateAShr(Result, SrcScale - DstScale, "downscale") :
+ Builder.CreateLShr(Result, SrcScale - DstScale, "downscale");
+
+ // Resize.
+ Result = Builder.CreateIntCast(Result, DstIntTy, SrcIsSigned, "resize");
+
+ // Upscale.
+ if (DstScale > SrcScale)
+ Result = Builder.CreateShl(Result, DstScale - SrcScale, "upscale");
+ } else {
+ // Adjust the number of fractional bits.
+ if (DstScale > SrcScale) {
+ ResultWidth = SrcWidth + DstScale - SrcScale;
+ llvm::Type *UpscaledTy = Builder.getIntNTy(ResultWidth);
+ Result = Builder.CreateIntCast(Result, UpscaledTy, SrcIsSigned, "resize");
+ Result = Builder.CreateShl(Result, DstScale - SrcScale, "upscale");
+ } else if (DstScale < SrcScale) {
+ Result = SrcIsSigned ?
+ Builder.CreateAShr(Result, SrcScale - DstScale, "downscale") :
+ Builder.CreateLShr(Result, SrcScale - DstScale, "downscale");
+ }
+
+ // Handle saturation.
+ bool LessIntBits = DstFPSema.getIntegralBits() < SrcFPSema.getIntegralBits();
+ if (LessIntBits) {
+ Value *Max = ConstantInt::get(
+ CGF.getLLVMContext(),
+ APFixedPoint::getMax(DstFPSema).getValue().extOrTrunc(ResultWidth));
+ Value *TooHigh = SrcIsSigned ? Builder.CreateICmpSGT(Result, Max)
+ : Builder.CreateICmpUGT(Result, Max);
+ Result = Builder.CreateSelect(TooHigh, Max, Result, "satmax");
+ }
+ // Cannot overflow min to dest type if src is unsigned since all fixed
+ // point types can cover the unsigned min of 0.
+ if (SrcIsSigned && (LessIntBits || !DstIsSigned)) {
+ Value *Min = ConstantInt::get(
+ CGF.getLLVMContext(),
+ APFixedPoint::getMin(DstFPSema).getValue().extOrTrunc(ResultWidth));
+ Value *TooLow = Builder.CreateICmpSLT(Result, Min);
+ Result = Builder.CreateSelect(TooLow, Min, Result, "satmin");
+ }
+
+ // Resize the integer part to get the final destination size.
+ Result = Builder.CreateIntCast(Result, DstIntTy, SrcIsSigned, "resize");
+ }
+ return Result;
+}
+
/// Emit a conversion from the specified complex type to the specified
/// destination type, where the destination type is an LLVM scalar type.
Value *ScalarExprEmitter::EmitComplexToScalarConversion(
@@ -1405,10 +1719,11 @@ Value *ScalarExprEmitter::VisitConvertVectorExpr(ConvertVectorExpr *E) {
Value *ScalarExprEmitter::VisitMemberExpr(MemberExpr *E) {
if (CodeGenFunction::ConstantEmission Constant = CGF.tryEmitAsConstant(E)) {
CGF.EmitIgnoredExpr(E->getBase());
- return emitConstant(Constant, E);
+ return CGF.emitScalarConstant(Constant, E);
} else {
- llvm::APSInt Value;
- if (E->EvaluateAsInt(Value, CGF.getContext(), Expr::SE_AllowSideEffects)) {
+ Expr::EvalResult Result;
+ if (E->EvaluateAsInt(Result, CGF.getContext(), Expr::SE_AllowSideEffects)) {
+ llvm::APSInt Value = Result.Val.getInt();
CGF.EmitIgnoredExpr(E->getBase());
return Builder.getInt(Value);
}
@@ -1681,7 +1996,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
CGF.EmitVTablePtrCheckForCast(PT->getPointeeType(), Src,
/*MayBeNull=*/true,
CodeGenFunction::CFITCK_UnrelatedCast,
- CE->getLocStart());
+ CE->getBeginLoc());
}
if (CGF.CGM.getCodeGenOpts().StrictVTablePointers) {
@@ -1745,11 +2060,10 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
Derived.getPointer(), DestTy->getPointeeType());
if (CGF.SanOpts.has(SanitizerKind::CFIDerivedCast))
- CGF.EmitVTablePtrCheckForCast(DestTy->getPointeeType(),
- Derived.getPointer(),
- /*MayBeNull=*/true,
- CodeGenFunction::CFITCK_DerivedCast,
- CE->getLocStart());
+ CGF.EmitVTablePtrCheckForCast(
+ DestTy->getPointeeType(), Derived.getPointer(),
+ /*MayBeNull=*/true, CodeGenFunction::CFITCK_DerivedCast,
+ CE->getBeginLoc());
return Derived.getPointer();
}
@@ -1875,11 +2189,22 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
return Builder.CreateVectorSplat(NumElements, Elt, "splat");
}
+ case CK_FixedPointCast:
+ return EmitScalarConversion(Visit(E), E->getType(), DestTy,
+ CE->getExprLoc());
+
+ case CK_FixedPointToBoolean:
+ assert(E->getType()->isFixedPointType() &&
+ "Expected src type to be fixed point type");
+ assert(DestTy->isBooleanType() && "Expected dest type to be boolean type");
+ return EmitScalarConversion(Visit(E), E->getType(), DestTy,
+ CE->getExprLoc());
+
case CK_IntegralCast: {
ScalarConversionOpts Opts;
- if (CGF.SanOpts.has(SanitizerKind::ImplicitIntegerTruncation)) {
- if (auto *ICE = dyn_cast<ImplicitCastExpr>(CE))
- Opts.EmitImplicitIntegerTruncationChecks = !ICE->isPartOfExplicitCast();
+ if (auto *ICE = dyn_cast<ImplicitCastExpr>(CE)) {
+ if (!ICE->isPartOfExplicitCast())
+ Opts = ScalarConversionOpts(CGF.SanOpts);
}
return EmitScalarConversion(Visit(E), E->getType(), DestTy,
CE->getExprLoc(), Opts);
@@ -1920,13 +2245,10 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
CE->getExprLoc());
}
- case CK_ZeroToOCLEvent: {
- assert(DestTy->isEventT() && "CK_ZeroToOCLEvent cast on non-event type");
- return llvm::Constant::getNullValue(ConvertType(DestTy));
- }
-
- case CK_ZeroToOCLQueue: {
- assert(DestTy->isQueueT() && "CK_ZeroToOCLQueue cast on non queue_t type");
+ case CK_ZeroToOCLOpaqueType: {
+ assert((DestTy->isEventT() || DestTy->isQueueT() ||
+ DestTy->isOCLIntelSubgroupAVCType()) &&
+           "CK_ZeroToOCLOpaqueType cast on non-opaque type");
return llvm::Constant::getNullValue(ConvertType(DestTy));
}
@@ -1985,7 +2307,7 @@ llvm::Value *ScalarExprEmitter::EmitIncDecConsiderOverflowBehavior(
case LangOptions::SOB_Undefined:
if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow))
return Builder.CreateNSWAdd(InVal, Amount, Name);
- // Fall through.
+ LLVM_FALLTHROUGH;
case LangOptions::SOB_Trapping:
if (!E->canOverflow())
return Builder.CreateNSWAdd(InVal, Amount, Name);
@@ -2280,9 +2602,11 @@ Value *ScalarExprEmitter::VisitUnaryLNot(const UnaryOperator *E) {
Value *ScalarExprEmitter::VisitOffsetOfExpr(OffsetOfExpr *E) {
// Try folding the offsetof to a constant.
- llvm::APSInt Value;
- if (E->EvaluateAsInt(Value, CGF.getContext()))
+ Expr::EvalResult EVResult;
+ if (E->EvaluateAsInt(EVResult, CGF.getContext())) {
+ llvm::APSInt Value = EVResult.Val.getInt();
return Builder.getInt(Value);
+ }
// Loop over the components of the offsetof to compute the value.
unsigned n = E->getNumComponents();
@@ -2551,9 +2875,10 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
// Expand the binary operator.
Result = (this->*Func)(OpInfo);
- // Convert the result back to the LHS type.
- Result =
- EmitScalarConversion(Result, E->getComputationResultType(), LHSTy, Loc);
+  // Convert the result back to the LHS type,
+  // potentially with an Implicit Conversion sanitizer check.
+ Result = EmitScalarConversion(Result, E->getComputationResultType(), LHSTy,
+ Loc, ScalarConversionOpts(CGF.SanOpts));
if (atomicPHI) {
llvm::BasicBlock *opBB = Builder.GetInsertBlock();
@@ -2991,7 +3316,7 @@ Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) {
case LangOptions::SOB_Undefined:
if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow))
return Builder.CreateNSWAdd(op.LHS, op.RHS, "add");
- // Fall through.
+ LLVM_FALLTHROUGH;
case LangOptions::SOB_Trapping:
if (CanElideOverflowCheck(CGF.getContext(), op))
return Builder.CreateNSWAdd(op.LHS, op.RHS, "add");
@@ -3026,7 +3351,7 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) {
case LangOptions::SOB_Undefined:
if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow))
return Builder.CreateNSWSub(op.LHS, op.RHS, "sub");
- // Fall through.
+ LLVM_FALLTHROUGH;
case LangOptions::SOB_Trapping:
if (CanElideOverflowCheck(CGF.getContext(), op))
return Builder.CreateNSWSub(op.LHS, op.RHS, "sub");
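The scale adjustment emitted by the EmitFixedPointConversion hunk above can be pictured on plain host integers. The sketch below is illustrative only (64-bit width, non-saturating path, names not taken from the patch); a real conversion also resizes to the destination width with CreateIntCast, and the saturating path clamps to the destination's maximum and minimum before that final resize.

  #include <cstdint>

  // Rescale a fixed-point value from SrcScale fractional bits to DstScale.
  int64_t rescaleFixedPoint(int64_t Src, unsigned SrcScale, unsigned DstScale,
                            bool SrcIsSigned) {
    int64_t Result = Src;
    if (DstScale < SrcScale) {
      // Downscale: drop fractional bits; an arithmetic shift preserves sign.
      Result = SrcIsSigned
                   ? (Result >> (SrcScale - DstScale))
                   : (int64_t)((uint64_t)Result >> (SrcScale - DstScale));
    } else if (DstScale > SrcScale) {
      // Upscale: append fractional bits (shift through uint64_t to avoid UB).
      Result = (int64_t)((uint64_t)Result << (DstScale - SrcScale));
    }
    return Result;
  }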
diff --git a/lib/CodeGen/CGLoopInfo.cpp b/lib/CodeGen/CGLoopInfo.cpp
index 21e2b8dd8c31..fd0a9c773a2e 100644
--- a/lib/CodeGen/CGLoopInfo.cpp
+++ b/lib/CodeGen/CGLoopInfo.cpp
@@ -10,8 +10,8 @@
#include "CGLoopInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
-#include "clang/Sema/LoopHint.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
@@ -21,14 +21,17 @@ using namespace llvm;
static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs,
const llvm::DebugLoc &StartLoc,
- const llvm::DebugLoc &EndLoc) {
+ const llvm::DebugLoc &EndLoc, MDNode *&AccGroup) {
if (!Attrs.IsParallel && Attrs.VectorizeWidth == 0 &&
Attrs.InterleaveCount == 0 && Attrs.UnrollCount == 0 &&
+ Attrs.UnrollAndJamCount == 0 && !Attrs.PipelineDisabled &&
+ Attrs.PipelineInitiationInterval == 0 &&
Attrs.VectorizeEnable == LoopAttributes::Unspecified &&
Attrs.UnrollEnable == LoopAttributes::Unspecified &&
- Attrs.DistributeEnable == LoopAttributes::Unspecified &&
- !StartLoc && !EndLoc)
+ Attrs.UnrollAndJamEnable == LoopAttributes::Unspecified &&
+ Attrs.DistributeEnable == LoopAttributes::Unspecified && !StartLoc &&
+ !EndLoc)
return nullptr;
SmallVector<Metadata *, 4> Args;
@@ -61,7 +64,7 @@ static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs,
Args.push_back(MDNode::get(Ctx, Vals));
}
- // Setting interleave.count
+ // Setting unroll.count
if (Attrs.UnrollCount > 0) {
Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.unroll.count"),
ConstantAsMetadata::get(ConstantInt::get(
@@ -69,6 +72,14 @@ static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs,
Args.push_back(MDNode::get(Ctx, Vals));
}
+ // Setting unroll_and_jam.count
+ if (Attrs.UnrollAndJamCount > 0) {
+ Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.unroll_and_jam.count"),
+ ConstantAsMetadata::get(ConstantInt::get(
+ Type::getInt32Ty(Ctx), Attrs.UnrollAndJamCount))};
+ Args.push_back(MDNode::get(Ctx, Vals));
+ }
+
// Setting vectorize.enable
if (Attrs.VectorizeEnable != LoopAttributes::Unspecified) {
Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.vectorize.enable"),
@@ -91,6 +102,19 @@ static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs,
Args.push_back(MDNode::get(Ctx, Vals));
}
+  // Setting unroll_and_jam.enable, unroll_and_jam.full or
+  // unroll_and_jam.disable
+ if (Attrs.UnrollAndJamEnable != LoopAttributes::Unspecified) {
+ std::string Name;
+ if (Attrs.UnrollAndJamEnable == LoopAttributes::Enable)
+ Name = "llvm.loop.unroll_and_jam.enable";
+ else if (Attrs.UnrollAndJamEnable == LoopAttributes::Full)
+ Name = "llvm.loop.unroll_and_jam.full";
+ else
+ Name = "llvm.loop.unroll_and_jam.disable";
+ Metadata *Vals[] = {MDString::get(Ctx, Name)};
+ Args.push_back(MDNode::get(Ctx, Vals));
+ }
+
if (Attrs.DistributeEnable != LoopAttributes::Unspecified) {
Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.distribute.enable"),
ConstantAsMetadata::get(ConstantInt::get(
@@ -99,6 +123,28 @@ static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs,
Args.push_back(MDNode::get(Ctx, Vals));
}
+ if (Attrs.IsParallel) {
+ AccGroup = MDNode::getDistinct(Ctx, {});
+ Args.push_back(MDNode::get(
+ Ctx, {MDString::get(Ctx, "llvm.loop.parallel_accesses"), AccGroup}));
+ }
+
+ if (Attrs.PipelineDisabled) {
+ Metadata *Vals[] = {
+ MDString::get(Ctx, "llvm.loop.pipeline.disable"),
+ ConstantAsMetadata::get(ConstantInt::get(
+          Type::getInt1Ty(Ctx), Attrs.PipelineDisabled))};
+ Args.push_back(MDNode::get(Ctx, Vals));
+ }
+
+ if (Attrs.PipelineInitiationInterval > 0) {
+ Metadata *Vals[] = {
+ MDString::get(Ctx, "llvm.loop.pipeline.initiationinterval"),
+ ConstantAsMetadata::get(ConstantInt::get(
+ Type::getInt32Ty(Ctx), Attrs.PipelineInitiationInterval))};
+ Args.push_back(MDNode::get(Ctx, Vals));
+ }
+
// Set the first operand to itself.
MDNode *LoopID = MDNode::get(Ctx, Args);
LoopID->replaceOperandWith(0, LoopID);
@@ -107,24 +153,31 @@ static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs,
LoopAttributes::LoopAttributes(bool IsParallel)
: IsParallel(IsParallel), VectorizeEnable(LoopAttributes::Unspecified),
- UnrollEnable(LoopAttributes::Unspecified), VectorizeWidth(0),
- InterleaveCount(0), UnrollCount(0),
- DistributeEnable(LoopAttributes::Unspecified) {}
+ UnrollEnable(LoopAttributes::Unspecified),
+ UnrollAndJamEnable(LoopAttributes::Unspecified), VectorizeWidth(0),
+ InterleaveCount(0), UnrollCount(0), UnrollAndJamCount(0),
+ DistributeEnable(LoopAttributes::Unspecified), PipelineDisabled(false),
+ PipelineInitiationInterval(0) {}
void LoopAttributes::clear() {
IsParallel = false;
VectorizeWidth = 0;
InterleaveCount = 0;
UnrollCount = 0;
+ UnrollAndJamCount = 0;
VectorizeEnable = LoopAttributes::Unspecified;
UnrollEnable = LoopAttributes::Unspecified;
+ UnrollAndJamEnable = LoopAttributes::Unspecified;
DistributeEnable = LoopAttributes::Unspecified;
+ PipelineDisabled = false;
+ PipelineInitiationInterval = 0;
}
LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs,
const llvm::DebugLoc &StartLoc, const llvm::DebugLoc &EndLoc)
: LoopID(nullptr), Header(Header), Attrs(Attrs) {
- LoopID = createMetadata(Header->getContext(), Attrs, StartLoc, EndLoc);
+ LoopID =
+ createMetadata(Header->getContext(), Attrs, StartLoc, EndLoc, AccGroup);
}
void LoopInfoStack::push(BasicBlock *Header, const llvm::DebugLoc &StartLoc,
@@ -191,12 +244,20 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
case LoopHintAttr::Unroll:
setUnrollState(LoopAttributes::Disable);
break;
+ case LoopHintAttr::UnrollAndJam:
+ setUnrollAndJamState(LoopAttributes::Disable);
+ break;
case LoopHintAttr::Distribute:
setDistributeState(false);
break;
+ case LoopHintAttr::PipelineDisabled:
+ setPipelineDisabled(true);
+ break;
case LoopHintAttr::UnrollCount:
+ case LoopHintAttr::UnrollAndJamCount:
case LoopHintAttr::VectorizeWidth:
case LoopHintAttr::InterleaveCount:
+ case LoopHintAttr::PipelineInitiationInterval:
llvm_unreachable("Options cannot be disabled.");
break;
}
@@ -210,12 +271,18 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
case LoopHintAttr::Unroll:
setUnrollState(LoopAttributes::Enable);
break;
+ case LoopHintAttr::UnrollAndJam:
+ setUnrollAndJamState(LoopAttributes::Enable);
+ break;
case LoopHintAttr::Distribute:
setDistributeState(true);
break;
case LoopHintAttr::UnrollCount:
+ case LoopHintAttr::UnrollAndJamCount:
case LoopHintAttr::VectorizeWidth:
case LoopHintAttr::InterleaveCount:
+ case LoopHintAttr::PipelineDisabled:
+ case LoopHintAttr::PipelineInitiationInterval:
llvm_unreachable("Options cannot enabled.");
break;
}
@@ -229,10 +296,14 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
setVectorizeEnable(true);
break;
case LoopHintAttr::Unroll:
+ case LoopHintAttr::UnrollAndJam:
case LoopHintAttr::UnrollCount:
+ case LoopHintAttr::UnrollAndJamCount:
case LoopHintAttr::VectorizeWidth:
case LoopHintAttr::InterleaveCount:
case LoopHintAttr::Distribute:
+ case LoopHintAttr::PipelineDisabled:
+ case LoopHintAttr::PipelineInitiationInterval:
llvm_unreachable("Options cannot be used to assume mem safety.");
break;
}
@@ -242,12 +313,18 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
case LoopHintAttr::Unroll:
setUnrollState(LoopAttributes::Full);
break;
+ case LoopHintAttr::UnrollAndJam:
+ setUnrollAndJamState(LoopAttributes::Full);
+ break;
case LoopHintAttr::Vectorize:
case LoopHintAttr::Interleave:
case LoopHintAttr::UnrollCount:
+ case LoopHintAttr::UnrollAndJamCount:
case LoopHintAttr::VectorizeWidth:
case LoopHintAttr::InterleaveCount:
case LoopHintAttr::Distribute:
+ case LoopHintAttr::PipelineDisabled:
+ case LoopHintAttr::PipelineInitiationInterval:
llvm_unreachable("Options cannot be used with 'full' hint.");
break;
}
@@ -263,10 +340,18 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
case LoopHintAttr::UnrollCount:
setUnrollCount(ValueInt);
break;
+ case LoopHintAttr::UnrollAndJamCount:
+ setUnrollAndJamCount(ValueInt);
+ break;
+ case LoopHintAttr::PipelineInitiationInterval:
+ setPipelineInitiationInterval(ValueInt);
+ break;
case LoopHintAttr::Unroll:
+ case LoopHintAttr::UnrollAndJam:
case LoopHintAttr::Vectorize:
case LoopHintAttr::Interleave:
case LoopHintAttr::Distribute:
+ case LoopHintAttr::PipelineDisabled:
llvm_unreachable("Options cannot be assigned a value.");
break;
}
@@ -284,6 +369,21 @@ void LoopInfoStack::pop() {
}
void LoopInfoStack::InsertHelper(Instruction *I) const {
+ if (I->mayReadOrWriteMemory()) {
+ SmallVector<Metadata *, 4> AccessGroups;
+ for (const LoopInfo &AL : Active) {
+ // Here we assume that every loop that has an access group is parallel.
+ if (MDNode *Group = AL.getAccessGroup())
+ AccessGroups.push_back(Group);
+ }
+ MDNode *UnionMD = nullptr;
+ if (AccessGroups.size() == 1)
+ UnionMD = cast<MDNode>(AccessGroups[0]);
+ else if (AccessGroups.size() >= 2)
+ UnionMD = MDNode::get(I->getContext(), AccessGroups);
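+    // If no enclosing loop is parallel, UnionMD stays null and the
+    // instruction simply gets no access group.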
+ I->setMetadata("llvm.access.group", UnionMD);
+ }
+
if (!hasInfo())
return;
@@ -291,15 +391,12 @@ void LoopInfoStack::InsertHelper(Instruction *I) const {
if (!L.getLoopID())
return;
- if (TerminatorInst *TI = dyn_cast<TerminatorInst>(I)) {
- for (unsigned i = 0, ie = TI->getNumSuccessors(); i < ie; ++i)
- if (TI->getSuccessor(i) == L.getHeader()) {
- TI->setMetadata(llvm::LLVMContext::MD_loop, L.getLoopID());
+ if (I->isTerminator()) {
+ for (BasicBlock *Succ : successors(I))
+ if (Succ == L.getHeader()) {
+ I->setMetadata(llvm::LLVMContext::MD_loop, L.getLoopID());
break;
}
return;
}
-
- if (L.getAttributes().IsParallel && I->mayReadOrWriteMemory())
- I->setMetadata("llvm.mem.parallel_loop_access", L.getLoopID());
}
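The llvm.access.group tags that InsertHelper now attaches, together with the llvm.loop.parallel_accesses entry added to the loop ID, are what downstream loop passes consult when deciding that a loop has no loop-carried memory dependences. A minimal consumer-side sketch, assuming LLVM's existing Loop API (the wrapper name is illustrative):

  #include "llvm/Analysis/LoopInfo.h"

  // True only if the loop's !llvm.loop node lists parallel access groups and
  // every memory access in the loop carries a matching !llvm.access.group.
  bool hasNoLoopCarriedMemoryDeps(const llvm::Loop *L) {
    return L->isAnnotatedParallel();
  }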
diff --git a/lib/CodeGen/CGLoopInfo.h b/lib/CodeGen/CGLoopInfo.h
index 9d5f23ff9a2a..84ba03bfb00b 100644
--- a/lib/CodeGen/CGLoopInfo.h
+++ b/lib/CodeGen/CGLoopInfo.h
@@ -49,6 +49,9 @@ struct LoopAttributes {
/// Value for llvm.loop.unroll.* metadata (enable, disable, or full).
LVEnableState UnrollEnable;
+ /// Value for llvm.loop.unroll_and_jam.* metadata (enable, disable, or full).
+ LVEnableState UnrollAndJamEnable;
+
/// Value for llvm.loop.vectorize.width metadata.
unsigned VectorizeWidth;
@@ -58,8 +61,17 @@ struct LoopAttributes {
/// llvm.unroll.
unsigned UnrollCount;
+  /// Value for llvm.loop.unroll_and_jam.count metadata.
+ unsigned UnrollAndJamCount;
+
/// Value for llvm.loop.distribute.enable metadata.
LVEnableState DistributeEnable;
+
+ /// Value for llvm.loop.pipeline.disable metadata.
+ bool PipelineDisabled;
+
+  /// Value for llvm.loop.pipeline.initiationinterval metadata.
+ unsigned PipelineInitiationInterval;
};
/// Information used when generating a structured loop.
@@ -78,6 +90,9 @@ public:
/// Get the set of attributes active for this loop.
const LoopAttributes &getAttributes() const { return Attrs; }
+ /// Return this loop's access group or nullptr if it does not have one.
+ llvm::MDNode *getAccessGroup() const { return AccGroup; }
+
private:
/// Loop ID metadata.
llvm::MDNode *LoopID;
@@ -85,6 +100,8 @@ private:
llvm::BasicBlock *Header;
/// The attributes for this loop.
LoopAttributes Attrs;
+ /// The access group for memory accesses parallel to this loop.
+ llvm::MDNode *AccGroup = nullptr;
};
/// A stack of loop information corresponding to loop nesting levels.
@@ -143,6 +160,11 @@ public:
StagedAttrs.UnrollEnable = State;
}
+ /// Set the next pushed loop unroll_and_jam state.
+ void setUnrollAndJamState(const LoopAttributes::LVEnableState &State) {
+ StagedAttrs.UnrollAndJamEnable = State;
+ }
+
/// Set the vectorize width for the next loop pushed.
void setVectorizeWidth(unsigned W) { StagedAttrs.VectorizeWidth = W; }
@@ -152,6 +174,17 @@ public:
/// Set the unroll count for the next loop pushed.
void setUnrollCount(unsigned C) { StagedAttrs.UnrollCount = C; }
+  /// Set the unroll_and_jam count for the next loop pushed.
+ void setUnrollAndJamCount(unsigned C) { StagedAttrs.UnrollAndJamCount = C; }
+
+ /// Set the pipeline disabled state.
+ void setPipelineDisabled(bool S) { StagedAttrs.PipelineDisabled = S; }
+
+ /// Set the pipeline initiation interval.
+ void setPipelineInitiationInterval(unsigned C) {
+ StagedAttrs.PipelineInitiationInterval = C;
+ }
+
private:
/// Returns true if there is LoopInfo on the stack.
bool hasInfo() const { return !Active.empty(); }
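A minimal sketch of how a codegen client stages the new attributes before emitting a loop; the LoopInfoStack and LoopAttributes spellings match the header above, while the surrounding function and the particular values are illustrative:

  void emitAttributedLoop(clang::CodeGen::LoopInfoStack &LIS,
                          llvm::BasicBlock *Header,
                          const llvm::DebugLoc &Start,
                          const llvm::DebugLoc &End) {
    LIS.setUnrollAndJamState(clang::CodeGen::LoopAttributes::Enable);
    LIS.setUnrollAndJamCount(4);
    LIS.setPipelineInitiationInterval(10);
    // push() turns the staged attributes into this loop's !llvm.loop metadata;
    // InsertHelper() then tags back-edges (and, for parallel loops, memory
    // accesses) as instructions are inserted.
    LIS.push(Header, Start, End);
    // ... emit the loop body ...
    LIS.pop();
  }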
diff --git a/lib/CodeGen/CGNonTrivialStruct.cpp b/lib/CodeGen/CGNonTrivialStruct.cpp
index 922e0934b866..c6a96a912622 100644
--- a/lib/CodeGen/CGNonTrivialStruct.cpp
+++ b/lib/CodeGen/CGNonTrivialStruct.cpp
@@ -187,6 +187,7 @@ template <class Derived> struct GenFuncNameBase {
if (!FK)
return asDerived().visitTrivial(QualType(AT, 0), FD, CurStructOffset);
+ asDerived().flushTrivialFields();
CharUnits FieldOffset = CurStructOffset + asDerived().getFieldOffset(FD);
ASTContext &Ctx = asDerived().getContext();
const ConstantArrayType *CAT = cast<ConstantArrayType>(AT);
@@ -283,8 +284,9 @@ struct GenDefaultInitializeFuncName
struct GenDestructorFuncName : GenUnaryFuncName<GenDestructorFuncName>,
DestructedTypeVisitor<GenDestructorFuncName> {
using Super = DestructedTypeVisitor<GenDestructorFuncName>;
- GenDestructorFuncName(CharUnits DstAlignment, ASTContext &Ctx)
- : GenUnaryFuncName<GenDestructorFuncName>("__destructor_", DstAlignment,
+ GenDestructorFuncName(const char *Prefix, CharUnits DstAlignment,
+ ASTContext &Ctx)
+ : GenUnaryFuncName<GenDestructorFuncName>(Prefix, DstAlignment,
Ctx) {}
void visitWithKind(QualType::DestructionKind DK, QualType FT,
const FieldDecl *FD, CharUnits CurStructOffset) {
@@ -335,6 +337,7 @@ template <class Derived> struct GenFuncBase {
return asDerived().visitTrivial(QualType(AT, 0), FD, CurStackOffset,
Addrs);
+ asDerived().flushTrivialFields(Addrs);
CodeGenFunction &CGF = *this->CGF;
ASTContext &Ctx = CGF.getContext();
@@ -455,12 +458,13 @@ template <class Derived> struct GenFuncBase {
llvm::Function::Create(FuncTy, llvm::GlobalValue::LinkOnceODRLinkage,
FuncName, &CGM.getModule());
F->setVisibility(llvm::GlobalValue::HiddenVisibility);
- CGM.SetLLVMFunctionAttributes(nullptr, FI, F);
+ CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, F);
CGM.SetLLVMFunctionAttributesForDefinition(nullptr, F);
IdentifierInfo *II = &Ctx.Idents.get(FuncName);
FunctionDecl *FD = FunctionDecl::Create(
Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(),
- II, Ctx.VoidTy, nullptr, SC_PrivateExtern, false, false);
+ II, Ctx.getFunctionType(Ctx.VoidTy, llvm::None, {}), nullptr,
+ SC_PrivateExtern, false, false);
CodeGenFunction NewCGF(CGM);
setCGF(&NewCGF);
CGF->StartFunction(FD, Ctx.VoidTy, F, FI, Args);
@@ -824,11 +828,28 @@ void CodeGenFunction::callCStructDefaultConstructor(LValue Dst) {
IsVolatile, *this, std::array<Address, 1>({{DstPtr}}));
}
+std::string
+CodeGenFunction::getNonTrivialCopyConstructorStr(QualType QT,
+ CharUnits Alignment,
+ bool IsVolatile,
+ ASTContext &Ctx) {
+ GenBinaryFuncName<false> GenName("", Alignment, Alignment, Ctx);
+ return GenName.getName(QT, IsVolatile);
+}
+
+std::string
+CodeGenFunction::getNonTrivialDestructorStr(QualType QT, CharUnits Alignment,
+ bool IsVolatile, ASTContext &Ctx) {
+ GenDestructorFuncName GenName("", Alignment, Ctx);
+ return GenName.getName(QT, IsVolatile);
+}
+
void CodeGenFunction::callCStructDestructor(LValue Dst) {
bool IsVolatile = Dst.isVolatile();
Address DstPtr = Dst.getAddress();
QualType QT = Dst.getType();
- GenDestructorFuncName GenName(DstPtr.getAlignment(), getContext());
+ GenDestructorFuncName GenName("__destructor_", DstPtr.getAlignment(),
+ getContext());
std::string FuncName = GenName.getName(QT, IsVolatile);
callSpecialFunction(GenDestructor(getContext()), FuncName, QT, IsVolatile,
*this, std::array<Address, 1>({{DstPtr}}));
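The two new entry points only derive the mangled name of a struct's special function; nothing is emitted. A hypothetical caller inside CodeGen (every name other than the two helpers is illustrative):

  void noteHelperNames(CodeGenFunction &CGF, LValue Dst) {
    ASTContext &Ctx = CGF.getContext();
    CharUnits Align = Dst.getAddress().getAlignment();
    std::string CopyName = CGF.getNonTrivialCopyConstructorStr(
        Dst.getType(), Align, Dst.isVolatile(), Ctx);
    std::string DtorName = CGF.getNonTrivialDestructorStr(
        Dst.getType(), Align, Dst.isVolatile(), Ctx);
    (void)CopyName;
    (void)DtorName; // e.g. record the names for later lazy emission
  }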
diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp
index b94bbf2a384f..9c66ff0e8fb2 100644
--- a/lib/CodeGen/CGObjC.cpp
+++ b/lib/CodeGen/CGObjC.cpp
@@ -127,7 +127,7 @@ llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E,
llvm::Constant *Constant =
CGM.CreateRuntimeVariable(ConvertType(IdTy), ConstantName);
LValue LV = MakeNaturalAlignAddrLValue(Constant, IdTy);
- llvm::Value *Ptr = EmitLoadOfScalar(LV, E->getLocStart());
+ llvm::Value *Ptr = EmitLoadOfScalar(LV, E->getBeginLoc());
cast<llvm::LoadInst>(Ptr)->setMetadata(
CGM.getModule().getMDKindID("invariant.load"),
llvm::MDNode::get(getLLVMContext(), None));
@@ -352,6 +352,81 @@ static const Expr *findWeakLValue(const Expr *E) {
return nullptr;
}
+/// The ObjC runtime may provide entrypoints that are likely to be faster
+/// than an ordinary message send of the appropriate selector.
+///
+/// The entrypoints are guaranteed to be equivalent to just sending the
+/// corresponding message. If the entrypoint is implemented naively as just a
+/// message send, using it is a trade-off: it sacrifices a few cycles of
+/// overhead to save a small amount of code. However, it's possible for
+/// runtimes to detect and special-case classes that use "standard"
+/// behavior; if that's dynamically a large proportion of all objects, using
+/// the entrypoint will also be faster than using a message send.
+///
+/// If the runtime does support a required entrypoint, then this method will
+/// generate a call and return the resulting value. Otherwise it will return
+/// None and the caller can generate a msgSend instead.
+static Optional<llvm::Value *>
+tryGenerateSpecializedMessageSend(CodeGenFunction &CGF, QualType ResultType,
+ llvm::Value *Receiver,
+ const CallArgList& Args, Selector Sel,
+ const ObjCMethodDecl *method,
+ bool isClassMessage) {
+ auto &CGM = CGF.CGM;
+ if (!CGM.getCodeGenOpts().ObjCConvertMessagesToRuntimeCalls)
+ return None;
+
+ auto &Runtime = CGM.getLangOpts().ObjCRuntime;
+ switch (Sel.getMethodFamily()) {
+ case OMF_alloc:
+ if (isClassMessage &&
+ Runtime.shouldUseRuntimeFunctionsForAlloc() &&
+ ResultType->isObjCObjectPointerType()) {
+ // [Foo alloc] -> objc_alloc(Foo)
+ if (Sel.isUnarySelector() && Sel.getNameForSlot(0) == "alloc")
+ return CGF.EmitObjCAlloc(Receiver, CGF.ConvertType(ResultType));
+ // [Foo allocWithZone:nil] -> objc_allocWithZone(Foo)
+ if (Sel.isKeywordSelector() && Sel.getNumArgs() == 1 &&
+ Args.size() == 1 && Args.front().getType()->isPointerType() &&
+ Sel.getNameForSlot(0) == "allocWithZone") {
+ const llvm::Value* arg = Args.front().getKnownRValue().getScalarVal();
+ if (isa<llvm::ConstantPointerNull>(arg))
+ return CGF.EmitObjCAllocWithZone(Receiver,
+ CGF.ConvertType(ResultType));
+ return None;
+ }
+ }
+ break;
+
+ case OMF_autorelease:
+ if (ResultType->isObjCObjectPointerType() &&
+ CGM.getLangOpts().getGC() == LangOptions::NonGC &&
+ Runtime.shouldUseARCFunctionsForRetainRelease())
+ return CGF.EmitObjCAutorelease(Receiver, CGF.ConvertType(ResultType));
+ break;
+
+ case OMF_retain:
+ if (ResultType->isObjCObjectPointerType() &&
+ CGM.getLangOpts().getGC() == LangOptions::NonGC &&
+ Runtime.shouldUseARCFunctionsForRetainRelease())
+ return CGF.EmitObjCRetainNonBlock(Receiver, CGF.ConvertType(ResultType));
+ break;
+
+ case OMF_release:
+ if (ResultType->isVoidType() &&
+ CGM.getLangOpts().getGC() == LangOptions::NonGC &&
+ Runtime.shouldUseARCFunctionsForRetainRelease()) {
+ CGF.EmitObjCRelease(Receiver, ARCPreciseLifetime);
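+      // Engaged Optional holding nullptr: the send was handled, but -release
+      // produces no value for the caller.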
+ return nullptr;
+ }
+ break;
+
+ default:
+ break;
+ }
+ return None;
+}
+
RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E,
ReturnValueSlot Return) {
// Only the lookup mechanism and first two arguments of the method
@@ -474,10 +549,17 @@ RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E,
Args,
method);
} else {
- result = Runtime.GenerateMessageSend(*this, Return, ResultType,
- E->getSelector(),
- Receiver, Args, OID,
- method);
+ // Call runtime methods directly if we can.
+ if (Optional<llvm::Value *> SpecializedResult =
+ tryGenerateSpecializedMessageSend(*this, ResultType, Receiver, Args,
+ E->getSelector(), method,
+ isClassMessage)) {
+ result = RValue::get(SpecializedResult.getValue());
+ } else {
+ result = Runtime.GenerateMessageSend(*this, Return, ResultType,
+ E->getSelector(), Receiver, Args,
+ OID, method);
+ }
}
// For delegate init calls in ARC, implicitly store the result of
@@ -531,7 +613,7 @@ struct FinishARCDealloc final : EHScopeStack::Cleanup {
/// CodeGenFunction.
void CodeGenFunction::StartObjCMethod(const ObjCMethodDecl *OMD,
const ObjCContainerDecl *CD) {
- SourceLocation StartLoc = OMD->getLocStart();
+ SourceLocation StartLoc = OMD->getBeginLoc();
FunctionArgList args;
// Check if we should generate debug info for this method.
if (OMD->hasAttr<NoDebugAttr>())
@@ -548,7 +630,7 @@ void CodeGenFunction::StartObjCMethod(const ObjCMethodDecl *OMD,
args.append(OMD->param_begin(), OMD->param_end());
CurGD = OMD;
- CurEHLocation = OMD->getLocEnd();
+ CurEHLocation = OMD->getEndLoc();
StartFunction(OMD, OMD->getReturnType(), Fn, FI, args,
OMD->getLocation(), StartLoc);
@@ -568,7 +650,7 @@ static llvm::Value *emitARCRetainLoadOfScalar(CodeGenFunction &CGF,
LValue lvalue, QualType type);
/// Generate an Objective-C method. An Objective-C method is a C function with
-/// its pointer, name, and types registered in the class struture.
+/// its pointer, name, and types registered in the class structure.
void CodeGenFunction::GenerateObjCMethod(const ObjCMethodDecl *OMD) {
StartObjCMethod(OMD, OMD->getClassInterface());
PGO.assignRegionCounters(GlobalDecl(OMD), CurFn);
@@ -883,9 +965,10 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl,
// If there's a non-trivial 'get' expression, we just have to emit that.
if (!hasTrivialGetExpr(propImpl)) {
if (!AtomicHelperFn) {
- ReturnStmt ret(SourceLocation(), propImpl->getGetterCXXConstructor(),
- /*nrvo*/ nullptr);
- EmitReturnStmt(ret);
+ auto *ret = ReturnStmt::Create(getContext(), SourceLocation(),
+ propImpl->getGetterCXXConstructor(),
+ /* NRVOCandidate=*/nullptr);
+ EmitReturnStmt(*ret);
}
else {
ObjCIvarDecl *ivar = propImpl->getPropertyIvarDecl();
@@ -1068,8 +1151,9 @@ static void emitStructSetterCall(CodeGenFunction &CGF, ObjCMethodDecl *OMD,
// The second argument is the address of the parameter variable.
ParmVarDecl *argVar = *OMD->param_begin();
- DeclRefExpr argRef(argVar, false, argVar->getType().getNonReferenceType(),
- VK_LValue, SourceLocation());
+ DeclRefExpr argRef(CGF.getContext(), argVar, false,
+ argVar->getType().getNonReferenceType(), VK_LValue,
+ SourceLocation());
llvm::Value *argAddr = CGF.EmitLValue(&argRef).getPointer();
argAddr = CGF.Builder.CreateBitCast(argAddr, CGF.Int8PtrTy);
args.add(RValue::get(argAddr), CGF.getContext().VoidPtrTy);
@@ -1113,8 +1197,9 @@ static void emitCPPObjectAtomicSetterCall(CodeGenFunction &CGF,
// The second argument is the address of the parameter variable.
ParmVarDecl *argVar = *OMD->param_begin();
- DeclRefExpr argRef(argVar, false, argVar->getType().getNonReferenceType(),
- VK_LValue, SourceLocation());
+ DeclRefExpr argRef(CGF.getContext(), argVar, false,
+ argVar->getType().getNonReferenceType(), VK_LValue,
+ SourceLocation());
llvm::Value *argAddr = CGF.EmitLValue(&argRef).getPointer();
argAddr = CGF.Builder.CreateBitCast(argAddr, CGF.Int8PtrTy);
args.add(RValue::get(argAddr), CGF.getContext().VoidPtrTy);
@@ -1286,7 +1371,7 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl,
// Otherwise, fake up some ASTs and emit a normal assignment.
ValueDecl *selfDecl = setterMethod->getSelfDecl();
- DeclRefExpr self(selfDecl, false, selfDecl->getType(),
+ DeclRefExpr self(getContext(), selfDecl, false, selfDecl->getType(),
VK_LValue, SourceLocation());
ImplicitCastExpr selfLoad(ImplicitCastExpr::OnStack,
selfDecl->getType(), CK_LValueToRValue, &self,
@@ -1297,7 +1382,8 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl,
ParmVarDecl *argDecl = *setterMethod->param_begin();
QualType argType = argDecl->getType().getNonReferenceType();
- DeclRefExpr arg(argDecl, false, argType, VK_LValue, SourceLocation());
+ DeclRefExpr arg(getContext(), argDecl, false, argType, VK_LValue,
+ SourceLocation());
ImplicitCastExpr argLoad(ImplicitCastExpr::OnStack,
argType.getUnqualifiedType(), CK_LValueToRValue,
&arg, VK_RValue);
@@ -1459,7 +1545,8 @@ void CodeGenFunction::GenerateObjCCtorDtorMethod(ObjCImplementationDecl *IMP,
llvm::Value *CodeGenFunction::LoadObjCSelf() {
VarDecl *Self = cast<ObjCMethodDecl>(CurFuncDecl)->getSelfDecl();
- DeclRefExpr DRE(Self, /*is enclosing local*/ (CurFuncDecl != CurCodeDecl),
+ DeclRefExpr DRE(getContext(), Self,
+ /*is enclosing local*/ (CurFuncDecl != CurCodeDecl),
Self->getType(), VK_LValue, SourceLocation());
return EmitLoadOfScalar(EmitDeclRefLValue(&DRE), SourceLocation());
}
@@ -1645,9 +1732,9 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){
// Initialize the variable, in case it's a __block variable or something.
EmitAutoVarInit(variable);
- const VarDecl* D = cast<VarDecl>(SD->getSingleDecl());
- DeclRefExpr tempDRE(const_cast<VarDecl*>(D), false, D->getType(),
- VK_LValue, SourceLocation());
+ const VarDecl *D = cast<VarDecl>(SD->getSingleDecl());
+ DeclRefExpr tempDRE(getContext(), const_cast<VarDecl *>(D), false,
+ D->getType(), VK_LValue, SourceLocation());
elementLValue = EmitLValue(&tempDRE);
elementType = D->getType();
elementIsVariable = true;
@@ -1805,23 +1892,16 @@ llvm::Value *CodeGenFunction::EmitObjCExtendObjectLifetime(QualType type,
/// being intrinsically used up until this point in the program.
void CodeGenFunction::EmitARCIntrinsicUse(ArrayRef<llvm::Value*> values) {
llvm::Constant *&fn = CGM.getObjCEntrypoints().clang_arc_use;
- if (!fn) {
- llvm::FunctionType *fnType =
- llvm::FunctionType::get(CGM.VoidTy, None, true);
- fn = CGM.CreateRuntimeFunction(fnType, "clang.arc.use");
- }
+ if (!fn)
+ fn = CGM.getIntrinsic(llvm::Intrinsic::objc_clang_arc_use);
// This isn't really a "runtime" function, but as an intrinsic it
// doesn't really matter as long as we align things up.
EmitNounwindRuntimeCall(fn, values);
}
-
-static llvm::Constant *createARCRuntimeFunction(CodeGenModule &CGM,
- llvm::FunctionType *FTy,
- StringRef Name) {
- llvm::Constant *RTF = CGM.CreateRuntimeFunction(FTy, Name);
-
+static void setARCRuntimeFunctionLinkage(CodeGenModule &CGM,
+ llvm::Constant *RTF) {
if (auto *F = dyn_cast<llvm::Function>(RTF)) {
// If the target runtime doesn't naturally support ARC, emit weak
// references to the runtime support library. We don't really
@@ -1829,14 +1909,8 @@ static llvm::Constant *createARCRuntimeFunction(CodeGenModule &CGM,
if (!CGM.getLangOpts().ObjCRuntime.hasNativeARC() &&
!CGM.getTriple().isOSBinFormatCOFF()) {
F->setLinkage(llvm::Function::ExternalWeakLinkage);
- } else if (Name == "objc_retain" || Name == "objc_release") {
- // If we have Native ARC, set nonlazybind attribute for these APIs for
- // performance.
- F->addFnAttr(llvm::Attribute::NonLazyBind);
}
}
-
- return RTF;
}
/// Perform an operation having the signature
@@ -1844,20 +1918,20 @@ static llvm::Constant *createARCRuntimeFunction(CodeGenModule &CGM,
/// where a null input causes a no-op and returns null.
static llvm::Value *emitARCValueOperation(CodeGenFunction &CGF,
llvm::Value *value,
+ llvm::Type *returnType,
llvm::Constant *&fn,
- StringRef fnName,
+ llvm::Intrinsic::ID IntID,
bool isTailCall = false) {
if (isa<llvm::ConstantPointerNull>(value))
return value;
if (!fn) {
- llvm::FunctionType *fnType =
- llvm::FunctionType::get(CGF.Int8PtrTy, CGF.Int8PtrTy, false);
- fn = createARCRuntimeFunction(CGF.CGM, fnType, fnName);
+ fn = CGF.CGM.getIntrinsic(IntID);
+ setARCRuntimeFunctionLinkage(CGF.CGM, fn);
}
// Cast the argument to 'id'.
- llvm::Type *origType = value->getType();
+ llvm::Type *origType = returnType ? returnType : value->getType();
value = CGF.Builder.CreateBitCast(value, CGF.Int8PtrTy);
// Call the function.
@@ -1874,11 +1948,10 @@ static llvm::Value *emitARCValueOperation(CodeGenFunction &CGF,
static llvm::Value *emitARCLoadOperation(CodeGenFunction &CGF,
Address addr,
llvm::Constant *&fn,
- StringRef fnName) {
+ llvm::Intrinsic::ID IntID) {
if (!fn) {
- llvm::FunctionType *fnType =
- llvm::FunctionType::get(CGF.Int8PtrTy, CGF.Int8PtrPtrTy, false);
- fn = createARCRuntimeFunction(CGF.CGM, fnType, fnName);
+ fn = CGF.CGM.getIntrinsic(IntID);
+ setARCRuntimeFunctionLinkage(CGF.CGM, fn);
}
// Cast the argument to 'id*'.
@@ -1901,16 +1974,13 @@ static llvm::Value *emitARCStoreOperation(CodeGenFunction &CGF,
Address addr,
llvm::Value *value,
llvm::Constant *&fn,
- StringRef fnName,
+ llvm::Intrinsic::ID IntID,
bool ignored) {
assert(addr.getElementType() == value->getType());
if (!fn) {
- llvm::Type *argTypes[] = { CGF.Int8PtrPtrTy, CGF.Int8PtrTy };
-
- llvm::FunctionType *fnType
- = llvm::FunctionType::get(CGF.Int8PtrTy, argTypes, false);
- fn = createARCRuntimeFunction(CGF.CGM, fnType, fnName);
+ fn = CGF.CGM.getIntrinsic(IntID);
+ setARCRuntimeFunctionLinkage(CGF.CGM, fn);
}
llvm::Type *origType = value->getType();
@@ -1932,15 +2002,12 @@ static void emitARCCopyOperation(CodeGenFunction &CGF,
Address dst,
Address src,
llvm::Constant *&fn,
- StringRef fnName) {
+ llvm::Intrinsic::ID IntID) {
assert(dst.getType() == src.getType());
if (!fn) {
- llvm::Type *argTypes[] = { CGF.Int8PtrPtrTy, CGF.Int8PtrPtrTy };
-
- llvm::FunctionType *fnType
- = llvm::FunctionType::get(CGF.Builder.getVoidTy(), argTypes, false);
- fn = createARCRuntimeFunction(CGF.CGM, fnType, fnName);
+ fn = CGF.CGM.getIntrinsic(IntID);
+ setARCRuntimeFunctionLinkage(CGF.CGM, fn);
}
llvm::Value *args[] = {
@@ -1950,6 +2017,39 @@ static void emitARCCopyOperation(CodeGenFunction &CGF,
CGF.EmitNounwindRuntimeCall(fn, args);
}
+/// Perform an operation having the signature
+/// i8* (i8*)
+/// where a null input causes a no-op and returns null.
+static llvm::Value *emitObjCValueOperation(CodeGenFunction &CGF,
+ llvm::Value *value,
+ llvm::Type *returnType,
+ llvm::Constant *&fn,
+ StringRef fnName) {
+ if (isa<llvm::ConstantPointerNull>(value))
+ return value;
+
+ if (!fn) {
+ llvm::FunctionType *fnType =
+ llvm::FunctionType::get(CGF.Int8PtrTy, CGF.Int8PtrTy, false);
+ fn = CGF.CGM.CreateRuntimeFunction(fnType, fnName);
+
+ // We have Native ARC, so set nonlazybind attribute for performance
+ if (llvm::Function *f = dyn_cast<llvm::Function>(fn))
+ if (fnName == "objc_retain")
+ f->addFnAttr(llvm::Attribute::NonLazyBind);
+ }
+
+ // Cast the argument to 'id'.
+ llvm::Type *origType = returnType ? returnType : value->getType();
+ value = CGF.Builder.CreateBitCast(value, CGF.Int8PtrTy);
+
+ // Call the function.
+ llvm::CallInst *call = CGF.EmitNounwindRuntimeCall(fn, value);
+
+ // Cast the result back to the original type.
+ return CGF.Builder.CreateBitCast(call, origType);
+}
+
/// Produce the code to do a retain. Based on the type, calls one of:
/// call i8* \@objc_retain(i8* %value)
/// call i8* \@objc_retainBlock(i8* %value)
@@ -1963,9 +2063,9 @@ llvm::Value *CodeGenFunction::EmitARCRetain(QualType type, llvm::Value *value) {
/// Retain the given object, with normal retain semantics.
/// call i8* \@objc_retain(i8* %value)
llvm::Value *CodeGenFunction::EmitARCRetainNonBlock(llvm::Value *value) {
- return emitARCValueOperation(*this, value,
+ return emitARCValueOperation(*this, value, nullptr,
CGM.getObjCEntrypoints().objc_retain,
- "objc_retain");
+ llvm::Intrinsic::objc_retain);
}
/// Retain the given block, with _Block_copy semantics.
@@ -1977,9 +2077,9 @@ llvm::Value *CodeGenFunction::EmitARCRetainNonBlock(llvm::Value *value) {
llvm::Value *CodeGenFunction::EmitARCRetainBlock(llvm::Value *value,
bool mandatory) {
llvm::Value *result
- = emitARCValueOperation(*this, value,
+ = emitARCValueOperation(*this, value, nullptr,
CGM.getObjCEntrypoints().objc_retainBlock,
- "objc_retainBlock");
+ llvm::Intrinsic::objc_retainBlock);
// If the copy isn't mandatory, add !clang.arc.copy_on_escape to
// tell the optimizer that it doesn't need to do this copy if the
@@ -2047,9 +2147,9 @@ static void emitAutoreleasedReturnValueMarker(CodeGenFunction &CGF) {
llvm::Value *
CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) {
emitAutoreleasedReturnValueMarker(*this);
- return emitARCValueOperation(*this, value,
+ return emitARCValueOperation(*this, value, nullptr,
CGM.getObjCEntrypoints().objc_retainAutoreleasedReturnValue,
- "objc_retainAutoreleasedReturnValue");
+ llvm::Intrinsic::objc_retainAutoreleasedReturnValue);
}
/// Claim a possibly-autoreleased return value at +0. This is only
@@ -2062,9 +2162,9 @@ CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) {
llvm::Value *
CodeGenFunction::EmitARCUnsafeClaimAutoreleasedReturnValue(llvm::Value *value) {
emitAutoreleasedReturnValueMarker(*this);
- return emitARCValueOperation(*this, value,
+ return emitARCValueOperation(*this, value, nullptr,
CGM.getObjCEntrypoints().objc_unsafeClaimAutoreleasedReturnValue,
- "objc_unsafeClaimAutoreleasedReturnValue");
+ llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue);
}
/// Release the given object.
@@ -2075,9 +2175,8 @@ void CodeGenFunction::EmitARCRelease(llvm::Value *value,
llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_release;
if (!fn) {
- llvm::FunctionType *fnType =
- llvm::FunctionType::get(Builder.getVoidTy(), Int8PtrTy, false);
- fn = createARCRuntimeFunction(CGM, fnType, "objc_release");
+ fn = CGM.getIntrinsic(llvm::Intrinsic::objc_release);
+ setARCRuntimeFunctionLinkage(CGM, fn);
}
// Cast the argument to 'id'.
@@ -2122,10 +2221,8 @@ llvm::Value *CodeGenFunction::EmitARCStoreStrongCall(Address addr,
llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_storeStrong;
if (!fn) {
- llvm::Type *argTypes[] = { Int8PtrPtrTy, Int8PtrTy };
- llvm::FunctionType *fnType
- = llvm::FunctionType::get(Builder.getVoidTy(), argTypes, false);
- fn = createARCRuntimeFunction(CGM, fnType, "objc_storeStrong");
+ fn = CGM.getIntrinsic(llvm::Intrinsic::objc_storeStrong);
+ setARCRuntimeFunctionLinkage(CGM, fn);
}
llvm::Value *args[] = {
@@ -2177,18 +2274,18 @@ llvm::Value *CodeGenFunction::EmitARCStoreStrong(LValue dst,
/// Autorelease the given object.
/// call i8* \@objc_autorelease(i8* %value)
llvm::Value *CodeGenFunction::EmitARCAutorelease(llvm::Value *value) {
- return emitARCValueOperation(*this, value,
+ return emitARCValueOperation(*this, value, nullptr,
CGM.getObjCEntrypoints().objc_autorelease,
- "objc_autorelease");
+ llvm::Intrinsic::objc_autorelease);
}
/// Autorelease the given object.
/// call i8* \@objc_autoreleaseReturnValue(i8* %value)
llvm::Value *
CodeGenFunction::EmitARCAutoreleaseReturnValue(llvm::Value *value) {
- return emitARCValueOperation(*this, value,
+ return emitARCValueOperation(*this, value, nullptr,
CGM.getObjCEntrypoints().objc_autoreleaseReturnValue,
- "objc_autoreleaseReturnValue",
+ llvm::Intrinsic::objc_autoreleaseReturnValue,
/*isTailCall*/ true);
}
@@ -2196,9 +2293,9 @@ CodeGenFunction::EmitARCAutoreleaseReturnValue(llvm::Value *value) {
/// call i8* \@objc_retainAutoreleaseReturnValue(i8* %value)
llvm::Value *
CodeGenFunction::EmitARCRetainAutoreleaseReturnValue(llvm::Value *value) {
- return emitARCValueOperation(*this, value,
+ return emitARCValueOperation(*this, value, nullptr,
CGM.getObjCEntrypoints().objc_retainAutoreleaseReturnValue,
- "objc_retainAutoreleaseReturnValue",
+ llvm::Intrinsic::objc_retainAutoreleaseReturnValue,
/*isTailCall*/ true);
}
@@ -2225,9 +2322,9 @@ llvm::Value *CodeGenFunction::EmitARCRetainAutorelease(QualType type,
/// call i8* \@objc_retainAutorelease(i8* %value)
llvm::Value *
CodeGenFunction::EmitARCRetainAutoreleaseNonBlock(llvm::Value *value) {
- return emitARCValueOperation(*this, value,
+ return emitARCValueOperation(*this, value, nullptr,
CGM.getObjCEntrypoints().objc_retainAutorelease,
- "objc_retainAutorelease");
+ llvm::Intrinsic::objc_retainAutorelease);
}
/// i8* \@objc_loadWeak(i8** %addr)
@@ -2235,14 +2332,14 @@ CodeGenFunction::EmitARCRetainAutoreleaseNonBlock(llvm::Value *value) {
llvm::Value *CodeGenFunction::EmitARCLoadWeak(Address addr) {
return emitARCLoadOperation(*this, addr,
CGM.getObjCEntrypoints().objc_loadWeak,
- "objc_loadWeak");
+ llvm::Intrinsic::objc_loadWeak);
}
/// i8* \@objc_loadWeakRetained(i8** %addr)
llvm::Value *CodeGenFunction::EmitARCLoadWeakRetained(Address addr) {
return emitARCLoadOperation(*this, addr,
CGM.getObjCEntrypoints().objc_loadWeakRetained,
- "objc_loadWeakRetained");
+ llvm::Intrinsic::objc_loadWeakRetained);
}
/// i8* \@objc_storeWeak(i8** %addr, i8* %value)
@@ -2252,7 +2349,7 @@ llvm::Value *CodeGenFunction::EmitARCStoreWeak(Address addr,
bool ignored) {
return emitARCStoreOperation(*this, addr, value,
CGM.getObjCEntrypoints().objc_storeWeak,
- "objc_storeWeak", ignored);
+ llvm::Intrinsic::objc_storeWeak, ignored);
}
/// i8* \@objc_initWeak(i8** %addr, i8* %value)
@@ -2272,7 +2369,7 @@ void CodeGenFunction::EmitARCInitWeak(Address addr, llvm::Value *value) {
emitARCStoreOperation(*this, addr, value,
CGM.getObjCEntrypoints().objc_initWeak,
- "objc_initWeak", /*ignored*/ true);
+ llvm::Intrinsic::objc_initWeak, /*ignored*/ true);
}
/// void \@objc_destroyWeak(i8** %addr)
@@ -2280,9 +2377,8 @@ void CodeGenFunction::EmitARCInitWeak(Address addr, llvm::Value *value) {
void CodeGenFunction::EmitARCDestroyWeak(Address addr) {
llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_destroyWeak;
if (!fn) {
- llvm::FunctionType *fnType =
- llvm::FunctionType::get(Builder.getVoidTy(), Int8PtrPtrTy, false);
- fn = createARCRuntimeFunction(CGM, fnType, "objc_destroyWeak");
+ fn = CGM.getIntrinsic(llvm::Intrinsic::objc_destroyWeak);
+ setARCRuntimeFunctionLinkage(CGM, fn);
}
// Cast the argument to 'id*'.
@@ -2297,7 +2393,7 @@ void CodeGenFunction::EmitARCDestroyWeak(Address addr) {
void CodeGenFunction::EmitARCMoveWeak(Address dst, Address src) {
emitARCCopyOperation(*this, dst, src,
CGM.getObjCEntrypoints().objc_moveWeak,
- "objc_moveWeak");
+ llvm::Intrinsic::objc_moveWeak);
}
/// void \@objc_copyWeak(i8** %dest, i8** %src)
@@ -2306,7 +2402,7 @@ void CodeGenFunction::EmitARCMoveWeak(Address dst, Address src) {
void CodeGenFunction::EmitARCCopyWeak(Address dst, Address src) {
emitARCCopyOperation(*this, dst, src,
CGM.getObjCEntrypoints().objc_copyWeak,
- "objc_copyWeak");
+ llvm::Intrinsic::objc_copyWeak);
}
void CodeGenFunction::emitARCCopyAssignWeak(QualType Ty, Address DstAddr,
@@ -2329,9 +2425,8 @@ void CodeGenFunction::emitARCMoveAssignWeak(QualType Ty, Address DstAddr,
llvm::Value *CodeGenFunction::EmitObjCAutoreleasePoolPush() {
llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_autoreleasePoolPush;
if (!fn) {
- llvm::FunctionType *fnType =
- llvm::FunctionType::get(Int8PtrTy, false);
- fn = createARCRuntimeFunction(CGM, fnType, "objc_autoreleasePoolPush");
+ fn = CGM.getIntrinsic(llvm::Intrinsic::objc_autoreleasePoolPush);
+ setARCRuntimeFunctionLinkage(CGM, fn);
}
return EmitNounwindRuntimeCall(fn);
@@ -2342,18 +2437,28 @@ llvm::Value *CodeGenFunction::EmitObjCAutoreleasePoolPush() {
void CodeGenFunction::EmitObjCAutoreleasePoolPop(llvm::Value *value) {
assert(value->getType() == Int8PtrTy);
- llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_autoreleasePoolPop;
- if (!fn) {
- llvm::FunctionType *fnType =
- llvm::FunctionType::get(Builder.getVoidTy(), Int8PtrTy, false);
+ if (getInvokeDest()) {
+    // Call the runtime function, not the intrinsic, if we are handling
+    // exceptions.
+ llvm::Constant *&fn =
+ CGM.getObjCEntrypoints().objc_autoreleasePoolPopInvoke;
+ if (!fn) {
+ llvm::FunctionType *fnType =
+ llvm::FunctionType::get(Builder.getVoidTy(), Int8PtrTy, false);
+ fn = CGM.CreateRuntimeFunction(fnType, "objc_autoreleasePoolPop");
+ setARCRuntimeFunctionLinkage(CGM, fn);
+ }
- // We don't want to use a weak import here; instead we should not
- // fall into this path.
- fn = createARCRuntimeFunction(CGM, fnType, "objc_autoreleasePoolPop");
- }
+ // objc_autoreleasePoolPop can throw.
+ EmitRuntimeCallOrInvoke(fn, value);
+ } else {
+ llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_autoreleasePoolPop;
+ if (!fn) {
+ fn = CGM.getIntrinsic(llvm::Intrinsic::objc_autoreleasePoolPop);
+ setARCRuntimeFunctionLinkage(CGM, fn);
+ }
- // objc_autoreleasePoolPop can throw.
- EmitRuntimeCallOrInvoke(fn, value);
+ EmitRuntimeCall(fn, value);
+ }
}
/// Produce the code to do an MRR version objc_autoreleasepool_push.
@@ -2384,6 +2489,24 @@ llvm::Value *CodeGenFunction::EmitObjCMRRAutoreleasePoolPush() {
return InitRV.getScalarVal();
}
+/// Allocate the given objc object.
+/// call i8* \@objc_alloc(i8* %value)
+llvm::Value *CodeGenFunction::EmitObjCAlloc(llvm::Value *value,
+ llvm::Type *resultType) {
+ return emitObjCValueOperation(*this, value, resultType,
+ CGM.getObjCEntrypoints().objc_alloc,
+ "objc_alloc");
+}
+
+/// Allocate the given objc object.
+/// call i8* \@objc_allocWithZone(i8* %value)
+llvm::Value *CodeGenFunction::EmitObjCAllocWithZone(llvm::Value *value,
+ llvm::Type *resultType) {
+ return emitObjCValueOperation(*this, value, resultType,
+ CGM.getObjCEntrypoints().objc_allocWithZone,
+ "objc_allocWithZone");
+}
+
/// Produce the code to do a primitive release.
/// [tmp drain];
void CodeGenFunction::EmitObjCMRRAutoreleasePoolPop(llvm::Value *Arg) {
@@ -2418,6 +2541,55 @@ void CodeGenFunction::emitARCIntrinsicUse(CodeGenFunction &CGF, Address addr,
CGF.EmitARCIntrinsicUse(value);
}
+/// Autorelease the given object.
+/// call i8* \@objc_autorelease(i8* %value)
+llvm::Value *CodeGenFunction::EmitObjCAutorelease(llvm::Value *value,
+ llvm::Type *returnType) {
+ return emitObjCValueOperation(*this, value, returnType,
+ CGM.getObjCEntrypoints().objc_autoreleaseRuntimeFunction,
+ "objc_autorelease");
+}
+
+/// Retain the given object, with normal retain semantics.
+/// call i8* \@objc_retain(i8* %value)
+llvm::Value *CodeGenFunction::EmitObjCRetainNonBlock(llvm::Value *value,
+ llvm::Type *returnType) {
+ return emitObjCValueOperation(*this, value, returnType,
+ CGM.getObjCEntrypoints().objc_retainRuntimeFunction,
+ "objc_retain");
+}
+
+/// Release the given object.
+/// call void \@objc_release(i8* %value)
+void CodeGenFunction::EmitObjCRelease(llvm::Value *value,
+ ARCPreciseLifetime_t precise) {
+ if (isa<llvm::ConstantPointerNull>(value)) return;
+
+ llvm::Constant *&fn = CGM.getObjCEntrypoints().objc_release;
+  if (!fn) {
+    llvm::FunctionType *fnType =
+        llvm::FunctionType::get(Builder.getVoidTy(), Int8PtrTy, false);
+    fn = CGM.CreateRuntimeFunction(fnType, "objc_release");
+    setARCRuntimeFunctionLinkage(CGM, fn);
+    // We have Native ARC, so set the nonlazybind attribute for performance.
+    if (llvm::Function *f = dyn_cast<llvm::Function>(fn))
+      f->addFnAttr(llvm::Attribute::NonLazyBind);
+  }
+
+ // Cast the argument to 'id'.
+ value = Builder.CreateBitCast(value, Int8PtrTy);
+
+ // Call objc_release.
+ llvm::CallInst *call = EmitNounwindRuntimeCall(fn, value);
+
+ if (precise == ARCImpreciseLifetime) {
+ call->setMetadata("clang.imprecise_release",
+ llvm::MDNode::get(Builder.getContext(), None));
+ }
+}
+
namespace {
struct CallObjCAutoreleasePoolObject final : EHScopeStack::Cleanup {
llvm::Value *Token;
@@ -2446,27 +2618,36 @@ void CodeGenFunction::EmitObjCAutoreleasePoolCleanup(llvm::Value *Ptr) {
EHStack.pushCleanup<CallObjCMRRAutoreleasePoolObject>(NormalCleanup, Ptr);
}
-static TryEmitResult tryEmitARCRetainLoadOfScalar(CodeGenFunction &CGF,
- LValue lvalue,
- QualType type) {
- switch (type.getObjCLifetime()) {
+static bool shouldRetainObjCLifetime(Qualifiers::ObjCLifetime lifetime) {
+ switch (lifetime) {
case Qualifiers::OCL_None:
case Qualifiers::OCL_ExplicitNone:
case Qualifiers::OCL_Strong:
case Qualifiers::OCL_Autoreleasing:
- return TryEmitResult(CGF.EmitLoadOfLValue(lvalue,
- SourceLocation()).getScalarVal(),
- false);
+ return true;
case Qualifiers::OCL_Weak:
- return TryEmitResult(CGF.EmitARCLoadWeakRetained(lvalue.getAddress()),
- true);
+ return false;
}
llvm_unreachable("impossible lifetime!");
}
static TryEmitResult tryEmitARCRetainLoadOfScalar(CodeGenFunction &CGF,
+ LValue lvalue,
+ QualType type) {
+ llvm::Value *result;
+ bool shouldRetain = shouldRetainObjCLifetime(type.getObjCLifetime());
+ if (shouldRetain) {
+ result = CGF.EmitLoadOfLValue(lvalue, SourceLocation()).getScalarVal();
+ } else {
+ assert(type.getObjCLifetime() == Qualifiers::OCL_Weak);
+ result = CGF.EmitARCLoadWeakRetained(lvalue.getAddress());
+ }
+ return TryEmitResult(result, !shouldRetain);
+}
+
+static TryEmitResult tryEmitARCRetainLoadOfScalar(CodeGenFunction &CGF,
const Expr *e) {
e = e->IgnoreParens();
QualType type = e->getType();
@@ -2500,6 +2681,16 @@ static TryEmitResult tryEmitARCRetainLoadOfScalar(CodeGenFunction &CGF,
cast<BinaryOperator>(e)->getOpcode() == BO_Assign)
return TryEmitResult(CGF.EmitScalarExpr(e), false);
+  // Try to emit code for the scalar constant instead of emitting an LValue
+  // and loading it, because we are not guaranteed to have an l-value. One
+  // such case is a DeclRefExpr referencing a non-odr-used constant-evaluated
+  // variable.
+ if (const auto *decl_expr = dyn_cast<DeclRefExpr>(e)) {
+ auto *DRE = const_cast<DeclRefExpr *>(decl_expr);
+ if (CodeGenFunction::ConstantEmission constant = CGF.tryEmitAsConstant(DRE))
+ return TryEmitResult(CGF.emitScalarConstant(constant, DRE),
+ !shouldRetainObjCLifetime(type.getObjCLifetime()));
+ }
+
return tryEmitARCRetainLoadOfScalar(CGF, CGF.EmitLValue(e), type);
}
@@ -3229,29 +3420,32 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction(
ASTContext &C = getContext();
IdentifierInfo *II
= &CGM.getContext().Idents.get("__assign_helper_atomic_property_");
- FunctionDecl *FD = FunctionDecl::Create(C,
- C.getTranslationUnitDecl(),
- SourceLocation(),
- SourceLocation(), II, C.VoidTy,
- nullptr, SC_Static,
- false,
- false);
+ QualType ReturnTy = C.VoidTy;
QualType DestTy = C.getPointerType(Ty);
QualType SrcTy = Ty;
SrcTy.addConst();
SrcTy = C.getPointerType(SrcTy);
+ SmallVector<QualType, 2> ArgTys;
+ ArgTys.push_back(DestTy);
+ ArgTys.push_back(SrcTy);
+ QualType FunctionTy = C.getFunctionType(ReturnTy, ArgTys, {});
+
+ FunctionDecl *FD = FunctionDecl::Create(
+ C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
+ FunctionTy, nullptr, SC_Static, false, false);
+
FunctionArgList args;
- ImplicitParamDecl DstDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr,
- DestTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl DstDecl(C, FD, SourceLocation(), /*Id=*/nullptr, DestTy,
+ ImplicitParamDecl::Other);
args.push_back(&DstDecl);
- ImplicitParamDecl SrcDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr,
- SrcTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl SrcDecl(C, FD, SourceLocation(), /*Id=*/nullptr, SrcTy,
+ ImplicitParamDecl::Other);
args.push_back(&SrcDecl);
const CGFunctionInfo &FI =
- CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args);
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, args);
llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI);
@@ -3262,25 +3456,25 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction(
CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
- StartFunction(FD, C.VoidTy, Fn, FI, args);
+ StartFunction(FD, ReturnTy, Fn, FI, args);
- DeclRefExpr DstExpr(&DstDecl, false, DestTy,
- VK_RValue, SourceLocation());
+ DeclRefExpr DstExpr(getContext(), &DstDecl, false, DestTy, VK_RValue,
+ SourceLocation());
UnaryOperator DST(&DstExpr, UO_Deref, DestTy->getPointeeType(),
VK_LValue, OK_Ordinary, SourceLocation(), false);
- DeclRefExpr SrcExpr(&SrcDecl, false, SrcTy,
- VK_RValue, SourceLocation());
+ DeclRefExpr SrcExpr(getContext(), &SrcDecl, false, SrcTy, VK_RValue,
+ SourceLocation());
UnaryOperator SRC(&SrcExpr, UO_Deref, SrcTy->getPointeeType(),
VK_LValue, OK_Ordinary, SourceLocation(), false);
Expr *Args[2] = { &DST, &SRC };
CallExpr *CalleeExp = cast<CallExpr>(PID->getSetterCXXAssignment());
- CXXOperatorCallExpr TheCall(C, OO_Equal, CalleeExp->getCallee(),
- Args, DestTy->getPointeeType(),
- VK_LValue, SourceLocation(), FPOptions());
+ CXXOperatorCallExpr *TheCall = CXXOperatorCallExpr::Create(
+ C, OO_Equal, CalleeExp->getCallee(), Args, DestTy->getPointeeType(),
+ VK_LValue, SourceLocation(), FPOptions());
- EmitStmt(&TheCall);
+ EmitStmt(TheCall);
FinishFunction();
HelperFn = llvm::ConstantExpr::getBitCast(Fn, VoidPtrTy);
@@ -3301,53 +3495,54 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction(
if ((!(PD->getPropertyAttributes() & ObjCPropertyDecl::OBJC_PR_atomic)))
return nullptr;
llvm::Constant *HelperFn = nullptr;
-
if (hasTrivialGetExpr(PID))
return nullptr;
assert(PID->getGetterCXXConstructor() && "getGetterCXXConstructor - null");
if ((HelperFn = CGM.getAtomicGetterHelperFnMap(Ty)))
return HelperFn;
-
ASTContext &C = getContext();
- IdentifierInfo *II
- = &CGM.getContext().Idents.get("__copy_helper_atomic_property_");
- FunctionDecl *FD = FunctionDecl::Create(C,
- C.getTranslationUnitDecl(),
- SourceLocation(),
- SourceLocation(), II, C.VoidTy,
- nullptr, SC_Static,
- false,
- false);
+ IdentifierInfo *II =
+ &CGM.getContext().Idents.get("__copy_helper_atomic_property_");
+ QualType ReturnTy = C.VoidTy;
QualType DestTy = C.getPointerType(Ty);
QualType SrcTy = Ty;
SrcTy.addConst();
SrcTy = C.getPointerType(SrcTy);
+ SmallVector<QualType, 2> ArgTys;
+ ArgTys.push_back(DestTy);
+ ArgTys.push_back(SrcTy);
+ QualType FunctionTy = C.getFunctionType(ReturnTy, ArgTys, {});
+
+ FunctionDecl *FD = FunctionDecl::Create(
+ C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
+ FunctionTy, nullptr, SC_Static, false, false);
+
FunctionArgList args;
- ImplicitParamDecl DstDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr,
- DestTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl DstDecl(C, FD, SourceLocation(), /*Id=*/nullptr, DestTy,
+ ImplicitParamDecl::Other);
args.push_back(&DstDecl);
- ImplicitParamDecl SrcDecl(getContext(), FD, SourceLocation(), /*Id=*/nullptr,
- SrcTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl SrcDecl(C, FD, SourceLocation(), /*Id=*/nullptr, SrcTy,
+ ImplicitParamDecl::Other);
args.push_back(&SrcDecl);
const CGFunctionInfo &FI =
- CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args);
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, args);
llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI);
- llvm::Function *Fn =
- llvm::Function::Create(LTy, llvm::GlobalValue::InternalLinkage,
- "__copy_helper_atomic_property_", &CGM.getModule());
+ llvm::Function *Fn = llvm::Function::Create(
+ LTy, llvm::GlobalValue::InternalLinkage, "__copy_helper_atomic_property_",
+ &CGM.getModule());
CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI);
- StartFunction(FD, C.VoidTy, Fn, FI, args);
+ StartFunction(FD, ReturnTy, Fn, FI, args);
- DeclRefExpr SrcExpr(&SrcDecl, false, SrcTy,
- VK_RValue, SourceLocation());
+ DeclRefExpr SrcExpr(getContext(), &SrcDecl, false, SrcTy, VK_RValue,
+ SourceLocation());
UnaryOperator SRC(&SrcExpr, UO_Deref, SrcTy->getPointeeType(),
VK_LValue, OK_Ordinary, SourceLocation(), false);
@@ -3372,8 +3567,8 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction(
CXXConstExpr->getConstructionKind(),
SourceRange());
- DeclRefExpr DstExpr(&DstDecl, false, DestTy,
- VK_RValue, SourceLocation());
+ DeclRefExpr DstExpr(getContext(), &DstDecl, false, DestTy, VK_RValue,
+ SourceLocation());
RValue DV = EmitAnyExpr(&DstExpr);
CharUnits Alignment
diff --git a/lib/CodeGen/CGObjCGNU.cpp b/lib/CodeGen/CGObjCGNU.cpp
index 3e994edc976b..548bd6b3fd72 100644
--- a/lib/CodeGen/CGObjCGNU.cpp
+++ b/lib/CodeGen/CGObjCGNU.cpp
@@ -18,6 +18,7 @@
#include "CGCleanup.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
+#include "CGCXXABI.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
@@ -178,6 +179,9 @@ protected:
/// runtime provides some LLVM passes that can use this to do things like
/// automatic IMP caching and speculative inlining.
unsigned msgSendMDKind;
+ /// Does the current target use SEH-based exceptions? False implies
+ /// Itanium-style DWARF unwinding.
+ bool usesSEHExceptions;
/// Helper to check if we are targeting a specific runtime version or later.
bool isRuntime(ObjCRuntime::Kind kind, unsigned major, unsigned minor=0) {
@@ -217,6 +221,7 @@ protected:
llvm::Constant *value = llvm::ConstantDataArray::getString(VMContext,Str);
auto *GV = new llvm::GlobalVariable(TheModule, value->getType(), true,
llvm::GlobalValue::LinkOnceODRLinkage, value, name);
+ GV->setComdat(TheModule.getOrInsertComdat(name));
if (Private)
GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
ConstStr = GV;
@@ -272,6 +277,8 @@ protected:
Fields.addInt(Int8Ty, 0);
}
+ virtual llvm::Constant *GenerateCategoryProtocolList(const
+ ObjCCategoryDecl *OCD);
virtual ConstantArrayBuilder PushPropertyListHeader(ConstantStructBuilder &Fields,
int count) {
// int count;
@@ -510,8 +517,8 @@ protected:
/// Returns a selector with the specified type encoding. An empty string is
/// used to return an untyped selector (with the types field set to NULL).
- virtual llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel,
- const std::string &TypeEncoding);
+ virtual llvm::Value *GetTypedSelector(CodeGenFunction &CGF, Selector Sel,
+ const std::string &TypeEncoding);
/// Returns the name of ivar offset variables. In the GNUstep v1 ABI, this
/// contains the class and ivar names, in the v2 ABI this contains the type
@@ -810,8 +817,12 @@ class CGObjCGNUstep : public CGObjCGNU {
// Slot_t objc_slot_lookup_super(struct objc_super*, SEL);
SlotLookupSuperFn.init(&CGM, "objc_slot_lookup_super", SlotTy,
PtrToObjCSuperTy, SelectorTy);
- // If we're in ObjC++ mode, then we want to make
- if (CGM.getLangOpts().CPlusPlus) {
+ // If we're in ObjC++ mode, then we want to make
+ if (usesSEHExceptions) {
+ llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext);
+ // void objc_exception_rethrow(void)
+ ExceptionReThrowFn.init(&CGM, "objc_exception_rethrow", VoidTy);
+ } else if (CGM.getLangOpts().CPlusPlus) {
llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext);
// void *__cxa_begin_catch(void *e)
EnterCatchFn.init(&CGM, "__cxa_begin_catch", PtrTy, PtrTy);
@@ -888,22 +899,25 @@ class CGObjCGNUstep : public CGObjCGNU {
/// This is the ABI that provides a clean break with the legacy GCC ABI and
/// cleans up a number of things that were added to work around 1980s linkers.
class CGObjCGNUstep2 : public CGObjCGNUstep {
- /// The section for selectors.
- static constexpr const char *const SelSection = "__objc_selectors";
- /// The section for classes.
- static constexpr const char *const ClsSection = "__objc_classes";
- /// The section for references to classes.
- static constexpr const char *const ClsRefSection = "__objc_class_refs";
- /// The section for categories.
- static constexpr const char *const CatSection = "__objc_cats";
- /// The section for protocols.
- static constexpr const char *const ProtocolSection = "__objc_protocols";
- /// The section for protocol references.
- static constexpr const char *const ProtocolRefSection = "__objc_protocol_refs";
- /// The section for class aliases
- static constexpr const char *const ClassAliasSection = "__objc_class_aliases";
- /// The section for constexpr constant strings
- static constexpr const char *const ConstantStringSection = "__objc_constant_string";
+ enum SectionKind
+ {
+ SelectorSection = 0,
+ ClassSection,
+ ClassReferenceSection,
+ CategorySection,
+ ProtocolSection,
+ ProtocolReferenceSection,
+ ClassAliasSection,
+ ConstantStringSection
+ };
+ static const char *const SectionsBaseNames[8];
+ template<SectionKind K>
+ std::string sectionName() {
+ std::string name(SectionsBaseNames[K]);
+ if (CGM.getTriple().isOSBinFormatCOFF())
+ name += "$m";
+ return name;
+ }
/// The GCC ABI superclass message lookup function. Takes a pointer to a
/// structure describing the receiver and the class, and a selector as
/// arguments. Returns the IMP for the corresponding method.
@@ -1069,7 +1083,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
isNamed ? StringRef(StringName) : ".objc_string",
Align, false, isNamed ? llvm::GlobalValue::LinkOnceODRLinkage
: llvm::GlobalValue::PrivateLinkage);
- ObjCStrGV->setSection(ConstantStringSection);
+ ObjCStrGV->setSection(sectionName<ConstantStringSection>());
if (isNamed) {
ObjCStrGV->setComdat(TheModule.getOrInsertComdat(StringName));
ObjCStrGV->setVisibility(llvm::GlobalValue::HiddenVisibility);
@@ -1152,6 +1166,15 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
return MethodList.finishAndCreateGlobal(".objc_protocol_method_list",
CGM.getPointerAlign());
}
+ llvm::Constant *GenerateCategoryProtocolList(const ObjCCategoryDecl *OCD)
+ override {
+ SmallVector<llvm::Constant*, 16> Protocols;
+ for (const auto *PI : OCD->getReferencedProtocols())
+ Protocols.push_back(
+ llvm::ConstantExpr::getBitCast(GenerateProtocolRef(PI),
+ ProtocolPtrTy));
+ return GenerateProtocolList(Protocols);
+ }
llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, Address ObjCSuper,
llvm::Value *cmd, MessageSendInfo &MSI) override {
@@ -1247,9 +1270,10 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
assert(!TheModule.getGlobalVariable(RefName));
// Emit a reference symbol.
auto GV = new llvm::GlobalVariable(TheModule, ProtocolPtrTy,
- false, llvm::GlobalValue::ExternalLinkage,
+ false, llvm::GlobalValue::LinkOnceODRLinkage,
llvm::ConstantExpr::getBitCast(Protocol, ProtocolPtrTy), RefName);
- GV->setSection(ProtocolRefSection);
+ GV->setComdat(TheModule.getOrInsertComdat(RefName));
+ GV->setSection(sectionName<ProtocolReferenceSection>());
GV->setAlignment(CGM.getPointerAlign().getQuantity());
Ref = GV;
}
@@ -1282,9 +1306,22 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
EmittedProtocol = true;
+ auto SymName = SymbolForProtocol(ProtocolName);
+ auto *OldGV = TheModule.getGlobalVariable(SymName);
+
// Use the protocol definition, if there is one.
if (const ObjCProtocolDecl *Def = PD->getDefinition())
PD = Def;
+ else {
+ // If there is no definition, then create an external linkage symbol and
+ // hope that someone else fills it in for us (and fail to link if they
+ // don't).
+ assert(!OldGV);
+ Protocol = new llvm::GlobalVariable(TheModule, ProtocolTy,
+ /*isConstant*/false,
+ llvm::GlobalValue::ExternalLinkage, nullptr, SymName);
+ return Protocol;
+ }
SmallVector<llvm::Constant*, 16> Protocols;
for (const auto *PI : PD->protocols())
@@ -1301,8 +1338,6 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
EmitProtocolMethodList(PD->class_methods(), ClassMethodList,
OptionalClassMethodList);
- auto SymName = SymbolForProtocol(ProtocolName);
- auto *OldGV = TheModule.getGlobalVariable(SymName);
// The isa pointer must be set to a magic number so the runtime knows it's
// the correct layout.
ConstantInitBuilder builder(CGM);
@@ -1326,7 +1361,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
auto *GV = ProtocolBuilder.finishAndCreateGlobal(SymName,
CGM.getPointerAlign(), false, llvm::GlobalValue::ExternalLinkage);
- GV->setSection(ProtocolSection);
+ GV->setSection(sectionName<ProtocolSection>());
GV->setComdat(TheModule.getOrInsertComdat(SymName));
if (OldGV) {
OldGV->replaceAllUsesWith(llvm::ConstantExpr::getBitCast(GV,
@@ -1342,8 +1377,8 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
return Val;
return llvm::ConstantExpr::getBitCast(Val, Ty);
}
- llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel,
- const std::string &TypeEncoding) override {
+ llvm::Value *GetTypedSelector(CodeGenFunction &CGF, Selector Sel,
+ const std::string &TypeEncoding) override {
return GetConstantSelector(Sel, TypeEncoding);
}
llvm::Constant *GetTypeString(llvm::StringRef TypeEncoding) {
@@ -1359,6 +1394,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
TypeEncoding);
auto *GV = new llvm::GlobalVariable(TheModule, Init->getType(),
true, llvm::GlobalValue::LinkOnceODRLinkage, Init, TypesVarName);
+ GV->setComdat(TheModule.getOrInsertComdat(TypesVarName));
GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
TypesGlobal = GV;
}
@@ -1387,12 +1423,41 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
CGM.getPointerAlign(), false, llvm::GlobalValue::LinkOnceODRLinkage);
GV->setComdat(TheModule.getOrInsertComdat(SelVarName));
GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
- GV->setSection(SelSection);
+ GV->setSection(sectionName<SelectorSection>());
auto *SelVal = EnforceType(GV, SelectorTy);
return SelVal;
}
+ llvm::StructType *emptyStruct = nullptr;
+
+  /// Return pointers to the start and end of a section. On ELF platforms, we
+  /// use the __start_ and __stop_ symbols that GNU-compatible linkers define
+  /// at the start and end of a section, as long as the section name is a
+  /// valid identifier and the symbols are referenced but not defined. On
+  /// Windows, we rely on MSVC-compatible linkers sorting subsections
+  /// lexically: everything that we want to reference goes in a middle
+  /// subsection, with zero-sized sentinel symbols in subsections a and z.
std::pair<llvm::Constant*,llvm::Constant*>
GetSectionBounds(StringRef Section) {
+ if (CGM.getTriple().isOSBinFormatCOFF()) {
+ if (emptyStruct == nullptr) {
+ emptyStruct = llvm::StructType::create(VMContext, ".objc_section_sentinel");
+ emptyStruct->setBody({}, /*isPacked*/true);
+ }
+ auto ZeroInit = llvm::Constant::getNullValue(emptyStruct);
+ auto Sym = [&](StringRef Prefix, StringRef SecSuffix) {
+ auto *Sym = new llvm::GlobalVariable(TheModule, emptyStruct,
+ /*isConstant*/false,
+ llvm::GlobalValue::LinkOnceODRLinkage, ZeroInit, Prefix +
+ Section);
+ Sym->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ Sym->setSection((Section + SecSuffix).str());
+ Sym->setComdat(TheModule.getOrInsertComdat((Prefix +
+ Section).str()));
+ Sym->setAlignment(1);
+ return Sym;
+ };
+ return { Sym("__start_", "$a"), Sym("__stop", "$z") };
+ }
auto *Start = new llvm::GlobalVariable(TheModule, PtrTy,
/*isConstant*/false,
llvm::GlobalValue::ExternalLinkage, nullptr, StringRef("__start_") +
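
The doc comment on GetSectionBounds above leans on a linker convention: for a section whose name is a valid C identifier, GNU-compatible linkers synthesise __start_<section> and __stop_<section> symbols for any object that references them without defining them, while on COFF the bounds come from zero-sized sentinels in lexically sorted subsections. A minimal stand-alone sketch of the ELF-side idiom, with made-up section and symbol names and not taken from this patch:

    // Illustrative ELF-only sketch: one entry is placed in a custom section so
    // the section exists, then the linker-provided bounds are used as an array.
    #include <cstddef>
    #include <cstdio>

    __attribute__((used, section("__objc_selectors")))
    static const void *DummyEntry = nullptr;

    // Never defined here; GNU-compatible linkers define these at the section
    // bounds because "__objc_selectors" is a valid identifier.
    extern "C" const void *__start___objc_selectors[];
    extern "C" const void *__stop___objc_selectors[];

    int main() {
      std::size_t N = __stop___objc_selectors - __start___objc_selectors;
      std::printf("%zu entries between __start_ and __stop_\n", N);
    }

This is the same pattern the runtime-side loader can use to walk the emitted selector, class, and protocol metadata without any per-entry registration code.
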
@@ -1405,6 +1470,9 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
Stop->setVisibility(llvm::GlobalValue::HiddenVisibility);
return { Start, Stop };
}
+ CatchTypeInfo getCatchAllTypeInfo() override {
+ return CGM.getCXXABI().getCatchAllTypeInfo();
+ }
llvm::Function *ModuleInitFunction() override {
llvm::Function *LoadFunction = llvm::Function::Create(
llvm::FunctionType::get(llvm::Type::getVoidTy(VMContext), false),
@@ -1420,19 +1488,11 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
ConstantInitBuilder builder(CGM);
auto InitStructBuilder = builder.beginStruct();
InitStructBuilder.addInt(Int64Ty, 0);
- auto addSection = [&](const char *section) {
- auto bounds = GetSectionBounds(section);
+ for (auto *s : SectionsBaseNames) {
+ auto bounds = GetSectionBounds(s);
InitStructBuilder.add(bounds.first);
InitStructBuilder.add(bounds.second);
};
- addSection(SelSection);
- addSection(ClsSection);
- addSection(ClsRefSection);
- addSection(CatSection);
- addSection(ProtocolSection);
- addSection(ProtocolRefSection);
- addSection(ClassAliasSection);
- addSection(ConstantStringSection);
auto *InitStruct = InitStructBuilder.finishAndCreateGlobal(".objc_init",
CGM.getPointerAlign(), false, llvm::GlobalValue::LinkOnceODRLinkage);
InitStruct->setVisibility(llvm::GlobalValue::HiddenVisibility);
@@ -1451,18 +1511,23 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
// Check that this hasn't been renamed. This shouldn't happen, because
// this function should be called precisely once.
assert(InitVar->getName() == ".objc_ctor");
- InitVar->setSection(".ctors");
+    // On Windows, initialisers are sorted by the section-name suffix. XCL is
+    // for library initialisers, which run before user initialisers. We run
+    // the Objective-C loads at the end of library initialisation, so +load
+    // methods run before any other static constructors, while static
+    // constructors can still see a fully initialised Objective-C state.
+ if (CGM.getTriple().isOSBinFormatCOFF())
+ InitVar->setSection(".CRT$XCLz");
+ else
+ InitVar->setSection(".ctors");
InitVar->setVisibility(llvm::GlobalValue::HiddenVisibility);
InitVar->setComdat(TheModule.getOrInsertComdat(".objc_ctor"));
- CGM.addCompilerUsedGlobal(InitVar);
+ CGM.addUsedGlobal(InitVar);
for (auto *C : Categories) {
auto *Cat = cast<llvm::GlobalVariable>(C->stripPointerCasts());
- Cat->setSection(CatSection);
+ Cat->setSection(sectionName<CategorySection>());
CGM.addUsedGlobal(Cat);
}
- // Add a null value fore each special section so that we can always
- // guarantee that the _start and _stop symbols will exist and be
- // meaningful.
auto createNullGlobal = [&](StringRef Name, ArrayRef<llvm::Constant*> Init,
StringRef Section) {
auto nullBuilder = builder.beginStruct();
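
The .CRT$XCLz section choice above works because the MSVC CRT walks the function pointers placed between .CRT$XCA and .CRT$XCZ in lexical subsection order, so XCL* (library) entries run after the CRT's own setup but before the XCU user-level constructors. A hedged source-level sketch of the same idea using MSVC extensions, with hypothetical function names and not taken from this patch:

    // MSVC-specific sketch: drop a function pointer into a .CRT$XCL* subsection
    // so the CRT startup loop calls it before user-level (.CRT$XCU) initialisers.
    typedef void(__cdecl *InitFn)(void);

    static void __cdecl runObjCLoads(void) {
      // e.g. walk the registered classes and invoke their +load methods
    }

    #pragma section(".CRT$XCLz", read)
    __declspec(allocate(".CRT$XCLz")) static const InitFn objcCtorEntry =
        &runObjCLoads;

    // As with the addUsedGlobal() call in the patch, the entry must be kept
    // alive so that linker dead-stripping (/OPT:REF) does not discard it.
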
@@ -1476,38 +1541,48 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
CGM.addUsedGlobal(GV);
return GV;
};
- createNullGlobal(".objc_null_selector", {NULLPtr, NULLPtr}, SelSection);
- if (Categories.empty())
- createNullGlobal(".objc_null_category", {NULLPtr, NULLPtr,
- NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr}, CatSection);
- if (!EmittedClass) {
- createNullGlobal(".objc_null_cls_init_ref", NULLPtr, ClsSection);
- createNullGlobal(".objc_null_class_ref", { NULLPtr, NULLPtr },
- ClsRefSection);
- }
- if (!EmittedProtocol)
- createNullGlobal(".objc_null_protocol", {NULLPtr, NULLPtr, NULLPtr,
- NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr,
- NULLPtr}, ProtocolSection);
- if (!EmittedProtocolRef)
- createNullGlobal(".objc_null_protocol_ref", {NULLPtr}, ProtocolRefSection);
- if (!ClassAliases.empty())
- for (auto clsAlias : ClassAliases)
- createNullGlobal(std::string(".objc_class_alias") +
- clsAlias.second, { MakeConstantString(clsAlias.second),
- GetClassVar(clsAlias.first) }, ClassAliasSection);
- else
- createNullGlobal(".objc_null_class_alias", { NULLPtr, NULLPtr },
- ClassAliasSection);
- if (ConstantStrings.empty()) {
- auto i32Zero = llvm::ConstantInt::get(Int32Ty, 0);
- createNullGlobal(".objc_null_constant_string", { NULLPtr, i32Zero,
- i32Zero, i32Zero, i32Zero, NULLPtr }, ConstantStringSection);
+ for (auto clsAlias : ClassAliases)
+ createNullGlobal(std::string(".objc_class_alias") +
+ clsAlias.second, { MakeConstantString(clsAlias.second),
+ GetClassVar(clsAlias.first) }, sectionName<ClassAliasSection>());
+ // On ELF platforms, add a null value for each special section so that we
+ // can always guarantee that the _start and _stop symbols will exist and be
+ // meaningful. This is not required on COFF platforms, where our start and
+ // stop symbols will create the section.
+ if (!CGM.getTriple().isOSBinFormatCOFF()) {
+ createNullGlobal(".objc_null_selector", {NULLPtr, NULLPtr},
+ sectionName<SelectorSection>());
+ if (Categories.empty())
+ createNullGlobal(".objc_null_category", {NULLPtr, NULLPtr,
+ NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr},
+ sectionName<CategorySection>());
+ if (!EmittedClass) {
+ createNullGlobal(".objc_null_cls_init_ref", NULLPtr,
+ sectionName<ClassSection>());
+ createNullGlobal(".objc_null_class_ref", { NULLPtr, NULLPtr },
+ sectionName<ClassReferenceSection>());
+ }
+ if (!EmittedProtocol)
+ createNullGlobal(".objc_null_protocol", {NULLPtr, NULLPtr, NULLPtr,
+ NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr, NULLPtr,
+ NULLPtr}, sectionName<ProtocolSection>());
+ if (!EmittedProtocolRef)
+ createNullGlobal(".objc_null_protocol_ref", {NULLPtr},
+ sectionName<ProtocolReferenceSection>());
+ if (ClassAliases.empty())
+ createNullGlobal(".objc_null_class_alias", { NULLPtr, NULLPtr },
+ sectionName<ClassAliasSection>());
+ if (ConstantStrings.empty()) {
+ auto i32Zero = llvm::ConstantInt::get(Int32Ty, 0);
+ createNullGlobal(".objc_null_constant_string", { NULLPtr, i32Zero,
+ i32Zero, i32Zero, i32Zero, NULLPtr },
+ sectionName<ConstantStringSection>());
+ }
}
ConstantStrings.clear();
Categories.clear();
Classes.clear();
- return nullptr;//CGObjCGNU::ModuleInitFunction();
+ return nullptr;
}
/// In the v2 ABI, ivar offset variables use the type encoding in their name
/// to trigger linker failures if the types don't match.
@@ -1774,7 +1849,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
}
auto *classRefSymbol = GetClassVar(className);
- classRefSymbol->setSection(ClsRefSection);
+ classRefSymbol->setSection(sectionName<ClassReferenceSection>());
classRefSymbol->setInitializer(llvm::ConstantExpr::getBitCast(classStruct, IdTy));
@@ -1805,7 +1880,7 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
auto classInitRef = new llvm::GlobalVariable(TheModule,
classStruct->getType(), false, llvm::GlobalValue::ExternalLinkage,
classStruct, "._OBJC_INIT_CLASS_" + className);
- classInitRef->setSection(ClsSection);
+ classInitRef->setSection(sectionName<ClassSection>());
CGM.addUsedGlobal(classInitRef);
EmittedClass = true;
@@ -1829,6 +1904,18 @@ class CGObjCGNUstep2 : public CGObjCGNUstep {
};
+const char *const CGObjCGNUstep2::SectionsBaseNames[8] =
+{
+"__objc_selectors",
+"__objc_classes",
+"__objc_class_refs",
+"__objc_cats",
+"__objc_protocols",
+"__objc_protocol_refs",
+"__objc_class_aliases",
+"__objc_constant_string"
+};
+
/// Support for the ObjFW runtime.
class CGObjCObjFW: public CGObjCGNU {
protected:
@@ -1931,6 +2018,8 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion,
ProtocolVersion(protocolClassVersion), ClassABIVersion(classABI) {
msgSendMDKind = VMContext.getMDKindID("GNUObjCMessageSend");
+ usesSEHExceptions =
+ cgm.getContext().getTargetInfo().getTriple().isWindowsMSVCEnvironment();
CodeGenTypes &Types = CGM.getTypes();
IntTy = cast<llvm::IntegerType>(
@@ -2121,8 +2210,8 @@ llvm::Value *CGObjCGNU::EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF) {
return Value;
}
-llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF, Selector Sel,
- const std::string &TypeEncoding) {
+llvm::Value *CGObjCGNU::GetTypedSelector(CodeGenFunction &CGF, Selector Sel,
+ const std::string &TypeEncoding) {
SmallVectorImpl<TypedSelector> &Types = SelectorTable[Sel];
llvm::GlobalAlias *SelValue = nullptr;
@@ -2155,13 +2244,13 @@ Address CGObjCGNU::GetAddrOfSelector(CodeGenFunction &CGF, Selector Sel) {
}
llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF, Selector Sel) {
- return GetSelector(CGF, Sel, std::string());
+ return GetTypedSelector(CGF, Sel, std::string());
}
llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF,
const ObjCMethodDecl *Method) {
std::string SelTypes = CGM.getContext().getObjCEncodingForMethodDecl(Method);
- return GetSelector(CGF, Method->getSelector(), SelTypes);
+ return GetTypedSelector(CGF, Method->getSelector(), SelTypes);
}
llvm::Constant *CGObjCGNU::GetEHType(QualType T) {
@@ -2186,6 +2275,9 @@ llvm::Constant *CGObjCGNU::GetEHType(QualType T) {
}
llvm::Constant *CGObjCGNUstep::GetEHType(QualType T) {
+ if (usesSEHExceptions)
+ return CGM.getCXXABI().getAddrOfRTTIDescriptor(T);
+
if (!CGM.getLangOpts().CPlusPlus)
return CGObjCGNU::GetEHType(T);
@@ -3018,18 +3110,21 @@ llvm::Constant *CGObjCGNU::MakeBitField(ArrayRef<bool> bits) {
return ptr;
}
+llvm::Constant *CGObjCGNU::GenerateCategoryProtocolList(const
+ ObjCCategoryDecl *OCD) {
+ SmallVector<std::string, 16> Protocols;
+ for (const auto *PD : OCD->getReferencedProtocols())
+ Protocols.push_back(PD->getNameAsString());
+ return GenerateProtocolList(Protocols);
+}
+
void CGObjCGNU::GenerateCategory(const ObjCCategoryImplDecl *OCD) {
const ObjCInterfaceDecl *Class = OCD->getClassInterface();
std::string ClassName = Class->getNameAsString();
std::string CategoryName = OCD->getNameAsString();
// Collect the names of referenced protocols
- SmallVector<std::string, 16> Protocols;
const ObjCCategoryDecl *CatDecl = OCD->getCategoryDecl();
- const ObjCList<ObjCProtocolDecl> &Protos = CatDecl->getReferencedProtocols();
- for (ObjCList<ObjCProtocolDecl>::iterator I = Protos.begin(),
- E = Protos.end(); I != E; ++I)
- Protocols.push_back((*I)->getNameAsString());
ConstantInitBuilder Builder(CGM);
auto Elements = Builder.beginStruct();
@@ -3051,7 +3146,7 @@ void CGObjCGNU::GenerateCategory(const ObjCCategoryImplDecl *OCD) {
GenerateMethodList(ClassName, CategoryName, ClassMethods, true),
PtrTy);
// Protocol list
- Elements.addBitCast(GenerateProtocolList(Protocols), PtrTy);
+ Elements.addBitCast(GenerateCategoryProtocolList(CatDecl), PtrTy);
if (isRuntime(ObjCRuntime::GNUstep, 2)) {
const ObjCCategoryDecl *Category =
Class->FindCategoryDeclaration(OCD->getIdentifier());
@@ -3460,12 +3555,16 @@ llvm::Function *CGObjCGNU::ModuleInitFunction() {
ConstantInitBuilder builder(CGM);
auto selectors = builder.beginArray(selStructTy);
auto &table = SelectorTable; // MSVC workaround
- for (auto &entry : table) {
+ std::vector<Selector> allSelectors;
+ for (auto &entry : table)
+ allSelectors.push_back(entry.first);
+ llvm::sort(allSelectors);
- std::string selNameStr = entry.first.getAsString();
+ for (auto &untypedSel : allSelectors) {
+ std::string selNameStr = untypedSel.getAsString();
llvm::Constant *selName = ExportUniqueString(selNameStr, ".objc_sel_name");
- for (TypedSelector &sel : entry.second) {
+ for (TypedSelector &sel : table[untypedSel]) {
llvm::Constant *selectorTypeEncoding = NULLPtr;
if (!sel.first.empty())
selectorTypeEncoding =
@@ -3726,6 +3825,7 @@ void CGObjCGNU::EmitThrowStmt(CodeGenFunction &CGF,
const ObjCAtThrowStmt &S,
bool ClearInsertionPoint) {
llvm::Value *ExceptionAsObject;
+ bool isRethrow = false;
if (const Expr *ThrowExpr = S.getThrowExpr()) {
llvm::Value *Exception = CGF.EmitObjCThrowOperand(ThrowExpr);
@@ -3734,11 +3834,24 @@ void CGObjCGNU::EmitThrowStmt(CodeGenFunction &CGF,
assert((!CGF.ObjCEHValueStack.empty() && CGF.ObjCEHValueStack.back()) &&
"Unexpected rethrow outside @catch block.");
ExceptionAsObject = CGF.ObjCEHValueStack.back();
+ isRethrow = true;
+ }
+ if (isRethrow && usesSEHExceptions) {
+ // For SEH, ExceptionAsObject may be undef, because the catch handler is
+ // not passed it for catchalls and so it is not visible to the catch
+ // funclet. The real thrown object will still be live on the stack at this
+ // point and will be rethrown. If we are explicitly rethrowing the object
+ // that was passed into the `@catch` block, then this code path is not
+ // reached and we will instead call `objc_exception_throw` with an explicit
+ // argument.
+ CGF.EmitRuntimeCallOrInvoke(ExceptionReThrowFn).setDoesNotReturn();
+ }
+ else {
+ ExceptionAsObject = CGF.Builder.CreateBitCast(ExceptionAsObject, IdTy);
+ llvm::CallSite Throw =
+ CGF.EmitRuntimeCallOrInvoke(ExceptionThrowFn, ExceptionAsObject);
+ Throw.setDoesNotReturn();
}
- ExceptionAsObject = CGF.Builder.CreateBitCast(ExceptionAsObject, IdTy);
- llvm::CallSite Throw =
- CGF.EmitRuntimeCallOrInvoke(ExceptionThrowFn, ExceptionAsObject);
- Throw.setDoesNotReturn();
CGF.Builder.CreateUnreachable();
if (ClearInsertionPoint)
CGF.Builder.ClearInsertionPoint();
@@ -3812,40 +3925,10 @@ llvm::GlobalVariable *CGObjCGNU::ObjCIvarOffsetVariable(
// is. This allows code compiled with non-fragile ivars to work correctly
// when linked against code which isn't (most of the time).
llvm::GlobalVariable *IvarOffsetPointer = TheModule.getNamedGlobal(Name);
- if (!IvarOffsetPointer) {
- // This will cause a run-time crash if we accidentally use it. A value of
- // 0 would seem more sensible, but will silently overwrite the isa pointer
- // causing a great deal of confusion.
- uint64_t Offset = -1;
- // We can't call ComputeIvarBaseOffset() here if we have the
- // implementation, because it will create an invalid ASTRecordLayout object
- // that we are then stuck with forever, so we only initialize the ivar
- // offset variable with a guess if we only have the interface. The
- // initializer will be reset later anyway, when we are generating the class
- // description.
- if (!CGM.getContext().getObjCImplementation(
- const_cast<ObjCInterfaceDecl *>(ID)))
- Offset = ComputeIvarBaseOffset(CGM, ID, Ivar);
-
- llvm::ConstantInt *OffsetGuess = llvm::ConstantInt::get(Int32Ty, Offset,
- /*isSigned*/true);
- // Don't emit the guess in non-PIC code because the linker will not be able
- // to replace it with the real version for a library. In non-PIC code you
- // must compile with the fragile ABI if you want to use ivars from a
- // GCC-compiled class.
- if (CGM.getLangOpts().PICLevel) {
- llvm::GlobalVariable *IvarOffsetGV = new llvm::GlobalVariable(TheModule,
- Int32Ty, false,
- llvm::GlobalValue::PrivateLinkage, OffsetGuess, Name+".guess");
- IvarOffsetPointer = new llvm::GlobalVariable(TheModule,
- IvarOffsetGV->getType(), false, llvm::GlobalValue::LinkOnceAnyLinkage,
- IvarOffsetGV, Name);
- } else {
- IvarOffsetPointer = new llvm::GlobalVariable(TheModule,
- llvm::Type::getInt32PtrTy(VMContext), false,
- llvm::GlobalValue::ExternalLinkage, nullptr, Name);
- }
- }
+ if (!IvarOffsetPointer)
+ IvarOffsetPointer = new llvm::GlobalVariable(TheModule,
+ llvm::Type::getInt32PtrTy(VMContext), false,
+ llvm::GlobalValue::ExternalLinkage, nullptr, Name);
return IvarOffsetPointer;
}
diff --git a/lib/CodeGen/CGObjCMac.cpp b/lib/CodeGen/CGObjCMac.cpp
index 2b54e7bd67af..d91eb43ca322 100644
--- a/lib/CodeGen/CGObjCMac.cpp
+++ b/lib/CodeGen/CGObjCMac.cpp
@@ -23,9 +23,9 @@
#include "clang/AST/DeclObjC.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/StmtObjC.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/LangOptions.h"
#include "clang/CodeGen/CGFunctionInfo.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
@@ -37,6 +37,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdio>
@@ -1085,9 +1086,14 @@ public:
const CGBlockInfo &blockInfo) override;
llvm::Constant *BuildRCBlockLayout(CodeGen::CodeGenModule &CGM,
const CGBlockInfo &blockInfo) override;
+ std::string getRCBlockLayoutStr(CodeGen::CodeGenModule &CGM,
+ const CGBlockInfo &blockInfo) override;
llvm::Constant *BuildByrefLayout(CodeGen::CodeGenModule &CGM,
QualType T) override;
+
+private:
+ void fillRunSkipBlockVars(CodeGenModule &CGM, const CGBlockInfo &blockInfo);
};
namespace {
@@ -2795,8 +2801,44 @@ llvm::Constant *CGObjCCommonMac::getBitmapBlockLayout(bool ComputeByrefLayout) {
return getConstantGEP(VMContext, Entry, 0, 0);
}
-llvm::Constant *CGObjCCommonMac::BuildRCBlockLayout(CodeGenModule &CGM,
- const CGBlockInfo &blockInfo) {
+static std::string getBlockLayoutInfoString(
+ const SmallVectorImpl<CGObjCCommonMac::RUN_SKIP> &RunSkipBlockVars,
+ bool HasCopyDisposeHelpers) {
+ std::string Str;
+ for (const CGObjCCommonMac::RUN_SKIP &R : RunSkipBlockVars) {
+ if (R.opcode == CGObjCCommonMac::BLOCK_LAYOUT_UNRETAINED) {
+ // Copy/dispose helpers don't have any information about
+ // __unsafe_unretained captures, so unconditionally concatenate a string.
+ Str += "u";
+ } else if (HasCopyDisposeHelpers) {
+ // Information about __strong, __weak, or byref captures has already been
+ // encoded into the names of the copy/dispose helpers. We have to add a
+ // string here only when the copy/dispose helpers aren't generated (which
+ // happens when the block is non-escaping).
+ continue;
+ } else {
+ switch (R.opcode) {
+ case CGObjCCommonMac::BLOCK_LAYOUT_STRONG:
+ Str += "s";
+ break;
+ case CGObjCCommonMac::BLOCK_LAYOUT_BYREF:
+ Str += "r";
+ break;
+ case CGObjCCommonMac::BLOCK_LAYOUT_WEAK:
+ Str += "w";
+ break;
+ default:
+ continue;
+ }
+ }
+ Str += llvm::to_string(R.block_var_bytepos.getQuantity());
+ Str += "l" + llvm::to_string(R.block_var_size.getQuantity());
+ }
+ return Str;
+}
+
+void CGObjCCommonMac::fillRunSkipBlockVars(CodeGenModule &CGM,
+ const CGBlockInfo &blockInfo) {
assert(CGM.getLangOpts().getGC() == LangOptions::NonGC);
RunSkipBlockVars.clear();
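
getBlockLayoutInfoString above encodes each interesting block capture as a letter ('u' for __unsafe_unretained always, and 's'/'r'/'w' for __strong/__block/__weak only when no copy/dispose helpers exist), followed by the capture's byte position and 'l' plus its size. A small self-contained sketch of that encoding with made-up capture layouts, not the CodeGen data structures:

    #include <cstdint>
    #include <iostream>
    #include <string>
    #include <vector>

    // Simplified stand-in for the RUN_SKIP records used above.
    enum class Capture { Unretained, Strong, Byref, Weak };
    struct Entry { Capture Kind; uint64_t BytePos; uint64_t Size; };

    static std::string encodeLayout(const std::vector<Entry> &Entries,
                                    bool HasCopyDisposeHelpers) {
      std::string Str;
      for (const Entry &E : Entries) {
        char Letter;
        if (E.Kind == Capture::Unretained)
          Letter = 'u';            // always emitted: helpers never describe it
        else if (HasCopyDisposeHelpers)
          continue;                // already encoded in the helper names
        else if (E.Kind == Capture::Strong)
          Letter = 's';
        else if (E.Kind == Capture::Byref)
          Letter = 'r';
        else
          Letter = 'w';
        Str += Letter;
        Str += std::to_string(E.BytePos);
        Str += "l" + std::to_string(E.Size);
      }
      return Str;
    }

    int main() {
      std::vector<Entry> Caps = {{Capture::Strong, 32, 8},
                                 {Capture::Unretained, 40, 8}};
      std::cout << encodeLayout(Caps, /*HasCopyDisposeHelpers=*/false) << "\n"; // s32l8u40l8
      std::cout << encodeLayout(Caps, /*HasCopyDisposeHelpers=*/true) << "\n";  // u40l8
    }
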
@@ -2845,9 +2887,22 @@ llvm::Constant *CGObjCCommonMac::BuildRCBlockLayout(CodeGenModule &CGM,
UpdateRunSkipBlockVars(CI.isByRef(), getBlockCaptureLifetime(type, false),
fieldOffset, fieldSize);
}
+}
+
+llvm::Constant *
+CGObjCCommonMac::BuildRCBlockLayout(CodeGenModule &CGM,
+ const CGBlockInfo &blockInfo) {
+ fillRunSkipBlockVars(CGM, blockInfo);
return getBitmapBlockLayout(false);
}
+std::string CGObjCCommonMac::getRCBlockLayoutStr(CodeGenModule &CGM,
+ const CGBlockInfo &blockInfo) {
+ fillRunSkipBlockVars(CGM, blockInfo);
+ return getBlockLayoutInfoString(RunSkipBlockVars,
+ blockInfo.needsCopyDisposeHelpers());
+}
+
llvm::Constant *CGObjCCommonMac::BuildByrefLayout(CodeGen::CodeGenModule &CGM,
QualType T) {
assert(CGM.getLangOpts().getGC() == LangOptions::NonGC);
@@ -6783,8 +6838,9 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol(
return Entry;
// Use the protocol definition, if there is one.
- if (const ObjCProtocolDecl *Def = PD->getDefinition())
- PD = Def;
+ assert(PD->hasDefinition() &&
+ "emitting protocol metadata without definition");
+ PD = PD->getDefinition();
auto methodLists = ProtocolMethodLists::get(PD);
@@ -7132,15 +7188,21 @@ CGObjCNonFragileABIMac::GetClassGlobal(StringRef Name,
Weak ? llvm::GlobalValue::ExternalWeakLinkage
: llvm::GlobalValue::ExternalLinkage;
-
-
llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(Name);
- if (!GV) {
- GV = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassnfABITy,
- false, L, nullptr, Name);
+ if (!GV || GV->getType() != ObjCTypes.ClassnfABITy->getPointerTo()) {
+ auto *NewGV = new llvm::GlobalVariable(ObjCTypes.ClassnfABITy, false, L,
+ nullptr, Name);
if (DLLImport)
- GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
+ NewGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
+
+ if (GV) {
+ GV->replaceAllUsesWith(
+ llvm::ConstantExpr::getBitCast(NewGV, GV->getType()));
+ GV->eraseFromParent();
+ }
+ GV = NewGV;
+ CGM.getModule().getGlobalList().push_back(GV);
}
assert(GV->getLinkage() == L);
diff --git a/lib/CodeGen/CGObjCRuntime.cpp b/lib/CodeGen/CGObjCRuntime.cpp
index a43885c0f9a2..4b6f24a03f27 100644
--- a/lib/CodeGen/CGObjCRuntime.cpp
+++ b/lib/CodeGen/CGObjCRuntime.cpp
@@ -15,6 +15,7 @@
#include "CGObjCRuntime.h"
#include "CGCleanup.h"
+#include "CGCXXABI.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
@@ -22,6 +23,7 @@
#include "clang/AST/StmtObjC.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/Support/SaveAndRestore.h"
using namespace clang;
using namespace CodeGen;
@@ -120,6 +122,8 @@ namespace {
const Stmt *Body;
llvm::BasicBlock *Block;
llvm::Constant *TypeInfo;
+ /// Flags used to differentiate cleanups and catchalls in Windows SEH
+ unsigned Flags;
};
struct CallObjCEndCatch final : EHScopeStack::Cleanup {
@@ -148,13 +152,17 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF,
if (S.getNumCatchStmts())
Cont = CGF.getJumpDestInCurrentScope("eh.cont");
+ bool useFunclets = EHPersonality::get(CGF).usesFuncletPads();
+
CodeGenFunction::FinallyInfo FinallyInfo;
- if (const ObjCAtFinallyStmt *Finally = S.getFinallyStmt())
- FinallyInfo.enter(CGF, Finally->getFinallyBody(),
- beginCatchFn, endCatchFn, exceptionRethrowFn);
+ if (!useFunclets)
+ if (const ObjCAtFinallyStmt *Finally = S.getFinallyStmt())
+ FinallyInfo.enter(CGF, Finally->getFinallyBody(),
+ beginCatchFn, endCatchFn, exceptionRethrowFn);
SmallVector<CatchHandler, 8> Handlers;
+
// Enter the catch, if there is one.
if (S.getNumCatchStmts()) {
for (unsigned I = 0, N = S.getNumCatchStmts(); I != N; ++I) {
@@ -166,10 +174,13 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF,
Handler.Variable = CatchDecl;
Handler.Body = CatchStmt->getCatchBody();
Handler.Block = CGF.createBasicBlock("catch");
+ Handler.Flags = 0;
// @catch(...) always matches.
if (!CatchDecl) {
- Handler.TypeInfo = nullptr; // catch-all
+ auto catchAll = getCatchAllTypeInfo();
+ Handler.TypeInfo = catchAll.RTTI;
+ Handler.Flags = catchAll.Flags;
// Don't consider any other catches.
break;
}
@@ -179,9 +190,31 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF,
EHCatchScope *Catch = CGF.EHStack.pushCatch(Handlers.size());
for (unsigned I = 0, E = Handlers.size(); I != E; ++I)
- Catch->setHandler(I, Handlers[I].TypeInfo, Handlers[I].Block);
+ Catch->setHandler(I, { Handlers[I].TypeInfo, Handlers[I].Flags }, Handlers[I].Block);
}
+ if (useFunclets)
+ if (const ObjCAtFinallyStmt *Finally = S.getFinallyStmt()) {
+ CodeGenFunction HelperCGF(CGM, /*suppressNewContext=*/true);
+ if (!CGF.CurSEHParent)
+ CGF.CurSEHParent = cast<NamedDecl>(CGF.CurFuncDecl);
+ // Outline the finally block.
+ const Stmt *FinallyBlock = Finally->getFinallyBody();
+ HelperCGF.startOutlinedSEHHelper(CGF, /*isFilter*/false, FinallyBlock);
+
+ // Emit the original filter expression, convert to i32, and return.
+ HelperCGF.EmitStmt(FinallyBlock);
+
+ HelperCGF.FinishFunction(FinallyBlock->getEndLoc());
+
+ llvm::Function *FinallyFunc = HelperCGF.CurFn;
+
+
+ // Push a cleanup for __finally blocks.
+ CGF.pushSEHCleanup(NormalAndEHCleanup, FinallyFunc);
+ }
+
+
// Emit the try body.
CGF.EmitStmt(S.getTryBody());
@@ -197,6 +230,13 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF,
CatchHandler &Handler = Handlers[I];
CGF.EmitBlock(Handler.Block);
+ llvm::CatchPadInst *CPI = nullptr;
+ SaveAndRestore<llvm::Instruction *> RestoreCurrentFuncletPad(CGF.CurrentFuncletPad);
+ if (useFunclets)
+ if ((CPI = dyn_cast_or_null<llvm::CatchPadInst>(Handler.Block->getFirstNonPHI()))) {
+ CGF.CurrentFuncletPad = CPI;
+ CPI->setOperand(2, CGF.getExceptionSlot().getPointer());
+ }
llvm::Value *RawExn = CGF.getExceptionFromSlot();
// Enter the catch.
@@ -223,6 +263,8 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF,
CGF.EmitAutoVarDecl(*CatchParam);
EmitInitOfCatchParam(CGF, CastExn, CatchParam);
}
+ if (CPI)
+ CGF.EHStack.pushCleanup<CatchRetScope>(NormalCleanup, CPI);
CGF.ObjCEHValueStack.push_back(Exn);
CGF.EmitStmt(Handler.Body);
@@ -232,13 +274,13 @@ void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF,
cleanups.ForceCleanup();
CGF.EmitBranchThroughCleanup(Cont);
- }
+ }
// Go back to the try-statement fallthrough.
CGF.Builder.restoreIP(SavedIP);
// Pop out of the finally.
- if (S.getFinallyStmt())
+ if (!useFunclets && S.getFinallyStmt())
FinallyInfo.exit(CGF);
if (Cont.isValid())
@@ -254,7 +296,7 @@ void CGObjCRuntime::EmitInitOfCatchParam(CodeGenFunction &CGF,
switch (paramDecl->getType().getQualifiers().getObjCLifetime()) {
case Qualifiers::OCL_Strong:
exn = CGF.EmitARCRetainNonBlock(exn);
- // fallthrough
+ LLVM_FALLTHROUGH;
case Qualifiers::OCL_None:
case Qualifiers::OCL_ExplicitNone:
@@ -277,7 +319,7 @@ namespace {
: SyncExitFn(SyncExitFn), SyncArg(SyncArg) {}
void Emit(CodeGenFunction &CGF, Flags flags) override {
- CGF.Builder.CreateCall(SyncExitFn, SyncArg)->setDoesNotThrow();
+ CGF.EmitNounwindRuntimeCall(SyncExitFn, SyncArg);
}
};
}
diff --git a/lib/CodeGen/CGObjCRuntime.h b/lib/CodeGen/CGObjCRuntime.h
index ce082a61eb5e..fa16c198adbc 100644
--- a/lib/CodeGen/CGObjCRuntime.h
+++ b/lib/CodeGen/CGObjCRuntime.h
@@ -17,6 +17,7 @@
#define LLVM_CLANG_LIB_CODEGEN_CGOBJCRUNTIME_H
#include "CGBuilder.h"
#include "CGCall.h"
+#include "CGCleanup.h"
#include "CGValue.h"
#include "clang/AST/DeclObjC.h"
#include "clang/Basic/IdentifierTable.h" // Selector
@@ -141,6 +142,8 @@ public:
/// error to Sema.
virtual llvm::Constant *GetEHType(QualType T) = 0;
+ virtual CatchTypeInfo getCatchAllTypeInfo() { return { nullptr, 0 }; }
+
/// Generate a constant string object.
virtual ConstantAddress GenerateConstantString(const StringLiteral *) = 0;
@@ -275,6 +278,10 @@ public:
const CodeGen::CGBlockInfo &blockInfo) = 0;
virtual llvm::Constant *BuildRCBlockLayout(CodeGen::CodeGenModule &CGM,
const CodeGen::CGBlockInfo &blockInfo) = 0;
+ virtual std::string getRCBlockLayoutStr(CodeGen::CodeGenModule &CGM,
+ const CGBlockInfo &blockInfo) {
+ return {};
+ }
/// Returns an i8* which points to the byref layout information.
virtual llvm::Constant *BuildByrefLayout(CodeGen::CodeGenModule &CGM,
diff --git a/lib/CodeGen/CGOpenCLRuntime.cpp b/lib/CodeGen/CGOpenCLRuntime.cpp
index 1da19a90c387..7f6f595dd5d1 100644
--- a/lib/CodeGen/CGOpenCLRuntime.cpp
+++ b/lib/CodeGen/CGOpenCLRuntime.cpp
@@ -62,6 +62,11 @@ llvm::Type *CGOpenCLRuntime::convertOpenCLSpecificType(const Type *T) {
case BuiltinType::OCLReserveID:
return llvm::PointerType::get(
llvm::StructType::create(Ctx, "opencl.reserve_id_t"), AddrSpc);
+#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
+ case BuiltinType::Id: \
+ return llvm::PointerType::get( \
+ llvm::StructType::create(Ctx, "opencl." #ExtType), AddrSpc);
+#include "clang/Basic/OpenCLExtensionTypes.def"
}
}
@@ -118,25 +123,6 @@ llvm::PointerType *CGOpenCLRuntime::getGenericVoidPointerType() {
CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
}
-// Get the block literal from an expression derived from the block expression.
-// OpenCL v2.0 s6.12.5:
-// Block variable declarations are implicitly qualified with const. Therefore
-// all block variables must be initialized at declaration time and may not be
-// reassigned.
-static const BlockExpr *getBlockExpr(const Expr *E) {
- if (auto Cast = dyn_cast<CastExpr>(E)) {
- E = Cast->getSubExpr();
- }
- if (auto DR = dyn_cast<DeclRefExpr>(E)) {
- E = cast<VarDecl>(DR->getDecl())->getInit();
- }
- E = E->IgnoreImplicit();
- if (auto Cast = dyn_cast<CastExpr>(E)) {
- E = Cast->getSubExpr();
- }
- return cast<BlockExpr>(E);
-}
-
/// Record emitted llvm invoke function and llvm block literal for the
/// corresponding block expression.
void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
@@ -151,15 +137,21 @@ void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
EnqueuedBlockMap[E].Kernel = nullptr;
}
-llvm::Function *CGOpenCLRuntime::getInvokeFunction(const Expr *E) {
- return EnqueuedBlockMap[getBlockExpr(E)].InvokeFunc;
-}
-
CGOpenCLRuntime::EnqueuedBlockInfo
CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) {
CGF.EmitScalarExpr(E);
- const BlockExpr *Block = getBlockExpr(E);
+  // The block literal may be assigned to a const variable. Chase down the
+  // initializer to get the block literal.
+ if (auto DR = dyn_cast<DeclRefExpr>(E)) {
+ E = cast<VarDecl>(DR->getDecl())->getInit();
+ }
+ E = E->IgnoreImplicit();
+ if (auto Cast = dyn_cast<CastExpr>(E)) {
+ E = Cast->getSubExpr();
+ }
+ auto *Block = cast<BlockExpr>(E);
+
assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() &&
"Block expression not emitted");
diff --git a/lib/CodeGen/CGOpenCLRuntime.h b/lib/CodeGen/CGOpenCLRuntime.h
index a513340827a8..750721f1b80f 100644
--- a/lib/CodeGen/CGOpenCLRuntime.h
+++ b/lib/CodeGen/CGOpenCLRuntime.h
@@ -16,6 +16,7 @@
#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENCLRUNTIME_H
#define LLVM_CLANG_LIB_CODEGEN_CGOPENCLRUNTIME_H
+#include "clang/AST/Expr.h"
#include "clang/AST/Type.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Type.h"
@@ -91,10 +92,6 @@ public:
/// \param Block block literal emitted for the block expression.
void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF,
llvm::Value *Block);
-
- /// \return LLVM block invoke function emitted for an expression derived from
- /// the block expression.
- llvm::Function *getInvokeFunction(const Expr *E);
};
}
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index fa850155df4f..20eb0b29f427 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -353,7 +353,7 @@ public:
if (VD->isLocalVarDeclOrParm())
continue;
- DeclRefExpr DRE(const_cast<VarDecl *>(VD),
+ DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
/*RefersToEnclosingVariableOrCapture=*/false,
VD->getType().getNonReferenceType(), VK_LValue,
C.getLocation());
@@ -673,6 +673,9 @@ enum OpenMPRTLFunction {
//
// Offloading related calls
//
+ // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
+ // size);
+ OMPRTL__kmpc_push_target_tripcount,
// Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
// arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
// *arg_types);
@@ -897,25 +900,6 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
-static llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy>
-isDeclareTargetDeclaration(const ValueDecl *VD) {
- for (const Decl *D : VD->redecls()) {
- if (!D->hasAttrs())
- continue;
- if (const auto *Attr = D->getAttr<OMPDeclareTargetDeclAttr>())
- return Attr->getMapType();
- }
- if (const auto *V = dyn_cast<VarDecl>(VD)) {
- if (const VarDecl *TD = V->getTemplateInstantiationPattern())
- return isDeclareTargetDeclaration(TD);
- } else if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
- if (const auto *TD = FD->getTemplateInstantiationPattern())
- return isDeclareTargetDeclaration(TD);
- }
-
- return llvm::None;
-}
-
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
return CGF.EmitOMPSharedLValue(E);
}
@@ -1242,6 +1226,17 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
void CGOpenMPRuntime::clear() {
InternalVars.clear();
+  // Clean up non-target variable declarations that might have been emitted
+  // only for use in debug info.
+ for (const auto &Data : EmittedNonTargetVariables) {
+ if (!Data.getValue().pointsToAliveValue())
+ continue;
+ auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
+ if (!GV)
+ continue;
+ if (!GV->isDeclaration() || GV->getNumUses() > 0)
+ continue;
+ GV->eraseFromParent();
+ }
}
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
@@ -1314,27 +1309,19 @@ void CGOpenMPRuntime::emitUserDefinedReduction(
CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
if (UDRMap.count(D) > 0)
return;
- ASTContext &C = CGM.getContext();
- if (!In || !Out) {
- In = &C.Idents.get("omp_in");
- Out = &C.Idents.get("omp_out");
- }
llvm::Function *Combiner = emitCombinerOrInitializer(
- CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()),
- cast<VarDecl>(D->lookup(Out).front()),
+ CGM, D->getType(), D->getCombiner(),
+ cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
+ cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
/*IsCombiner=*/true);
llvm::Function *Initializer = nullptr;
if (const Expr *Init = D->getInitializer()) {
- if (!Priv || !Orig) {
- Priv = &C.Idents.get("omp_priv");
- Orig = &C.Idents.get("omp_orig");
- }
Initializer = emitCombinerOrInitializer(
CGM, D->getType(),
D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
: nullptr,
- cast<VarDecl>(D->lookup(Orig).front()),
- cast<VarDecl>(D->lookup(Priv).front()),
+ cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
+ cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
/*IsCombiner=*/false);
}
UDRMap.try_emplace(D, Combiner, Initializer);
@@ -1406,8 +1393,8 @@ llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
bool Tied, unsigned &NumberOfParts) {
auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
PrePostActionTy &) {
- llvm::Value *ThreadID = getThreadID(CGF, D.getLocStart());
- llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
+ llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
+ llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
llvm::Value *TaskArgs[] = {
UpLoc, ThreadID,
CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
@@ -1456,17 +1443,17 @@ static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
template <class... As>
static llvm::GlobalVariable *
-createConstantGlobalStruct(CodeGenModule &CGM, QualType Ty,
- ArrayRef<llvm::Constant *> Data, const Twine &Name,
- As &&... Args) {
+createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
+ ArrayRef<llvm::Constant *> Data, const Twine &Name,
+ As &&... Args) {
const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
ConstantInitBuilder CIBuilder(CGM);
ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
buildStructValue(Fields, CGM, RD, RL, Data);
return Fields.finishAndCreateGlobal(
- Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty),
- /*isConstant=*/true, std::forward<As>(Args)...);
+ Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
+ std::forward<As>(Args)...);
}
template <typename T>
@@ -1483,7 +1470,9 @@ createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
- llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
+ unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
+ FlagsTy FlagsKey(Flags, Reserved2Flags);
+ llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
if (!Entry) {
if (!DefaultOpenMPPSource) {
// Initialize default location for psource field of ident_t structure of
@@ -1496,21 +1485,47 @@ Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
}
- llvm::Constant *Data[] = {llvm::ConstantInt::getNullValue(CGM.Int32Ty),
- llvm::ConstantInt::get(CGM.Int32Ty, Flags),
- llvm::ConstantInt::getNullValue(CGM.Int32Ty),
- llvm::ConstantInt::getNullValue(CGM.Int32Ty),
- DefaultOpenMPPSource};
- llvm::GlobalValue *DefaultOpenMPLocation = createConstantGlobalStruct(
- CGM, IdentQTy, Data, "", llvm::GlobalValue::PrivateLinkage);
+ llvm::Constant *Data[] = {
+ llvm::ConstantInt::getNullValue(CGM.Int32Ty),
+ llvm::ConstantInt::get(CGM.Int32Ty, Flags),
+ llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
+ llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
+ llvm::GlobalValue *DefaultOpenMPLocation =
+ createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
+ llvm::GlobalValue::PrivateLinkage);
DefaultOpenMPLocation->setUnnamedAddr(
llvm::GlobalValue::UnnamedAddr::Global);
- OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
+ OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
}
return Address(Entry, Align);
}
+void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
+ bool AtCurrentPoint) {
+ auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
+ assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
+
+ llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
+ if (AtCurrentPoint) {
+ Elem.second.ServiceInsertPt = new llvm::BitCastInst(
+ Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
+ } else {
+ Elem.second.ServiceInsertPt =
+ new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
+ Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
+ }
+}
+
+void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
+ auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
+ if (Elem.second.ServiceInsertPt) {
+ llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
+ Elem.second.ServiceInsertPt = nullptr;
+ Ptr->eraseFromParent();
+ }
+}
+
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
SourceLocation Loc,
unsigned Flags) {
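
setLocThreadIdInsertPt and clearLocThreadIdInsertPt above park a dead "bitcast i32 undef" as a movable service insertion point: the cached location struct and thread-id call are emitted in front of it rather than directly at the alloca insertion point, and the marker is erased again when the function is finished. A minimal sketch of that placeholder idiom against the raw LLVM IR API, using a toy module rather than the CodeGen types:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/IR/Verifier.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Module M("svcpt-demo", Ctx);
      Function *F = Function::Create(
          FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false),
          Function::ExternalLinkage, "f", &M);
      BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
      IRBuilder<> Builder(BB);

      // Park a dead no-op bitcast as the service insertion point.
      Value *Undef = UndefValue::get(Builder.getInt32Ty());
      Instruction *ServicePt =
          new BitCastInst(Undef, Builder.getInt32Ty(), "svcpt", BB);
      Builder.CreateRetVoid();

      // Later, materialise "service" values right before the marker without
      // having to remember an iterator into the entry block.
      IRBuilder<> SvcBuilder(ServicePt);
      SvcBuilder.CreateAlloca(SvcBuilder.getInt32Ty(), nullptr, "ident.addr");

      // Once the function is finished, drop the marker again.
      ServicePt->eraseFromParent();
      verifyFunction(*F, &errs());
      M.print(outs(), nullptr);
    }
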
@@ -1537,8 +1552,10 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
Elem.second.DebugLoc = AI.getPointer();
LocValue = AI;
+ if (!Elem.second.ServiceInsertPt)
+ setLocThreadIdInsertPt(CGF);
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
- CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
+ CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
CGF.getTypeSize(IdentQTy));
}
@@ -1608,21 +1625,25 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
// kmpc_global_thread_num(ident_t *loc).
// Generate thread id value and cache this value for use across the
// function.
+ auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
+ if (!Elem.second.ServiceInsertPt)
+ setLocThreadIdInsertPt(CGF);
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
- CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
+ CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
llvm::CallInst *Call = CGF.Builder.CreateCall(
createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
emitUpdateLocation(CGF, Loc));
Call->setCallingConv(CGF.getRuntimeCC());
- auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
Elem.second.ThreadID = Call;
return Call;
}
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
assert(CGF.CurFn && "No function in current CodeGenFunction.");
- if (OpenMPLocThreadIDMap.count(CGF.CurFn))
+ if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
+ clearLocThreadIdInsertPt(CGF);
OpenMPLocThreadIDMap.erase(CGF.CurFn);
+ }
if (FunctionUDRMap.count(CGF.CurFn) > 0) {
for(auto *D : FunctionUDRMap[CGF.CurFn])
UDRMap.erase(D);
@@ -2145,6 +2166,15 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
break;
}
+ case OMPRTL__kmpc_push_target_tripcount: {
+ // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
+ // size);
+ llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
+ break;
+ }
case OMPRTL__tgt_target: {
// Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
// arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
@@ -2417,7 +2447,7 @@ Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) {
if (CGM.getLangOpts().OpenMPSimd)
return Address::invalid();
llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
- isDeclareTargetDeclaration(VD);
+ OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) {
SmallString<64> PtrName;
{
@@ -2496,8 +2526,7 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
return nullptr;
VD = VD->getDefinition(CGM.getContext());
- if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
- ThreadPrivateWithDefinition.insert(VD);
+ if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
QualType ASTTy = VD->getType();
llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
@@ -2639,16 +2668,16 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
llvm::GlobalVariable *Addr,
bool PerformInit) {
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
- isDeclareTargetDeclaration(VD);
+ OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link)
- return false;
+ return CGM.getLangOpts().OpenMPIsDevice;
VD = VD->getDefinition(CGM.getContext());
- if (VD && !DeclareTargetWithDefinition.insert(VD).second)
+ if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
return CGM.getLangOpts().OpenMPIsDevice;
QualType ASTTy = VD->getType();
- SourceLocation Loc = VD->getCanonicalDecl()->getLocStart();
+ SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
// Produce the unique prefix to identify the new target regions. We use
// the source location of the variable declaration which we know to not
// conflict with any target region.
@@ -3197,13 +3226,7 @@ void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}
-void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
- OpenMPDirectiveKind Kind, bool EmitChecks,
- bool ForceSimpleCall) {
- if (!CGF.HaveInsertPoint())
- return;
- // Build call __kmpc_cancel_barrier(loc, thread_id);
- // Build call __kmpc_barrier(loc, thread_id);
+unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
unsigned Flags;
if (Kind == OMPD_for)
Flags = OMP_IDENT_BARRIER_IMPL_FOR;
@@ -3215,6 +3238,17 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
Flags = OMP_IDENT_BARRIER_EXPL;
else
Flags = OMP_IDENT_BARRIER_IMPL;
+ return Flags;
+}
+
+void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
+ OpenMPDirectiveKind Kind, bool EmitChecks,
+ bool ForceSimpleCall) {
+ if (!CGF.HaveInsertPoint())
+ return;
+ // Build call __kmpc_cancel_barrier(loc, thread_id);
+ // Build call __kmpc_barrier(loc, thread_id);
+ unsigned Flags = getDefaultFlagsForBarriers(Kind);
// Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
// thread_id);
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
@@ -3287,6 +3321,18 @@ bool CGOpenMPRuntime::isStaticNonchunked(
return Schedule == OMP_dist_sch_static;
}
+bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
+ bool Chunked) const {
+ OpenMPSchedType Schedule =
+ getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
+ return Schedule == OMP_sch_static_chunked;
+}
+
+bool CGOpenMPRuntime::isStaticChunked(
+ OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
+ OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
+ return Schedule == OMP_dist_sch_static_chunked;
+}
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
OpenMPSchedType Schedule =
@@ -3784,8 +3830,8 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
DeviceImages, Index),
HostEntriesBegin, HostEntriesEnd};
std::string Descriptor = getName({"omp_offloading", "descriptor"});
- llvm::GlobalVariable *Desc = createConstantGlobalStruct(
- CGM, getTgtBinaryDescriptorQTy(), Data, Descriptor);
+ llvm::GlobalVariable *Desc = createGlobalStruct(
+ CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor);
// Emit code to register or unregister the descriptor at execution
// startup or closing, respectively.
@@ -3818,7 +3864,19 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
CGF.disableDebugInfo();
const auto &FI = CGM.getTypes().arrangeNullaryFunction();
llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
- std::string Descriptor = getName({"omp_offloading", "descriptor_reg"});
+
+ // Encode offload target triples into the registration function name. It
+ // will serve as a comdat key for the registration/unregistration code for
+ // this particular combination of offloading targets.
+ SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U);
+ RegFnNameParts[0] = "omp_offloading";
+ RegFnNameParts[1] = "descriptor_reg";
+ llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2),
+ [](const llvm::Triple &T) -> const std::string& {
+ return T.getTriple();
+ });
+ llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end());
+ std::string Descriptor = getName(RegFnNameParts);
RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
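
The hunk above builds the registration function's name from the sorted list of offload target triples, so translation units compiled for the same device configuration produce the same name and can share one comdat key. A rough sketch of that naming scheme; the exact prefix and separator come from getName() and are assumed here:

    #include <algorithm>
    #include <iostream>
    #include <string>
    #include <vector>

    // Illustrative only: derive a stable registration-function name from the
    // set of offload target triples, mirroring the comdat-key idea above.
    static std::string regFnNameForDevices(std::vector<std::string> Triples) {
      std::sort(Triples.begin(), Triples.end());
      std::string Name = ".omp_offloading.descriptor_reg";  // assumed prefix
      for (const std::string &T : Triples)
        Name += "." + T;                                    // assumed joiner
      return Name;
    }

    int main() {
      std::cout << regFnNameForDevices(
                       {"x86_64-pc-linux-gnu", "nvptx64-nvidia-cuda"})
                << "\n";
      // .omp_offloading.descriptor_reg.nvptx64-nvidia-cuda.x86_64-pc-linux-gnu
    }
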
@@ -3868,9 +3926,9 @@ void CGOpenMPRuntime::createOffloadEntry(
llvm::ConstantInt::get(CGM.Int32Ty, Flags),
llvm::ConstantInt::get(CGM.Int32Ty, 0)};
std::string EntryName = getName({"omp_offloading", "entry", ""});
- llvm::GlobalVariable *Entry = createConstantGlobalStruct(
- CGM, getTgtOffloadEntryQTy(), Data, Twine(EntryName).concat(Name),
- llvm::GlobalValue::WeakAnyLinkage);
+ llvm::GlobalVariable *Entry = createGlobalStruct(
+ CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
+ Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
// The entry has to be created in the section the linker expects it to be.
std::string Section = getName({"omp_offloading", "entries"});
@@ -3895,6 +3953,8 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
llvm::LLVMContext &C = M.getContext();
SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
OrderedEntries(OffloadEntriesInfoManager.size());
+ llvm::SmallVector<StringRef, 16> ParentFunctions(
+ OffloadEntriesInfoManager.size());
// Auxiliary methods to create metadata values and strings.
auto &&GetMDInt = [this](unsigned V) {
@@ -3909,7 +3969,7 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
// Create function that emits metadata for each target region entry;
auto &&TargetRegionMetadataEmitter =
- [&C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
+ [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
unsigned DeviceID, unsigned FileID, StringRef ParentName,
unsigned Line,
const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
@@ -3929,6 +3989,7 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
// Save this entry in the right position of the ordered entries array.
OrderedEntries[E.getOrder()] = &E;
+ ParentFunctions[E.getOrder()] = ParentName;
// Add metadata to the named metadata node.
MD->addOperand(llvm::MDNode::get(C, Ops));
@@ -3970,6 +4031,10 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
E)) {
if (!CE->getID() || !CE->getAddress()) {
+      // Do not blame the entry if the parent function is not emitted.
+ StringRef FnName = ParentFunctions[CE->getOrder()];
+ if (!CGM.GetGlobalValue(FnName))
+ continue;
unsigned DiagID = CGM.getDiags().getCustomDiagID(
DiagnosticsEngine::Error,
"Offloading entry for target region is incorrect: either the "
@@ -3995,6 +4060,9 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
CGM.getDiags().Report(DiagID);
continue;
}
+      // The variable has no definition - no need to add the entry.
+ if (CE->getVarSize().isZero())
+ continue;
break;
}
case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
@@ -5226,8 +5294,8 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
LBLVal.getPointer(),
UBLVal.getPointer(),
CGF.EmitLoadOfScalar(StLVal, Loc),
- llvm::ConstantInt::getNullValue(
- CGF.IntTy), // Always 0 because taskgroup emitted by the compiler
+ llvm::ConstantInt::getSigned(
+ CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
llvm::ConstantInt::getSigned(
CGF.IntTy, Data.Schedule.getPointer()
? Data.Schedule.getInt() ? NumTasks : Grainsize
@@ -5776,7 +5844,7 @@ static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
std::string Name = CGM.getOpenMPRuntime().getName(
{D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
Out << Prefix << Name << "_"
- << D->getCanonicalDecl()->getLocStart().getRawEncoding();
+ << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
return Out.str();
}
@@ -6274,7 +6342,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
unsigned DeviceID;
unsigned FileID;
unsigned Line;
- getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID,
+ getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
Line);
SmallString<64> EntryFnName;
{
@@ -6589,17 +6657,17 @@ private:
struct MapInfo {
OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
- OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown;
+ ArrayRef<OpenMPMapModifierKind> MapModifiers;
bool ReturnDevicePointer = false;
bool IsImplicit = false;
MapInfo() = default;
MapInfo(
OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
- OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
+ OpenMPMapClauseKind MapType,
+ ArrayRef<OpenMPMapModifierKind> MapModifiers,
bool ReturnDevicePointer, bool IsImplicit)
- : Components(Components), MapType(MapType),
- MapTypeModifier(MapTypeModifier),
+ : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
};
@@ -6676,10 +6744,9 @@ private:
/// a flag marking the map as a pointer if requested. Add a flag marking the
/// map as the first one of a series of maps that relate to the same map
/// expression.
- OpenMPOffloadMappingFlags getMapTypeBits(OpenMPMapClauseKind MapType,
- OpenMPMapClauseKind MapTypeModifier,
- bool IsImplicit, bool AddPtrFlag,
- bool AddIsTargetParamFlag) const {
+ OpenMPOffloadMappingFlags getMapTypeBits(
+ OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
+ bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
OpenMPOffloadMappingFlags Bits =
IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
switch (MapType) {
@@ -6702,7 +6769,6 @@ private:
case OMPC_MAP_delete:
Bits |= OMP_MAP_DELETE;
break;
- case OMPC_MAP_always:
case OMPC_MAP_unknown:
llvm_unreachable("Unexpected map type!");
}
@@ -6710,7 +6776,8 @@ private:
Bits |= OMP_MAP_PTR_AND_OBJ;
if (AddIsTargetParamFlag)
Bits |= OMP_MAP_TARGET_PARAM;
- if (MapTypeModifier == OMPC_MAP_always)
+ if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
+ != MapModifiers.end())
Bits |= OMP_MAP_ALWAYS;
return Bits;
}
@@ -6746,10 +6813,11 @@ private:
}
// Check if the length evaluates to 1.
- llvm::APSInt ConstLength;
- if (!Length->EvaluateAsInt(ConstLength, CGF.getContext()))
+ Expr::EvalResult Result;
+ if (!Length->EvaluateAsInt(Result, CGF.getContext()))
       return true; // Can have more than size 1.
+ llvm::APSInt ConstLength = Result.Val.getInt();
return ConstLength.getSExtValue() != 1;
}
@@ -6758,12 +6826,15 @@ private:
/// \a IsFirstComponent should be set to true if the provided set of
/// components is the first associated with a capture.
void generateInfoForComponentList(
- OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
+ OpenMPMapClauseKind MapType,
+ ArrayRef<OpenMPMapModifierKind> MapModifiers,
OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
- bool IsImplicit) const {
+ bool IsImplicit,
+ ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
+ OverlappedElements = llvm::None) const {
// The following summarizes what has to be generated for each map and the
// types below. The generated information is expressed in this order:
// base pointer, section pointer, size, flags
@@ -6933,19 +7004,26 @@ private:
// components.
bool IsExpressionFirstInfo = true;
Address BP = Address::invalid();
+ const Expr *AssocExpr = I->getAssociatedExpression();
+ const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
+ const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
- if (isa<MemberExpr>(I->getAssociatedExpression())) {
+ if (isa<MemberExpr>(AssocExpr)) {
// The base is the 'this' pointer. The content of the pointer is going
// to be the base of the field being mapped.
BP = CGF.LoadCXXThisAddress();
+ } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
+ (OASE &&
+ isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
+ BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
} else {
// The base is the reference to the variable.
// BP = &Var.
- BP = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();
+ BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
if (const auto *VD =
dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
- isDeclareTargetDeclaration(VD))
+ OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
if (*Res == OMPDeclareTargetDeclAttr::MT_Link) {
IsLink = true;
BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
@@ -7034,7 +7112,6 @@ private:
Address LB =
CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();
- llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
// If this component is a pointer inside the base struct then we don't
// need to create any entry for it - it will be combined with the object
@@ -7043,6 +7120,70 @@ private:
IsPointer && EncounteredME &&
(dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
EncounteredME);
+ if (!OverlappedElements.empty()) {
+ // Handle base element with the info for overlapped elements.
+ assert(!PartialStruct.Base.isValid() && "The base element is set.");
+ assert(Next == CE &&
+ "Expected last element for the overlapped elements.");
+ assert(!IsPointer &&
+ "Unexpected base element with the pointer type.");
+ // Mark the whole struct as the struct that requires allocation on the
+ // device.
+ PartialStruct.LowestElem = {0, LB};
+ CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
+ I->getAssociatedExpression()->getType());
+ Address HB = CGF.Builder.CreateConstGEP(
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
+ CGF.VoidPtrTy),
+ TypeSize.getQuantity() - 1, CharUnits::One());
+ PartialStruct.HighestElem = {
+ std::numeric_limits<decltype(
+ PartialStruct.HighestElem.first)>::max(),
+ HB};
+ PartialStruct.Base = BP;
+ // Emit data for non-overlapped data.
+ OpenMPOffloadMappingFlags Flags =
+ OMP_MAP_MEMBER_OF |
+ getMapTypeBits(MapType, MapModifiers, IsImplicit,
+ /*AddPtrFlag=*/false,
+ /*AddIsTargetParamFlag=*/false);
+ LB = BP;
+ llvm::Value *Size = nullptr;
+ // Do bitcopy of all non-overlapped structure elements.
+ for (OMPClauseMappableExprCommon::MappableExprComponentListRef
+ Component : OverlappedElements) {
+ Address ComponentLB = Address::invalid();
+ for (const OMPClauseMappableExprCommon::MappableComponent &MC :
+ Component) {
+ if (MC.getAssociatedDeclaration()) {
+ ComponentLB =
+ CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
+ .getAddress();
+ Size = CGF.Builder.CreatePtrDiff(
+ CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
+ CGF.EmitCastToVoidPtr(LB.getPointer()));
+ break;
+ }
+ }
+ BasePointers.push_back(BP.getPointer());
+ Pointers.push_back(LB.getPointer());
+ Sizes.push_back(Size);
+ Types.push_back(Flags);
+ LB = CGF.Builder.CreateConstGEP(ComponentLB, 1,
+ CGF.getPointerSize());
+ }
+ BasePointers.push_back(BP.getPointer());
+ Pointers.push_back(LB.getPointer());
+ Size = CGF.Builder.CreatePtrDiff(
+ CGF.EmitCastToVoidPtr(
+ CGF.Builder.CreateConstGEP(HB, 1, CharUnits::One())
+ .getPointer()),
+ CGF.EmitCastToVoidPtr(LB.getPointer()));
+ Sizes.push_back(Size);
+ Types.push_back(Flags);
+ break;
+ }
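+      // Editorial illustration (not part of the original patch): given
+      //   #pragma omp target map(tofrom: s) map(tofrom: s.ptr[0:n])
+      // 's' is the base element here and 's.ptr[0:n]' is the overlapped
+      // element, so the code above emits bitcopy entries for the bytes of 's'
+      // before and after the 'ptr' member instead of copying the whole struct
+      // over the attached pointer ('s' and 'ptr' are hypothetical names).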
+ llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
if (!IsMemberPointer) {
BasePointers.push_back(BP.getPointer());
Pointers.push_back(LB.getPointer());
@@ -7053,7 +7194,7 @@ private:
// this map is the first one that relates with the current capture
// (there is a set of entries for each capture).
OpenMPOffloadMappingFlags Flags = getMapTypeBits(
- MapType, MapTypeModifier, IsImplicit,
+ MapType, MapModifiers, IsImplicit,
!IsExpressionFirstInfo || IsLink, IsCaptureFirstInfo && !IsLink);
if (!IsExpressionFirstInfo) {
@@ -7147,6 +7288,66 @@ private:
Flags |= MemberOfFlag;
}
+ void getPlainLayout(const CXXRecordDecl *RD,
+ llvm::SmallVectorImpl<const FieldDecl *> &Layout,
+ bool AsBase) const {
+ const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
+
+ llvm::StructType *St =
+ AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
+
+ unsigned NumElements = St->getNumElements();
+ llvm::SmallVector<
+ llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
+ RecordLayout(NumElements);
+
+ // Fill bases.
+ for (const auto &I : RD->bases()) {
+ if (I.isVirtual())
+ continue;
+ const auto *Base = I.getType()->getAsCXXRecordDecl();
+ // Ignore empty bases.
+ if (Base->isEmpty() || CGF.getContext()
+ .getASTRecordLayout(Base)
+ .getNonVirtualSize()
+ .isZero())
+ continue;
+
+ unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
+ RecordLayout[FieldIndex] = Base;
+ }
+ // Fill in virtual bases.
+ for (const auto &I : RD->vbases()) {
+ const auto *Base = I.getType()->getAsCXXRecordDecl();
+ // Ignore empty bases.
+ if (Base->isEmpty())
+ continue;
+ unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
+ if (RecordLayout[FieldIndex])
+ continue;
+ RecordLayout[FieldIndex] = Base;
+ }
+ // Fill in all the fields.
+ assert(!RD->isUnion() && "Unexpected union.");
+ for (const auto *Field : RD->fields()) {
+      // Fill in non-bitfields; bitfields are not part of the plain layout.
+ if (!Field->isBitField()) {
+ unsigned FieldIndex = RL.getLLVMFieldNo(Field);
+ RecordLayout[FieldIndex] = Field;
+ }
+ }
+ for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
+ &Data : RecordLayout) {
+ if (Data.isNull())
+ continue;
+ if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
+ getPlainLayout(Base, Layout, /*AsBase=*/true);
+ else
+ Layout.push_back(Data.get<const FieldDecl *>());
+ }
+ }
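+  // Editorial illustration (not part of the original patch): for
+  //   struct B { int b; };  struct D : B { int x; int y; };
+  // getPlainLayout(D, Layout, /*AsBase=*/false) yields {B::b, D::x, D::y},
+  // i.e. base-class fields are flattened recursively and then the class's own
+  // non-bitfield fields follow in layout order.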
+
public:
MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
: CurDir(Dir), CGF(CGF) {
@@ -7213,28 +7414,29 @@ public:
auto &&InfoGen = [&Info](
const ValueDecl *D,
OMPClauseMappableExprCommon::MappableExprComponentListRef L,
- OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier,
+ OpenMPMapClauseKind MapType,
+ ArrayRef<OpenMPMapModifierKind> MapModifiers,
bool ReturnDevicePointer, bool IsImplicit) {
const ValueDecl *VD =
D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
- Info[VD].emplace_back(L, MapType, MapModifier, ReturnDevicePointer,
+ Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
IsImplicit);
};
// FIXME: MSVC 2013 seems to require this-> to find member CurDir.
for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
for (const auto &L : C->component_lists()) {
- InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(),
+ InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
/*ReturnDevicePointer=*/false, C->isImplicit());
}
for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
for (const auto &L : C->component_lists()) {
- InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown,
+ InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
/*ReturnDevicePointer=*/false, C->isImplicit());
}
for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
for (const auto &L : C->component_lists()) {
- InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown,
+ InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
/*ReturnDevicePointer=*/false, C->isImplicit());
}
@@ -7287,7 +7489,7 @@ public:
// Nonetheless, generateInfoForComponentList must be called to take
// the pointer into account for the calculation of the range of the
// partial struct.
- InfoGen(nullptr, L.second, OMPC_MAP_unknown, OMPC_MAP_unknown,
+ InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
/*ReturnDevicePointer=*/false, C->isImplicit());
DeferredInfo[nullptr].emplace_back(IE, VD);
} else {
@@ -7321,7 +7523,7 @@ public:
unsigned CurrentBasePointersIdx = CurBasePointers.size();
// FIXME: MSVC 2013 seems to require this-> to find the member method.
this->generateInfoForComponentList(
- L.MapType, L.MapTypeModifier, L.Components, CurBasePointers,
+ L.MapType, L.MapModifiers, L.Components, CurBasePointers,
CurPointers, CurSizes, CurTypes, PartialStruct,
IsFirstComponentList, L.IsImplicit);
@@ -7375,6 +7577,82 @@ public:
}
}
+  /// Emit capture info for variables captured by reference in lambdas.
+ void generateInfoForLambdaCaptures(
+ const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
+ MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
+ MapFlagsArrayTy &Types,
+ llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
+ const auto *RD = VD->getType()
+ .getCanonicalType()
+ .getNonReferenceType()
+ ->getAsCXXRecordDecl();
+ if (!RD || !RD->isLambda())
+ return;
+ Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
+ LValue VDLVal = CGF.MakeAddrLValue(
+ VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
+ llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
+ FieldDecl *ThisCapture = nullptr;
+ RD->getCaptureFields(Captures, ThisCapture);
+ if (ThisCapture) {
+ LValue ThisLVal =
+ CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
+ LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
+ LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
+ BasePointers.push_back(ThisLVal.getPointer());
+ Pointers.push_back(ThisLValVal.getPointer());
+ Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
+ Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
+ OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
+ }
+ for (const LambdaCapture &LC : RD->captures()) {
+ if (LC.getCaptureKind() != LCK_ByRef)
+ continue;
+ const VarDecl *VD = LC.getCapturedVar();
+ auto It = Captures.find(VD);
+ assert(It != Captures.end() && "Found lambda capture without field.");
+ LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
+ LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
+ LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
+ BasePointers.push_back(VarLVal.getPointer());
+ Pointers.push_back(VarLValVal.getPointer());
+ Sizes.push_back(CGF.getTypeSize(
+ VD->getType().getCanonicalType().getNonReferenceType()));
+ Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
+ OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
+ }
+ }
+
+  /// Set correct indices for lambda captures.
+ void adjustMemberOfForLambdaCaptures(
+ const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
+ MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
+ MapFlagsArrayTy &Types) const {
+ for (unsigned I = 0, E = Types.size(); I < E; ++I) {
+ // Set correct member_of idx for all implicit lambda captures.
+ if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
+ OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
+ continue;
+ llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
+ assert(BasePtr && "Unable to find base lambda address.");
+ int TgtIdx = -1;
+ for (unsigned J = I; J > 0; --J) {
+ unsigned Idx = J - 1;
+ if (Pointers[Idx] != BasePtr)
+ continue;
+ TgtIdx = Idx;
+ break;
+ }
+ assert(TgtIdx != -1 && "Unable to find parent lambda.");
+ // All other current entries will be MEMBER_OF the combined entry
+ // (except for PTR_AND_OBJ entries which do not have a placeholder value
+ // 0xFFFF in the MEMBER_OF field).
+ OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
+ setCorrectMemberOfFlag(Types[I], MemberOfFlag);
+ }
+ }
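+  // Editorial note (not part of the original patch): the per-capture entries
+  // emitted by generateInfoForLambdaCaptures() carry only the placeholder
+  // MEMBER_OF value; once all captures have been appended to the combined
+  // arrays, this pass rewrites that value to the index of the entry mapping
+  // the lambda object itself.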
+
/// Generate the base pointers, section pointers, sizes and map types
/// associated to a given capture.
void generateInfoForCapture(const CapturedStmt::Capture *Cap,
@@ -7387,9 +7665,6 @@ public:
"Not expecting to generate map info for a variable array type!");
   // We need to know when we are generating information for the first component
- // associated with a capture, because the mapping flags depend on it.
- bool IsFirstComponentList = true;
-
const ValueDecl *VD = Cap->capturesThis()
? nullptr
: Cap->getCapturedVar()->getCanonicalDecl();
@@ -7405,19 +7680,151 @@ public:
return;
}
+ using MapData =
+ std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
+ OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
+ SmallVector<MapData, 4> DeclComponentLists;
// FIXME: MSVC 2013 seems to require this-> to find member CurDir.
- for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
+ for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
for (const auto &L : C->decl_component_lists(VD)) {
assert(L.first == VD &&
"We got information for the wrong declaration??");
assert(!L.second.empty() &&
"Not expecting declaration with no component lists.");
- generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(),
- L.second, BasePointers, Pointers, Sizes,
- Types, PartialStruct, IsFirstComponentList,
- C->isImplicit());
- IsFirstComponentList = false;
+ DeclComponentLists.emplace_back(L.second, C->getMapType(),
+ C->getMapTypeModifiers(),
+ C->isImplicit());
+ }
+ }
+
+ // Find overlapping elements (including the offset from the base element).
+ llvm::SmallDenseMap<
+ const MapData *,
+ llvm::SmallVector<
+ OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
+ 4>
+ OverlappedData;
+ size_t Count = 0;
+ for (const MapData &L : DeclComponentLists) {
+ OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
+ OpenMPMapClauseKind MapType;
+ ArrayRef<OpenMPMapModifierKind> MapModifiers;
+ bool IsImplicit;
+ std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
+ ++Count;
+ for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
+ OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
+ std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
+ auto CI = Components.rbegin();
+ auto CE = Components.rend();
+ auto SI = Components1.rbegin();
+ auto SE = Components1.rend();
+ for (; CI != CE && SI != SE; ++CI, ++SI) {
+ if (CI->getAssociatedExpression()->getStmtClass() !=
+ SI->getAssociatedExpression()->getStmtClass())
+ break;
+ // Are we dealing with different variables/fields?
+ if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
+ break;
+ }
+      // We found an overlap if, for at least one of the component lists, we
+      // reached the head of the list.
+ if (CI == CE || SI == SE) {
+ assert((CI != CE || SI != SE) &&
+ "Unexpected full match of the mapping components.");
+ const MapData &BaseData = CI == CE ? L : L1;
+ OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
+ SI == SE ? Components : Components1;
+ auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
+ OverlappedElements.getSecond().push_back(SubData);
+ }
}
+ }
+ // Sort the overlapped elements for each item.
+ llvm::SmallVector<const FieldDecl *, 4> Layout;
+ if (!OverlappedData.empty()) {
+ if (const auto *CRD =
+ VD->getType().getCanonicalType()->getAsCXXRecordDecl())
+ getPlainLayout(CRD, Layout, /*AsBase=*/false);
+ else {
+ const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
+ Layout.append(RD->field_begin(), RD->field_end());
+ }
+ }
+ for (auto &Pair : OverlappedData) {
+ llvm::sort(
+ Pair.getSecond(),
+ [&Layout](
+ OMPClauseMappableExprCommon::MappableExprComponentListRef First,
+ OMPClauseMappableExprCommon::MappableExprComponentListRef
+ Second) {
+ auto CI = First.rbegin();
+ auto CE = First.rend();
+ auto SI = Second.rbegin();
+ auto SE = Second.rend();
+ for (; CI != CE && SI != SE; ++CI, ++SI) {
+ if (CI->getAssociatedExpression()->getStmtClass() !=
+ SI->getAssociatedExpression()->getStmtClass())
+ break;
+ // Are we dealing with different variables/fields?
+ if (CI->getAssociatedDeclaration() !=
+ SI->getAssociatedDeclaration())
+ break;
+ }
+
+ // Lists contain the same elements.
+ if (CI == CE && SI == SE)
+ return false;
+
+ // List with less elements is less than list with more elements.
+ if (CI == CE || SI == SE)
+ return CI == CE;
+
+ const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
+ const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
+ if (FD1->getParent() == FD2->getParent())
+ return FD1->getFieldIndex() < FD2->getFieldIndex();
+ const auto It =
+ llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
+ return FD == FD1 || FD == FD2;
+ });
+ return *It == FD1;
+ });
+ }
+
+  // The mapping flags depend on whether this is the first component list
+  // associated with a capture.
+  // Go through all of the elements with the overlapped elements first.
+ for (const auto &Pair : OverlappedData) {
+ const MapData &L = *Pair.getFirst();
+ OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
+ OpenMPMapClauseKind MapType;
+ ArrayRef<OpenMPMapModifierKind> MapModifiers;
+ bool IsImplicit;
+ std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
+ ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
+ OverlappedComponents = Pair.getSecond();
+ bool IsFirstComponentList = true;
+ generateInfoForComponentList(MapType, MapModifiers, Components,
+ BasePointers, Pointers, Sizes, Types,
+ PartialStruct, IsFirstComponentList,
+ IsImplicit, OverlappedComponents);
+ }
+ // Go through other elements without overlapped elements.
+ bool IsFirstComponentList = OverlappedData.empty();
+ for (const MapData &L : DeclComponentLists) {
+ OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
+ OpenMPMapClauseKind MapType;
+ ArrayRef<OpenMPMapModifierKind> MapModifiers;
+ bool IsImplicit;
+ std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
+ auto It = OverlappedData.find(&L);
+ if (It == OverlappedData.end())
+ generateInfoForComponentList(MapType, MapModifiers, Components,
+ BasePointers, Pointers, Sizes, Types,
+ PartialStruct, IsFirstComponentList,
+ IsImplicit);
+ IsFirstComponentList = false;
+ }
}
/// Generate the base pointers, section pointers, sizes and map types
@@ -7436,12 +7843,12 @@ public:
if (!VD)
continue;
llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
- isDeclareTargetDeclaration(VD);
+ OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
if (!Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
continue;
StructRangeInfoTy PartialStruct;
generateInfoForComponentList(
- C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers,
+ C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
Pointers, Sizes, Types, PartialStruct,
/*IsFirstComponentList=*/true, C->isImplicit());
assert(!PartialStruct.Base.isValid() &&
@@ -7658,6 +8065,183 @@ static void emitOffloadingArraysArgument(
}
}
+/// Checks if the expression is constant or does not have non-trivial function
+/// calls.
+static bool isTrivial(ASTContext &Ctx, const Expr *E) {
+ // We can skip constant expressions.
+ // We can skip expressions with trivial calls or simple expressions.
+ return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
+ !E->hasNonTrivialCall(Ctx)) &&
+ !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
+}
+
+/// Checks if \p Body is a \a CompoundStmt and returns its child statement
+/// iff there is only one that is not evaluatable at compile time.
+static const Stmt *getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body) {
+ if (const auto *C = dyn_cast<CompoundStmt>(Body)) {
+ const Stmt *Child = nullptr;
+ for (const Stmt *S : C->body()) {
+ if (const auto *E = dyn_cast<Expr>(S)) {
+ if (isTrivial(Ctx, E))
+ continue;
+ }
+ // Some of the statements can be ignored.
+ if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
+ isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
+ continue;
+ // Analyze declarations.
+ if (const auto *DS = dyn_cast<DeclStmt>(S)) {
+ if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
+ if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
+ isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
+ isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
+ isa<UsingDirectiveDecl>(D) ||
+ isa<OMPDeclareReductionDecl>(D) ||
+ isa<OMPThreadPrivateDecl>(D))
+ return true;
+ const auto *VD = dyn_cast<VarDecl>(D);
+ if (!VD)
+ return false;
+ return VD->isConstexpr() ||
+ ((VD->getType().isTrivialType(Ctx) ||
+ VD->getType()->isReferenceType()) &&
+ (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
+ }))
+ continue;
+ }
+      // Found multiple children - cannot return a single child.
+ if (Child)
+ return Body;
+ Child = S;
+ }
+ if (Child)
+ return Child;
+ }
+ return Body;
+}
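+// Editorial illustration (not part of the original patch): for a captured
+// body like
+//   { int tmp = 0; #pragma omp teams distribute parallel for ... }
+// the DeclStmt is trivial, so the nested directive is returned as the single
+// child; if the body also contained, say, a call with side effects, the whole
+// CompoundStmt would be returned instead.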
+
+/// Check for inner distribute directive.
+static const OMPExecutableDirective *
+getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
+ const auto *CS = D.getInnermostCapturedStmt();
+ const auto *Body =
+ CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
+ const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body);
+
+ if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+ OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
+ switch (D.getDirectiveKind()) {
+ case OMPD_target:
+ if (isOpenMPDistributeDirective(DKind))
+ return NestedDir;
+ if (DKind == OMPD_teams) {
+ Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
+ /*IgnoreCaptured=*/true);
+ if (!Body)
+ return nullptr;
+ ChildStmt = getSingleCompoundChild(Ctx, Body);
+ if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+ DKind = NND->getDirectiveKind();
+ if (isOpenMPDistributeDirective(DKind))
+ return NND;
+ }
+ }
+ return nullptr;
+ case OMPD_target_teams:
+ if (isOpenMPDistributeDirective(DKind))
+ return NestedDir;
+ return nullptr;
+ case OMPD_target_parallel:
+ case OMPD_target_simd:
+ case OMPD_target_parallel_for:
+ case OMPD_target_parallel_for_simd:
+ return nullptr;
+ case OMPD_target_teams_distribute:
+ case OMPD_target_teams_distribute_simd:
+ case OMPD_target_teams_distribute_parallel_for:
+ case OMPD_target_teams_distribute_parallel_for_simd:
+ case OMPD_parallel:
+ case OMPD_for:
+ case OMPD_parallel_for:
+ case OMPD_parallel_sections:
+ case OMPD_for_simd:
+ case OMPD_parallel_for_simd:
+ case OMPD_cancel:
+ case OMPD_cancellation_point:
+ case OMPD_ordered:
+ case OMPD_threadprivate:
+ case OMPD_task:
+ case OMPD_simd:
+ case OMPD_sections:
+ case OMPD_section:
+ case OMPD_single:
+ case OMPD_master:
+ case OMPD_critical:
+ case OMPD_taskyield:
+ case OMPD_barrier:
+ case OMPD_taskwait:
+ case OMPD_taskgroup:
+ case OMPD_atomic:
+ case OMPD_flush:
+ case OMPD_teams:
+ case OMPD_target_data:
+ case OMPD_target_exit_data:
+ case OMPD_target_enter_data:
+ case OMPD_distribute:
+ case OMPD_distribute_simd:
+ case OMPD_distribute_parallel_for:
+ case OMPD_distribute_parallel_for_simd:
+ case OMPD_teams_distribute:
+ case OMPD_teams_distribute_simd:
+ case OMPD_teams_distribute_parallel_for:
+ case OMPD_teams_distribute_parallel_for_simd:
+ case OMPD_target_update:
+ case OMPD_declare_simd:
+ case OMPD_declare_target:
+ case OMPD_end_declare_target:
+ case OMPD_declare_reduction:
+ case OMPD_taskloop:
+ case OMPD_taskloop_simd:
+ case OMPD_requires:
+ case OMPD_unknown:
+ llvm_unreachable("Unexpected directive.");
+ }
+ }
+
+ return nullptr;
+}
+
+void CGOpenMPRuntime::emitTargetNumIterationsCall(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
+ const llvm::function_ref<llvm::Value *(
+ CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
+ OpenMPDirectiveKind Kind = D.getDirectiveKind();
+ const OMPExecutableDirective *TD = &D;
+ // Get nested teams distribute kind directive, if any.
+ if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
+ TD = getNestedDistributeDirective(CGM.getContext(), D);
+ if (!TD)
+ return;
+ const auto *LD = cast<OMPLoopDirective>(TD);
+ auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF,
+ PrePostActionTy &) {
+ llvm::Value *NumIterations = SizeEmitter(CGF, *LD);
+
+ // Emit device ID if any.
+ llvm::Value *DeviceID;
+ if (Device)
+ DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
+ CGF.Int64Ty, /*isSigned=*/true);
+ else
+ DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
+
+ llvm::Value *Args[] = {DeviceID, NumIterations};
+ CGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
+ };
+ emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
+}
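+// Editorial note (not part of the original patch): for a combined construct
+// such as
+//   #pragma omp target teams distribute parallel for
+//   for (int i = 0; i < N; ++i) ...
+// this emits __kmpc_push_target_tripcount(<device id>, N) ahead of the target
+// launch (the device id defaults to OMP_DEVICEID_UNDEF when no 'device'
+// clause is present), so the runtime knows the loop trip count up front.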
+
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
const OMPExecutableDirective &D,
llvm::Value *OutlinedFn,
@@ -7790,7 +8374,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
CapturedVars.clear();
CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
}
- emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars);
+ emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
CGF.EmitBranch(OffloadContBlock);
CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
@@ -7804,7 +8388,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
CapturedVars.clear();
CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
}
- emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, CapturedVars);
+ emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
};
auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
@@ -7818,6 +8402,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
// Get mappable expression information.
MappableExprsHandler MEHandler(D, CGF);
+ llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
auto RI = CS.getCapturedRecordDecl()->field_begin();
auto CV = CapturedVars.begin();
@@ -7847,6 +8432,12 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
if (CurBasePointers.empty())
MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
CurPointers, CurSizes, CurMapTypes);
+ // Generate correct mapping for variables captured by reference in
+ // lambdas.
+ if (CI->capturesVariable())
+ MEHandler.generateInfoForLambdaCaptures(
+ CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
+ CurMapTypes, LambdaPointers);
}
// We expect to have at least an element of information for this capture.
assert(!CurBasePointers.empty() &&
@@ -7868,6 +8459,9 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
Sizes.append(CurSizes.begin(), CurSizes.end());
MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
}
+    // Adjust MEMBER_OF flags for the lambda captures.
+ MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
+ Pointers, MapTypes);
// Map other list items in the map clause which are not captured variables
// but "declare target link" global variables.
MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
@@ -7935,7 +8529,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
unsigned DeviceID;
unsigned FileID;
unsigned Line;
- getTargetEntryUniqueInfo(CGM.getContext(), E.getLocStart(), DeviceID,
+ getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
FileID, Line);
// Is this a target region that should not be emitted as an entry point? If
@@ -8030,6 +8624,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
case OMPD_declare_reduction:
case OMPD_taskloop:
case OMPD_taskloop_simd:
+ case OMPD_requires:
case OMPD_unknown:
llvm_unreachable("Unknown target directive for OpenMP device codegen.");
}
@@ -8055,19 +8650,20 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
}
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
- const auto *FD = cast<FunctionDecl>(GD.getDecl());
-
// If emitting code for the host, we do not process FD here. Instead we do
// the normal code generation.
if (!CGM.getLangOpts().OpenMPIsDevice)
return false;
+ const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
+ StringRef Name = CGM.getMangledName(GD);
// Try to detect target regions in the function.
- scanForTargetRegionsFunctions(FD->getBody(), CGM.getMangledName(GD));
+ if (const auto *FD = dyn_cast<FunctionDecl>(VD))
+ scanForTargetRegionsFunctions(FD->getBody(), Name);
   // Do not emit the function if it is not marked as declare target.
- return !isDeclareTargetDeclaration(FD) &&
- AlreadyEmittedTargetFunctions.count(FD->getCanonicalDecl()) == 0;
+ return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
+ AlreadyEmittedTargetFunctions.count(Name) == 0;
}
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
@@ -8093,64 +8689,105 @@ bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
   // Do not emit the variable if it is not marked as declare target.
llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
- isDeclareTargetDeclaration(cast<VarDecl>(GD.getDecl()));
- return !Res || *Res == OMPDeclareTargetDeclAttr::MT_Link;
+ OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
+ cast<VarDecl>(GD.getDecl()));
+ if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link) {
+ DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
+ return true;
+ }
+ return false;
}
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
llvm::Constant *Addr) {
- if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
- isDeclareTargetDeclaration(VD)) {
- OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
- StringRef VarName;
- CharUnits VarSize;
- llvm::GlobalValue::LinkageTypes Linkage;
- switch (*Res) {
- case OMPDeclareTargetDeclAttr::MT_To:
- Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
- VarName = CGM.getMangledName(VD);
+ llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
+ OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
+ if (!Res) {
+ if (CGM.getLangOpts().OpenMPIsDevice) {
+ // Register non-target variables being emitted in device code (debug info
+ // may cause this).
+ StringRef VarName = CGM.getMangledName(VD);
+ EmittedNonTargetVariables.try_emplace(VarName, Addr);
+ }
+ return;
+ }
+ // Register declare target variables.
+ OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
+ StringRef VarName;
+ CharUnits VarSize;
+ llvm::GlobalValue::LinkageTypes Linkage;
+ switch (*Res) {
+ case OMPDeclareTargetDeclAttr::MT_To:
+ Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
+ VarName = CGM.getMangledName(VD);
+ if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
- Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
- // Temp solution to prevent optimizations of the internal variables.
- if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
- std::string RefName = getName({VarName, "ref"});
- if (!CGM.GetGlobalValue(RefName)) {
- llvm::Constant *AddrRef =
- getOrCreateInternalVariable(Addr->getType(), RefName);
- auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
- GVAddrRef->setConstant(/*Val=*/true);
- GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
- GVAddrRef->setInitializer(Addr);
- CGM.addCompilerUsedGlobal(GVAddrRef);
- }
- }
- break;
- case OMPDeclareTargetDeclAttr::MT_Link:
- Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
- if (CGM.getLangOpts().OpenMPIsDevice) {
- VarName = Addr->getName();
- Addr = nullptr;
- } else {
- VarName = getAddrOfDeclareTargetLink(VD).getName();
- Addr =
- cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer());
+ assert(!VarSize.isZero() && "Expected non-zero size of the variable");
+ } else {
+ VarSize = CharUnits::Zero();
+ }
+ Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
+ // Temp solution to prevent optimizations of the internal variables.
+ if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
+ std::string RefName = getName({VarName, "ref"});
+ if (!CGM.GetGlobalValue(RefName)) {
+ llvm::Constant *AddrRef =
+ getOrCreateInternalVariable(Addr->getType(), RefName);
+ auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
+ GVAddrRef->setConstant(/*Val=*/true);
+ GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
+ GVAddrRef->setInitializer(Addr);
+ CGM.addCompilerUsedGlobal(GVAddrRef);
}
- VarSize = CGM.getPointerSize();
- Linkage = llvm::GlobalValue::WeakAnyLinkage;
- break;
}
- OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
- VarName, Addr, VarSize, Flags, Linkage);
+ break;
+ case OMPDeclareTargetDeclAttr::MT_Link:
+ Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
+ if (CGM.getLangOpts().OpenMPIsDevice) {
+ VarName = Addr->getName();
+ Addr = nullptr;
+ } else {
+ VarName = getAddrOfDeclareTargetLink(VD).getName();
+ Addr = cast<llvm::Constant>(getAddrOfDeclareTargetLink(VD).getPointer());
+ }
+ VarSize = CGM.getPointerSize();
+ Linkage = llvm::GlobalValue::WeakAnyLinkage;
+ break;
}
+ OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
+ VarName, Addr, VarSize, Flags, Linkage);
}
bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
- if (isa<FunctionDecl>(GD.getDecl()))
+ if (isa<FunctionDecl>(GD.getDecl()) ||
+ isa<OMPDeclareReductionDecl>(GD.getDecl()))
return emitTargetFunctions(GD);
return emitTargetGlobalVariable(GD);
}
+void CGOpenMPRuntime::emitDeferredTargetDecls() const {
+ for (const VarDecl *VD : DeferredGlobalVariables) {
+ llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
+ OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
+ if (!Res)
+ continue;
+ if (*Res == OMPDeclareTargetDeclAttr::MT_To) {
+ CGM.EmitGlobal(VD);
+ } else {
+ assert(*Res == OMPDeclareTargetDeclAttr::MT_Link &&
+ "Expected to or link clauses.");
+ (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
+ }
+ }
+}
+
+void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
+ assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
+         "Expected target-based directive.");
+}
+
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
CodeGenModule &CGM)
: CGM(CGM) {
@@ -8169,21 +8806,20 @@ bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
return true;
+ StringRef Name = CGM.getMangledName(GD);
const auto *D = cast<FunctionDecl>(GD.getDecl());
- const FunctionDecl *FD = D->getCanonicalDecl();
   // Do not emit the function if it is marked as declare target, as it was
   // already emitted.
- if (isDeclareTargetDeclaration(D)) {
- if (D->hasBody() && AlreadyEmittedTargetFunctions.count(FD) == 0) {
- if (auto *F = dyn_cast_or_null<llvm::Function>(
- CGM.GetGlobalValue(CGM.getMangledName(GD))))
+ if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
+ if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
+ if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
return !F->isDeclaration();
return false;
}
return true;
}
- return !AlreadyEmittedTargetFunctions.insert(FD).second;
+ return !AlreadyEmittedTargetFunctions.insert(Name).second;
}
llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
@@ -8478,6 +9114,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
case OMPD_target_parallel:
case OMPD_target_parallel_for:
case OMPD_target_parallel_for_simd:
+ case OMPD_requires:
case OMPD_unknown:
llvm_unreachable("Unexpected standalone target data directive.");
break;
@@ -8730,8 +9367,8 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
ParamAttrTy &ParamAttr = ParamAttrs[Pos];
ParamAttr.Kind = Linear;
if (*SI) {
- if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C,
- Expr::SE_AllowSideEffects)) {
+ Expr::EvalResult Result;
+ if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
if (const auto *DRE =
cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
@@ -8740,6 +9377,8 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
ParamPositions[StridePVD->getCanonicalDecl()]);
}
}
+ } else {
+ ParamAttr.StrideOrArg = Result.Val.getInt();
}
}
++SI;
@@ -8782,7 +9421,8 @@ public:
} // namespace
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
- const OMPLoopDirective &D) {
+ const OMPLoopDirective &D,
+ ArrayRef<Expr *> NumIterations) {
if (!CGF.HaveInsertPoint())
return;
@@ -8805,37 +9445,50 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
} else {
RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
}
+ llvm::APInt Size(/*numBits=*/32, NumIterations.size());
+ QualType ArrayTy =
+ C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0);
- Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims");
- CGF.EmitNullInitialization(DimsAddr, KmpDimTy);
+ Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
+ CGF.EmitNullInitialization(DimsAddr, ArrayTy);
enum { LowerFD = 0, UpperFD, StrideFD };
// Fill dims with data.
- LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy);
- // dims.upper = num_iterations;
- LValue UpperLVal =
- CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD));
- llvm::Value *NumIterVal = CGF.EmitScalarConversion(
- CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(),
- Int64Ty, D.getNumIterations()->getExprLoc());
- CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
- // dims.stride = 1;
- LValue StrideLVal =
- CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD));
- CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
- StrideLVal);
+ for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
+ LValue DimsLVal =
+ CGF.MakeAddrLValue(CGF.Builder.CreateConstArrayGEP(
+ DimsAddr, I, C.getTypeSizeInChars(KmpDimTy)),
+ KmpDimTy);
+ // dims.upper = num_iterations;
+ LValue UpperLVal = CGF.EmitLValueForField(
+ DimsLVal, *std::next(RD->field_begin(), UpperFD));
+ llvm::Value *NumIterVal =
+ CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
+ D.getNumIterations()->getType(), Int64Ty,
+ D.getNumIterations()->getExprLoc());
+ CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
+ // dims.stride = 1;
+ LValue StrideLVal = CGF.EmitLValueForField(
+ DimsLVal, *std::next(RD->field_begin(), StrideFD));
+ CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
+ StrideLVal);
+ }
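+  // Editorial illustration (not part of the original patch): for
+  // '#pragma omp for ordered(2)' NumIterations has two entries, so a
+  // two-element kmp_dim array is filled above and num_dims below is 2.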
// Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
// kmp_int32 num_dims, struct kmp_dim * dims);
- llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()),
- getThreadID(CGF, D.getLocStart()),
- llvm::ConstantInt::getSigned(CGM.Int32Ty, 1),
- CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- DimsAddr.getPointer(), CGM.VoidPtrTy)};
+ llvm::Value *Args[] = {
+ emitUpdateLocation(CGF, D.getBeginLoc()),
+ getThreadID(CGF, D.getBeginLoc()),
+ llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.Builder
+ .CreateConstArrayGEP(DimsAddr, 0, C.getTypeSizeInChars(KmpDimTy))
+ .getPointer(),
+ CGM.VoidPtrTy)};
llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init);
CGF.EmitRuntimeCall(RTLFn, Args);
llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
- emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())};
+ emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
llvm::makeArrayRef(FiniArgs));
@@ -8845,16 +9498,29 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
const OMPDependClause *C) {
QualType Int64Ty =
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
- const Expr *CounterVal = C->getCounterValue();
- assert(CounterVal);
- llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal),
- CounterVal->getType(), Int64Ty,
- CounterVal->getExprLoc());
- Address CntAddr = CGF.CreateMemTemp(Int64Ty, ".cnt.addr");
- CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty);
- llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()),
- getThreadID(CGF, C->getLocStart()),
- CntAddr.getPointer()};
+ llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
+ QualType ArrayTy = CGM.getContext().getConstantArrayType(
+ Int64Ty, Size, ArrayType::Normal, 0);
+ Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
+ for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
+ const Expr *CounterVal = C->getLoopData(I);
+ assert(CounterVal);
+ llvm::Value *CntVal = CGF.EmitScalarConversion(
+ CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
+ CounterVal->getExprLoc());
+ CGF.EmitStoreOfScalar(
+ CntVal,
+ CGF.Builder.CreateConstArrayGEP(
+ CntAddr, I, CGM.getContext().getTypeSizeInChars(Int64Ty)),
+ /*Volatile=*/false, Int64Ty);
+ }
+ llvm::Value *Args[] = {
+ emitUpdateLocation(CGF, C->getBeginLoc()),
+ getThreadID(CGF, C->getBeginLoc()),
+ CGF.Builder
+ .CreateConstArrayGEP(CntAddr, 0,
+ CGM.getContext().getTypeSizeInChars(Int64Ty))
+ .getPointer()};
llvm::Value *RTLFn;
if (C->getDependencyKind() == OMPC_DEPEND_source) {
RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
@@ -9169,7 +9835,8 @@ void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
}
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
- const OMPLoopDirective &D) {
+ const OMPLoopDirective &D,
+ ArrayRef<Expr *> NumIterations) {
llvm_unreachable("Not supported in SIMD-only mode");
}
diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h
index 01ff0c20fd66..1822a6fd1974 100644
--- a/lib/CodeGen/CGOpenMPRuntime.h
+++ b/lib/CodeGen/CGOpenMPRuntime.h
@@ -15,12 +15,13 @@
#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H
#include "CGValue.h"
+#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/Type.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceLocation.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/ValueHandle.h"
@@ -278,12 +279,39 @@ protected:
/// stored.
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc);
+ void setLocThreadIdInsertPt(CodeGenFunction &CGF,
+ bool AtCurrentPoint = false);
+ void clearLocThreadIdInsertPt(CodeGenFunction &CGF);
+
+ /// Check if the default location must be constant.
+ /// Default is false to support OMPT/OMPD.
+ virtual bool isDefaultLocationConstant() const { return false; }
+
+ /// Returns additional flags that can be stored in reserved_2 field of the
+ /// default location.
+ virtual unsigned getDefaultLocationReserved2Flags() const { return 0; }
+
+ /// Returns default flags for the barriers depending on the directive, for
+  /// Returns default flags for the barriers depending on the directive for
+  /// which this barrier is going to be emitted.
+
+ /// Get the LLVM type for the critical name.
+  llvm::ArrayType *getKmpCriticalNameTy() const { return KmpCriticalNameTy; }
+
+ /// Returns corresponding lock object for the specified critical region
+ /// name. If the lock object does not exist it is created, otherwise the
+ /// reference to the existing copy is returned.
+ /// \param CriticalName Name of the critical region.
+ ///
+ llvm::Value *getCriticalRegionLock(StringRef CriticalName);
+
private:
/// Default const ident_t object used for initialization of all other
/// ident_t objects.
llvm::Constant *DefaultOpenMPPSource = nullptr;
+ using FlagsTy = std::pair<unsigned, unsigned>;
/// Map of flags and corresponding default locations.
- typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDefaultLocMapTy;
+ using OpenMPDefaultLocMapTy = llvm::DenseMap<FlagsTy, llvm::Value *>;
OpenMPDefaultLocMapTy OpenMPDefaultLocMap;
Address getOrCreateDefaultLocation(unsigned Flags);
@@ -300,6 +328,8 @@ private:
struct DebugLocThreadIdTy {
llvm::Value *DebugLoc;
llvm::Value *ThreadID;
+ /// Insert point for the service instructions.
+ llvm::AssertingVH<llvm::Instruction> ServiceInsertPt = nullptr;
};
/// Map of local debug location, ThreadId and functions.
typedef llvm::DenseMap<llvm::Function *, DebugLocThreadIdTy>
@@ -315,10 +345,6 @@ private:
SmallVector<const OMPDeclareReductionDecl *, 4>>
FunctionUDRMapTy;
FunctionUDRMapTy FunctionUDRMap;
- IdentifierInfo *In = nullptr;
- IdentifierInfo *Out = nullptr;
- IdentifierInfo *Priv = nullptr;
- IdentifierInfo *Orig = nullptr;
/// Type kmp_critical_name, originally defined as typedef kmp_int32
/// kmp_critical_name[8];
llvm::ArrayType *KmpCriticalNameTy;
@@ -600,7 +626,15 @@ private:
OffloadEntriesInfoManagerTy OffloadEntriesInfoManager;
bool ShouldMarkAsGlobal = true;
- llvm::SmallDenseSet<const FunctionDecl *> AlreadyEmittedTargetFunctions;
+ /// List of the emitted functions.
+ llvm::StringSet<> AlreadyEmittedTargetFunctions;
+ /// List of the global variables with their addresses that should not be
+ /// emitted for the target.
+ llvm::StringMap<llvm::WeakTrackingVH> EmittedNonTargetVariables;
+
+ /// List of variables that can become declare target implicitly and, thus,
+ /// must be emitted.
+ llvm::SmallDenseSet<const VarDecl *> DeferredGlobalVariables;
/// Creates and registers offloading binary descriptor for the current
/// compilation unit. The function that does the registration is returned.
@@ -673,10 +707,10 @@ private:
const llvm::Twine &Name);
/// Set of threadprivate variables with the generated initializer.
- llvm::SmallPtrSet<const VarDecl *, 4> ThreadPrivateWithDefinition;
+ llvm::StringSet<> ThreadPrivateWithDefinition;
/// Set of declare target variables with the generated initializer.
- llvm::SmallPtrSet<const VarDecl *, 4> DeclareTargetWithDefinition;
+ llvm::StringSet<> DeclareTargetWithDefinition;
/// Emits initialization code for the threadprivate variables.
/// \param VDAddr Address of the global variable \a VD.
@@ -688,13 +722,6 @@ private:
llvm::Value *Ctor, llvm::Value *CopyCtor,
llvm::Value *Dtor, SourceLocation Loc);
- /// Returns corresponding lock object for the specified critical region
- /// name. If the lock object does not exist it is created, otherwise the
- /// reference to the existing copy is returned.
- /// \param CriticalName Name of the critical region.
- ///
- llvm::Value *getCriticalRegionLock(StringRef CriticalName);
-
struct TaskResultTy {
llvm::Value *NewTask = nullptr;
llvm::Value *TaskEntry = nullptr;
@@ -884,6 +911,20 @@ public:
virtual bool isStaticNonchunked(OpenMPDistScheduleClauseKind ScheduleKind,
bool Chunked) const;
+ /// Check if the specified \a ScheduleKind is static chunked.
+ /// \param ScheduleKind Schedule kind specified in the 'schedule' clause.
+ /// \param Chunked True if chunk is specified in the clause.
+ ///
+ virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
+ bool Chunked) const;
+
+  /// Check if the specified \a ScheduleKind is static chunked.
+ /// \param ScheduleKind Schedule kind specified in the 'dist_schedule' clause.
+ /// \param Chunked True if chunk is specified in the clause.
+ ///
+ virtual bool isStaticChunked(OpenMPDistScheduleClauseKind ScheduleKind,
+ bool Chunked) const;
+
/// Check if the specified \a ScheduleKind is dynamic.
/// This kind of worksharing directive is emitted without outer loop.
/// \param ScheduleKind Schedule Kind specified in the 'schedule' clause.
@@ -1327,6 +1368,15 @@ public:
bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen);
+ /// Emit code that pushes the trip count of loops associated with constructs
+ /// 'target teams distribute' and 'teams distribute parallel for'.
+ /// \param SizeEmitter Emits the int64 value for the number of iterations of
+ /// the associated loop.
+ virtual void emitTargetNumIterationsCall(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
+ const llvm::function_ref<llvm::Value *(
+ CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter);
+
/// Emit the target offloading code associated with \a D. The emitted
   /// code attempts offloading the execution to the device, and in the event
   /// of a failure it executes the host version outlined in \a OutlinedFn.
@@ -1465,8 +1515,8 @@ public:
/// Emit initialization for doacross loop nesting support.
/// \param D Loop-based construct used in doacross nesting construct.
- virtual void emitDoacrossInit(CodeGenFunction &CGF,
- const OMPLoopDirective &D);
+ virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D,
+ ArrayRef<Expr *> NumIterations);
/// Emit code for doacross ordered directive with 'depend' clause.
/// \param C 'depend' clause with 'sink|source' dependency kind.
@@ -1490,6 +1540,18 @@ public:
const VarDecl *NativeParam,
const VarDecl *TargetParam) const;
+ /// Choose default schedule type and chunk value for the
+ /// dist_schedule clause.
+ virtual void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF,
+ const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind,
+ llvm::Value *&Chunk) const {}
+
+ /// Choose default schedule type and chunk value for the
+ /// schedule clause.
+ virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF,
+ const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind,
+ const Expr *&ChunkExpr) const {}
+
/// Emits call of the outlined function with the provided arguments,
/// translating these arguments to correct target-specific arguments.
virtual void
@@ -1505,10 +1567,23 @@ public:
virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF,
const VarDecl *VD);
- /// Marks the declaration as alread emitted for the device code and returns
+ /// Marks the declaration as already emitted for the device code and returns
/// true, if it was marked already, and false, otherwise.
bool markAsGlobalTarget(GlobalDecl GD);
+ /// Emit deferred declare target variables marked for deferred emission.
+ void emitDeferredTargetDecls() const;
+
+ /// Adjust some parameters for the target-based directives, like addresses of
+ /// the variables captured by reference in lambdas.
+ virtual void
+ adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF,
+ const OMPExecutableDirective &D) const;
+
+  /// Perform a check on the 'requires' declaration to ensure that the target
+  /// architecture supports unified addressing.
+ virtual void checkArchForUnifiedAddressing(CodeGenModule &CGM,
+ const OMPRequiresDecl *D) const {}
};
 /// Class supports emission of SIMD-only code.
@@ -2051,8 +2126,8 @@ public:
/// Emit initialization for doacross loop nesting support.
/// \param D Loop-based construct used in doacross nesting construct.
- void emitDoacrossInit(CodeGenFunction &CGF,
- const OMPLoopDirective &D) override;
+ void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D,
+ ArrayRef<Expr *> NumIterations) override;
/// Emit code for doacross ordered directive with 'depend' clause.
/// \param C 'depend' clause with 'sink|source' dependency kind.
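
For context, a minimal sketch (not part of this patch; loop bounds and the helper name are assumptions) of the doacross loop nest whose per-dimension iteration counts now reach emitDoacrossInit through the new NumIterations parameter:

void compute_cell(int i, int j);

void doacross(int N, int M) {
  #pragma omp for ordered(2)
  for (int i = 0; i < N; ++i)
    for (int j = 0; j < M; ++j) {
      #pragma omp ordered depend(sink : i - 1, j)
      compute_cell(i, j);   // may only run once (i-1, j) has completed
      #pragma omp ordered depend(source)
    }
}
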
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 036b5371fe0b..7046ab3aa35c 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -17,6 +17,7 @@
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
+#include "clang/Basic/Cuda.h"
#include "llvm/ADT/SmallPtrSet.h"
using namespace clang;
@@ -32,8 +33,8 @@ enum OpenMPRTLFunctionNVPTX {
/// Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
/// int16_t RequiresOMPRuntime, int16_t RequiresDataSharing);
OMPRTL_NVPTX__kmpc_spmd_kernel_init,
- /// Call to void __kmpc_spmd_kernel_deinit();
- OMPRTL_NVPTX__kmpc_spmd_kernel_deinit,
+ /// Call to void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
+ OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2,
/// Call to void __kmpc_kernel_prepare_parallel(void
/// *outlined_function, int16_t
/// IsOMPRuntimeInitialized);
@@ -55,37 +56,27 @@ enum OpenMPRTLFunctionNVPTX {
/// Call to int64_t __kmpc_shuffle_int64(int64_t element,
/// int16_t lane_offset, int16_t warp_size);
OMPRTL_NVPTX__kmpc_shuffle_int64,
- /// Call to __kmpc_nvptx_parallel_reduce_nowait(kmp_int32
+ /// Call to __kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc, kmp_int32
/// global_tid, kmp_int32 num_vars, size_t reduce_size, void* reduce_data,
/// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
/// lane_offset, int16_t shortCircuit),
/// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num));
- OMPRTL_NVPTX__kmpc_parallel_reduce_nowait,
- /// Call to __kmpc_nvptx_simd_reduce_nowait(kmp_int32
- /// global_tid, kmp_int32 num_vars, size_t reduce_size, void* reduce_data,
- /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
- /// lane_offset, int16_t shortCircuit),
- /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num));
- OMPRTL_NVPTX__kmpc_simd_reduce_nowait,
- /// Call to __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid,
- /// int32_t num_vars, size_t reduce_size, void *reduce_data,
- /// void (*kmp_ShuffleReductFctPtr)(void *rhs, int16_t lane_id, int16_t
- /// lane_offset, int16_t shortCircuit),
- /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num),
- /// void (*kmp_CopyToScratchpadFctPtr)(void *reduce_data, void * scratchpad,
- /// int32_t index, int32_t width),
- /// void (*kmp_LoadReduceFctPtr)(void *reduce_data, void * scratchpad, int32_t
- /// index, int32_t width, int32_t reduce))
- OMPRTL_NVPTX__kmpc_teams_reduce_nowait,
+ OMPRTL_NVPTX__kmpc_parallel_reduce_nowait_v2,
+ /// Call to __kmpc_nvptx_teams_reduce_nowait_simple(ident_t *loc, kmp_int32
+ /// global_tid, kmp_critical_name *lck)
+ OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_simple,
+ /// Call to __kmpc_nvptx_teams_end_reduce_nowait_simple(ident_t *loc,
+ /// kmp_int32 global_tid, kmp_critical_name *lck)
+ OMPRTL_NVPTX__kmpc_nvptx_teams_end_reduce_nowait_simple,
/// Call to __kmpc_nvptx_end_reduce_nowait(int32_t global_tid);
OMPRTL_NVPTX__kmpc_end_reduce_nowait,
/// Call to void __kmpc_data_sharing_init_stack();
OMPRTL_NVPTX__kmpc_data_sharing_init_stack,
/// Call to void __kmpc_data_sharing_init_stack_spmd();
OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd,
- /// Call to void* __kmpc_data_sharing_push_stack(size_t size,
+ /// Call to void* __kmpc_data_sharing_coalesced_push_stack(size_t size,
/// int16_t UseSharedMemory);
- OMPRTL_NVPTX__kmpc_data_sharing_push_stack,
+ OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack,
/// Call to void __kmpc_data_sharing_pop_stack(void *a);
OMPRTL_NVPTX__kmpc_data_sharing_pop_stack,
/// Call to void __kmpc_begin_sharing_variables(void ***args,
@@ -100,6 +91,17 @@ enum OpenMPRTLFunctionNVPTX {
OMPRTL_NVPTX__kmpc_parallel_level,
/// Call to int8_t __kmpc_is_spmd_exec_mode();
OMPRTL_NVPTX__kmpc_is_spmd_exec_mode,
+ /// Call to void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode,
+ /// const void *buf, size_t size, int16_t is_shared, const void **res);
+ OMPRTL_NVPTX__kmpc_get_team_static_memory,
+ /// Call to void __kmpc_restore_team_static_memory(int16_t
+ /// isSPMDExecutionMode, int16_t is_shared);
+ OMPRTL_NVPTX__kmpc_restore_team_static_memory,
+ /// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
+ OMPRTL__kmpc_barrier,
+ /// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32
+ /// global_tid);
+ OMPRTL__kmpc_barrier_simple_spmd,
};
/// Pre(post)-action for different OpenMP constructs specialized for NVPTX.
@@ -142,19 +144,35 @@ public:
/// a target region. The appropriate mode (SPMD|NON-SPMD) is set on entry
/// to the target region and used by containing directives such as 'parallel'
/// to emit optimized code.
-class ExecutionModeRAII {
+class ExecutionRuntimeModesRAII {
private:
- CGOpenMPRuntimeNVPTX::ExecutionMode SavedMode;
- CGOpenMPRuntimeNVPTX::ExecutionMode &Mode;
+ CGOpenMPRuntimeNVPTX::ExecutionMode SavedExecMode =
+ CGOpenMPRuntimeNVPTX::EM_Unknown;
+ CGOpenMPRuntimeNVPTX::ExecutionMode &ExecMode;
+ bool SavedRuntimeMode = false;
+ bool *RuntimeMode = nullptr;
public:
- ExecutionModeRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &Mode, bool IsSPMD)
- : Mode(Mode) {
- SavedMode = Mode;
- Mode = IsSPMD ? CGOpenMPRuntimeNVPTX::EM_SPMD
- : CGOpenMPRuntimeNVPTX::EM_NonSPMD;
+ /// Constructor for Non-SPMD mode.
+ ExecutionRuntimeModesRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &ExecMode)
+ : ExecMode(ExecMode) {
+ SavedExecMode = ExecMode;
+ ExecMode = CGOpenMPRuntimeNVPTX::EM_NonSPMD;
+ }
+ /// Constructor for SPMD mode.
+ ExecutionRuntimeModesRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &ExecMode,
+ bool &RuntimeMode, bool FullRuntimeMode)
+ : ExecMode(ExecMode), RuntimeMode(&RuntimeMode) {
+ SavedExecMode = ExecMode;
+ SavedRuntimeMode = RuntimeMode;
+ ExecMode = CGOpenMPRuntimeNVPTX::EM_SPMD;
+ RuntimeMode = FullRuntimeMode;
+ }
+ ~ExecutionRuntimeModesRAII() {
+ ExecMode = SavedExecMode;
+ if (RuntimeMode)
+ *RuntimeMode = SavedRuntimeMode;
}
- ~ExecutionModeRAII() { Mode = SavedMode; }
};
/// GPU Configuration: This information can be derived from cuda registers,
@@ -169,16 +187,113 @@ enum MachineConfiguration : unsigned {
LaneIDMask = WarpSize - 1,
/// Global memory alignment for performance.
- GlobalMemoryAlignment = 256,
-};
+ GlobalMemoryAlignment = 128,
-enum NamedBarrier : unsigned {
- /// Synchronize on this barrier #ID using a named barrier primitive.
- /// Only the subset of active threads in a parallel region arrive at the
- /// barrier.
- NB_Parallel = 1,
+ /// Maximal size of the shared memory buffer.
+ SharedMemorySize = 128,
};
+static const ValueDecl *getPrivateItem(const Expr *RefExpr) {
+ RefExpr = RefExpr->IgnoreParens();
+ if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(RefExpr)) {
+ const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
+ while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
+ Base = TempASE->getBase()->IgnoreParenImpCasts();
+ RefExpr = Base;
+ } else if (auto *OASE = dyn_cast<OMPArraySectionExpr>(RefExpr)) {
+ const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
+ while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
+ Base = TempOASE->getBase()->IgnoreParenImpCasts();
+ while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
+ Base = TempASE->getBase()->IgnoreParenImpCasts();
+ RefExpr = Base;
+ }
+ RefExpr = RefExpr->IgnoreParenImpCasts();
+ if (const auto *DE = dyn_cast<DeclRefExpr>(RefExpr))
+ return cast<ValueDecl>(DE->getDecl()->getCanonicalDecl());
+ const auto *ME = cast<MemberExpr>(RefExpr);
+ return cast<ValueDecl>(ME->getMemberDecl()->getCanonicalDecl());
+}
+
+typedef std::pair<CharUnits /*Align*/, const ValueDecl *> VarsDataTy;
+static bool stable_sort_comparator(const VarsDataTy P1, const VarsDataTy P2) {
+ return P1.first > P2.first;
+}
+
+static RecordDecl *buildRecordForGlobalizedVars(
+ ASTContext &C, ArrayRef<const ValueDecl *> EscapedDecls,
+ ArrayRef<const ValueDecl *> EscapedDeclsForTeams,
+ llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
+ &MappedDeclsFields) {
+ if (EscapedDecls.empty() && EscapedDeclsForTeams.empty())
+ return nullptr;
+ SmallVector<VarsDataTy, 4> GlobalizedVars;
+ for (const ValueDecl *D : EscapedDecls)
+ GlobalizedVars.emplace_back(
+ CharUnits::fromQuantity(std::max(
+ C.getDeclAlign(D).getQuantity(),
+ static_cast<CharUnits::QuantityType>(GlobalMemoryAlignment))),
+ D);
+ for (const ValueDecl *D : EscapedDeclsForTeams)
+ GlobalizedVars.emplace_back(C.getDeclAlign(D), D);
+ std::stable_sort(GlobalizedVars.begin(), GlobalizedVars.end(),
+ stable_sort_comparator);
+ // Build struct _globalized_locals_ty {
+  //  /* globalized vars */[WarpSize] align (max(decl_align,
+ // GlobalMemoryAlignment))
+ // /* globalized vars */ for EscapedDeclsForTeams
+ // };
+ RecordDecl *GlobalizedRD = C.buildImplicitRecord("_globalized_locals_ty");
+ GlobalizedRD->startDefinition();
+ llvm::SmallPtrSet<const ValueDecl *, 16> SingleEscaped(
+ EscapedDeclsForTeams.begin(), EscapedDeclsForTeams.end());
+ for (const auto &Pair : GlobalizedVars) {
+ const ValueDecl *VD = Pair.second;
+ QualType Type = VD->getType();
+ if (Type->isLValueReferenceType())
+ Type = C.getPointerType(Type.getNonReferenceType());
+ else
+ Type = Type.getNonReferenceType();
+ SourceLocation Loc = VD->getLocation();
+ FieldDecl *Field;
+ if (SingleEscaped.count(VD)) {
+ Field = FieldDecl::Create(
+ C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type,
+ C.getTrivialTypeSourceInfo(Type, SourceLocation()),
+ /*BW=*/nullptr, /*Mutable=*/false,
+ /*InitStyle=*/ICIS_NoInit);
+ Field->setAccess(AS_public);
+ if (VD->hasAttrs()) {
+ for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
+ E(VD->getAttrs().end());
+ I != E; ++I)
+ Field->addAttr(*I);
+ }
+ } else {
+ llvm::APInt ArraySize(32, WarpSize);
+ Type = C.getConstantArrayType(Type, ArraySize, ArrayType::Normal, 0);
+ Field = FieldDecl::Create(
+ C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type,
+ C.getTrivialTypeSourceInfo(Type, SourceLocation()),
+ /*BW=*/nullptr, /*Mutable=*/false,
+ /*InitStyle=*/ICIS_NoInit);
+ Field->setAccess(AS_public);
+ llvm::APInt Align(32, std::max(C.getDeclAlign(VD).getQuantity(),
+ static_cast<CharUnits::QuantityType>(
+ GlobalMemoryAlignment)));
+ Field->addAttr(AlignedAttr::CreateImplicit(
+ C, AlignedAttr::GNU_aligned, /*IsAlignmentExpr=*/true,
+ IntegerLiteral::Create(C, Align,
+ C.getIntTypeForBitwidth(32, /*Signed=*/0),
+ SourceLocation())));
+ }
+ GlobalizedRD->addDecl(Field);
+ MappedDeclsFields.try_emplace(VD, Field);
+ }
+ GlobalizedRD->completeDefinition();
+ return GlobalizedRD;
+}
+
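As a rough illustration (an assumption, not code emitted verbatim by this patch), the implicit record built above for one escaped parallel-region variable and one teams-level variable might look like:

struct _globalized_locals_ty {
  // Escaped parallel-region variable: one slot per lane ([WarpSize] elements),
  // aligned to max(decl alignment, GlobalMemoryAlignment).
  int x[32] __attribute__((aligned(128)));
  // Teams-level (lastprivate/reduction) variable: a single copy per team.
  double sum;
};
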
/// Get the list of variables that can escape their declaration context.
class CheckVarsEscapingDeclContext final
: public ConstStmtVisitor<CheckVarsEscapingDeclContext> {
@@ -191,20 +306,10 @@ class CheckVarsEscapingDeclContext final
bool AllEscaped = false;
bool IsForCombinedParallelRegion = false;
- static llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy>
- isDeclareTargetDeclaration(const ValueDecl *VD) {
- for (const Decl *D : VD->redecls()) {
- if (!D->hasAttrs())
- continue;
- if (const auto *Attr = D->getAttr<OMPDeclareTargetDeclAttr>())
- return Attr->getMapType();
- }
- return llvm::None;
- }
-
void markAsEscaped(const ValueDecl *VD) {
// Do not globalize declare target variables.
- if (!isa<VarDecl>(VD) || isDeclareTargetDeclaration(VD))
+ if (!isa<VarDecl>(VD) ||
+ OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
return;
VD = cast<ValueDecl>(VD->getCanonicalDecl());
// Variables captured by value must be globalized.
@@ -218,9 +323,11 @@ class CheckVarsEscapingDeclContext final
const auto *Attr = FD->getAttr<OMPCaptureKindAttr>();
if (!Attr)
return;
- if (!isOpenMPPrivate(
- static_cast<OpenMPClauseKind>(Attr->getCaptureKind())) ||
- Attr->getCaptureKind() == OMPC_map)
+ if (((Attr->getCaptureKind() != OMPC_map) &&
+ !isOpenMPPrivate(
+ static_cast<OpenMPClauseKind>(Attr->getCaptureKind()))) ||
+ ((Attr->getCaptureKind() == OMPC_map) &&
+ !FD->getType()->isAnyPointerType()))
return;
}
if (!FD->getType()->isReferenceType()) {
@@ -302,55 +409,24 @@ class CheckVarsEscapingDeclContext final
}
}
- typedef std::pair<CharUnits /*Align*/, const ValueDecl *> VarsDataTy;
- static bool stable_sort_comparator(const VarsDataTy P1, const VarsDataTy P2) {
- return P1.first > P2.first;
- }
-
- void buildRecordForGlobalizedVars() {
+ void buildRecordForGlobalizedVars(bool IsInTTDRegion) {
assert(!GlobalizedRD &&
"Record for globalized variables is built already.");
- if (EscapedDecls.empty())
- return;
- ASTContext &C = CGF.getContext();
- SmallVector<VarsDataTy, 4> GlobalizedVars;
- for (const ValueDecl *D : EscapedDecls)
- GlobalizedVars.emplace_back(C.getDeclAlign(D), D);
- std::stable_sort(GlobalizedVars.begin(), GlobalizedVars.end(),
- stable_sort_comparator);
- // Build struct _globalized_locals_ty {
- // /* globalized vars */
- // };
- GlobalizedRD = C.buildImplicitRecord("_globalized_locals_ty");
- GlobalizedRD->startDefinition();
- for (const auto &Pair : GlobalizedVars) {
- const ValueDecl *VD = Pair.second;
- QualType Type = VD->getType();
- if (Type->isLValueReferenceType())
- Type = C.getPointerType(Type.getNonReferenceType());
- else
- Type = Type.getNonReferenceType();
- SourceLocation Loc = VD->getLocation();
- auto *Field = FieldDecl::Create(
- C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type,
- C.getTrivialTypeSourceInfo(Type, SourceLocation()),
- /*BW=*/nullptr, /*Mutable=*/false,
- /*InitStyle=*/ICIS_NoInit);
- Field->setAccess(AS_public);
- GlobalizedRD->addDecl(Field);
- if (VD->hasAttrs()) {
- for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
- E(VD->getAttrs().end());
- I != E; ++I)
- Field->addAttr(*I);
- }
- MappedDeclsFields.try_emplace(VD, Field);
- }
- GlobalizedRD->completeDefinition();
+ ArrayRef<const ValueDecl *> EscapedDeclsForParallel, EscapedDeclsForTeams;
+ if (IsInTTDRegion)
+ EscapedDeclsForTeams = EscapedDecls.getArrayRef();
+ else
+ EscapedDeclsForParallel = EscapedDecls.getArrayRef();
+ GlobalizedRD = ::buildRecordForGlobalizedVars(
+ CGF.getContext(), EscapedDeclsForParallel, EscapedDeclsForTeams,
+ MappedDeclsFields);
}
public:
- CheckVarsEscapingDeclContext(CodeGenFunction &CGF) : CGF(CGF) {}
+ CheckVarsEscapingDeclContext(CodeGenFunction &CGF,
+ ArrayRef<const ValueDecl *> TeamsReductions)
+ : CGF(CGF), EscapedDecls(TeamsReductions.begin(), TeamsReductions.end()) {
+ }
virtual ~CheckVarsEscapingDeclContext() = default;
void VisitDeclStmt(const DeclStmt *S) {
if (!S)
@@ -492,9 +568,9 @@ public:
  /// Returns the record that handles all the escaped local variables and is
  /// used instead of their original storage.
- const RecordDecl *getGlobalizedRecord() {
+ const RecordDecl *getGlobalizedRecord(bool IsInTTDRegion) {
if (!GlobalizedRD)
- buildRecordForGlobalizedVars();
+ buildRecordForGlobalizedVars(IsInTTDRegion);
return GlobalizedRD;
}
@@ -568,31 +644,6 @@ static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) {
"nvptx_num_threads");
}
-/// Get barrier to synchronize all threads in a block.
-static void getNVPTXCTABarrier(CodeGenFunction &CGF) {
- CGF.EmitRuntimeCall(llvm::Intrinsic::getDeclaration(
- &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0));
-}
-
-/// Get barrier #ID to synchronize selected (multiple of warp size) threads in
-/// a CTA.
-static void getNVPTXBarrier(CodeGenFunction &CGF, int ID,
- llvm::Value *NumThreads) {
- CGBuilderTy &Bld = CGF.Builder;
- llvm::Value *Args[] = {Bld.getInt32(ID), NumThreads};
- CGF.EmitRuntimeCall(llvm::Intrinsic::getDeclaration(
- &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier),
- Args);
-}
-
-/// Synchronize all GPU threads in a block.
-static void syncCTAThreads(CodeGenFunction &CGF) { getNVPTXCTABarrier(CGF); }
-
-/// Synchronize worker threads in a parallel region.
-static void syncParallelThreads(CodeGenFunction &CGF, llvm::Value *NumThreads) {
- return getNVPTXBarrier(CGF, NB_Parallel, NumThreads);
-}
-
/// Get the value of the thread_limit clause in the teams directive.
/// For the 'generic' execution mode, the runtime encodes thread_limit in
/// the launch parameters, always starting thread_limit+warpSize threads per
@@ -654,12 +705,58 @@ getDataSharingMode(CodeGenModule &CGM) {
: CGOpenMPRuntimeNVPTX::Generic;
}
+/// Checks if the expression is constant or does not have non-trivial function
+/// calls.
+static bool isTrivial(ASTContext &Ctx, const Expr *E) {
+ // We can skip constant expressions.
+ // We can skip expressions with trivial calls or simple expressions.
+ return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
+ !E->hasNonTrivialCall(Ctx)) &&
+ !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
+}
+
/// Checks if the \p Body is the \a CompoundStmt and returns its child statement
-/// iff there is only one.
-static const Stmt *getSingleCompoundChild(const Stmt *Body) {
- if (const auto *C = dyn_cast<CompoundStmt>(Body))
- if (C->size() == 1)
- return C->body_front();
+/// iff there is only one that is not evaluatable at compile time.
+static const Stmt *getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body) {
+ if (const auto *C = dyn_cast<CompoundStmt>(Body)) {
+ const Stmt *Child = nullptr;
+ for (const Stmt *S : C->body()) {
+ if (const auto *E = dyn_cast<Expr>(S)) {
+ if (isTrivial(Ctx, E))
+ continue;
+ }
+ // Some of the statements can be ignored.
+ if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
+ isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
+ continue;
+ // Analyze declarations.
+ if (const auto *DS = dyn_cast<DeclStmt>(S)) {
+ if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
+ if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
+ isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
+ isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
+ isa<UsingDirectiveDecl>(D) ||
+ isa<OMPDeclareReductionDecl>(D) ||
+ isa<OMPThreadPrivateDecl>(D))
+ return true;
+ const auto *VD = dyn_cast<VarDecl>(D);
+ if (!VD)
+ return false;
+ return VD->isConstexpr() ||
+ ((VD->getType().isTrivialType(Ctx) ||
+ VD->getType()->isReferenceType()) &&
+ (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
+ }))
+ continue;
+ }
+ // Found multiple children - cannot get the one child only.
+ if (Child)
+ return Body;
+ Child = S;
+ }
+ if (Child)
+ return Child;
+ }
return Body;
}
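
A hedged example (not from the patch; the variables n, a, and scale are illustrative assumptions) of the kind of target body this helper now sees through: the leading statements are all skippable, so the single real child still drives SPMD detection.

#pragma omp target
{
  ;                    // NullStmt, ignored
  const int scale = 2; // trivial declaration with a constant initializer, ignored
  #pragma omp parallel for
  for (int i = 0; i < n; ++i)
    a[i] *= scale;     // the one child that determines the execution mode
}
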
@@ -686,8 +783,9 @@ static bool hasParallelIfNumThreadsClause(ASTContext &Ctx,
static bool hasNestedSPMDDirective(ASTContext &Ctx,
const OMPExecutableDirective &D) {
const auto *CS = D.getInnermostCapturedStmt();
- const auto *Body = CS->getCapturedStmt()->IgnoreContainers();
- const Stmt *ChildStmt = getSingleCompoundChild(Body);
+ const auto *Body =
+ CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
+ const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body);
if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
@@ -696,27 +794,215 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
if (isOpenMPParallelDirective(DKind) &&
!hasParallelIfNumThreadsClause(Ctx, *NestedDir))
return true;
- if (DKind == OMPD_teams || DKind == OMPD_teams_distribute) {
- Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
+ if (DKind == OMPD_teams) {
+ Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
+ /*IgnoreCaptured=*/true);
if (!Body)
return false;
- ChildStmt = getSingleCompoundChild(Body);
+ ChildStmt = getSingleCompoundChild(Ctx, Body);
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
DKind = NND->getDirectiveKind();
if (isOpenMPParallelDirective(DKind) &&
!hasParallelIfNumThreadsClause(Ctx, *NND))
return true;
- if (DKind == OMPD_distribute) {
- Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
+ }
+ }
+ return false;
+ case OMPD_target_teams:
+ return isOpenMPParallelDirective(DKind) &&
+ !hasParallelIfNumThreadsClause(Ctx, *NestedDir);
+ case OMPD_target_simd:
+ case OMPD_target_parallel:
+ case OMPD_target_parallel_for:
+ case OMPD_target_parallel_for_simd:
+ case OMPD_target_teams_distribute:
+ case OMPD_target_teams_distribute_simd:
+ case OMPD_target_teams_distribute_parallel_for:
+ case OMPD_target_teams_distribute_parallel_for_simd:
+ case OMPD_parallel:
+ case OMPD_for:
+ case OMPD_parallel_for:
+ case OMPD_parallel_sections:
+ case OMPD_for_simd:
+ case OMPD_parallel_for_simd:
+ case OMPD_cancel:
+ case OMPD_cancellation_point:
+ case OMPD_ordered:
+ case OMPD_threadprivate:
+ case OMPD_task:
+ case OMPD_simd:
+ case OMPD_sections:
+ case OMPD_section:
+ case OMPD_single:
+ case OMPD_master:
+ case OMPD_critical:
+ case OMPD_taskyield:
+ case OMPD_barrier:
+ case OMPD_taskwait:
+ case OMPD_taskgroup:
+ case OMPD_atomic:
+ case OMPD_flush:
+ case OMPD_teams:
+ case OMPD_target_data:
+ case OMPD_target_exit_data:
+ case OMPD_target_enter_data:
+ case OMPD_distribute:
+ case OMPD_distribute_simd:
+ case OMPD_distribute_parallel_for:
+ case OMPD_distribute_parallel_for_simd:
+ case OMPD_teams_distribute:
+ case OMPD_teams_distribute_simd:
+ case OMPD_teams_distribute_parallel_for:
+ case OMPD_teams_distribute_parallel_for_simd:
+ case OMPD_target_update:
+ case OMPD_declare_simd:
+ case OMPD_declare_target:
+ case OMPD_end_declare_target:
+ case OMPD_declare_reduction:
+ case OMPD_taskloop:
+ case OMPD_taskloop_simd:
+ case OMPD_requires:
+ case OMPD_unknown:
+ llvm_unreachable("Unexpected directive.");
+ }
+ }
+
+ return false;
+}
+
+static bool supportsSPMDExecutionMode(ASTContext &Ctx,
+ const OMPExecutableDirective &D) {
+ OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
+ switch (DirectiveKind) {
+ case OMPD_target:
+ case OMPD_target_teams:
+ return hasNestedSPMDDirective(Ctx, D);
+ case OMPD_target_parallel:
+ case OMPD_target_parallel_for:
+ case OMPD_target_parallel_for_simd:
+ case OMPD_target_teams_distribute_parallel_for:
+ case OMPD_target_teams_distribute_parallel_for_simd:
+ return !hasParallelIfNumThreadsClause(Ctx, D);
+ case OMPD_target_simd:
+ case OMPD_target_teams_distribute:
+ case OMPD_target_teams_distribute_simd:
+ return false;
+ case OMPD_parallel:
+ case OMPD_for:
+ case OMPD_parallel_for:
+ case OMPD_parallel_sections:
+ case OMPD_for_simd:
+ case OMPD_parallel_for_simd:
+ case OMPD_cancel:
+ case OMPD_cancellation_point:
+ case OMPD_ordered:
+ case OMPD_threadprivate:
+ case OMPD_task:
+ case OMPD_simd:
+ case OMPD_sections:
+ case OMPD_section:
+ case OMPD_single:
+ case OMPD_master:
+ case OMPD_critical:
+ case OMPD_taskyield:
+ case OMPD_barrier:
+ case OMPD_taskwait:
+ case OMPD_taskgroup:
+ case OMPD_atomic:
+ case OMPD_flush:
+ case OMPD_teams:
+ case OMPD_target_data:
+ case OMPD_target_exit_data:
+ case OMPD_target_enter_data:
+ case OMPD_distribute:
+ case OMPD_distribute_simd:
+ case OMPD_distribute_parallel_for:
+ case OMPD_distribute_parallel_for_simd:
+ case OMPD_teams_distribute:
+ case OMPD_teams_distribute_simd:
+ case OMPD_teams_distribute_parallel_for:
+ case OMPD_teams_distribute_parallel_for_simd:
+ case OMPD_target_update:
+ case OMPD_declare_simd:
+ case OMPD_declare_target:
+ case OMPD_end_declare_target:
+ case OMPD_declare_reduction:
+ case OMPD_taskloop:
+ case OMPD_taskloop_simd:
+ case OMPD_requires:
+ case OMPD_unknown:
+ break;
+ }
+ llvm_unreachable(
+ "Unknown programming model for OpenMP directive on NVPTX target.");
+}
+
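+
Two illustrative target regions (assumptions, not taken from the tests) showing how supportsSPMDExecutionMode classifies directives on NVPTX:

// SPMD: the combined construct carries a parallel level and no if/num_threads clause.
#pragma omp target teams distribute parallel for
for (int i = 0; i < n; ++i) a[i] = b[i] + 1;

// Non-SPMD (generic mode): there is no parallel region inside the target region.
#pragma omp target teams distribute
for (int i = 0; i < n; ++i) a[i] = b[i] + 1;
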
+/// Check if the directive is a loop-based directive with no schedule clause at
+/// all or with static scheduling, and without an ordered clause.
+static bool hasStaticScheduling(const OMPExecutableDirective &D) {
+ assert(isOpenMPWorksharingDirective(D.getDirectiveKind()) &&
+ isOpenMPLoopDirective(D.getDirectiveKind()) &&
+ "Expected loop-based directive.");
+ return !D.hasClausesOfKind<OMPOrderedClause>() &&
+ (!D.hasClausesOfKind<OMPScheduleClause>() ||
+ llvm::any_of(D.getClausesOfKind<OMPScheduleClause>(),
+ [](const OMPScheduleClause *C) {
+ return C->getScheduleKind() == OMPC_SCHEDULE_static;
+ }));
+}
+
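+
A small sketch (assumed example code, not from the patch) of what hasStaticScheduling accepts and rejects:

// Accepted: no ordered clause, and the schedule is absent or explicitly static.
#pragma omp target teams distribute parallel for schedule(static)
for (int i = 0; i < n; ++i) c[i] = a[i] + b[i];

// Rejected: a dynamic schedule forces the full runtime.
#pragma omp target teams distribute parallel for schedule(dynamic, 4)
for (int i = 0; i < n; ++i) c[i] = a[i] + b[i];
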
+/// Check for an inner (nested) lightweight runtime construct, if any.
+static bool hasNestedLightweightDirective(ASTContext &Ctx,
+ const OMPExecutableDirective &D) {
+ assert(supportsSPMDExecutionMode(Ctx, D) && "Expected SPMD mode directive.");
+ const auto *CS = D.getInnermostCapturedStmt();
+ const auto *Body =
+ CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
+ const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body);
+
+ if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+ OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
+ switch (D.getDirectiveKind()) {
+ case OMPD_target:
+ if (isOpenMPParallelDirective(DKind) &&
+ isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) &&
+ hasStaticScheduling(*NestedDir))
+ return true;
+ if (DKind == OMPD_parallel) {
+ Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
+ /*IgnoreCaptured=*/true);
+ if (!Body)
+ return false;
+ ChildStmt = getSingleCompoundChild(Ctx, Body);
+ if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+ DKind = NND->getDirectiveKind();
+ if (isOpenMPWorksharingDirective(DKind) &&
+ isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
+ return true;
+ }
+ } else if (DKind == OMPD_teams) {
+ Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
+ /*IgnoreCaptured=*/true);
+ if (!Body)
+ return false;
+ ChildStmt = getSingleCompoundChild(Ctx, Body);
+ if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+ DKind = NND->getDirectiveKind();
+ if (isOpenMPParallelDirective(DKind) &&
+ isOpenMPWorksharingDirective(DKind) &&
+ isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
+ return true;
+ if (DKind == OMPD_parallel) {
+ Body = NND->getInnermostCapturedStmt()->IgnoreContainers(
+ /*IgnoreCaptured=*/true);
if (!Body)
return false;
- ChildStmt = getSingleCompoundChild(Body);
- if (!ChildStmt)
- return false;
+ ChildStmt = getSingleCompoundChild(Ctx, Body);
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
DKind = NND->getDirectiveKind();
- return isOpenMPParallelDirective(DKind) &&
- !hasParallelIfNumThreadsClause(Ctx, *NND);
+ if (isOpenMPWorksharingDirective(DKind) &&
+ isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
+ return true;
}
}
}
@@ -724,25 +1010,28 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
return false;
case OMPD_target_teams:
if (isOpenMPParallelDirective(DKind) &&
- !hasParallelIfNumThreadsClause(Ctx, *NestedDir))
+ isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) &&
+ hasStaticScheduling(*NestedDir))
return true;
- if (DKind == OMPD_distribute) {
- Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
+ if (DKind == OMPD_parallel) {
+ Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
+ /*IgnoreCaptured=*/true);
if (!Body)
return false;
- ChildStmt = getSingleCompoundChild(Body);
+ ChildStmt = getSingleCompoundChild(Ctx, Body);
if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
DKind = NND->getDirectiveKind();
- return isOpenMPParallelDirective(DKind) &&
- !hasParallelIfNumThreadsClause(Ctx, *NND);
+ if (isOpenMPWorksharingDirective(DKind) &&
+ isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND))
+ return true;
}
}
return false;
+ case OMPD_target_parallel:
+ return isOpenMPWorksharingDirective(DKind) &&
+ isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NestedDir);
case OMPD_target_teams_distribute:
- return isOpenMPParallelDirective(DKind) &&
- !hasParallelIfNumThreadsClause(Ctx, *NestedDir);
case OMPD_target_simd:
- case OMPD_target_parallel:
case OMPD_target_parallel_for:
case OMPD_target_parallel_for_simd:
case OMPD_target_teams_distribute_simd:
@@ -790,6 +1079,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
case OMPD_declare_reduction:
case OMPD_taskloop:
case OMPD_taskloop_simd:
+ case OMPD_requires:
case OMPD_unknown:
llvm_unreachable("Unexpected directive.");
}
@@ -798,21 +1088,26 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
return false;
}
-static bool supportsSPMDExecutionMode(ASTContext &Ctx,
- const OMPExecutableDirective &D) {
+/// Checks if the construct supports the lightweight runtime. It must be an SPMD
+/// construct with an inner loop-based construct that uses static scheduling.
+static bool supportsLightweightRuntime(ASTContext &Ctx,
+ const OMPExecutableDirective &D) {
+ if (!supportsSPMDExecutionMode(Ctx, D))
+ return false;
OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
switch (DirectiveKind) {
case OMPD_target:
case OMPD_target_teams:
- case OMPD_target_teams_distribute:
- return hasNestedSPMDDirective(Ctx, D);
case OMPD_target_parallel:
+ return hasNestedLightweightDirective(Ctx, D);
case OMPD_target_parallel_for:
case OMPD_target_parallel_for_simd:
case OMPD_target_teams_distribute_parallel_for:
case OMPD_target_teams_distribute_parallel_for_simd:
- return !hasParallelIfNumThreadsClause(Ctx, D);
+ // (Last|First)-privates must be shared in parallel region.
+ return hasStaticScheduling(D);
case OMPD_target_simd:
+ case OMPD_target_teams_distribute:
case OMPD_target_teams_distribute_simd:
return false;
case OMPD_parallel:
@@ -857,6 +1152,7 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
case OMPD_declare_reduction:
case OMPD_taskloop:
case OMPD_taskloop_simd:
+ case OMPD_requires:
case OMPD_unknown:
break;
}
@@ -870,9 +1166,9 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDKernel(const OMPExecutableDirective &D,
llvm::Constant *&OutlinedFnID,
bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen) {
- ExecutionModeRAII ModeRAII(CurrentExecutionMode, /*IsSPMD=*/false);
+ ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode);
EntryFunctionState EST;
- WorkerFunctionState WST(CGM, D.getLocStart());
+ WorkerFunctionState WST(CGM, D.getBeginLoc());
Work.clear();
WrapperFunctionsMap.clear();
@@ -886,17 +1182,35 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDKernel(const OMPExecutableDirective &D,
CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST)
: EST(EST), WST(WST) {}
void Enter(CodeGenFunction &CGF) override {
- static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
- .emitNonSPMDEntryHeader(CGF, EST, WST);
+ auto &RT =
+ static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime());
+ RT.emitNonSPMDEntryHeader(CGF, EST, WST);
+ // Skip target region initialization.
+ RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
}
void Exit(CodeGenFunction &CGF) override {
- static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
- .emitNonSPMDEntryFooter(CGF, EST);
+ auto &RT =
+ static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime());
+ RT.clearLocThreadIdInsertPt(CGF);
+ RT.emitNonSPMDEntryFooter(CGF, EST);
}
} Action(EST, WST);
CodeGen.setAction(Action);
+ IsInTTDRegion = true;
+ // Reserve place for the globalized memory.
+ GlobalizedRecords.emplace_back();
+ if (!KernelStaticGlobalized) {
+ KernelStaticGlobalized = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false,
+ llvm::GlobalValue::InternalLinkage,
+ llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
+ "_openmp_kernel_static_glob_rd$ptr", /*InsertBefore=*/nullptr,
+ llvm::GlobalValue::NotThreadLocal,
+ CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared));
+ }
emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
IsOffloadEntry, CodeGen);
+ IsInTTDRegion = false;
// Now change the name of the worker function to correspond to this target
// region's entry function.
@@ -984,7 +1298,10 @@ void CGOpenMPRuntimeNVPTX::emitSPMDKernel(const OMPExecutableDirective &D,
llvm::Constant *&OutlinedFnID,
bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen) {
- ExecutionModeRAII ModeRAII(CurrentExecutionMode, /*IsSPMD=*/true);
+ ExecutionRuntimeModesRAII ModeRAII(
+ CurrentExecutionMode, RequiresFullRuntime,
+ CGM.getLangOpts().OpenMPCUDAForceFullRuntime ||
+ !supportsLightweightRuntime(CGM.getContext(), D));
EntryFunctionState EST;
// Emit target region as a standalone region.
@@ -1000,14 +1317,30 @@ void CGOpenMPRuntimeNVPTX::emitSPMDKernel(const OMPExecutableDirective &D,
: RT(RT), EST(EST), D(D) {}
void Enter(CodeGenFunction &CGF) override {
RT.emitSPMDEntryHeader(CGF, EST, D);
+ // Skip target region initialization.
+ RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
}
void Exit(CodeGenFunction &CGF) override {
+ RT.clearLocThreadIdInsertPt(CGF);
RT.emitSPMDEntryFooter(CGF, EST);
}
} Action(*this, EST, D);
CodeGen.setAction(Action);
+ IsInTTDRegion = true;
+ // Reserve place for the globalized memory.
+ GlobalizedRecords.emplace_back();
+ if (!KernelStaticGlobalized) {
+ KernelStaticGlobalized = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false,
+ llvm::GlobalValue::InternalLinkage,
+ llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
+ "_openmp_kernel_static_glob_rd$ptr", /*InsertBefore=*/nullptr,
+ llvm::GlobalValue::NotThreadLocal,
+ CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared));
+ }
emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
IsOffloadEntry, CodeGen);
+ IsInTTDRegion = false;
}
void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader(
@@ -1019,19 +1352,18 @@ void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader(
llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute");
EST.ExitBB = CGF.createBasicBlock(".exit");
- // Initialize the OMP state in the runtime; called by all active threads.
- // TODO: Set RequiresOMPRuntime and RequiresDataSharing parameters
- // based on code analysis of the target region.
llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true),
- /*RequiresOMPRuntime=*/Bld.getInt16(1),
- /*RequiresDataSharing=*/Bld.getInt16(1)};
+ /*RequiresOMPRuntime=*/
+ Bld.getInt16(RequiresFullRuntime ? 1 : 0),
+ /*RequiresDataSharing=*/Bld.getInt16(0)};
CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args);
- // For data sharing, we need to initialize the stack.
- CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(
- OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd));
+ if (RequiresFullRuntime) {
+ // For data sharing, we need to initialize the stack.
+ CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd));
+ }
CGF.EmitBranch(ExecuteBB);
@@ -1054,8 +1386,11 @@ void CGOpenMPRuntimeNVPTX::emitSPMDEntryFooter(CodeGenFunction &CGF,
CGF.EmitBlock(OMPDeInitBB);
// DeInitialize the OMP state in the runtime; called by all active threads.
+ llvm::Value *Args[] = {/*RequiresOMPRuntime=*/
+ CGF.Builder.getInt16(RequiresFullRuntime ? 1 : 0)};
CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_deinit), None);
+ createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2), Args);
CGF.EmitBranch(EST.ExitBB);
CGF.EmitBlock(EST.ExitBB);
@@ -1142,6 +1477,8 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF,
// Signal start of parallel region.
CGF.EmitBlock(ExecuteBB);
+ // Skip initialization.
+ setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
// Process work items: outlined parallel functions.
for (llvm::Function *W : Work) {
@@ -1202,6 +1539,8 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF,
// Exit target region.
CGF.EmitBlock(ExitBB);
+ // Skip initialization.
+ clearLocThreadIdInsertPt(CGF);
}
/// Returns specified OpenMP runtime function for the current OpenMP
@@ -1238,11 +1577,12 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_init");
break;
}
- case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit: {
- // Build void __kmpc_spmd_kernel_deinit();
+ case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2: {
+ // Build void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
+ llvm::Type *TypeParams[] = {CGM.Int16Ty};
auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit");
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit_v2");
break;
}
case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: {
@@ -1307,12 +1647,12 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int64");
break;
}
- case OMPRTL_NVPTX__kmpc_parallel_reduce_nowait: {
- // Build int32_t kmpc_nvptx_parallel_reduce_nowait(kmp_int32 global_tid,
- // kmp_int32 num_vars, size_t reduce_size, void* reduce_data,
- // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
- // lane_offset, int16_t Algorithm Version),
- // void (*kmp_InterWarpCopyFctPtr)(void* src, int warp_num));
+ case OMPRTL_NVPTX__kmpc_parallel_reduce_nowait_v2: {
+ // Build int32_t kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc,
+ // kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void*
+ // reduce_data, void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t
+ // lane_id, int16_t lane_offset, int16_t Algorithm Version), void
+ // (*kmp_InterWarpCopyFctPtr)(void* src, int warp_num));
llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty,
CGM.Int16Ty, CGM.Int16Ty};
auto *ShuffleReduceFnTy =
@@ -1322,7 +1662,8 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
auto *InterWarpCopyFnTy =
llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams,
/*isVarArg=*/false);
- llvm::Type *TypeParams[] = {CGM.Int32Ty,
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
+ CGM.Int32Ty,
CGM.Int32Ty,
CGM.SizeTy,
CGM.VoidPtrTy,
@@ -1331,86 +1672,40 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(
- FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait");
+ FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait_v2");
break;
}
- case OMPRTL_NVPTX__kmpc_simd_reduce_nowait: {
- // Build int32_t kmpc_nvptx_simd_reduce_nowait(kmp_int32 global_tid,
- // kmp_int32 num_vars, size_t reduce_size, void* reduce_data,
- // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
- // lane_offset, int16_t Algorithm Version),
- // void (*kmp_InterWarpCopyFctPtr)(void* src, int warp_num));
- llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty,
- CGM.Int16Ty, CGM.Int16Ty};
- auto *ShuffleReduceFnTy =
- llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams,
- /*isVarArg=*/false);
- llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty};
- auto *InterWarpCopyFnTy =
- llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams,
- /*isVarArg=*/false);
- llvm::Type *TypeParams[] = {CGM.Int32Ty,
- CGM.Int32Ty,
- CGM.SizeTy,
- CGM.VoidPtrTy,
- ShuffleReduceFnTy->getPointerTo(),
- InterWarpCopyFnTy->getPointerTo()};
+ case OMPRTL_NVPTX__kmpc_end_reduce_nowait: {
+ // Build __kmpc_end_reduce_nowait(kmp_int32 global_tid);
+ llvm::Type *TypeParams[] = {CGM.Int32Ty};
auto *FnTy =
- llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(
- FnTy, /*Name=*/"__kmpc_nvptx_simd_reduce_nowait");
+ FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait");
break;
}
- case OMPRTL_NVPTX__kmpc_teams_reduce_nowait: {
- // Build int32_t __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid,
- // int32_t num_vars, size_t reduce_size, void *reduce_data,
- // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
- // lane_offset, int16_t shortCircuit),
- // void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num),
- // void (*kmp_CopyToScratchpadFctPtr)(void *reduce_data, void * scratchpad,
- // int32_t index, int32_t width),
- // void (*kmp_LoadReduceFctPtr)(void *reduce_data, void * scratchpad,
- // int32_t index, int32_t width, int32_t reduce))
- llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty,
- CGM.Int16Ty, CGM.Int16Ty};
- auto *ShuffleReduceFnTy =
- llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams,
- /*isVarArg=*/false);
- llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty};
- auto *InterWarpCopyFnTy =
- llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams,
- /*isVarArg=*/false);
- llvm::Type *CopyToScratchpadTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy,
- CGM.Int32Ty, CGM.Int32Ty};
- auto *CopyToScratchpadFnTy =
- llvm::FunctionType::get(CGM.VoidTy, CopyToScratchpadTypeParams,
- /*isVarArg=*/false);
- llvm::Type *LoadReduceTypeParams[] = {
- CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.Int32Ty, CGM.Int32Ty, CGM.Int32Ty};
- auto *LoadReduceFnTy =
- llvm::FunctionType::get(CGM.VoidTy, LoadReduceTypeParams,
- /*isVarArg=*/false);
- llvm::Type *TypeParams[] = {CGM.Int32Ty,
- CGM.Int32Ty,
- CGM.SizeTy,
- CGM.VoidPtrTy,
- ShuffleReduceFnTy->getPointerTo(),
- InterWarpCopyFnTy->getPointerTo(),
- CopyToScratchpadFnTy->getPointerTo(),
- LoadReduceFnTy->getPointerTo()};
+ case OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_simple: {
+ // Build __kmpc_nvptx_teams_reduce_nowait_simple(ident_t *loc, kmp_int32
+ // global_tid, kmp_critical_name *lck)
+ llvm::Type *TypeParams[] = {
+ getIdentTyPointerTy(), CGM.Int32Ty,
+ llvm::PointerType::getUnqual(getKmpCriticalNameTy())};
auto *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(
- FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait");
+ FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait_simple");
break;
}
- case OMPRTL_NVPTX__kmpc_end_reduce_nowait: {
- // Build __kmpc_end_reduce_nowait(kmp_int32 global_tid);
- llvm::Type *TypeParams[] = {CGM.Int32Ty};
+ case OMPRTL_NVPTX__kmpc_nvptx_teams_end_reduce_nowait_simple: {
+ // Build __kmpc_nvptx_teams_end_reduce_nowait_simple(ident_t *loc, kmp_int32
+ // global_tid, kmp_critical_name *lck)
+ llvm::Type *TypeParams[] = {
+ getIdentTyPointerTy(), CGM.Int32Ty,
+ llvm::PointerType::getUnqual(getKmpCriticalNameTy())};
auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(
- FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait");
+ FnTy, /*Name=*/"__kmpc_nvptx_teams_end_reduce_nowait_simple");
break;
}
case OMPRTL_NVPTX__kmpc_data_sharing_init_stack: {
@@ -1424,17 +1719,18 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
/// Build void __kmpc_data_sharing_init_stack_spmd();
auto *FnTy =
llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack_spmd");
+ RTLFn =
+ CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack_spmd");
break;
}
- case OMPRTL_NVPTX__kmpc_data_sharing_push_stack: {
- // Build void *__kmpc_data_sharing_push_stack(size_t size,
+ case OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack: {
+ // Build void *__kmpc_data_sharing_coalesced_push_stack(size_t size,
// int16_t UseSharedMemory);
llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty};
auto *FnTy =
llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
RTLFn = CGM.CreateRuntimeFunction(
- FnTy, /*Name=*/"__kmpc_data_sharing_push_stack");
+ FnTy, /*Name=*/"__kmpc_data_sharing_coalesced_push_stack");
break;
}
case OMPRTL_NVPTX__kmpc_data_sharing_pop_stack: {
@@ -1484,6 +1780,46 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_is_spmd_exec_mode");
break;
}
+ case OMPRTL_NVPTX__kmpc_get_team_static_memory: {
+ // Build void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode,
+ // const void *buf, size_t size, int16_t is_shared, const void **res);
+ llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.VoidPtrTy, CGM.SizeTy,
+ CGM.Int16Ty, CGM.VoidPtrPtrTy};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_team_static_memory");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_restore_team_static_memory: {
+ // Build void __kmpc_restore_team_static_memory(int16_t isSPMDExecutionMode,
+ // int16_t is_shared);
+ llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.Int16Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn =
+ CGM.CreateRuntimeFunction(FnTy, "__kmpc_restore_team_static_memory");
+ break;
+ }
+ case OMPRTL__kmpc_barrier: {
+ // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
+ cast<llvm::Function>(RTLFn)->addFnAttr(llvm::Attribute::Convergent);
+ break;
+ }
+ case OMPRTL__kmpc_barrier_simple_spmd: {
+ // Build void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32
+ // global_tid);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn =
+ CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier_simple_spmd");
+ cast<llvm::Function>(RTLFn)->addFnAttr(llvm::Attribute::Convergent);
+ break;
+ }
}
return RTLFn;
}
@@ -1530,6 +1866,37 @@ void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction(
setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode);
}
+namespace {
+LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
+/// Enum for accessing the reserved_2 field of the ident_t struct.
+enum ModeFlagsTy : unsigned {
+ /// Bit set to 1 when in SPMD mode.
+ KMP_IDENT_SPMD_MODE = 0x01,
+ /// Bit set to 1 when a simplified runtime is used.
+ KMP_IDENT_SIMPLE_RT_MODE = 0x02,
+ LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/KMP_IDENT_SIMPLE_RT_MODE)
+};
+
+/// Special Undefined mode: Non-SPMD mode combined with the simple runtime.
+static const ModeFlagsTy UndefinedMode =
+ (~KMP_IDENT_SPMD_MODE) & KMP_IDENT_SIMPLE_RT_MODE;
+} // anonymous namespace
+
+unsigned CGOpenMPRuntimeNVPTX::getDefaultLocationReserved2Flags() const {
+ switch (getExecutionMode()) {
+ case EM_SPMD:
+ if (requiresFullRuntime())
+ return KMP_IDENT_SPMD_MODE & (~KMP_IDENT_SIMPLE_RT_MODE);
+ return KMP_IDENT_SPMD_MODE | KMP_IDENT_SIMPLE_RT_MODE;
+ case EM_NonSPMD:
+ assert(requiresFullRuntime() && "Expected full runtime.");
+ return (~KMP_IDENT_SPMD_MODE) & (~KMP_IDENT_SIMPLE_RT_MODE);
+ case EM_Unknown:
+ return UndefinedMode;
+ }
+ llvm_unreachable("Unknown flags are requested.");
+}
+
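+
A minimal sketch of the four reserved_2 encodings the helper above can return (plain unsigned values used for illustration; the real code goes through the bitmask enum):

enum : unsigned { SPMD = 0x01, SIMPLE_RT = 0x02 }; // mirrors KMP_IDENT_* above
unsigned SpmdFullRuntime = SPMD;             // EM_SPMD with the full runtime
unsigned SpmdLightweight = SPMD | SIMPLE_RT; // EM_SPMD with the simplified runtime
unsigned NonSpmd         = 0;                // EM_NonSPMD always needs the full runtime
unsigned Undefined       = SIMPLE_RT;        // EM_Unknown ("UndefinedMode")
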
CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
: CGOpenMPRuntime(CGM, "_", "$") {
if (!CGM.getLangOpts().OpenMPIsDevice)
@@ -1581,12 +1948,15 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
}
} Action(IsInParallelRegion);
CodeGen.setAction(Action);
+ bool PrevIsInTTDRegion = IsInTTDRegion;
+ IsInTTDRegion = false;
bool PrevIsInTargetMasterThreadRegion = IsInTargetMasterThreadRegion;
IsInTargetMasterThreadRegion = false;
auto *OutlinedFun =
cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen));
IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion;
+ IsInTTDRegion = PrevIsInTTDRegion;
if (getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD &&
!IsInParallelRegion) {
llvm::Function *WrapperFun =
@@ -1597,26 +1967,106 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
return OutlinedFun;
}
+/// Get list of lastprivate variables from the teams distribute ... or
+/// teams {distribute ...} directives.
+static void
+getDistributeLastprivateVars(ASTContext &Ctx, const OMPExecutableDirective &D,
+ llvm::SmallVectorImpl<const ValueDecl *> &Vars) {
+ assert(isOpenMPTeamsDirective(D.getDirectiveKind()) &&
+ "expected teams directive.");
+ const OMPExecutableDirective *Dir = &D;
+ if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
+ if (const Stmt *S = getSingleCompoundChild(
+ Ctx,
+ D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(
+ /*IgnoreCaptured=*/true))) {
+ Dir = dyn_cast<OMPExecutableDirective>(S);
+ if (Dir && !isOpenMPDistributeDirective(Dir->getDirectiveKind()))
+ Dir = nullptr;
+ }
+ }
+ if (!Dir)
+ return;
+ for (const auto *C : Dir->getClausesOfKind<OMPLastprivateClause>()) {
+ for (const Expr *E : C->getVarRefs())
+ Vars.push_back(getPrivateItem(E));
+ }
+}
+
+/// Get list of reduction variables from the teams ... directives.
+static void
+getTeamsReductionVars(ASTContext &Ctx, const OMPExecutableDirective &D,
+ llvm::SmallVectorImpl<const ValueDecl *> &Vars) {
+ assert(isOpenMPTeamsDirective(D.getDirectiveKind()) &&
+ "expected teams directive.");
+ for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
+ for (const Expr *E : C->privates())
+ Vars.push_back(getPrivateItem(E));
+ }
+}
+
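+
An assumed source-level example of the variables these two helpers collect for globalization (`sum` via getTeamsReductionVars, `last` via getDistributeLastprivateVars; loop bounds and arrays are illustrative):

#pragma omp target teams distribute reduction(+ : sum) lastprivate(last)
for (int i = 0; i < n; ++i) {
  sum += a[i];
  last = i;
}
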
llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
- SourceLocation Loc = D.getLocStart();
+ SourceLocation Loc = D.getBeginLoc();
+
+ const RecordDecl *GlobalizedRD = nullptr;
+ llvm::SmallVector<const ValueDecl *, 4> LastPrivatesReductions;
+ llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
+ // Globalize team reductions variable unconditionally in all modes.
+ getTeamsReductionVars(CGM.getContext(), D, LastPrivatesReductions);
+ if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) {
+ getDistributeLastprivateVars(CGM.getContext(), D, LastPrivatesReductions);
+ if (!LastPrivatesReductions.empty()) {
+ GlobalizedRD = ::buildRecordForGlobalizedVars(
+ CGM.getContext(), llvm::None, LastPrivatesReductions,
+ MappedDeclsFields);
+ }
+ } else if (!LastPrivatesReductions.empty()) {
+ assert(!TeamAndReductions.first &&
+ "Previous team declaration is not expected.");
+ TeamAndReductions.first = D.getCapturedStmt(OMPD_teams)->getCapturedDecl();
+ std::swap(TeamAndReductions.second, LastPrivatesReductions);
+ }
// Emit target region as a standalone region.
class NVPTXPrePostActionTy : public PrePostActionTy {
SourceLocation &Loc;
+ const RecordDecl *GlobalizedRD;
+ llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
+ &MappedDeclsFields;
public:
- NVPTXPrePostActionTy(SourceLocation &Loc) : Loc(Loc) {}
+ NVPTXPrePostActionTy(
+ SourceLocation &Loc, const RecordDecl *GlobalizedRD,
+ llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
+ &MappedDeclsFields)
+ : Loc(Loc), GlobalizedRD(GlobalizedRD),
+ MappedDeclsFields(MappedDeclsFields) {}
void Enter(CodeGenFunction &CGF) override {
- static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
- .emitGenericVarsProlog(CGF, Loc);
+ auto &Rt =
+ static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime());
+ if (GlobalizedRD) {
+ auto I = Rt.FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;
+ I->getSecond().GlobalRecord = GlobalizedRD;
+ I->getSecond().MappedParams =
+ llvm::make_unique<CodeGenFunction::OMPMapVars>();
+ DeclToAddrMapTy &Data = I->getSecond().LocalVarData;
+ for (const auto &Pair : MappedDeclsFields) {
+ assert(Pair.getFirst()->isCanonicalDecl() &&
+ "Expected canonical declaration");
+ Data.insert(std::make_pair(Pair.getFirst(),
+ MappedVarData(Pair.getSecond(),
+ /*IsOnePerTeam=*/true)));
+ }
+ }
+ Rt.emitGenericVarsProlog(CGF, Loc);
}
void Exit(CodeGenFunction &CGF) override {
static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
.emitGenericVarsEpilog(CGF);
}
- } Action(Loc);
+ } Action(Loc, GlobalizedRD, MappedDeclsFields);
CodeGen.setAction(Action);
llvm::Value *OutlinedFunVal = CGOpenMPRuntime::emitTeamsOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen);
@@ -1629,8 +2079,10 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
}
void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
- SourceLocation Loc) {
- if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic)
+ SourceLocation Loc,
+ bool WithSPMDCheck) {
+ if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic &&
+ getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD)
return;
CGBuilderTy &Bld = CGF.Builder;
@@ -1639,33 +2091,187 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
if (I == FunctionGlobalizedDecls.end())
return;
if (const RecordDecl *GlobalizedVarsRecord = I->getSecond().GlobalRecord) {
- QualType RecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord);
+ QualType GlobalRecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord);
+ QualType SecGlobalRecTy;
// Recover pointer to this function's global record. The runtime will
// handle the specifics of the allocation of the memory.
// Use actual memory size of the record including the padding
// for alignment purposes.
unsigned Alignment =
- CGM.getContext().getTypeAlignInChars(RecTy).getQuantity();
+ CGM.getContext().getTypeAlignInChars(GlobalRecTy).getQuantity();
unsigned GlobalRecordSize =
- CGM.getContext().getTypeSizeInChars(RecTy).getQuantity();
+ CGM.getContext().getTypeSizeInChars(GlobalRecTy).getQuantity();
GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
- // TODO: allow the usage of shared memory to be controlled by
- // the user, for now, default to global.
- llvm::Value *GlobalRecordSizeArg[] = {
- llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
- CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
- llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
- GlobalRecordSizeArg);
- llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
- GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo());
+
+ llvm::PointerType *GlobalRecPtrTy =
+ CGF.ConvertTypeForMem(GlobalRecTy)->getPointerTo();
+ llvm::Value *GlobalRecCastAddr;
+ llvm::Value *IsTTD = nullptr;
+ if (!IsInTTDRegion &&
+ (WithSPMDCheck ||
+ getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown)) {
+ llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
+ llvm::BasicBlock *SPMDBB = CGF.createBasicBlock(".spmd");
+ llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd");
+ if (I->getSecond().SecondaryGlobalRecord.hasValue()) {
+ llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
+ llvm::Value *ThreadID = getThreadID(CGF, Loc);
+ llvm::Value *PL = CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level),
+ {RTLoc, ThreadID});
+ IsTTD = Bld.CreateIsNull(PL);
+ }
+ llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));
+ Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB);
+ // There is no need to emit line number for unconditional branch.
+ (void)ApplyDebugLocation::CreateEmpty(CGF);
+ CGF.EmitBlock(SPMDBB);
+ Address RecPtr = Address(llvm::ConstantPointerNull::get(GlobalRecPtrTy),
+ CharUnits::fromQuantity(Alignment));
+ CGF.EmitBranch(ExitBB);
+ // There is no need to emit line number for unconditional branch.
+ (void)ApplyDebugLocation::CreateEmpty(CGF);
+ CGF.EmitBlock(NonSPMDBB);
+ llvm::Value *Size = llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize);
+ if (const RecordDecl *SecGlobalizedVarsRecord =
+ I->getSecond().SecondaryGlobalRecord.getValueOr(nullptr)) {
+ SecGlobalRecTy =
+ CGM.getContext().getRecordType(SecGlobalizedVarsRecord);
+
+ // Recover pointer to this function's global record. The runtime will
+ // handle the specifics of the allocation of the memory.
+ // Use actual memory size of the record including the padding
+ // for alignment purposes.
+ unsigned Alignment =
+ CGM.getContext().getTypeAlignInChars(SecGlobalRecTy).getQuantity();
+ unsigned GlobalRecordSize =
+ CGM.getContext().getTypeSizeInChars(SecGlobalRecTy).getQuantity();
+ GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
+ Size = Bld.CreateSelect(
+ IsTTD, llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), Size);
+ }
+ // TODO: allow the usage of shared memory to be controlled by
+ // the user, for now, default to global.
+ llvm::Value *GlobalRecordSizeArg[] = {
+ Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
+ llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
+ GlobalRecordSizeArg);
+ GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+ GlobalRecValue, GlobalRecPtrTy);
+ CGF.EmitBlock(ExitBB);
+ auto *Phi = Bld.CreatePHI(GlobalRecPtrTy,
+ /*NumReservedValues=*/2, "_select_stack");
+ Phi->addIncoming(RecPtr.getPointer(), SPMDBB);
+ Phi->addIncoming(GlobalRecCastAddr, NonSPMDBB);
+ GlobalRecCastAddr = Phi;
+ I->getSecond().GlobalRecordAddr = Phi;
+ I->getSecond().IsInSPMDModeFlag = IsSPMD;
+ } else if (IsInTTDRegion) {
+ assert(GlobalizedRecords.back().Records.size() < 2 &&
+ "Expected less than 2 globalized records: one for target and one "
+ "for teams.");
+ unsigned Offset = 0;
+ for (const RecordDecl *RD : GlobalizedRecords.back().Records) {
+ QualType RDTy = CGM.getContext().getRecordType(RD);
+ unsigned Alignment =
+ CGM.getContext().getTypeAlignInChars(RDTy).getQuantity();
+ unsigned Size = CGM.getContext().getTypeSizeInChars(RDTy).getQuantity();
+ Offset =
+ llvm::alignTo(llvm::alignTo(Offset, Alignment) + Size, Alignment);
+ }
+ unsigned Alignment =
+ CGM.getContext().getTypeAlignInChars(GlobalRecTy).getQuantity();
+ Offset = llvm::alignTo(Offset, Alignment);
+ GlobalizedRecords.back().Records.push_back(GlobalizedVarsRecord);
+ ++GlobalizedRecords.back().RegionCounter;
+ if (GlobalizedRecords.back().Records.size() == 1) {
+ assert(KernelStaticGlobalized &&
+ "Kernel static pointer must be initialized already.");
+ auto *UseSharedMemory = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.Int16Ty, /*isConstant=*/true,
+ llvm::GlobalValue::InternalLinkage, nullptr,
+ "_openmp_static_kernel$is_shared");
+ UseSharedMemory->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+ QualType Int16Ty = CGM.getContext().getIntTypeForBitwidth(
+ /*DestWidth=*/16, /*Signed=*/0);
+ llvm::Value *IsInSharedMemory = CGF.EmitLoadOfScalar(
+ Address(UseSharedMemory,
+ CGM.getContext().getTypeAlignInChars(Int16Ty)),
+ /*Volatile=*/false, Int16Ty, Loc);
+ auto *StaticGlobalized = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.Int8Ty, /*isConstant=*/false,
+ llvm::GlobalValue::CommonLinkage, nullptr);
+ auto *RecSize = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.SizeTy, /*isConstant=*/true,
+ llvm::GlobalValue::InternalLinkage, nullptr,
+ "_openmp_static_kernel$size");
+ RecSize->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
+ llvm::Value *Ld = CGF.EmitLoadOfScalar(
+ Address(RecSize, CGM.getSizeAlign()), /*Volatile=*/false,
+ CGM.getContext().getSizeType(), Loc);
+ llvm::Value *ResAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+ KernelStaticGlobalized, CGM.VoidPtrPtrTy);
+ llvm::Value *GlobalRecordSizeArg[] = {
+ llvm::ConstantInt::get(
+ CGM.Int16Ty,
+ getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD ? 1 : 0),
+ StaticGlobalized, Ld, IsInSharedMemory, ResAddr};
+ CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_get_team_static_memory),
+ GlobalRecordSizeArg);
+ GlobalizedRecords.back().Buffer = StaticGlobalized;
+ GlobalizedRecords.back().RecSize = RecSize;
+ GlobalizedRecords.back().UseSharedMemory = UseSharedMemory;
+ GlobalizedRecords.back().Loc = Loc;
+ }
+ assert(KernelStaticGlobalized && "Global address must be set already.");
+ Address FrameAddr = CGF.EmitLoadOfPointer(
+ Address(KernelStaticGlobalized, CGM.getPointerAlign()),
+ CGM.getContext()
+ .getPointerType(CGM.getContext().VoidPtrTy)
+ .castAs<PointerType>());
+ llvm::Value *GlobalRecValue =
+ Bld.CreateConstInBoundsGEP(FrameAddr, Offset, CharUnits::One())
+ .getPointer();
+ I->getSecond().GlobalRecordAddr = GlobalRecValue;
+ I->getSecond().IsInSPMDModeFlag = nullptr;
+ GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+ GlobalRecValue, CGF.ConvertTypeForMem(GlobalRecTy)->getPointerTo());
+ } else {
+ // TODO: allow the usage of shared memory to be controlled by
+ // the user, for now, default to global.
+ llvm::Value *GlobalRecordSizeArg[] = {
+ llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
+ CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
+ llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
+ GlobalRecordSizeArg);
+ GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+ GlobalRecValue, GlobalRecPtrTy);
+ I->getSecond().GlobalRecordAddr = GlobalRecValue;
+ I->getSecond().IsInSPMDModeFlag = nullptr;
+ }
LValue Base =
- CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, RecTy);
- I->getSecond().GlobalRecordAddr = GlobalRecValue;
+ CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, GlobalRecTy);
// Emit the "global alloca" which is a GEP from the global declaration
// record using the pointer returned by the runtime.
+ LValue SecBase;
+ decltype(I->getSecond().LocalVarData)::const_iterator SecIt;
+ if (IsTTD) {
+ SecIt = I->getSecond().SecondaryLocalVarData->begin();
+ llvm::PointerType *SecGlobalRecPtrTy =
+ CGF.ConvertTypeForMem(SecGlobalRecTy)->getPointerTo();
+ SecBase = CGF.MakeNaturalAlignPointeeAddrLValue(
+ Bld.CreatePointerBitCastOrAddrSpaceCast(
+ I->getSecond().GlobalRecordAddr, SecGlobalRecPtrTy),
+ SecGlobalRecTy);
+ }
for (auto &Rec : I->getSecond().LocalVarData) {
bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first);
llvm::Value *ParValue;
@@ -1675,14 +2281,51 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
ParValue = CGF.EmitLoadOfScalar(ParLVal, Loc);
}
- const FieldDecl *FD = Rec.second.first;
- LValue VarAddr = CGF.EmitLValueForField(Base, FD);
- Rec.second.second = VarAddr.getAddress();
+ LValue VarAddr = CGF.EmitLValueForField(Base, Rec.second.FD);
+    // Emit VarAddr based on the lane id if required.
+ QualType VarTy;
+ if (Rec.second.IsOnePerTeam) {
+ VarTy = Rec.second.FD->getType();
+ } else {
+ llvm::Value *Ptr = CGF.Builder.CreateInBoundsGEP(
+ VarAddr.getAddress().getPointer(),
+ {Bld.getInt32(0), getNVPTXLaneID(CGF)});
+ VarTy =
+ Rec.second.FD->getType()->castAsArrayTypeUnsafe()->getElementType();
+ VarAddr = CGF.MakeAddrLValue(
+ Address(Ptr, CGM.getContext().getDeclAlign(Rec.first)), VarTy,
+ AlignmentSource::Decl);
+ }
+ Rec.second.PrivateAddr = VarAddr.getAddress();
+ if (!IsInTTDRegion &&
+ (WithSPMDCheck ||
+ getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown)) {
+ assert(I->getSecond().IsInSPMDModeFlag &&
+ "Expected unknown execution mode or required SPMD check.");
+ if (IsTTD) {
+ assert(SecIt->second.IsOnePerTeam &&
+ "Secondary glob data must be one per team.");
+ LValue SecVarAddr = CGF.EmitLValueForField(SecBase, SecIt->second.FD);
+ VarAddr.setAddress(
+ Address(Bld.CreateSelect(IsTTD, SecVarAddr.getPointer(),
+ VarAddr.getPointer()),
+ VarAddr.getAlignment()));
+ Rec.second.PrivateAddr = VarAddr.getAddress();
+ }
+ Address GlobalPtr = Rec.second.PrivateAddr;
+ Address LocalAddr = CGF.CreateMemTemp(VarTy, Rec.second.FD->getName());
+ Rec.second.PrivateAddr = Address(
+ Bld.CreateSelect(I->getSecond().IsInSPMDModeFlag,
+ LocalAddr.getPointer(), GlobalPtr.getPointer()),
+ LocalAddr.getAlignment());
+ }
if (EscapedParam) {
const auto *VD = cast<VarDecl>(Rec.first);
CGF.EmitStoreOfScalar(ParValue, VarAddr);
I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.getAddress());
}
+ if (IsTTD)
+ ++SecIt;
}
}
for (const ValueDecl *VD : I->getSecond().EscapedVariableLengthDecls) {
@@ -1704,7 +2347,8 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
llvm::Value *GlobalRecordSizeArg[] = {
Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
+ createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
GlobalRecordSizeArg);
llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
GlobalRecValue, CGF.ConvertTypeForMem(VD->getType())->getPointerTo());
@@ -1718,8 +2362,10 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
I->getSecond().MappedParams->apply(CGF);
}
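
The prolog above now covers three cases: a run-time SPMD check feeding a "_select_stack" PHI, a statically laid out target/teams record, and an unconditional coalesced push. A minimal sketch of the run-time-check case, written as the device code it roughly corresponds to; the extern prototypes are assumptions modelled on the runtime entry points this file calls, not definitive declarations.

extern "C" int __kmpc_is_spmd_exec_mode();
extern "C" void *__kmpc_data_sharing_coalesced_push_stack(unsigned long Size,
                                                          short UseSharedMemory);

// Sketch only: in SPMD mode nothing is pushed (null record); otherwise a
// coalesced global-memory frame of GlobalRecordSize bytes is pushed. The
// returned pointer plays the role of the "_select_stack" PHI in the IR.
static void *globalization_prolog_sketch(unsigned long GlobalRecordSize) {
  if (__kmpc_is_spmd_exec_mode())
    return nullptr;                                // .spmd path
  return __kmpc_data_sharing_coalesced_push_stack( // .non-spmd path
      GlobalRecordSize, /*UseSharedMemory=*/0);
}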
-void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF) {
- if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic)
+void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF,
+ bool WithSPMDCheck) {
+ if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic &&
+ getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD)
return;
const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
@@ -1734,9 +2380,48 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF) {
Addr);
}
if (I->getSecond().GlobalRecordAddr) {
- CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
- I->getSecond().GlobalRecordAddr);
+ if (!IsInTTDRegion &&
+ (WithSPMDCheck ||
+ getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown)) {
+ CGBuilderTy &Bld = CGF.Builder;
+ llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
+ llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd");
+ Bld.CreateCondBr(I->getSecond().IsInSPMDModeFlag, ExitBB, NonSPMDBB);
+ // There is no need to emit line number for unconditional branch.
+ (void)ApplyDebugLocation::CreateEmpty(CGF);
+ CGF.EmitBlock(NonSPMDBB);
+ CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
+ CGF.EmitCastToVoidPtr(I->getSecond().GlobalRecordAddr));
+ CGF.EmitBlock(ExitBB);
+ } else if (IsInTTDRegion) {
+ assert(GlobalizedRecords.back().RegionCounter > 0 &&
+ "region counter must be > 0.");
+ --GlobalizedRecords.back().RegionCounter;
+ // Emit the restore function only in the target region.
+ if (GlobalizedRecords.back().RegionCounter == 0) {
+ QualType Int16Ty = CGM.getContext().getIntTypeForBitwidth(
+ /*DestWidth=*/16, /*Signed=*/0);
+ llvm::Value *IsInSharedMemory = CGF.EmitLoadOfScalar(
+ Address(GlobalizedRecords.back().UseSharedMemory,
+ CGM.getContext().getTypeAlignInChars(Int16Ty)),
+ /*Volatile=*/false, Int16Ty, GlobalizedRecords.back().Loc);
+ llvm::Value *Args[] = {
+ llvm::ConstantInt::get(
+ CGM.Int16Ty,
+ getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD ? 1 : 0),
+ IsInSharedMemory};
+ CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_restore_team_static_memory),
+ Args);
+ }
+ } else {
+ CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
+ I->getSecond().GlobalRecordAddr);
+ }
}
}
}
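
For symmetry with the prolog, the epilog emits the matching pop only on the non-SPMD path when the execution mode is decided at run time (and a team-static-memory restore in the TTD case). A minimal sketch of the run-time-check path, with assumed prototypes:

extern "C" int __kmpc_is_spmd_exec_mode();
extern "C" void __kmpc_data_sharing_pop_stack(void *Ptr);

// Sketch only: SPMD execution never pushed a record, so only the generic
// (non-SPMD) path pops the globalized frame.
static void globalization_epilog_sketch(void *GlobalRecordAddr) {
  if (!__kmpc_is_spmd_exec_mode())
    __kmpc_data_sharing_pop_stack(GlobalRecordAddr);
}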
@@ -1830,7 +2515,7 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall(
// passed from the outside of the target region.
CodeGenFunction::OMPPrivateScope PrivateArgScope(CGF);
- // There's somehting to share.
+ // There's something to share.
if (!CapturedVars.empty()) {
// Prepare for parallel region. Indicate the outlined function.
Address SharedArgs =
@@ -1884,30 +2569,24 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall(
Work.emplace_back(WFn);
};
- auto &&LNParallelGen = [this, Loc, &SeqGen, &L0ParallelGen, &CodeGen,
- &ThreadIDAddr](CodeGenFunction &CGF,
- PrePostActionTy &Action) {
- RegionCodeGenTy RCG(CodeGen);
+ auto &&LNParallelGen = [this, Loc, &SeqGen, &L0ParallelGen](
+ CodeGenFunction &CGF, PrePostActionTy &Action) {
if (IsInParallelRegion) {
SeqGen(CGF, Action);
} else if (IsInTargetMasterThreadRegion) {
L0ParallelGen(CGF, Action);
- } else if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_NonSPMD) {
- RCG(CGF);
} else {
// Check for master and then parallelism:
// if (__kmpc_is_spmd_exec_mode() || __kmpc_parallel_level(loc, gtid)) {
- // Serialized execution.
- // } else if (master) {
- // Worker call.
+ // Serialized execution.
// } else {
- // Outlined function call.
+ // Worker call.
// }
CGBuilderTy &Bld = CGF.Builder;
llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
llvm::BasicBlock *SeqBB = CGF.createBasicBlock(".sequential");
llvm::BasicBlock *ParallelCheckBB = CGF.createBasicBlock(".parcheck");
- llvm::BasicBlock *MasterCheckBB = CGF.createBasicBlock(".mastercheck");
+ llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master");
llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));
Bld.CreateCondBr(IsSPMD, SeqBB, ParallelCheckBB);
@@ -1920,29 +2599,17 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level),
{RTLoc, ThreadID});
llvm::Value *Res = Bld.CreateIsNotNull(PL);
- Bld.CreateCondBr(Res, SeqBB, MasterCheckBB);
+ Bld.CreateCondBr(Res, SeqBB, MasterBB);
CGF.EmitBlock(SeqBB);
SeqGen(CGF, Action);
CGF.EmitBranch(ExitBB);
// There is no need to emit line number for unconditional branch.
(void)ApplyDebugLocation::CreateEmpty(CGF);
- CGF.EmitBlock(MasterCheckBB);
- llvm::BasicBlock *MasterThenBB = CGF.createBasicBlock("master.then");
- llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
- llvm::Value *IsMaster =
- Bld.CreateICmpEQ(getNVPTXThreadID(CGF), getMasterThreadID(CGF));
- Bld.CreateCondBr(IsMaster, MasterThenBB, ElseBlock);
- CGF.EmitBlock(MasterThenBB);
+ CGF.EmitBlock(MasterBB);
L0ParallelGen(CGF, Action);
CGF.EmitBranch(ExitBB);
// There is no need to emit line number for unconditional branch.
(void)ApplyDebugLocation::CreateEmpty(CGF);
- CGF.EmitBlock(ElseBlock);
- // In the worker need to use the real thread id.
- ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
- RCG(CGF);
- // There is no need to emit line number for unconditional branch.
- (void)ApplyDebugLocation::CreateEmpty(CGF);
// Emit the continuation block for code after the if.
CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
}
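
The reshaped LNParallelGen above now emits exactly the check sketched in its comment. As plain device-side control flow (the prototypes are assumptions based on the runtime calls used here):

extern "C" int __kmpc_is_spmd_exec_mode();
extern "C" unsigned short __kmpc_parallel_level(void *Loc, int GlobalThreadID);

// Sketch only: serialized execution when already in SPMD mode or nested in a
// parallel region; otherwise the master activates the workers (L0 path).
static void non_spmd_parallel_dispatch_sketch(void *Loc, int GTid,
                                              void (*SeqGen)(),
                                              void (*L0ParallelGen)()) {
  if (__kmpc_is_spmd_exec_mode() || __kmpc_parallel_level(Loc, GTid))
    SeqGen();        // .sequential
  else
    L0ParallelGen(); // .master
}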
@@ -2013,6 +2680,34 @@ void CGOpenMPRuntimeNVPTX::emitSPMDParallelCall(
}
}
+void CGOpenMPRuntimeNVPTX::syncCTAThreads(CodeGenFunction &CGF) {
+ // Always emit simple barriers!
+ if (!CGF.HaveInsertPoint())
+ return;
+ // Build call __kmpc_barrier_simple_spmd(nullptr, 0);
+  // The runtime function ignores its parameters, so we can pass default values.
+ llvm::Value *Args[] = {
+ llvm::ConstantPointerNull::get(
+ cast<llvm::PointerType>(getIdentTyPointerTy())),
+ llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/0, /*isSigned=*/true)};
+ CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier_simple_spmd), Args);
+}
+
+void CGOpenMPRuntimeNVPTX::emitBarrierCall(CodeGenFunction &CGF,
+ SourceLocation Loc,
+ OpenMPDirectiveKind Kind, bool,
+ bool) {
+ // Always emit simple barriers!
+ if (!CGF.HaveInsertPoint())
+ return;
+  // Build call __kmpc_barrier(loc, thread_id);
+ unsigned Flags = getDefaultFlagsForBarriers(Kind);
+ llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
+ getThreadID(CGF, Loc)};
+ CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier), Args);
+}
+
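
The two helpers added above correspond to two different barriers in the emitted device code. A hedged sketch, with prototypes assumed from the runtime calls they build:

extern "C" void __kmpc_barrier_simple_spmd(void *Loc, int ThreadID);
extern "C" void __kmpc_barrier(void *Loc, int GlobalThreadID);

// Sketch only: syncCTAThreads is a simple CTA-wide barrier whose arguments the
// runtime ignores; emitBarrierCall is a full OpenMP barrier keyed by the
// directive's flags encoded in the location descriptor.
static void barrier_sketch(void *LocWithBarrierFlags, int GTid) {
  __kmpc_barrier_simple_spmd(/*Loc=*/nullptr, /*ThreadID=*/0);
  __kmpc_barrier(LocWithBarrierFlags, GTid);
}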
void CGOpenMPRuntimeNVPTX::emitCriticalRegion(
CodeGenFunction &CGF, StringRef CriticalName,
const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
@@ -2055,14 +2750,16 @@ void CGOpenMPRuntimeNVPTX::emitCriticalRegion(
CGF.EmitBlock(BodyBB);
// Output the critical statement.
- CriticalOpGen(CGF);
+ CGOpenMPRuntime::emitCriticalRegion(CGF, CriticalName, CriticalOpGen, Loc,
+ Hint);
// After the body surrounded by the critical region, the single executing
// thread will jump to the synchronisation point.
// Block waits for all threads in current team to finish then increments the
// counter variable and returns to the loop.
CGF.EmitBlock(SyncBB);
- getNVPTXCTABarrier(CGF);
+ emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false,
+ /*ForceSimpleCall=*/true);
llvm::Value *IncCounterVal =
CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1));
@@ -2184,11 +2881,12 @@ static void shuffleAndStore(CodeGenFunction &CGF, Address SrcAddr,
CGF, CGF.EmitLoadOfScalar(Ptr, /*Volatile=*/false, IntType, Loc),
IntType, Offset, Loc);
CGF.EmitStoreOfScalar(Res, ElemPtr, /*Volatile=*/false, IntType);
- Ptr = Bld.CreateConstGEP(Ptr, 1, CharUnits::fromQuantity(IntSize));
- ElemPtr =
+ Address LocalPtr =
+ Bld.CreateConstGEP(Ptr, 1, CharUnits::fromQuantity(IntSize));
+ Address LocalElemPtr =
Bld.CreateConstGEP(ElemPtr, 1, CharUnits::fromQuantity(IntSize));
- PhiSrc->addIncoming(Ptr.getPointer(), ThenBB);
- PhiDest->addIncoming(ElemPtr.getPointer(), ThenBB);
+ PhiSrc->addIncoming(LocalPtr.getPointer(), ThenBB);
+ PhiDest->addIncoming(LocalElemPtr.getPointer(), ThenBB);
CGF.EmitBranch(PreCondBB);
CGF.EmitBlock(ExitBB);
} else {
@@ -2414,235 +3112,18 @@ static void emitReductionListCopy(
}
}
-/// This function emits a helper that loads data from the scratchpad array
-/// and (optionally) reduces it with the input operand.
-///
-/// load_and_reduce(local, scratchpad, index, width, should_reduce)
-/// reduce_data remote;
-/// for elem in remote:
-/// remote.elem = Scratchpad[elem_id][index]
-/// if (should_reduce)
-/// local = local @ remote
-/// else
-/// local = remote
-static llvm::Value *emitReduceScratchpadFunction(
- CodeGenModule &CGM, ArrayRef<const Expr *> Privates,
- QualType ReductionArrayTy, llvm::Value *ReduceFn, SourceLocation Loc) {
- ASTContext &C = CGM.getContext();
- QualType Int32Ty = C.getIntTypeForBitwidth(32, /*Signed=*/1);
-
- // Destination of the copy.
- ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
- C.VoidPtrTy, ImplicitParamDecl::Other);
- // Base address of the scratchpad array, with each element storing a
- // Reduce list per team.
- ImplicitParamDecl ScratchPadArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
- C.VoidPtrTy, ImplicitParamDecl::Other);
- // A source index into the scratchpad array.
- ImplicitParamDecl IndexArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty,
- ImplicitParamDecl::Other);
- // Row width of an element in the scratchpad array, typically
- // the number of teams.
- ImplicitParamDecl WidthArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty,
- ImplicitParamDecl::Other);
- // If should_reduce == 1, then it's load AND reduce,
- // If should_reduce == 0 (or otherwise), then it only loads (+ copy).
- // The latter case is used for initialization.
- ImplicitParamDecl ShouldReduceArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
- Int32Ty, ImplicitParamDecl::Other);
-
- FunctionArgList Args;
- Args.push_back(&ReduceListArg);
- Args.push_back(&ScratchPadArg);
- Args.push_back(&IndexArg);
- Args.push_back(&WidthArg);
- Args.push_back(&ShouldReduceArg);
-
- const CGFunctionInfo &CGFI =
- CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
- auto *Fn = llvm::Function::Create(
- CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
- "_omp_reduction_load_and_reduce", &CGM.getModule());
- CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
- Fn->setDoesNotRecurse();
- CodeGenFunction CGF(CGM);
- CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
-
- CGBuilderTy &Bld = CGF.Builder;
-
- // Get local Reduce list pointer.
- Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
- Address ReduceListAddr(
- Bld.CreatePointerBitCastOrAddrSpaceCast(
- CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false,
- C.VoidPtrTy, Loc),
- CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()),
- CGF.getPointerAlign());
-
- Address AddrScratchPadArg = CGF.GetAddrOfLocalVar(&ScratchPadArg);
- llvm::Value *ScratchPadBase = CGF.EmitLoadOfScalar(
- AddrScratchPadArg, /*Volatile=*/false, C.VoidPtrTy, Loc);
-
- Address AddrIndexArg = CGF.GetAddrOfLocalVar(&IndexArg);
- llvm::Value *IndexVal = Bld.CreateIntCast(
- CGF.EmitLoadOfScalar(AddrIndexArg, /*Volatile=*/false, Int32Ty, Loc),
- CGM.SizeTy, /*isSigned=*/true);
-
- Address AddrWidthArg = CGF.GetAddrOfLocalVar(&WidthArg);
- llvm::Value *WidthVal = Bld.CreateIntCast(
- CGF.EmitLoadOfScalar(AddrWidthArg, /*Volatile=*/false, Int32Ty, Loc),
- CGM.SizeTy, /*isSigned=*/true);
-
- Address AddrShouldReduceArg = CGF.GetAddrOfLocalVar(&ShouldReduceArg);
- llvm::Value *ShouldReduceVal = CGF.EmitLoadOfScalar(
- AddrShouldReduceArg, /*Volatile=*/false, Int32Ty, Loc);
-
- // The absolute ptr address to the base addr of the next element to copy.
- llvm::Value *CumulativeElemBasePtr =
- Bld.CreatePtrToInt(ScratchPadBase, CGM.SizeTy);
- Address SrcDataAddr(CumulativeElemBasePtr, CGF.getPointerAlign());
-
- // Create a Remote Reduce list to store the elements read from the
- // scratchpad array.
- Address RemoteReduceList =
- CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.remote_red_list");
-
- // Assemble remote Reduce list from scratchpad array.
- emitReductionListCopy(ScratchpadToThread, CGF, ReductionArrayTy, Privates,
- SrcDataAddr, RemoteReduceList,
- {/*RemoteLaneOffset=*/nullptr,
- /*ScratchpadIndex=*/IndexVal,
- /*ScratchpadWidth=*/WidthVal});
-
- llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then");
- llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else");
- llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont");
-
- llvm::Value *CondReduce = Bld.CreateIsNotNull(ShouldReduceVal);
- Bld.CreateCondBr(CondReduce, ThenBB, ElseBB);
-
- CGF.EmitBlock(ThenBB);
- // We should reduce with the local Reduce list.
- // reduce_function(LocalReduceList, RemoteReduceList)
- llvm::Value *LocalDataPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
- ReduceListAddr.getPointer(), CGF.VoidPtrTy);
- llvm::Value *RemoteDataPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
- RemoteReduceList.getPointer(), CGF.VoidPtrTy);
- CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
- CGF, Loc, ReduceFn, {LocalDataPtr, RemoteDataPtr});
- Bld.CreateBr(MergeBB);
-
- CGF.EmitBlock(ElseBB);
- // No reduction; just copy:
- // Local Reduce list = Remote Reduce list.
- emitReductionListCopy(ThreadCopy, CGF, ReductionArrayTy, Privates,
- RemoteReduceList, ReduceListAddr);
- Bld.CreateBr(MergeBB);
-
- CGF.EmitBlock(MergeBB);
-
- CGF.FinishFunction();
- return Fn;
-}
-
-/// This function emits a helper that stores reduced data from the team
-/// master to a scratchpad array in global memory.
-///
-/// for elem in Reduce List:
-/// scratchpad[elem_id][index] = elem
-///
-static llvm::Value *emitCopyToScratchpad(CodeGenModule &CGM,
- ArrayRef<const Expr *> Privates,
- QualType ReductionArrayTy,
- SourceLocation Loc) {
-
- ASTContext &C = CGM.getContext();
- QualType Int32Ty = C.getIntTypeForBitwidth(32, /*Signed=*/1);
-
- // Source of the copy.
- ImplicitParamDecl ReduceListArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
- C.VoidPtrTy, ImplicitParamDecl::Other);
- // Base address of the scratchpad array, with each element storing a
- // Reduce list per team.
- ImplicitParamDecl ScratchPadArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
- C.VoidPtrTy, ImplicitParamDecl::Other);
- // A destination index into the scratchpad array, typically the team
- // identifier.
- ImplicitParamDecl IndexArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty,
- ImplicitParamDecl::Other);
- // Row width of an element in the scratchpad array, typically
- // the number of teams.
- ImplicitParamDecl WidthArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int32Ty,
- ImplicitParamDecl::Other);
-
- FunctionArgList Args;
- Args.push_back(&ReduceListArg);
- Args.push_back(&ScratchPadArg);
- Args.push_back(&IndexArg);
- Args.push_back(&WidthArg);
-
- const CGFunctionInfo &CGFI =
- CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
- auto *Fn = llvm::Function::Create(
- CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
- "_omp_reduction_copy_to_scratchpad", &CGM.getModule());
- CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
- Fn->setDoesNotRecurse();
- CodeGenFunction CGF(CGM);
- CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
-
- CGBuilderTy &Bld = CGF.Builder;
-
- Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
- Address SrcDataAddr(
- Bld.CreatePointerBitCastOrAddrSpaceCast(
- CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false,
- C.VoidPtrTy, Loc),
- CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()),
- CGF.getPointerAlign());
-
- Address AddrScratchPadArg = CGF.GetAddrOfLocalVar(&ScratchPadArg);
- llvm::Value *ScratchPadBase = CGF.EmitLoadOfScalar(
- AddrScratchPadArg, /*Volatile=*/false, C.VoidPtrTy, Loc);
-
- Address AddrIndexArg = CGF.GetAddrOfLocalVar(&IndexArg);
- llvm::Value *IndexVal = Bld.CreateIntCast(
- CGF.EmitLoadOfScalar(AddrIndexArg, /*Volatile=*/false, Int32Ty, Loc),
- CGF.SizeTy, /*isSigned=*/true);
-
- Address AddrWidthArg = CGF.GetAddrOfLocalVar(&WidthArg);
- llvm::Value *WidthVal =
- Bld.CreateIntCast(CGF.EmitLoadOfScalar(AddrWidthArg, /*Volatile=*/false,
- Int32Ty, SourceLocation()),
- CGF.SizeTy, /*isSigned=*/true);
-
- // The absolute ptr address to the base addr of the next element to copy.
- llvm::Value *CumulativeElemBasePtr =
- Bld.CreatePtrToInt(ScratchPadBase, CGM.SizeTy);
- Address DestDataAddr(CumulativeElemBasePtr, CGF.getPointerAlign());
-
- emitReductionListCopy(ThreadToScratchpad, CGF, ReductionArrayTy, Privates,
- SrcDataAddr, DestDataAddr,
- {/*RemoteLaneOffset=*/nullptr,
- /*ScratchpadIndex=*/IndexVal,
- /*ScratchpadWidth=*/WidthVal});
-
- CGF.FinishFunction();
- return Fn;
-}
-
/// This function emits a helper that gathers Reduce lists from the first
/// lane of every active warp to lanes in the first warp.
///
/// void inter_warp_copy_func(void* reduce_data, num_warps)
/// shared smem[warp_size];
/// For all data entries D in reduce_data:
+/// sync
/// If (I am the first lane in each warp)
/// Copy my local D to smem[warp_id]
/// sync
/// if (I am the first warp)
/// Copy smem[thread_id] to my local D
-/// sync
static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
ArrayRef<const Expr *> Privates,
QualType ReductionArrayTy,
@@ -2688,11 +3169,10 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
llvm::GlobalVariable *TransferMedium =
M.getGlobalVariable(TransferMediumName);
if (!TransferMedium) {
- auto *Ty = llvm::ArrayType::get(CGM.Int64Ty, WarpSize);
+ auto *Ty = llvm::ArrayType::get(CGM.Int32Ty, WarpSize);
unsigned SharedAddressSpace = C.getTargetAddressSpace(LangAS::cuda_shared);
TransferMedium = new llvm::GlobalVariable(
- M, Ty,
- /*isConstant=*/false, llvm::GlobalVariable::CommonLinkage,
+ M, Ty, /*isConstant=*/false, llvm::GlobalVariable::CommonLinkage,
llvm::Constant::getNullValue(Ty), TransferMediumName,
/*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal,
SharedAddressSpace);
@@ -2710,7 +3190,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
Address LocalReduceList(
Bld.CreatePointerBitCastOrAddrSpaceCast(
CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false,
- C.VoidPtrTy, SourceLocation()),
+ C.VoidPtrTy, Loc),
CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()),
CGF.getPointerAlign());
@@ -2720,121 +3200,151 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
// Warp master copies reduce element to transfer medium in __shared__
// memory.
//
- llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then");
- llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else");
- llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont");
-
- // if (lane_id == 0)
- llvm::Value *IsWarpMaster = Bld.CreateIsNull(LaneID, "warp_master");
- Bld.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
- CGF.EmitBlock(ThenBB);
-
- // Reduce element = LocalReduceList[i]
- Address ElemPtrPtrAddr =
- Bld.CreateConstArrayGEP(LocalReduceList, Idx, CGF.getPointerSize());
- llvm::Value *ElemPtrPtr = CGF.EmitLoadOfScalar(
- ElemPtrPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
- // elemptr = (type[i]*)(elemptrptr)
- Address ElemPtr =
- Address(ElemPtrPtr, C.getTypeAlignInChars(Private->getType()));
- ElemPtr = Bld.CreateElementBitCast(
- ElemPtr, CGF.ConvertTypeForMem(Private->getType()));
-
- // Get pointer to location in transfer medium.
- // MediumPtr = &medium[warp_id]
- llvm::Value *MediumPtrVal = Bld.CreateInBoundsGEP(
- TransferMedium, {llvm::Constant::getNullValue(CGM.Int64Ty), WarpID});
- Address MediumPtr(MediumPtrVal, C.getTypeAlignInChars(Private->getType()));
- // Casting to actual data type.
- // MediumPtr = (type[i]*)MediumPtrAddr;
- MediumPtr = Bld.CreateElementBitCast(
- MediumPtr, CGF.ConvertTypeForMem(Private->getType()));
-
- // elem = *elemptr
- //*MediumPtr = elem
- if (Private->getType()->isScalarType()) {
- llvm::Value *Elem = CGF.EmitLoadOfScalar(ElemPtr, /*Volatile=*/false,
- Private->getType(), Loc);
- // Store the source element value to the dest element address.
- CGF.EmitStoreOfScalar(Elem, MediumPtr, /*Volatile=*/false,
- Private->getType());
- } else {
- CGF.EmitAggregateCopy(CGF.MakeAddrLValue(ElemPtr, Private->getType()),
- CGF.MakeAddrLValue(MediumPtr, Private->getType()),
- Private->getType(), AggValueSlot::DoesNotOverlap);
- }
-
- Bld.CreateBr(MergeBB);
-
- CGF.EmitBlock(ElseBB);
- Bld.CreateBr(MergeBB);
-
- CGF.EmitBlock(MergeBB);
+ unsigned RealTySize =
+ C.getTypeSizeInChars(Private->getType())
+ .alignTo(C.getTypeAlignInChars(Private->getType()))
+ .getQuantity();
+    for (unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
+ unsigned NumIters = RealTySize / TySize;
+ if (NumIters == 0)
+ continue;
+ QualType CType = C.getIntTypeForBitwidth(
+ C.toBits(CharUnits::fromQuantity(TySize)), /*Signed=*/1);
+ llvm::Type *CopyType = CGF.ConvertTypeForMem(CType);
+ CharUnits Align = CharUnits::fromQuantity(TySize);
+ llvm::Value *Cnt = nullptr;
+ Address CntAddr = Address::invalid();
+ llvm::BasicBlock *PrecondBB = nullptr;
+ llvm::BasicBlock *ExitBB = nullptr;
+ if (NumIters > 1) {
+ CntAddr = CGF.CreateMemTemp(C.IntTy, ".cnt.addr");
+ CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.IntTy), CntAddr,
+ /*Volatile=*/false, C.IntTy);
+ PrecondBB = CGF.createBasicBlock("precond");
+ ExitBB = CGF.createBasicBlock("exit");
+ llvm::BasicBlock *BodyBB = CGF.createBasicBlock("body");
+ // There is no need to emit line number for unconditional branch.
+ (void)ApplyDebugLocation::CreateEmpty(CGF);
+ CGF.EmitBlock(PrecondBB);
+ Cnt = CGF.EmitLoadOfScalar(CntAddr, /*Volatile=*/false, C.IntTy, Loc);
+ llvm::Value *Cmp =
+ Bld.CreateICmpULT(Cnt, llvm::ConstantInt::get(CGM.IntTy, NumIters));
+ Bld.CreateCondBr(Cmp, BodyBB, ExitBB);
+ CGF.EmitBlock(BodyBB);
+ }
+ // kmpc_barrier.
+ CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc, OMPD_unknown,
+ /*EmitChecks=*/false,
+ /*ForceSimpleCall=*/true);
+ llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then");
+ llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else");
+ llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont");
+
+ // if (lane_id == 0)
+ llvm::Value *IsWarpMaster = Bld.CreateIsNull(LaneID, "warp_master");
+ Bld.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
+ CGF.EmitBlock(ThenBB);
- Address AddrNumWarpsArg = CGF.GetAddrOfLocalVar(&NumWarpsArg);
- llvm::Value *NumWarpsVal = CGF.EmitLoadOfScalar(
- AddrNumWarpsArg, /*Volatile=*/false, C.IntTy, SourceLocation());
+ // Reduce element = LocalReduceList[i]
+ Address ElemPtrPtrAddr =
+ Bld.CreateConstArrayGEP(LocalReduceList, Idx, CGF.getPointerSize());
+ llvm::Value *ElemPtrPtr = CGF.EmitLoadOfScalar(
+ ElemPtrPtrAddr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
+ // elemptr = ((CopyType*)(elemptrptr)) + I
+ Address ElemPtr = Address(ElemPtrPtr, Align);
+ ElemPtr = Bld.CreateElementBitCast(ElemPtr, CopyType);
+ if (NumIters > 1) {
+ ElemPtr = Address(Bld.CreateGEP(ElemPtr.getPointer(), Cnt),
+ ElemPtr.getAlignment());
+ }
- llvm::Value *NumActiveThreads = Bld.CreateNSWMul(
- NumWarpsVal, getNVPTXWarpSize(CGF), "num_active_threads");
- // named_barrier_sync(ParallelBarrierID, num_active_threads)
- syncParallelThreads(CGF, NumActiveThreads);
+ // Get pointer to location in transfer medium.
+ // MediumPtr = &medium[warp_id]
+ llvm::Value *MediumPtrVal = Bld.CreateInBoundsGEP(
+ TransferMedium, {llvm::Constant::getNullValue(CGM.Int64Ty), WarpID});
+ Address MediumPtr(MediumPtrVal, Align);
+ // Casting to actual data type.
+ // MediumPtr = (CopyType*)MediumPtrAddr;
+ MediumPtr = Bld.CreateElementBitCast(MediumPtr, CopyType);
+
+ // elem = *elemptr
+ //*MediumPtr = elem
+ llvm::Value *Elem =
+ CGF.EmitLoadOfScalar(ElemPtr, /*Volatile=*/false, CType, Loc);
+ // Store the source element value to the dest element address.
+ CGF.EmitStoreOfScalar(Elem, MediumPtr, /*Volatile=*/true, CType);
+
+ Bld.CreateBr(MergeBB);
+
+ CGF.EmitBlock(ElseBB);
+ Bld.CreateBr(MergeBB);
+
+ CGF.EmitBlock(MergeBB);
+
+ // kmpc_barrier.
+ CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc, OMPD_unknown,
+ /*EmitChecks=*/false,
+ /*ForceSimpleCall=*/true);
+
+ //
+ // Warp 0 copies reduce element from transfer medium.
+ //
+ llvm::BasicBlock *W0ThenBB = CGF.createBasicBlock("then");
+ llvm::BasicBlock *W0ElseBB = CGF.createBasicBlock("else");
+ llvm::BasicBlock *W0MergeBB = CGF.createBasicBlock("ifcont");
+
+ Address AddrNumWarpsArg = CGF.GetAddrOfLocalVar(&NumWarpsArg);
+ llvm::Value *NumWarpsVal = CGF.EmitLoadOfScalar(
+ AddrNumWarpsArg, /*Volatile=*/false, C.IntTy, Loc);
+
+ // Up to 32 threads in warp 0 are active.
+ llvm::Value *IsActiveThread =
+ Bld.CreateICmpULT(ThreadID, NumWarpsVal, "is_active_thread");
+ Bld.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
+
+ CGF.EmitBlock(W0ThenBB);
+
+ // SrcMediumPtr = &medium[tid]
+ llvm::Value *SrcMediumPtrVal = Bld.CreateInBoundsGEP(
+ TransferMedium,
+ {llvm::Constant::getNullValue(CGM.Int64Ty), ThreadID});
+ Address SrcMediumPtr(SrcMediumPtrVal, Align);
+ // SrcMediumVal = *SrcMediumPtr;
+ SrcMediumPtr = Bld.CreateElementBitCast(SrcMediumPtr, CopyType);
+
+ // TargetElemPtr = (CopyType*)(SrcDataAddr[i]) + I
+ Address TargetElemPtrPtr =
+ Bld.CreateConstArrayGEP(LocalReduceList, Idx, CGF.getPointerSize());
+ llvm::Value *TargetElemPtrVal = CGF.EmitLoadOfScalar(
+ TargetElemPtrPtr, /*Volatile=*/false, C.VoidPtrTy, Loc);
+ Address TargetElemPtr = Address(TargetElemPtrVal, Align);
+ TargetElemPtr = Bld.CreateElementBitCast(TargetElemPtr, CopyType);
+ if (NumIters > 1) {
+ TargetElemPtr = Address(Bld.CreateGEP(TargetElemPtr.getPointer(), Cnt),
+ TargetElemPtr.getAlignment());
+ }
- //
- // Warp 0 copies reduce element from transfer medium.
- //
- llvm::BasicBlock *W0ThenBB = CGF.createBasicBlock("then");
- llvm::BasicBlock *W0ElseBB = CGF.createBasicBlock("else");
- llvm::BasicBlock *W0MergeBB = CGF.createBasicBlock("ifcont");
-
- // Up to 32 threads in warp 0 are active.
- llvm::Value *IsActiveThread =
- Bld.CreateICmpULT(ThreadID, NumWarpsVal, "is_active_thread");
- Bld.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
-
- CGF.EmitBlock(W0ThenBB);
-
- // SrcMediumPtr = &medium[tid]
- llvm::Value *SrcMediumPtrVal = Bld.CreateInBoundsGEP(
- TransferMedium, {llvm::Constant::getNullValue(CGM.Int64Ty), ThreadID});
- Address SrcMediumPtr(SrcMediumPtrVal,
- C.getTypeAlignInChars(Private->getType()));
- // SrcMediumVal = *SrcMediumPtr;
- SrcMediumPtr = Bld.CreateElementBitCast(
- SrcMediumPtr, CGF.ConvertTypeForMem(Private->getType()));
-
- // TargetElemPtr = (type[i]*)(SrcDataAddr[i])
- Address TargetElemPtrPtr =
- Bld.CreateConstArrayGEP(LocalReduceList, Idx, CGF.getPointerSize());
- llvm::Value *TargetElemPtrVal = CGF.EmitLoadOfScalar(
- TargetElemPtrPtr, /*Volatile=*/false, C.VoidPtrTy, SourceLocation());
- Address TargetElemPtr =
- Address(TargetElemPtrVal, C.getTypeAlignInChars(Private->getType()));
- TargetElemPtr = Bld.CreateElementBitCast(
- TargetElemPtr, CGF.ConvertTypeForMem(Private->getType()));
-
- // *TargetElemPtr = SrcMediumVal;
- if (Private->getType()->isScalarType()) {
- llvm::Value *SrcMediumValue = CGF.EmitLoadOfScalar(
- SrcMediumPtr, /*Volatile=*/false, Private->getType(), Loc);
+ // *TargetElemPtr = SrcMediumVal;
+ llvm::Value *SrcMediumValue =
+ CGF.EmitLoadOfScalar(SrcMediumPtr, /*Volatile=*/true, CType, Loc);
CGF.EmitStoreOfScalar(SrcMediumValue, TargetElemPtr, /*Volatile=*/false,
- Private->getType());
- } else {
- CGF.EmitAggregateCopy(
- CGF.MakeAddrLValue(SrcMediumPtr, Private->getType()),
- CGF.MakeAddrLValue(TargetElemPtr, Private->getType()),
- Private->getType(), AggValueSlot::DoesNotOverlap);
- }
- Bld.CreateBr(W0MergeBB);
+ CType);
+ Bld.CreateBr(W0MergeBB);
- CGF.EmitBlock(W0ElseBB);
- Bld.CreateBr(W0MergeBB);
+ CGF.EmitBlock(W0ElseBB);
+ Bld.CreateBr(W0MergeBB);
- CGF.EmitBlock(W0MergeBB);
+ CGF.EmitBlock(W0MergeBB);
- // While warp 0 copies values from transfer medium, all other warps must
- // wait.
- syncParallelThreads(CGF, NumActiveThreads);
+ if (NumIters > 1) {
+ Cnt = Bld.CreateNSWAdd(Cnt, llvm::ConstantInt::get(CGM.IntTy, /*V=*/1));
+ CGF.EmitStoreOfScalar(Cnt, CntAddr, /*Volatile=*/false, C.IntTy);
+ CGF.EmitBranch(PrecondBB);
+ (void)ApplyDebugLocation::CreateEmpty(CGF);
+ CGF.EmitBlock(ExitBB);
+ }
+ RealTySize %= TySize;
+ }
++Idx;
}
@@ -3103,7 +3613,7 @@ static llvm::Value *emitShuffleAndReduceFunction(
/// 3. Call the OpenMP runtime on the GPU to reduce within a team
/// and store the result on the team master:
///
-/// __kmpc_nvptx_parallel_reduce_nowait(...,
+/// __kmpc_nvptx_parallel_reduce_nowait_v2(...,
/// reduceData, shuffleReduceFn, interWarpCpyFn)
///
/// where:
@@ -3274,7 +3784,7 @@ static llvm::Value *emitShuffleAndReduceFunction(
/// Intra-Team Reduction
///
/// This function, as implemented in the runtime call
-/// '__kmpc_nvptx_parallel_reduce_nowait', aggregates data across OpenMP
+/// '__kmpc_nvptx_parallel_reduce_nowait_v2', aggregates data across OpenMP
/// threads in a team. It first reduces within a warp using the
/// aforementioned algorithms. We then proceed to gather all such
/// reduced values at the first warp.
@@ -3297,7 +3807,7 @@ static llvm::Value *emitShuffleAndReduceFunction(
/// 'loadAndReduceDataFn' to load and reduce values from the array, i.e.,
/// the k'th worker reduces every k'th element.
///
-/// Finally, a call is made to '__kmpc_nvptx_parallel_reduce_nowait' to
+/// Finally, a call is made to '__kmpc_nvptx_parallel_reduce_nowait_v2' to
/// reduce across workers and compute a globally reduced value.
///
void CGOpenMPRuntimeNVPTX::emitReduction(
@@ -3308,125 +3818,116 @@ void CGOpenMPRuntimeNVPTX::emitReduction(
return;
bool ParallelReduction = isOpenMPParallelDirective(Options.ReductionKind);
+#ifndef NDEBUG
bool TeamsReduction = isOpenMPTeamsDirective(Options.ReductionKind);
- bool SimdReduction = isOpenMPSimdDirective(Options.ReductionKind);
- assert((TeamsReduction || ParallelReduction || SimdReduction) &&
- "Invalid reduction selection in emitReduction.");
+#endif
if (Options.SimpleReduction) {
+ assert(!TeamsReduction && !ParallelReduction &&
+ "Invalid reduction selection in emitReduction.");
CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
ReductionOps, Options);
return;
}
- ASTContext &C = CGM.getContext();
-
- // 1. Build a list of reduction variables.
- // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
- auto Size = RHSExprs.size();
- for (const Expr *E : Privates) {
- if (E->getType()->isVariablyModifiedType())
- // Reserve place for array size.
- ++Size;
- }
- llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
- QualType ReductionArrayTy =
- C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
- /*IndexTypeQuals=*/0);
- Address ReductionList =
- CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
- auto IPriv = Privates.begin();
- unsigned Idx = 0;
- for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
- Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
- CGF.getPointerSize());
- CGF.Builder.CreateStore(
- CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
- Elem);
- if ((*IPriv)->getType()->isVariablyModifiedType()) {
- // Store array size.
- ++Idx;
- Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
- CGF.getPointerSize());
- llvm::Value *Size = CGF.Builder.CreateIntCast(
- CGF.getVLASize(
- CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
- .NumElts,
- CGF.SizeTy, /*isSigned=*/false);
- CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
- Elem);
- }
- }
-
- // 2. Emit reduce_func().
- llvm::Value *ReductionFn = emitReductionFunction(
- CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(),
- Privates, LHSExprs, RHSExprs, ReductionOps);
+ assert((TeamsReduction || ParallelReduction) &&
+ "Invalid reduction selection in emitReduction.");
- // 4. Build res = __kmpc_reduce{_nowait}(<gtid>, <n>, sizeof(RedList),
+ // Build res = __kmpc_reduce{_nowait}(<gtid>, <n>, sizeof(RedList),
// RedList, shuffle_reduce_func, interwarp_copy_func);
+ // or
+ // Build res = __kmpc_reduce_teams_nowait_simple(<loc>, <gtid>, <lck>);
+ llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
llvm::Value *ThreadId = getThreadID(CGF, Loc);
- llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
- llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- ReductionList.getPointer(), CGF.VoidPtrTy);
-
- llvm::Value *ShuffleAndReduceFn = emitShuffleAndReduceFunction(
- CGM, Privates, ReductionArrayTy, ReductionFn, Loc);
- llvm::Value *InterWarpCopyFn =
- emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy, Loc);
-
- llvm::Value *Args[] = {ThreadId,
- CGF.Builder.getInt32(RHSExprs.size()),
- ReductionArrayTySize,
- RL,
- ShuffleAndReduceFn,
- InterWarpCopyFn};
-
- llvm::Value *Res = nullptr;
- if (ParallelReduction)
- Res = CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_reduce_nowait),
- Args);
- else if (SimdReduction)
- Res = CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_simd_reduce_nowait),
- Args);
- if (TeamsReduction) {
- llvm::Value *ScratchPadCopyFn =
- emitCopyToScratchpad(CGM, Privates, ReductionArrayTy, Loc);
- llvm::Value *LoadAndReduceFn = emitReduceScratchpadFunction(
+ llvm::Value *Res;
+ if (ParallelReduction) {
+ ASTContext &C = CGM.getContext();
+ // 1. Build a list of reduction variables.
+ // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
+ auto Size = RHSExprs.size();
+ for (const Expr *E : Privates) {
+ if (E->getType()->isVariablyModifiedType())
+ // Reserve place for array size.
+ ++Size;
+ }
+ llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
+ QualType ReductionArrayTy =
+ C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
+ /*IndexTypeQuals=*/0);
+ Address ReductionList =
+ CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
+ auto IPriv = Privates.begin();
+ unsigned Idx = 0;
+ for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
+ Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
+ CGF.getPointerSize());
+ CGF.Builder.CreateStore(
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
+ Elem);
+ if ((*IPriv)->getType()->isVariablyModifiedType()) {
+ // Store array size.
+ ++Idx;
+ Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
+ CGF.getPointerSize());
+ llvm::Value *Size = CGF.Builder.CreateIntCast(
+ CGF.getVLASize(
+ CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
+ .NumElts,
+ CGF.SizeTy, /*isSigned=*/false);
+ CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
+ Elem);
+ }
+ }
+
+ llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
+ llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ ReductionList.getPointer(), CGF.VoidPtrTy);
+ llvm::Value *ReductionFn = emitReductionFunction(
+ CGM, Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(),
+ Privates, LHSExprs, RHSExprs, ReductionOps);
+ llvm::Value *ShuffleAndReduceFn = emitShuffleAndReduceFunction(
CGM, Privates, ReductionArrayTy, ReductionFn, Loc);
+ llvm::Value *InterWarpCopyFn =
+ emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy, Loc);
- llvm::Value *Args[] = {ThreadId,
+ llvm::Value *Args[] = {RTLoc,
+ ThreadId,
CGF.Builder.getInt32(RHSExprs.size()),
ReductionArrayTySize,
RL,
ShuffleAndReduceFn,
- InterWarpCopyFn,
- ScratchPadCopyFn,
- LoadAndReduceFn};
+ InterWarpCopyFn};
+
+ Res = CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_parallel_reduce_nowait_v2),
+ Args);
+ } else {
+ assert(TeamsReduction && "expected teams reduction.");
+ std::string Name = getName({"reduction"});
+ llvm::Value *Lock = getCriticalRegionLock(Name);
+ llvm::Value *Args[] = {RTLoc, ThreadId, Lock};
Res = CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_teams_reduce_nowait),
+ createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_simple),
Args);
}
- // 5. Build switch(res)
- llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
- llvm::SwitchInst *SwInst =
- CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/1);
+ // 5. Build if (res == 1)
+ llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.reduction.done");
+ llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".omp.reduction.then");
+ llvm::Value *Cond = CGF.Builder.CreateICmpEQ(
+ Res, llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1));
+ CGF.Builder.CreateCondBr(Cond, ThenBB, ExitBB);
- // 6. Build case 1: where we have reduced values in the master
+ // 6. Build then branch: where we have reduced values in the master
// thread in each team.
// __kmpc_end_reduce{_nowait}(<gtid>);
// break;
- llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
- SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
- CGF.EmitBlock(Case1BB);
+ CGF.EmitBlock(ThenBB);
// Add emission of __kmpc_end_reduce{_nowait}(<gtid>);
- llvm::Value *EndArgs[] = {ThreadId};
auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps,
this](CodeGenFunction &CGF, PrePostActionTy &Action) {
auto IPriv = Privates.begin();
@@ -3440,15 +3941,33 @@ void CGOpenMPRuntimeNVPTX::emitReduction(
++IRHS;
}
};
- RegionCodeGenTy RCG(CodeGen);
- NVPTXActionTy Action(
- nullptr, llvm::None,
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_reduce_nowait),
- EndArgs);
- RCG.setAction(Action);
- RCG(CGF);
- CGF.EmitBranch(DefaultBB);
- CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
+ if (ParallelReduction) {
+ llvm::Value *EndArgs[] = {ThreadId};
+ RegionCodeGenTy RCG(CodeGen);
+ NVPTXActionTy Action(
+ nullptr, llvm::None,
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_reduce_nowait),
+ EndArgs);
+ RCG.setAction(Action);
+ RCG(CGF);
+ } else {
+ assert(TeamsReduction && "expected teams reduction.");
+ llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
+ std::string Name = getName({"reduction"});
+ llvm::Value *Lock = getCriticalRegionLock(Name);
+ llvm::Value *EndArgs[] = {RTLoc, ThreadId, Lock};
+ RegionCodeGenTy RCG(CodeGen);
+ NVPTXActionTy Action(
+ nullptr, llvm::None,
+ createNVPTXRuntimeFunction(
+ OMPRTL_NVPTX__kmpc_nvptx_teams_end_reduce_nowait_simple),
+ EndArgs);
+ RCG.setAction(Action);
+ RCG(CGF);
+ }
+ // There is no need to emit line number for unconditional branch.
+ (void)ApplyDebugLocation::CreateEmpty(CGF);
+ CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
}
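
After this change the parallel-reduction path boils down to one runtime call followed by an if (res == 1) finalization instead of a switch. A rough sketch of the emitted shape for the parallel case; the entry-point names and prototypes are assumptions modelled on the v2 call used above:

extern "C" int __kmpc_nvptx_parallel_reduce_nowait_v2(
    void *Loc, int GTid, int NumVars, unsigned long ReduceSize,
    void *ReduceData, void *ShuffleReduceFn, void *InterWarpCpyFn);
extern "C" void __kmpc_nvptx_end_reduce_nowait(int GTid);

// Sketch only: Res == 1 means this thread holds the reduced values and must
// copy them into the original list items before signalling completion.
static void parallel_reduction_sketch(void *Loc, int GTid, int NumVars,
                                      unsigned long RedListSize, void *RedList,
                                      void *ShuffleFn, void *InterWarpCopyFn,
                                      void (*CopyReducedToOriginals)()) {
  int Res = __kmpc_nvptx_parallel_reduce_nowait_v2(
      Loc, GTid, NumVars, RedListSize, RedList, ShuffleFn, InterWarpCopyFn);
  if (Res == 1) { // .omp.reduction.then
    CopyReducedToOriginals();
    __kmpc_nvptx_end_reduce_nowait(GTid);
  }               // .omp.reduction.done
}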
const VarDecl *
@@ -3478,7 +3997,7 @@ CGOpenMPRuntimeNVPTX::translateParameter(const FieldDecl *FD,
return ParmVarDecl::Create(
CGM.getContext(),
const_cast<DeclContext *>(NativeParam->getDeclContext()),
- NativeParam->getLocStart(), NativeParam->getLocation(),
+ NativeParam->getBeginLoc(), NativeParam->getLocation(),
NativeParam->getIdentifier(), ArgType,
/*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
}
@@ -3556,10 +4075,10 @@ llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper(
Ctx.getIntTypeForBitwidth(/*DestWidth=*/16, /*Signed=*/false);
QualType Int32QTy =
Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false);
- ImplicitParamDecl ParallelLevelArg(Ctx, /*DC=*/nullptr, D.getLocStart(),
+ ImplicitParamDecl ParallelLevelArg(Ctx, /*DC=*/nullptr, D.getBeginLoc(),
/*Id=*/nullptr, Int16QTy,
ImplicitParamDecl::Other);
- ImplicitParamDecl WrapperArg(Ctx, /*DC=*/nullptr, D.getLocStart(),
+ ImplicitParamDecl WrapperArg(Ctx, /*DC=*/nullptr, D.getBeginLoc(),
/*Id=*/nullptr, Int32QTy,
ImplicitParamDecl::Other);
WrapperArgs.emplace_back(&ParallelLevelArg);
@@ -3577,7 +4096,7 @@ llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper(
CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, Fn, CGFI, WrapperArgs,
- D.getLocStart(), D.getLocStart());
+ D.getBeginLoc(), D.getBeginLoc());
const auto *RD = CS.getCapturedRecordDecl();
auto CurField = RD->field_begin();
@@ -3662,7 +4181,7 @@ llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper(
}
}
- emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedParallelFn, Args);
+ emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedParallelFn, Args);
CGF.FinishFunction();
return Fn;
}
@@ -3675,6 +4194,8 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF,
assert(D && "Expected function or captured|block decl.");
assert(FunctionGlobalizedDecls.count(CGF.CurFn) == 0 &&
"Function is registered already.");
+ assert((!TeamAndReductions.first || TeamAndReductions.first == D) &&
+ "Team is set but not processed.");
const Stmt *Body = nullptr;
bool NeedToDelayGlobalization = false;
if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
@@ -3684,12 +4205,18 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF,
} else if (const auto *CD = dyn_cast<CapturedDecl>(D)) {
Body = CD->getBody();
NeedToDelayGlobalization = CGF.CapturedStmtInfo->getKind() == CR_OpenMP;
+ if (NeedToDelayGlobalization &&
+ getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD)
+ return;
}
if (!Body)
return;
- CheckVarsEscapingDeclContext VarChecker(CGF);
+ CheckVarsEscapingDeclContext VarChecker(CGF, TeamAndReductions.second);
VarChecker.Visit(Body);
- const RecordDecl *GlobalizedVarsRecord = VarChecker.getGlobalizedRecord();
+ const RecordDecl *GlobalizedVarsRecord =
+ VarChecker.getGlobalizedRecord(IsInTTDRegion);
+ TeamAndReductions.first = nullptr;
+ TeamAndReductions.second.clear();
ArrayRef<const ValueDecl *> EscapedVariableLengthDecls =
VarChecker.getEscapedVariableLengthDecls();
if (!GlobalizedVarsRecord && EscapedVariableLengthDecls.empty())
@@ -3707,16 +4234,30 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF,
for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {
assert(VD->isCanonicalDecl() && "Expected canonical declaration");
const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);
- Data.insert(std::make_pair(VD, std::make_pair(FD, Address::invalid())));
+ Data.insert(std::make_pair(VD, MappedVarData(FD, IsInTTDRegion)));
+ }
+ if (!IsInTTDRegion && !NeedToDelayGlobalization && !IsInParallelRegion) {
+ CheckVarsEscapingDeclContext VarChecker(CGF, llvm::None);
+ VarChecker.Visit(Body);
+ I->getSecond().SecondaryGlobalRecord =
+ VarChecker.getGlobalizedRecord(/*IsInTTDRegion=*/true);
+ I->getSecond().SecondaryLocalVarData.emplace();
+ DeclToAddrMapTy &Data = I->getSecond().SecondaryLocalVarData.getValue();
+ for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {
+ assert(VD->isCanonicalDecl() && "Expected canonical declaration");
+ const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);
+ Data.insert(
+ std::make_pair(VD, MappedVarData(FD, /*IsInTTDRegion=*/true)));
+ }
}
if (!NeedToDelayGlobalization) {
- emitGenericVarsProlog(CGF, D->getLocStart());
+ emitGenericVarsProlog(CGF, D->getBeginLoc(), /*WithSPMDCheck=*/true);
struct GlobalizationScope final : EHScopeStack::Cleanup {
GlobalizationScope() = default;
void Emit(CodeGenFunction &CGF, Flags flags) override {
static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
- .emitGenericVarsEpilog(CGF);
+ .emitGenericVarsEpilog(CGF, /*WithSPMDCheck=*/true);
}
};
CGF.EHStack.pushCleanup<GlobalizationScope>(NormalAndEHCleanup);
@@ -3734,7 +4275,7 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
return Address::invalid();
auto VDI = I->getSecond().LocalVarData.find(VD);
if (VDI != I->getSecond().LocalVarData.end())
- return VDI->second.second;
+ return VDI->second.PrivateAddr;
if (VD->hasAttrs()) {
for (specific_attr_iterator<OMPReferencedVarAttr> IT(VD->attr_begin()),
E(VD->attr_end());
@@ -3743,7 +4284,7 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
cast<VarDecl>(cast<DeclRefExpr>(IT->getRef())->getDecl())
->getCanonicalDecl());
if (VDI != I->getSecond().LocalVarData.end())
- return VDI->second.second;
+ return VDI->second.PrivateAddr;
}
}
return Address::invalid();
@@ -3753,3 +4294,311 @@ void CGOpenMPRuntimeNVPTX::functionFinished(CodeGenFunction &CGF) {
FunctionGlobalizedDecls.erase(CGF.CurFn);
CGOpenMPRuntime::functionFinished(CGF);
}
+
+void CGOpenMPRuntimeNVPTX::getDefaultDistScheduleAndChunk(
+ CodeGenFunction &CGF, const OMPLoopDirective &S,
+ OpenMPDistScheduleClauseKind &ScheduleKind,
+ llvm::Value *&Chunk) const {
+ if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) {
+ ScheduleKind = OMPC_DIST_SCHEDULE_static;
+ Chunk = CGF.EmitScalarConversion(getNVPTXNumThreads(CGF),
+ CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
+ S.getIterationVariable()->getType(), S.getBeginLoc());
+ return;
+ }
+ CGOpenMPRuntime::getDefaultDistScheduleAndChunk(
+ CGF, S, ScheduleKind, Chunk);
+}
+
+void CGOpenMPRuntimeNVPTX::getDefaultScheduleAndChunk(
+ CodeGenFunction &CGF, const OMPLoopDirective &S,
+ OpenMPScheduleClauseKind &ScheduleKind,
+ const Expr *&ChunkExpr) const {
+ ScheduleKind = OMPC_SCHEDULE_static;
+ // Chunk size is 1 in this case.
+ llvm::APInt ChunkSize(32, 1);
+ ChunkExpr = IntegerLiteral::Create(CGF.getContext(), ChunkSize,
+ CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
+ SourceLocation());
+}
+
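
Hedged usage illustration of the two defaults above: for a combined target construct lowered in SPMD mode, a loop with no schedule clauses is distributed as dist_schedule(static, <number of threads>) across teams and schedule(static, 1) within each team.

void saxpy(int N, float A, float *X, float *Y) {
  // No dist_schedule/schedule clauses: the NVPTX defaults chosen above apply.
  #pragma omp target teams distribute parallel for \
      map(to : X[0:N]) map(tofrom : Y[0:N])
  for (int I = 0; I < N; ++I)
    Y[I] = A * X[I] + Y[I];
}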
+void CGOpenMPRuntimeNVPTX::adjustTargetSpecificDataForLambdas(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
+ assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
+ " Expected target-based directive.");
+ const CapturedStmt *CS = D.getCapturedStmt(OMPD_target);
+ for (const CapturedStmt::Capture &C : CS->captures()) {
+ // Capture variables captured by reference in lambdas for target-based
+ // directives.
+ if (!C.capturesVariable())
+ continue;
+ const VarDecl *VD = C.getCapturedVar();
+ const auto *RD = VD->getType()
+ .getCanonicalType()
+ .getNonReferenceType()
+ ->getAsCXXRecordDecl();
+ if (!RD || !RD->isLambda())
+ continue;
+ Address VDAddr = CGF.GetAddrOfLocalVar(VD);
+ LValue VDLVal;
+ if (VD->getType().getCanonicalType()->isReferenceType())
+ VDLVal = CGF.EmitLoadOfReferenceLValue(VDAddr, VD->getType());
+ else
+ VDLVal = CGF.MakeAddrLValue(
+ VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
+ llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
+ FieldDecl *ThisCapture = nullptr;
+ RD->getCaptureFields(Captures, ThisCapture);
+ if (ThisCapture && CGF.CapturedStmtInfo->isCXXThisExprCaptured()) {
+ LValue ThisLVal =
+ CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
+ llvm::Value *CXXThis = CGF.LoadCXXThis();
+ CGF.EmitStoreOfScalar(CXXThis, ThisLVal);
+ }
+ for (const LambdaCapture &LC : RD->captures()) {
+ if (LC.getCaptureKind() != LCK_ByRef)
+ continue;
+ const VarDecl *VD = LC.getCapturedVar();
+ if (!CS->capturesVariable(VD))
+ continue;
+ auto It = Captures.find(VD);
+ assert(It != Captures.end() && "Found lambda capture without field.");
+ LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
+ Address VDAddr = CGF.GetAddrOfLocalVar(VD);
+ if (VD->getType().getCanonicalType()->isReferenceType())
+ VDAddr = CGF.EmitLoadOfReferenceLValue(VDAddr,
+ VD->getType().getCanonicalType())
+ .getAddress();
+ CGF.EmitStoreOfScalar(VDAddr.getPointer(), VarLVal);
+ }
+ }
+}
+
+// Get current CudaArch and ignore any unknown values
+static CudaArch getCudaArch(CodeGenModule &CGM) {
+ if (!CGM.getTarget().hasFeature("ptx"))
+ return CudaArch::UNKNOWN;
+ llvm::StringMap<bool> Features;
+ CGM.getTarget().initFeatureMap(Features, CGM.getDiags(),
+ CGM.getTarget().getTargetOpts().CPU,
+ CGM.getTarget().getTargetOpts().Features);
+ for (const auto &Feature : Features) {
+ if (Feature.getValue()) {
+ CudaArch Arch = StringToCudaArch(Feature.getKey());
+ if (Arch != CudaArch::UNKNOWN)
+ return Arch;
+ }
+ }
+ return CudaArch::UNKNOWN;
+}
+
+/// Check whether the target architecture supports unified addressing, which
+/// is required by the OpenMP 'requires' clause "unified_shared_memory".
+void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing(
+ CodeGenModule &CGM, const OMPRequiresDecl *D) const {
+ for (const OMPClause *Clause : D->clauselists()) {
+ if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
+ switch (getCudaArch(CGM)) {
+ case CudaArch::SM_20:
+ case CudaArch::SM_21:
+ case CudaArch::SM_30:
+ case CudaArch::SM_32:
+ case CudaArch::SM_35:
+ case CudaArch::SM_37:
+ case CudaArch::SM_50:
+ case CudaArch::SM_52:
+ case CudaArch::SM_53:
+ case CudaArch::SM_60:
+ case CudaArch::SM_61:
+ case CudaArch::SM_62:
+ CGM.Error(Clause->getBeginLoc(),
+ "Target architecture does not support unified addressing");
+ return;
+ case CudaArch::SM_70:
+ case CudaArch::SM_72:
+ case CudaArch::SM_75:
+ case CudaArch::GFX600:
+ case CudaArch::GFX601:
+ case CudaArch::GFX700:
+ case CudaArch::GFX701:
+ case CudaArch::GFX702:
+ case CudaArch::GFX703:
+ case CudaArch::GFX704:
+ case CudaArch::GFX801:
+ case CudaArch::GFX802:
+ case CudaArch::GFX803:
+ case CudaArch::GFX810:
+ case CudaArch::GFX900:
+ case CudaArch::GFX902:
+ case CudaArch::GFX904:
+ case CudaArch::GFX906:
+ case CudaArch::GFX909:
+ case CudaArch::UNKNOWN:
+ break;
+ case CudaArch::LAST:
+ llvm_unreachable("Unexpected Cuda arch.");
+ }
+ }
+ }
+}
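As a hypothetical illustration (not part of the patch), a translation unit like the one below would hit the diagnostic above when compiled for any of the pre-sm_70 offload architectures in the first group of cases, since they cannot honor the requirement:

#pragma omp requires unified_shared_memory

void scale(int *p, int n) {
#pragma omp target teams distribute parallel for map(tofrom : p[0:n])
  for (int i = 0; i < n; ++i)
    p[i] *= 2;
}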
+
+/// Get number of SMs and number of blocks per SM.
+static std::pair<unsigned, unsigned> getSMsBlocksPerSM(CodeGenModule &CGM) {
+ std::pair<unsigned, unsigned> Data;
+ if (CGM.getLangOpts().OpenMPCUDANumSMs)
+ Data.first = CGM.getLangOpts().OpenMPCUDANumSMs;
+ if (CGM.getLangOpts().OpenMPCUDABlocksPerSM)
+ Data.second = CGM.getLangOpts().OpenMPCUDABlocksPerSM;
+ if (Data.first && Data.second)
+ return Data;
+ switch (getCudaArch(CGM)) {
+ case CudaArch::SM_20:
+ case CudaArch::SM_21:
+ case CudaArch::SM_30:
+ case CudaArch::SM_32:
+ case CudaArch::SM_35:
+ case CudaArch::SM_37:
+ case CudaArch::SM_50:
+ case CudaArch::SM_52:
+ case CudaArch::SM_53:
+ return {16, 16};
+ case CudaArch::SM_60:
+ case CudaArch::SM_61:
+ case CudaArch::SM_62:
+ return {56, 32};
+ case CudaArch::SM_70:
+ case CudaArch::SM_72:
+ case CudaArch::SM_75:
+ return {84, 32};
+ case CudaArch::GFX600:
+ case CudaArch::GFX601:
+ case CudaArch::GFX700:
+ case CudaArch::GFX701:
+ case CudaArch::GFX702:
+ case CudaArch::GFX703:
+ case CudaArch::GFX704:
+ case CudaArch::GFX801:
+ case CudaArch::GFX802:
+ case CudaArch::GFX803:
+ case CudaArch::GFX810:
+ case CudaArch::GFX900:
+ case CudaArch::GFX902:
+ case CudaArch::GFX904:
+ case CudaArch::GFX906:
+ case CudaArch::GFX909:
+ case CudaArch::UNKNOWN:
+ break;
+ case CudaArch::LAST:
+ llvm_unreachable("Unexpected Cuda arch.");
+ }
+ llvm_unreachable("Unexpected NVPTX target without ptx feature.");
+}
+
+void CGOpenMPRuntimeNVPTX::clear() {
+ if (!GlobalizedRecords.empty()) {
+ ASTContext &C = CGM.getContext();
+ llvm::SmallVector<const GlobalPtrSizeRecsTy *, 4> GlobalRecs;
+ llvm::SmallVector<const GlobalPtrSizeRecsTy *, 4> SharedRecs;
+ RecordDecl *StaticRD = C.buildImplicitRecord(
+ "_openmp_static_memory_type_$_", RecordDecl::TagKind::TTK_Union);
+ StaticRD->startDefinition();
+ RecordDecl *SharedStaticRD = C.buildImplicitRecord(
+ "_shared_openmp_static_memory_type_$_", RecordDecl::TagKind::TTK_Union);
+ SharedStaticRD->startDefinition();
+ for (const GlobalPtrSizeRecsTy &Records : GlobalizedRecords) {
+ if (Records.Records.empty())
+ continue;
+ unsigned Size = 0;
+ unsigned RecAlignment = 0;
+ for (const RecordDecl *RD : Records.Records) {
+ QualType RDTy = C.getRecordType(RD);
+ unsigned Alignment = C.getTypeAlignInChars(RDTy).getQuantity();
+ RecAlignment = std::max(RecAlignment, Alignment);
+ unsigned RecSize = C.getTypeSizeInChars(RDTy).getQuantity();
+ Size =
+ llvm::alignTo(llvm::alignTo(Size, Alignment) + RecSize, Alignment);
+ }
+ Size = llvm::alignTo(Size, RecAlignment);
+ llvm::APInt ArySize(/*numBits=*/64, Size);
+ QualType SubTy = C.getConstantArrayType(
+ C.CharTy, ArySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
+ const bool UseSharedMemory = Size <= SharedMemorySize;
+ auto *Field =
+ FieldDecl::Create(C, UseSharedMemory ? SharedStaticRD : StaticRD,
+ SourceLocation(), SourceLocation(), nullptr, SubTy,
+ C.getTrivialTypeSourceInfo(SubTy, SourceLocation()),
+ /*BW=*/nullptr, /*Mutable=*/false,
+ /*InitStyle=*/ICIS_NoInit);
+ Field->setAccess(AS_public);
+ if (UseSharedMemory) {
+ SharedStaticRD->addDecl(Field);
+ SharedRecs.push_back(&Records);
+ } else {
+ StaticRD->addDecl(Field);
+ GlobalRecs.push_back(&Records);
+ }
+ Records.RecSize->setInitializer(llvm::ConstantInt::get(CGM.SizeTy, Size));
+ Records.UseSharedMemory->setInitializer(
+ llvm::ConstantInt::get(CGM.Int16Ty, UseSharedMemory ? 1 : 0));
+ }
+ // Allocate SharedMemorySize buffer for the shared memory.
+    // FIXME: nvlink does not handle weak linkage correctly (objects with
+    // different sizes are reported as erroneous).
+    // Restore this code as soon as nvlink is fixed.
+ if (!SharedStaticRD->field_empty()) {
+ llvm::APInt ArySize(/*numBits=*/64, SharedMemorySize);
+ QualType SubTy = C.getConstantArrayType(
+ C.CharTy, ArySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
+ auto *Field = FieldDecl::Create(
+ C, SharedStaticRD, SourceLocation(), SourceLocation(), nullptr, SubTy,
+ C.getTrivialTypeSourceInfo(SubTy, SourceLocation()),
+ /*BW=*/nullptr, /*Mutable=*/false,
+ /*InitStyle=*/ICIS_NoInit);
+ Field->setAccess(AS_public);
+ SharedStaticRD->addDecl(Field);
+ }
+ SharedStaticRD->completeDefinition();
+ if (!SharedStaticRD->field_empty()) {
+ QualType StaticTy = C.getRecordType(SharedStaticRD);
+ llvm::Type *LLVMStaticTy = CGM.getTypes().ConvertTypeForMem(StaticTy);
+ auto *GV = new llvm::GlobalVariable(
+ CGM.getModule(), LLVMStaticTy,
+ /*isConstant=*/false, llvm::GlobalValue::CommonLinkage,
+ llvm::Constant::getNullValue(LLVMStaticTy),
+ "_openmp_shared_static_glob_rd_$_", /*InsertBefore=*/nullptr,
+ llvm::GlobalValue::NotThreadLocal,
+ C.getTargetAddressSpace(LangAS::cuda_shared));
+ auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ GV, CGM.VoidPtrTy);
+ for (const GlobalPtrSizeRecsTy *Rec : SharedRecs) {
+ Rec->Buffer->replaceAllUsesWith(Replacement);
+ Rec->Buffer->eraseFromParent();
+ }
+ }
+ StaticRD->completeDefinition();
+ if (!StaticRD->field_empty()) {
+ QualType StaticTy = C.getRecordType(StaticRD);
+ std::pair<unsigned, unsigned> SMsBlockPerSM = getSMsBlocksPerSM(CGM);
+ llvm::APInt Size1(32, SMsBlockPerSM.second);
+ QualType Arr1Ty =
+ C.getConstantArrayType(StaticTy, Size1, ArrayType::Normal,
+ /*IndexTypeQuals=*/0);
+ llvm::APInt Size2(32, SMsBlockPerSM.first);
+ QualType Arr2Ty = C.getConstantArrayType(Arr1Ty, Size2, ArrayType::Normal,
+ /*IndexTypeQuals=*/0);
+ llvm::Type *LLVMArr2Ty = CGM.getTypes().ConvertTypeForMem(Arr2Ty);
+ auto *GV = new llvm::GlobalVariable(
+ CGM.getModule(), LLVMArr2Ty,
+ /*isConstant=*/false, llvm::GlobalValue::CommonLinkage,
+ llvm::Constant::getNullValue(LLVMArr2Ty),
+ "_openmp_static_glob_rd_$_");
+ auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ GV, CGM.VoidPtrTy);
+ for (const GlobalPtrSizeRecsTy *Rec : GlobalRecs) {
+ Rec->Buffer->replaceAllUsesWith(Replacement);
+ Rec->Buffer->eraseFromParent();
+ }
+ }
+ }
+ CGOpenMPRuntime::clear();
+}
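A rough sketch of the globals this clear() override ends up emitting, assuming an sm_70 device (getSMsBlocksPerSM() returns {84, 32} for it) and a largest globalized record of 128 bytes; the names mirror the strings used above, the sizes are purely illustrative:

// Union over all globalized record sets of the compilation unit, padded to the
// largest one (assumed to be 128 bytes here).
union _openmp_static_memory_type_$_ {
  char padded_records[128];
};
// One slot per (SM, block-per-SM) pair: 84 SMs x 32 blocks per SM on sm_70.
// The shared-memory counterpart is a single SharedMemorySize-byte union placed
// in the CUDA shared address space instead.
_openmp_static_memory_type_$_ _openmp_static_glob_rd_$_[84][32];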
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
index f83e99f8a3b7..6091610c37e3 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -56,6 +56,11 @@ private:
ExecutionMode getExecutionMode() const;
+ bool requiresFullRuntime() const { return RequiresFullRuntime; }
+
+ /// Get barrier to synchronize all threads in a block.
+ void syncCTAThreads(CodeGenFunction &CGF);
+
/// Emit the worker function for the current target region.
void emitWorkerFunction(WorkerFunctionState &WST);
@@ -72,10 +77,11 @@ private:
void emitNonSPMDEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
/// Helper for generic variables globalization prolog.
- void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc);
+ void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc,
+ bool WithSPMDCheck = false);
/// Helper for generic variables globalization epilog.
- void emitGenericVarsEpilog(CodeGenFunction &CGF);
+ void emitGenericVarsEpilog(CodeGenFunction &CGF, bool WithSPMDCheck = false);
/// Helper for SPMD mode target directive's entry function.
void emitSPMDEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
@@ -179,8 +185,19 @@ protected:
return "__omp_outlined__";
}
+ /// Check if the default location must be constant.
+ /// Constant for NVPTX for better optimization.
+ bool isDefaultLocationConstant() const override { return true; }
+
+ /// Returns additional flags that can be stored in reserved_2 field of the
+ /// default location.
+  /// For the NVPTX target it contains data about the SPMD/Non-SPMD execution
+  /// mode and the Full/Lightweight runtime mode. Used for better optimization.
+ unsigned getDefaultLocationReserved2Flags() const override;
+
public:
explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM);
+ void clear() override;
/// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
/// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
@@ -260,6 +277,18 @@ public:
ArrayRef<llvm::Value *> CapturedVars,
const Expr *IfCond) override;
+ /// Emit an implicit/explicit barrier for OpenMP threads.
+ /// \param Kind Directive for which this implicit barrier call must be
+ /// generated. Must be OMPD_barrier for explicit barrier generation.
+  /// \param EmitChecks true if checks for cancellation barriers need to be emitted.
+  /// \param ForceSimpleCall true if a simple barrier call must be emitted, false
+  /// if the runtime class decides which one to emit (simple or with cancellation
+  /// checks).
+ ///
+ void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
+ OpenMPDirectiveKind Kind, bool EmitChecks = true,
+ bool ForceSimpleCall = false) override;
+
/// Emits a critical region.
/// \param CriticalName Name of the critical region.
/// \param CriticalOpGen Generator for the statement associated with the given
@@ -339,6 +368,26 @@ public:
///
void functionFinished(CodeGenFunction &CGF) override;
+ /// Choose a default value for the dist_schedule clause.
+ void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF,
+ const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind,
+ llvm::Value *&Chunk) const override;
+
+ /// Choose a default value for the schedule clause.
+ void getDefaultScheduleAndChunk(CodeGenFunction &CGF,
+ const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind,
+ const Expr *&ChunkExpr) const override;
+
+ /// Adjust some parameters for the target-based directives, like addresses of
+ /// the variables captured by reference in lambdas.
+ void adjustTargetSpecificDataForLambdas(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D) const override;
+
+  /// Perform a check on a requires decl to ensure that the target architecture
+  /// supports unified addressing.
+ void checkArchForUnifiedAddressing(CodeGenModule &CGM,
+ const OMPRequiresDecl *D) const override;
+
private:
/// Track the execution mode when codegening directives within a target
/// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the
@@ -346,9 +395,15 @@ private:
/// to emit optimized code.
ExecutionMode CurrentExecutionMode = EM_Unknown;
+ /// Check if the full runtime is required (default - yes).
+ bool RequiresFullRuntime = true;
+
/// true if we're emitting the code for the target region and next parallel
/// region is L0 for sure.
bool IsInTargetMasterThreadRegion = false;
+  /// true if currently emitting code for a target/teams/distribute region,
+  /// false otherwise.
+ bool IsInTTDRegion = false;
/// true if we're definitely in the parallel region.
bool IsInParallelRegion = false;
@@ -362,23 +417,59 @@ private:
llvm::Function *createParallelDataSharingWrapper(
llvm::Function *OutlinedParallelFn, const OMPExecutableDirective &D);
+ /// The data for the single globalized variable.
+ struct MappedVarData {
+ /// Corresponding field in the global record.
+ const FieldDecl *FD = nullptr;
+ /// Corresponding address.
+ Address PrivateAddr = Address::invalid();
+    /// true, if only one element is required (for lastprivates in SPMD mode),
+    /// false, if it must be created based on the warp size.
+ bool IsOnePerTeam = false;
+ MappedVarData() = delete;
+ MappedVarData(const FieldDecl *FD, bool IsOnePerTeam = false)
+ : FD(FD), IsOnePerTeam(IsOnePerTeam) {}
+ };
/// The map of local variables to their addresses in the global memory.
- using DeclToAddrMapTy = llvm::MapVector<const Decl *,
- std::pair<const FieldDecl *, Address>>;
+ using DeclToAddrMapTy = llvm::MapVector<const Decl *, MappedVarData>;
/// Set of the parameters passed by value escaping OpenMP context.
using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>;
struct FunctionData {
DeclToAddrMapTy LocalVarData;
+ llvm::Optional<DeclToAddrMapTy> SecondaryLocalVarData = llvm::None;
EscapedParamsTy EscapedParameters;
llvm::SmallVector<const ValueDecl*, 4> EscapedVariableLengthDecls;
llvm::SmallVector<llvm::Value *, 4> EscapedVariableLengthDeclsAddrs;
const RecordDecl *GlobalRecord = nullptr;
+ llvm::Optional<const RecordDecl *> SecondaryGlobalRecord = llvm::None;
llvm::Value *GlobalRecordAddr = nullptr;
+ llvm::Value *IsInSPMDModeFlag = nullptr;
std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams;
};
/// Maps the function to the list of the globalized variables with their
/// addresses.
llvm::SmallDenseMap<llvm::Function *, FunctionData> FunctionGlobalizedDecls;
+ /// List of records for the globalized variables in target/teams/distribute
+  /// contexts. Inner records are going to be joined into a single record,
+  /// while those resulting records are going to be joined into a single
+ /// union. This resulting union (one per CU) is the entry point for the static
+ /// memory management runtime functions.
+ struct GlobalPtrSizeRecsTy {
+ llvm::GlobalVariable *UseSharedMemory = nullptr;
+ llvm::GlobalVariable *RecSize = nullptr;
+ llvm::GlobalVariable *Buffer = nullptr;
+ SourceLocation Loc;
+ llvm::SmallVector<const RecordDecl *, 2> Records;
+ unsigned RegionCounter = 0;
+ };
+ llvm::SmallVector<GlobalPtrSizeRecsTy, 8> GlobalizedRecords;
+ /// Shared pointer for the global memory in the global memory buffer used for
+ /// the given kernel.
+ llvm::GlobalVariable *KernelStaticGlobalized = nullptr;
+  /// Pair of the Non-SPMD team and all reduction variables in this team
+  /// region.
+ std::pair<const Decl *, llvm::SmallVector<const ValueDecl *, 4>>
+ TeamAndReductions;
};
} // CodeGen namespace.
diff --git a/lib/CodeGen/CGRecordLayoutBuilder.cpp b/lib/CodeGen/CGRecordLayoutBuilder.cpp
index 58aaae692552..c754541ac121 100644
--- a/lib/CodeGen/CGRecordLayoutBuilder.cpp
+++ b/lib/CodeGen/CGRecordLayoutBuilder.cpp
@@ -20,7 +20,7 @@
#include "clang/AST/DeclCXX.h"
#include "clang/AST/Expr.h"
#include "clang/AST/RecordLayout.h"
-#include "clang/Frontend/CodeGenOptions.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp
index 79662ec0099f..0242b48659d1 100644
--- a/lib/CodeGen/CGStmt.cpp
+++ b/lib/CodeGen/CGStmt.cpp
@@ -19,8 +19,6 @@
#include "clang/Basic/Builtins.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "clang/Basic/TargetInfo.h"
-#include "clang/Sema/LoopHint.h"
-#include "clang/Sema/SemaDiagnostic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
@@ -38,7 +36,7 @@ using namespace CodeGen;
void CodeGenFunction::EmitStopPoint(const Stmt *S) {
if (CGDebugInfo *DI = getDebugInfo()) {
SourceLocation Loc;
- Loc = S->getLocStart();
+ Loc = S->getBeginLoc();
DI->EmitLocation(Builder, Loc);
LastStopPoint = Loc;
@@ -932,6 +930,8 @@ CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S,
LexicalScope ForScope(*this, S.getSourceRange());
// Evaluate the first pieces before the loop.
+ if (S.getInit())
+ EmitStmt(S.getInit());
EmitStmt(S.getRangeStmt());
EmitStmt(S.getBeginStmt());
EmitStmt(S.getEndStmt());
@@ -1020,7 +1020,7 @@ void CodeGenFunction::EmitReturnOfRValue(RValue RV, QualType Ty) {
/// non-void. Fun stuff :).
void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) {
if (requiresReturnValueCheck()) {
- llvm::Constant *SLoc = EmitCheckSourceLocation(S.getLocStart());
+ llvm::Constant *SLoc = EmitCheckSourceLocation(S.getBeginLoc());
auto *SLocPtr =
new llvm::GlobalVariable(CGM.getModule(), SLoc->getType(), false,
llvm::GlobalVariable::PrivateLinkage, SLoc);
@@ -1045,10 +1045,9 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) {
// exception to our over-conservative rules about not jumping to
// statements following block literals with non-trivial cleanups.
RunCleanupsScope cleanupScope(*this);
- if (const ExprWithCleanups *cleanups =
- dyn_cast_or_null<ExprWithCleanups>(RV)) {
- enterFullExpression(cleanups);
- RV = cleanups->getSubExpr();
+ if (const FullExpr *fe = dyn_cast_or_null<FullExpr>(RV)) {
+ enterFullExpression(fe);
+ RV = fe->getSubExpr();
}
// FIXME: Clean this up by using an LValue for ReturnTemp,
@@ -1821,11 +1820,14 @@ llvm::Value* CodeGenFunction::EmitAsmInput(
// If this can't be a register or memory, i.e., has to be a constant
// (immediate or symbolic), try to emit it as such.
if (!Info.allowsRegister() && !Info.allowsMemory()) {
- llvm::APSInt Result;
+ if (Info.requiresImmediateConstant()) {
+ llvm::APSInt AsmConst = InputExpr->EvaluateKnownConstInt(getContext());
+ return llvm::ConstantInt::get(getLLVMContext(), AsmConst);
+ }
+
+ Expr::EvalResult Result;
if (InputExpr->EvaluateAsInt(Result, getContext()))
- return llvm::ConstantInt::get(getLLVMContext(), Result);
- assert(!Info.requiresImmediateConstant() &&
- "Required-immediate inlineasm arg isn't constant?");
+ return llvm::ConstantInt::get(getLLVMContext(), Result.Val.getInt());
}
if (Info.allowsRegister() || !Info.allowsMemory())
@@ -1848,7 +1850,7 @@ static llvm::MDNode *getAsmSrcLocInfo(const StringLiteral *Str,
SmallVector<llvm::Metadata *, 8> Locs;
// Add the location of the first line to the MDNode.
Locs.push_back(llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
- CGF.Int32Ty, Str->getLocStart().getRawEncoding())));
+ CGF.Int32Ty, Str->getBeginLoc().getRawEncoding())));
StringRef StrVal = Str->getString();
if (!StrVal.empty()) {
const SourceManager &SM = CGF.CGM.getContext().getSourceManager();
@@ -1979,6 +1981,11 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
diag::err_asm_invalid_type_in_input)
<< OutExpr->getType() << OutputConstraint;
}
+
+ // Update largest vector width for any vector types.
+ if (auto *VT = dyn_cast<llvm::VectorType>(ResultRegTypes.back()))
+ LargestVectorWidth = std::max(LargestVectorWidth,
+ VT->getPrimitiveSizeInBits());
} else {
ArgTypes.push_back(Dest.getAddress().getType());
Args.push_back(Dest.getPointer());
@@ -2000,6 +2007,10 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
Arg->getType()))
Arg = Builder.CreateBitCast(Arg, AdjTy);
+ // Update largest vector width for any vector types.
+ if (auto *VT = dyn_cast<llvm::VectorType>(Arg->getType()))
+ LargestVectorWidth = std::max(LargestVectorWidth,
+ VT->getPrimitiveSizeInBits());
if (Info.allowsRegister())
InOutConstraints += llvm::utostr(i);
else
@@ -2080,6 +2091,11 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
CGM.getDiags().Report(S.getAsmLoc(), diag::err_asm_invalid_type_in_input)
<< InputExpr->getType() << InputConstraint;
+ // Update largest vector width for any vector types.
+ if (auto *VT = dyn_cast<llvm::VectorType>(Arg->getType()))
+ LargestVectorWidth = std::max(LargestVectorWidth,
+ VT->getPrimitiveSizeInBits());
+
ArgTypes.push_back(Arg->getType());
Args.push_back(Arg);
Constraints += InputConstraint;
@@ -2272,7 +2288,7 @@ CodeGenFunction::GenerateCapturedStmtFunction(const CapturedStmt &S) {
"CapturedStmtInfo should be set when generating the captured function");
const CapturedDecl *CD = S.getCapturedDecl();
const RecordDecl *RD = S.getCapturedRecordDecl();
- SourceLocation Loc = S.getLocStart();
+ SourceLocation Loc = S.getBeginLoc();
assert(CD->hasBody() && "missing CapturedDecl body");
// Build the argument list.
@@ -2293,9 +2309,8 @@ CodeGenFunction::GenerateCapturedStmtFunction(const CapturedStmt &S) {
F->addFnAttr(llvm::Attribute::NoUnwind);
// Generate the function.
- StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args,
- CD->getLocation(),
- CD->getBody()->getLocStart());
+ StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
+ CD->getBody()->getBeginLoc());
// Set the context parameter in CapturedStmtInfo.
Address DeclPtr = GetAddrOfLocalVar(CD->getContextParam());
CapturedStmtInfo->setContextValue(Builder.CreateLoad(DeclPtr));
@@ -2305,8 +2320,9 @@ CodeGenFunction::GenerateCapturedStmtFunction(const CapturedStmt &S) {
Ctx.getTagDeclType(RD));
for (auto *FD : RD->fields()) {
if (FD->hasCapturedVLAType()) {
- auto *ExprArg = EmitLoadOfLValue(EmitLValueForField(Base, FD),
- S.getLocStart()).getScalarVal();
+ auto *ExprArg =
+ EmitLoadOfLValue(EmitLValueForField(Base, FD), S.getBeginLoc())
+ .getScalarVal();
auto VAT = FD->getCapturedVLAType();
VLASizeMap[VAT->getSizeExpr()] = ExprArg;
}
diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp
index 0d343f84c71f..eb1304d89345 100644
--- a/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/lib/CodeGen/CGStmtOpenMP.cpp
@@ -73,7 +73,7 @@ public:
assert(VD == VD->getCanonicalDecl() &&
"Canonical decl must be captured.");
DeclRefExpr DRE(
- const_cast<VarDecl *>(VD),
+ CGF.getContext(), const_cast<VarDecl *>(VD),
isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
InlinedShareds.isGlobalVarCaptured(VD)),
VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
@@ -191,7 +191,7 @@ public:
auto *VD = C.getCapturedVar();
assert(VD == VD->getCanonicalDecl() &&
"Canonical decl must be captured.");
- DeclRefExpr DRE(const_cast<VarDecl *>(VD),
+ DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
isCapturedVar(CGF, VD) ||
(CGF.CapturedStmtInfo &&
InlinedShareds.isGlobalVarCaptured(VD)),
@@ -222,7 +222,7 @@ LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
LambdaCaptureFields.lookup(OrigVD) ||
(CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
(CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
- DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), IsCaptured,
+ DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
return EmitLValue(&DRE);
}
@@ -385,12 +385,12 @@ static llvm::Function *emitOutlinedFunctionPrologue(
FunctionDecl *DebugFunctionDecl = nullptr;
if (!FO.UIntPtrCastRequired) {
FunctionProtoType::ExtProtoInfo EPI;
+ QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI);
DebugFunctionDecl = FunctionDecl::Create(
- Ctx, Ctx.getTranslationUnitDecl(), FO.S->getLocStart(),
- SourceLocation(), DeclarationName(), Ctx.VoidTy,
- Ctx.getTrivialTypeSourceInfo(
- Ctx.getFunctionType(Ctx.VoidTy, llvm::None, EPI)),
- SC_Static, /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
+ Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
+ SourceLocation(), DeclarationName(), FunctionTy,
+ Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
+ /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/false);
}
for (const FieldDecl *FD : RD->fields()) {
QualType ArgType = FD->getType();
@@ -422,7 +422,7 @@ static llvm::Function *emitOutlinedFunctionPrologue(
if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
Arg = ParmVarDecl::Create(
Ctx, DebugFunctionDecl,
- CapVar ? CapVar->getLocStart() : FD->getLocStart(),
+ CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
/*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
} else {
@@ -459,7 +459,7 @@ static llvm::Function *emitOutlinedFunctionPrologue(
// Generate the function.
CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
- FO.S->getLocStart(), CD->getBody()->getLocStart());
+ FO.S->getBeginLoc(), CD->getBody()->getBeginLoc());
unsigned Cnt = CD->getContextParamPosition();
I = FO.S->captures().begin();
for (const FieldDecl *FD : RD->fields()) {
@@ -602,7 +602,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
I->second.second,
I->second.first ? I->second.first->getType() : Arg->getType(),
AlignmentSource::Decl);
- CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getLocStart());
+ CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
} else {
auto EI = VLASizes.find(Arg);
if (EI != VLASizes.end()) {
@@ -611,12 +611,12 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
Arg->getType(),
AlignmentSource::Decl);
- CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getLocStart());
+ CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
}
}
CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
}
- CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getLocStart(),
+ CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getBeginLoc(),
F, CallArgs);
WrapperCGF.FinishFunction();
return WrapperF;
@@ -763,7 +763,7 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
const auto *VDInit =
cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
bool IsRegistered;
- DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
+ DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
/*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
(*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
LValue OriginalLVal = EmitLValue(&DRE);
@@ -878,8 +878,8 @@ bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
getContext().getTargetInfo().isTLSSupported()) {
assert(CapturedStmtInfo->lookup(VD) &&
"Copyin threadprivates should have been captured!");
- DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
- VK_LValue, (*IRef)->getExprLoc());
+ DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
+ (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
MasterAddr = EmitLValue(&DRE).getAddress();
LocalDeclMap.erase(VD);
} else {
@@ -953,11 +953,10 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit(
const auto *DestVD =
cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() {
- DeclRefExpr DRE(
- const_cast<VarDecl *>(OrigVD),
- /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
- OrigVD) != nullptr,
- (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
+ DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
+ /*RefersToEnclosingVariableOrCapture=*/
+ CapturedStmtInfo->lookup(OrigVD) != nullptr,
+ (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
return EmitLValue(&DRE).getAddress();
});
// Check if the variable is also a firstprivate: in this case IInit is
@@ -1183,7 +1182,7 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(
// Emit nowait reduction if nowait clause is present or directive is a
// parallel directive (it always has implicit barrier).
CGM.getOpenMPRuntime().emitReduction(
- *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
+ *this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
{WithNowait, SimpleReduction, ReductionKind});
}
}
@@ -1237,12 +1236,12 @@ static void emitCommonOMPParallelDirective(
CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
/*IgnoreResultAssign=*/true);
CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
- CGF, NumThreads, NumThreadsClause->getLocStart());
+ CGF, NumThreads, NumThreadsClause->getBeginLoc());
}
if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
CGF.CGM.getOpenMPRuntime().emitProcBindClause(
- CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
+ CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
}
const Expr *IfCond = nullptr;
for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
@@ -1261,7 +1260,7 @@ static void emitCommonOMPParallelDirective(
// parameters when necessary
CodeGenBoundParameters(CGF, S, CapturedVars);
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
- CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
+ CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
CapturedVars, IfCond);
}
@@ -1281,7 +1280,7 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
// propagation master's thread values of threadprivate variables to local
// instances of that variables of all other implicit threads.
CGF.CGM.getOpenMPRuntime().emitBarrierCall(
- CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
+ CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
/*ForceSimpleCall=*/true);
}
CGF.EmitOMPPrivateClause(S, PrivateScope);
@@ -1384,7 +1383,7 @@ bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
- DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
+ DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
CapturedStmtInfo->lookup(OrigVD) != nullptr,
VD->getInit()->getType(), VK_LValue,
VD->getInit()->getExprLoc());
@@ -1429,7 +1428,7 @@ void CodeGenFunction::EmitOMPLinearClauseFinal(
}
}
const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
- DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
+ DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
CapturedStmtInfo->lookup(OrigVD) != nullptr,
(*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
Address OrigAddr = EmitLValue(&DRE).getAddress();
@@ -1473,7 +1472,8 @@ static void emitAlignedClause(CodeGenFunction &CGF,
"alignment is not power of 2");
if (Alignment != 0) {
llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
- CGF.EmitAlignmentAssumption(PtrValue, Alignment);
+ CGF.EmitAlignmentAssumption(
+ PtrValue, E, /*No second loc needed*/ SourceLocation(), Alignment);
}
}
}
@@ -1497,7 +1497,7 @@ void CodeGenFunction::EmitOMPPrivateLoopCounters(
if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
VD->hasGlobalStorage()) {
(void)LoopScope.addPrivate(PrivateVD, [this, VD, E]() {
- DeclRefExpr DRE(const_cast<VarDecl *>(VD),
+ DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
E->getType(), VK_LValue, E->getExprLoc());
return EmitLValue(&DRE).getAddress();
@@ -1509,6 +1509,23 @@ void CodeGenFunction::EmitOMPPrivateLoopCounters(
}
++I;
}
+ // Privatize extra loop counters used in loops for ordered(n) clauses.
+ for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
+ if (!C->getNumForLoops())
+ continue;
+ for (unsigned I = S.getCollapsedNumber(),
+ E = C->getLoopNumIterations().size();
+ I < E; ++I) {
+ const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
+ const auto *VD = cast<VarDecl>(DRE->getDecl());
+ // Override only those variables that are really emitted already.
+ if (LocalDeclMap.count(VD)) {
+ (void)LoopScope.addPrivate(VD, [this, DRE, VD]() {
+ return CreateMemTemp(DRE->getType(), VD->getName());
+ });
+ }
+ }
+ }
}
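The extra counters privatized here come from doacross loop nests; in a hypothetical example such as the one below, i is the worksharing loop's iteration variable while the inner counter j is referenced only through the ordered(2) clause and therefore needs its own private copy (work() is a placeholder):

#pragma omp for ordered(2)
  for (int i = 1; i < n; ++i)
    for (int j = 0; j < m; ++j) {
#pragma omp ordered depend(sink : i - 1, j)
      work(i, j);
#pragma omp ordered depend(source)
    }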
static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
@@ -1627,7 +1644,7 @@ void CodeGenFunction::EmitOMPSimdFinal(
if (CED) {
OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
} else {
- DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
+ DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
/*RefersToEnclosingVariableOrCapture=*/false,
(*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
OrigAddr = EmitLValue(&DRE).getAddress();
@@ -1721,6 +1738,8 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
CGF.EmitOMPReductionClauseInit(S, LoopScope);
bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
(void)LoopScope.Privatize();
+ if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+ CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
S.getInc(),
[&S](CodeGenFunction &CGF) {
@@ -1785,7 +1804,7 @@ void CodeGenFunction::EmitOMPOuterLoop(
BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
} else {
BoolCondVal =
- RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL,
+ RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
}
@@ -1819,7 +1838,7 @@ void CodeGenFunction::EmitOMPOuterLoop(
else
EmitOMPSimdInit(S, IsMonotonic);
- SourceLocation Loc = S.getLocStart();
+ SourceLocation Loc = S.getBeginLoc();
// when 'distribute' is not combined with a 'for':
// while (idx <= UB) { BODY; ++idx; }
@@ -1851,7 +1870,7 @@ void CodeGenFunction::EmitOMPOuterLoop(
// Tell the runtime we are done.
auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
if (!DynamicOrOrdered)
- CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
+ CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
S.getDirectiveKind());
};
OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
@@ -1934,13 +1953,13 @@ void CodeGenFunction::EmitOMPForOuterLoop(
llvm::Value *UBVal = DispatchBounds.second;
CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
LoopArgs.Chunk};
- RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
+ RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
IVSigned, Ordered, DipatchRTInputValues);
} else {
CGOpenMPRuntime::StaticRTInput StaticInit(
IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
LoopArgs.ST, LoopArgs.Chunk);
- RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
+ RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
ScheduleKind, StaticInit);
}
@@ -1986,10 +2005,10 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop(
CGOpenMPRuntime::StaticRTInput StaticInit(
IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
- RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, StaticInit);
+ RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
// for combined 'distribute' and 'for' the increment expression of distribute
- // is store in DistInc. For 'distribute' alone, it is in Inc.
+ // is stored in DistInc. For 'distribute' alone, it is in Inc.
Expr *IncExpr;
if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
IncExpr = S.getDistInc();
@@ -2082,9 +2101,9 @@ emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
// distribute chunk
QualType IteratorTy = IVExpr->getType();
llvm::Value *LBVal =
- CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getLocStart());
+ CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
llvm::Value *UBVal =
- CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getLocStart());
+ CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
return {LBVal, UBVal};
}
@@ -2244,7 +2263,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
bool Ordered = false;
if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
if (OrderedClause->getNumForLoops())
- RT.emitDoacrossInit(*this, S);
+ RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
else
Ordered = true;
}
@@ -2270,7 +2289,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
// initialization of firstprivate variables and post-update of
// lastprivate variables.
CGM.getOpenMPRuntime().emitBarrierCall(
- *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
+ *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
/*ForceSimpleCall=*/true);
}
EmitOMPPrivateClause(S, LoopScope);
@@ -2279,19 +2298,33 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
EmitOMPPrivateLoopCounters(S, LoopScope);
EmitOMPLinearClause(S, LoopScope);
(void)LoopScope.Privatize();
+ if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+ CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
// Detect the loop schedule kind and chunk.
- llvm::Value *Chunk = nullptr;
+ const Expr *ChunkExpr = nullptr;
OpenMPScheduleTy ScheduleKind;
if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
ScheduleKind.Schedule = C->getScheduleKind();
ScheduleKind.M1 = C->getFirstScheduleModifier();
ScheduleKind.M2 = C->getSecondScheduleModifier();
- if (const Expr *Ch = C->getChunkSize()) {
- Chunk = EmitScalarExpr(Ch);
- Chunk = EmitScalarConversion(Chunk, Ch->getType(),
- S.getIterationVariable()->getType(),
- S.getLocStart());
+ ChunkExpr = C->getChunkSize();
+ } else {
+ // Default behaviour for schedule clause.
+ CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
+ *this, S, ScheduleKind.Schedule, ChunkExpr);
+ }
+ bool HasChunkSizeOne = false;
+ llvm::Value *Chunk = nullptr;
+ if (ChunkExpr) {
+ Chunk = EmitScalarExpr(ChunkExpr);
+ Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
+ S.getIterationVariable()->getType(),
+ S.getBeginLoc());
+ Expr::EvalResult Result;
+ if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
+ llvm::APSInt EvaluatedChunk = Result.Val.getInt();
+ HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
}
}
const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
@@ -2300,8 +2333,12 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
// If the static schedule kind is specified or if the ordered clause is
// specified, and if no monotonic modifier is specified, the effect will
// be as if the monotonic modifier was specified.
- if (RT.isStaticNonchunked(ScheduleKind.Schedule,
- /* Chunked */ Chunk != nullptr) &&
+ bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule,
+ /* Chunked */ Chunk != nullptr) && HasChunkSizeOne &&
+ isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
+ if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
+ /* Chunked */ Chunk != nullptr) ||
+ StaticChunkedOne) &&
!Ordered) {
if (isOpenMPSimdDirective(S.getDirectiveKind()))
EmitOMPSimdInit(S, /*IsMonotonic=*/true);
@@ -2312,27 +2349,42 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
// unspecified in this case.
CGOpenMPRuntime::StaticRTInput StaticInit(
IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
- UB.getAddress(), ST.getAddress());
- RT.emitForStaticInit(*this, S.getLocStart(), S.getDirectiveKind(),
+ UB.getAddress(), ST.getAddress(),
+ StaticChunkedOne ? Chunk : nullptr);
+ RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
ScheduleKind, StaticInit);
JumpDest LoopExit =
getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
// UB = min(UB, GlobalUB);
- EmitIgnoredExpr(S.getEnsureUpperBound());
+ if (!StaticChunkedOne)
+ EmitIgnoredExpr(S.getEnsureUpperBound());
// IV = LB;
EmitIgnoredExpr(S.getInit());
- // while (idx <= UB) { BODY; ++idx; }
- EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
- S.getInc(),
- [&S, LoopExit](CodeGenFunction &CGF) {
- CGF.EmitOMPLoopBody(S, LoopExit);
- CGF.EmitStopPoint(&S);
- },
- [](CodeGenFunction &) {});
+ // For unchunked static schedule generate:
+ //
+ // while (idx <= UB) {
+ // BODY;
+ // ++idx;
+ // }
+ //
+ // For static schedule with chunk one:
+ //
+ // while (IV <= PrevUB) {
+ // BODY;
+ // IV += ST;
+ // }
+ EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
+ StaticChunkedOne ? S.getCombinedParForInDistCond() : S.getCond(),
+ StaticChunkedOne ? S.getDistInc() : S.getInc(),
+ [&S, LoopExit](CodeGenFunction &CGF) {
+ CGF.EmitOMPLoopBody(S, LoopExit);
+ CGF.EmitStopPoint(&S);
+ },
+ [](CodeGenFunction &) {});
EmitBlock(LoopExit.getBlock());
// Tell the runtime we are done.
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
- CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
+ CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
S.getDirectiveKind());
};
OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
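A hypothetical directive that takes the StaticChunkedOne path above: a loop-bound-sharing combined construct with a literal chunk size of one, for which the static init now receives the chunk and the inner loop advances by ST against PrevUB instead of clamping against GlobalUB:

void add(float *a, const float *b, const float *c, int n) {
#pragma omp target teams distribute parallel for schedule(static, 1) \
    map(tofrom : a[0:n]) map(to : b[0:n], c[0:n])
  for (int i = 0; i < n; ++i)
    a[i] = b[i] + c[i];
}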
@@ -2351,11 +2403,10 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
LoopArguments, CGDispatchBounds);
}
if (isOpenMPSimdDirective(S.getDirectiveKind())) {
- EmitOMPSimdFinal(S,
- [IL, &S](CodeGenFunction &CGF) {
- return CGF.Builder.CreateIsNotNull(
- CGF.EmitLoadOfScalar(IL, S.getLocStart()));
- });
+ EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
+ return CGF.Builder.CreateIsNotNull(
+ CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
+ });
}
EmitOMPReductionClauseFinal(
S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
@@ -2365,17 +2416,17 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
emitPostUpdateForReductionClause(
*this, S, [IL, &S](CodeGenFunction &CGF) {
return CGF.Builder.CreateIsNotNull(
- CGF.EmitLoadOfScalar(IL, S.getLocStart()));
+ CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
});
// Emit final copy of the lastprivate variables if IsLastIter != 0.
if (HasLastprivateClause)
EmitOMPLastprivateClauseFinal(
S, isOpenMPSimdDirective(S.getDirectiveKind()),
- Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
+ Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
}
EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
return CGF.Builder.CreateIsNotNull(
- CGF.EmitLoadOfScalar(IL, S.getLocStart()));
+ CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
});
DoacrossCleanupScope.ForceCleanup();
// We're now done with the loop, so jump to the continuation block.
@@ -2432,7 +2483,7 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
// Emit an implicit barrier at the end.
if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
- CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
+ CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
}
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
@@ -2450,7 +2501,7 @@ void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
// Emit an implicit barrier at the end.
if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
- CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
+ CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
}
static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
@@ -2485,16 +2536,16 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
CGF.Builder.getInt32(0));
// Loop counter.
LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
- OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
+ OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
- OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
+ OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
// Generate condition for loop.
BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
- OK_Ordinary, S.getLocStart(), FPOptions());
+ OK_Ordinary, S.getBeginLoc(), FPOptions());
// Increment for loop counter.
UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
- S.getLocStart(), true);
+ S.getBeginLoc(), true);
auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
// Iterate through all sections and emit a switch construct:
// switch (IV) {
@@ -2509,7 +2560,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
// .omp.sections.exit:
llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
llvm::SwitchInst *SwitchStmt =
- CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getLocStart()),
+ CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
ExitBB, CS == nullptr ? 1 : CS->size());
if (CS) {
unsigned CaseNumber = 0;
@@ -2537,13 +2588,15 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
// initialization of firstprivate variables and post-update of lastprivate
// variables.
CGF.CGM.getOpenMPRuntime().emitBarrierCall(
- CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
+ CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
/*ForceSimpleCall=*/true);
}
CGF.EmitOMPPrivateClause(S, LoopScope);
HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
CGF.EmitOMPReductionClauseInit(S, LoopScope);
(void)LoopScope.Privatize();
+ if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+ CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
// Emit static non-chunked loop.
OpenMPScheduleTy ScheduleKind;
@@ -2552,20 +2605,20 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
/*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
LB.getAddress(), UB.getAddress(), ST.getAddress());
CGF.CGM.getOpenMPRuntime().emitForStaticInit(
- CGF, S.getLocStart(), S.getDirectiveKind(), ScheduleKind, StaticInit);
+ CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
// UB = min(UB, GlobalUB);
- llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
+ llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
// IV = LB;
- CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
+ CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
// while (idx <= UB) { BODY; ++idx; }
CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
[](CodeGenFunction &) {});
// Tell the runtime we are done.
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
- CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd(),
+ CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
S.getDirectiveKind());
};
CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
@@ -2573,7 +2626,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
// Emit post-update of the reduction variables if IsLastIter != 0.
emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
return CGF.Builder.CreateIsNotNull(
- CGF.EmitLoadOfScalar(IL, S.getLocStart()));
+ CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
});
// Emit final copy of the lastprivate variables if IsLastIter != 0.
@@ -2581,7 +2634,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
CGF.EmitOMPLastprivateClauseFinal(
S, /*NoFinals=*/false,
CGF.Builder.CreateIsNotNull(
- CGF.EmitLoadOfScalar(IL, S.getLocStart())));
+ CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
};
bool HasCancel = false;
@@ -2598,7 +2651,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
// Emit implicit barrier to synchronize threads and avoid data races on
// initialization of firstprivate variables.
- CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
+ CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
OMPD_unknown);
}
}
@@ -2610,7 +2663,7 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
}
// Emit an implicit barrier at the end.
if (!S.getSingleClause<OMPNowaitClause>()) {
- CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
+ CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
OMPD_sections);
}
}
@@ -2652,7 +2705,7 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
};
{
OMPLexicalScope Scope(*this, S, OMPD_unknown);
- CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
+ CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
CopyprivateVars, DestExprs,
SrcExprs, AssignmentOps);
}
@@ -2660,7 +2713,7 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
// init or if no 'nowait' clause was specified and no 'copyprivate' clause).
if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
CGM.getOpenMPRuntime().emitBarrierCall(
- *this, S.getLocStart(),
+ *this, S.getBeginLoc(),
S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
}
}
@@ -2671,7 +2724,7 @@ void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
};
OMPLexicalScope Scope(*this, S, OMPD_unknown);
- CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
+ CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}
void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
@@ -2685,7 +2738,7 @@ void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
OMPLexicalScope Scope(*this, S, OMPD_unknown);
CGM.getOpenMPRuntime().emitCriticalRegion(*this,
S.getDirectiveName().getAsString(),
- CodeGen, S.getLocStart(), Hint);
+ CodeGen, S.getBeginLoc(), Hint);
}
void CodeGenFunction::EmitOMPParallelForDirective(
@@ -2828,7 +2881,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
}
}
Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
- *this, S.getLocStart(), LHSs, RHSs, Data);
+ *this, S.getBeginLoc(), LHSs, RHSs, Data);
// Build list of dependences.
for (const auto *C : S.getClausesOfKind<OMPDependClause>())
for (const Expr *IRef : C->varlists())
@@ -2872,15 +2925,15 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
PrivatePtrs.emplace_back(VD, PrivatePtr);
CallArgs.push_back(PrivatePtr.getPointer());
}
- CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
+ CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
CopyFn, CallArgs);
for (const auto &Pair : LastprivateDstsOrigs) {
const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
- DeclRefExpr DRE(
- const_cast<VarDecl *>(OrigVD),
- /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
- OrigVD) != nullptr,
- Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
+ DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
+ /*RefersToEnclosingVariableOrCapture=*/
+ CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
+ Pair.second->getType(), VK_LValue,
+ Pair.second->getExprLoc());
Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
return CGF.EmitLValue(&DRE).getAddress();
});
@@ -2902,11 +2955,11 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
RedCG.emitAggregateType(CGF, Cnt);
// FIXME: This must removed once the runtime library is fixed.
// Emit required threadprivate variables for
- // initilizer/combiner/finalizer.
- CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
+ // initializer/combiner/finalizer.
+ CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
RedCG, Cnt);
Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
- CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
+ CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
Replacement =
Address(CGF.EmitScalarConversion(
Replacement.getPointer(), CGF.getContext().VoidPtrTy,
@@ -2948,17 +3001,17 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
RedCG.emitSharedLValue(CGF, Cnt);
RedCG.emitAggregateType(CGF, Cnt);
// The taskgroup descriptor variable is always implicit firstprivate and
- // privatized already during procoessing of the firstprivates.
+ // privatized already during processing of the firstprivates.
// FIXME: This must removed once the runtime library is fixed.
// Emit required threadprivate variables for
- // initilizer/combiner/finalizer.
- CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
+ // initializer/combiner/finalizer.
+ CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
RedCG, Cnt);
llvm::Value *ReductionsPtr =
CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]),
TaskgroupDescriptors[Cnt]->getExprLoc());
Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
- CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
+ CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
Replacement = Address(
CGF.EmitScalarConversion(
Replacement.getPointer(), CGF.getContext().VoidPtrTy,
@@ -3049,14 +3102,14 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
getContext().VoidPtrTy, ArrSize, ArrayType::Normal,
/*IndexTypeQuals=*/0);
BPVD = createImplicitFirstprivateForType(
- getContext(), Data, BaseAndPointersType, CD, S.getLocStart());
+ getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc());
PVD = createImplicitFirstprivateForType(
- getContext(), Data, BaseAndPointersType, CD, S.getLocStart());
+ getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc());
QualType SizesType = getContext().getConstantArrayType(
getContext().getSizeType(), ArrSize, ArrayType::Normal,
/*IndexTypeQuals=*/0);
SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
- S.getLocStart());
+ S.getBeginLoc());
TargetScope.addPrivate(
BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
TargetScope.addPrivate(PVD,
@@ -3091,7 +3144,7 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
PrivatePtrs.emplace_back(VD, PrivatePtr);
CallArgs.push_back(PrivatePtr.getPointer());
}
- CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
+ CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
CopyFn, CallArgs);
for (const auto &Pair : PrivatePtrs) {
Address Replacement(CGF.Builder.CreateLoad(Pair.second),
@@ -3122,7 +3175,7 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
SourceLocation());
- CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), S, OutlinedFn,
+ CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
SharedsTy, CapturedStruct, &IfCond, Data);
}
@@ -3149,7 +3202,7 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
const OMPTaskDataTy &Data) {
- CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
+ CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
SharedsTy, CapturedStruct, IfCond,
Data);
};
@@ -3158,15 +3211,15 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
void CodeGenFunction::EmitOMPTaskyieldDirective(
const OMPTaskyieldDirective &S) {
- CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
+ CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc());
}
void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
- CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
+ CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
}
void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
- CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
+ CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc());
}
void CodeGenFunction::EmitOMPTaskgroupDirective(
@@ -3195,7 +3248,7 @@ void CodeGenFunction::EmitOMPTaskgroupDirective(
}
}
llvm::Value *ReductionDesc =
- CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getLocStart(),
+ CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
LHSs, RHSs, Data);
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
CGF.EmitVarDecl(*VD);
@@ -3205,7 +3258,7 @@ void CodeGenFunction::EmitOMPTaskgroupDirective(
CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
};
OMPLexicalScope Scope(*this, S, OMPD_unknown);
- CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
+ CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getBeginLoc());
}
void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
@@ -3217,7 +3270,7 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
FlushClause->varlist_end());
return llvm::None;
}(),
- S.getLocStart());
+ S.getBeginLoc());
}
void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
@@ -3286,7 +3339,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
// on initialization of firstprivate variables and post-update of
// lastprivate variables.
CGM.getOpenMPRuntime().emitBarrierCall(
- *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
+ *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
/*ForceSimpleCall=*/true);
}
EmitOMPPrivateClause(S, LoopScope);
@@ -3297,6 +3350,8 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
EmitOMPPrivateLoopCounters(S, LoopScope);
(void)LoopScope.Privatize();
+ if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+ CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
// Detect the distribute schedule kind and chunk.
llvm::Value *Chunk = nullptr;
@@ -3307,8 +3362,12 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
Chunk = EmitScalarExpr(Ch);
Chunk = EmitScalarConversion(Chunk, Ch->getType(),
S.getIterationVariable()->getType(),
- S.getLocStart());
+ S.getBeginLoc());
}
+ } else {
+ // Default behaviour for dist_schedule clause.
+ CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
+ *this, S, ScheduleKind, Chunk);
}
const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
@@ -3321,14 +3380,19 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
// iteration space is divided into chunks that are approximately equal
// in size, and at most one chunk is distributed to each team of the
// league. The size of the chunks is unspecified in this case.
+ bool StaticChunked = RT.isStaticChunked(
+ ScheduleKind, /* Chunked */ Chunk != nullptr) &&
+ isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
if (RT.isStaticNonchunked(ScheduleKind,
- /* Chunked */ Chunk != nullptr)) {
+ /* Chunked */ Chunk != nullptr) ||
+ StaticChunked) {
if (isOpenMPSimdDirective(S.getDirectiveKind()))
EmitOMPSimdInit(S, /*IsMonotonic=*/true);
CGOpenMPRuntime::StaticRTInput StaticInit(
IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
- LB.getAddress(), UB.getAddress(), ST.getAddress());
- RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
+ LB.getAddress(), UB.getAddress(), ST.getAddress(),
+ StaticChunked ? Chunk : nullptr);
+ RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
StaticInit);
JumpDest LoopExit =
getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
@@ -3346,18 +3410,48 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
? S.getCombinedCond()
: S.getCond();
- // for distribute alone, codegen
- // while (idx <= UB) { BODY; ++idx; }
- // when combined with 'for' (e.g. as in 'distribute parallel for')
- // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
+ if (StaticChunked)
+ Cond = S.getCombinedDistCond();
+
+ // For static unchunked schedules generate:
+ //
+ // 1. For distribute alone, codegen
+ // while (idx <= UB) {
+ // BODY;
+ // ++idx;
+ // }
+ //
+ // 2. When combined with 'for' (e.g. as in 'distribute parallel for')
+ // while (idx <= UB) {
+ // <CodeGen rest of pragma>(LB, UB);
+ // idx += ST;
+ // }
+ //
+  // For static chunked schedules, generate:
+ //
+ // while (IV <= GlobalUB) {
+ // <CodeGen rest of pragma>(LB, UB);
+ // LB += ST;
+ // UB += ST;
+ // UB = min(UB, GlobalUB);
+ // IV = LB;
+ // }
+ //
EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
[&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
CodeGenLoop(CGF, S, LoopExit);
},
- [](CodeGenFunction &) {});
+ [&S, StaticChunked](CodeGenFunction &CGF) {
+ if (StaticChunked) {
+ CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
+ CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
+ CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
+ CGF.EmitIgnoredExpr(S.getCombinedInit());
+ }
+ });
EmitBlock(LoopExit.getBlock());
// Tell the runtime we are done.
- RT.emitForStaticFinish(*this, S.getLocStart(), S.getDirectiveKind());
+ RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind());
} else {
// Emit the outer loop, which requests its work chunk [LB..UB] from
// runtime and runs the inner loop to process it.
@@ -3370,38 +3464,25 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
if (isOpenMPSimdDirective(S.getDirectiveKind())) {
EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
return CGF.Builder.CreateIsNotNull(
- CGF.EmitLoadOfScalar(IL, S.getLocStart()));
+ CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
});
}
if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
!isOpenMPParallelDirective(S.getDirectiveKind()) &&
!isOpenMPTeamsDirective(S.getDirectiveKind())) {
- OpenMPDirectiveKind ReductionKind = OMPD_unknown;
- if (isOpenMPParallelDirective(S.getDirectiveKind()) &&
- isOpenMPSimdDirective(S.getDirectiveKind())) {
- ReductionKind = OMPD_parallel_for_simd;
- } else if (isOpenMPParallelDirective(S.getDirectiveKind())) {
- ReductionKind = OMPD_parallel_for;
- } else if (isOpenMPSimdDirective(S.getDirectiveKind())) {
- ReductionKind = OMPD_simd;
- } else if (!isOpenMPTeamsDirective(S.getDirectiveKind()) &&
- S.hasClausesOfKind<OMPReductionClause>()) {
- llvm_unreachable(
- "No reduction clauses is allowed in distribute directive.");
- }
- EmitOMPReductionClauseFinal(S, ReductionKind);
+ EmitOMPReductionClauseFinal(S, OMPD_simd);
// Emit post-update of the reduction variables if IsLastIter != 0.
emitPostUpdateForReductionClause(
*this, S, [IL, &S](CodeGenFunction &CGF) {
return CGF.Builder.CreateIsNotNull(
- CGF.EmitLoadOfScalar(IL, S.getLocStart()));
+ CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
});
}
// Emit final copy of the lastprivate variables if IsLastIter != 0.
if (HasLastprivateClause) {
EmitOMPLastprivateClauseFinal(
S, /*NoFinals=*/false,
- Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
+ Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
}
}
@@ -3448,7 +3529,7 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
- CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
+ CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
OutlinedFn, CapturedVars);
} else {
Action.Enter(CGF);
@@ -3456,7 +3537,7 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
}
};
OMPLexicalScope Scope(*this, S, OMPD_unknown);
- CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
+ CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getBeginLoc(), !C);
}
static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
@@ -3887,6 +3968,11 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
case OMPC_from:
case OMPC_use_device_ptr:
case OMPC_is_device_ptr:
+ case OMPC_unified_address:
+ case OMPC_unified_shared_memory:
+ case OMPC_reverse_offload:
+ case OMPC_dynamic_allocators:
+ case OMPC_atomic_default_mem_order:
llvm_unreachable("Clause is not allowed in 'omp atomic'.");
}
}
@@ -3903,13 +3989,13 @@ void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
}
const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers();
- if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS))
- enterFullExpression(EWC);
+ if (const auto *FE = dyn_cast<FullExpr>(CS))
+ enterFullExpression(FE);
// Processing for statements under 'atomic capture'.
if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
for (const Stmt *C : Compound->body()) {
- if (const auto *EWC = dyn_cast<ExprWithCleanups>(C))
- enterFullExpression(EWC);
+ if (const auto *FE = dyn_cast<FullExpr>(C))
+ enterFullExpression(FE);
}
}
@@ -3918,7 +4004,7 @@ void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
CGF.EmitStopPoint(CS);
emitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
S.getV(), S.getExpr(), S.getUpdateExpr(),
- S.isXLHSInRHSPart(), S.getLocStart());
+ S.isXLHSInRHSPart(), S.getBeginLoc());
};
OMPLexicalScope Scope(*this, S, OMPD_unknown);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
@@ -3986,6 +4072,16 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
IsOffloadEntry, CodeGen);
OMPLexicalScope Scope(CGF, S, OMPD_task);
+ auto &&SizeEmitter = [](CodeGenFunction &CGF, const OMPLoopDirective &D) {
+ OMPLoopScope(CGF, D);
+ // Emit calculation of the iterations count.
+ llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
+ NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
+ /*IsSigned=*/false);
+ return NumIterations;
+ };
+ CGM.getOpenMPRuntime().emitTargetNumIterationsCall(CGF, S, Device,
+ SizeEmitter);
CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device);
}
@@ -3996,6 +4092,8 @@ static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
(void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
CGF.EmitOMPPrivateClause(S, PrivateScope);
(void)PrivateScope.Privatize();
+ if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+ CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
}
@@ -4037,13 +4135,13 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;
CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
- S.getLocStart());
+ S.getBeginLoc());
}
OMPTeamsScope Scope(CGF, S);
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
- CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
+ CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
CapturedVars);
}
@@ -4076,6 +4174,8 @@ static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
CGF.EmitOMPPrivateClause(S, PrivateScope);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
+ if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+ CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
CGF.EmitStmt(CS->getCapturedStmt());
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
};
@@ -4394,7 +4494,7 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
void CodeGenFunction::EmitOMPCancellationPointDirective(
const OMPCancellationPointDirective &S) {
- CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
+ CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
S.getCancelRegion());
}
@@ -4407,7 +4507,7 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
break;
}
}
- CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
+ CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
S.getCancelRegion());
}
@@ -4634,6 +4734,8 @@ static void emitTargetParallelRegion(CodeGenFunction &CGF,
CGF.EmitOMPPrivateClause(S, PrivateScope);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
+ if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+ CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
// TODO: Add support for clauses.
CGF.EmitStmt(CS->getCapturedStmt());
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
@@ -4864,7 +4966,7 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
S, isOpenMPSimdDirective(S.getDirectiveKind()),
CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
- (*LIP)->getType(), S.getLocStart())));
+ (*LIP)->getType(), S.getBeginLoc())));
}
};
auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
@@ -4873,7 +4975,7 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
&Data](CodeGenFunction &CGF, PrePostActionTy &) {
OMPLoopScope PreInitScope(CGF, S);
- CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
+ CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
OutlinedFn, SharedsTy,
CapturedStruct, IfCond, Data);
};
@@ -4891,7 +4993,7 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
Data);
},
- S.getLocStart());
+ S.getBeginLoc());
}
}
@@ -4934,16 +5036,37 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective(
if (isOpenMPSimdDirective(D.getDirectiveKind())) {
emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
} else {
+ OMPPrivateScope LoopGlobals(CGF);
if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
for (const Expr *E : LD->counters()) {
- if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
- cast<DeclRefExpr>(E)->getDecl())) {
+ const auto *VD = dyn_cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+ if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
+ LValue GlobLVal = CGF.EmitLValue(E);
+ LoopGlobals.addPrivate(
+ VD, [&GlobLVal]() { return GlobLVal.getAddress(); });
+ }
+ if (isa<OMPCapturedExprDecl>(VD)) {
// Emit only those that were not explicitly referenced in clauses.
if (!CGF.LocalDeclMap.count(VD))
CGF.EmitVarDecl(*VD);
}
}
+ for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
+ if (!C->getNumForLoops())
+ continue;
+ for (unsigned I = LD->getCollapsedNumber(),
+ E = C->getLoopNumIterations().size();
+ I < E; ++I) {
+ if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
+ cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
+ // Emit only those that were not explicitly referenced in clauses.
+ if (!CGF.LocalDeclMap.count(VD))
+ CGF.EmitVarDecl(*VD);
+ }
+ }
+ }
}
+ LoopGlobals.Privatize();
CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
}
};
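
The StaticChunked path added above fires when dist_schedule carries an explicit chunk on a loop-bound-sharing combined construct. A minimal sketch of source that would exercise it, assuming an illustrative chunk size of 128 that is not taken from this patch:

void scale(float *A, int N) {
  // Hypothetical example: the explicit chunk on dist_schedule selects the
  // chunked static codegen path sketched in the comment above, where LB/UB
  // advance by the stride and UB is clamped to GlobalUB on each iteration.
  #pragma omp target teams distribute parallel for dist_schedule(static, 128)
  for (int i = 0; i < N; ++i)
    A[i] *= 2.0f;
}
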
diff --git a/lib/CodeGen/CGVTT.cpp b/lib/CodeGen/CGVTT.cpp
index b0a3a0bffa2e..fbd8146702a9 100644
--- a/lib/CodeGen/CGVTT.cpp
+++ b/lib/CodeGen/CGVTT.cpp
@@ -119,10 +119,10 @@ llvm::GlobalVariable *CodeGenVTables::GetAddrOfVTT(const CXXRecordDecl *RD) {
llvm::ArrayType *ArrayType =
llvm::ArrayType::get(CGM.Int8PtrTy, Builder.getVTTComponents().size());
+ unsigned Align = CGM.getDataLayout().getABITypeAlignment(CGM.Int8PtrTy);
- llvm::GlobalVariable *GV =
- CGM.CreateOrReplaceCXXRuntimeVariable(Name, ArrayType,
- llvm::GlobalValue::ExternalLinkage);
+ llvm::GlobalVariable *GV = CGM.CreateOrReplaceCXXRuntimeVariable(
+ Name, ArrayType, llvm::GlobalValue::ExternalLinkage, Align);
GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
return GV;
}
diff --git a/lib/CodeGen/CGVTables.cpp b/lib/CodeGen/CGVTables.cpp
index cc334637a831..bfb089ff908e 100644
--- a/lib/CodeGen/CGVTables.cpp
+++ b/lib/CodeGen/CGVTables.cpp
@@ -16,9 +16,9 @@
#include "CodeGenModule.h"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/RecordLayout.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Format.h"
#include "llvm/Transforms/Utils/Cloning.h"
@@ -128,7 +128,7 @@ static void resolveTopLevelMetadata(llvm::Function *Fn,
// they are referencing.
for (auto &BB : Fn->getBasicBlockList()) {
for (auto &I : BB) {
- if (auto *DII = dyn_cast<llvm::DbgInfoIntrinsic>(&I)) {
+ if (auto *DII = dyn_cast<llvm::DbgVariableIntrinsic>(&I)) {
auto *DILocal = DII->getVariable();
if (!DILocal->isResolved())
DILocal->resolve();
@@ -231,7 +231,7 @@ void CodeGenFunction::StartThunk(llvm::Function *Fn, GlobalDecl GD,
// Build FunctionArgs.
const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
- QualType ThisType = MD->getThisType(getContext());
+ QualType ThisType = MD->getThisType();
const FunctionProtoType *FPT = MD->getType()->getAs<FunctionProtoType>();
QualType ResultType;
if (IsUnprototyped)
@@ -304,13 +304,13 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr,
CGM.ErrorUnsupported(
MD, "non-trivial argument copy for return-adjusting thunk");
}
- EmitMustTailThunk(MD, AdjustedThisPtr, CalleePtr);
+ EmitMustTailThunk(CurGD, AdjustedThisPtr, CalleePtr);
return;
}
// Start building CallArgs.
CallArgList CallArgs;
- QualType ThisType = MD->getThisType(getContext());
+ QualType ThisType = MD->getThisType();
CallArgs.add(RValue::get(AdjustedThisPtr), ThisType);
if (isa<CXXDestructorDecl>(MD))
@@ -350,13 +350,12 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr,
: FPT->getReturnType();
ReturnValueSlot Slot;
if (!ResultType->isVoidType() &&
- CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect &&
- !hasScalarEvaluationKind(CurFnInfo->getReturnType()))
+ CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect)
Slot = ReturnValueSlot(ReturnValue, ResultType.isVolatileQualified());
// Now emit our call.
llvm::Instruction *CallOrInvoke;
- CGCallee Callee = CGCallee::forDirect(CalleePtr, MD);
+ CGCallee Callee = CGCallee::forDirect(CalleePtr, CurGD);
RValue RV = EmitCall(*CurFnInfo, Callee, Slot, CallArgs, &CallOrInvoke);
// Consider return adjustment if we have ThunkInfo.
@@ -375,7 +374,7 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Constant *CalleePtr,
FinishThunk();
}
-void CodeGenFunction::EmitMustTailThunk(const CXXMethodDecl *MD,
+void CodeGenFunction::EmitMustTailThunk(GlobalDecl GD,
llvm::Value *AdjustedThisPtr,
llvm::Value *CalleePtr) {
// Emitting a musttail call thunk doesn't use any of the CGCall.cpp machinery
@@ -412,7 +411,7 @@ void CodeGenFunction::EmitMustTailThunk(const CXXMethodDecl *MD,
// Apply the standard set of call attributes.
unsigned CallingConv;
llvm::AttributeList Attrs;
- CGM.ConstructAttributeList(CalleePtr->getName(), *CurFnInfo, MD, Attrs,
+ CGM.ConstructAttributeList(CalleePtr->getName(), *CurFnInfo, GD, Attrs,
CallingConv, /*AttrOnCallSite=*/true);
Call->setAttributes(Attrs);
Call->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv));
@@ -756,9 +755,11 @@ CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD,
if (Linkage == llvm::GlobalVariable::AvailableExternallyLinkage)
Linkage = llvm::GlobalVariable::InternalLinkage;
+ unsigned Align = CGM.getDataLayout().getABITypeAlignment(VTType);
+
// Create the variable that will hold the construction vtable.
llvm::GlobalVariable *VTable =
- CGM.CreateOrReplaceCXXRuntimeVariable(Name, VTType, Linkage);
+ CGM.CreateOrReplaceCXXRuntimeVariable(Name, VTType, Linkage, Align);
CGM.setGVProperties(VTable, RD);
// V-tables are always unnamed_addr.
@@ -1020,8 +1021,8 @@ void CodeGenModule::EmitVTableTypeMetadata(llvm::GlobalVariable *VTable,
AP.second.AddressPointIndex));
// Sort the address points for determinism.
- llvm::sort(AddressPoints.begin(), AddressPoints.end(),
- [this](const AddressPoint &AP1, const AddressPoint &AP2) {
+ llvm::sort(AddressPoints, [this](const AddressPoint &AP1,
+ const AddressPoint &AP2) {
if (&AP1 == &AP2)
return false;
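
The sort change in this file moves from the iterator-pair overload to the range overload of llvm::sort; the two spellings are equivalent. A minimal sketch, assuming llvm/ADT/STLExtras.h and an illustrative comparator:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

// Minimal sketch: the range form avoids repeating the container name and is
// otherwise equivalent to the iterator form replaced in the hunk above.
static void sortAddressPointsLike(llvm::SmallVectorImpl<int> &V) {
  llvm::sort(V.begin(), V.end(), [](int A, int B) { return A < B; }); // old form
  llvm::sort(V, [](int A, int B) { return A < B; });                  // new form
}
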
diff --git a/lib/CodeGen/CGValue.h b/lib/CodeGen/CGValue.h
index 0dcbea423ad7..da8a8efb840b 100644
--- a/lib/CodeGen/CGValue.h
+++ b/lib/CodeGen/CGValue.h
@@ -562,7 +562,10 @@ public:
}
void setVolatile(bool flag) {
- Quals.setVolatile(flag);
+ if (flag)
+ Quals.addVolatile();
+ else
+ Quals.removeVolatile();
}
Qualifiers::ObjCLifetime getObjCLifetime() const {
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 2a0f4f0e83ec..29c6793c601e 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -102,4 +102,5 @@ add_clang_library(clangCodeGen
clangBasic
clangFrontend
clangLex
+ clangSerialization
)
diff --git a/lib/CodeGen/CodeGenABITypes.cpp b/lib/CodeGen/CodeGenABITypes.cpp
index c152291b15b9..27f5d53ffe11 100644
--- a/lib/CodeGen/CodeGenABITypes.cpp
+++ b/lib/CodeGen/CodeGenABITypes.cpp
@@ -20,7 +20,6 @@
#include "CGRecordLayout.h"
#include "CodeGenModule.h"
#include "clang/CodeGen/CGFunctionInfo.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "clang/Lex/HeaderSearchOptions.h"
#include "clang/Lex/PreprocessorOptions.h"
diff --git a/lib/CodeGen/CodeGenAction.cpp b/lib/CodeGen/CodeGenAction.cpp
index d499364002f0..fd4506f2d197 100644
--- a/lib/CodeGen/CodeGenAction.cpp
+++ b/lib/CodeGen/CodeGenAction.cpp
@@ -127,6 +127,7 @@ namespace clang {
CodeGenOpts, C, CoverageInfo)),
LinkModules(std::move(LinkModules)) {
FrontendTimesIsEnabled = TimePasses;
+ llvm::TimePassesIsEnabled = TimePasses;
}
llvm::Module *getModule() const { return Gen->GetModule(); }
std::unique_ptr<llvm::Module> takeModule() {
@@ -548,12 +549,16 @@ const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc(
SourceLocation DILoc;
if (D.isLocationAvailable()) {
- D.getLocation(&Filename, &Line, &Column);
- const FileEntry *FE = FileMgr.getFile(Filename);
- if (FE && Line > 0) {
- // If -gcolumn-info was not used, Column will be 0. This upsets the
- // source manager, so pass 1 if Column is not set.
- DILoc = SourceMgr.translateFileLineCol(FE, Line, Column ? Column : 1);
+ D.getLocation(Filename, Line, Column);
+ if (Line > 0) {
+ const FileEntry *FE = FileMgr.getFile(Filename);
+ if (!FE)
+ FE = FileMgr.getFile(D.getAbsolutePath());
+ if (FE) {
+ // If -gcolumn-info was not used, Column will be 0. This upsets the
+ // source manager, so pass 1 if Column is not set.
+ DILoc = SourceMgr.translateFileLineCol(FE, Line, Column ? Column : 1);
+ }
}
BadDebugInfo = DILoc.isInvalid();
}
diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp
index 3c582688e91e..1713e40c312b 100644
--- a/lib/CodeGen/CodeGenFunction.cpp
+++ b/lib/CodeGen/CodeGenFunction.cpp
@@ -28,10 +28,10 @@
#include "clang/AST/StmtCXX.h"
#include "clang/AST/StmtObjC.h"
#include "clang/Basic/Builtins.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
-#include "clang/Frontend/CodeGenOptions.h"
-#include "clang/Sema/SemaDiagnostic.h"
+#include "clang/Frontend/FrontendDiagnostic.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Intrinsics.h"
@@ -430,10 +430,25 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
NormalCleanupDest = Address::invalid();
}
- // Add the required-vector-width attribute.
- if (LargestVectorWidth != 0)
- CurFn->addFnAttr("min-legal-vector-width",
- llvm::utostr(LargestVectorWidth));
+ // Scan function arguments for vector width.
+ for (llvm::Argument &A : CurFn->args())
+ if (auto *VT = dyn_cast<llvm::VectorType>(A.getType()))
+ LargestVectorWidth = std::max(LargestVectorWidth,
+ VT->getPrimitiveSizeInBits());
+
+ // Update vector width based on return type.
+ if (auto *VT = dyn_cast<llvm::VectorType>(CurFn->getReturnType()))
+ LargestVectorWidth = std::max(LargestVectorWidth,
+ VT->getPrimitiveSizeInBits());
+
+ // Add the required-vector-width attribute. This contains the max width from:
+ // 1. min-vector-width attribute used in the source program.
+ // 2. Any builtins used that have a vector width specified.
+ // 3. Values passed in and out of inline assembly.
+ // 4. Width of vector arguments and return types for this function.
+  // 5. Width of vector arguments and return types for functions called by this
+ // function.
+ CurFn->addFnAttr("min-legal-vector-width", llvm::utostr(LargestVectorWidth));
}
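
With this change the attribute is derived not only from explicit min_vector_width attributes and vector builtins, but also from the function's own vector parameter and return types. A minimal sketch, assuming the GCC vector_size extension, of a function that would now receive "min-legal-vector-width"="256":

typedef float v8f32 __attribute__((vector_size(32)));

// Hypothetical example: no builtins and no explicit attribute, yet the 256-bit
// parameter and return types alone now raise LargestVectorWidth to 256.
v8f32 add8(v8f32 A, v8f32 B) { return A + B; }
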
/// ShouldInstrumentFunction - Return true if the current function should be
@@ -772,9 +787,11 @@ static bool endsWithReturn(const Decl* F) {
return false;
}
-static void markAsIgnoreThreadCheckingAtRuntime(llvm::Function *Fn) {
- Fn->addFnAttr("sanitize_thread_no_checking_at_run_time");
- Fn->removeFnAttr(llvm::Attribute::SanitizeThread);
+void CodeGenFunction::markAsIgnoreThreadCheckingAtRuntime(llvm::Function *Fn) {
+ if (SanOpts.has(SanitizerKind::Thread)) {
+ Fn->addFnAttr("sanitize_thread_no_checking_at_run_time");
+ Fn->removeFnAttr(llvm::Attribute::SanitizeThread);
+ }
}
static bool matchesStlAllocatorFn(const Decl *D, const ASTContext &Ctx) {
@@ -866,7 +883,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
Fn->addFnAttr(llvm::Attribute::SanitizeHWAddress);
if (SanOpts.has(SanitizerKind::Thread))
Fn->addFnAttr(llvm::Attribute::SanitizeThread);
- if (SanOpts.has(SanitizerKind::Memory))
+ if (SanOpts.hasOneOf(SanitizerKind::Memory | SanitizerKind::KernelMemory))
Fn->addFnAttr(llvm::Attribute::SanitizeMemory);
if (SanOpts.has(SanitizerKind::SafeStack))
Fn->addFnAttr(llvm::Attribute::SafeStack);
@@ -887,10 +904,6 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
(OMD->getSelector().isUnarySelector() && II->isStr(".cxx_destruct"))) {
markAsIgnoreThreadCheckingAtRuntime(Fn);
}
- } else if (const auto *FD = dyn_cast_or_null<FunctionDecl>(D)) {
- IdentifierInfo *II = FD->getIdentifier();
- if (II && II->isStr("__destroy_helper_block_"))
- markAsIgnoreThreadCheckingAtRuntime(Fn);
}
}
@@ -903,21 +916,21 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
}
// Apply xray attributes to the function (as a string, for now)
- bool InstrumentXray = ShouldXRayInstrumentFunction() &&
- CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
- XRayInstrKind::Function);
- if (D && InstrumentXray) {
+ if (D) {
if (const auto *XRayAttr = D->getAttr<XRayInstrumentAttr>()) {
- if (XRayAttr->alwaysXRayInstrument())
- Fn->addFnAttr("function-instrument", "xray-always");
- if (XRayAttr->neverXRayInstrument())
- Fn->addFnAttr("function-instrument", "xray-never");
- if (const auto *LogArgs = D->getAttr<XRayLogArgsAttr>()) {
- Fn->addFnAttr("xray-log-args",
- llvm::utostr(LogArgs->getArgumentCount()));
+ if (CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
+ XRayInstrKind::Function)) {
+ if (XRayAttr->alwaysXRayInstrument() && ShouldXRayInstrumentFunction())
+ Fn->addFnAttr("function-instrument", "xray-always");
+ if (XRayAttr->neverXRayInstrument())
+ Fn->addFnAttr("function-instrument", "xray-never");
+ if (const auto *LogArgs = D->getAttr<XRayLogArgsAttr>())
+ if (ShouldXRayInstrumentFunction())
+ Fn->addFnAttr("xray-log-args",
+ llvm::utostr(LogArgs->getArgumentCount()));
}
} else {
- if (!CGM.imbueXRayAttrs(Fn, Loc))
+ if (ShouldXRayInstrumentFunction() && !CGM.imbueXRayAttrs(Fn, Loc))
Fn->addFnAttr(
"xray-instruction-threshold",
llvm::itostr(CGM.getCodeGenOpts().XRayInstructionThreshold));
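
The restructured block above decouples the attribute-driven XRay markings from the instrumentation-bundle and threshold checks. A minimal sketch of the source-level attributes this block consumes, as illustrative declarations not taken from the patch:

// Hypothetical example: the first function gets "function-instrument"
// "xray-always" when function instrumentation is enabled; the second keeps
// its "xray-never" marking regardless of ShouldXRayInstrumentFunction().
__attribute__((xray_always_instrument)) void hot_path(void);
__attribute__((xray_never_instrument)) void cold_path(void);
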
@@ -981,6 +994,13 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
if (FD->isMain())
Fn->addFnAttr(llvm::Attribute::NoRecurse);
+ // If a custom alignment is used, force realigning to this alignment on
+ // any main function which certainly will need it.
+ if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
+ if ((FD->isMain() || FD->isMSVCRTEntryPoint()) &&
+ CGM.getCodeGenOpts().StackAlignment)
+ Fn->addFnAttr("stackrealign");
+
llvm::BasicBlock *EntryBB = createBasicBlock("entry", CurFn);
// Create a marker to make it easy to insert allocas into the entryblock
@@ -1053,9 +1073,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
// Count the implicit return.
if (!endsWithReturn(D))
++NumReturnExprs;
- } else if (CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect &&
- !hasScalarEvaluationKind(CurFnInfo->getReturnType())) {
- // Indirect aggregate return; emit returned value directly into sret slot.
+ } else if (CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect) {
+ // Indirect return; emit returned value directly into sret slot.
// This reduces code size, and affects correctness in C++.
auto AI = CurFn->arg_begin();
if (CurFnInfo->getReturnInfo().isSRetAfterThis())
@@ -1137,7 +1156,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
if (CXXABIThisValue) {
SanitizerSet SkippedChecks;
SkippedChecks.set(SanitizerKind::ObjectSize, true);
- QualType ThisTy = MD->getThisType(getContext());
+ QualType ThisTy = MD->getThisType();
// If this is the call operator of a lambda with no capture-default, it
// may have a static invoker function, which may call this operator with
@@ -1183,8 +1202,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD,
LargestVectorWidth = VecWidth->getVectorWidth();
}
-void CodeGenFunction::EmitFunctionBody(FunctionArgList &Args,
- const Stmt *Body) {
+void CodeGenFunction::EmitFunctionBody(const Stmt *Body) {
incrementProfileCounter(Body);
if (const CompoundStmt *S = dyn_cast<CompoundStmt>(Body))
EmitCompoundStmtWithoutScope(*S);
@@ -1238,7 +1256,7 @@ QualType CodeGenFunction::BuildFunctionArgList(GlobalDecl GD,
const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD);
if (MD && MD->isInstance()) {
if (CGM.getCXXABI().HasThisReturn(GD))
- ResTy = MD->getThisType(getContext());
+ ResTy = MD->getThisType();
else if (CGM.getCXXABI().hasMostDerivedReturn(GD))
ResTy = CGM.getContext().VoidPtrTy;
CGM.getCXXABI().buildThisParam(*this, Args);
@@ -1352,7 +1370,7 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
// copy-constructors.
emitImplicitAssignmentOperatorBody(Args);
} else if (Body) {
- EmitFunctionBody(Args, Body);
+ EmitFunctionBody(Body);
} else
llvm_unreachable("no definition for emitted function");
@@ -1493,10 +1511,11 @@ bool CodeGenFunction::ConstantFoldsToSimpleInteger(const Expr *Cond,
bool AllowLabels) {
// FIXME: Rename and handle conversion of other evaluatable things
// to bool.
- llvm::APSInt Int;
- if (!Cond->EvaluateAsInt(Int, getContext()))
+ Expr::EvalResult Result;
+ if (!Cond->EvaluateAsInt(Result, getContext()))
return false; // Not foldable, not integer or not fully evaluatable.
+ llvm::APSInt Int = Result.Val.getInt();
if (!AllowLabels && CodeGenFunction::ContainsLabel(Cond))
return false; // Contains a label.
@@ -1681,7 +1700,7 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond,
// create metadata that specifies that the branch is unpredictable.
// Don't bother if not optimizing because that metadata would not be used.
llvm::MDNode *Unpredictable = nullptr;
- auto *Call = dyn_cast<CallExpr>(Cond);
+ auto *Call = dyn_cast<CallExpr>(Cond->IgnoreImpCasts());
if (Call && CGM.getCodeGenOpts().OptimizationLevel != 0) {
auto *FD = dyn_cast_or_null<FunctionDecl>(Call->getCalleeDecl());
if (FD && FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) {
@@ -2089,9 +2108,8 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) {
SanitizerScope SanScope(this);
llvm::Value *Zero = llvm::Constant::getNullValue(Size->getType());
llvm::Constant *StaticArgs[] = {
- EmitCheckSourceLocation(size->getLocStart()),
- EmitCheckTypeDescriptor(size->getType())
- };
+ EmitCheckSourceLocation(size->getBeginLoc()),
+ EmitCheckTypeDescriptor(size->getType())};
EmitCheck(std::make_pair(Builder.CreateICmpSGT(Size, Zero),
SanitizerKind::VLABound),
SanitizerHandler::VLABoundNotPositive, StaticArgs, Size);
@@ -2189,6 +2207,49 @@ void CodeGenFunction::unprotectFromPeepholes(PeepholeProtection protection) {
protection.Inst->eraseFromParent();
}
+void CodeGenFunction::EmitAlignmentAssumption(llvm::Value *PtrValue,
+ QualType Ty, SourceLocation Loc,
+ SourceLocation AssumptionLoc,
+ llvm::Value *Alignment,
+ llvm::Value *OffsetValue) {
+ llvm::Value *TheCheck;
+ llvm::Instruction *Assumption = Builder.CreateAlignmentAssumption(
+ CGM.getDataLayout(), PtrValue, Alignment, OffsetValue, &TheCheck);
+ if (SanOpts.has(SanitizerKind::Alignment)) {
+ EmitAlignmentAssumptionCheck(PtrValue, Ty, Loc, AssumptionLoc, Alignment,
+ OffsetValue, TheCheck, Assumption);
+ }
+}
+
+void CodeGenFunction::EmitAlignmentAssumption(llvm::Value *PtrValue,
+ QualType Ty, SourceLocation Loc,
+ SourceLocation AssumptionLoc,
+ unsigned Alignment,
+ llvm::Value *OffsetValue) {
+ llvm::Value *TheCheck;
+ llvm::Instruction *Assumption = Builder.CreateAlignmentAssumption(
+ CGM.getDataLayout(), PtrValue, Alignment, OffsetValue, &TheCheck);
+ if (SanOpts.has(SanitizerKind::Alignment)) {
+ llvm::Value *AlignmentVal = llvm::ConstantInt::get(IntPtrTy, Alignment);
+ EmitAlignmentAssumptionCheck(PtrValue, Ty, Loc, AssumptionLoc, AlignmentVal,
+ OffsetValue, TheCheck, Assumption);
+ }
+}
+
+void CodeGenFunction::EmitAlignmentAssumption(llvm::Value *PtrValue,
+ const Expr *E,
+ SourceLocation AssumptionLoc,
+ unsigned Alignment,
+ llvm::Value *OffsetValue) {
+ if (auto *CE = dyn_cast<CastExpr>(E))
+ E = CE->getSubExprAsWritten();
+ QualType Ty = E->getType();
+ SourceLocation Loc = E->getExprLoc();
+
+ EmitAlignmentAssumption(PtrValue, Ty, Loc, AssumptionLoc, Alignment,
+ OffsetValue);
+}
+
llvm::Value *CodeGenFunction::EmitAnnotationCall(llvm::Value *AnnotationFn,
llvm::Value *AnnotatedVal,
StringRef AnnotationStr,
@@ -2225,7 +2286,7 @@ Address CodeGenFunction::EmitFieldAnnotations(const FieldDecl *D,
// annotation on the first field of a struct and annotation on the struct
// itself.
if (VTy != CGM.Int8PtrTy)
- V = Builder.Insert(new llvm::BitCastInst(V, CGM.Int8PtrTy));
+ V = Builder.CreateBitCast(V, CGM.Int8PtrTy);
V = EmitAnnotationCall(F, V, I->getAnnotation(), D->getLocation());
V = Builder.CreateBitCast(V, VTy);
}
@@ -2272,7 +2333,7 @@ static bool hasRequiredFeatures(const SmallVectorImpl<StringRef> &ReqFeatures,
// Now build up the set of caller features and verify that all the required
// features are there.
llvm::StringMap<bool> CallerFeatureMap;
- CGM.getFunctionFeatureMap(CallerFeatureMap, FD);
+ CGM.getFunctionFeatureMap(CallerFeatureMap, GlobalDecl().getWithDecl(FD));
// If we have at least one of the features in the feature list return
// true, otherwise return false.
@@ -2280,14 +2341,13 @@ static bool hasRequiredFeatures(const SmallVectorImpl<StringRef> &ReqFeatures,
ReqFeatures.begin(), ReqFeatures.end(), [&](StringRef Feature) {
SmallVector<StringRef, 1> OrFeatures;
Feature.split(OrFeatures, '|');
- return std::any_of(OrFeatures.begin(), OrFeatures.end(),
- [&](StringRef Feature) {
- if (!CallerFeatureMap.lookup(Feature)) {
- FirstMissing = Feature.str();
- return false;
- }
- return true;
- });
+ return llvm::any_of(OrFeatures, [&](StringRef Feature) {
+ if (!CallerFeatureMap.lookup(Feature)) {
+ FirstMissing = Feature.str();
+ return false;
+ }
+ return true;
+ });
});
}
@@ -2319,7 +2379,7 @@ void CodeGenFunction::checkTargetFeatures(const CallExpr *E,
return;
StringRef(FeatureList).split(ReqFeatures, ',');
if (!hasRequiredFeatures(ReqFeatures, CGM, FD, MissingFeature))
- CGM.getDiags().Report(E->getLocStart(), diag::err_builtin_needs_feature)
+ CGM.getDiags().Report(E->getBeginLoc(), diag::err_builtin_needs_feature)
<< TargetDecl->getDeclName()
<< CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID);
@@ -2345,7 +2405,7 @@ void CodeGenFunction::checkTargetFeatures(const CallExpr *E,
ReqFeatures.push_back(F.getKey());
}
if (!hasRequiredFeatures(ReqFeatures, CGM, FD, MissingFeature))
- CGM.getDiags().Report(E->getLocStart(), diag::err_function_needs_feature)
+ CGM.getDiags().Report(E->getBeginLoc(), diag::err_function_needs_feature)
<< FD->getDeclName() << TargetDecl->getDeclName() << MissingFeature;
}
}
@@ -2359,91 +2419,81 @@ void CodeGenFunction::EmitSanitizerStatReport(llvm::SanitizerStatKind SSK) {
CGM.getSanStats().create(IRB, SSK);
}
-llvm::Value *CodeGenFunction::FormResolverCondition(
- const TargetMultiVersionResolverOption &RO) {
- llvm::Value *TrueCondition = nullptr;
- if (!RO.ParsedAttribute.Architecture.empty())
- TrueCondition = EmitX86CpuIs(RO.ParsedAttribute.Architecture);
+llvm::Value *
+CodeGenFunction::FormResolverCondition(const MultiVersionResolverOption &RO) {
+ llvm::Value *Condition = nullptr;
+
+ if (!RO.Conditions.Architecture.empty())
+ Condition = EmitX86CpuIs(RO.Conditions.Architecture);
- if (!RO.ParsedAttribute.Features.empty()) {
- SmallVector<StringRef, 8> FeatureList;
- llvm::for_each(RO.ParsedAttribute.Features,
- [&FeatureList](const std::string &Feature) {
- FeatureList.push_back(StringRef{Feature}.substr(1));
- });
- llvm::Value *FeatureCmp = EmitX86CpuSupports(FeatureList);
- TrueCondition = TrueCondition ? Builder.CreateAnd(TrueCondition, FeatureCmp)
- : FeatureCmp;
+ if (!RO.Conditions.Features.empty()) {
+ llvm::Value *FeatureCond = EmitX86CpuSupports(RO.Conditions.Features);
+ Condition =
+ Condition ? Builder.CreateAnd(Condition, FeatureCond) : FeatureCond;
}
- return TrueCondition;
+ return Condition;
}
-void CodeGenFunction::EmitTargetMultiVersionResolver(
- llvm::Function *Resolver,
- ArrayRef<TargetMultiVersionResolverOption> Options) {
- assert((getContext().getTargetInfo().getTriple().getArch() ==
- llvm::Triple::x86 ||
- getContext().getTargetInfo().getTriple().getArch() ==
- llvm::Triple::x86_64) &&
- "Only implemented for x86 targets");
-
- // Main function's basic block.
- llvm::BasicBlock *CurBlock = createBasicBlock("entry", Resolver);
- Builder.SetInsertPoint(CurBlock);
- EmitX86CpuInit();
+static void CreateMultiVersionResolverReturn(CodeGenModule &CGM,
+ llvm::Function *Resolver,
+ CGBuilderTy &Builder,
+ llvm::Function *FuncToReturn,
+ bool SupportsIFunc) {
+ if (SupportsIFunc) {
+ Builder.CreateRet(FuncToReturn);
+ return;
+ }
- llvm::Function *DefaultFunc = nullptr;
- for (const TargetMultiVersionResolverOption &RO : Options) {
- Builder.SetInsertPoint(CurBlock);
- llvm::Value *TrueCondition = FormResolverCondition(RO);
+ llvm::SmallVector<llvm::Value *, 10> Args;
+ llvm::for_each(Resolver->args(),
+ [&](llvm::Argument &Arg) { Args.push_back(&Arg); });
- if (!TrueCondition) {
- DefaultFunc = RO.Function;
- } else {
- llvm::BasicBlock *RetBlock = createBasicBlock("ro_ret", Resolver);
- llvm::IRBuilder<> RetBuilder(RetBlock);
- RetBuilder.CreateRet(RO.Function);
- CurBlock = createBasicBlock("ro_else", Resolver);
- Builder.CreateCondBr(TrueCondition, RetBlock, CurBlock);
- }
- }
+ llvm::CallInst *Result = Builder.CreateCall(FuncToReturn, Args);
+ Result->setTailCallKind(llvm::CallInst::TCK_MustTail);
- assert(DefaultFunc && "No default version?");
- // Emit return from the 'else-ist' block.
- Builder.SetInsertPoint(CurBlock);
- Builder.CreateRet(DefaultFunc);
+ if (Resolver->getReturnType()->isVoidTy())
+ Builder.CreateRetVoid();
+ else
+ Builder.CreateRet(Result);
}
-void CodeGenFunction::EmitCPUDispatchMultiVersionResolver(
- llvm::Function *Resolver,
- ArrayRef<CPUDispatchMultiVersionResolverOption> Options) {
+void CodeGenFunction::EmitMultiVersionResolver(
+ llvm::Function *Resolver, ArrayRef<MultiVersionResolverOption> Options) {
assert((getContext().getTargetInfo().getTriple().getArch() ==
llvm::Triple::x86 ||
getContext().getTargetInfo().getTriple().getArch() ==
llvm::Triple::x86_64) &&
"Only implemented for x86 targets");
+ bool SupportsIFunc = getContext().getTargetInfo().supportsIFunc();
+
// Main function's basic block.
llvm::BasicBlock *CurBlock = createBasicBlock("resolver_entry", Resolver);
Builder.SetInsertPoint(CurBlock);
EmitX86CpuInit();
- for (const CPUDispatchMultiVersionResolverOption &RO : Options) {
+ for (const MultiVersionResolverOption &RO : Options) {
Builder.SetInsertPoint(CurBlock);
-
- // "generic" case should catch-all.
- if (RO.FeatureMask == 0) {
- Builder.CreateRet(RO.Function);
+ llvm::Value *Condition = FormResolverCondition(RO);
+
+ // The 'default' or 'generic' case.
+ if (!Condition) {
+ assert(&RO == Options.end() - 1 &&
+ "Default or Generic case must be last");
+ CreateMultiVersionResolverReturn(CGM, Resolver, Builder, RO.Function,
+ SupportsIFunc);
return;
}
+
llvm::BasicBlock *RetBlock = createBasicBlock("resolver_return", Resolver);
- llvm::IRBuilder<> RetBuilder(RetBlock);
- RetBuilder.CreateRet(RO.Function);
+ CGBuilderTy RetBuilder(*this, RetBlock);
+ CreateMultiVersionResolverReturn(CGM, Resolver, RetBuilder, RO.Function,
+ SupportsIFunc);
CurBlock = createBasicBlock("resolver_else", Resolver);
- llvm::Value *TrueCondition = EmitX86CpuSupports(RO.FeatureMask);
- Builder.CreateCondBr(TrueCondition, RetBlock, CurBlock);
+ Builder.CreateCondBr(Condition, RetBlock, CurBlock);
}
+ // If no generic/default, emit an unreachable.
Builder.SetInsertPoint(CurBlock);
llvm::CallInst *TrapCall = EmitTrapCall(llvm::Intrinsic::trap);
TrapCall->setDoesNotReturn();
@@ -2452,6 +2502,61 @@ void CodeGenFunction::EmitCPUDispatchMultiVersionResolver(
Builder.ClearInsertionPoint();
}
+// Loc - where the diagnostic will point, where in the source code this
+// alignment has failed.
+// SecondaryLoc - if present (will be present if sufficiently different from
+// Loc), the diagnostic will additionally point a "Note:" to this location.
+// It should be the location where the __attribute__((assume_aligned))
+// was written, for example.
+void CodeGenFunction::EmitAlignmentAssumptionCheck(
+ llvm::Value *Ptr, QualType Ty, SourceLocation Loc,
+ SourceLocation SecondaryLoc, llvm::Value *Alignment,
+ llvm::Value *OffsetValue, llvm::Value *TheCheck,
+ llvm::Instruction *Assumption) {
+ assert(Assumption && isa<llvm::CallInst>(Assumption) &&
+ cast<llvm::CallInst>(Assumption)->getCalledValue() ==
+ llvm::Intrinsic::getDeclaration(
+ Builder.GetInsertBlock()->getParent()->getParent(),
+ llvm::Intrinsic::assume) &&
+ "Assumption should be a call to llvm.assume().");
+ assert(&(Builder.GetInsertBlock()->back()) == Assumption &&
+ "Assumption should be the last instruction of the basic block, "
+ "since the basic block is still being generated.");
+
+ if (!SanOpts.has(SanitizerKind::Alignment))
+ return;
+
+ // Don't check pointers to volatile data. The behavior here is implementation-
+ // defined.
+ if (Ty->getPointeeType().isVolatileQualified())
+ return;
+
+  // We need to temporarily remove the assumption so we can insert the
+ // sanitizer check before it, else the check will be dropped by optimizations.
+ Assumption->removeFromParent();
+
+ {
+ SanitizerScope SanScope(this);
+
+ if (!OffsetValue)
+ OffsetValue = Builder.getInt1(0); // no offset.
+
+ llvm::Constant *StaticData[] = {EmitCheckSourceLocation(Loc),
+ EmitCheckSourceLocation(SecondaryLoc),
+ EmitCheckTypeDescriptor(Ty)};
+ llvm::Value *DynamicData[] = {EmitCheckValue(Ptr),
+ EmitCheckValue(Alignment),
+ EmitCheckValue(OffsetValue)};
+ EmitCheck({std::make_pair(TheCheck, SanitizerKind::Alignment)},
+ SanitizerHandler::AlignmentAssumption, StaticData, DynamicData);
+ }
+
+ // We are now in the (new, empty) "cont" basic block.
+ // Reintroduce the assumption.
+ Builder.Insert(Assumption);
+  // FIXME: Assumption still has its original basic block as its Parent.
+}
+
llvm::DebugLoc CodeGenFunction::SourceLocToDebugLoc(SourceLocation Location) {
if (CGDebugInfo *DI = getDebugInfo())
return DI->SourceLocToDebugLoc(Location);
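
The new EmitAlignmentAssumption overloads carry the pointee type and source locations needed to emit an optional -fsanitize=alignment check in front of the llvm.assume. A minimal sketch, assuming a plain __builtin_assume_aligned call site that is illustrative and not taken from the patch:

// Hypothetical example: under -fsanitize=alignment, the 16-byte promise below
// is now verified at run time via the new alignment_assumption handler before
// the assumption reaches the optimizer; a misaligned P produces a diagnostic
// instead of silently enabling wide, aligned loads.
float sum4(const float *P) {
  const float *Q = (const float *)__builtin_assume_aligned(P, 16);
  float S = 0;
  for (int i = 0; i < 4; ++i)
    S += Q[i];
  return S;
}
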
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index f9e284232972..89cb850ab1b1 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -29,9 +29,9 @@
#include "clang/AST/Type.h"
#include "clang/Basic/ABI.h"
#include "clang/Basic/CapturedStmt.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/TargetInfo.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
@@ -131,6 +131,7 @@ enum TypeEvaluationKind {
SANITIZER_CHECK(ShiftOutOfBounds, shift_out_of_bounds, 0) \
SANITIZER_CHECK(SubOverflow, sub_overflow, 0) \
SANITIZER_CHECK(TypeMismatch, type_mismatch, 1) \
+ SANITIZER_CHECK(AlignmentAssumption, alignment_assumption, 0) \
SANITIZER_CHECK(VLABoundNotPositive, vla_bound_not_positive, 0)
enum SanitizerHandler {
@@ -470,7 +471,7 @@ public:
/// potentially set the return value.
bool SawAsmBlock = false;
- const FunctionDecl *CurSEHParent = nullptr;
+ const NamedDecl *CurSEHParent = nullptr;
/// True if the current function is an outlined SEH helper. This can be a
/// finally block or filter expression.
@@ -1197,6 +1198,8 @@ public:
private:
CGDebugInfo *DebugInfo;
+ /// Used to create unique names for artificial VLA size debug info variables.
+ unsigned VLAExprCounter = 0;
bool DisableDebugInfo = false;
/// DidCallStackSave - Whether llvm.stacksave has been called. Used to avoid
@@ -1746,6 +1749,9 @@ public:
bool IsLambdaConversionToBlock,
bool BuildGlobalBlock);
+ /// Check if \p T is a C++ class that has a destructor that can throw.
+ static bool cxxDestructorCanThrow(QualType T);
+
llvm::Constant *GenerateCopyHelperFunction(const CGBlockInfo &blockInfo);
llvm::Constant *GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo);
llvm::Constant *GenerateObjCAtomicSetterCopyHelperFunction(
@@ -1754,7 +1760,8 @@ public:
const ObjCPropertyImplDecl *PID);
llvm::Value *EmitBlockCopyAndAutorelease(llvm::Value *Block, QualType Ty);
- void BuildBlockRelease(llvm::Value *DeclPtr, BlockFieldFlags flags);
+ void BuildBlockRelease(llvm::Value *DeclPtr, BlockFieldFlags flags,
+ bool CanThrow);
class AutoVarEmission;
@@ -1777,13 +1784,13 @@ public:
/// \param LoadBlockVarAddr Indicates whether we need to emit a load from
/// \p Addr to get the address of the __block structure.
void enterByrefCleanup(CleanupKind Kind, Address Addr, BlockFieldFlags Flags,
- bool LoadBlockVarAddr);
+ bool LoadBlockVarAddr, bool CanThrow);
void setBlockContextParameter(const ImplicitParamDecl *D, unsigned argNum,
llvm::Value *ptr);
Address LoadBlockStruct();
- Address GetAddrOfBlockDecl(const VarDecl *var, bool ByRef);
+ Address GetAddrOfBlockDecl(const VarDecl *var);
/// BuildBlockByrefAddress - Computes the location of the
/// data in a variable which is declared as __block.
@@ -1800,6 +1807,11 @@ public:
void GenerateCode(GlobalDecl GD, llvm::Function *Fn,
const CGFunctionInfo &FnInfo);
+
+ /// Annotate the function with an attribute that disables TSan checking at
+ /// runtime.
+ void markAsIgnoreThreadCheckingAtRuntime(llvm::Function *Fn);
+
/// Emit code for the start of a function.
/// \param Loc The location to be associated with the function.
/// \param StartLoc The location of the function body.
@@ -1816,7 +1828,7 @@ public:
void EmitConstructorBody(FunctionArgList &Args);
void EmitDestructorBody(FunctionArgList &Args);
void emitImplicitAssignmentOperatorBody(FunctionArgList &Args);
- void EmitFunctionBody(FunctionArgList &Args, const Stmt *Body);
+ void EmitFunctionBody(const Stmt *Body);
void EmitBlockWithFallThrough(llvm::BasicBlock *BB, const Stmt *S);
void EmitForwardingCallToLambda(const CXXMethodDecl *LambdaCallOperator,
@@ -1845,7 +1857,7 @@ public:
void FinishThunk();
/// Emit a musttail call for a thunk with a potentially adjusted this pointer.
- void EmitMustTailThunk(const CXXMethodDecl *MD, llvm::Value *AdjustedThisPtr,
+ void EmitMustTailThunk(GlobalDecl GD, llvm::Value *AdjustedThisPtr,
llvm::Value *Callee);
/// Generate a thunk for the given method.
@@ -2622,12 +2634,6 @@ public:
ComplexPairTy EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV,
bool isInc, bool isPre);
- void EmitAlignmentAssumption(llvm::Value *PtrValue, unsigned Alignment,
- llvm::Value *OffsetValue = nullptr) {
- Builder.CreateAlignmentAssumption(CGM.getDataLayout(), PtrValue, Alignment,
- OffsetValue);
- }
-
/// Converts Location to a DebugLoc, if debug information is enabled.
llvm::DebugLoc SourceLocToDebugLoc(SourceLocation Location);
@@ -2674,8 +2680,9 @@ public:
llvm::Value *NRVOFlag;
- /// True if the variable is a __block variable.
- bool IsByRef;
+ /// True if the variable is a __block variable that is captured by an
+ /// escaping block.
+ bool IsEscapingByRef;
/// True if the variable is of aggregate type and has a constant
/// initializer.
@@ -2695,7 +2702,7 @@ public:
AutoVarEmission(const VarDecl &variable)
: Variable(&variable), Addr(Address::invalid()), NRVOFlag(nullptr),
- IsByRef(false), IsConstantAggregate(false),
+ IsEscapingByRef(false), IsConstantAggregate(false),
SizeForLifetimeMarkers(nullptr), AllocaAddr(Address::invalid()) {}
bool wasEmittedAsGlobal() const { return !Addr.isValid(); }
@@ -2725,7 +2732,7 @@ public:
/// Note that this does not chase the forwarding pointer for
/// __block decls.
Address getObjectAddress(CodeGenFunction &CGF) const {
- if (!IsByRef) return Addr;
+ if (!IsEscapingByRef) return Addr;
return CGF.emitBlockByrefAddress(Addr, Variable, /*forward*/ false);
}
@@ -2790,11 +2797,27 @@ public:
PeepholeProtection protectFromPeepholes(RValue rvalue);
void unprotectFromPeepholes(PeepholeProtection protection);
- void EmitAlignmentAssumption(llvm::Value *PtrValue, llvm::Value *Alignment,
- llvm::Value *OffsetValue = nullptr) {
- Builder.CreateAlignmentAssumption(CGM.getDataLayout(), PtrValue, Alignment,
- OffsetValue);
- }
+ void EmitAlignmentAssumptionCheck(llvm::Value *Ptr, QualType Ty,
+ SourceLocation Loc,
+ SourceLocation AssumptionLoc,
+ llvm::Value *Alignment,
+ llvm::Value *OffsetValue,
+ llvm::Value *TheCheck,
+ llvm::Instruction *Assumption);
+
+ void EmitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty,
+ SourceLocation Loc, SourceLocation AssumptionLoc,
+ llvm::Value *Alignment,
+ llvm::Value *OffsetValue = nullptr);
+
+ void EmitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty,
+ SourceLocation Loc, SourceLocation AssumptionLoc,
+ unsigned Alignment,
+ llvm::Value *OffsetValue = nullptr);
+
+ void EmitAlignmentAssumption(llvm::Value *PtrValue, const Expr *E,
+ SourceLocation AssumptionLoc, unsigned Alignment,
+ llvm::Value *OffsetValue = nullptr);
//===--------------------------------------------------------------------===//
// Statement Emission
@@ -2878,6 +2901,8 @@ public:
void EnterSEHTryStmt(const SEHTryStmt &S);
void ExitSEHTryStmt(const SEHTryStmt &S);
+ void pushSEHCleanup(CleanupKind kind,
+ llvm::Function *FinallyFunc);
void startOutlinedSEHHelper(CodeGenFunction &ParentCGF, bool IsFilter,
const Stmt *OutlinedStmt);
@@ -3512,6 +3537,7 @@ public:
ConstantEmission tryEmitAsConstant(DeclRefExpr *refExpr);
ConstantEmission tryEmitAsConstant(const MemberExpr *ME);
+ llvm::Value *emitScalarConstant(const ConstantEmission &Constant, Expr *E);
RValue EmitPseudoObjectRValue(const PseudoObjectExpr *e,
AggValueSlot slot = AggValueSlot::ignored());
@@ -3603,6 +3629,19 @@ public:
CXXDtorType Type,
const CXXRecordDecl *RD);
+ // Return the copy constructor name with the prefix "__copy_constructor_"
+ // removed.
+ static std::string getNonTrivialCopyConstructorStr(QualType QT,
+ CharUnits Alignment,
+ bool IsVolatile,
+ ASTContext &Ctx);
+
+ // Return the destructor name with the prefix "__destructor_" removed.
+ static std::string getNonTrivialDestructorStr(QualType QT,
+ CharUnits Alignment,
+ bool IsVolatile,
+ ASTContext &Ctx);
+
// These functions emit calls to the special functions of non-trivial C
// structs.
void defaultInitNonTrivialCStructVar(LValue Dst);
@@ -3653,9 +3692,10 @@ public:
RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E,
ReturnValueSlot ReturnValue);
- RValue EmitBuiltinExpr(const FunctionDecl *FD,
- unsigned BuiltinID, const CallExpr *E,
- ReturnValueSlot ReturnValue);
+ RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
+ const CallExpr *E, ReturnValueSlot ReturnValue);
+
+ RValue emitRotate(const CallExpr *E, bool IsRotateRight);
/// Emit IR for __builtin_os_log_format.
RValue emitBuiltinOSLogFormat(const CallExpr &E);
@@ -3769,6 +3809,11 @@ public:
llvm::Value *EmitARCRetainAutoreleasedReturnValue(llvm::Value *value);
llvm::Value *EmitARCUnsafeClaimAutoreleasedReturnValue(llvm::Value *value);
+ llvm::Value *EmitObjCAutorelease(llvm::Value *value, llvm::Type *returnType);
+ llvm::Value *EmitObjCRetainNonBlock(llvm::Value *value,
+ llvm::Type *returnType);
+ void EmitObjCRelease(llvm::Value *value, ARCPreciseLifetime_t precise);
+
std::pair<LValue,llvm::Value*>
EmitARCStoreAutoreleasing(const BinaryOperator *e);
std::pair<LValue,llvm::Value*>
@@ -3776,6 +3821,10 @@ public:
std::pair<LValue,llvm::Value*>
EmitARCStoreUnsafeUnretained(const BinaryOperator *e, bool ignored);
+ llvm::Value *EmitObjCAlloc(llvm::Value *value,
+ llvm::Type *returnType);
+ llvm::Value *EmitObjCAllocWithZone(llvm::Value *value,
+ llvm::Type *returnType);
llvm::Value *EmitObjCThrowOperand(const Expr *expr);
llvm::Value *EmitObjCConsumeObject(QualType T, llvm::Value *Ptr);
llvm::Value *EmitObjCExtendObjectLifetime(QualType T, llvm::Value *Ptr);
@@ -3865,6 +3914,8 @@ public:
AddInitializerToStaticVarDecl(const VarDecl &D,
llvm::GlobalVariable *GV);
+ // Emit an @llvm.invariant.start call for the given memory region.
+ void EmitInvariantStart(llvm::Constant *Addr, CharUnits Size);
/// EmitCXXGlobalVarDeclInit - Create the initializer for a C++
/// variable with global storage.
@@ -3900,9 +3951,10 @@ public:
/// GenerateCXXGlobalInitFunc - Generates code for initializing global
/// variables.
- void GenerateCXXGlobalInitFunc(llvm::Function *Fn,
- ArrayRef<llvm::Function *> CXXThreadLocals,
- Address Guard = Address::invalid());
+ void
+ GenerateCXXGlobalInitFunc(llvm::Function *Fn,
+ ArrayRef<llvm::Function *> CXXThreadLocals,
+ ConstantAddress Guard = ConstantAddress::invalid());
/// GenerateCXXGlobalDtorsFunc - Generates code for destroying global
/// variables.
@@ -3920,11 +3972,13 @@ public:
void EmitSynthesizedCXXCopyCtor(Address Dest, Address Src, const Expr *Exp);
- void enterFullExpression(const ExprWithCleanups *E) {
- if (E->getNumObjects() == 0) return;
+ void enterFullExpression(const FullExpr *E) {
+ if (const auto *EWC = dyn_cast<ExprWithCleanups>(E))
+ if (EWC->getNumObjects() == 0)
+ return;
enterNonTrivialFullExpression(E);
}
- void enterNonTrivialFullExpression(const ExprWithCleanups *E);
+ void enterNonTrivialFullExpression(const FullExpr *E);
void EmitCXXThrowExpr(const CXXThrowExpr *E, bool KeepInsertionPoint = true);
@@ -4245,47 +4299,29 @@ public:
void EmitSanitizerStatReport(llvm::SanitizerStatKind SSK);
- struct TargetMultiVersionResolverOption {
+ struct MultiVersionResolverOption {
llvm::Function *Function;
- TargetAttr::ParsedTargetAttr ParsedAttribute;
- unsigned Priority;
- TargetMultiVersionResolverOption(
- const TargetInfo &TargInfo, llvm::Function *F,
- const clang::TargetAttr::ParsedTargetAttr &PT)
- : Function(F), ParsedAttribute(PT), Priority(0u) {
- for (StringRef Feat : PT.Features)
- Priority = std::max(Priority,
- TargInfo.multiVersionSortPriority(Feat.substr(1)));
-
- if (!PT.Architecture.empty())
- Priority = std::max(Priority,
- TargInfo.multiVersionSortPriority(PT.Architecture));
- }
-
- bool operator>(const TargetMultiVersionResolverOption &Other) const {
- return Priority > Other.Priority;
- }
+ FunctionDecl *FD;
+ struct Conds {
+ StringRef Architecture;
+ llvm::SmallVector<StringRef, 8> Features;
+
+ Conds(StringRef Arch, ArrayRef<StringRef> Feats)
+ : Architecture(Arch), Features(Feats.begin(), Feats.end()) {}
+ } Conditions;
+
+ MultiVersionResolverOption(llvm::Function *F, StringRef Arch,
+ ArrayRef<StringRef> Feats)
+ : Function(F), Conditions(Arch, Feats) {}
};
- void EmitTargetMultiVersionResolver(
- llvm::Function *Resolver,
- ArrayRef<TargetMultiVersionResolverOption> Options);
- struct CPUDispatchMultiVersionResolverOption {
- llvm::Function *Function;
- // Note: EmitX86CPUSupports only has 32 bits available, so we store the mask
- // as 32 bits here. When 64-bit support is added to __builtin_cpu_supports,
- // this can be extended to 64 bits.
- uint32_t FeatureMask;
- CPUDispatchMultiVersionResolverOption(llvm::Function *F, uint64_t Mask)
- : Function(F), FeatureMask(static_cast<uint32_t>(Mask)) {}
- bool operator>(const CPUDispatchMultiVersionResolverOption &Other) const {
- return FeatureMask > Other.FeatureMask;
- }
- };
- void EmitCPUDispatchMultiVersionResolver(
- llvm::Function *Resolver,
- ArrayRef<CPUDispatchMultiVersionResolverOption> Options);
- static uint32_t GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs);
+ // Emits the body of a multiversion function's resolver. Assumes that the
+ // options are already sorted in the proper order, with the 'default' option
+ // last (if it exists).
+ void EmitMultiVersionResolver(llvm::Function *Resolver,
+ ArrayRef<MultiVersionResolverOption> Options);
+
+ static uint64_t GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs);
private:
QualType getVarArgType(const Expr *Arg);
@@ -4302,10 +4338,9 @@ private:
llvm::Value *EmitX86CpuIs(StringRef CPUStr);
llvm::Value *EmitX86CpuSupports(const CallExpr *E);
llvm::Value *EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs);
- llvm::Value *EmitX86CpuSupports(uint32_t Mask);
+ llvm::Value *EmitX86CpuSupports(uint64_t Mask);
llvm::Value *EmitX86CpuInit();
- llvm::Value *
- FormResolverCondition(const TargetMultiVersionResolverOption &RO);
+ llvm::Value *FormResolverCondition(const MultiVersionResolverOption &RO);
};
inline DominatingLLVMValue::saved_type
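
The two resolver-option structs are merged into MultiVersionResolverOption, and EmitMultiVersionResolver now serves both target-based and cpu_dispatch multiversioning, falling back to a musttail forwarding call on targets without ifunc support. A minimal sketch of the source feature behind the resolver, with illustrative function bodies:

// Hypothetical example: three versions of one function; the emitted resolver
// checks the architecture/feature conditions in order and returns (ifunc) or
// tail-calls (no ifunc) the first match, with the 'default' version last.
__attribute__((target("avx2"))) int simdWidth() { return 8; }
__attribute__((target("sse4.2"))) int simdWidth() { return 4; }
__attribute__((target("default"))) int simdWidth() { return 1; }
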
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index 8c5e0df0969b..244738042cef 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -36,14 +36,15 @@
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/CharInfo.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/Module.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/Version.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
-#include "clang/Frontend/CodeGenOptions.h"
-#include "clang/Sema/SemaDiagnostic.h"
+#include "clang/Frontend/FrontendDiagnostic.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
@@ -53,6 +54,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MD5.h"
@@ -124,7 +126,7 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO,
RuntimeCC = getTargetCodeGenInfo().getABIInfo().getRuntimeCC();
- if (LangOpts.ObjC1)
+ if (LangOpts.ObjC)
createObjCRuntime();
if (LangOpts.OpenCL)
createOpenCLRuntime();
@@ -147,12 +149,12 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO,
Block.GlobalUniqueCount = 0;
- if (C.getLangOpts().ObjC1)
+ if (C.getLangOpts().ObjC)
ObjCData.reset(new ObjCEntrypoints());
if (CodeGenOpts.hasProfileClangUse()) {
auto ReaderOrErr = llvm::IndexedInstrProfReader::create(
- CodeGenOpts.ProfileInstrumentUsePath);
+ CodeGenOpts.ProfileInstrumentUsePath, CodeGenOpts.ProfileRemappingFile);
if (auto E = ReaderOrErr.takeError()) {
unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
"Could not read profile %0: %1");
@@ -320,8 +322,6 @@ void CodeGenModule::checkAliases() {
assert(FTy);
if (!FTy->getReturnType()->isPointerTy())
Diags.Report(Location, diag::err_ifunc_resolver_return);
- if (FTy->getNumParams())
- Diags.Report(Location, diag::err_ifunc_resolver_params);
}
llvm::Constant *Aliasee = Alias->getIndirectSymbol();
@@ -458,9 +458,12 @@ void CodeGenModule::Release() {
// Indicate that we want CodeView in the metadata.
getModule().addModuleFlag(llvm::Module::Warning, "CodeView", 1);
}
+ if (CodeGenOpts.CodeViewGHash) {
+ getModule().addModuleFlag(llvm::Module::Warning, "CodeViewGHash", 1);
+ }
if (CodeGenOpts.ControlFlowGuard) {
// We want function ID tables for Control Flow Guard.
- getModule().addModuleFlag(llvm::Module::Warning, "cfguard", 1);
+ getModule().addModuleFlag(llvm::Module::Warning, "cfguardtable", 1);
}
if (CodeGenOpts.OptimizationLevel > 0 && CodeGenOpts.StrictVTablePointers) {
// We don't support LTO with 2 with different StrictVTablePointers
@@ -556,6 +559,20 @@ void CodeGenModule::Release() {
getModule().setPIELevel(static_cast<llvm::PIELevel::Level>(PLevel));
}
+ if (getCodeGenOpts().CodeModel.size() > 0) {
+ unsigned CM = llvm::StringSwitch<unsigned>(getCodeGenOpts().CodeModel)
+ .Case("tiny", llvm::CodeModel::Tiny)
+ .Case("small", llvm::CodeModel::Small)
+ .Case("kernel", llvm::CodeModel::Kernel)
+ .Case("medium", llvm::CodeModel::Medium)
+ .Case("large", llvm::CodeModel::Large)
+ .Default(~0u);
+ if (CM != ~0u) {
+ llvm::CodeModel::Model codeModel = static_cast<llvm::CodeModel::Model>(CM);
+ getModule().setCodeModel(codeModel);
+ }
+ }
+
if (CodeGenOpts.NoPLT)
getModule().setRtLibUseGOT();
@@ -573,6 +590,9 @@ void CodeGenModule::Release() {
if (getCodeGenOpts().EmitVersionIdentMetadata)
EmitVersionIdentMetadata();
+ if (!getCodeGenOpts().RecordCommandLine.empty())
+ EmitCommandLineMetadata();
+
EmitTargetMetadata();
}
@@ -683,8 +703,8 @@ void CodeGenModule::ErrorUnsupported(const Stmt *S, const char *Type) {
unsigned DiagID = getDiags().getCustomDiagID(DiagnosticsEngine::Error,
"cannot compile this %0 yet");
std::string Msg = Type;
- getDiags().Report(Context.getFullLoc(S->getLocStart()), DiagID)
- << Msg << S->getSourceRange();
+ getDiags().Report(Context.getFullLoc(S->getBeginLoc()), DiagID)
+ << Msg << S->getSourceRange();
}
/// ErrorUnsupported - Print out an error that codegen doesn't support the
@@ -730,6 +750,14 @@ static bool shouldAssumeDSOLocal(const CodeGenModule &CGM,
return false;
const llvm::Triple &TT = CGM.getTriple();
+ if (TT.isWindowsGNUEnvironment()) {
+ // In MinGW, variables without DLLImport can still be automatically
+ // imported from a DLL by the linker; don't mark variables that
+ // potentially could come from another DLL as DSO local.
+ if (GV->isDeclarationForLinker() && isa<llvm::GlobalVariable>(GV) &&
+ !GV->isThreadLocal())
+ return false;
+ }
// Every other GV is local on COFF.
// Make an exception for windows OS in the triple: Some firmware builds use
// *-win32-macho triples. This (accidentally?) produced windows relocations
@@ -869,11 +897,13 @@ static std::string getCPUSpecificMangling(const CodeGenModule &CGM,
static void AppendCPUSpecificCPUDispatchMangling(const CodeGenModule &CGM,
const CPUSpecificAttr *Attr,
+ unsigned CPUIndex,
raw_ostream &Out) {
- // cpu_specific gets the current name, dispatch gets the resolver.
+ // cpu_specific gets the current name, dispatch gets the resolver if IFunc is
+ // supported.
if (Attr)
- Out << getCPUSpecificMangling(CGM, Attr->getCurCPUName()->getName());
- else
+ Out << getCPUSpecificMangling(CGM, Attr->getCPUName(CPUIndex)->getName());
+ else if (CGM.getTarget().supportsIFunc())
Out << ".resolver";
}
@@ -939,11 +969,19 @@ static std::string getMangledNameImpl(const CodeGenModule &CGM, GlobalDecl GD,
if (const auto *FD = dyn_cast<FunctionDecl>(ND))
if (FD->isMultiVersion() && !OmitMultiVersionMangling) {
- if (FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion())
- AppendCPUSpecificCPUDispatchMangling(
- CGM, FD->getAttr<CPUSpecificAttr>(), Out);
- else
+ switch (FD->getMultiVersionKind()) {
+ case MultiVersionKind::CPUDispatch:
+ case MultiVersionKind::CPUSpecific:
+ AppendCPUSpecificCPUDispatchMangling(CGM,
+ FD->getAttr<CPUSpecificAttr>(),
+ GD.getMultiVersionIndex(), Out);
+ break;
+ case MultiVersionKind::Target:
AppendTargetMangling(CGM, FD->getAttr<TargetAttr>(), Out);
+ break;
+ case MultiVersionKind::None:
+ llvm_unreachable("None multiversion type isn't valid here");
+ }
}
return Out.str();
@@ -968,8 +1006,10 @@ void CodeGenModule::UpdateMultiVersionNames(GlobalDecl GD,
"Other GD should now be a multiversioned function");
// OtherFD is the version of this function that was mangled BEFORE
// becoming a MultiVersion function. It potentially needs to be updated.
- const FunctionDecl *OtherFD =
- OtherGD.getCanonicalDecl().getDecl()->getAsFunction();
+ const FunctionDecl *OtherFD = OtherGD.getCanonicalDecl()
+ .getDecl()
+ ->getAsFunction()
+ ->getMostRecentDecl();
std::string OtherName = getMangledNameImpl(*this, OtherGD, OtherFD);
// This is so that if the initial version was already the 'default'
// version, we don't try to update it.
@@ -1001,26 +1041,6 @@ StringRef CodeGenModule::getMangledName(GlobalDecl GD) {
}
}
- const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl());
- // Since CPUSpecific can require multiple emits per decl, store the manglings
- // separately.
- if (FD &&
- (FD->isCPUDispatchMultiVersion() || FD->isCPUSpecificMultiVersion())) {
- const auto *SD = FD->getAttr<CPUSpecificAttr>();
-
- std::pair<GlobalDecl, unsigned> SpecCanonicalGD{
- CanonicalGD,
- SD ? SD->ActiveArgIndex : std::numeric_limits<unsigned>::max()};
-
- auto FoundName = CPUSpecificMangledDeclNames.find(SpecCanonicalGD);
- if (FoundName != CPUSpecificMangledDeclNames.end())
- return FoundName->second;
-
- auto Result = CPUSpecificManglings.insert(
- std::make_pair(getMangledNameImpl(*this, GD, FD), SpecCanonicalGD));
- return CPUSpecificMangledDeclNames[SpecCanonicalGD] = Result.first->first();
- }
-
auto FoundName = MangledDeclNames.find(CanonicalGD);
if (FoundName != MangledDeclNames.end())
return FoundName->second;
@@ -1082,11 +1102,12 @@ void CodeGenModule::EmitCtorList(CtorList &Fns, const char *GlobalName) {
// Ctor function type is void()*.
llvm::FunctionType* CtorFTy = llvm::FunctionType::get(VoidTy, false);
- llvm::Type *CtorPFTy = llvm::PointerType::getUnqual(CtorFTy);
+ llvm::Type *CtorPFTy = llvm::PointerType::get(CtorFTy,
+ TheModule.getDataLayout().getProgramAddressSpace());
// Get the type of a ctor entry, { i32, void ()*, i8* }.
llvm::StructType *CtorStructTy = llvm::StructType::get(
- Int32Ty, llvm::PointerType::getUnqual(CtorFTy), VoidPtrTy);
+ Int32Ty, CtorPFTy, VoidPtrTy);
// Construct the constructor and destructor arrays.
ConstantInitBuilder builder(*this);
@@ -1142,12 +1163,12 @@ llvm::ConstantInt *CodeGenModule::CreateCrossDsoCfiTypeId(llvm::Metadata *MD) {
return llvm::ConstantInt::get(Int64Ty, llvm::MD5Hash(MDS->getString()));
}
-void CodeGenModule::SetLLVMFunctionAttributes(const Decl *D,
+void CodeGenModule::SetLLVMFunctionAttributes(GlobalDecl GD,
const CGFunctionInfo &Info,
llvm::Function *F) {
unsigned CallingConv;
llvm::AttributeList PAL;
- ConstructAttributeList(F->getName(), Info, D, PAL, CallingConv, false);
+ ConstructAttributeList(F->getName(), Info, GD, PAL, CallingConv, false);
F->setAttributes(PAL);
F->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv));
}
@@ -1277,9 +1298,19 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
// Otherwise, propagate the inline hint attribute and potentially use its
// absence to mark things as noinline.
if (auto *FD = dyn_cast<FunctionDecl>(D)) {
- if (any_of(FD->redecls(), [&](const FunctionDecl *Redecl) {
- return Redecl->isInlineSpecified();
- })) {
+ // Search function and template pattern redeclarations for inline.
+ auto CheckForInline = [](const FunctionDecl *FD) {
+ auto CheckRedeclForInline = [](const FunctionDecl *Redecl) {
+ return Redecl->isInlineSpecified();
+ };
+ if (any_of(FD->redecls(), CheckRedeclForInline))
+ return true;
+ const FunctionDecl *Pattern = FD->getTemplateInstantiationPattern();
+ if (!Pattern)
+ return false;
+ return any_of(Pattern->redecls(), CheckRedeclForInline);
+ };
+ if (CheckForInline(FD)) {
B.addAttribute(llvm::Attribute::InlineHint);
} else if (CodeGenOpts.getInlining() ==
CodeGenOptions::OnlyHintInlining &&
@@ -1350,23 +1381,30 @@ void CodeGenModule::SetCommonAttributes(GlobalDecl GD, llvm::GlobalValue *GV) {
if (D && D->hasAttr<UsedAttr>())
addUsedGlobal(GV);
+
+ if (CodeGenOpts.KeepStaticConsts && D && isa<VarDecl>(D)) {
+ const auto *VD = cast<VarDecl>(D);
+ if (VD->getType().isConstQualified() &&
+ VD->getStorageDuration() == SD_Static)
+ addUsedGlobal(GV);
+ }
}
-bool CodeGenModule::GetCPUAndFeaturesAttributes(const Decl *D,
+bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
llvm::AttrBuilder &Attrs) {
// Add target-cpu and target-features attributes to functions. If
// we have a decl for the function and it has a target attribute then
// parse that and add it to the feature set.
StringRef TargetCPU = getTarget().getTargetOpts().CPU;
std::vector<std::string> Features;
- const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
+ const auto *FD = dyn_cast_or_null<FunctionDecl>(GD.getDecl());
FD = FD ? FD->getMostRecentDecl() : FD;
const auto *TD = FD ? FD->getAttr<TargetAttr>() : nullptr;
const auto *SD = FD ? FD->getAttr<CPUSpecificAttr>() : nullptr;
bool AddedAttr = false;
if (TD || SD) {
llvm::StringMap<bool> FeatureMap;
- getFunctionFeatureMap(FeatureMap, FD);
+ getFunctionFeatureMap(FeatureMap, GD);
// Produce the canonical string for this set of features.
for (const llvm::StringMap<bool>::value_type &Entry : FeatureMap)
@@ -1393,7 +1431,7 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(const Decl *D,
AddedAttr = true;
}
if (!Features.empty()) {
- llvm::sort(Features.begin(), Features.end());
+ llvm::sort(Features);
Attrs.addAttribute("target-features", llvm::join(Features, ","));
AddedAttr = true;
}
@@ -1422,7 +1460,7 @@ void CodeGenModule::setNonAliasAttributes(GlobalDecl GD,
F->addFnAttr("implicit-section-name", SA->getName());
llvm::AttrBuilder Attrs;
- if (GetCPUAndFeaturesAttributes(D, Attrs)) {
+ if (GetCPUAndFeaturesAttributes(GD, Attrs)) {
// We know that GetCPUAndFeaturesAttributes will always have the
// newest set, since it has the newest possible FunctionDecl, so the
// new ones should replace the old.
@@ -1445,7 +1483,7 @@ void CodeGenModule::SetInternalFunctionAttributes(GlobalDecl GD,
llvm::Function *F,
const CGFunctionInfo &FI) {
const Decl *D = GD.getDecl();
- SetLLVMFunctionAttributes(D, FI, F);
+ SetLLVMFunctionAttributes(GD, FI, F);
SetLLVMFunctionAttributesForDefinition(D, F);
F->setLinkage(llvm::Function::InternalLinkage);
@@ -1507,7 +1545,7 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
const auto *FD = cast<FunctionDecl>(GD.getDecl());
if (!IsIncompleteFunction) {
- SetLLVMFunctionAttributes(FD, getTypes().arrangeGlobalDeclaration(GD), F);
+ SetLLVMFunctionAttributes(GD, getTypes().arrangeGlobalDeclaration(GD), F);
// Setup target-specific attributes.
if (F->isDeclaration())
getTargetCodeGenInfo().setTargetAttributes(FD, F, *this);
@@ -1654,6 +1692,8 @@ static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod,
// Add linker options to link against the libraries/frameworks
// described by this module.
llvm::LLVMContext &Context = CGM.getLLVMContext();
+ bool IsELF = CGM.getTarget().getTriple().isOSBinFormatELF();
+ bool IsPS4 = CGM.getTarget().getTriple().isPS4();
// For modules that use export_as for linking, use that module
// name instead.
@@ -1673,11 +1713,19 @@ static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod,
}
// Link against a library.
- llvm::SmallString<24> Opt;
- CGM.getTargetCodeGenInfo().getDependentLibraryOption(
- Mod->LinkLibraries[I-1].Library, Opt);
- auto *OptString = llvm::MDString::get(Context, Opt);
- Metadata.push_back(llvm::MDNode::get(Context, OptString));
+ if (IsELF && !IsPS4) {
+ llvm::Metadata *Args[2] = {
+ llvm::MDString::get(Context, "lib"),
+ llvm::MDString::get(Context, Mod->LinkLibraries[I - 1].Library),
+ };
+ Metadata.push_back(llvm::MDNode::get(Context, Args));
+ } else {
+ llvm::SmallString<24> Opt;
+ CGM.getTargetCodeGenInfo().getDependentLibraryOption(
+ Mod->LinkLibraries[I - 1].Library, Opt);
+ auto *OptString = llvm::MDString::get(Context, Opt);
+ Metadata.push_back(llvm::MDNode::get(Context, OptString));
+ }
}
}
@@ -1708,16 +1756,14 @@ void CodeGenModule::EmitModuleLinkOptions() {
bool AnyChildren = false;
// Visit the submodules of this module.
- for (clang::Module::submodule_iterator Sub = Mod->submodule_begin(),
- SubEnd = Mod->submodule_end();
- Sub != SubEnd; ++Sub) {
+ for (const auto &SM : Mod->submodules()) {
// Skip explicit children; they need to be explicitly imported to be
// linked against.
- if ((*Sub)->IsExplicit)
+ if (SM->IsExplicit)
continue;
- if (Visited.insert(*Sub).second) {
- Stack.push_back(*Sub);
+ if (Visited.insert(SM).second) {
+ Stack.push_back(SM);
AnyChildren = true;
}
}
@@ -1747,6 +1793,10 @@ void CodeGenModule::EmitModuleLinkOptions() {
}
void CodeGenModule::EmitDeferred() {
+ // Emit deferred declare target declarations.
+ if (getLangOpts().OpenMP && !getLangOpts().OpenMPSimd)
+ getOpenMPRuntime().emitDeferredTargetDecls();
+
// Emit code for any potentially referenced deferred decls. Since a
// previously unused static decl may become used during the generation of code
// for a static function, iterate until no changes are made.
@@ -1949,9 +1999,6 @@ bool CodeGenModule::isInSanitizerBlacklist(llvm::GlobalVariable *GV,
bool CodeGenModule::imbueXRayAttrs(llvm::Function *Fn, SourceLocation Loc,
StringRef Category) const {
- if (!LangOpts.XRayInstrument)
- return false;
-
const auto &XRayFilter = getContext().getXRayFilter();
using ImbueAttr = XRayFunctionFilter::ImbueAttribute;
auto Attr = ImbueAttr::NONE;
@@ -1981,6 +2028,13 @@ bool CodeGenModule::MustBeEmitted(const ValueDecl *Global) {
if (LangOpts.EmitAllDecls)
return true;
+ if (CodeGenOpts.KeepStaticConsts) {
+ const auto *VD = dyn_cast<VarDecl>(Global);
+ if (VD && VD->getType().isConstQualified() &&
+ VD->getStorageDuration() == SD_Static)
+ return true;
+ }
+
return getContext().DeclMustBeEmitted(Global);
}
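The KeepStaticConsts handling added above (together with the SetCommonAttributes change earlier in this diff) forces otherwise-unreferenced file-scope constants to be emitted and retained. A minimal sketch of the kind of declaration this preserves, assuming the option corresponds to a -fkeep-static-consts style flag:

    // Illustrative only: normally an unreferenced TU-local constant like this
    // could be dropped; with KeepStaticConsts it must be emitted and kept.
    static const char build_tag[] = "mylib-1.2.3 (built 2018-12-01)";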
@@ -2000,7 +2054,8 @@ bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) {
// codegen for global variables, because they may be marked as threadprivate.
if (LangOpts.OpenMP && LangOpts.OpenMPUseTLS &&
getContext().getTargetInfo().isTLSSupported() && isa<VarDecl>(Global) &&
- !isTypeConstant(Global->getType(), false))
+ !isTypeConstant(Global->getType(), false) &&
+ !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(Global))
return false;
return true;
@@ -2141,16 +2196,22 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
} else {
const auto *VD = cast<VarDecl>(Global);
assert(VD->isFileVarDecl() && "Cannot emit local var decl as global.");
- // We need to emit device-side global CUDA variables even if a
- // variable does not have a definition -- we still need to define
- // host-side shadow for it.
- bool MustEmitForCuda = LangOpts.CUDA && !LangOpts.CUDAIsDevice &&
- !VD->hasDefinition() &&
- (VD->hasAttr<CUDAConstantAttr>() ||
- VD->hasAttr<CUDADeviceAttr>());
- if (!MustEmitForCuda &&
- VD->isThisDeclarationADefinition() != VarDecl::Definition &&
+ if (VD->isThisDeclarationADefinition() != VarDecl::Definition &&
!Context.isMSStaticDataMemberInlineDefinition(VD)) {
+ if (LangOpts.OpenMP) {
+ // Emit declaration of the must-be-emitted declare target variable.
+ if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
+ OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
+ if (*Res == OMPDeclareTargetDeclAttr::MT_To) {
+ (void)GetAddrOfGlobalVar(VD);
+ } else {
+ assert(*Res == OMPDeclareTargetDeclAttr::MT_Link &&
+ "link claue expected.");
+ (void)getOpenMPRuntime().getAddrOfDeclareTargetLink(VD);
+ }
+ return;
+ }
+ }
// If this declaration may have caused an inline variable definition to
// change linkage, make sure that it's emitted.
if (Context.getInlineVariableDefinitionKind(VD) ==
@@ -2360,6 +2421,19 @@ bool CodeGenModule::shouldOpportunisticallyEmitVTables() {
return CodeGenOpts.OptimizationLevel > 0;
}
+void CodeGenModule::EmitMultiVersionFunctionDefinition(GlobalDecl GD,
+ llvm::GlobalValue *GV) {
+ const auto *FD = cast<FunctionDecl>(GD.getDecl());
+
+ if (FD->isCPUSpecificMultiVersion()) {
+ auto *Spec = FD->getAttr<CPUSpecificAttr>();
+ for (unsigned I = 0; I < Spec->cpus_size(); ++I)
+ EmitGlobalFunctionDefinition(GD.getWithMultiVersionIndex(I), nullptr);
+ // Requires multiple emits.
+ } else
+ EmitGlobalFunctionDefinition(GD, GV);
+}
+
void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
const auto *D = cast<ValueDecl>(GD.getDecl());
@@ -2367,7 +2441,7 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
Context.getSourceManager(),
"Generating code for declaration");
- if (isa<FunctionDecl>(D)) {
+ if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
// At -O0, don't generate IR for functions with available_externally
// linkage.
if (!shouldEmitFunction(GD))
@@ -2380,6 +2454,8 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
ABI->emitCXXStructor(CD, getFromCtorType(GD.getCtorType()));
else if (const auto *DD = dyn_cast<CXXDestructorDecl>(Method))
ABI->emitCXXStructor(DD, getFromDtorType(GD.getDtorType()));
+ else if (FD->isMultiVersion())
+ EmitMultiVersionFunctionDefinition(GD, GV);
else
EmitGlobalFunctionDefinition(GD, GV);
@@ -2389,6 +2465,8 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
return;
}
+ if (FD->isMultiVersion())
+ return EmitMultiVersionFunctionDefinition(GD, GV);
return EmitGlobalFunctionDefinition(GD, GV);
}
@@ -2401,9 +2479,22 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old,
llvm::Function *NewFn);
+static unsigned
+TargetMVPriority(const TargetInfo &TI,
+ const CodeGenFunction::MultiVersionResolverOption &RO) {
+ unsigned Priority = 0;
+ for (StringRef Feat : RO.Conditions.Features)
+ Priority = std::max(Priority, TI.multiVersionSortPriority(Feat));
+
+ if (!RO.Conditions.Architecture.empty())
+ Priority = std::max(
+ Priority, TI.multiVersionSortPriority(RO.Conditions.Architecture));
+ return Priority;
+}
+
void CodeGenModule::emitMultiVersionFunctions() {
for (GlobalDecl GD : MultiVersionFuncs) {
- SmallVector<CodeGenFunction::TargetMultiVersionResolverOption, 10> Options;
+ SmallVector<CodeGenFunction::MultiVersionResolverOption, 10> Options;
const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
getContext().forEachMultiversionedFunctionVersion(
FD, [this, &GD, &Options](const FunctionDecl *CurFD) {
@@ -2424,20 +2515,36 @@ void CodeGenModule::emitMultiVersionFunctions() {
}
assert(Func && "This should have just been created");
}
- Options.emplace_back(getTarget(), cast<llvm::Function>(Func),
- CurFD->getAttr<TargetAttr>()->parse());
+
+ const auto *TA = CurFD->getAttr<TargetAttr>();
+ llvm::SmallVector<StringRef, 8> Feats;
+ TA->getAddedFeatures(Feats);
+
+ Options.emplace_back(cast<llvm::Function>(Func),
+ TA->getArchitecture(), Feats);
});
- llvm::Function *ResolverFunc = cast<llvm::Function>(
- GetGlobalValue((getMangledName(GD) + ".resolver").str()));
+ llvm::Function *ResolverFunc;
+ const TargetInfo &TI = getTarget();
+
+ if (TI.supportsIFunc() || FD->isTargetMultiVersion())
+ ResolverFunc = cast<llvm::Function>(
+ GetGlobalValue((getMangledName(GD) + ".resolver").str()));
+ else
+ ResolverFunc = cast<llvm::Function>(GetGlobalValue(getMangledName(GD)));
+
if (supportsCOMDAT())
ResolverFunc->setComdat(
getModule().getOrInsertComdat(ResolverFunc->getName()));
+
std::stable_sort(
Options.begin(), Options.end(),
- std::greater<CodeGenFunction::TargetMultiVersionResolverOption>());
+ [&TI](const CodeGenFunction::MultiVersionResolverOption &LHS,
+ const CodeGenFunction::MultiVersionResolverOption &RHS) {
+ return TargetMVPriority(TI, LHS) > TargetMVPriority(TI, RHS);
+ });
CodeGenFunction CGF(*this);
- CGF.EmitTargetMultiVersionResolver(ResolverFunc, Options);
+ CGF.EmitMultiVersionResolver(ResolverFunc, Options);
}
}
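The stable sort above orders the options by TargetMVPriority so the most specific architecture/feature combination is tested first and 'default' ends up last. A hedged example of the GCC-style multiversioned source that reaches emitMultiVersionFunctions (the exact target strings are illustrative):

    // Each definition becomes its own version; the code above then emits the
    // resolver (and ifunc, where supported) that picks one at load time.
    __attribute__((target("avx2")))      int work() { return 2; }
    __attribute__((target("arch=atom"))) int work() { return 1; }
    __attribute__((target("default")))   int work() { return 0; }

    int caller() { return work(); }  // dispatched through the resolver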
@@ -2446,27 +2553,58 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) {
assert(FD && "Not a FunctionDecl?");
const auto *DD = FD->getAttr<CPUDispatchAttr>();
assert(DD && "Not a cpu_dispatch Function?");
- llvm::Type *DeclTy = getTypes().ConvertTypeForMem(FD->getType());
+ QualType CanonTy = Context.getCanonicalType(FD->getType());
+ llvm::Type *DeclTy = getTypes().ConvertFunctionType(CanonTy, FD);
+
+ if (const auto *CXXFD = dyn_cast<CXXMethodDecl>(FD)) {
+ const CGFunctionInfo &FInfo = getTypes().arrangeCXXMethodDeclaration(CXXFD);
+ DeclTy = getTypes().GetFunctionType(FInfo);
+ }
StringRef ResolverName = getMangledName(GD);
- llvm::Type *ResolverType = llvm::FunctionType::get(
- llvm::PointerType::get(DeclTy,
- Context.getTargetAddressSpace(FD->getType())),
- false);
- auto *ResolverFunc = cast<llvm::Function>(
- GetOrCreateLLVMFunction(ResolverName, ResolverType, GlobalDecl{},
- /*ForVTable=*/false));
-
- SmallVector<CodeGenFunction::CPUDispatchMultiVersionResolverOption, 10>
- Options;
+
+ llvm::Type *ResolverType;
+ GlobalDecl ResolverGD;
+ if (getTarget().supportsIFunc())
+ ResolverType = llvm::FunctionType::get(
+ llvm::PointerType::get(DeclTy,
+ Context.getTargetAddressSpace(FD->getType())),
+ false);
+ else {
+ ResolverType = DeclTy;
+ ResolverGD = GD;
+ }
+
+ auto *ResolverFunc = cast<llvm::Function>(GetOrCreateLLVMFunction(
+ ResolverName, ResolverType, ResolverGD, /*ForVTable=*/false));
+
+ SmallVector<CodeGenFunction::MultiVersionResolverOption, 10> Options;
const TargetInfo &Target = getTarget();
+ unsigned Index = 0;
for (const IdentifierInfo *II : DD->cpus()) {
// Get the name of the target function so we can look it up/create it.
std::string MangledName = getMangledNameImpl(*this, GD, FD, true) +
getCPUSpecificMangling(*this, II->getName());
- llvm::Constant *Func = GetOrCreateLLVMFunction(
- MangledName, DeclTy, GD, /*ForVTable=*/false, /*DontDefer=*/false,
- /*IsThunk=*/false, llvm::AttributeList(), ForDefinition);
+
+ llvm::Constant *Func = GetGlobalValue(MangledName);
+
+ if (!Func) {
+ GlobalDecl ExistingDecl = Manglings.lookup(MangledName);
+ if (ExistingDecl.getDecl() &&
+ ExistingDecl.getDecl()->getAsFunction()->isDefined()) {
+ EmitGlobalFunctionDefinition(ExistingDecl, nullptr);
+ Func = GetGlobalValue(MangledName);
+ } else {
+ if (!ExistingDecl.getDecl())
+ ExistingDecl = GD.getWithMultiVersionIndex(Index);
+
+ Func = GetOrCreateLLVMFunction(
+ MangledName, DeclTy, ExistingDecl,
+ /*ForVTable=*/false, /*DontDefer=*/true,
+ /*IsThunk=*/false, llvm::AttributeList(), ForDefinition);
+ }
+ }
+
llvm::SmallVector<StringRef, 32> Features;
Target.getCPUSpecificCPUDispatchFeatures(II->getName(), Features);
llvm::transform(Features, Features.begin(),
@@ -2475,27 +2613,54 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) {
Features.begin(), Features.end(), [&Target](StringRef Feat) {
return !Target.validateCpuSupports(Feat);
}), Features.end());
- Options.emplace_back(cast<llvm::Function>(Func),
- CodeGenFunction::GetX86CpuSupportsMask(Features));
+ Options.emplace_back(cast<llvm::Function>(Func), StringRef{}, Features);
+ ++Index;
}
llvm::sort(
- Options.begin(), Options.end(),
- std::greater<CodeGenFunction::CPUDispatchMultiVersionResolverOption>());
+ Options, [](const CodeGenFunction::MultiVersionResolverOption &LHS,
+ const CodeGenFunction::MultiVersionResolverOption &RHS) {
+ return CodeGenFunction::GetX86CpuSupportsMask(LHS.Conditions.Features) >
+ CodeGenFunction::GetX86CpuSupportsMask(RHS.Conditions.Features);
+ });
+
+ // If the list contains multiple 'default' versions, such as when it contains
+ // 'pentium' and 'generic', don't emit the call to the generic one (since we
+ // always run on at least a 'pentium'). We do this by deleting the 'least
+ // advanced' (read, lowest mangling letter).
+ while (Options.size() > 1 &&
+ CodeGenFunction::GetX86CpuSupportsMask(
+ (Options.end() - 2)->Conditions.Features) == 0) {
+ StringRef LHSName = (Options.end() - 2)->Function->getName();
+ StringRef RHSName = (Options.end() - 1)->Function->getName();
+ if (LHSName.compare(RHSName) < 0)
+ Options.erase(Options.end() - 2);
+ else
+ Options.erase(Options.end() - 1);
+ }
+
CodeGenFunction CGF(*this);
- CGF.EmitCPUDispatchMultiVersionResolver(ResolverFunc, Options);
+ CGF.EmitMultiVersionResolver(ResolverFunc, Options);
}
-/// If an ifunc for the specified mangled name is not in the module, create and
-/// return an llvm IFunc Function with the specified type.
-llvm::Constant *
-CodeGenModule::GetOrCreateMultiVersionIFunc(GlobalDecl GD, llvm::Type *DeclTy,
- const FunctionDecl *FD) {
+/// If a dispatcher for the specified mangled name is not in the module, create
+/// and return an llvm Function with the specified type.
+llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(
+ GlobalDecl GD, llvm::Type *DeclTy, const FunctionDecl *FD) {
std::string MangledName =
getMangledNameImpl(*this, GD, FD, /*OmitMultiVersionMangling=*/true);
- std::string IFuncName = MangledName + ".ifunc";
- if (llvm::GlobalValue *IFuncGV = GetGlobalValue(IFuncName))
- return IFuncGV;
+
+ // Holds the name of the resolver; in ifunc mode this is the ifunc (which has
+ // a separate resolver).
+ std::string ResolverName = MangledName;
+ if (getTarget().supportsIFunc())
+ ResolverName += ".ifunc";
+ else if (FD->isTargetMultiVersion())
+ ResolverName += ".resolver";
+
+ // If this already exists, just return that one.
+ if (llvm::GlobalValue *ResolverGV = GetGlobalValue(ResolverName))
+ return ResolverGV;
// Since this is the first time we've created this IFunc, make sure
// that we put this multiversioned function into the list to be
@@ -2503,20 +2668,28 @@ CodeGenModule::GetOrCreateMultiVersionIFunc(GlobalDecl GD, llvm::Type *DeclTy,
if (!FD->isCPUDispatchMultiVersion() && !FD->isCPUSpecificMultiVersion())
MultiVersionFuncs.push_back(GD);
- std::string ResolverName = MangledName + ".resolver";
- llvm::Type *ResolverType = llvm::FunctionType::get(
- llvm::PointerType::get(DeclTy,
- Context.getTargetAddressSpace(FD->getType())),
- false);
- llvm::Constant *Resolver =
- GetOrCreateLLVMFunction(ResolverName, ResolverType, GlobalDecl{},
- /*ForVTable=*/false);
- llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create(
- DeclTy, 0, llvm::Function::ExternalLinkage, "", Resolver, &getModule());
- GIF->setName(IFuncName);
- SetCommonAttributes(FD, GIF);
+ if (getTarget().supportsIFunc()) {
+ llvm::Type *ResolverType = llvm::FunctionType::get(
+ llvm::PointerType::get(
+ DeclTy, getContext().getTargetAddressSpace(FD->getType())),
+ false);
+ llvm::Constant *Resolver = GetOrCreateLLVMFunction(
+ MangledName + ".resolver", ResolverType, GlobalDecl{},
+ /*ForVTable=*/false);
+ llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create(
+ DeclTy, 0, llvm::Function::ExternalLinkage, "", Resolver, &getModule());
+ GIF->setName(ResolverName);
+ SetCommonAttributes(FD, GIF);
- return GIF;
+ return GIF;
+ }
+
+ llvm::Constant *Resolver = GetOrCreateLLVMFunction(
+ ResolverName, DeclTy, GlobalDecl{}, /*ForVTable=*/false);
+ assert(isa<llvm::GlobalValue>(Resolver) &&
+ "Resolver should be created for the first time");
+ SetCommonAttributes(FD, cast<llvm::GlobalValue>(Resolver));
+ return Resolver;
}
/// GetOrCreateLLVMFunction - If the specified mangled name is not in the
@@ -2539,15 +2712,16 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
if (getLangOpts().OpenMPIsDevice && OpenMPRuntime &&
!OpenMPRuntime->markAsGlobalTarget(GD) && FD->isDefined() &&
!DontDefer && !IsForDefinition) {
- const FunctionDecl *FDDef = FD->getDefinition();
- GlobalDecl GDDef;
- if (const auto *CD = dyn_cast<CXXConstructorDecl>(FDDef))
- GDDef = GlobalDecl(CD, GD.getCtorType());
- else if (const auto *DD = dyn_cast<CXXDestructorDecl>(FDDef))
- GDDef = GlobalDecl(DD, GD.getDtorType());
- else
- GDDef = GlobalDecl(FDDef);
- addDeferredDeclToEmit(GDDef);
+ if (const FunctionDecl *FDDef = FD->getDefinition()) {
+ GlobalDecl GDDef;
+ if (const auto *CD = dyn_cast<CXXConstructorDecl>(FDDef))
+ GDDef = GlobalDecl(CD, GD.getCtorType());
+ else if (const auto *DD = dyn_cast<CXXDestructorDecl>(FDDef))
+ GDDef = GlobalDecl(DD, GD.getDtorType());
+ else
+ GDDef = GlobalDecl(FDDef);
+ EmitGlobal(GDDef);
+ }
}
if (FD->isMultiVersion()) {
@@ -2555,7 +2729,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction(
if (TA && TA->isDefaultVersion())
UpdateMultiVersionNames(GD, FD);
if (!IsForDefinition)
- return GetOrCreateMultiVersionIFunc(GD, Ty, FD);
+ return GetOrCreateMultiVersionResolver(GD, Ty, FD);
}
}
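For the cpu_dispatch path reworked earlier in this hunk sequence (where the resolver is emitted as an ordinary forwarding function when the target has no ifunc support), a hedged example of the source-level construct emitCPUDispatchDefinition consumes; the CPU names are only illustrative:

    // cpu_specific bodies become separate versions; the cpu_dispatch
    // declaration becomes the dispatcher/resolver emitted by the code above.
    __attribute__((cpu_specific(ivybridge))) void work(void) { /* fast path */ }
    __attribute__((cpu_specific(generic)))   void work(void) { /* fallback  */ }
    __attribute__((cpu_dispatch(ivybridge, generic))) void work(void) {}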
@@ -3058,10 +3232,9 @@ CodeGenModule::GetAddrOfGlobal(GlobalDecl GD,
IsForDefinition);
}
-llvm::GlobalVariable *
-CodeGenModule::CreateOrReplaceCXXRuntimeVariable(StringRef Name,
- llvm::Type *Ty,
- llvm::GlobalValue::LinkageTypes Linkage) {
+llvm::GlobalVariable *CodeGenModule::CreateOrReplaceCXXRuntimeVariable(
+ StringRef Name, llvm::Type *Ty, llvm::GlobalValue::LinkageTypes Linkage,
+ unsigned Alignment) {
llvm::GlobalVariable *GV = getModule().getNamedGlobal(Name);
llvm::GlobalVariable *OldGV = nullptr;
@@ -3097,6 +3270,8 @@ CodeGenModule::CreateOrReplaceCXXRuntimeVariable(StringRef Name,
!GV->hasAvailableExternallyLinkage())
GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));
+ GV->setAlignment(Alignment);
+
return GV;
}
@@ -3313,8 +3488,15 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
// CUDA E.2.4.1 "__shared__ variables cannot have an initialization
// as part of their declaration." Sema has already checked for
// error cases, so we just need to set Init to UndefValue.
- if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice &&
- D->hasAttr<CUDASharedAttr>())
+ bool IsCUDASharedVar =
+ getLangOpts().CUDAIsDevice && D->hasAttr<CUDASharedAttr>();
+ // Shadows of initialized device-side global variables are also left
+ // undefined.
+ bool IsCUDAShadowVar =
+ !getLangOpts().CUDAIsDevice &&
+ (D->hasAttr<CUDAConstantAttr>() || D->hasAttr<CUDADeviceAttr>() ||
+ D->hasAttr<CUDASharedAttr>());
+ if (getLangOpts().CUDA && (IsCUDASharedVar || IsCUDAShadowVar))
Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy));
else if (!InitExpr) {
// This is a tentative definition; tentative definitions are
@@ -3434,7 +3616,10 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
Flags |= CGCUDARuntime::ExternDeviceVar;
if (D->hasAttr<CUDAConstantAttr>())
Flags |= CGCUDARuntime::ConstantDeviceVar;
- getCUDARuntime().registerDeviceVar(*GV, Flags);
+ // Extern global variables will be registered in the TU where they are
+ // defined.
+ if (!D->hasExternalStorage())
+ getCUDARuntime().registerDeviceVar(*GV, Flags);
} else if (D->hasAttr<CUDASharedAttr>())
// __shared__ variables are odd. Shadows do get created, but
// they are not registered with the CUDA runtime, so they
@@ -3577,6 +3762,15 @@ static bool isVarDeclStrongDefinition(const ASTContext &Context,
}
}
+ // Microsoft's link.exe doesn't support alignments greater than 32 for common
+ // symbols, so symbols with greater alignment requirements cannot be common.
+ // Other COFF linkers (ld.bfd and LLD) support arbitrary power-of-two
+ // alignments for common symbols via the aligncomm directive, so this
+ // restriction only applies to MSVC environments.
+ if (Context.getTargetInfo().getTriple().isKnownWindowsMSVCEnvironment() &&
+ Context.getTypeAlignIfKnown(D->getType()) > 32)
+ return true;
+
return false;
}
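A short sketch of the case the new MSVC check above is about (the variable name is made up): a C tentative definition that requires more than 32-byte alignment cannot remain a common symbol under link.exe, so it is now treated as a strong definition.

    // Illustrative only: with no initializer this would normally become a
    // common symbol; the 64-byte alignment forces a strong definition in
    // MSVC environments.
    __attribute__((aligned(64))) int scratch_buffer[16];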
@@ -3592,6 +3786,10 @@ llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator(
return llvm::GlobalVariable::WeakAnyLinkage;
}
+ if (const auto *FD = D->getAsFunction())
+ if (FD->isMultiVersion() && Linkage == GVA_AvailableExternally)
+ return llvm::GlobalVariable::LinkOnceAnyLinkage;
+
// We are guaranteed to have a strong definition somewhere else,
// so we can use available_externally linkage.
if (Linkage == GVA_AvailableExternally)
@@ -3828,15 +4026,6 @@ void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
AddGlobalDtor(Fn, DA->getPriority());
if (D->hasAttr<AnnotateAttr>())
AddGlobalAnnotations(D, Fn);
-
- if (D->isCPUSpecificMultiVersion()) {
- auto *Spec = D->getAttr<CPUSpecificAttr>();
- // If there is another specific version we need to emit, do so here.
- if (Spec->ActiveArgIndex + 1 < Spec->cpus_size()) {
- ++Spec->ActiveArgIndex;
- EmitGlobalFunctionDefinition(GD, nullptr);
- }
- }
}
void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) {
@@ -4030,39 +4219,81 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
llvm::Constant *Zero = llvm::Constant::getNullValue(Int32Ty);
llvm::Constant *Zeros[] = { Zero, Zero };
+ const ASTContext &Context = getContext();
+ const llvm::Triple &Triple = getTriple();
+
+ const auto CFRuntime = getLangOpts().CFRuntime;
+ const bool IsSwiftABI =
+ static_cast<unsigned>(CFRuntime) >=
+ static_cast<unsigned>(LangOptions::CoreFoundationABI::Swift);
+ const bool IsSwift4_1 = CFRuntime == LangOptions::CoreFoundationABI::Swift4_1;
+
// If we don't already have it, get __CFConstantStringClassReference.
if (!CFConstantStringClassRef) {
+ const char *CFConstantStringClassName = "__CFConstantStringClassReference";
llvm::Type *Ty = getTypes().ConvertType(getContext().IntTy);
Ty = llvm::ArrayType::get(Ty, 0);
- llvm::GlobalValue *GV = cast<llvm::GlobalValue>(
- CreateRuntimeVariable(Ty, "__CFConstantStringClassReference"));
-
- if (getTriple().isOSBinFormatCOFF()) {
- IdentifierInfo &II = getContext().Idents.get(GV->getName());
- TranslationUnitDecl *TUDecl = getContext().getTranslationUnitDecl();
- DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl);
-
- const VarDecl *VD = nullptr;
- for (const auto &Result : DC->lookup(&II))
- if ((VD = dyn_cast<VarDecl>(Result)))
- break;
-
- if (!VD || !VD->hasAttr<DLLExportAttr>()) {
- GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
- GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
- } else {
- GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
- GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
+
+ switch (CFRuntime) {
+ default: break;
+ case LangOptions::CoreFoundationABI::Swift: LLVM_FALLTHROUGH;
+ case LangOptions::CoreFoundationABI::Swift5_0:
+ CFConstantStringClassName =
+ Triple.isOSDarwin() ? "$s15SwiftFoundation19_NSCFConstantStringCN"
+ : "$s10Foundation19_NSCFConstantStringCN";
+ Ty = IntPtrTy;
+ break;
+ case LangOptions::CoreFoundationABI::Swift4_2:
+ CFConstantStringClassName =
+ Triple.isOSDarwin() ? "$S15SwiftFoundation19_NSCFConstantStringCN"
+ : "$S10Foundation19_NSCFConstantStringCN";
+ Ty = IntPtrTy;
+ break;
+ case LangOptions::CoreFoundationABI::Swift4_1:
+ CFConstantStringClassName =
+ Triple.isOSDarwin() ? "__T015SwiftFoundation19_NSCFConstantStringCN"
+ : "__T010Foundation19_NSCFConstantStringCN";
+ Ty = IntPtrTy;
+ break;
+ }
+
+ llvm::Constant *C = CreateRuntimeVariable(Ty, CFConstantStringClassName);
+
+ if (Triple.isOSBinFormatELF() || Triple.isOSBinFormatCOFF()) {
+ llvm::GlobalValue *GV = nullptr;
+
+ if ((GV = dyn_cast<llvm::GlobalValue>(C))) {
+ IdentifierInfo &II = Context.Idents.get(GV->getName());
+ TranslationUnitDecl *TUDecl = Context.getTranslationUnitDecl();
+ DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl);
+
+ const VarDecl *VD = nullptr;
+ for (const auto &Result : DC->lookup(&II))
+ if ((VD = dyn_cast<VarDecl>(Result)))
+ break;
+
+ if (Triple.isOSBinFormatELF()) {
+ if (!VD)
+ GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
+ } else {
+ GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
+ if (!VD || !VD->hasAttr<DLLExportAttr>())
+ GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
+ else
+ GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
+ }
+
+ setDSOLocal(GV);
}
}
- setDSOLocal(GV);
// Decay array -> ptr
CFConstantStringClassRef =
- llvm::ConstantExpr::getGetElementPtr(Ty, GV, Zeros);
+ IsSwiftABI ? llvm::ConstantExpr::getPtrToInt(C, Ty)
+ : llvm::ConstantExpr::getGetElementPtr(Ty, C, Zeros);
}
- QualType CFTy = getContext().getCFConstantStringType();
+ QualType CFTy = Context.getCFConstantStringType();
auto *STy = cast<llvm::StructType>(getTypes().ConvertType(CFTy));
@@ -4073,7 +4304,12 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
Fields.add(cast<llvm::ConstantExpr>(CFConstantStringClassRef));
// Flags.
- Fields.addInt(IntTy, isUTF16 ? 0x07d0 : 0x07C8);
+ if (IsSwiftABI) {
+ Fields.addInt(IntPtrTy, IsSwift4_1 ? 0x05 : 0x01);
+ Fields.addInt(Int64Ty, isUTF16 ? 0x07d0 : 0x07c8);
+ } else {
+ Fields.addInt(IntTy, isUTF16 ? 0x07d0 : 0x07C8);
+ }
// String pointer.
llvm::Constant *C = nullptr;
@@ -4094,17 +4330,20 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
// Don't enforce the target's minimum global alignment, since the only use
// of the string is via this class initializer.
- CharUnits Align = isUTF16
- ? getContext().getTypeAlignInChars(getContext().ShortTy)
- : getContext().getTypeAlignInChars(getContext().CharTy);
+ CharUnits Align = isUTF16 ? Context.getTypeAlignInChars(Context.ShortTy)
+ : Context.getTypeAlignInChars(Context.CharTy);
GV->setAlignment(Align.getQuantity());
// FIXME: We set the section explicitly to avoid a bug in ld64 224.1.
// Without it LLVM can merge the string with a non unnamed_addr one during
// LTO. Doing that changes the section it ends in, which surprises ld64.
- if (getTriple().isOSBinFormatMachO())
+ if (Triple.isOSBinFormatMachO())
GV->setSection(isUTF16 ? "__TEXT,__ustring"
: "__TEXT,__cstring,cstring_literals");
+ // Make sure the literal ends up in .rodata to allow for safe ICF and for
+ // the static linker to adjust permissions to read-only later on.
+ else if (Triple.isOSBinFormatELF())
+ GV->setSection(".rodata");
// String.
llvm::Constant *Str =
@@ -4116,8 +4355,17 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
Fields.add(Str);
// String length.
- auto Ty = getTypes().ConvertType(getContext().LongTy);
- Fields.addInt(cast<llvm::IntegerType>(Ty), StringLength);
+ llvm::IntegerType *LengthTy =
+ llvm::IntegerType::get(getModule().getContext(),
+ Context.getTargetInfo().getLongWidth());
+ if (IsSwiftABI) {
+ if (CFRuntime == LangOptions::CoreFoundationABI::Swift4_1 ||
+ CFRuntime == LangOptions::CoreFoundationABI::Swift4_2)
+ LengthTy = Int32Ty;
+ else
+ LengthTy = IntPtrTy;
+ }
+ Fields.addInt(LengthTy, StringLength);
CharUnits Alignment = getPointerAlign();
@@ -4125,7 +4373,7 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
GV = Fields.finishAndCreateGlobal("_unnamed_cfstring_", Alignment,
/*isConstant=*/false,
llvm::GlobalVariable::PrivateLinkage);
- switch (getTriple().getObjectFormat()) {
+ switch (Triple.getObjectFormat()) {
case llvm::Triple::UnknownObjectFormat:
llvm_unreachable("unknown file format");
case llvm::Triple::COFF:
@@ -4264,15 +4512,13 @@ CodeGenModule::GetAddrOfConstantStringFromLiteral(const StringLiteral *S,
StringRef GlobalVariableName;
llvm::GlobalValue::LinkageTypes LT;
- // Mangle the string literal if the ABI allows for it. However, we cannot
- // do this if we are compiling with ASan or -fwritable-strings because they
- // rely on strings having normal linkage.
- if (!LangOpts.WritableStrings &&
- !LangOpts.Sanitize.has(SanitizerKind::Address) &&
- getCXXABI().getMangleContext().shouldMangleStringLiteral(S)) {
+ // Mangle the string literal if that's how the ABI merges duplicate strings.
+ // Don't do it if they are writable, since we don't want writes in one TU to
+ // affect strings in another.
+ if (getCXXABI().getMangleContext().shouldMangleStringLiteral(S) &&
+ !LangOpts.WritableStrings) {
llvm::raw_svector_ostream Out(MangledNameBuffer);
getCXXABI().getMangleContext().mangleStringLiteral(S, Out);
-
LT = llvm::GlobalValue::LinkOnceODRLinkage;
GlobalVariableName = MangledNameBuffer;
} else {
@@ -4620,6 +4866,7 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) {
case Decl::TypeAliasTemplate:
case Decl::Block:
case Decl::Empty:
+ case Decl::Binding:
break;
case Decl::Using: // using X; [C++]
if (CGDebugInfo *DI = getModuleDebugInfo())
@@ -4787,6 +5034,10 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) {
EmitOMPDeclareReduction(cast<OMPDeclareReductionDecl>(D));
break;
+ case Decl::OMPRequires:
+ EmitOMPRequiresDecl(cast<OMPRequiresDecl>(D));
+ break;
+
default:
// Make sure we handled everything we should, every other kind is a
// non-top-level decl. FIXME: Would be nice to have an isTopLevelDeclKind
@@ -4810,7 +5061,7 @@ void CodeGenModule::AddDeferredUnusedCoverageMapping(Decl *D) {
if (!cast<FunctionDecl>(D)->doesThisDeclarationHaveABody())
return;
SourceManager &SM = getContext().getSourceManager();
- if (LimitedCoverage && SM.getMainFileID() != SM.getFileID(D->getLocStart()))
+ if (LimitedCoverage && SM.getMainFileID() != SM.getFileID(D->getBeginLoc()))
return;
auto I = DeferredEmptyCoverageMappingDecls.find(D);
if (I == DeferredEmptyCoverageMappingDecls.end())
@@ -4981,6 +5232,16 @@ void CodeGenModule::EmitVersionIdentMetadata() {
IdentMetadata->addOperand(llvm::MDNode::get(Ctx, IdentNode));
}
+void CodeGenModule::EmitCommandLineMetadata() {
+ llvm::NamedMDNode *CommandLineMetadata =
+ TheModule.getOrInsertNamedMetadata("llvm.commandline");
+ std::string CommandLine = getCodeGenOpts().RecordCommandLine;
+ llvm::LLVMContext &Ctx = TheModule.getContext();
+
+ llvm::Metadata *CommandLineNode[] = {llvm::MDString::get(Ctx, CommandLine)};
+ CommandLineMetadata->addOperand(llvm::MDNode::get(Ctx, CommandLineNode));
+}
+
void CodeGenModule::EmitTargetMetadata() {
// Warning, new MangledDeclNames may be appended within this loop.
// We rely on MapVector insertions adding new elements to the end
@@ -5073,7 +5334,7 @@ void CodeGenModule::EmitOMPThreadPrivateDecl(const OMPThreadPrivateDecl *D) {
Address Addr(GetAddrOfGlobalVar(VD), getContext().getDeclAlign(VD));
if (auto InitFunction = getOpenMPRuntime().emitThreadPrivateVarDefinition(
- VD, Addr, RefExpr->getLocStart(), PerformInit))
+ VD, Addr, RefExpr->getBeginLoc(), PerformInit))
CXXGlobalInits.push_back(InitFunction);
}
}
@@ -5196,8 +5457,9 @@ TargetAttr::ParsedTargetAttr CodeGenModule::filterFunctionTargetAttrs(const Targ
// Fills in the supplied string map with the set of target features for the
// passed in function.
void CodeGenModule::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
- const FunctionDecl *FD) {
+ GlobalDecl GD) {
StringRef TargetCPU = Target.getTargetOpts().CPU;
+ const FunctionDecl *FD = GD.getDecl()->getAsFunction();
if (const auto *TD = FD->getAttr<TargetAttr>()) {
TargetAttr::ParsedTargetAttr ParsedAttr = filterFunctionTargetAttrs(TD);
@@ -5219,8 +5481,8 @@ void CodeGenModule::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
ParsedAttr.Features);
} else if (const auto *SD = FD->getAttr<CPUSpecificAttr>()) {
llvm::SmallVector<StringRef, 32> FeaturesTmp;
- Target.getCPUSpecificCPUDispatchFeatures(SD->getCurCPUName()->getName(),
- FeaturesTmp);
+ Target.getCPUSpecificCPUDispatchFeatures(
+ SD->getCPUName(GD.getMultiVersionIndex())->getName(), FeaturesTmp);
std::vector<std::string> Features(FeaturesTmp.begin(), FeaturesTmp.end());
Target.initFeatureMap(FeatureMap, getDiags(), TargetCPU, Features);
} else {
diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h
index 91f3d94330f1..75679d11c13c 100644
--- a/lib/CodeGen/CodeGenModule.h
+++ b/lib/CodeGen/CodeGenModule.h
@@ -119,15 +119,29 @@ struct OrderGlobalInits {
struct ObjCEntrypoints {
ObjCEntrypoints() { memset(this, 0, sizeof(*this)); }
- /// void objc_autoreleasePoolPop(void*);
+ /// id objc_alloc(id);
+ llvm::Constant *objc_alloc;
+
+ /// id objc_allocWithZone(id);
+ llvm::Constant *objc_allocWithZone;
+
+ /// void objc_autoreleasePoolPop(void*);
llvm::Constant *objc_autoreleasePoolPop;
+ /// void objc_autoreleasePoolPop(void*);
+ /// Note: this method is used when exception handling is enabled.
+ llvm::Constant *objc_autoreleasePoolPopInvoke;
+
/// void *objc_autoreleasePoolPush(void);
llvm::Constant *objc_autoreleasePoolPush;
/// id objc_autorelease(id);
llvm::Constant *objc_autorelease;
+ /// id objc_autorelease(id);
+ /// Note this is the runtime method, not the intrinsic.
+ llvm::Constant *objc_autoreleaseRuntimeFunction;
+
/// id objc_autoreleaseReturnValue(id);
llvm::Constant *objc_autoreleaseReturnValue;
@@ -152,6 +166,10 @@ struct ObjCEntrypoints {
/// id objc_retain(id);
llvm::Constant *objc_retain;
+ /// id objc_retain(id);
+ /// Note this is the runtime method, not the intrinsic.
+ llvm::Constant *objc_retainRuntimeFunction;
+
/// id objc_retainAutorelease(id);
llvm::Constant *objc_retainAutorelease;
@@ -167,6 +185,10 @@ struct ObjCEntrypoints {
/// void objc_release(id);
llvm::Constant *objc_release;
+ /// void objc_release(id);
+ /// Note this is the runtime method, not the intrinsic.
+ llvm::Constant *objc_releaseRuntimeFunction;
+
/// void objc_storeStrong(id*, id);
llvm::Constant *objc_storeStrong;
@@ -764,7 +786,8 @@ public:
/// bitcast to the new variable.
llvm::GlobalVariable *
CreateOrReplaceCXXRuntimeVariable(StringRef Name, llvm::Type *Ty,
- llvm::GlobalValue::LinkageTypes Linkage);
+ llvm::GlobalValue::LinkageTypes Linkage,
+ unsigned Alignment);
llvm::Function *
CreateGlobalInitOrDestructFunction(llvm::FunctionType *ty, const Twine &name,
@@ -1042,8 +1065,7 @@ public:
const CGFunctionInfo &FI);
/// Set the LLVM function attributes (sext, zext, etc).
- void SetLLVMFunctionAttributes(const Decl *D,
- const CGFunctionInfo &Info,
+ void SetLLVMFunctionAttributes(GlobalDecl GD, const CGFunctionInfo &Info,
llvm::Function *F);
/// Set the LLVM function attributes which only apply to a function
@@ -1103,8 +1125,7 @@ public:
// Fills in the supplied string map with the set of target features for the
// passed in function.
- void getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
- const FunctionDecl *FD);
+ void getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap, GlobalDecl GD);
StringRef getMangledName(GlobalDecl GD);
StringRef getBlockMangledName(GlobalDecl GD, const BlockDecl *BD);
@@ -1223,6 +1244,10 @@ public:
void EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D,
CodeGenFunction *CGF = nullptr);
+ /// Emit code for the OpenMP requires directive.
+ /// \param D The requires declaration.
+ void EmitOMPRequiresDecl(const OMPRequiresDecl *D);
+
/// Returns whether the given record has hidden LTO visibility and therefore
/// may participate in (single-module) CFI and whole-program vtable
/// optimization.
@@ -1288,9 +1313,9 @@ private:
llvm::AttributeList ExtraAttrs = llvm::AttributeList(),
ForDefinition_t IsForDefinition = NotForDefinition);
- llvm::Constant *GetOrCreateMultiVersionIFunc(GlobalDecl GD,
- llvm::Type *DeclTy,
- const FunctionDecl *FD);
+ llvm::Constant *GetOrCreateMultiVersionResolver(GlobalDecl GD,
+ llvm::Type *DeclTy,
+ const FunctionDecl *FD);
void UpdateMultiVersionNames(GlobalDecl GD, const FunctionDecl *FD);
llvm::Constant *GetOrCreateLLVMGlobal(StringRef MangledName,
@@ -1299,7 +1324,7 @@ private:
ForDefinition_t IsForDefinition
= NotForDefinition);
- bool GetCPUAndFeaturesAttributes(const Decl *D,
+ bool GetCPUAndFeaturesAttributes(GlobalDecl GD,
llvm::AttrBuilder &AttrBuilder);
void setNonAliasAttributes(GlobalDecl GD, llvm::GlobalObject *GO);
@@ -1310,6 +1335,8 @@ private:
void EmitGlobalDefinition(GlobalDecl D, llvm::GlobalValue *GV = nullptr);
void EmitGlobalFunctionDefinition(GlobalDecl GD, llvm::GlobalValue *GV);
+ void EmitMultiVersionFunctionDefinition(GlobalDecl GD, llvm::GlobalValue *GV);
+
void EmitGlobalVarDefinition(const VarDecl *D, bool IsTentative = false);
void EmitAliasDefinition(GlobalDecl GD);
void emitIFuncDefinition(GlobalDecl GD);
@@ -1397,6 +1424,9 @@ private:
/// Emit the Clang version as llvm.ident metadata.
void EmitVersionIdentMetadata();
+ /// Emit the Clang commandline as llvm.commandline metadata.
+ void EmitCommandLineMetadata();
+
/// Emits target specific Metadata for global declarations.
void EmitTargetMetadata();
diff --git a/lib/CodeGen/CodeGenPGO.cpp b/lib/CodeGen/CodeGenPGO.cpp
index c8c2a1b956b8..776060743a63 100644
--- a/lib/CodeGen/CodeGenPGO.cpp
+++ b/lib/CodeGen/CodeGenPGO.cpp
@@ -165,7 +165,12 @@ struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
// Blocks and lambdas are handled as separate functions, so we need not
// traverse them in the parent context.
bool TraverseBlockExpr(BlockExpr *BE) { return true; }
- bool TraverseLambdaBody(LambdaExpr *LE) { return true; }
+ bool TraverseLambdaExpr(LambdaExpr *LE) {
+ // Traverse the captures, but not the body.
+ for (const auto &C : zip(LE->captures(), LE->capture_inits()))
+ TraverseLambdaCapture(LE, &std::get<0>(C), std::get<1>(C));
+ return true;
+ }
bool TraverseCapturedStmt(CapturedStmt *CS) { return true; }
bool VisitDecl(const Decl *D) {
@@ -544,6 +549,8 @@ struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> {
void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
RecordStmtCount(S);
+ if (S->getInit())
+ Visit(S->getInit());
Visit(S->getLoopVarStmt());
Visit(S->getRangeStmt());
Visit(S->getBeginStmt());
@@ -815,7 +822,7 @@ bool CodeGenPGO::skipRegionMappingForDecl(const Decl *D) {
// Don't map the functions in system headers.
const auto &SM = CGM.getContext().getSourceManager();
- auto Loc = D->getBody()->getLocStart();
+ auto Loc = D->getBody()->getBeginLoc();
return SM.isInSystemHeader(Loc);
}
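The TraverseLambdaExpr override earlier in this file now visits a lambda's capture initializers as part of the enclosing function while still skipping the body, which is profiled as a separate function. A hedged illustration:

    // Illustrative only: the init-capture expression `n + 1` is counted in
    // outer(); the lambda body gets its own counters as its own function.
    int outer(int n) {
      auto f = [m = n + 1](int x) { return x + m; };
      return f(n);
    }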
diff --git a/lib/CodeGen/CodeGenPGO.h b/lib/CodeGen/CodeGenPGO.h
index 0759e65388b8..120ab651a4a8 100644
--- a/lib/CodeGen/CodeGenPGO.h
+++ b/lib/CodeGen/CodeGenPGO.h
@@ -17,7 +17,6 @@
#include "CGBuilder.h"
#include "CodeGenModule.h"
#include "CodeGenTypes.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include <array>
#include <memory>
diff --git a/lib/CodeGen/CodeGenTBAA.cpp b/lib/CodeGen/CodeGenTBAA.cpp
index ec48231e5247..27d39716d22f 100644
--- a/lib/CodeGen/CodeGenTBAA.cpp
+++ b/lib/CodeGen/CodeGenTBAA.cpp
@@ -20,7 +20,7 @@
#include "clang/AST/Attr.h"
#include "clang/AST/Mangle.h"
#include "clang/AST/RecordLayout.h"
-#include "clang/Frontend/CodeGenOptions.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp
index 1a1395e6ae74..2acf1ac16180 100644
--- a/lib/CodeGen/CodeGenTypes.cpp
+++ b/lib/CodeGen/CodeGenTypes.cpp
@@ -503,6 +503,9 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
case BuiltinType::Id:
#include "clang/Basic/OpenCLImageTypes.def"
+#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
+ case BuiltinType::Id:
+#include "clang/Basic/OpenCLExtensionTypes.def"
case BuiltinType::OCLSampler:
case BuiltinType::OCLEvent:
case BuiltinType::OCLClkEvent:
diff --git a/lib/CodeGen/CodeGenTypes.h b/lib/CodeGen/CodeGenTypes.h
index 626869f00021..8e344e91b8cd 100644
--- a/lib/CodeGen/CodeGenTypes.h
+++ b/lib/CodeGen/CodeGenTypes.h
@@ -17,7 +17,6 @@
#include "CGCall.h"
#include "clang/Basic/ABI.h"
#include "clang/CodeGen/CGFunctionInfo.h"
-#include "clang/Sema/Sema.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Module.h"
diff --git a/lib/CodeGen/ConstantEmitter.h b/lib/CodeGen/ConstantEmitter.h
index b4d1b65743c7..7ad8e5d37cd1 100644
--- a/lib/CodeGen/ConstantEmitter.h
+++ b/lib/CodeGen/ConstantEmitter.h
@@ -38,6 +38,9 @@ private:
/// Whether the constant-emission failed.
bool Failed = false;
+ /// Whether we're in a constant context.
+ bool InConstantContext = false;
+
/// The AST address space where this (non-abstract) initializer is going.
/// Used for generating appropriate placeholders.
LangAS DestAddressSpace;
diff --git a/lib/CodeGen/CoverageMappingGen.cpp b/lib/CodeGen/CoverageMappingGen.cpp
index 2d8446463594..35962c73d9a8 100644
--- a/lib/CodeGen/CoverageMappingGen.cpp
+++ b/lib/CodeGen/CoverageMappingGen.cpp
@@ -67,7 +67,7 @@ public:
void setStartLoc(SourceLocation Loc) { LocStart = Loc; }
- SourceLocation getStartLoc() const {
+ SourceLocation getBeginLoc() const {
assert(LocStart && "Region has no start location");
return *LocStart;
}
@@ -116,7 +116,7 @@ struct SpellingRegion {
}
SpellingRegion(SourceManager &SM, SourceMappingRegion &R)
- : SpellingRegion(SM, R.getStartLoc(), R.getEndLoc()) {}
+ : SpellingRegion(SM, R.getBeginLoc(), R.getEndLoc()) {}
/// Check if the start and end locations appear in source order, i.e
/// top->bottom, left->right.
@@ -204,7 +204,7 @@ public:
/// Get the start of \c S ignoring macro arguments and builtin macros.
SourceLocation getStart(const Stmt *S) {
- SourceLocation Loc = S->getLocStart();
+ SourceLocation Loc = S->getBeginLoc();
while (SM.isMacroArgExpansion(Loc) || isInBuiltin(Loc))
Loc = SM.getImmediateExpansionRange(Loc).getBegin();
return Loc;
@@ -212,7 +212,7 @@ public:
/// Get the end of \c S ignoring macro arguments and builtin macros.
SourceLocation getEnd(const Stmt *S) {
- SourceLocation Loc = S->getLocEnd();
+ SourceLocation Loc = S->getEndLoc();
while (SM.isMacroArgExpansion(Loc) || isInBuiltin(Loc))
Loc = SM.getImmediateExpansionRange(Loc).getBegin();
return getPreciseTokenLocEnd(Loc);
@@ -229,7 +229,7 @@ public:
llvm::SmallSet<FileID, 8> Visited;
SmallVector<std::pair<SourceLocation, unsigned>, 8> FileLocs;
for (const auto &Region : SourceRegions) {
- SourceLocation Loc = Region.getStartLoc();
+ SourceLocation Loc = Region.getBeginLoc();
FileID File = SM.getFileID(Loc);
if (!Visited.insert(File).second)
continue;
@@ -311,7 +311,7 @@ public:
for (const auto &Region : SourceRegions) {
assert(Region.hasEndLoc() && "incomplete region");
- SourceLocation LocStart = Region.getStartLoc();
+ SourceLocation LocStart = Region.getBeginLoc();
assert(SM.getFileID(LocStart).isValid() && "region in invalid file");
// Ignore regions from system headers.
@@ -502,7 +502,7 @@ struct CounterCoverageMappingBuilder
DeferredRegion = None;
// If the region ends in an expansion, find the expansion site.
- FileID StartFile = SM.getFileID(DR.getStartLoc());
+ FileID StartFile = SM.getFileID(DR.getBeginLoc());
if (SM.getFileID(DeferredEndLoc) != StartFile) {
if (isNestedIn(DeferredEndLoc, StartFile)) {
do {
@@ -515,12 +515,12 @@ struct CounterCoverageMappingBuilder
// The parent of this deferred region ends where the containing decl ends,
// so the region isn't useful.
- if (DR.getStartLoc() == DeferredEndLoc)
+ if (DR.getBeginLoc() == DeferredEndLoc)
return Index;
// If we're visiting statements in non-source order (e.g switch cases or
// a loop condition) we can't construct a sensible deferred region.
- if (!SpellingRegion(SM, DR.getStartLoc(), DeferredEndLoc).isInSourceOrder())
+ if (!SpellingRegion(SM, DR.getBeginLoc(), DeferredEndLoc).isInSourceOrder())
return Index;
DR.setGap(true);
@@ -552,6 +552,15 @@ struct CounterCoverageMappingBuilder
completeDeferred(Count, DeferredEndLoc);
}
+ size_t locationDepth(SourceLocation Loc) {
+ size_t Depth = 0;
+ while (Loc.isValid()) {
+ Loc = getIncludeOrExpansionLoc(Loc);
+ Depth++;
+ }
+ return Depth;
+ }
+
/// Pop regions from the stack into the function's list of regions.
///
/// Adds all regions from \c ParentIndex to the top of the stack to the
@@ -562,23 +571,45 @@ struct CounterCoverageMappingBuilder
while (RegionStack.size() > ParentIndex) {
SourceMappingRegion &Region = RegionStack.back();
if (Region.hasStartLoc()) {
- SourceLocation StartLoc = Region.getStartLoc();
+ SourceLocation StartLoc = Region.getBeginLoc();
SourceLocation EndLoc = Region.hasEndLoc()
? Region.getEndLoc()
: RegionStack[ParentIndex].getEndLoc();
+ size_t StartDepth = locationDepth(StartLoc);
+ size_t EndDepth = locationDepth(EndLoc);
while (!SM.isWrittenInSameFile(StartLoc, EndLoc)) {
- // The region ends in a nested file or macro expansion. Create a
- // separate region for each expansion.
- SourceLocation NestedLoc = getStartOfFileOrMacro(EndLoc);
- assert(SM.isWrittenInSameFile(NestedLoc, EndLoc));
-
- if (!isRegionAlreadyAdded(NestedLoc, EndLoc))
- SourceRegions.emplace_back(Region.getCounter(), NestedLoc, EndLoc);
-
- EndLoc = getPreciseTokenLocEnd(getIncludeOrExpansionLoc(EndLoc));
- if (EndLoc.isInvalid())
- llvm::report_fatal_error("File exit not handled before popRegions");
+ bool UnnestStart = StartDepth >= EndDepth;
+ bool UnnestEnd = EndDepth >= StartDepth;
+ if (UnnestEnd) {
+ // The region ends in a nested file or macro expansion. Create a
+ // separate region for each expansion.
+ SourceLocation NestedLoc = getStartOfFileOrMacro(EndLoc);
+ assert(SM.isWrittenInSameFile(NestedLoc, EndLoc));
+
+ if (!isRegionAlreadyAdded(NestedLoc, EndLoc))
+ SourceRegions.emplace_back(Region.getCounter(), NestedLoc, EndLoc);
+
+ EndLoc = getPreciseTokenLocEnd(getIncludeOrExpansionLoc(EndLoc));
+ if (EndLoc.isInvalid())
+ llvm::report_fatal_error("File exit not handled before popRegions");
+ EndDepth--;
+ }
+ if (UnnestStart) {
+ // The region begins in a nested file or macro expansion. Create a
+ // separate region for each expansion.
+ SourceLocation NestedLoc = getEndOfFileOrMacro(StartLoc);
+ assert(SM.isWrittenInSameFile(StartLoc, NestedLoc));
+
+ if (!isRegionAlreadyAdded(StartLoc, NestedLoc))
+ SourceRegions.emplace_back(Region.getCounter(), StartLoc, NestedLoc);
+
+ StartLoc = getIncludeOrExpansionLoc(StartLoc);
+ if (StartLoc.isInvalid())
+ llvm::report_fatal_error("File exit not handled before popRegions");
+ StartDepth--;
+ }
}
+ Region.setStartLoc(StartLoc);
Region.setEndLoc(EndLoc);
MostRecentLocation = EndLoc;
@@ -588,7 +619,7 @@ struct CounterCoverageMappingBuilder
EndLoc == getEndOfFileOrMacro(EndLoc))
MostRecentLocation = getIncludeOrExpansionLoc(EndLoc);
- assert(SM.isWrittenInSameFile(Region.getStartLoc(), EndLoc));
+ assert(SM.isWrittenInSameFile(Region.getBeginLoc(), EndLoc));
assert(SpellingRegion(SM, Region).isInSourceOrder());
SourceRegions.push_back(Region);
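A minimal sketch of the kind of input the unnesting loop above has to handle (hypothetical file and macro names, not from the patch): a coverage region whose begin location sits inside a macro expansion while its end location is in the surrounding file, so the begin side must now be walked out to a shallower depth just like the end side.

    // cover.h
    #define OPEN_IF(c) if (c) {

    // cover.cc
    void work();
    void f(bool cond) {
      OPEN_IF(cond)   // region begins inside the OPEN_IF expansion...
        work();
      }               // ...and ends here in cover.cc, one level shallower
    }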
@@ -625,18 +656,21 @@ struct CounterCoverageMappingBuilder
return RegionStack.back();
}
- /// Propagate counts through the children of \c S.
- Counter propagateCounts(Counter TopCount, const Stmt *S) {
+ /// Propagate counts through the children of \p S if \p VisitChildren is true.
+ /// Otherwise, only emit a count for \p S itself.
+ Counter propagateCounts(Counter TopCount, const Stmt *S,
+ bool VisitChildren = true) {
SourceLocation StartLoc = getStart(S);
SourceLocation EndLoc = getEnd(S);
size_t Index = pushRegion(TopCount, StartLoc, EndLoc);
- Visit(S);
+ if (VisitChildren)
+ Visit(S);
Counter ExitCount = getRegion().getCounter();
popRegions(Index);
// The statement may be spanned by an expansion. Make sure we handle a file
// exit out of this expansion before moving to the next statement.
- if (SM.isBeforeInTranslationUnit(StartLoc, S->getLocStart()))
+ if (SM.isBeforeInTranslationUnit(StartLoc, S->getBeginLoc()))
MostRecentLocation = EndLoc;
return ExitCount;
@@ -648,7 +682,7 @@ struct CounterCoverageMappingBuilder
return SourceRegions.rend() !=
std::find_if(SourceRegions.rbegin(), SourceRegions.rend(),
[&](const SourceMappingRegion &Region) {
- return Region.getStartLoc() == StartLoc &&
+ return Region.getBeginLoc() == StartLoc &&
Region.getEndLoc() == EndLoc;
});
}
@@ -700,7 +734,7 @@ struct CounterCoverageMappingBuilder
for (SourceMappingRegion &I : llvm::reverse(RegionStack)) {
if (!I.hasStartLoc())
continue;
- SourceLocation Loc = I.getStartLoc();
+ SourceLocation Loc = I.getBeginLoc();
if (!isNestedIn(Loc, ParentFile)) {
ParentCounter = I.getCounter();
break;
@@ -826,7 +860,7 @@ struct CounterCoverageMappingBuilder
}
void VisitStmt(const Stmt *S) {
- if (S->getLocStart().isValid())
+ if (S->getBeginLoc().isValid())
extendRegion(S);
for (const Stmt *Child : S->children())
if (Child)
@@ -843,7 +877,16 @@ struct CounterCoverageMappingBuilder
if (Body && SM.isInSystemHeader(SM.getSpellingLoc(getStart(Body))))
return;
- propagateCounts(getRegionCounter(Body), Body);
+ // Do not visit the artificial child nodes of defaulted methods. The
+ // lexer may not be able to report back precise token end locations for
+ // these child nodes (llvm.org/PR39822), and moreover users will not be
+ // able to see coverage for them.
+ bool Defaulted = false;
+ if (auto *Method = dyn_cast<CXXMethodDecl>(D))
+ Defaulted = Method->isDefaulted();
+
+ propagateCounts(getRegionCounter(Body), Body,
+ /*VisitChildren=*/!Defaulted);
assert(RegionStack.empty() && "Regions entered but never exited");
// Discard the last uncompleted deferred region in a decl, if one exists.
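A hypothetical defaulted member illustrating the VisitChildren=false path above: the body still gets a single region with its counter, but the compiler-generated child statements are no longer descended into.

    struct Widget {
      int Id;
      // Counted as one region; its synthesized children are skipped for coverage.
      Widget &operator=(const Widget &) = default;
    };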
@@ -1004,6 +1047,8 @@ struct CounterCoverageMappingBuilder
void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
extendRegion(S);
+ if (S->getInit())
+ Visit(S->getInit());
Visit(S->getLoopVarStmt());
Visit(S->getRangeStmt());
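The added Visit(S->getInit()) call covers the optional init-statement of a range-based for loop (a C++20 addition); a small hypothetical example:

    #include <vector>
    void accumulate(const std::vector<int> &v) {
      for (int total = 0; int x : v)   // 'int total = 0' is the init-statement
        total += x;                    // that Visit(S->getInit()) now covers
    }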
@@ -1109,7 +1154,7 @@ struct CounterCoverageMappingBuilder
Counter Count = addCounters(Parent.getCounter(), getRegionCounter(S));
// Reuse the existing region if it starts at our label. This is typical of
// the first case in a switch.
- if (Parent.hasStartLoc() && Parent.getStartLoc() == getStart(S))
+ if (Parent.hasStartLoc() && Parent.getBeginLoc() == getStart(S))
Parent.setCounter(Count);
else
pushRegion(Count, getStart(S));
diff --git a/lib/CodeGen/CoverageMappingGen.h b/lib/CodeGen/CoverageMappingGen.h
index b08ad896d7a5..c62db096952a 100644
--- a/lib/CodeGen/CoverageMappingGen.h
+++ b/lib/CodeGen/CoverageMappingGen.h
@@ -16,7 +16,6 @@
#include "clang/Basic/LLVM.h"
#include "clang/Basic/SourceLocation.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "clang/Lex/PPCallbacks.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/GlobalValue.h"
diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp
index 00fff144b597..b53304528c3d 100644
--- a/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/lib/CodeGen/ItaniumCXXABI.cpp
@@ -287,6 +287,7 @@ public:
void emitVirtualInheritanceTables(const CXXRecordDecl *RD) override;
bool canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const override;
+ bool canSpeculativelyEmitVTableAsBaseClass(const CXXRecordDecl *RD) const;
void setThunkLinkage(llvm::Function *Thunk, bool ForVTable, GlobalDecl GD,
bool ReturnAdjustment) override {
@@ -634,7 +635,7 @@ CGCallee ItaniumCXXABI::EmitLoadOfMemberFunctionPointer(
if (ShouldEmitCFICheck) {
CodeGenFunction::SanitizerScope SanScope(&CGF);
- CheckSourceLocation = CGF.EmitCheckSourceLocation(E->getLocStart());
+ CheckSourceLocation = CGF.EmitCheckSourceLocation(E->getBeginLoc());
CheckTypeDesc = CGF.EmitCheckTypeDescriptor(QualType(MPT, 0));
llvm::Constant *StaticData[] = {
llvm::ConstantInt::get(CGF.Int8Ty, CodeGenFunction::CFITCK_VMFCall),
@@ -1562,9 +1563,8 @@ void ItaniumCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
Type != Dtor_Base && DD->isVirtual())
Callee = CGF.BuildAppleKextVirtualDestructorCall(DD, Type, DD->getParent());
else
- Callee =
- CGCallee::forDirect(CGM.getAddrOfCXXStructor(DD, getFromDtorType(Type)),
- DD);
+ Callee = CGCallee::forDirect(
+ CGM.getAddrOfCXXStructor(DD, getFromDtorType(Type)), GD);
CGF.EmitCXXMemberOrOperatorCall(DD, Callee, ReturnValueSlot(),
This.getPointer(), VTT, VTTTy,
@@ -1598,12 +1598,6 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
// Set the right visibility.
CGM.setGVProperties(VTable, RD);
- // Use pointer alignment for the vtable. Otherwise we would align them based
- // on the size of the initializer which doesn't make sense as only single
- // values are read.
- unsigned PAlign = CGM.getTarget().getPointerAlign(0);
- VTable->setAlignment(getContext().toCharUnitsFromBits(PAlign).getQuantity());
-
// If this is the magic class __cxxabiv1::__fundamental_type_info,
// we will emit the typeinfo for the fundamental types. This is the
// same behaviour as GCC.
@@ -1703,8 +1697,14 @@ llvm::GlobalVariable *ItaniumCXXABI::getAddrOfVTable(const CXXRecordDecl *RD,
CGM.getItaniumVTableContext().getVTableLayout(RD);
llvm::Type *VTableType = CGM.getVTables().getVTableType(VTLayout);
+ // Use pointer alignment for the vtable. Otherwise we would align them based
+ // on the size of the initializer which doesn't make sense as only single
+ // values are read.
+ unsigned PAlign = CGM.getTarget().getPointerAlign(0);
+
VTable = CGM.CreateOrReplaceCXXRuntimeVariable(
- Name, VTableType, llvm::GlobalValue::ExternalLinkage);
+ Name, VTableType, llvm::GlobalValue::ExternalLinkage,
+ getContext().toCharUnitsFromBits(PAlign).getQuantity());
VTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
CGM.setGVProperties(VTable, RD);
@@ -1750,7 +1750,7 @@ CGCallee ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
VFunc = VFuncLoad;
}
- CGCallee Callee(MethodDecl->getCanonicalDecl(), VFunc);
+ CGCallee Callee(GD, VFunc);
return Callee;
}
@@ -1778,7 +1778,8 @@ void ItaniumCXXABI::emitVirtualInheritanceTables(const CXXRecordDecl *RD) {
VTables.EmitVTTDefinition(VTT, CGM.getVTableLinkage(RD), RD);
}
-bool ItaniumCXXABI::canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const {
+bool ItaniumCXXABI::canSpeculativelyEmitVTableAsBaseClass(
+ const CXXRecordDecl *RD) const {
// We don't emit available_externally vtables if we are in -fapple-kext mode
// because kext mode does not permit devirtualization.
if (CGM.getLangOpts().AppleKext)
@@ -1796,7 +1797,43 @@ bool ItaniumCXXABI::canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const {
// to emit an available_externally copy of vtable.
// FIXME we can still emit a copy of the vtable if we
// can emit definition of the inline functions.
- return !hasAnyUnusedVirtualInlineFunction(RD);
+ if (hasAnyUnusedVirtualInlineFunction(RD))
+ return false;
+
+ // For a class with virtual bases, we must also be able to speculatively
+ // emit the VTT, because CodeGen doesn't have separate notions of "can emit
+ // the vtable" and "can emit the VTT". For a base subobject, this means we
+ // need to be able to emit non-virtual base vtables.
+ if (RD->getNumVBases()) {
+ for (const auto &B : RD->bases()) {
+ auto *BRD = B.getType()->getAsCXXRecordDecl();
+ assert(BRD && "no class for base specifier");
+ if (B.isVirtual() || !BRD->isDynamicClass())
+ continue;
+ if (!canSpeculativelyEmitVTableAsBaseClass(BRD))
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool ItaniumCXXABI::canSpeculativelyEmitVTable(const CXXRecordDecl *RD) const {
+ if (!canSpeculativelyEmitVTableAsBaseClass(RD))
+ return false;
+
+ // For a complete-object vtable (or more specifically, for the VTT), we need
+ // to be able to speculatively emit the vtables of all dynamic virtual bases.
+ for (const auto &B : RD->vbases()) {
+ auto *BRD = B.getType()->getAsCXXRecordDecl();
+ assert(BRD && "no class for base specifier");
+ if (!BRD->isDynamicClass())
+ continue;
+ if (!canSpeculativelyEmitVTableAsBaseClass(BRD))
+ return false;
+ }
+
+ return true;
}
static llvm::Value *performTypeAdjustment(CodeGenFunction &CGF,
Address InitialPtr,
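A hypothetical hierarchy showing why the check was split in two: for Derived, the base-class walk covers Mid (a non-virtual dynamic base), while the complete-object check additionally requires that VBase, the dynamic virtual base referenced from the VTT, can also be emitted speculatively.

    struct VBase   { virtual void f(); };
    struct Mid     : virtual VBase { virtual void g(); };
    struct Derived : Mid { };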
@@ -1916,7 +1953,7 @@ Address ItaniumCXXABI::InitializeArrayCookie(CodeGenFunction &CGF,
// Handle the array cookie specially in ASan.
if (CGM.getLangOpts().Sanitize.has(SanitizerKind::Address) && AS == 0 &&
(expr->getOperatorNew()->isReplaceableGlobalAllocationFunction() ||
- CGM.getCodeGenOpts().SanitizeAddressPoisonClassMemberArrayNewCookie)) {
+ CGM.getCodeGenOpts().SanitizeAddressPoisonCustomArrayCookie)) {
// The store to the CookiePtr does not need to be instrumented.
CGM.getSanitizerMetadata()->disableSanitizerForInstruction(SI);
llvm::FunctionType *FTy =
@@ -2315,11 +2352,13 @@ void CodeGenModule::registerGlobalDtorsWithAtExit() {
FTy, GlobalInitFnName, getTypes().arrangeNullaryFunction(),
SourceLocation());
ASTContext &Ctx = getContext();
+ QualType ReturnTy = Ctx.VoidTy;
+ QualType FunctionTy = Ctx.getFunctionType(ReturnTy, llvm::None, {});
FunctionDecl *FD = FunctionDecl::Create(
Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(),
- &Ctx.Idents.get(GlobalInitFnName), Ctx.VoidTy, nullptr, SC_Static,
+ &Ctx.Idents.get(GlobalInitFnName), FunctionTy, nullptr, SC_Static,
false, false);
- CGF.StartFunction(GlobalDecl(FD), getContext().VoidTy, GlobalInitFn,
+ CGF.StartFunction(GlobalDecl(FD), ReturnTy, GlobalInitFn,
getTypes().arrangeNullaryFunction(), FunctionArgList(),
SourceLocation(), SourceLocation());
@@ -2342,6 +2381,9 @@ void ItaniumCXXABI::registerGlobalDtor(CodeGenFunction &CGF,
const VarDecl &D,
llvm::Constant *dtor,
llvm::Constant *addr) {
+ if (D.isNoDestroy(CGM.getContext()))
+ return;
+
// Use __cxa_atexit if available.
if (CGM.getCodeGenOpts().CXAAtExit)
return emitGlobalDtorWithCXAAtExit(CGF, dtor, addr, D.getTLSKind());
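The new early return skips destructor registration for variables marked as never destroyed (presumably via the [[clang::no_destroy]] attribute or a switch such as -fno-c++-static-destructors; both spellings are assumptions here, only D.isNoDestroy() is visible in the patch). A hypothetical global:

    struct Logger { ~Logger(); };
    [[clang::no_destroy]] static Logger TheLogger;   // assumed spelling; no __cxa_atexit
                                                     // (or atexit) registration is emitted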
@@ -2415,7 +2457,7 @@ ItaniumCXXABI::getOrCreateThreadLocalWrapper(const VarDecl *VD,
llvm::Function::Create(FnTy, getThreadLocalWrapperLinkage(VD, CGM),
WrapperName.str(), &CGM.getModule());
- CGM.SetLLVMFunctionAttributes(nullptr, FI, Wrapper);
+ CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Wrapper);
if (VD->hasDefinition())
CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Wrapper);
@@ -2469,8 +2511,8 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs(
CharUnits GuardAlign = CharUnits::One();
Guard->setAlignment(GuardAlign.getQuantity());
- CodeGenFunction(CGM).GenerateCXXGlobalInitFunc(InitFunc, OrderedInits,
- Address(Guard, GuardAlign));
+ CodeGenFunction(CGM).GenerateCXXGlobalInitFunc(
+ InitFunc, OrderedInits, ConstantAddress(Guard, GuardAlign));
// On Darwin platforms, use CXX_FAST_TLS calling convention.
if (CGM.getTarget().getTriple().isOSDarwin()) {
InitFunc->setCallingConv(llvm::CallingConv::CXX_FAST_TLS);
@@ -2522,7 +2564,8 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs(
llvm::GlobalVariable::ExternalWeakLinkage,
InitFnName.str(), &CGM.getModule());
const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
- CGM.SetLLVMFunctionAttributes(nullptr, FI, cast<llvm::Function>(Init));
+ CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI,
+ cast<llvm::Function>(Init));
}
if (Init) {
@@ -2722,9 +2765,10 @@ llvm::GlobalVariable *ItaniumRTTIBuilder::GetAddrOfTypeName(
// get the mangled name of the type.
llvm::Constant *Init = llvm::ConstantDataArray::getString(VMContext,
Name.substr(4));
+ auto Align = CGM.getContext().getTypeAlignInChars(CGM.getContext().CharTy);
- llvm::GlobalVariable *GV =
- CGM.CreateOrReplaceCXXRuntimeVariable(Name, Init->getType(), Linkage);
+ llvm::GlobalVariable *GV = CGM.CreateOrReplaceCXXRuntimeVariable(
+ Name, Init->getType(), Linkage, Align.getQuantity());
GV->setInitializer(Init);
@@ -2808,6 +2852,9 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) {
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
case BuiltinType::Id:
#include "clang/Basic/OpenCLImageTypes.def"
+#define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \
+ case BuiltinType::Id:
+#include "clang/Basic/OpenCLExtensionTypes.def"
case BuiltinType::OCLSampler:
case BuiltinType::OCLEvent:
case BuiltinType::OCLClkEvent:
@@ -3084,7 +3131,7 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) {
}
assert(isa<ObjCInterfaceType>(Ty));
- // Fall through.
+ LLVM_FALLTHROUGH;
case Type::ObjCInterface:
if (cast<ObjCInterfaceType>(Ty)->getDecl()->getSuperClass()) {
@@ -3363,6 +3410,10 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(
if (CGM.supportsCOMDAT() && GV->isWeakForLinker())
GV->setComdat(M.getOrInsertComdat(GV->getName()));
+ CharUnits Align =
+ CGM.getContext().toCharUnitsFromBits(CGM.getTarget().getPointerAlign(0));
+ GV->setAlignment(Align.getQuantity());
+
// The Itanium ABI specifies that type_info objects must be globally
// unique, with one exception: if the type is an incomplete class
// type or a (possibly indirect) pointer to one. That exception
@@ -4017,7 +4068,7 @@ static void InitCatchParam(CodeGenFunction &CGF,
switch (CatchType.getQualifiers().getObjCLifetime()) {
case Qualifiers::OCL_Strong:
CastExn = CGF.EmitARCRetainNonBlock(CastExn);
- // fallthrough
+ LLVM_FALLTHROUGH;
case Qualifiers::OCL_None:
case Qualifiers::OCL_ExplicitNone:
@@ -4146,7 +4197,7 @@ void ItaniumCXXABI::emitBeginCatch(CodeGenFunction &CGF,
// Emit the local.
CodeGenFunction::AutoVarEmission var = CGF.EmitAutoVarAlloca(*CatchParam);
- InitCatchParam(CGF, *CatchParam, var.getObjectAddress(CGF), S->getLocStart());
+ InitCatchParam(CGF, *CatchParam, var.getObjectAddress(CGF), S->getBeginLoc());
CGF.EmitAutoVarCleanups(var);
}
diff --git a/lib/CodeGen/MacroPPCallbacks.cpp b/lib/CodeGen/MacroPPCallbacks.cpp
index 48dea7d54b1e..013ca15e2391 100644
--- a/lib/CodeGen/MacroPPCallbacks.cpp
+++ b/lib/CodeGen/MacroPPCallbacks.cpp
@@ -14,7 +14,8 @@
#include "MacroPPCallbacks.h"
#include "CGDebugInfo.h"
#include "clang/CodeGen/ModuleBuilder.h"
-#include "clang/Parse/Parser.h"
+#include "clang/Lex/MacroInfo.h"
+#include "clang/Lex/Preprocessor.h"
using namespace clang;
@@ -88,16 +89,6 @@ SourceLocation MacroPPCallbacks::getCorrectLocation(SourceLocation Loc) {
return SourceLocation();
}
-static bool isBuiltinFile(SourceManager &SM, SourceLocation Loc) {
- StringRef Filename(SM.getPresumedLoc(Loc).getFilename());
- return Filename.equals("<built-in>");
-}
-
-static bool isCommandLineFile(SourceManager &SM, SourceLocation Loc) {
- StringRef Filename(SM.getPresumedLoc(Loc).getFilename());
- return Filename.equals("<command line>");
-}
-
void MacroPPCallbacks::updateStatusToNextScope() {
switch (Status) {
case NoScope:
@@ -127,7 +118,7 @@ void MacroPPCallbacks::FileEntered(SourceLocation Loc) {
updateStatusToNextScope();
return;
case BuiltinScope:
- if (isCommandLineFile(PP.getSourceManager(), Loc))
+ if (PP.getSourceManager().isWrittenInCommandLineFile(Loc))
return;
updateStatusToNextScope();
LLVM_FALLTHROUGH;
@@ -147,7 +138,7 @@ void MacroPPCallbacks::FileExited(SourceLocation Loc) {
default:
llvm_unreachable("Do not expect to exit a file from current scope");
case BuiltinScope:
- if (!isBuiltinFile(PP.getSourceManager(), Loc))
+ if (!PP.getSourceManager().isWrittenInBuiltinFile(Loc))
// Skip next scope and change status to MainFileScope.
Status = MainFileScope;
return;
diff --git a/lib/CodeGen/MacroPPCallbacks.h b/lib/CodeGen/MacroPPCallbacks.h
index 48c67e2d36ad..b87a4005d481 100644
--- a/lib/CodeGen/MacroPPCallbacks.h
+++ b/lib/CodeGen/MacroPPCallbacks.h
@@ -11,6 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_LIB_CODEGEN_MACROPPCALLBACKS_H
+#define LLVM_CLANG_LIB_CODEGEN_MACROPPCALLBACKS_H
+
#include "clang/Lex/PPCallbacks.h"
namespace llvm {
@@ -116,3 +119,5 @@ public:
};
} // end namespace clang
+
+#endif
diff --git a/lib/CodeGen/MicrosoftCXXABI.cpp b/lib/CodeGen/MicrosoftCXXABI.cpp
index 059adb78ca30..5545bc6647e6 100644
--- a/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -1552,9 +1552,9 @@ void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF,
if (Type == Dtor_Complete && DD->getParent()->getNumVBases() == 0)
Type = Dtor_Base;
- CGCallee Callee = CGCallee::forDirect(
- CGM.getAddrOfCXXStructor(DD, getFromDtorType(Type)),
- DD);
+ CGCallee Callee =
+ CGCallee::forDirect(CGM.getAddrOfCXXStructor(DD, getFromDtorType(Type)),
+ GlobalDecl(DD, Type));
if (DD->isVirtual()) {
assert(Type != CXXDtorType::Dtor_Deleting &&
@@ -1872,7 +1872,7 @@ CGCallee MicrosoftCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF,
VFunc = Builder.CreateAlignedLoad(VFuncPtr, CGF.getPointerAlign());
}
- CGCallee Callee(MethodDecl->getCanonicalDecl(), VFunc);
+ CGCallee Callee(GD, VFunc);
return Callee;
}
@@ -2024,8 +2024,10 @@ MicrosoftCXXABI::getAddrOfVBTable(const VPtrInfo &VBT, const CXXRecordDecl *RD,
assert(!CGM.getModule().getNamedGlobal(Name) &&
"vbtable with this name already exists: mangling bug?");
- llvm::GlobalVariable *GV =
- CGM.CreateOrReplaceCXXRuntimeVariable(Name, VBTableType, Linkage);
+ CharUnits Alignment =
+ CGM.getContext().getTypeAlignInChars(CGM.getContext().IntTy);
+ llvm::GlobalVariable *GV = CGM.CreateOrReplaceCXXRuntimeVariable(
+ Name, VBTableType, Linkage, Alignment.getQuantity());
GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
if (RD->hasAttr<DLLImportAttr>())
@@ -2240,6 +2242,9 @@ static void emitGlobalDtorWithTLRegDtor(CodeGenFunction &CGF, const VarDecl &VD,
void MicrosoftCXXABI::registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D,
llvm::Constant *Dtor,
llvm::Constant *Addr) {
+ if (D.isNoDestroy(CGM.getContext()))
+ return;
+
if (D.getTLSKind())
return emitGlobalDtorWithTLRegDtor(CGF, D, Dtor, Addr);
@@ -3924,7 +3929,7 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD,
CallArgList Args;
// Push the this ptr.
- Args.add(RValue::get(This), CD->getThisType(getContext()));
+ Args.add(RValue::get(This), CD->getThisType());
// Push the src ptr.
if (SrcVal)
@@ -3951,7 +3956,8 @@ MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD,
// Call the destructor with our arguments.
llvm::Constant *CalleePtr =
CGM.getAddrOfCXXStructor(CD, StructorType::Complete);
- CGCallee Callee = CGCallee::forDirect(CalleePtr, CD);
+ CGCallee Callee =
+ CGCallee::forDirect(CalleePtr, GlobalDecl(CD, Ctor_Complete));
const CGFunctionInfo &CalleeInfo = CGM.getTypes().arrangeCXXConstructorCall(
Args, CD, Ctor_Complete, ExtraArgs.Prefix, ExtraArgs.Suffix);
CGF.EmitCall(CalleeInfo, Callee, ReturnValueSlot(), Args);
diff --git a/lib/CodeGen/ModuleBuilder.cpp b/lib/CodeGen/ModuleBuilder.cpp
index 8aa9bfb421b4..c0a37698e762 100644
--- a/lib/CodeGen/ModuleBuilder.cpp
+++ b/lib/CodeGen/ModuleBuilder.cpp
@@ -17,9 +17,9 @@
#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/Expr.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/TargetInfo.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
@@ -64,7 +64,7 @@ namespace {
std::unique_ptr<CodeGen::CodeGenModule> Builder;
private:
- SmallVector<CXXMethodDecl *, 8> DeferredInlineMethodDefinitions;
+ SmallVector<FunctionDecl *, 8> DeferredInlineMemberFuncDefs;
public:
CodeGeneratorImpl(DiagnosticsEngine &diags, llvm::StringRef ModuleName,
@@ -80,7 +80,7 @@ namespace {
~CodeGeneratorImpl() override {
// There should normally not be any leftover inline method definitions.
- assert(DeferredInlineMethodDefinitions.empty() ||
+ assert(DeferredInlineMemberFuncDefs.empty() ||
Diags.hasErrorOccurred());
}
@@ -132,6 +132,9 @@ namespace {
M->setTargetTriple(Ctx->getTargetInfo().getTriple().getTriple());
M->setDataLayout(Ctx->getTargetInfo().getDataLayout());
+ const auto &SDKVersion = Ctx->getTargetInfo().getSDKVersion();
+ if (!SDKVersion.empty())
+ M->setSDKVersion(SDKVersion);
Builder.reset(new CodeGen::CodeGenModule(Context, HeaderSearchOpts,
PreprocessorOpts, CodeGenOpts,
*M, Diags, CoverageInfo));
@@ -163,16 +166,16 @@ namespace {
}
void EmitDeferredDecls() {
- if (DeferredInlineMethodDefinitions.empty())
+ if (DeferredInlineMemberFuncDefs.empty())
return;
// Emit any deferred inline method definitions. Note that more deferred
// methods may be added during this loop, since ASTConsumer callbacks
// can be invoked if AST inspection results in declarations being added.
HandlingTopLevelDeclRAII HandlingDecl(*this);
- for (unsigned I = 0; I != DeferredInlineMethodDefinitions.size(); ++I)
- Builder->EmitTopLevelDecl(DeferredInlineMethodDefinitions[I]);
- DeferredInlineMethodDefinitions.clear();
+ for (unsigned I = 0; I != DeferredInlineMemberFuncDefs.size(); ++I)
+ Builder->EmitTopLevelDecl(DeferredInlineMemberFuncDefs[I]);
+ DeferredInlineMemberFuncDefs.clear();
}
void HandleInlineFunctionDefinition(FunctionDecl *D) override {
@@ -181,17 +184,6 @@ namespace {
assert(D->doesThisDeclarationHaveABody());
- // Handle friend functions.
- if (D->isInIdentifierNamespace(Decl::IDNS_OrdinaryFriend)) {
- if (Ctx->getTargetInfo().getCXXABI().isMicrosoft()
- && !D->getLexicalDeclContext()->isDependentContext())
- Builder->EmitTopLevelDecl(D);
- return;
- }
-
- // Otherwise, must be a method.
- auto MD = cast<CXXMethodDecl>(D);
-
// We may want to emit this definition. However, that decision might be
// based on computing the linkage, and we have to defer that in case we
// are inside of something that will change the method's final linkage,
@@ -200,13 +192,13 @@ namespace {
// void bar();
// void foo() { bar(); }
// } A;
- DeferredInlineMethodDefinitions.push_back(MD);
+ DeferredInlineMemberFuncDefs.push_back(D);
// Provide some coverage mapping even for methods that aren't emitted.
// Don't do this for templated classes though, as they may not be
// instantiable.
- if (!MD->getParent()->isDependentContext())
- Builder->AddDeferredUnusedCoverageMapping(MD);
+ if (!D->getLexicalDeclContext()->isDependentContext())
+ Builder->AddDeferredUnusedCoverageMapping(D);
}
/// HandleTagDeclDefinition - This callback is invoked each time a TagDecl
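With the CXXMethodDecl-specific branch removed, inline definitions of friend functions are deferred exactly like inline member functions instead of being special-cased for the Microsoft ABI. A hypothetical definition of that shape:

    struct A {
      friend void helper(A &) {}     // inline friend definition, now deferred
      void use() { helper(*this); }  // like any other inline member definition
    };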
diff --git a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
index c164cec5d942..6f00c836f93d 100644
--- a/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
+++ b/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
@@ -14,14 +14,13 @@
#include "clang/AST/DeclObjC.h"
#include "clang/AST/Expr.h"
#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/BackendUtil.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/Preprocessor.h"
-#include "clang/Serialization/ASTWriter.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Bitcode/BitstreamReader.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
@@ -156,6 +155,8 @@ public:
LangOpts.CurrentModule.empty() ? MainFileName : LangOpts.CurrentModule;
CodeGenOpts.setDebugInfo(codegenoptions::FullDebugInfo);
CodeGenOpts.setDebuggerTuning(CI.getCodeGenOpts().getDebuggerTuning());
+ CodeGenOpts.DebugPrefixMap =
+ CI.getInvocation().getCodeGenOpts().DebugPrefixMap;
}
~PCHContainerGenerator() override = default;
diff --git a/lib/CodeGen/SwiftCallingConv.cpp b/lib/CodeGen/SwiftCallingConv.cpp
index b411a501ea81..75a0fa5ce189 100644
--- a/lib/CodeGen/SwiftCallingConv.cpp
+++ b/lib/CodeGen/SwiftCallingConv.cpp
@@ -415,6 +415,40 @@ static bool areBytesInSameUnit(CharUnits first, CharUnits second,
== getOffsetAtStartOfUnit(second, chunkSize);
}
+static bool isMergeableEntryType(llvm::Type *type) {
+ // Opaquely-typed memory is always mergeable.
+ if (type == nullptr) return true;
+
+ // Pointers and integers are always mergeable. In theory we should not
+ // merge pointers, but (1) it doesn't currently matter in practice because
+ // the chunk size is never greater than the size of a pointer and (2)
+ // Swift IRGen uses integer types for a lot of things that are "really"
+ // just storing pointers (like Optional<SomePointer>). If we ever have a
+ // target that would otherwise combine pointers, we should put some effort
+ // into fixing those cases in Swift IRGen and then call out pointer types
+ // here.
+
+ // Floating-point and vector types should never be merged.
+ // Most such types are too large and highly-aligned to ever trigger merging
+ // in practice, but it's important for the rule to cover at least 'half'
+ // and 'float', as well as things like small vectors of 'i1' or 'i8'.
+ return (!type->isFloatingPointTy() && !type->isVectorTy());
+}
+
+bool SwiftAggLowering::shouldMergeEntries(const StorageEntry &first,
+ const StorageEntry &second,
+ CharUnits chunkSize) {
+ // Only merge entries that overlap the same chunk. We test this first
+ // despite being a bit more expensive because this is the condition that
+ // tends to prevent merging.
+ if (!areBytesInSameUnit(first.End - CharUnits::One(), second.Begin,
+ chunkSize))
+ return false;
+
+ return (isMergeableEntryType(first.Type) &&
+ isMergeableEntryType(second.Type));
+}
+
void SwiftAggLowering::finish() {
if (Entries.empty()) {
Finished = true;
@@ -425,12 +459,12 @@ void SwiftAggLowering::finish() {
// which is generally the size of a pointer.
const CharUnits chunkSize = getMaximumVoluntaryIntegerSize(CGM);
- // First pass: if two entries share a chunk, make them both opaque
+ // First pass: if two entries should be merged, make them both opaque
// and stretch one to meet the next.
+ // Also, remember if there are any opaque entries.
bool hasOpaqueEntries = (Entries[0].Type == nullptr);
for (size_t i = 1, e = Entries.size(); i != e; ++i) {
- if (areBytesInSameUnit(Entries[i - 1].End - CharUnits::One(),
- Entries[i].Begin, chunkSize)) {
+ if (shouldMergeEntries(Entries[i - 1], Entries[i], chunkSize)) {
Entries[i - 1].Type = nullptr;
Entries[i].Type = nullptr;
Entries[i - 1].End = Entries[i].Begin;
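A sketch of what changes with shouldMergeEntries (hypothetical struct, not from the patch): both fields below fall into the same 8-byte chunk, so the old byte-overlap test would have merged them into one opaque entry; because float is not a mergeable entry type, the two entries now stay separate.

    struct S {
      short tag;    // integer entry: mergeable
      float value;  // floating-point entry: kept separate from 'tag'
    };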
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index 6f6c5f50c2e7..89ec73670a73 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -19,9 +19,9 @@
#include "CGValue.h"
#include "CodeGenFunction.h"
#include "clang/AST/RecordLayout.h"
+#include "clang/Basic/CodeGenOptions.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/CodeGen/SwiftCallingConv.h"
-#include "clang/Frontend/CodeGenOptions.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
@@ -720,10 +720,12 @@ ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const {
// This is a very simple ABI that relies a lot on DefaultABIInfo.
//===----------------------------------------------------------------------===//
-class WebAssemblyABIInfo final : public DefaultABIInfo {
+class WebAssemblyABIInfo final : public SwiftABIInfo {
+ DefaultABIInfo defaultInfo;
+
public:
explicit WebAssemblyABIInfo(CodeGen::CodeGenTypes &CGT)
- : DefaultABIInfo(CGT) {}
+ : SwiftABIInfo(CGT), defaultInfo(CGT) {}
private:
ABIArgInfo classifyReturnType(QualType RetTy) const;
@@ -741,6 +743,15 @@ private:
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
+
+ bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
+ bool asReturnValue) const override {
+ return occupiesMoreThan(CGT, scalars, /*total*/ 4);
+ }
+
+ bool isSwiftErrorInRegister() const override {
+ return false;
+ }
};
class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo {
@@ -778,7 +789,7 @@ ABIArgInfo WebAssemblyABIInfo::classifyArgumentType(QualType Ty) const {
}
// Otherwise just do the default thing.
- return DefaultABIInfo::classifyArgumentType(Ty);
+ return defaultInfo.classifyArgumentType(Ty);
}
ABIArgInfo WebAssemblyABIInfo::classifyReturnType(QualType RetTy) const {
@@ -798,7 +809,7 @@ ABIArgInfo WebAssemblyABIInfo::classifyReturnType(QualType RetTy) const {
}
// Otherwise just do the default thing.
- return DefaultABIInfo::classifyReturnType(RetTy);
+ return defaultInfo.classifyReturnType(RetTy);
}
Address WebAssemblyABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
@@ -2337,7 +2348,7 @@ static std::string qualifyWindowsLibrary(llvm::StringRef Lib) {
bool Quote = (Lib.find(" ") != StringRef::npos);
std::string ArgStr = Quote ? "\"" : "";
ArgStr += Lib;
- if (!Lib.endswith_lower(".lib"))
+ if (!Lib.endswith_lower(".lib") && !Lib.endswith_lower(".a"))
ArgStr += ".lib";
ArgStr += Quote ? "\"" : "";
return ArgStr;
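Roughly, the added endswith_lower(".a") check keeps MinGW-style archive names intact instead of appending a second extension (illustrative inputs, assumed results):

    // "foo"       -> "foo.lib"
    // "foo.lib"   -> "foo.lib"
    // "libfoo.a"  -> "libfoo.a"   (no longer becomes "libfoo.a.lib")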
@@ -3944,18 +3955,39 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs,
return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Width));
}
- // Bool type is always extended to the ABI, other builtin types are not
- // extended.
- const BuiltinType *BT = Ty->getAs<BuiltinType>();
- if (BT && BT->getKind() == BuiltinType::Bool)
- return ABIArgInfo::getExtend(Ty);
+ if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
+ switch (BT->getKind()) {
+ case BuiltinType::Bool:
+ // Bool type is always extended to the ABI, other builtin types are not
+ // extended.
+ return ABIArgInfo::getExtend(Ty);
- // Mingw64 GCC uses the old 80 bit extended precision floating point unit. It
- // passes them indirectly through memory.
- if (IsMingw64 && BT && BT->getKind() == BuiltinType::LongDouble) {
- const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
- if (LDF == &llvm::APFloat::x87DoubleExtended())
- return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
+ case BuiltinType::LongDouble:
+ // Mingw64 GCC uses the old 80 bit extended precision floating point
+ // unit. It passes them indirectly through memory.
+ if (IsMingw64) {
+ const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
+ if (LDF == &llvm::APFloat::x87DoubleExtended())
+ return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
+ }
+ break;
+
+ case BuiltinType::Int128:
+ case BuiltinType::UInt128:
+ // If it's a parameter type, the normal ABI rule is that arguments larger
+ // than 8 bytes are passed indirectly. GCC follows it. We follow it too,
+ // even though it isn't particularly efficient.
+ if (!IsReturnType)
+ return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
+
+ // Mingw64 GCC returns i128 in XMM0. Coerce to v2i64 to handle that.
+ // Clang matches them for compatibility.
+ return ABIArgInfo::getDirect(
+ llvm::VectorType::get(llvm::Type::getInt64Ty(getVMContext()), 2));
+
+ default:
+ break;
+ }
}
return ABIArgInfo::getDirect();
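An illustrative declaration for the new Int128/UInt128 cases (assumed lowering, not spelled out beyond the comments in the patch): the 128-bit parameter is passed indirectly, while the 128-bit return value is coerced to a two-element i64 vector so it comes back in XMM0 the way MinGW GCC returns it.

    __int128 twice(__int128 x);   // 'x' by pointer; result as <2 x i64> in XMM0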
@@ -4969,6 +5001,31 @@ public:
}
bool doesReturnSlotInterfereWithArgs() const override { return false; }
+
+ void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+ CodeGen::CodeGenModule &CGM) const override {
+ const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
+ if (!FD)
+ return;
+ llvm::Function *Fn = cast<llvm::Function>(GV);
+
+ auto Kind = CGM.getCodeGenOpts().getSignReturnAddress();
+ if (Kind != CodeGenOptions::SignReturnAddressScope::None) {
+ Fn->addFnAttr("sign-return-address",
+ Kind == CodeGenOptions::SignReturnAddressScope::All
+ ? "all"
+ : "non-leaf");
+
+ auto Key = CGM.getCodeGenOpts().getSignReturnAddressKey();
+ Fn->addFnAttr("sign-return-address-key",
+ Key == CodeGenOptions::SignReturnAddressKeyValue::AKey
+ ? "a_key"
+ : "b_key");
+ }
+
+ if (CGM.getCodeGenOpts().BranchTargetEnforcement)
+ Fn->addFnAttr("branch-target-enforcement");
+ }
};
class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo {
@@ -4976,6 +5033,9 @@ public:
WindowsAArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIInfo::ABIKind K)
: AArch64TargetCodeGenInfo(CGT, K) {}
+ void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+ CodeGen::CodeGenModule &CGM) const override;
+
void getDependentLibraryOption(llvm::StringRef Lib,
llvm::SmallString<24> &Opt) const override {
Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib);
@@ -4986,6 +5046,14 @@ public:
Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
}
};
+
+void WindowsAArch64TargetCodeGenInfo::setTargetAttributes(
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
+ AArch64TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
+ if (GV->isDeclaration())
+ return;
+ addStackProbeTargetAttributes(D, GV, CGM);
+}
}
ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const {
@@ -5532,6 +5600,9 @@ public:
private:
ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic) const;
ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic) const;
+ ABIArgInfo classifyHomogeneousAggregate(QualType Ty, const Type *Base,
+ uint64_t Members) const;
+ ABIArgInfo coerceIllegalVector(QualType Ty) const;
bool isIllegalVectorType(QualType Ty) const;
bool isHomogeneousAggregateBaseType(QualType Ty) const override;
@@ -5706,6 +5777,41 @@ void ARMABIInfo::setCCs() {
RuntimeCC = abiCC;
}
+ABIArgInfo ARMABIInfo::coerceIllegalVector(QualType Ty) const {
+ uint64_t Size = getContext().getTypeSize(Ty);
+ if (Size <= 32) {
+ llvm::Type *ResType =
+ llvm::Type::getInt32Ty(getVMContext());
+ return ABIArgInfo::getDirect(ResType);
+ }
+ if (Size == 64 || Size == 128) {
+ llvm::Type *ResType = llvm::VectorType::get(
+ llvm::Type::getInt32Ty(getVMContext()), Size / 32);
+ return ABIArgInfo::getDirect(ResType);
+ }
+ return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
+}
+
+ABIArgInfo ARMABIInfo::classifyHomogeneousAggregate(QualType Ty,
+ const Type *Base,
+ uint64_t Members) const {
+ assert(Base && "Base class should be set for homogeneous aggregate");
+ // Base can be a floating-point or a vector.
+ if (const VectorType *VT = Base->getAs<VectorType>()) {
+ // FP16 vectors should be converted to integer vectors
+ if (!getTarget().hasLegalHalfType() &&
+ (VT->getElementType()->isFloat16Type() ||
+ VT->getElementType()->isHalfType())) {
+ uint64_t Size = getContext().getTypeSize(VT);
+ llvm::Type *NewVecTy = llvm::VectorType::get(
+ llvm::Type::getInt32Ty(getVMContext()), Size / 32);
+ llvm::Type *Ty = llvm::ArrayType::get(NewVecTy, Members);
+ return ABIArgInfo::getDirect(Ty, 0, nullptr, false);
+ }
+ }
+ return ABIArgInfo::getDirect(nullptr, 0, nullptr, false);
+}
+
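A hypothetical type exercising the FP16 path of classifyHomogeneousAggregate: without native half support, a homogeneous aggregate of __fp16 vectors is rewritten as an array of i32 vectors, so its calling convention does not depend on FP16 hardware.

    typedef __fp16 half4 __attribute__((vector_size(8)));
    struct HFA { half4 a, b; };   // assumed to lower to [2 x <2 x i32>] here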
ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty,
bool isVariadic) const {
// 6.1.2.1 The following argument types are VFP CPRCs:
@@ -5720,25 +5826,8 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty,
Ty = useFirstFieldIfTransparentUnion(Ty);
// Handle illegal vector types here.
- if (isIllegalVectorType(Ty)) {
- uint64_t Size = getContext().getTypeSize(Ty);
- if (Size <= 32) {
- llvm::Type *ResType =
- llvm::Type::getInt32Ty(getVMContext());
- return ABIArgInfo::getDirect(ResType);
- }
- if (Size == 64) {
- llvm::Type *ResType = llvm::VectorType::get(
- llvm::Type::getInt32Ty(getVMContext()), 2);
- return ABIArgInfo::getDirect(ResType);
- }
- if (Size == 128) {
- llvm::Type *ResType = llvm::VectorType::get(
- llvm::Type::getInt32Ty(getVMContext()), 4);
- return ABIArgInfo::getDirect(ResType);
- }
- return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
- }
+ if (isIllegalVectorType(Ty))
+ return coerceIllegalVector(Ty);
// _Float16 and __fp16 get passed as if it were an int or float, but with
// the top 16 bits unspecified. This is not done for OpenCL as it handles the
@@ -5774,11 +5863,8 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty,
// into VFP registers.
const Type *Base = nullptr;
uint64_t Members = 0;
- if (isHomogeneousAggregate(Ty, Base, Members)) {
- assert(Base && "Base class should be set for homogeneous aggregate");
- // Base can be a floating-point or a vector.
- return ABIArgInfo::getDirect(nullptr, 0, nullptr, false);
- }
+ if (isHomogeneousAggregate(Ty, Base, Members))
+ return classifyHomogeneousAggregate(Ty, Base, Members);
} else if (getABIKind() == ARMABIInfo::AAPCS16_VFP) {
// WatchOS does have homogeneous aggregates. Note that we intentionally use
// this convention even for a variadic function: the backend will use GPRs
@@ -5937,9 +6023,15 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy,
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
- // Large vector types should be returned via memory.
- if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128) {
- return getNaturalAlignIndirect(RetTy);
+ if (const VectorType *VT = RetTy->getAs<VectorType>()) {
+ // Large vector types should be returned via memory.
+ if (getContext().getTypeSize(RetTy) > 128)
+ return getNaturalAlignIndirect(RetTy);
+ // FP16 vectors should be converted to integer vectors
+ if (!getTarget().hasLegalHalfType() &&
+ (VT->getElementType()->isFloat16Type() ||
+ VT->getElementType()->isHalfType()))
+ return coerceIllegalVector(RetTy);
}
// _Float16 and __fp16 get returned as if it were an int or float, but with
@@ -5999,11 +6091,8 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy,
if (IsEffectivelyAAPCS_VFP) {
const Type *Base = nullptr;
uint64_t Members = 0;
- if (isHomogeneousAggregate(RetTy, Base, Members)) {
- assert(Base && "Base class should be set for homogeneous aggregate");
- // Homogeneous Aggregates are returned directly.
- return ABIArgInfo::getDirect(nullptr, 0, nullptr, false);
- }
+ if (isHomogeneousAggregate(RetTy, Base, Members))
+ return classifyHomogeneousAggregate(RetTy, Base, Members);
}
// Aggregates <= 4 bytes are returned in r0; other aggregates
@@ -6038,6 +6127,13 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy,
/// isIllegalVector - check whether Ty is an illegal vector type.
bool ARMABIInfo::isIllegalVectorType(QualType Ty) const {
if (const VectorType *VT = Ty->getAs<VectorType> ()) {
+ // On targets that don't support FP16, FP16 is expanded into float, and we
+ // don't want the ABI to depend on whether or not FP16 is supported in
+ // hardware. Thus return true so that FP16 vectors are coerced into integer vectors.
+ if (!getTarget().hasLegalHalfType() &&
+ (VT->getElementType()->isFloat16Type() ||
+ VT->getElementType()->isHalfType()))
+ return true;
if (isAndroid()) {
// Android shipped using Clang 3.1, which supported a slightly different
// vector ABI. The primary differences were that 3-element vector types
@@ -8164,6 +8260,137 @@ SparcV9TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
return false;
}
+// ARC ABI implementation.
+namespace {
+
+class ARCABIInfo : public DefaultABIInfo {
+public:
+ using DefaultABIInfo::DefaultABIInfo;
+
+private:
+ Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+ QualType Ty) const override;
+
+ void updateState(const ABIArgInfo &Info, QualType Ty, CCState &State) const {
+ if (!State.FreeRegs)
+ return;
+ if (Info.isIndirect() && Info.getInReg())
+ State.FreeRegs--;
+ else if (Info.isDirect() && Info.getInReg()) {
+ unsigned sz = (getContext().getTypeSize(Ty) + 31) / 32;
+ if (sz < State.FreeRegs)
+ State.FreeRegs -= sz;
+ else
+ State.FreeRegs = 0;
+ }
+ }
+
+ void computeInfo(CGFunctionInfo &FI) const override {
+ CCState State(FI.getCallingConvention());
+ // ARC uses 8 registers to pass arguments.
+ State.FreeRegs = 8;
+
+ if (!getCXXABI().classifyReturnType(FI))
+ FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+ updateState(FI.getReturnInfo(), FI.getReturnType(), State);
+ for (auto &I : FI.arguments()) {
+ I.info = classifyArgumentType(I.type, State.FreeRegs);
+ updateState(I.info, I.type, State);
+ }
+ }
+
+ ABIArgInfo getIndirectByRef(QualType Ty, bool HasFreeRegs) const;
+ ABIArgInfo getIndirectByValue(QualType Ty) const;
+ ABIArgInfo classifyArgumentType(QualType Ty, uint8_t FreeRegs) const;
+ ABIArgInfo classifyReturnType(QualType RetTy) const;
+};
+
+class ARCTargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+ ARCTargetCodeGenInfo(CodeGenTypes &CGT)
+ : TargetCodeGenInfo(new ARCABIInfo(CGT)) {}
+};
+
+
+ABIArgInfo ARCABIInfo::getIndirectByRef(QualType Ty, bool HasFreeRegs) const {
+ return HasFreeRegs ? getNaturalAlignIndirectInReg(Ty) :
+ getNaturalAlignIndirect(Ty, false);
+}
+
+ABIArgInfo ARCABIInfo::getIndirectByValue(QualType Ty) const {
+ // Compute the byval alignment.
+ const unsigned MinABIStackAlignInBytes = 4;
+ unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8;
+ return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true,
+ TypeAlign > MinABIStackAlignInBytes);
+}
+
+Address ARCABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+ QualType Ty) const {
+ return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false,
+ getContext().getTypeInfoInChars(Ty),
+ CharUnits::fromQuantity(4), true);
+}
+
+ABIArgInfo ARCABIInfo::classifyArgumentType(QualType Ty,
+ uint8_t FreeRegs) const {
+ // Handle the generic C++ ABI.
+ const RecordType *RT = Ty->getAs<RecordType>();
+ if (RT) {
+ CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
+ if (RAA == CGCXXABI::RAA_Indirect)
+ return getIndirectByRef(Ty, FreeRegs > 0);
+
+ if (RAA == CGCXXABI::RAA_DirectInMemory)
+ return getIndirectByValue(Ty);
+ }
+
+ // Treat an enum type as its underlying type.
+ if (const EnumType *EnumTy = Ty->getAs<EnumType>())
+ Ty = EnumTy->getDecl()->getIntegerType();
+
+ auto SizeInRegs = llvm::alignTo(getContext().getTypeSize(Ty), 32) / 32;
+
+ if (isAggregateTypeForABI(Ty)) {
+ // Structures with flexible arrays are always indirect.
+ if (RT && RT->getDecl()->hasFlexibleArrayMember())
+ return getIndirectByValue(Ty);
+
+ // Ignore empty structs/unions.
+ if (isEmptyRecord(getContext(), Ty, true))
+ return ABIArgInfo::getIgnore();
+
+ llvm::LLVMContext &LLVMContext = getVMContext();
+
+ llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext);
+ SmallVector<llvm::Type *, 3> Elements(SizeInRegs, Int32);
+ llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
+
+ return FreeRegs >= SizeInRegs ?
+ ABIArgInfo::getDirectInReg(Result) :
+ ABIArgInfo::getDirect(Result, 0, nullptr, false);
+ }
+
+ return Ty->isPromotableIntegerType() ?
+ (FreeRegs >= SizeInRegs ? ABIArgInfo::getExtendInReg(Ty) :
+ ABIArgInfo::getExtend(Ty)) :
+ (FreeRegs >= SizeInRegs ? ABIArgInfo::getDirectInReg() :
+ ABIArgInfo::getDirect());
+}
+
+ABIArgInfo ARCABIInfo::classifyReturnType(QualType RetTy) const {
+ if (RetTy->isAnyComplexType())
+ return ABIArgInfo::getDirectInReg();
+
+ // Return values larger than 4 registers are returned indirectly.
+ auto RetSize = llvm::alignTo(getContext().getTypeSize(RetTy), 32) / 32;
+ if (RetSize > 4)
+ return getIndirectByRef(RetTy, /*HasFreeRegs*/ true);
+
+ return DefaultABIInfo::classifyReturnType(RetTy);
+}
+
+} // End anonymous namespace.
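A sketch of how the ARC register accounting plays out (hypothetical signature; classifications assumed from the code above): eight argument registers are available, aggregates are split into i32 pieces, and later arguments fall back to the plain classifications once FreeRegs runs out.

    struct Pair { int a, b; };
    void callee(Pair p, int x, long long y);
    // p -> {i32, i32} in registers   (FreeRegs 8 -> 6)
    // x -> one register              (FreeRegs 6 -> 5)
    // y -> two registers             (FreeRegs 5 -> 3)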
//===----------------------------------------------------------------------===//
// XCore ABI Implementation
@@ -8553,7 +8780,7 @@ static bool appendRecordType(SmallStringEnc &Enc, const RecordType *RT,
// The ABI requires unions to be sorted but not structures.
// See FieldEncoding::operator< for sort algorithm.
if (RT->isUnionType())
- llvm::sort(FE.begin(), FE.end());
+ llvm::sort(FE);
// We can now complete the TypeString.
unsigned E = FE.size();
for (unsigned I = 0; I != E; ++I) {
@@ -8597,7 +8824,7 @@ static bool appendEnumType(SmallStringEnc &Enc, const EnumType *ET,
EnumEnc += '}';
FE.push_back(FieldEncoding(!I->getName().empty(), EnumEnc));
}
- llvm::sort(FE.begin(), FE.end());
+ llvm::sort(FE);
unsigned E = FE.size();
for (unsigned I = 0; I != E; ++I) {
if (I)
@@ -9185,6 +9412,8 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
return SetCGInfo(new SparcV9TargetCodeGenInfo(Types));
case llvm::Triple::xcore:
return SetCGInfo(new XCoreTargetCodeGenInfo(Types));
+ case llvm::Triple::arc:
+ return SetCGInfo(new ARCTargetCodeGenInfo(Types));
case llvm::Triple::spir:
case llvm::Triple::spir64:
return SetCGInfo(new SPIRTargetCodeGenInfo(Types));
diff --git a/lib/CodeGen/VarBypassDetector.cpp b/lib/CodeGen/VarBypassDetector.cpp
index 2f8a591a3e7f..859cdd4282cc 100644
--- a/lib/CodeGen/VarBypassDetector.cpp
+++ b/lib/CodeGen/VarBypassDetector.cpp
@@ -78,7 +78,7 @@ bool VarBypassDetector::BuildScopeInformation(const Stmt *S,
return false;
++StmtsToSkip;
}
- // Fall through
+ LLVM_FALLTHROUGH;
case Stmt::GotoStmtClass:
FromScopes.push_back({S, ParentScope});
diff --git a/lib/CodeGen/VarBypassDetector.h b/lib/CodeGen/VarBypassDetector.h
index f50baf4bab9f..47fe13cfacd6 100644
--- a/lib/CodeGen/VarBypassDetector.h
+++ b/lib/CodeGen/VarBypassDetector.h
@@ -15,6 +15,7 @@
#ifndef LLVM_CLANG_LIB_CODEGEN_VARBYPASSDETECTOR_H
#define LLVM_CLANG_LIB_CODEGEN_VARBYPASSDETECTOR_H
+#include "clang/AST/Decl.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"