aboutsummaryrefslogtreecommitdiff
path: root/lib/CodeGen/CGOpenMPRuntime.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/CodeGen/CGOpenMPRuntime.cpp')
-rw-r--r--lib/CodeGen/CGOpenMPRuntime.cpp860
1 files changed, 528 insertions, 332 deletions
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index 9f8aa6c8d964..5db29eb6004d 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -19,6 +19,7 @@
#include "clang/AST/Decl.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DerivedTypes.h"
@@ -427,7 +428,7 @@ public:
/// \brief Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
-enum OpenMPLocationFlags {
+enum OpenMPLocationFlags : unsigned {
/// \brief Use trampoline for internal microtask.
OMP_IDENT_IMD = 0x01,
/// \brief Use c-style ident structure.
@@ -443,7 +444,14 @@ enum OpenMPLocationFlags {
/// \brief Implicit barrier in 'sections' directive.
OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
/// \brief Implicit barrier in 'single' directive.
- OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140
+ OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
+ /// Call of __kmp_for_static_init for static loop.
+ OMP_IDENT_WORK_LOOP = 0x200,
+ /// Call of __kmp_for_static_init for sections.
+ OMP_IDENT_WORK_SECTIONS = 0x400,
+ /// Call of __kmp_for_static_init for distribute.
+ OMP_IDENT_WORK_DISTRIBUTE = 0x800,
+ LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
/// \brief Describes ident structure that describes a source location.
@@ -660,27 +668,47 @@ enum OpenMPRTLFunction {
//
// Offloading related calls
//
- // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
- // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
+ // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
+ // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
// *arg_types);
OMPRTL__tgt_target,
- // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
- // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
- // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
+ // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
+ // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
+ // *arg_types);
+ OMPRTL__tgt_target_nowait,
+ // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
+ // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
+ // *arg_types, int32_t num_teams, int32_t thread_limit);
OMPRTL__tgt_target_teams,
+ // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
+ // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
+ // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
+ OMPRTL__tgt_target_teams_nowait,
// Call to void __tgt_register_lib(__tgt_bin_desc *desc);
OMPRTL__tgt_register_lib,
// Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
OMPRTL__tgt_unregister_lib,
- // Call to void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
- // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
+ // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
+ // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
OMPRTL__tgt_target_data_begin,
- // Call to void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
- // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
+ // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
+ // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
+ // *arg_types);
+ OMPRTL__tgt_target_data_begin_nowait,
+ // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
+ // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
OMPRTL__tgt_target_data_end,
- // Call to void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
- // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
+ // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
+ // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
+ // *arg_types);
+ OMPRTL__tgt_target_data_end_nowait,
+ // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
+ // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
OMPRTL__tgt_target_data_update,
+ // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
+ // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
+ // *arg_types);
+ OMPRTL__tgt_target_data_update_nowait,
};
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
@@ -862,18 +890,7 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
}
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
- if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
- return CGF.EmitOMPArraySectionExpr(OASE);
- if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E))
- return CGF.EmitLValue(ASE);
- auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
- DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
- CGF.CapturedStmtInfo &&
- CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
- E->getType(), VK_LValue, E->getExprLoc());
- // Store the address of the original variable associated with the LHS
- // implicit variable.
- return CGF.EmitLValue(&DRE);
+ return CGF.EmitOMPSharedLValue(E);
}
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
@@ -919,8 +936,9 @@ ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
assert(SharedAddresses.size() == N &&
"Number of generated lvalues must be exactly N.");
- SharedAddresses.emplace_back(emitSharedLValue(CGF, ClausesData[N].Ref),
- emitSharedLValueUB(CGF, ClausesData[N].Ref));
+ LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
+ LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
+ SharedAddresses.emplace_back(First, Second);
}
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
@@ -928,7 +946,7 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
QualType PrivateType = PrivateVD->getType();
bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
- if (!AsArraySection && !PrivateType->isVariablyModifiedType()) {
+ if (!PrivateType->isVariablyModifiedType()) {
Sizes.emplace_back(
CGF.getTypeSize(
SharedAddresses[N].first.getType().getNonReferenceType()),
@@ -966,10 +984,9 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
auto *PrivateVD =
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
QualType PrivateType = PrivateVD->getType();
- bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
- if (!AsArraySection && !PrivateType->isVariablyModifiedType()) {
+ if (!PrivateType->isVariablyModifiedType()) {
assert(!Size && !Sizes[N].second &&
- "Size should be nullptr for non-variably modified redution "
+ "Size should be nullptr for non-variably modified reduction "
"items.");
return;
}
@@ -995,9 +1012,9 @@ void ReductionCodeGen::emitInitialization(
SharedLVal = CGF.MakeAddrLValue(
CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
CGF.ConvertTypeForMem(SharedType)),
- SharedType, SharedAddresses[N].first.getBaseInfo());
- if (isa<OMPArraySectionExpr>(ClausesData[N].Ref) ||
- CGF.getContext().getAsArrayType(PrivateVD->getType())) {
+ SharedType, SharedAddresses[N].first.getBaseInfo(),
+ CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
+ if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
} else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
@@ -1040,15 +1057,16 @@ static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
if (auto *PtrTy = BaseTy->getAs<PointerType>())
BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
else {
- BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
- BaseTy->castAs<ReferenceType>());
+ LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
+ BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
}
BaseTy = BaseTy->getPointeeType();
}
return CGF.MakeAddrLValue(
CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
CGF.ConvertTypeForMem(ElTy)),
- BaseLV.getType(), BaseLV.getBaseInfo());
+ BaseLV.getType(), BaseLV.getBaseInfo(),
+ CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
@@ -1106,11 +1124,14 @@ Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
OriginalBaseLValue);
llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
- llvm::Value *Ptr =
- CGF.Builder.CreateGEP(PrivateAddr.getPointer(), Adjustment);
+ llvm::Value *PrivatePointer =
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ PrivateAddr.getPointer(),
+ SharedAddresses[N].first.getAddress().getType());
+ llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
return castToBase(CGF, OrigVD->getType(),
SharedAddresses[N].first.getType(),
- OriginalBaseLValue.getPointer()->getType(),
+ OriginalBaseLValue.getAddress().getType(),
OriginalBaseLValue.getAlignment(), Ptr);
}
BaseDecls.emplace_back(
@@ -1146,7 +1167,7 @@ LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
CodeGenFunction &CGF) {
return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
getThreadIDVariable()->getType(),
- LValueBaseInfo(AlignmentSource::Decl, false));
+ AlignmentSource::Decl);
}
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
@@ -1204,7 +1225,14 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
.getAddress();
});
(void)Scope.Privatize();
- CGF.EmitIgnoredExpr(CombinerInitializer);
+ if (!IsCombiner && Out->hasInit() &&
+ !CGF.isTrivialInitializer(Out->getInit())) {
+ CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
+ Out->getType().getQualifiers(),
+ /*IsInitializer=*/true);
+ }
+ if (CombinerInitializer)
+ CGF.EmitIgnoredExpr(CombinerInitializer);
Scope.ForceCleanup();
CGF.FinishFunction();
return Fn;
@@ -1230,7 +1258,10 @@ void CGOpenMPRuntime::emitUserDefinedReduction(
Orig = &C.Idents.get("omp_orig");
}
Initializer = emitCombinerOrInitializer(
- CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()),
+ CGM, D->getType(),
+ D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
+ : nullptr,
+ cast<VarDecl>(D->lookup(Orig).front()),
cast<VarDecl>(D->lookup(Priv).front()),
/*IsCombiner=*/false);
}
@@ -1283,6 +1314,15 @@ static llvm::Value *emitParallelOrTeamsOutlinedFunction(
HasCancel = OPSD->hasCancel();
else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
HasCancel = OPFD->hasCancel();
+ else if (auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
+ HasCancel = OPFD->hasCancel();
+ else if (auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
+ HasCancel = OPFD->hasCancel();
+ else if (auto *OPFD = dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
+ HasCancel = OPFD->hasCancel();
+ else if (auto *OPFD =
+ dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
+ HasCancel = OPFD->hasCancel();
CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
HasCancel, OutlinedHelperName);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
@@ -1442,19 +1482,24 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
if (ThreadID != nullptr)
return ThreadID;
}
- if (auto *OMPRegionInfo =
- dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
- if (OMPRegionInfo->getThreadIDVariable()) {
- // Check if this an outlined function with thread id passed as argument.
- auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
- ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
- // If value loaded in entry block, cache it and use it everywhere in
- // function.
- if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
- auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
- Elem.second.ThreadID = ThreadID;
+ // If exceptions are enabled, do not use parameter to avoid possible crash.
+ if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
+ !CGF.getLangOpts().CXXExceptions ||
+ CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
+ if (auto *OMPRegionInfo =
+ dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
+ if (OMPRegionInfo->getThreadIDVariable()) {
+ // Check if this is an outlined function with thread id passed as argument.
+ auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
+ ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
+ // If value loaded in entry block, cache it and use it everywhere in
+ // function.
+ if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
+ auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
+ Elem.second.ThreadID = ThreadID;
+ }
+ return ThreadID;
}
- return ThreadID;
}
}
@@ -1464,12 +1509,13 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
// function.
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
- ThreadID =
- CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
- emitUpdateLocation(CGF, Loc));
+ auto *Call = CGF.Builder.CreateCall(
+ createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
+ emitUpdateLocation(CGF, Loc));
+ Call->setCallingConv(CGF.getRuntimeCC());
auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
- Elem.second.ThreadID = ThreadID;
- return ThreadID;
+ Elem.second.ThreadID = Call;
+ return Call;
}
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
@@ -2001,32 +2047,48 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
break;
}
case OMPRTL__tgt_target: {
- // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
- // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
+ // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
+ // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
// *arg_types);
- llvm::Type *TypeParams[] = {CGM.Int32Ty,
+ llvm::Type *TypeParams[] = {CGM.Int64Ty,
CGM.VoidPtrTy,
CGM.Int32Ty,
CGM.VoidPtrPtrTy,
CGM.VoidPtrPtrTy,
CGM.SizeTy->getPointerTo(),
- CGM.Int32Ty->getPointerTo()};
+ CGM.Int64Ty->getPointerTo()};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
break;
}
+ case OMPRTL__tgt_target_nowait: {
+ // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
+ // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
+ // int64_t *arg_types);
+ llvm::Type *TypeParams[] = {CGM.Int64Ty,
+ CGM.VoidPtrTy,
+ CGM.Int32Ty,
+ CGM.VoidPtrPtrTy,
+ CGM.VoidPtrPtrTy,
+ CGM.SizeTy->getPointerTo(),
+ CGM.Int64Ty->getPointerTo()};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
+ break;
+ }
case OMPRTL__tgt_target_teams: {
- // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
+ // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
// int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
- // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
- llvm::Type *TypeParams[] = {CGM.Int32Ty,
+ // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
+ llvm::Type *TypeParams[] = {CGM.Int64Ty,
CGM.VoidPtrTy,
CGM.Int32Ty,
CGM.VoidPtrPtrTy,
CGM.VoidPtrPtrTy,
CGM.SizeTy->getPointerTo(),
- CGM.Int32Ty->getPointerTo(),
+ CGM.Int64Ty->getPointerTo(),
CGM.Int32Ty,
CGM.Int32Ty};
llvm::FunctionType *FnTy =
@@ -2034,6 +2096,24 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
break;
}
+ case OMPRTL__tgt_target_teams_nowait: {
+ // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
+ // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
+ // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
+ llvm::Type *TypeParams[] = {CGM.Int64Ty,
+ CGM.VoidPtrTy,
+ CGM.Int32Ty,
+ CGM.VoidPtrPtrTy,
+ CGM.VoidPtrPtrTy,
+ CGM.SizeTy->getPointerTo(),
+ CGM.Int64Ty->getPointerTo(),
+ CGM.Int32Ty,
+ CGM.Int32Ty};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
+ break;
+ }
case OMPRTL__tgt_register_lib: {
// Build void __tgt_register_lib(__tgt_bin_desc *desc);
QualType ParamTy =
@@ -2055,47 +2135,92 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
break;
}
case OMPRTL__tgt_target_data_begin: {
- // Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
- // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
- llvm::Type *TypeParams[] = {CGM.Int32Ty,
+ // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
+ // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
+ llvm::Type *TypeParams[] = {CGM.Int64Ty,
CGM.Int32Ty,
CGM.VoidPtrPtrTy,
CGM.VoidPtrPtrTy,
CGM.SizeTy->getPointerTo(),
- CGM.Int32Ty->getPointerTo()};
+ CGM.Int64Ty->getPointerTo()};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
break;
}
+ case OMPRTL__tgt_target_data_begin_nowait: {
+ // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
+ // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
+ // *arg_types);
+ llvm::Type *TypeParams[] = {CGM.Int64Ty,
+ CGM.Int32Ty,
+ CGM.VoidPtrPtrTy,
+ CGM.VoidPtrPtrTy,
+ CGM.SizeTy->getPointerTo(),
+ CGM.Int64Ty->getPointerTo()};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
+ break;
+ }
case OMPRTL__tgt_target_data_end: {
- // Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
- // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
- llvm::Type *TypeParams[] = {CGM.Int32Ty,
+ // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
+ // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
+ llvm::Type *TypeParams[] = {CGM.Int64Ty,
CGM.Int32Ty,
CGM.VoidPtrPtrTy,
CGM.VoidPtrPtrTy,
CGM.SizeTy->getPointerTo(),
- CGM.Int32Ty->getPointerTo()};
+ CGM.Int64Ty->getPointerTo()};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
break;
}
+ case OMPRTL__tgt_target_data_end_nowait: {
+ // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
+ // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
+ // *arg_types);
+ llvm::Type *TypeParams[] = {CGM.Int64Ty,
+ CGM.Int32Ty,
+ CGM.VoidPtrPtrTy,
+ CGM.VoidPtrPtrTy,
+ CGM.SizeTy->getPointerTo(),
+ CGM.Int64Ty->getPointerTo()};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
+ break;
+ }
case OMPRTL__tgt_target_data_update: {
- // Build void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
- // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
- llvm::Type *TypeParams[] = {CGM.Int32Ty,
+ // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
+ // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
+ llvm::Type *TypeParams[] = {CGM.Int64Ty,
CGM.Int32Ty,
CGM.VoidPtrPtrTy,
CGM.VoidPtrPtrTy,
CGM.SizeTy->getPointerTo(),
- CGM.Int32Ty->getPointerTo()};
+ CGM.Int64Ty->getPointerTo()};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
break;
}
+ case OMPRTL__tgt_target_data_update_nowait: {
+ // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
+ // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
+ // *arg_types);
+ llvm::Type *TypeParams[] = {CGM.Int64Ty,
+ CGM.Int32Ty,
+ CGM.VoidPtrPtrTy,
+ CGM.VoidPtrPtrTy,
+ CGM.SizeTy->getPointerTo(),
+ CGM.Int64Ty->getPointerTo()};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
+ break;
+ }
}
assert(RTLFn && "Unable to find OpenMP runtime function");
return RTLFn;
@@ -2459,7 +2584,7 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
- CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
+ RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
// __kmpc_end_serialized_parallel(&Loc, GTid);
llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
@@ -2968,87 +3093,101 @@ static void emitForStaticInitCall(
CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule,
OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
- unsigned IVSize, bool Ordered, Address IL, Address LB, Address UB,
- Address ST, llvm::Value *Chunk) {
+ const CGOpenMPRuntime::StaticRTInput &Values) {
if (!CGF.HaveInsertPoint())
- return;
-
- assert(!Ordered);
- assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
- Schedule == OMP_sch_static_balanced_chunked ||
- Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
- Schedule == OMP_dist_sch_static ||
- Schedule == OMP_dist_sch_static_chunked);
-
- // Call __kmpc_for_static_init(
- // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
- // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
- // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
- // kmp_int[32|64] incr, kmp_int[32|64] chunk);
- if (Chunk == nullptr) {
- assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
- Schedule == OMP_dist_sch_static) &&
- "expected static non-chunked schedule");
- // If the Chunk was not specified in the clause - use default value 1.
- Chunk = CGF.Builder.getIntN(IVSize, 1);
- } else {
- assert((Schedule == OMP_sch_static_chunked ||
- Schedule == OMP_sch_static_balanced_chunked ||
- Schedule == OMP_ord_static_chunked ||
- Schedule == OMP_dist_sch_static_chunked) &&
- "expected static chunked schedule");
- }
- llvm::Value *Args[] = {
- UpdateLocation, ThreadId, CGF.Builder.getInt32(addMonoNonMonoModifier(
- Schedule, M1, M2)), // Schedule type
- IL.getPointer(), // &isLastIter
- LB.getPointer(), // &LB
- UB.getPointer(), // &UB
- ST.getPointer(), // &Stride
- CGF.Builder.getIntN(IVSize, 1), // Incr
- Chunk // Chunk
- };
- CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
+ return;
+
+ assert(!Values.Ordered);
+ assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
+ Schedule == OMP_sch_static_balanced_chunked ||
+ Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
+ Schedule == OMP_dist_sch_static ||
+ Schedule == OMP_dist_sch_static_chunked);
+
+ // Call __kmpc_for_static_init(
+ // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
+ // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
+ // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
+ // kmp_int[32|64] incr, kmp_int[32|64] chunk);
+ llvm::Value *Chunk = Values.Chunk;
+ if (Chunk == nullptr) {
+ assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
+ Schedule == OMP_dist_sch_static) &&
+ "expected static non-chunked schedule");
+ // If the Chunk was not specified in the clause - use default value 1.
+ Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
+ } else {
+ assert((Schedule == OMP_sch_static_chunked ||
+ Schedule == OMP_sch_static_balanced_chunked ||
+ Schedule == OMP_ord_static_chunked ||
+ Schedule == OMP_dist_sch_static_chunked) &&
+ "expected static chunked schedule");
+ }
+ llvm::Value *Args[] = {
+ UpdateLocation,
+ ThreadId,
+ CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
+ M2)), // Schedule type
+ Values.IL.getPointer(), // &isLastIter
+ Values.LB.getPointer(), // &LB
+ Values.UB.getPointer(), // &UB
+ Values.ST.getPointer(), // &Stride
+ CGF.Builder.getIntN(Values.IVSize, 1), // Incr
+ Chunk // Chunk
+ };
+ CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
SourceLocation Loc,
+ OpenMPDirectiveKind DKind,
const OpenMPScheduleTy &ScheduleKind,
- unsigned IVSize, bool IVSigned,
- bool Ordered, Address IL, Address LB,
- Address UB, Address ST,
- llvm::Value *Chunk) {
- OpenMPSchedType ScheduleNum =
- getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
- auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
+ const StaticRTInput &Values) {
+ OpenMPSchedType ScheduleNum = getRuntimeSchedule(
+ ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
+ assert(isOpenMPWorksharingDirective(DKind) &&
+ "Expected loop-based or sections-based directive.");
+ auto *UpdatedLocation = emitUpdateLocation(CGF, Loc,
+ isOpenMPLoopDirective(DKind)
+ ? OMP_IDENT_WORK_LOOP
+ : OMP_IDENT_WORK_SECTIONS);
auto *ThreadId = getThreadID(CGF, Loc);
- auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
+ auto *StaticInitFunction =
+ createForStaticInitFunction(Values.IVSize, Values.IVSigned);
emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
- ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, IVSize,
- Ordered, IL, LB, UB, ST, Chunk);
+ ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}
void CGOpenMPRuntime::emitDistributeStaticInit(
CodeGenFunction &CGF, SourceLocation Loc,
- OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned,
- bool Ordered, Address IL, Address LB, Address UB, Address ST,
- llvm::Value *Chunk) {
- OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr);
- auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
+ OpenMPDistScheduleClauseKind SchedKind,
+ const CGOpenMPRuntime::StaticRTInput &Values) {
+ OpenMPSchedType ScheduleNum =
+ getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
+ auto *UpdatedLocation =
+ emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
auto *ThreadId = getThreadID(CGF, Loc);
- auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
+ auto *StaticInitFunction =
+ createForStaticInitFunction(Values.IVSize, Values.IVSigned);
emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
- OMPC_SCHEDULE_MODIFIER_unknown, IVSize, Ordered, IL, LB,
- UB, ST, Chunk);
+ OMPC_SCHEDULE_MODIFIER_unknown, Values);
}
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
- SourceLocation Loc) {
+ SourceLocation Loc,
+ OpenMPDirectiveKind DKind) {
if (!CGF.HaveInsertPoint())
return;
// Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
- llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
+ llvm::Value *Args[] = {
+ emitUpdateLocation(CGF, Loc,
+ isOpenMPDistributeDirective(DKind)
+ ? OMP_IDENT_WORK_DISTRIBUTE
+ : isOpenMPLoopDirective(DKind)
+ ? OMP_IDENT_WORK_LOOP
+ : OMP_IDENT_WORK_SECTIONS),
+ getThreadID(CGF, Loc)};
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
Args);
}
@@ -3360,14 +3499,14 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
CGM, ".omp_offloading.descriptor_unreg",
[&](CodeGenFunction &CGF, PrePostActionTy &) {
- CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
- Desc);
+ CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
+ Desc);
});
auto *RegFn = createOffloadingBinaryDescriptorFunction(
CGM, ".omp_offloading.descriptor_reg",
[&](CodeGenFunction &CGF, PrePostActionTy &) {
- CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib),
- Desc);
+ CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib),
+ Desc);
CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
});
if (CGM.supportsCOMDAT()) {
@@ -3802,7 +3941,6 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
".omp_task_entry.", &CGM.getModule());
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
CodeGenFunction CGF(CGM);
- CGF.disableDebugInfo();
CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
// TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
@@ -3871,7 +4009,8 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
}
CallArgs.push_back(SharedsParam);
- CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
+ CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
+ CallArgs);
CGF.EmitStoreThroughLValue(
RValue::get(CGF.Builder.getInt32(/*C=*/0)),
CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
@@ -3893,7 +4032,6 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
ImplicitParamDecl::Other);
Args.push_back(&GtidArg);
Args.push_back(&TaskTypeArg);
- FunctionType::ExtInfo Info;
auto &DestructorFnInfo =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
@@ -4020,9 +4158,9 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
return TaskPrivatesMap;
}
-static int array_pod_sort_comparator(const PrivateDataTy *P1,
- const PrivateDataTy *P2) {
- return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
+static bool stable_sort_comparator(const PrivateDataTy &P1,
+ const PrivateDataTy &P2) {
+ return P1.first > P2.first; // Descending by .first; params taken by const reference so each comparison avoids copying both elements.
}
/// Emit initialization for private variables in task-based directives.
@@ -4059,8 +4197,8 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
SharedRefLValue = CGF.MakeAddrLValue(
Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
SharedRefLValue.getType(),
- LValueBaseInfo(AlignmentSource::Decl,
- SharedRefLValue.getBaseInfo().getMayAlias()));
+ LValueBaseInfo(AlignmentSource::Decl),
+ SharedRefLValue.getTBAAInfo());
QualType Type = OriginalVD->getType();
if (Type->isArrayType()) {
// Initialize firstprivate array.
@@ -4250,8 +4388,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
/*PrivateElemInit=*/nullptr)));
++I;
}
- llvm::array_pod_sort(Privates.begin(), Privates.end(),
- array_pod_sort_comparator);
+ std::stable_sort(Privates.begin(), Privates.end(), stable_sort_comparator);
auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
// Build type kmp_routine_entry_t (if not built yet).
emitKmpRoutineEntryT(KmpInt32Ty);
@@ -4262,7 +4399,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
}
KmpTaskTQTy = SavedKmpTaskloopTQTy;
- } else if (D.getDirectiveKind() == OMPD_task) {
+ } else {
assert(D.getDirectiveKind() == OMPD_task &&
"Expected taskloop or task directive");
if (SavedKmpTaskTQTy.isNull()) {
@@ -4557,8 +4694,8 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
}
auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
- NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF,
- PrePostActionTy &) {
+ NumDependencies, &DepWaitTaskArgs,
+ Loc](CodeGenFunction &CGF, PrePostActionTy &) {
auto &RT = CGF.CGM.getOpenMPRuntime();
CodeGenFunction::RunCleanupsScope LocalScope(CGF);
// Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
@@ -4569,11 +4706,12 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
DepWaitTaskArgs);
// Call proxy_task_entry(gtid, new_task);
- auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy](
- CodeGenFunction &CGF, PrePostActionTy &Action) {
+ auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
+ Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
- CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
+ CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
+ OutlinedFnArgs);
};
// Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
@@ -5805,21 +5943,21 @@ emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
- // FIXME: Accommodate other combined directives with teams when they become
- // available.
- if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>(
+ if (auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
ignoreCompoundStmts(CS.getCapturedStmt()))) {
- if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
- CGOpenMPInnerExprInfo CGInfo(CGF, CS);
- CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
- llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
- return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
- /*IsSigned=*/true);
- }
+ if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
+ if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
+ CGOpenMPInnerExprInfo CGInfo(CGF, CS);
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
+ llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
+ return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
+ /*IsSigned=*/true);
+ }
- // If we have an enclosed teams directive but no num_teams clause we use
- // the default value 0.
- return Bld.getInt32(0);
+ // If we have an enclosed teams directive but no num_teams clause we use
+ // the default value 0.
+ return Bld.getInt32(0);
+ }
}
// No teams associated with the directive.
@@ -5908,21 +6046,21 @@ emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
- // FIXME: Accommodate other combined directives with teams when they become
- // available.
- if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>(
+ if (auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>(
ignoreCompoundStmts(CS.getCapturedStmt()))) {
- if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
- CGOpenMPInnerExprInfo CGInfo(CGF, CS);
- CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
- llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
- return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
- /*IsSigned=*/true);
- }
+ if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) {
+ if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
+ CGOpenMPInnerExprInfo CGInfo(CGF, CS);
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
+ llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
+ return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
+ /*IsSigned=*/true);
+ }
- // If we have an enclosed teams directive but no thread_limit clause we use
- // the default value 0.
- return CGF.Builder.getInt32(0);
+ // If we have an enclosed teams directive but no thread_limit clause we
+ // use the default value 0.
+ return CGF.Builder.getInt32(0);
+ }
}
// No teams associated with the directive.
@@ -5949,22 +6087,23 @@ public:
/// \brief Delete the element from the device environment, ignoring the
/// current reference count associated with the element.
OMP_MAP_DELETE = 0x08,
- /// \brief The element being mapped is a pointer, therefore the pointee
- /// should be mapped as well.
- OMP_MAP_IS_PTR = 0x10,
- /// \brief This flags signals that an argument is the first one relating to
- /// a map/private clause expression. For some cases a single
- /// map/privatization results in multiple arguments passed to the runtime
- /// library.
- OMP_MAP_FIRST_REF = 0x20,
+ /// \brief The element being mapped is a pointer-pointee pair; both the
+ /// pointer and the pointee should be mapped.
+ OMP_MAP_PTR_AND_OBJ = 0x10,
+ /// \brief This flag signals that the base address of an entry should be
+ /// passed to the target kernel as an argument.
+ OMP_MAP_TARGET_PARAM = 0x20,
/// \brief Signal that the runtime library has to return the device pointer
- /// in the current position for the data being mapped.
- OMP_MAP_RETURN_PTR = 0x40,
+ /// in the current position for the data being mapped. Used when we have the
+ /// use_device_ptr clause.
+ OMP_MAP_RETURN_PARAM = 0x40,
/// \brief This flag signals that the reference being passed is a pointer to
/// private data.
- OMP_MAP_PRIVATE_PTR = 0x80,
+ OMP_MAP_PRIVATE = 0x80,
/// \brief Pass the element to the device by value.
- OMP_MAP_PRIVATE_VAL = 0x100,
+ OMP_MAP_LITERAL = 0x100,
+ /// Implicit map
+ OMP_MAP_IMPLICIT = 0x200,
};
/// Class that associates information with a base pointer to be passed to the
@@ -5986,7 +6125,7 @@ public:
typedef SmallVector<BasePointerInfo, 16> MapBaseValuesArrayTy;
typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy;
- typedef SmallVector<unsigned, 16> MapFlagsArrayTy;
+ typedef SmallVector<uint64_t, 16> MapFlagsArrayTy;
private:
/// \brief Directive from where the map clauses were extracted.
@@ -5997,6 +6136,8 @@ private:
/// \brief Set of all first private variables in the current directive.
llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;
+ /// Set of all reduction variables in the current directive.
+ llvm::SmallPtrSet<const VarDecl *, 8> ReductionDecls;
/// Map between device pointer declarations and their expression components.
/// The key value for declarations in 'this' is null.
@@ -6051,10 +6192,10 @@ private:
/// a flag marking the map as a pointer if requested. Add a flag marking the
/// map as a target parameter (its base address is passed to the target kernel
/// as an argument) if requested.
- unsigned getMapTypeBits(OpenMPMapClauseKind MapType,
+ uint64_t getMapTypeBits(OpenMPMapClauseKind MapType,
OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag,
- bool AddIsFirstFlag) const {
- unsigned Bits = 0u;
+ bool AddIsTargetParamFlag) const {
+ uint64_t Bits = 0u;
switch (MapType) {
case OMPC_MAP_alloc:
case OMPC_MAP_release:
@@ -6080,9 +6221,9 @@ private:
break;
}
if (AddPtrFlag)
- Bits |= OMP_MAP_IS_PTR;
- if (AddIsFirstFlag)
- Bits |= OMP_MAP_FIRST_REF;
+ Bits |= OMP_MAP_PTR_AND_OBJ;
+ if (AddIsTargetParamFlag)
+ Bits |= OMP_MAP_TARGET_PARAM;
if (MapTypeModifier == OMPC_MAP_always)
Bits |= OMP_MAP_ALWAYS;
return Bits;
@@ -6135,7 +6276,7 @@ private:
OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
- bool IsFirstComponentList) const {
+ bool IsFirstComponentList, bool IsImplicit) const {
// The following summarizes what has to be generated for each map and the
// types below. The generated information is expressed in this order:
@@ -6189,28 +6330,28 @@ private:
//
// map(s.p[:22], s.a s.b)
// &s, &(s.p), sizeof(double*), noflags
- // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + extra_flag
+ // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag
//
// map(s.ps)
// &s, &(s.ps), sizeof(S2*), noflags
//
// map(s.ps->s.i)
// &s, &(s.ps), sizeof(S2*), noflags
- // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag + extra_flag
+ // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag
//
// map(s.ps->ps)
// &s, &(s.ps), sizeof(S2*), noflags
- // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
+ // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag
//
// map(s.ps->ps->ps)
// &s, &(s.ps), sizeof(S2*), noflags
- // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
- // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
+ // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag
+ // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag
//
// map(s.ps->ps->s.f[:22])
// &s, &(s.ps), sizeof(S2*), noflags
- // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
- // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + extra_flag
+ // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag
+ // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag
//
// map(ps)
// &ps, &ps, sizeof(S2*), noflags
@@ -6226,29 +6367,28 @@ private:
//
// map(ps->p[:22])
// ps, &(ps->p), sizeof(double*), noflags
- // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + extra_flag
+ // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag
//
// map(ps->ps)
// ps, &(ps->ps), sizeof(S2*), noflags
//
// map(ps->ps->s.i)
// ps, &(ps->ps), sizeof(S2*), noflags
- // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag + extra_flag
+ // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag
//
// map(ps->ps->ps)
// ps, &(ps->ps), sizeof(S2*), noflags
- // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
+ // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag
//
// map(ps->ps->ps->ps)
// ps, &(ps->ps), sizeof(S2*), noflags
- // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
- // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
+ // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag
+ // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag
//
// map(ps->ps->ps->s.f[:22])
// ps, &(ps->ps), sizeof(S2*), noflags
- // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
- // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag +
- // extra_flag
+ // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag
+ // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag
// Track if the map information being generated is the first for a capture.
bool IsCaptureFirstInfo = IsFirstComponentList;
@@ -6270,8 +6410,7 @@ private:
} else {
// The base is the reference to the variable.
// BP = &Var.
- BP = CGF.EmitLValue(cast<DeclRefExpr>(I->getAssociatedExpression()))
- .getPointer();
+ BP = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer();
// If the variable is a pointer and is being dereferenced (i.e. is not
// the last component), the base has to be the pointer itself, not its
@@ -6290,6 +6429,7 @@ private:
}
}
+ uint64_t DefaultFlags = IsImplicit ? OMP_MAP_IMPLICIT : 0;
for (; I != CE; ++I) {
auto Next = std::next(I);
@@ -6324,7 +6464,8 @@ private:
isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
"Unexpected expression");
- auto *LB = CGF.EmitLValue(I->getAssociatedExpression()).getPointer();
+ llvm::Value *LB =
+ CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer();
auto *Size = getExprTypeSize(I->getAssociatedExpression());
// If we have a member expression and the current component is a
@@ -6339,9 +6480,11 @@ private:
BasePointers.push_back(BP);
Pointers.push_back(RefAddr);
Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
- Types.push_back(getMapTypeBits(
- /*MapType*/ OMPC_MAP_alloc, /*MapTypeModifier=*/OMPC_MAP_unknown,
- !IsExpressionFirstInfo, IsCaptureFirstInfo));
+ Types.push_back(DefaultFlags |
+ getMapTypeBits(
+ /*MapType*/ OMPC_MAP_alloc,
+ /*MapTypeModifier=*/OMPC_MAP_unknown,
+ !IsExpressionFirstInfo, IsCaptureFirstInfo));
IsExpressionFirstInfo = false;
IsCaptureFirstInfo = false;
// The reference will be the next base address.
@@ -6356,9 +6499,9 @@ private:
// same expression except for the first one. We also need to signal
// this map is the first one that relates with the current capture
// (there is a set of entries for each capture).
- Types.push_back(getMapTypeBits(MapType, MapTypeModifier,
- !IsExpressionFirstInfo,
- IsCaptureFirstInfo));
+ Types.push_back(DefaultFlags | getMapTypeBits(MapType, MapTypeModifier,
+ !IsExpressionFirstInfo,
+ IsCaptureFirstInfo));
// If we have a final array section, we are done with this expression.
if (IsFinalArraySection)
@@ -6370,7 +6513,6 @@ private:
IsExpressionFirstInfo = false;
IsCaptureFirstInfo = false;
- continue;
}
}
}
@@ -6386,8 +6528,14 @@ private:
// 'private ptr' and 'map to' flag. Return the right flags if the captured
// declaration is known as first-private in this handler.
if (FirstPrivateDecls.count(Cap.getCapturedVar()))
- return MappableExprsHandler::OMP_MAP_PRIVATE_PTR |
+ return MappableExprsHandler::OMP_MAP_PRIVATE |
MappableExprsHandler::OMP_MAP_TO;
+ // Reduction variables use only the 'map to' and 'map from' flags; no
+ // private flag is added.
+ if (ReductionDecls.count(Cap.getCapturedVar())) {
+ return MappableExprsHandler::OMP_MAP_TO |
+ MappableExprsHandler::OMP_MAP_FROM;
+ }
// We didn't modify anything.
return CurrentModifiers;
@@ -6401,6 +6549,12 @@ public:
for (const auto *D : C->varlists())
FirstPrivateDecls.insert(
cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
+ for (const auto *C : Dir.getClausesOfKind<OMPReductionClause>()) {
+ for (const auto *D : C->varlists()) {
+ ReductionDecls.insert(
+ cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
+ }
+ }
// Extract device pointer clause information.
for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
for (auto L : C->component_lists())
@@ -6432,20 +6586,19 @@ public:
RPK_MemberReference,
};
OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
- OpenMPMapClauseKind MapType;
- OpenMPMapClauseKind MapTypeModifier;
- ReturnPointerKind ReturnDevicePointer;
+ OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
+ OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown;
+ ReturnPointerKind ReturnDevicePointer = RPK_None;
+ bool IsImplicit = false;
- MapInfo()
- : MapType(OMPC_MAP_unknown), MapTypeModifier(OMPC_MAP_unknown),
- ReturnDevicePointer(RPK_None) {}
+ MapInfo() = default;
MapInfo(
OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
- ReturnPointerKind ReturnDevicePointer)
+ ReturnPointerKind ReturnDevicePointer, bool IsImplicit)
: Components(Components), MapType(MapType),
MapTypeModifier(MapTypeModifier),
- ReturnDevicePointer(ReturnDevicePointer) {}
+ ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
};
// We have to process the component lists that relate with the same
@@ -6459,25 +6612,29 @@ public:
const ValueDecl *D,
OMPClauseMappableExprCommon::MappableExprComponentListRef L,
OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier,
- MapInfo::ReturnPointerKind ReturnDevicePointer) {
+ MapInfo::ReturnPointerKind ReturnDevicePointer, bool IsImplicit) {
const ValueDecl *VD =
D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
- Info[VD].push_back({L, MapType, MapModifier, ReturnDevicePointer});
+ Info[VD].emplace_back(L, MapType, MapModifier, ReturnDevicePointer,
+ IsImplicit);
};
// FIXME: MSVC 2013 seems to require this-> to find member CurDir.
for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
- for (auto L : C->component_lists())
+ for (auto L : C->component_lists()) {
InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(),
- MapInfo::RPK_None);
+ MapInfo::RPK_None, C->isImplicit());
+ }
for (auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
- for (auto L : C->component_lists())
+ for (auto L : C->component_lists()) {
InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown,
- MapInfo::RPK_None);
+ MapInfo::RPK_None, C->isImplicit());
+ }
for (auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
- for (auto L : C->component_lists())
+ for (auto L : C->component_lists()) {
InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown,
- MapInfo::RPK_None);
+ MapInfo::RPK_None, C->isImplicit());
+ }
// Look at the use_device_ptr clause information and mark the existing map
// entries as such. If there is no map information for an entry in the
@@ -6524,7 +6681,7 @@ public:
BasePointers.push_back({Ptr, VD});
Pointers.push_back(Ptr);
Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
- Types.push_back(OMP_MAP_RETURN_PTR | OMP_MAP_FIRST_REF);
+ Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
}
for (auto &M : Info) {
@@ -6538,9 +6695,9 @@ public:
// Remember the current base pointer index.
unsigned CurrentBasePointersIdx = BasePointers.size();
// FIXME: MSVC 2013 seems to require this-> to find the member method.
- this->generateInfoForComponentList(L.MapType, L.MapTypeModifier,
- L.Components, BasePointers, Pointers,
- Sizes, Types, IsFirstComponentList);
+ this->generateInfoForComponentList(
+ L.MapType, L.MapTypeModifier, L.Components, BasePointers, Pointers,
+ Sizes, Types, IsFirstComponentList, L.IsImplicit);
// If this entry relates with a device pointer, set the relevant
// declaration and add the 'return pointer' flag.
@@ -6562,7 +6719,7 @@ public:
"No relevant declaration related with device pointer??");
BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
- Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PTR;
+ Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
}
IsFirstComponentList = false;
}
@@ -6604,7 +6761,8 @@ public:
for (auto L : It->second) {
generateInfoForComponentList(
/*MapType=*/OMPC_MAP_to, /*MapTypeModifier=*/OMPC_MAP_unknown, L,
- BasePointers, Pointers, Sizes, Types, IsFirstComponentList);
+ BasePointers, Pointers, Sizes, Types, IsFirstComponentList,
+ /*IsImplicit=*/false);
IsFirstComponentList = false;
}
return;
@@ -6613,7 +6771,7 @@ public:
BasePointers.push_back({Arg, VD});
Pointers.push_back(Arg);
Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
- Types.push_back(OMP_MAP_PRIVATE_VAL | OMP_MAP_FIRST_REF);
+ Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
return;
}
@@ -6624,9 +6782,9 @@ public:
"We got information for the wrong declaration??");
assert(!L.second.empty() &&
"Not expecting declaration with no component lists.");
- generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(),
- L.second, BasePointers, Pointers, Sizes,
- Types, IsFirstComponentList);
+ generateInfoForComponentList(
+ C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers,
+ Pointers, Sizes, Types, IsFirstComponentList, C->isImplicit());
IsFirstComponentList = false;
}
@@ -6656,7 +6814,7 @@ public:
if (!RI.getType()->isAnyPointerType()) {
// We have to signal to the runtime captures passed by value that are
// not pointers.
- CurMapTypes.push_back(OMP_MAP_PRIVATE_VAL);
+ CurMapTypes.push_back(OMP_MAP_LITERAL);
CurSizes.push_back(CGF.getTypeSize(RI.getType()));
} else {
// Pointers are implicitly mapped with a zero size and no flags
@@ -6676,19 +6834,12 @@ public:
// The default map type for a scalar/complex type is 'to' because by
// default the value doesn't have to be retrieved. For an aggregate
// type, the default is 'tofrom'.
- CurMapTypes.push_back(ElementType->isAggregateType()
- ? (OMP_MAP_TO | OMP_MAP_FROM)
- : OMP_MAP_TO);
-
- // If we have a capture by reference we may need to add the private
- // pointer flag if the base declaration shows in some first-private
- // clause.
- CurMapTypes.back() =
- adjustMapModifiersForPrivateClauses(CI, CurMapTypes.back());
+ CurMapTypes.emplace_back(adjustMapModifiersForPrivateClauses(
+ CI, ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM)
+ : OMP_MAP_TO));
}
- // Every default map produces a single argument, so, it is always the
- // first one.
- CurMapTypes.back() |= OMP_MAP_FIRST_REF;
+ // Every default map produces a single argument which is a target parameter.
+ CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
}
};
@@ -6831,7 +6982,7 @@ static void emitOffloadingArraysArgument(
llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,
/*Idx0=*/0, /*Idx1=*/0);
MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
- llvm::ArrayType::get(CGM.Int32Ty, Info.NumberOfPtrs),
+ llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
Info.MapTypesArray,
/*Idx0=*/0,
/*Idx1=*/0);
@@ -6840,7 +6991,7 @@ static void emitOffloadingArraysArgument(
PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
MapTypesArrayArg =
- llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo());
+ llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
}
}
@@ -6855,8 +7006,6 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
assert(OutlinedFn && "Invalid outlined function!");
- auto &Ctx = CGF.getContext();
-
// Fill up the arrays with all the captured variables.
MappableExprsHandler::MapValuesArrayTy KernelArgs;
MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
@@ -6878,9 +7027,6 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
CE = CS.capture_end();
CI != CE; ++CI, ++RI, ++CV) {
- StringRef Name;
- QualType Ty;
-
CurBasePointers.clear();
CurPointers.clear();
CurSizes.clear();
@@ -6893,8 +7039,8 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
CurPointers.push_back(*CV);
CurSizes.push_back(CGF.getTypeSize(RI->getType()));
// Copy to the device as an argument. No need to retrieve it.
- CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL |
- MappableExprsHandler::OMP_MAP_FIRST_REF);
+ CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
+ MappableExprsHandler::OMP_MAP_TARGET_PARAM);
} else {
// If we have any information in the map clause, we use it, otherwise we
// just do a default mapping.
@@ -6921,19 +7067,10 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
}
- // Keep track on whether the host function has to be executed.
- auto OffloadErrorQType =
- Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
- auto OffloadError = CGF.MakeAddrLValue(
- CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"),
- OffloadErrorQType);
- CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty),
- OffloadError);
-
// Fill up the pointer arrays and transfer execution to the device.
- auto &&ThenGen = [&BasePointers, &Pointers, &Sizes, &MapTypes, Device,
- OutlinedFnID, OffloadError,
- &D](CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&ThenGen = [this, &BasePointers, &Pointers, &Sizes, &MapTypes, Device,
+ OutlinedFn, OutlinedFnID, &D,
+ &KernelArgs](CodeGenFunction &CGF, PrePostActionTy &) {
auto &RT = CGF.CGM.getOpenMPRuntime();
// Emit the offloading arrays.
TargetDataInfo Info;
@@ -6956,11 +7093,12 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
// Emit device ID if any.
llvm::Value *DeviceID;
- if (Device)
+ if (Device) {
DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
- CGF.Int32Ty, /*isSigned=*/true);
- else
- DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
+ CGF.Int64Ty, /*isSigned=*/true);
+ } else {
+ DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
+ }
// Emit the number of elements in the offloading arrays.
llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size());
@@ -6971,6 +7109,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
auto *NumTeams = emitNumTeamsForTargetDirective(RT, CGF, D);
auto *NumThreads = emitNumThreadsForTargetDirective(RT, CGF, D);
+ bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
// The target region is an outlined function launched by the runtime
// via calls __tgt_target() or __tgt_target_teams().
//
@@ -7013,24 +7152,41 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
Info.MapTypesArray, NumTeams,
NumThreads};
Return = CGF.EmitRuntimeCall(
- RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
+ RT.createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
+ : OMPRTL__tgt_target_teams),
+ OffloadingArgs);
} else {
llvm::Value *OffloadingArgs[] = {
DeviceID, OutlinedFnID,
PointerNum, Info.BasePointersArray,
Info.PointersArray, Info.SizesArray,
Info.MapTypesArray};
- Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target),
- OffloadingArgs);
+ Return = CGF.EmitRuntimeCall(
+ RT.createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
+ : OMPRTL__tgt_target),
+ OffloadingArgs);
}
- CGF.EmitStoreOfScalar(Return, OffloadError);
+ // Check the error code and execute the host version if required.
+ llvm::BasicBlock *OffloadFailedBlock =
+ CGF.createBasicBlock("omp_offload.failed");
+ llvm::BasicBlock *OffloadContBlock =
+ CGF.createBasicBlock("omp_offload.cont");
+ llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
+ CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
+
+ CGF.EmitBlock(OffloadFailedBlock);
+ emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, KernelArgs);
+ CGF.EmitBranch(OffloadContBlock);
+
+ CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
};
// Emit the call to the host version of the outlined function.
- auto &&ElseGen = [OffloadError](CodeGenFunction &CGF, PrePostActionTy &) {
- CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/-1u),
- OffloadError);
+ auto &&ElseGen = [this, &D, OutlinedFn, &KernelArgs](CodeGenFunction &CGF,
+ PrePostActionTy &) {
+ emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn,
+ KernelArgs);
};
// If we have a target function ID it means that we need to support
@@ -7048,19 +7204,6 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
RegionCodeGenTy ElseRCG(ElseGen);
ElseRCG(CGF);
}
-
- // Check the error code and execute the host version if required.
- auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed");
- auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont");
- auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation());
- auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal);
- CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
-
- CGF.EmitBlock(OffloadFailedBlock);
- CGF.Builder.CreateCall(OutlinedFn, KernelArgs);
- CGF.EmitBranch(OffloadContBlock);
-
- CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
}
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
@@ -7101,6 +7244,26 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
CGM, ParentName, cast<OMPTargetTeamsDirective>(*S));
break;
+ case Stmt::OMPTargetTeamsDistributeDirectiveClass:
+ CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
+ CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(*S));
+ break;
+ case Stmt::OMPTargetTeamsDistributeSimdDirectiveClass:
+ CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
+ CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(*S));
+ break;
+ case Stmt::OMPTargetParallelForDirectiveClass:
+ CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
+ CGM, ParentName, cast<OMPTargetParallelForDirective>(*S));
+ break;
+ case Stmt::OMPTargetParallelForSimdDirectiveClass:
+ CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
+ CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(*S));
+ break;
+ case Stmt::OMPTargetSimdDirectiveClass:
+ CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
+ CGM, ParentName, cast<OMPTargetSimdDirective>(*S));
+ break;
default:
llvm_unreachable("Unknown target directive for OpenMP device codegen.");
}
@@ -7278,11 +7441,12 @@ void CGOpenMPRuntime::emitTargetDataCalls(
// Emit device ID if any.
llvm::Value *DeviceID = nullptr;
- if (Device)
+ if (Device) {
DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
- CGF.Int32Ty, /*isSigned=*/true);
- else
- DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
+ CGF.Int64Ty, /*isSigned=*/true);
+ } else {
+ DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
+ }
// Emit the number of elements in the offloading arrays.
auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
@@ -7313,11 +7477,12 @@ void CGOpenMPRuntime::emitTargetDataCalls(
// Emit device ID if any.
llvm::Value *DeviceID = nullptr;
- if (Device)
+ if (Device) {
DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
- CGF.Int32Ty, /*isSigned=*/true);
- else
- DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
+ CGF.Int64Ty, /*isSigned=*/true);
+ } else {
+ DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
+ }
// Emit the number of elements in the offloading arrays.
auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
@@ -7399,11 +7564,12 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
// Emit device ID if any.
llvm::Value *DeviceID = nullptr;
- if (Device)
+ if (Device) {
DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
- CGF.Int32Ty, /*isSigned=*/true);
- else
- DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
+ CGF.Int64Ty, /*isSigned=*/true);
+ } else {
+ DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
+ }
// Emit the number of elements in the offloading arrays.
auto *PointerNum = CGF.Builder.getInt32(BasePointers.size());
@@ -7415,19 +7581,23 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
auto &RT = CGF.CGM.getOpenMPRuntime();
// Select the right runtime function call for each expected standalone
// directive.
+ const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
OpenMPRTLFunction RTLFn;
switch (D.getDirectiveKind()) {
default:
llvm_unreachable("Unexpected standalone target data directive.");
break;
case OMPD_target_enter_data:
- RTLFn = OMPRTL__tgt_target_data_begin;
+ RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
+ : OMPRTL__tgt_target_data_begin;
break;
case OMPD_target_exit_data:
- RTLFn = OMPRTL__tgt_target_data_end;
+ RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
+ : OMPRTL__tgt_target_data_end;
break;
case OMPD_target_update:
- RTLFn = OMPRTL__tgt_target_data_update;
+ RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
+ : OMPRTL__tgt_target_data_update;
break;
}
CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFn), OffloadingArgs);
@@ -7777,3 +7947,29 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
CGF.EmitRuntimeCall(RTLFn, Args);
}
+/// Emit a runtime call to \p Callee with the arguments \p Args.
+///
+/// \param CGF    Code generation context the call is emitted into.
+/// \param Callee Value being called.
+/// \param Args   Arguments forwarded to the call.
+/// \param Loc    Source location used to create the artificial debug location.
+///
+/// When \p Callee is an llvm::Function that does not throw, the call is
+/// emitted as a nounwind runtime call; otherwise a regular runtime call is
+/// emitted.
+void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, llvm::Value *Callee,
+                               ArrayRef<llvm::Value *> Args,
+                               SourceLocation Loc) const {
+  // Not referenced again below; presumably keeps the artificial debug
+  // location applied for the rest of this scope (confirm against
+  // ApplyDebugLocation).
+  auto DebugLocScope = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
+
+  auto *KnownFn = dyn_cast<llvm::Function>(Callee);
+  if (KnownFn && KnownFn->doesNotThrow())
+    CGF.EmitNounwindRuntimeCall(KnownFn, Args);
+  else
+    CGF.EmitRuntimeCall(Callee, Args);
+}
+
+/// Emit a call to the outlined function \p OutlinedFn with arguments \p Args.
+///
+/// \param CGF        Code generation context the call is emitted into.
+/// \param Loc        Location of the call; asserted to be valid.
+/// \param OutlinedFn Outlined function being called.
+/// \param Args       Arguments forwarded to the outlined function.
+void CGOpenMPRuntime::emitOutlinedFunctionCall(
+    CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
+    ArrayRef<llvm::Value *> Args) const {
+  assert(Loc.isValid() && "Outlined function call location must be valid.");
+  // The actual emission is delegated to emitCall.
+  this->emitCall(CGF, OutlinedFn, Args, Loc);
+}
+
+/// Return the address used for the parameter \p NativeParam.
+///
+/// This implementation does not use \p TargetParam: it returns the address
+/// of the local variable for \p NativeParam in \p CGF.
+Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
+                                             const VarDecl *NativeParam,
+                                             const VarDecl *TargetParam) const {
+  Address NativeAddr = CGF.GetAddrOfLocalVar(NativeParam);
+  return NativeAddr;
+}