diff options
Diffstat (limited to 'lib/CodeGen/CGOpenMPRuntime.cpp')
-rw-r--r-- | lib/CodeGen/CGOpenMPRuntime.cpp | 860 |
1 files changed, 528 insertions, 332 deletions
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index 9f8aa6c8d964..5db29eb6004d 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -19,6 +19,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/StmtOpenMP.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitmaskEnum.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DerivedTypes.h" @@ -427,7 +428,7 @@ public: /// \brief Values for bit flags used in the ident_t to describe the fields. /// All enumeric elements are named and described in accordance with the code /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h -enum OpenMPLocationFlags { +enum OpenMPLocationFlags : unsigned { /// \brief Use trampoline for internal microtask. OMP_IDENT_IMD = 0x01, /// \brief Use c-style ident structure. @@ -443,7 +444,14 @@ enum OpenMPLocationFlags { /// \brief Implicit barrier in 'sections' directive. OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, /// \brief Implicit barrier in 'single' directive. - OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140 + OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, + /// Call of __kmp_for_static_init for static loop. + OMP_IDENT_WORK_LOOP = 0x200, + /// Call of __kmp_for_static_init for sections. + OMP_IDENT_WORK_SECTIONS = 0x400, + /// Call of __kmp_for_static_init for distribute. + OMP_IDENT_WORK_DISTRIBUTE = 0x800, + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) }; /// \brief Describes ident structure that describes a source location. @@ -660,27 +668,47 @@ enum OpenMPRTLFunction { // // Offloading related calls // - // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t - // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t + // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t + // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t // *arg_types); OMPRTL__tgt_target, - // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, - // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, - // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); + // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, + // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t + // *arg_types); + OMPRTL__tgt_target_nowait, + // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, + // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t + // *arg_types, int32_t num_teams, int32_t thread_limit); OMPRTL__tgt_target_teams, + // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void + // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t + // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); + OMPRTL__tgt_target_teams_nowait, // Call to void __tgt_register_lib(__tgt_bin_desc *desc); OMPRTL__tgt_register_lib, // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); OMPRTL__tgt_unregister_lib, - // Call to void __tgt_target_data_begin(int32_t device_id, int32_t arg_num, - // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); + // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); OMPRTL__tgt_target_data_begin, - // Call to void __tgt_target_data_end(int32_t device_id, int32_t arg_num, - // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); + // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t + // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t + // *arg_types); + OMPRTL__tgt_target_data_begin_nowait, + // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); OMPRTL__tgt_target_data_end, - // Call to void __tgt_target_data_update(int32_t device_id, int32_t arg_num, - // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); + // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t + // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t + // *arg_types); + OMPRTL__tgt_target_data_end_nowait, + // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); OMPRTL__tgt_target_data_update, + // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t + // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t + // *arg_types); + OMPRTL__tgt_target_data_update_nowait, }; /// A basic class for pre|post-action for advanced codegen sequence for OpenMP @@ -862,18 +890,7 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, } LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { - if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) - return CGF.EmitOMPArraySectionExpr(OASE); - if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E)) - return CGF.EmitLValue(ASE); - auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); - DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), - CGF.CapturedStmtInfo && - CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr, - E->getType(), VK_LValue, E->getExprLoc()); - // Store the address of the original variable associated with the LHS - // implicit variable. - return CGF.EmitLValue(&DRE); + return CGF.EmitOMPSharedLValue(E); } LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, @@ -919,8 +936,9 @@ ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) { assert(SharedAddresses.size() == N && "Number of generated lvalues must be exactly N."); - SharedAddresses.emplace_back(emitSharedLValue(CGF, ClausesData[N].Ref), - emitSharedLValueUB(CGF, ClausesData[N].Ref)); + LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); + LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); + SharedAddresses.emplace_back(First, Second); } void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { @@ -928,7 +946,7 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); QualType PrivateType = PrivateVD->getType(); bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); - if (!AsArraySection && !PrivateType->isVariablyModifiedType()) { + if (!PrivateType->isVariablyModifiedType()) { Sizes.emplace_back( CGF.getTypeSize( SharedAddresses[N].first.getType().getNonReferenceType()), @@ -966,10 +984,9 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); QualType PrivateType = PrivateVD->getType(); - bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); - if (!AsArraySection && !PrivateType->isVariablyModifiedType()) { + if (!PrivateType->isVariablyModifiedType()) { assert(!Size && !Sizes[N].second && - "Size should be nullptr for non-variably modified redution " + "Size should be nullptr for non-variably modified reduction " "items."); return; } @@ -995,9 +1012,9 @@ void ReductionCodeGen::emitInitialization( SharedLVal = CGF.MakeAddrLValue( CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(), CGF.ConvertTypeForMem(SharedType)), - SharedType, SharedAddresses[N].first.getBaseInfo()); - if (isa<OMPArraySectionExpr>(ClausesData[N].Ref) || - CGF.getContext().getAsArrayType(PrivateVD->getType())) { + SharedType, SharedAddresses[N].first.getBaseInfo(), + CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); + if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, @@ -1040,15 +1057,16 @@ static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, if (auto *PtrTy = BaseTy->getAs<PointerType>()) BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); else { - BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(), - BaseTy->castAs<ReferenceType>()); + LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy); + BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); } BaseTy = BaseTy->getPointeeType(); } return CGF.MakeAddrLValue( CGF.Builder.CreateElementBitCast(BaseLV.getAddress(), CGF.ConvertTypeForMem(ElTy)), - BaseLV.getType(), BaseLV.getBaseInfo()); + BaseLV.getType(), BaseLV.getBaseInfo(), + CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); } static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, @@ -1106,11 +1124,14 @@ Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, OriginalBaseLValue); llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( BaseLValue.getPointer(), SharedAddresses[N].first.getPointer()); - llvm::Value *Ptr = - CGF.Builder.CreateGEP(PrivateAddr.getPointer(), Adjustment); + llvm::Value *PrivatePointer = + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + PrivateAddr.getPointer(), + SharedAddresses[N].first.getAddress().getType()); + llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); return castToBase(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), - OriginalBaseLValue.getPointer()->getType(), + OriginalBaseLValue.getAddress().getType(), OriginalBaseLValue.getAlignment(), Ptr); } BaseDecls.emplace_back( @@ -1146,7 +1167,7 @@ LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( CodeGenFunction &CGF) { return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), getThreadIDVariable()->getType(), - LValueBaseInfo(AlignmentSource::Decl, false)); + AlignmentSource::Decl); } CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) @@ -1204,7 +1225,14 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, .getAddress(); }); (void)Scope.Privatize(); - CGF.EmitIgnoredExpr(CombinerInitializer); + if (!IsCombiner && Out->hasInit() && + !CGF.isTrivialInitializer(Out->getInit())) { + CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), + Out->getType().getQualifiers(), + /*IsInitializer=*/true); + } + if (CombinerInitializer) + CGF.EmitIgnoredExpr(CombinerInitializer); Scope.ForceCleanup(); CGF.FinishFunction(); return Fn; @@ -1230,7 +1258,10 @@ void CGOpenMPRuntime::emitUserDefinedReduction( Orig = &C.Idents.get("omp_orig"); } Initializer = emitCombinerOrInitializer( - CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()), + CGM, D->getType(), + D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init + : nullptr, + cast<VarDecl>(D->lookup(Orig).front()), cast<VarDecl>(D->lookup(Priv).front()), /*IsCombiner=*/false); } @@ -1283,6 +1314,15 @@ static llvm::Value *emitParallelOrTeamsOutlinedFunction( HasCancel = OPSD->hasCancel(); else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) HasCancel = OPFD->hasCancel(); + else if (auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) + HasCancel = OPFD->hasCancel(); + else if (auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) + HasCancel = OPFD->hasCancel(); + else if (auto *OPFD = dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) + HasCancel = OPFD->hasCancel(); + else if (auto *OPFD = + dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) + HasCancel = OPFD->hasCancel(); CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, HasCancel, OutlinedHelperName); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); @@ -1442,19 +1482,24 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, if (ThreadID != nullptr) return ThreadID; } - if (auto *OMPRegionInfo = - dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { - if (OMPRegionInfo->getThreadIDVariable()) { - // Check if this an outlined function with thread id passed as argument. - auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); - ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); - // If value loaded in entry block, cache it and use it everywhere in - // function. - if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { - auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); - Elem.second.ThreadID = ThreadID; + // If exceptions are enabled, do not use parameter to avoid possible crash. + if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || + !CGF.getLangOpts().CXXExceptions || + CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { + if (auto *OMPRegionInfo = + dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { + if (OMPRegionInfo->getThreadIDVariable()) { + // Check if this an outlined function with thread id passed as argument. + auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); + ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); + // If value loaded in entry block, cache it and use it everywhere in + // function. + if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { + auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); + Elem.second.ThreadID = ThreadID; + } + return ThreadID; } - return ThreadID; } } @@ -1464,12 +1509,13 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, // function. CGBuilderTy::InsertPointGuard IPG(CGF.Builder); CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); - ThreadID = - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), - emitUpdateLocation(CGF, Loc)); + auto *Call = CGF.Builder.CreateCall( + createRuntimeFunction(OMPRTL__kmpc_global_thread_num), + emitUpdateLocation(CGF, Loc)); + Call->setCallingConv(CGF.getRuntimeCC()); auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); - Elem.second.ThreadID = ThreadID; - return ThreadID; + Elem.second.ThreadID = Call; + return Call; } void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { @@ -2001,32 +2047,48 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { break; } case OMPRTL__tgt_target: { - // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t - // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t + // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t + // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t // *arg_types); - llvm::Type *TypeParams[] = {CGM.Int32Ty, + llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrPtrTy, CGM.VoidPtrPtrTy, CGM.SizeTy->getPointerTo(), - CGM.Int32Ty->getPointerTo()}; + CGM.Int64Ty->getPointerTo()}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); break; } + case OMPRTL__tgt_target_nowait: { + // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, + // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, + // int64_t *arg_types); + llvm::Type *TypeParams[] = {CGM.Int64Ty, + CGM.VoidPtrTy, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int64Ty->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); + break; + } case OMPRTL__tgt_target_teams: { - // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, + // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, - // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); - llvm::Type *TypeParams[] = {CGM.Int32Ty, + // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); + llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrPtrTy, CGM.VoidPtrPtrTy, CGM.SizeTy->getPointerTo(), - CGM.Int32Ty->getPointerTo(), + CGM.Int64Ty->getPointerTo(), CGM.Int32Ty, CGM.Int32Ty}; llvm::FunctionType *FnTy = @@ -2034,6 +2096,24 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); break; } + case OMPRTL__tgt_target_teams_nowait: { + // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void + // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t + // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); + llvm::Type *TypeParams[] = {CGM.Int64Ty, + CGM.VoidPtrTy, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int64Ty->getPointerTo(), + CGM.Int32Ty, + CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); + break; + } case OMPRTL__tgt_register_lib: { // Build void __tgt_register_lib(__tgt_bin_desc *desc); QualType ParamTy = @@ -2055,47 +2135,92 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { break; } case OMPRTL__tgt_target_data_begin: { - // Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num, - // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); - llvm::Type *TypeParams[] = {CGM.Int32Ty, + // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); + llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int32Ty, CGM.VoidPtrPtrTy, CGM.VoidPtrPtrTy, CGM.SizeTy->getPointerTo(), - CGM.Int32Ty->getPointerTo()}; + CGM.Int64Ty->getPointerTo()}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); break; } + case OMPRTL__tgt_target_data_begin_nowait: { + // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t + // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t + // *arg_types); + llvm::Type *TypeParams[] = {CGM.Int64Ty, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int64Ty->getPointerTo()}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); + break; + } case OMPRTL__tgt_target_data_end: { - // Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num, - // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); - llvm::Type *TypeParams[] = {CGM.Int32Ty, + // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); + llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int32Ty, CGM.VoidPtrPtrTy, CGM.VoidPtrPtrTy, CGM.SizeTy->getPointerTo(), - CGM.Int32Ty->getPointerTo()}; + CGM.Int64Ty->getPointerTo()}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); break; } + case OMPRTL__tgt_target_data_end_nowait: { + // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t + // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t + // *arg_types); + llvm::Type *TypeParams[] = {CGM.Int64Ty, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int64Ty->getPointerTo()}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); + break; + } case OMPRTL__tgt_target_data_update: { - // Build void __tgt_target_data_update(int32_t device_id, int32_t arg_num, - // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); - llvm::Type *TypeParams[] = {CGM.Int32Ty, + // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); + llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int32Ty, CGM.VoidPtrPtrTy, CGM.VoidPtrPtrTy, CGM.SizeTy->getPointerTo(), - CGM.Int32Ty->getPointerTo()}; + CGM.Int64Ty->getPointerTo()}; llvm::FunctionType *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); break; } + case OMPRTL__tgt_target_data_update_nowait: { + // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t + // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t + // *arg_types); + llvm::Type *TypeParams[] = {CGM.Int64Ty, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int64Ty->getPointerTo()}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); + break; + } } assert(RTLFn && "Unable to find OpenMP runtime function"); return RTLFn; @@ -2459,7 +2584,7 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); OutlinedFnArgs.push_back(ZeroAddr.getPointer()); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); - CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); + RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); // __kmpc_end_serialized_parallel(&Loc, GTid); llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; @@ -2968,87 +3093,101 @@ static void emitForStaticInitCall( CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, - unsigned IVSize, bool Ordered, Address IL, Address LB, Address UB, - Address ST, llvm::Value *Chunk) { + const CGOpenMPRuntime::StaticRTInput &Values) { if (!CGF.HaveInsertPoint()) - return; - - assert(!Ordered); - assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || - Schedule == OMP_sch_static_balanced_chunked || - Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || - Schedule == OMP_dist_sch_static || - Schedule == OMP_dist_sch_static_chunked); - - // Call __kmpc_for_static_init( - // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, - // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, - // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, - // kmp_int[32|64] incr, kmp_int[32|64] chunk); - if (Chunk == nullptr) { - assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || - Schedule == OMP_dist_sch_static) && - "expected static non-chunked schedule"); - // If the Chunk was not specified in the clause - use default value 1. - Chunk = CGF.Builder.getIntN(IVSize, 1); - } else { - assert((Schedule == OMP_sch_static_chunked || - Schedule == OMP_sch_static_balanced_chunked || - Schedule == OMP_ord_static_chunked || - Schedule == OMP_dist_sch_static_chunked) && - "expected static chunked schedule"); - } - llvm::Value *Args[] = { - UpdateLocation, ThreadId, CGF.Builder.getInt32(addMonoNonMonoModifier( - Schedule, M1, M2)), // Schedule type - IL.getPointer(), // &isLastIter - LB.getPointer(), // &LB - UB.getPointer(), // &UB - ST.getPointer(), // &Stride - CGF.Builder.getIntN(IVSize, 1), // Incr - Chunk // Chunk - }; - CGF.EmitRuntimeCall(ForStaticInitFunction, Args); + return; + + assert(!Values.Ordered); + assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || + Schedule == OMP_sch_static_balanced_chunked || + Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || + Schedule == OMP_dist_sch_static || + Schedule == OMP_dist_sch_static_chunked); + + // Call __kmpc_for_static_init( + // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, + // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, + // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, + // kmp_int[32|64] incr, kmp_int[32|64] chunk); + llvm::Value *Chunk = Values.Chunk; + if (Chunk == nullptr) { + assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || + Schedule == OMP_dist_sch_static) && + "expected static non-chunked schedule"); + // If the Chunk was not specified in the clause - use default value 1. + Chunk = CGF.Builder.getIntN(Values.IVSize, 1); + } else { + assert((Schedule == OMP_sch_static_chunked || + Schedule == OMP_sch_static_balanced_chunked || + Schedule == OMP_ord_static_chunked || + Schedule == OMP_dist_sch_static_chunked) && + "expected static chunked schedule"); + } + llvm::Value *Args[] = { + UpdateLocation, + ThreadId, + CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1, + M2)), // Schedule type + Values.IL.getPointer(), // &isLastIter + Values.LB.getPointer(), // &LB + Values.UB.getPointer(), // &UB + Values.ST.getPointer(), // &Stride + CGF.Builder.getIntN(Values.IVSize, 1), // Incr + Chunk // Chunk + }; + CGF.EmitRuntimeCall(ForStaticInitFunction, Args); } void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, + OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, - unsigned IVSize, bool IVSigned, - bool Ordered, Address IL, Address LB, - Address UB, Address ST, - llvm::Value *Chunk) { - OpenMPSchedType ScheduleNum = - getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered); - auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); + const StaticRTInput &Values) { + OpenMPSchedType ScheduleNum = getRuntimeSchedule( + ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); + assert(isOpenMPWorksharingDirective(DKind) && + "Expected loop-based or sections-based directive."); + auto *UpdatedLocation = emitUpdateLocation(CGF, Loc, + isOpenMPLoopDirective(DKind) + ? OMP_IDENT_WORK_LOOP + : OMP_IDENT_WORK_SECTIONS); auto *ThreadId = getThreadID(CGF, Loc); - auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned); + auto *StaticInitFunction = + createForStaticInitFunction(Values.IVSize, Values.IVSigned); emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, - ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, IVSize, - Ordered, IL, LB, UB, ST, Chunk); + ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); } void CGOpenMPRuntime::emitDistributeStaticInit( CodeGenFunction &CGF, SourceLocation Loc, - OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned, - bool Ordered, Address IL, Address LB, Address UB, Address ST, - llvm::Value *Chunk) { - OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr); - auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); + OpenMPDistScheduleClauseKind SchedKind, + const CGOpenMPRuntime::StaticRTInput &Values) { + OpenMPSchedType ScheduleNum = + getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); + auto *UpdatedLocation = + emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); auto *ThreadId = getThreadID(CGF, Loc); - auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned); + auto *StaticInitFunction = + createForStaticInitFunction(Values.IVSize, Values.IVSigned); emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, - OMPC_SCHEDULE_MODIFIER_unknown, IVSize, Ordered, IL, LB, - UB, ST, Chunk); + OMPC_SCHEDULE_MODIFIER_unknown, Values); } void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, - SourceLocation Loc) { + SourceLocation Loc, + OpenMPDirectiveKind DKind) { if (!CGF.HaveInsertPoint()) return; // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); - llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; + llvm::Value *Args[] = { + emitUpdateLocation(CGF, Loc, + isOpenMPDistributeDirective(DKind) + ? OMP_IDENT_WORK_DISTRIBUTE + : isOpenMPLoopDirective(DKind) + ? OMP_IDENT_WORK_LOOP + : OMP_IDENT_WORK_SECTIONS), + getThreadID(CGF, Loc)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), Args); } @@ -3360,14 +3499,14 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { auto *UnRegFn = createOffloadingBinaryDescriptorFunction( CGM, ".omp_offloading.descriptor_unreg", [&](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib), - Desc); + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib), + Desc); }); auto *RegFn = createOffloadingBinaryDescriptorFunction( CGM, ".omp_offloading.descriptor_reg", [&](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib), - Desc); + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), + Desc); CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); }); if (CGM.supportsCOMDAT()) { @@ -3802,7 +3941,6 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, ".omp_task_entry.", &CGM.getModule()); CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo); CodeGenFunction CGF(CGM); - CGF.disableDebugInfo(); CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args); // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, @@ -3871,7 +4009,8 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, } CallArgs.push_back(SharedsParam); - CGF.EmitCallOrInvoke(TaskFunction, CallArgs); + CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, + CallArgs); CGF.EmitStoreThroughLValue( RValue::get(CGF.Builder.getInt32(/*C=*/0)), CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); @@ -3893,7 +4032,6 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, ImplicitParamDecl::Other); Args.push_back(&GtidArg); Args.push_back(&TaskTypeArg); - FunctionType::ExtInfo Info; auto &DestructorFnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo); @@ -4020,9 +4158,9 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, return TaskPrivatesMap; } -static int array_pod_sort_comparator(const PrivateDataTy *P1, - const PrivateDataTy *P2) { - return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0); +static bool stable_sort_comparator(const PrivateDataTy P1, + const PrivateDataTy P2) { + return P1.first > P2.first; } /// Emit initialization for private variables in task-based directives. @@ -4059,8 +4197,8 @@ static void emitPrivatesInit(CodeGenFunction &CGF, SharedRefLValue = CGF.MakeAddrLValue( Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), SharedRefLValue.getType(), - LValueBaseInfo(AlignmentSource::Decl, - SharedRefLValue.getBaseInfo().getMayAlias())); + LValueBaseInfo(AlignmentSource::Decl), + SharedRefLValue.getTBAAInfo()); QualType Type = OriginalVD->getType(); if (Type->isArrayType()) { // Initialize firstprivate array. @@ -4250,8 +4388,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, /*PrivateElemInit=*/nullptr))); ++I; } - llvm::array_pod_sort(Privates.begin(), Privates.end(), - array_pod_sort_comparator); + std::stable_sort(Privates.begin(), Privates.end(), stable_sort_comparator); auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); // Build type kmp_routine_entry_t (if not built yet). emitKmpRoutineEntryT(KmpInt32Ty); @@ -4262,7 +4399,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); } KmpTaskTQTy = SavedKmpTaskloopTQTy; - } else if (D.getDirectiveKind() == OMPD_task) { + } else { assert(D.getDirectiveKind() == OMPD_task && "Expected taskloop or task directive"); if (SavedKmpTaskTQTy.isNull()) { @@ -4557,8 +4694,8 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, - NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF, - PrePostActionTy &) { + NumDependencies, &DepWaitTaskArgs, + Loc](CodeGenFunction &CGF, PrePostActionTy &) { auto &RT = CGF.CGM.getOpenMPRuntime(); CodeGenFunction::RunCleanupsScope LocalScope(CGF); // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, @@ -4569,11 +4706,12 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), DepWaitTaskArgs); // Call proxy_task_entry(gtid, new_task); - auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy]( - CodeGenFunction &CGF, PrePostActionTy &Action) { + auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, + Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; - CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs); + CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, + OutlinedFnArgs); }; // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, @@ -5805,21 +5943,21 @@ emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime, const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); - // FIXME: Accommodate other combined directives with teams when they become - // available. - if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>( + if (auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>( ignoreCompoundStmts(CS.getCapturedStmt()))) { - if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) { - CGOpenMPInnerExprInfo CGInfo(CGF, CS); - CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); - return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, - /*IsSigned=*/true); - } + if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) { + if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) { + CGOpenMPInnerExprInfo CGInfo(CGF, CS); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); + return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, + /*IsSigned=*/true); + } - // If we have an enclosed teams directive but no num_teams clause we use - // the default value 0. - return Bld.getInt32(0); + // If we have an enclosed teams directive but no num_teams clause we use + // the default value 0. + return Bld.getInt32(0); + } } // No teams associated with the directive. @@ -5908,21 +6046,21 @@ emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime, const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); - // FIXME: Accommodate other combined directives with teams when they become - // available. - if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>( + if (auto *TeamsDir = dyn_cast_or_null<OMPExecutableDirective>( ignoreCompoundStmts(CS.getCapturedStmt()))) { - if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) { - CGOpenMPInnerExprInfo CGInfo(CGF, CS); - CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit()); - return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty, - /*IsSigned=*/true); - } + if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) { + if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) { + CGOpenMPInnerExprInfo CGInfo(CGF, CS); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit()); + return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty, + /*IsSigned=*/true); + } - // If we have an enclosed teams directive but no thread_limit clause we use - // the default value 0. - return CGF.Builder.getInt32(0); + // If we have an enclosed teams directive but no thread_limit clause we + // use the default value 0. + return CGF.Builder.getInt32(0); + } } // No teams associated with the directive. @@ -5949,22 +6087,23 @@ public: /// \brief Delete the element from the device environment, ignoring the /// current reference count associated with the element. OMP_MAP_DELETE = 0x08, - /// \brief The element being mapped is a pointer, therefore the pointee - /// should be mapped as well. - OMP_MAP_IS_PTR = 0x10, - /// \brief This flags signals that an argument is the first one relating to - /// a map/private clause expression. For some cases a single - /// map/privatization results in multiple arguments passed to the runtime - /// library. - OMP_MAP_FIRST_REF = 0x20, + /// \brief The element being mapped is a pointer-pointee pair; both the + /// pointer and the pointee should be mapped. + OMP_MAP_PTR_AND_OBJ = 0x10, + /// \brief This flags signals that the base address of an entry should be + /// passed to the target kernel as an argument. + OMP_MAP_TARGET_PARAM = 0x20, /// \brief Signal that the runtime library has to return the device pointer - /// in the current position for the data being mapped. - OMP_MAP_RETURN_PTR = 0x40, + /// in the current position for the data being mapped. Used when we have the + /// use_device_ptr clause. + OMP_MAP_RETURN_PARAM = 0x40, /// \brief This flag signals that the reference being passed is a pointer to /// private data. - OMP_MAP_PRIVATE_PTR = 0x80, + OMP_MAP_PRIVATE = 0x80, /// \brief Pass the element to the device by value. - OMP_MAP_PRIVATE_VAL = 0x100, + OMP_MAP_LITERAL = 0x100, + /// Implicit map + OMP_MAP_IMPLICIT = 0x200, }; /// Class that associates information with a base pointer to be passed to the @@ -5986,7 +6125,7 @@ public: typedef SmallVector<BasePointerInfo, 16> MapBaseValuesArrayTy; typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy; - typedef SmallVector<unsigned, 16> MapFlagsArrayTy; + typedef SmallVector<uint64_t, 16> MapFlagsArrayTy; private: /// \brief Directive from where the map clauses were extracted. @@ -5997,6 +6136,8 @@ private: /// \brief Set of all first private variables in the current directive. llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls; + /// Set of all reduction variables in the current directive. + llvm::SmallPtrSet<const VarDecl *, 8> ReductionDecls; /// Map between device pointer declarations and their expression components. /// The key value for declarations in 'this' is null. @@ -6051,10 +6192,10 @@ private: /// a flag marking the map as a pointer if requested. Add a flag marking the /// map as the first one of a series of maps that relate to the same map /// expression. - unsigned getMapTypeBits(OpenMPMapClauseKind MapType, + uint64_t getMapTypeBits(OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag, - bool AddIsFirstFlag) const { - unsigned Bits = 0u; + bool AddIsTargetParamFlag) const { + uint64_t Bits = 0u; switch (MapType) { case OMPC_MAP_alloc: case OMPC_MAP_release: @@ -6080,9 +6221,9 @@ private: break; } if (AddPtrFlag) - Bits |= OMP_MAP_IS_PTR; - if (AddIsFirstFlag) - Bits |= OMP_MAP_FIRST_REF; + Bits |= OMP_MAP_PTR_AND_OBJ; + if (AddIsTargetParamFlag) + Bits |= OMP_MAP_TARGET_PARAM; if (MapTypeModifier == OMPC_MAP_always) Bits |= OMP_MAP_ALWAYS; return Bits; @@ -6135,7 +6276,7 @@ private: OMPClauseMappableExprCommon::MappableExprComponentListRef Components, MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, - bool IsFirstComponentList) const { + bool IsFirstComponentList, bool IsImplicit) const { // The following summarizes what has to be generated for each map and the // types bellow. The generated information is expressed in this order: @@ -6189,28 +6330,28 @@ private: // // map(s.p[:22], s.a s.b) // &s, &(s.p), sizeof(double*), noflags - // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + extra_flag + // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag // // map(s.ps) // &s, &(s.ps), sizeof(S2*), noflags // // map(s.ps->s.i) // &s, &(s.ps), sizeof(S2*), noflags - // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag + extra_flag + // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag // // map(s.ps->ps) // &s, &(s.ps), sizeof(S2*), noflags - // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag + // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag // // map(s.ps->ps->ps) // &s, &(s.ps), sizeof(S2*), noflags - // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag - // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag + // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag // // map(s.ps->ps->s.f[:22]) // &s, &(s.ps), sizeof(S2*), noflags - // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag - // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + extra_flag + // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag // // map(ps) // &ps, &ps, sizeof(S2*), noflags @@ -6226,29 +6367,28 @@ private: // // map(ps->p[:22]) // ps, &(ps->p), sizeof(double*), noflags - // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + extra_flag + // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag // // map(ps->ps) // ps, &(ps->ps), sizeof(S2*), noflags // // map(ps->ps->s.i) // ps, &(ps->ps), sizeof(S2*), noflags - // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag + extra_flag + // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag // // map(ps->ps->ps) // ps, &(ps->ps), sizeof(S2*), noflags - // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag + // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag // // map(ps->ps->ps->ps) // ps, &(ps->ps), sizeof(S2*), noflags - // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag - // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag + // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag // // map(ps->ps->ps->s.f[:22]) // ps, &(ps->ps), sizeof(S2*), noflags - // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag - // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + - // extra_flag + // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag // Track if the map information being generated is the first for a capture. bool IsCaptureFirstInfo = IsFirstComponentList; @@ -6270,8 +6410,7 @@ private: } else { // The base is the reference to the variable. // BP = &Var. - BP = CGF.EmitLValue(cast<DeclRefExpr>(I->getAssociatedExpression())) - .getPointer(); + BP = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer(); // If the variable is a pointer and is being dereferenced (i.e. is not // the last component), the base has to be the pointer itself, not its @@ -6290,6 +6429,7 @@ private: } } + uint64_t DefaultFlags = IsImplicit ? OMP_MAP_IMPLICIT : 0; for (; I != CE; ++I) { auto Next = std::next(I); @@ -6324,7 +6464,8 @@ private: isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && "Unexpected expression"); - auto *LB = CGF.EmitLValue(I->getAssociatedExpression()).getPointer(); + llvm::Value *LB = + CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getPointer(); auto *Size = getExprTypeSize(I->getAssociatedExpression()); // If we have a member expression and the current component is a @@ -6339,9 +6480,11 @@ private: BasePointers.push_back(BP); Pointers.push_back(RefAddr); Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); - Types.push_back(getMapTypeBits( - /*MapType*/ OMPC_MAP_alloc, /*MapTypeModifier=*/OMPC_MAP_unknown, - !IsExpressionFirstInfo, IsCaptureFirstInfo)); + Types.push_back(DefaultFlags | + getMapTypeBits( + /*MapType*/ OMPC_MAP_alloc, + /*MapTypeModifier=*/OMPC_MAP_unknown, + !IsExpressionFirstInfo, IsCaptureFirstInfo)); IsExpressionFirstInfo = false; IsCaptureFirstInfo = false; // The reference will be the next base address. @@ -6356,9 +6499,9 @@ private: // same expression except for the first one. We also need to signal // this map is the first one that relates with the current capture // (there is a set of entries for each capture). - Types.push_back(getMapTypeBits(MapType, MapTypeModifier, - !IsExpressionFirstInfo, - IsCaptureFirstInfo)); + Types.push_back(DefaultFlags | getMapTypeBits(MapType, MapTypeModifier, + !IsExpressionFirstInfo, + IsCaptureFirstInfo)); // If we have a final array section, we are done with this expression. if (IsFinalArraySection) @@ -6370,7 +6513,6 @@ private: IsExpressionFirstInfo = false; IsCaptureFirstInfo = false; - continue; } } } @@ -6386,8 +6528,14 @@ private: // 'private ptr' and 'map to' flag. Return the right flags if the captured // declaration is known as first-private in this handler. if (FirstPrivateDecls.count(Cap.getCapturedVar())) - return MappableExprsHandler::OMP_MAP_PRIVATE_PTR | + return MappableExprsHandler::OMP_MAP_PRIVATE | MappableExprsHandler::OMP_MAP_TO; + // Reduction variable will use only the 'private ptr' and 'map to_from' + // flag. + if (ReductionDecls.count(Cap.getCapturedVar())) { + return MappableExprsHandler::OMP_MAP_TO | + MappableExprsHandler::OMP_MAP_FROM; + } // We didn't modify anything. return CurrentModifiers; @@ -6401,6 +6549,12 @@ public: for (const auto *D : C->varlists()) FirstPrivateDecls.insert( cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); + for (const auto *C : Dir.getClausesOfKind<OMPReductionClause>()) { + for (const auto *D : C->varlists()) { + ReductionDecls.insert( + cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); + } + } // Extract device pointer clause information. for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) for (auto L : C->component_lists()) @@ -6432,20 +6586,19 @@ public: RPK_MemberReference, }; OMPClauseMappableExprCommon::MappableExprComponentListRef Components; - OpenMPMapClauseKind MapType; - OpenMPMapClauseKind MapTypeModifier; - ReturnPointerKind ReturnDevicePointer; + OpenMPMapClauseKind MapType = OMPC_MAP_unknown; + OpenMPMapClauseKind MapTypeModifier = OMPC_MAP_unknown; + ReturnPointerKind ReturnDevicePointer = RPK_None; + bool IsImplicit = false; - MapInfo() - : MapType(OMPC_MAP_unknown), MapTypeModifier(OMPC_MAP_unknown), - ReturnDevicePointer(RPK_None) {} + MapInfo() = default; MapInfo( OMPClauseMappableExprCommon::MappableExprComponentListRef Components, OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, - ReturnPointerKind ReturnDevicePointer) + ReturnPointerKind ReturnDevicePointer, bool IsImplicit) : Components(Components), MapType(MapType), MapTypeModifier(MapTypeModifier), - ReturnDevicePointer(ReturnDevicePointer) {} + ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {} }; // We have to process the component lists that relate with the same @@ -6459,25 +6612,29 @@ public: const ValueDecl *D, OMPClauseMappableExprCommon::MappableExprComponentListRef L, OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier, - MapInfo::ReturnPointerKind ReturnDevicePointer) { + MapInfo::ReturnPointerKind ReturnDevicePointer, bool IsImplicit) { const ValueDecl *VD = D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; - Info[VD].push_back({L, MapType, MapModifier, ReturnDevicePointer}); + Info[VD].emplace_back(L, MapType, MapModifier, ReturnDevicePointer, + IsImplicit); }; // FIXME: MSVC 2013 seems to require this-> to find member CurDir. for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) - for (auto L : C->component_lists()) + for (auto L : C->component_lists()) { InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(), - MapInfo::RPK_None); + MapInfo::RPK_None, C->isImplicit()); + } for (auto *C : this->CurDir.getClausesOfKind<OMPToClause>()) - for (auto L : C->component_lists()) + for (auto L : C->component_lists()) { InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown, - MapInfo::RPK_None); + MapInfo::RPK_None, C->isImplicit()); + } for (auto *C : this->CurDir.getClausesOfKind<OMPFromClause>()) - for (auto L : C->component_lists()) + for (auto L : C->component_lists()) { InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown, - MapInfo::RPK_None); + MapInfo::RPK_None, C->isImplicit()); + } // Look at the use_device_ptr clause information and mark the existing map // entries as such. If there is no map information for an entry in the @@ -6524,7 +6681,7 @@ public: BasePointers.push_back({Ptr, VD}); Pointers.push_back(Ptr); Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy)); - Types.push_back(OMP_MAP_RETURN_PTR | OMP_MAP_FIRST_REF); + Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); } for (auto &M : Info) { @@ -6538,9 +6695,9 @@ public: // Remember the current base pointer index. unsigned CurrentBasePointersIdx = BasePointers.size(); // FIXME: MSVC 2013 seems to require this-> to find the member method. - this->generateInfoForComponentList(L.MapType, L.MapTypeModifier, - L.Components, BasePointers, Pointers, - Sizes, Types, IsFirstComponentList); + this->generateInfoForComponentList( + L.MapType, L.MapTypeModifier, L.Components, BasePointers, Pointers, + Sizes, Types, IsFirstComponentList, L.IsImplicit); // If this entry relates with a device pointer, set the relevant // declaration and add the 'return pointer' flag. @@ -6562,7 +6719,7 @@ public: "No relevant declaration related with device pointer??"); BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); - Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PTR; + Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; } IsFirstComponentList = false; } @@ -6604,7 +6761,8 @@ public: for (auto L : It->second) { generateInfoForComponentList( /*MapType=*/OMPC_MAP_to, /*MapTypeModifier=*/OMPC_MAP_unknown, L, - BasePointers, Pointers, Sizes, Types, IsFirstComponentList); + BasePointers, Pointers, Sizes, Types, IsFirstComponentList, + /*IsImplicit=*/false); IsFirstComponentList = false; } return; @@ -6613,7 +6771,7 @@ public: BasePointers.push_back({Arg, VD}); Pointers.push_back(Arg); Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy)); - Types.push_back(OMP_MAP_PRIVATE_VAL | OMP_MAP_FIRST_REF); + Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM); return; } @@ -6624,9 +6782,9 @@ public: "We got information for the wrong declaration??"); assert(!L.second.empty() && "Not expecting declaration with no component lists."); - generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(), - L.second, BasePointers, Pointers, Sizes, - Types, IsFirstComponentList); + generateInfoForComponentList( + C->getMapType(), C->getMapTypeModifier(), L.second, BasePointers, + Pointers, Sizes, Types, IsFirstComponentList, C->isImplicit()); IsFirstComponentList = false; } @@ -6656,7 +6814,7 @@ public: if (!RI.getType()->isAnyPointerType()) { // We have to signal to the runtime captures passed by value that are // not pointers. - CurMapTypes.push_back(OMP_MAP_PRIVATE_VAL); + CurMapTypes.push_back(OMP_MAP_LITERAL); CurSizes.push_back(CGF.getTypeSize(RI.getType())); } else { // Pointers are implicitly mapped with a zero size and no flags @@ -6676,19 +6834,12 @@ public: // The default map type for a scalar/complex type is 'to' because by // default the value doesn't have to be retrieved. For an aggregate // type, the default is 'tofrom'. - CurMapTypes.push_back(ElementType->isAggregateType() - ? (OMP_MAP_TO | OMP_MAP_FROM) - : OMP_MAP_TO); - - // If we have a capture by reference we may need to add the private - // pointer flag if the base declaration shows in some first-private - // clause. - CurMapTypes.back() = - adjustMapModifiersForPrivateClauses(CI, CurMapTypes.back()); + CurMapTypes.emplace_back(adjustMapModifiersForPrivateClauses( + CI, ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM) + : OMP_MAP_TO)); } - // Every default map produces a single argument, so, it is always the - // first one. - CurMapTypes.back() |= OMP_MAP_FIRST_REF; + // Every default map produces a single argument which is a target parameter. + CurMapTypes.back() |= OMP_MAP_TARGET_PARAM; } }; @@ -6831,7 +6982,7 @@ static void emitOffloadingArraysArgument( llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray, /*Idx0=*/0, /*Idx1=*/0); MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.Int32Ty, Info.NumberOfPtrs), + llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.MapTypesArray, /*Idx0=*/0, /*Idx1=*/0); @@ -6840,7 +6991,7 @@ static void emitOffloadingArraysArgument( PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo()); MapTypesArrayArg = - llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()); + llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); } } @@ -6855,8 +7006,6 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, assert(OutlinedFn && "Invalid outlined function!"); - auto &Ctx = CGF.getContext(); - // Fill up the arrays with all the captured variables. MappableExprsHandler::MapValuesArrayTy KernelArgs; MappableExprsHandler::MapBaseValuesArrayTy BasePointers; @@ -6878,9 +7027,6 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), CE = CS.capture_end(); CI != CE; ++CI, ++RI, ++CV) { - StringRef Name; - QualType Ty; - CurBasePointers.clear(); CurPointers.clear(); CurSizes.clear(); @@ -6893,8 +7039,8 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, CurPointers.push_back(*CV); CurSizes.push_back(CGF.getTypeSize(RI->getType())); // Copy to the device as an argument. No need to retrieve it. - CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL | - MappableExprsHandler::OMP_MAP_FIRST_REF); + CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL | + MappableExprsHandler::OMP_MAP_TARGET_PARAM); } else { // If we have any information in the map clause, we use it, otherwise we // just do a default mapping. @@ -6921,19 +7067,10 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); } - // Keep track on whether the host function has to be executed. - auto OffloadErrorQType = - Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true); - auto OffloadError = CGF.MakeAddrLValue( - CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"), - OffloadErrorQType); - CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty), - OffloadError); - // Fill up the pointer arrays and transfer execution to the device. - auto &&ThenGen = [&BasePointers, &Pointers, &Sizes, &MapTypes, Device, - OutlinedFnID, OffloadError, - &D](CodeGenFunction &CGF, PrePostActionTy &) { + auto &&ThenGen = [this, &BasePointers, &Pointers, &Sizes, &MapTypes, Device, + OutlinedFn, OutlinedFnID, &D, + &KernelArgs](CodeGenFunction &CGF, PrePostActionTy &) { auto &RT = CGF.CGM.getOpenMPRuntime(); // Emit the offloading arrays. TargetDataInfo Info; @@ -6956,11 +7093,12 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, // Emit device ID if any. llvm::Value *DeviceID; - if (Device) + if (Device) { DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), - CGF.Int32Ty, /*isSigned=*/true); - else - DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); + CGF.Int64Ty, /*isSigned=*/true); + } else { + DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); + } // Emit the number of elements in the offloading arrays. llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size()); @@ -6971,6 +7109,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, auto *NumTeams = emitNumTeamsForTargetDirective(RT, CGF, D); auto *NumThreads = emitNumThreadsForTargetDirective(RT, CGF, D); + bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); // The target region is an outlined function launched by the runtime // via calls __tgt_target() or __tgt_target_teams(). // @@ -7013,24 +7152,41 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, Info.MapTypesArray, NumTeams, NumThreads}; Return = CGF.EmitRuntimeCall( - RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs); + RT.createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait + : OMPRTL__tgt_target_teams), + OffloadingArgs); } else { llvm::Value *OffloadingArgs[] = { DeviceID, OutlinedFnID, PointerNum, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, Info.MapTypesArray}; - Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target), - OffloadingArgs); + Return = CGF.EmitRuntimeCall( + RT.createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait + : OMPRTL__tgt_target), + OffloadingArgs); } - CGF.EmitStoreOfScalar(Return, OffloadError); + // Check the error code and execute the host version if required. + llvm::BasicBlock *OffloadFailedBlock = + CGF.createBasicBlock("omp_offload.failed"); + llvm::BasicBlock *OffloadContBlock = + CGF.createBasicBlock("omp_offload.cont"); + llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); + CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); + + CGF.EmitBlock(OffloadFailedBlock); + emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, KernelArgs); + CGF.EmitBranch(OffloadContBlock); + + CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); }; // Notify that the host version must be executed. - auto &&ElseGen = [OffloadError](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/-1u), - OffloadError); + auto &&ElseGen = [this, &D, OutlinedFn, &KernelArgs](CodeGenFunction &CGF, + PrePostActionTy &) { + emitOutlinedFunctionCall(CGF, D.getLocStart(), OutlinedFn, + KernelArgs); }; // If we have a target function ID it means that we need to support @@ -7048,19 +7204,6 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, RegionCodeGenTy ElseRCG(ElseGen); ElseRCG(CGF); } - - // Check the error code and execute the host version if required. - auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed"); - auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont"); - auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation()); - auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal); - CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); - - CGF.EmitBlock(OffloadFailedBlock); - CGF.Builder.CreateCall(OutlinedFn, KernelArgs); - CGF.EmitBranch(OffloadContBlock); - - CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); } void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, @@ -7101,6 +7244,26 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( CGM, ParentName, cast<OMPTargetTeamsDirective>(*S)); break; + case Stmt::OMPTargetTeamsDistributeDirectiveClass: + CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( + CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(*S)); + break; + case Stmt::OMPTargetTeamsDistributeSimdDirectiveClass: + CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( + CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(*S)); + break; + case Stmt::OMPTargetParallelForDirectiveClass: + CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( + CGM, ParentName, cast<OMPTargetParallelForDirective>(*S)); + break; + case Stmt::OMPTargetParallelForSimdDirectiveClass: + CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( + CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(*S)); + break; + case Stmt::OMPTargetSimdDirectiveClass: + CodeGenFunction::EmitOMPTargetSimdDeviceFunction( + CGM, ParentName, cast<OMPTargetSimdDirective>(*S)); + break; default: llvm_unreachable("Unknown target directive for OpenMP device codegen."); } @@ -7278,11 +7441,12 @@ void CGOpenMPRuntime::emitTargetDataCalls( // Emit device ID if any. llvm::Value *DeviceID = nullptr; - if (Device) + if (Device) { DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), - CGF.Int32Ty, /*isSigned=*/true); - else - DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); + CGF.Int64Ty, /*isSigned=*/true); + } else { + DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); + } // Emit the number of elements in the offloading arrays. auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); @@ -7313,11 +7477,12 @@ void CGOpenMPRuntime::emitTargetDataCalls( // Emit device ID if any. llvm::Value *DeviceID = nullptr; - if (Device) + if (Device) { DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), - CGF.Int32Ty, /*isSigned=*/true); - else - DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); + CGF.Int64Ty, /*isSigned=*/true); + } else { + DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); + } // Emit the number of elements in the offloading arrays. auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); @@ -7399,11 +7564,12 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( // Emit device ID if any. llvm::Value *DeviceID = nullptr; - if (Device) + if (Device) { DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), - CGF.Int32Ty, /*isSigned=*/true); - else - DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); + CGF.Int64Ty, /*isSigned=*/true); + } else { + DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); + } // Emit the number of elements in the offloading arrays. auto *PointerNum = CGF.Builder.getInt32(BasePointers.size()); @@ -7415,19 +7581,23 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( auto &RT = CGF.CGM.getOpenMPRuntime(); // Select the right runtime function call for each expected standalone // directive. + const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); OpenMPRTLFunction RTLFn; switch (D.getDirectiveKind()) { default: llvm_unreachable("Unexpected standalone target data directive."); break; case OMPD_target_enter_data: - RTLFn = OMPRTL__tgt_target_data_begin; + RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait + : OMPRTL__tgt_target_data_begin; break; case OMPD_target_exit_data: - RTLFn = OMPRTL__tgt_target_data_end; + RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait + : OMPRTL__tgt_target_data_end; break; case OMPD_target_update: - RTLFn = OMPRTL__tgt_target_data_update; + RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait + : OMPRTL__tgt_target_data_update; break; } CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFn), OffloadingArgs); @@ -7777,3 +7947,29 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, CGF.EmitRuntimeCall(RTLFn, Args); } +void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, llvm::Value *Callee, + ArrayRef<llvm::Value *> Args, + SourceLocation Loc) const { + auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); + + if (auto *Fn = dyn_cast<llvm::Function>(Callee)) { + if (Fn->doesNotThrow()) { + CGF.EmitNounwindRuntimeCall(Fn, Args); + return; + } + } + CGF.EmitRuntimeCall(Callee, Args); +} + +void CGOpenMPRuntime::emitOutlinedFunctionCall( + CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> Args) const { + assert(Loc.isValid() && "Outlined function call location must be valid."); + emitCall(CGF, OutlinedFn, Args, Loc); +} + +Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, + const VarDecl *NativeParam, + const VarDecl *TargetParam) const { + return CGF.GetAddrOfLocalVar(NativeParam); +} |