diff options
Diffstat (limited to 'lib/CodeGen/CGOpenMPRuntime.cpp')
-rw-r--r-- | lib/CodeGen/CGOpenMPRuntime.cpp | 1214 |
1 files changed, 894 insertions, 320 deletions
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index 27e7175da841..2a13a2a58156 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -752,6 +752,11 @@ enum OpenMPRTLFunction { // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t // *arg_types); OMPRTL__tgt_target_data_update_nowait, + // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle); + OMPRTL__tgt_mapper_num_components, + // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void + // *base, void *begin, int64_t size, int64_t type); + OMPRTL__tgt_push_mapper_component, }; /// A basic class for pre|post-action for advanced codegen sequence for OpenMP @@ -1259,6 +1264,52 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, loadOffloadInfoMetadata(); } +bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD, + const GlobalDecl &OldGD, + llvm::GlobalValue *OrigAddr, + bool IsForDefinition) { + // Emit at least a definition for the aliasee if the address of the + // original function is requested. + if (IsForDefinition || OrigAddr) + (void)CGM.GetAddrOfGlobal(NewGD); + StringRef NewMangledName = CGM.getMangledName(NewGD); + llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName); + if (Addr && !Addr->isDeclaration()) { + const auto *D = cast<FunctionDecl>(OldGD.getDecl()); + const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(OldGD); + llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI); + + // Create a reference to the named value. This ensures that it is emitted + // if a deferred decl. + llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD); + + // Create the new alias itself, but don't set a name yet. 
+ auto *GA = + llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule()); + + if (OrigAddr) { + assert(OrigAddr->isDeclaration() && "Expected declaration"); + + GA->takeName(OrigAddr); + OrigAddr->replaceAllUsesWith( + llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType())); + OrigAddr->eraseFromParent(); + } else { + GA->setName(CGM.getMangledName(OldGD)); + } + + // Set attributes which are particular to an alias; this is a + // specialization of the attributes which may be set on a global function. + if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() || + D->isWeakImported()) + GA->setLinkage(llvm::Function::WeakAnyLinkage); + + CGM.SetCommonAttributes(OldGD, GA); + return true; + } + return false; +} + void CGOpenMPRuntime::clear() { InternalVars.clear(); // Clean non-target variable declarations possibly used only in debug info. @@ -1272,6 +1323,14 @@ void CGOpenMPRuntime::clear() { continue; GV->eraseFromParent(); } + // Emit aliases for the deferred aliasees. + for (const auto &Pair : DeferredVariantFunction) { + StringRef MangledName = CGM.getMangledName(Pair.second.second); + llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName); + // If not able to emit alias, just emit original declaration. + (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr, + /*IsForDefinition=*/false); + } } std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { @@ -1638,18 +1697,23 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, return ThreadID; } // If exceptions are enabled, do not use parameter to avoid possible crash. 
- if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || - !CGF.getLangOpts().CXXExceptions || - CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { - if (auto *OMPRegionInfo = - dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { - if (OMPRegionInfo->getThreadIDVariable()) { - // Check if this an outlined function with thread id passed as argument. - LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); + if (auto *OMPRegionInfo = + dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { + if (OMPRegionInfo->getThreadIDVariable()) { + // Check if this an outlined function with thread id passed as argument. + LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); + llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); + if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || + !CGF.getLangOpts().CXXExceptions || + CGF.Builder.GetInsertBlock() == TopBlock || + !isa<llvm::Instruction>(LVal.getPointer()) || + cast<llvm::Instruction>(LVal.getPointer())->getParent() == TopBlock || + cast<llvm::Instruction>(LVal.getPointer())->getParent() == + CGF.Builder.GetInsertBlock()) { ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); // If value loaded in entry block, cache it and use it everywhere in // function. 
- if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { + if (CGF.Builder.GetInsertBlock() == TopBlock) { auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); Elem.second.ThreadID = ThreadID; } @@ -1686,6 +1750,12 @@ void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { UDRMap.erase(D); FunctionUDRMap.erase(CGF.CurFn); } + auto I = FunctionUDMMap.find(CGF.CurFn); + if (I != FunctionUDMMap.end()) { + for(auto *D : I->second) + UDMMap.erase(D); + FunctionUDMMap.erase(I); + } } llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { @@ -2459,6 +2529,24 @@ llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); break; } + case OMPRTL__tgt_mapper_num_components: { + // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle); + llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; + auto *FnTy = + llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components"); + break; + } + case OMPRTL__tgt_push_mapper_component: { + // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void + // *base, void *begin, int64_t size, int64_t type); + llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy, + CGM.Int64Ty, CGM.Int64Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component"); + break; + } } assert(RTLFn && "Unable to find OpenMP runtime function"); return RTLFn; @@ -2552,6 +2640,32 @@ CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { return CGM.CreateRuntimeFunction(FnTy, Name); } +/// Obtain information that uniquely identifies a target entry. This +/// consists of the file and device IDs as well as line number associated with +/// the relevant entry source location. 
+static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, + unsigned &DeviceID, unsigned &FileID, + unsigned &LineNum) { + SourceManager &SM = C.getSourceManager(); + + // The loc should be always valid and have a file ID (the user cannot use + // #pragma directives in macros) + + assert(Loc.isValid() && "Source location is expected to be always valid."); + + PresumedLoc PLoc = SM.getPresumedLoc(Loc); + assert(PLoc.isValid() && "Source location is expected to be always valid."); + + llvm::sys::fs::UniqueID ID; + if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) + SM.getDiagnostics().Report(diag::err_cannot_open_file) + << PLoc.getFilename() << EC.message(); + + DeviceID = ID.getDevice(); + FileID = ID.getFile(); + LineNum = PLoc.getLine(); +} + Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { if (CGM.getLangOpts().OpenMPSimd) return Address::invalid(); @@ -2563,19 +2677,27 @@ Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { SmallString<64> PtrName; { llvm::raw_svector_ostream OS(PtrName); - OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_ref_ptr"; + OS << CGM.getMangledName(GlobalDecl(VD)); + if (!VD->isExternallyVisible()) { + unsigned DeviceID, FileID, Line; + getTargetEntryUniqueInfo(CGM.getContext(), + VD->getCanonicalDecl()->getBeginLoc(), + DeviceID, FileID, Line); + OS << llvm::format("_%x", FileID); + } + OS << "_decl_tgt_ref_ptr"; } llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); if (!Ptr) { QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), PtrName); - if (!CGM.getLangOpts().OpenMPIsDevice) { - auto *GV = cast<llvm::GlobalVariable>(Ptr); - GV->setLinkage(llvm::GlobalValue::ExternalLinkage); + + auto *GV = cast<llvm::GlobalVariable>(Ptr); + GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); + + if (!CGM.getLangOpts().OpenMPIsDevice) 
GV->setInitializer(CGM.GetAddrOfGlobal(VD)); - } - CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr)); registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); } return Address(Ptr, CGM.getContext().getDeclAlign(VD)); @@ -2749,35 +2871,12 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( return nullptr; } -/// Obtain information that uniquely identifies a target entry. This -/// consists of the file and device IDs as well as line number associated with -/// the relevant entry source location. -static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, - unsigned &DeviceID, unsigned &FileID, - unsigned &LineNum) { - SourceManager &SM = C.getSourceManager(); - - // The loc should be always valid and have a file ID (the user cannot use - // #pragma directives in macros) - - assert(Loc.isValid() && "Source location is expected to be always valid."); - - PresumedLoc PLoc = SM.getPresumedLoc(Loc); - assert(PLoc.isValid() && "Source location is expected to be always valid."); - - llvm::sys::fs::UniqueID ID; - if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) - SM.getDiagnostics().Report(diag::err_cannot_open_file) - << PLoc.getFilename() << EC.message(); - - DeviceID = ID.getDevice(); - FileID = ID.getFile(); - LineNum = PLoc.getLine(); -} - bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, llvm::GlobalVariable *Addr, bool PerformInit) { + if (CGM.getLangOpts().OMPTargetTriples.empty() && + !CGM.getLangOpts().OpenMPIsDevice) + return false; Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || @@ -2981,14 +3080,16 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitRuntimeCall( RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); - // OutlinedFn(&gtid, &zero, CapturedStruct); - Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 
- /*Name*/ ".zero.addr"); - CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); + // OutlinedFn(&gtid, &zero_bound, CapturedStruct); + Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); + Address ZeroAddrBound = + CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, + /*Name=*/".bound.zero.addr"); + CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0)); llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; // ThreadId for serialized parallels is 0. - OutlinedFnArgs.push_back(ZeroAddr.getPointer()); - OutlinedFnArgs.push_back(ZeroAddr.getPointer()); + OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); + OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); @@ -3283,9 +3384,9 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, // <copy_func>, did_it); if (DidIt.isValid()) { llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); - QualType CopyprivateArrayTy = - C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, - /*IndexTypeQuals=*/0); + QualType CopyprivateArrayTy = C.getConstantArrayType( + C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, + /*IndexTypeQuals=*/0); // Create a list of all private variables for copyprivate. Address CopyprivateList = CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); @@ -3472,7 +3573,7 @@ bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { return Schedule != OMP_sch_static; } -static int addMonoNonMonoModifier(OpenMPSchedType Schedule, +static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2) { int Modifier = 0; @@ -3506,6 +3607,18 @@ static int addMonoNonMonoModifier(OpenMPSchedType Schedule, case OMPC_SCHEDULE_MODIFIER_unknown: break; } + // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description. 
+ // If the static schedule kind is specified or if the ordered clause is + // specified, and if the nonmonotonic modifier is not specified, the effect is + // as if the monotonic modifier is specified. Otherwise, unless the monotonic + // modifier is specified, the effect is as if the nonmonotonic modifier is + // specified. + if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { + if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || + Schedule == OMP_sch_static_balanced_chunked || + Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static)) + Modifier = OMP_sch_modifier_nonmonotonic; + } return Schedule | Modifier; } @@ -3530,13 +3643,14 @@ void CGOpenMPRuntime::emitForDispatchInit( llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk : CGF.Builder.getIntN(IVSize, 1); llvm::Value *Args[] = { - emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), + emitUpdateLocation(CGF, Loc), + getThreadID(CGF, Loc), CGF.Builder.getInt32(addMonoNonMonoModifier( - Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type - DispatchValues.LB, // Lower - DispatchValues.UB, // Upper - CGF.Builder.getIntN(IVSize, 1), // Stride - Chunk // Chunk + CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type + DispatchValues.LB, // Lower + DispatchValues.UB, // Upper + CGF.Builder.getIntN(IVSize, 1), // Stride + Chunk // Chunk }; CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); } @@ -3578,7 +3692,7 @@ static void emitForStaticInitCall( llvm::Value *Args[] = { UpdateLocation, ThreadId, - CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1, + CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, M2)), // Schedule type Values.IL.getPointer(), // &isLastIter Values.LB.getPointer(), // &LB @@ -3899,157 +4013,6 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: Action(E.getKey(), E.getValue()); } -llvm::Function * -CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { - // If we 
don't have entries or if we are emitting code for the device, we - // don't need to do anything. - if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty()) - return nullptr; - - llvm::Module &M = CGM.getModule(); - ASTContext &C = CGM.getContext(); - - // Get list of devices we care about - const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples; - - // We should be creating an offloading descriptor only if there are devices - // specified. - assert(!Devices.empty() && "No OpenMP offloading devices??"); - - // Create the external variables that will point to the begin and end of the - // host entries section. These will be defined by the linker. - llvm::Type *OffloadEntryTy = - CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); - std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"}); - auto *HostEntriesBegin = new llvm::GlobalVariable( - M, OffloadEntryTy, /*isConstant=*/true, - llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, - EntriesBeginName); - std::string EntriesEndName = getName({"omp_offloading", "entries_end"}); - auto *HostEntriesEnd = - new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true, - llvm::GlobalValue::ExternalLinkage, - /*Initializer=*/nullptr, EntriesEndName); - - // Create all device images - auto *DeviceImageTy = cast<llvm::StructType>( - CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy())); - ConstantInitBuilder DeviceImagesBuilder(CGM); - ConstantArrayBuilder DeviceImagesEntries = - DeviceImagesBuilder.beginArray(DeviceImageTy); - - for (const llvm::Triple &Device : Devices) { - StringRef T = Device.getTriple(); - std::string BeginName = getName({"omp_offloading", "img_start", ""}); - auto *ImgBegin = new llvm::GlobalVariable( - M, CGM.Int8Ty, /*isConstant=*/true, - llvm::GlobalValue::ExternalWeakLinkage, - /*Initializer=*/nullptr, Twine(BeginName).concat(T)); - std::string EndName = getName({"omp_offloading", "img_end", ""}); - auto *ImgEnd = 
new llvm::GlobalVariable( - M, CGM.Int8Ty, /*isConstant=*/true, - llvm::GlobalValue::ExternalWeakLinkage, - /*Initializer=*/nullptr, Twine(EndName).concat(T)); - - llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin, - HostEntriesEnd}; - createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data, - DeviceImagesEntries); - } - - // Create device images global array. - std::string ImagesName = getName({"omp_offloading", "device_images"}); - llvm::GlobalVariable *DeviceImages = - DeviceImagesEntries.finishAndCreateGlobal(ImagesName, - CGM.getPointerAlign(), - /*isConstant=*/true); - DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - - // This is a Zero array to be used in the creation of the constant expressions - llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty), - llvm::Constant::getNullValue(CGM.Int32Ty)}; - - // Create the target region descriptor. - llvm::Constant *Data[] = { - llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()), - llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(), - DeviceImages, Index), - HostEntriesBegin, HostEntriesEnd}; - std::string Descriptor = getName({"omp_offloading", "descriptor"}); - llvm::GlobalVariable *Desc = createGlobalStruct( - CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor); - - // Emit code to register or unregister the descriptor at execution - // startup or closing, respectively. - - llvm::Function *UnRegFn; - { - FunctionArgList Args; - ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other); - Args.push_back(&DummyPtr); - - CodeGenFunction CGF(CGM); - // Disable debug info for global (de-)initializer because they are not part - // of some particular construct. 
- CGF.disableDebugInfo(); - const auto &FI = - CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); - std::string UnregName = getName({"omp_offloading", "descriptor_unreg"}); - UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI); - CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args); - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib), - Desc); - CGF.FinishFunction(); - } - llvm::Function *RegFn; - { - CodeGenFunction CGF(CGM); - // Disable debug info for global (de-)initializer because they are not part - // of some particular construct. - CGF.disableDebugInfo(); - const auto &FI = CGM.getTypes().arrangeNullaryFunction(); - llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); - - // Encode offload target triples into the registration function name. It - // will serve as a comdat key for the registration/unregistration code for - // this particular combination of offloading targets. - SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U); - RegFnNameParts[0] = "omp_offloading"; - RegFnNameParts[1] = "descriptor_reg"; - llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2), - [](const llvm::Triple &T) -> const std::string& { - return T.getTriple(); - }); - llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end()); - std::string Descriptor = getName(RegFnNameParts); - RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI); - CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList()); - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc); - // Create a variable to drive the registration and unregistration of the - // descriptor, so we can reuse the logic that emits Ctors and Dtors. 
- ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), - SourceLocation(), nullptr, C.CharTy, - ImplicitParamDecl::Other); - CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); - CGF.FinishFunction(); - } - if (CGM.supportsCOMDAT()) { - // It is sufficient to call registration function only once, so create a - // COMDAT group for registration/unregistration functions and associated - // data. That would reduce startup time and code size. Registration - // function serves as a COMDAT group key. - llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName()); - RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); - RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility); - RegFn->setComdat(ComdatKey); - UnRegFn->setComdat(ComdatKey); - DeviceImages->setComdat(ComdatKey); - Desc->setComdat(ComdatKey); - } - return RegFn; -} - void CGOpenMPRuntime::createOffloadEntry( llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, llvm::GlobalValue::LinkageTypes Linkage) { @@ -4077,8 +4040,7 @@ void CGOpenMPRuntime::createOffloadEntry( Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); // The entry has to be created in the section the linker expects it to be. - std::string Section = getName({"omp_offloading", "entries"}); - Entry->setSection(Section); + Entry->setSection("omp_offloading_entries"); } void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { @@ -4091,13 +4053,16 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { // Right now we only generate metadata for function that contain target // regions. - // If we do not have entries, we don't need to do anything. - if (OffloadEntriesInfoManager.empty()) + // If we are in simd mode or there are no entries, we don't need to do + // anything. 
+ if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) return; llvm::Module &M = CGM.getModule(); llvm::LLVMContext &C = M.getContext(); - SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> + SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, + SourceLocation, StringRef>, + 16> OrderedEntries(OffloadEntriesInfoManager.size()); llvm::SmallVector<StringRef, 16> ParentFunctions( OffloadEntriesInfoManager.size()); @@ -4115,7 +4080,8 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { // Create function that emits metadata for each target region entry; auto &&TargetRegionMetadataEmitter = - [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString]( + [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, + &GetMDString]( unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line, const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { @@ -4133,8 +4099,19 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { GetMDInt(FileID), GetMDString(ParentName), GetMDInt(Line), GetMDInt(E.getOrder())}; + SourceLocation Loc; + for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), + E = CGM.getContext().getSourceManager().fileinfo_end(); + I != E; ++I) { + if (I->getFirst()->getUniqueID().getDevice() == DeviceID && + I->getFirst()->getUniqueID().getFile() == FileID) { + Loc = CGM.getContext().getSourceManager().translateFileLineCol( + I->getFirst(), Line, 1); + break; + } + } // Save this entry in the right position of the ordered entries array. - OrderedEntries[E.getOrder()] = &E; + OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); ParentFunctions[E.getOrder()] = ParentName; // Add metadata to the named metadata node. @@ -4162,7 +4139,8 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; // Save this entry in the right position of the ordered entries array. 
- OrderedEntries[E.getOrder()] = &E; + OrderedEntries[E.getOrder()] = + std::make_tuple(&E, SourceLocation(), MangledName); // Add metadata to the named metadata node. MD->addOperand(llvm::MDNode::get(C, Ops)); @@ -4171,11 +4149,11 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( DeviceGlobalVarMetadataEmitter); - for (const auto *E : OrderedEntries) { - assert(E && "All ordered entries must exist!"); + for (const auto &E : OrderedEntries) { + assert(std::get<0>(E) && "All ordered entries must exist!"); if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( - E)) { + std::get<0>(E))) { if (!CE->getID() || !CE->getAddress()) { // Do not blame the entry if the parent funtion is not emitted. StringRef FnName = ParentFunctions[CE->getOrder()]; @@ -4183,16 +4161,16 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { continue; unsigned DiagID = CGM.getDiags().getCustomDiagID( DiagnosticsEngine::Error, - "Offloading entry for target region is incorrect: either the " + "Offloading entry for target region in %0 is incorrect: either the " "address or the ID is invalid."); - CGM.getDiags().Report(DiagID); + CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; continue; } createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); - } else if (const auto *CE = - dyn_cast<OffloadEntriesInfoManagerTy:: - OffloadEntryInfoDeviceGlobalVar>(E)) { + } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: + OffloadEntryInfoDeviceGlobalVar>( + std::get<0>(E))) { OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( CE->getFlags()); @@ -4203,10 +4181,10 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { continue; if (!CE->getAddress()) { unsigned DiagID = CGM.getDiags().getCustomDiagID( - 
DiagnosticsEngine::Error, - "Offloading entry for declare target variable is incorrect: the " - "address is invalid."); - CGM.getDiags().Report(DiagID); + DiagnosticsEngine::Error, "Offloading entry for declare target " + "variable %0 is incorrect: the " + "address is invalid."); + CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); continue; } // The vaiable has no definition - no need to add the entry. @@ -5242,7 +5220,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, // Define type kmp_depend_info[<Dependences.size()>]; QualType KmpDependInfoArrayTy = C.getConstantArrayType( KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), - ArrayType::Normal, /*IndexTypeQuals=*/0); + nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); // kmp_depend_info[<Dependences.size()>] deps; DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); @@ -5763,7 +5741,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, } llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); QualType ReductionArrayTy = - C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, + C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); Address ReductionList = CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); @@ -6235,7 +6213,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( unsigned Size = Data.ReductionVars.size(); llvm::APInt ArraySize(/*numBits=*/64, Size); QualType ArrayRDType = C.getConstantArrayType( - RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0); + RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); // kmp_task_red_input_t .rd_input.[Size]; Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies, @@ -6720,12 +6698,16 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF, case 
OMPD_teams_distribute_parallel_for_simd: case OMPD_target_update: case OMPD_declare_simd: + case OMPD_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: case OMPD_requires: case OMPD_unknown: break; @@ -7025,12 +7007,16 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF, case OMPD_teams_distribute_parallel_for_simd: case OMPD_target_update: case OMPD_declare_simd: + case OMPD_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: case OMPD_requires: case OMPD_unknown: break; @@ -7079,12 +7065,24 @@ public: OMP_MAP_LITERAL = 0x100, /// Implicit map OMP_MAP_IMPLICIT = 0x200, + /// Close is a hint to the runtime to allocate memory close to + /// the target device. + OMP_MAP_CLOSE = 0x400, /// The 16 MSBs of the flags indicate whether the entry is member of some /// struct/class. OMP_MAP_MEMBER_OF = 0xffff000000000000, LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), }; + /// Get the offset of the OMP_MAP_MEMBER_OF field. + static unsigned getFlagMemberOffset() { + unsigned Offset = 0; + for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); + Remain = Remain >> 1) + Offset++; + return Offset; + } + /// Class that associates information with a base pointer to be passed to the /// runtime library. class BasePointerInfo { @@ -7148,8 +7146,11 @@ private: : IE(IE), VD(VD) {} }; - /// Directive from where the map clauses were extracted. - const OMPExecutableDirective &CurDir; + /// The target directive from where the mappable clauses were extracted. 
It + /// is either an executable directive or a user-defined mapper directive. + llvm::PointerUnion<const OMPExecutableDirective *, + const OMPDeclareMapperDecl *> + CurDir; /// Function the directive is being generated for. CodeGenFunction &CGF; @@ -7181,9 +7182,11 @@ private: OAE->getBase()->IgnoreParenImpCasts()) .getCanonicalType(); - // If there is no length associated with the expression, that means we - // are using the whole length of the base. - if (!OAE->getLength() && OAE->getColonLoc().isValid()) + // If there is no length associated with the expression and lower bound is + // not specified too, that means we are using the whole length of the + // base. + if (!OAE->getLength() && OAE->getColonLoc().isValid() && + !OAE->getLowerBound()) return CGF.getTypeSize(BaseTy); llvm::Value *ElemSize; @@ -7197,13 +7200,30 @@ private: // If we don't have a length at this point, that is because we have an // array section with a single element. - if (!OAE->getLength()) + if (!OAE->getLength() && OAE->getColonLoc().isInvalid()) return ElemSize; - llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength()); - LengthVal = - CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false); - return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); + if (const Expr *LenExpr = OAE->getLength()) { + llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); + LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), + CGF.getContext().getSizeType(), + LenExpr->getExprLoc()); + return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); + } + assert(!OAE->getLength() && OAE->getColonLoc().isValid() && + OAE->getLowerBound() && "expected array_section[lb:]."); + // Size = sizetype - lb * elemtype; + llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); + llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); + LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), + CGF.getContext().getSizeType(), + OAE->getLowerBound()->getExprLoc()); + LBVal = 
CGF.Builder.CreateNUWMul(LBVal, ElemSize); + llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); + llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); + LengthVal = CGF.Builder.CreateSelect( + Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); + return LengthVal; } return CGF.getTypeSize(ExprTy); } @@ -7247,6 +7267,9 @@ private: if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) != MapModifiers.end()) Bits |= OMP_MAP_ALWAYS; + if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) + != MapModifiers.end()) + Bits |= OMP_MAP_CLOSE; return Bits; } @@ -7675,10 +7698,10 @@ private: if (!IsExpressionFirstInfo) { // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, - // then we reset the TO/FROM/ALWAYS/DELETE flags. + // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. if (IsPointer) Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | - OMP_MAP_DELETE); + OMP_MAP_DELETE | OMP_MAP_CLOSE); if (ShouldBeMemberOf) { // Set placeholder value MEMBER_OF=FFFF to indicate that the flag @@ -7752,9 +7775,9 @@ private: } static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { - // Member of is given by the 16 MSB of the flag, so rotate by 48 bits. + // Rotate by getFlagMemberOffset() bits. return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) - << 48); + << getFlagMemberOffset()); } static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, @@ -7834,7 +7857,7 @@ private: public: MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) - : CurDir(Dir), CGF(CGF) { + : CurDir(&Dir), CGF(CGF) { // Extract firstprivate clause information. for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) for (const auto *D : C->varlists()) @@ -7846,6 +7869,10 @@ public: DevPointersMap[L.first].push_back(L.second); } + /// Constructor for the declare mapper directive. 
+ MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) + : CurDir(&Dir), CGF(CGF) {} + /// Generate code for the combined entry if we have a partially mapped struct /// and take care of the mapping flags of the arguments corresponding to /// individual struct members. @@ -7907,18 +7934,20 @@ public: IsImplicit); }; - // FIXME: MSVC 2013 seems to require this-> to find member CurDir. - for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) + assert(CurDir.is<const OMPExecutableDirective *>() && + "Expect a executable directive"); + const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); + for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) for (const auto &L : C->component_lists()) { InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(), /*ReturnDevicePointer=*/false, C->isImplicit()); } - for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>()) + for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) for (const auto &L : C->component_lists()) { InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit()); } - for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>()) + for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) for (const auto &L : C->component_lists()) { InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit()); @@ -7933,9 +7962,8 @@ public: llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>> DeferredInfo; - // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 
for (const auto *C : - this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) { + CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) { for (const auto &L : C->component_lists()) { assert(!L.second.empty() && "Not expecting empty list of components!"); const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); @@ -7964,7 +7992,6 @@ public: // We didn't find any match in our map information - generate a zero // size array section - if the pointer is a struct member we defer this // action until the whole struct has been processed. - // FIXME: MSVC 2013 seems to require this-> to find member CGF. if (isa<MemberExpr>(IE)) { // Insert the pointer into Info to be processed by // generateInfoForComponentList. Because it is a member pointer @@ -7977,11 +8004,11 @@ public: /*ReturnDevicePointer=*/false, C->isImplicit()); DeferredInfo[nullptr].emplace_back(IE, VD); } else { - llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( - this->CGF.EmitLValue(IE), IE->getExprLoc()); + llvm::Value *Ptr = + CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); BasePointers.emplace_back(Ptr, VD); Pointers.push_back(Ptr); - Sizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty)); + Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); } } @@ -8005,11 +8032,10 @@ public: // Remember the current base pointer index. unsigned CurrentBasePointersIdx = CurBasePointers.size(); - // FIXME: MSVC 2013 seems to require this-> to find the member method. 
- this->generateInfoForComponentList( - L.MapType, L.MapModifiers, L.Components, CurBasePointers, - CurPointers, CurSizes, CurTypes, PartialStruct, - IsFirstComponentList, L.IsImplicit); + generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, + CurBasePointers, CurPointers, CurSizes, + CurTypes, PartialStruct, + IsFirstComponentList, L.IsImplicit); // If this entry relates with a device pointer, set the relevant // declaration and add the 'return pointer' flag. @@ -8061,6 +8087,78 @@ public: } } + /// Generate all the base pointers, section pointers, sizes and map types for + /// the extracted map clauses of a user-defined mapper. The generated entries + /// are appended to \p BasePointers, \p Pointers, \p Sizes and \p Types. The + /// handler must have been constructed from a declare mapper directive. + void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers, + MapValuesArrayTy &Pointers, + MapValuesArrayTy &Sizes, + MapFlagsArrayTy &Types) const { + assert(CurDir.is<const OMPDeclareMapperDecl *>() && + "Expect a declare mapper directive"); + const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); + // We have to process the component lists that relate with the same + // declaration in a single chunk so that we can generate the map flags + // correctly. Therefore, we organize all lists in a map. + llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; + + // Helper function to fill the information map for the different supported + // clauses. + auto &&InfoGen = [&Info]( + const ValueDecl *D, + OMPClauseMappableExprCommon::MappableExprComponentListRef L, + OpenMPMapClauseKind MapType, + ArrayRef<OpenMPMapModifierKind> MapModifiers, + bool ReturnDevicePointer, bool IsImplicit) { + const ValueDecl *VD = + D ? 
cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; + Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, + IsImplicit); + }; + + // Every clause of the declare mapper directive is treated as a map clause. + for (const auto *C : CurMapperDir->clauselists()) { + const auto *MC = cast<OMPMapClause>(C); + for (const auto &L : MC->component_lists()) { + InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(), + /*ReturnDevicePointer=*/false, MC->isImplicit()); + } + } + + for (const auto &M : Info) { + // We need to know when we generate information for the first component + // list associated with a declaration, because the mapping flags depend on + // it. + bool IsFirstComponentList = true; + + // Temporary versions of arrays + MapBaseValuesArrayTy CurBasePointers; + MapValuesArrayTy CurPointers; + MapValuesArrayTy CurSizes; + MapFlagsArrayTy CurTypes; + StructRangeInfoTy PartialStruct; + + for (const MapInfo &L : M.second) { + assert(!L.Components.empty() && + "Not expecting declaration with no component lists."); + generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, + CurBasePointers, CurPointers, CurSizes, + CurTypes, PartialStruct, + IsFirstComponentList, L.IsImplicit); + IsFirstComponentList = false; + } + + // If there is an entry in PartialStruct it means we have a struct with + // individual members mapped. Emit an extra combined entry. + if (PartialStruct.Base.isValid()) + emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, + PartialStruct); + + // We need to append the results of this declaration to what we already + // have. + BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); + Pointers.append(CurPointers.begin(), CurPointers.end()); + Sizes.append(CurSizes.begin(), CurSizes.end()); + Types.append(CurTypes.begin(), CurTypes.end()); + } + } + /// Emit capture info for lambdas for variables captured by reference. 
void generateInfoForLambdaCaptures( const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers, @@ -8184,8 +8282,10 @@ public: std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>; SmallVector<MapData, 4> DeclComponentLists; - // FIXME: MSVC 2013 seems to require this-> to find member CurDir. - for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) { + assert(CurDir.is<const OMPExecutableDirective *>() && + "Expect a executable directive"); + const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); + for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { for (const auto &L : C->decl_component_lists(VD)) { assert(L.first == VD && "We got information for the wrong declaration??"); @@ -8333,9 +8433,12 @@ public: MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types) const { + assert(CurDir.is<const OMPExecutableDirective *>() && + "Expect a executable directive"); + const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); // Map other list items in the map clause which are not captured variables // but "declare target link" global variables. 
- for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) { + for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { for (const auto &L : C->component_lists()) { if (!L.first) continue; @@ -8472,9 +8575,9 @@ emitOffloadingArrays(CodeGenFunction &CGF, } llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); - QualType PointerArrayType = - Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, - /*IndexTypeQuals=*/0); + QualType PointerArrayType = Ctx.getConstantArrayType( + Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, + /*IndexTypeQuals=*/0); Info.BasePointersArray = CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); @@ -8487,9 +8590,9 @@ emitOffloadingArrays(CodeGenFunction &CGF, QualType Int64Ty = Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); if (hasRuntimeEvaluationCaptureSize) { - QualType SizeArrayType = - Ctx.getConstantArrayType(Int64Ty, PointerNumAP, ArrayType::Normal, - /*IndexTypeQuals=*/0); + QualType SizeArrayType = Ctx.getConstantArrayType( + Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, + /*IndexTypeQuals=*/0); Info.SizesArray = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); } else { @@ -8562,6 +8665,7 @@ emitOffloadingArrays(CodeGenFunction &CGF, } } } + /// Emit the arguments to be passed to the runtime library based on the /// arrays of pointers, sizes and map types. 
static void emitOffloadingArraysArgument( @@ -8677,12 +8781,16 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { case OMPD_teams_distribute_parallel_for_simd: case OMPD_target_update: case OMPD_declare_simd: + case OMPD_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: case OMPD_requires: case OMPD_unknown: llvm_unreachable("Unexpected directive."); @@ -8692,10 +8800,343 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { return nullptr; } +/// Emit the user-defined mapper function. The code generation follows the +/// pattern in the example below. +/// \code +/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, +/// void *base, void *begin, +/// int64_t size, int64_t type) { +/// // Allocate space for an array section first. +/// if (size > 1 && !maptype.IsDelete) +/// __tgt_push_mapper_component(rt_mapper_handle, base, begin, +/// size*sizeof(Ty), clearToFrom(type)); +/// // Map members. +/// for (unsigned i = 0; i < size; i++) { +/// // For each component specified by this mapper: +/// for (auto c : all_components) { +/// if (c.hasMapper()) +/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, +/// c.arg_type); +/// else +/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, +/// c.arg_begin, c.arg_size, c.arg_type); +/// } +/// } +/// // Delete the array section. 
+/// if (size > 1 && maptype.IsDelete) +/// __tgt_push_mapper_component(rt_mapper_handle, base, begin, +/// size*sizeof(Ty), clearToFrom(type)); +/// } +/// \endcode +void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, + CodeGenFunction *CGF) { + if (UDMMap.count(D) > 0) + return; + ASTContext &C = CGM.getContext(); + QualType Ty = D->getType(); + QualType PtrTy = C.getPointerType(Ty).withRestrict(); + QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); + auto *MapperVarDecl = + cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); + SourceLocation Loc = D->getLocation(); + CharUnits ElementSize = C.getTypeSizeInChars(Ty); + + // Prepare mapper function arguments and attributes. + ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, + ImplicitParamDecl::Other); + ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.VoidPtrTy, ImplicitParamDecl::Other); + ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, + ImplicitParamDecl::Other); + ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, + ImplicitParamDecl::Other); + FunctionArgList Args; + Args.push_back(&HandleArg); + Args.push_back(&BaseArg); + Args.push_back(&BeginArg); + Args.push_back(&SizeArg); + Args.push_back(&TypeArg); + const CGFunctionInfo &FnInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + SmallString<64> TyStr; + llvm::raw_svector_ostream Out(TyStr); + CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); + std::string Name = getName({"omp_mapper", TyStr, D->getName()}); + auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, + Name, &CGM.getModule()); + CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); + 
Fn->removeFnAttr(llvm::Attribute::OptimizeNone); + // Start the mapper function code generation. + CodeGenFunction MapperCGF(CGM); + MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); + // Compute the start and end addresses of array elements. + llvm::Value *Size = MapperCGF.EmitLoadOfScalar( + MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, + C.getPointerType(Int64Ty), Loc); + llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( + MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(), + CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy))); + llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size); + llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( + MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, + C.getPointerType(Int64Ty), Loc); + // Prepare common arguments for array initiation and deletion. + llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( + MapperCGF.GetAddrOfLocalVar(&HandleArg), + /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); + llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar( + MapperCGF.GetAddrOfLocalVar(&BaseArg), + /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); + llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( + MapperCGF.GetAddrOfLocalVar(&BeginArg), + /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); + + // Emit array initiation if this is an array section and \p MapType indicates + // that memory allocation is required. + llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); + emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, + ElementSize, HeadBB, /*IsInit=*/true); + + // Emit a for loop to iterate through SizeArg of elements and map all of them. + + // Emit the loop header block. 
+ MapperCGF.EmitBlock(HeadBB); + llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); + llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); + // Evaluate whether the initial condition is satisfied. + llvm::Value *IsEmpty = + MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); + MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); + llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); + + // Emit the loop body block. + MapperCGF.EmitBlock(BodyBB); + llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( + PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); + PtrPHI->addIncoming(PtrBegin, EntryBB); + Address PtrCurrent = + Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) + .getAlignment() + .alignmentOfArrayElement(ElementSize)); + // Privatize the declared variable of mapper to be the current array element. + CodeGenFunction::OMPPrivateScope Scope(MapperCGF); + Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() { + return MapperCGF + .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>()) + .getAddress(); + }); + (void)Scope.Privatize(); + + // Get map clause information. Fill up the arrays with all mapped variables. + MappableExprsHandler::MapBaseValuesArrayTy BasePointers; + MappableExprsHandler::MapValuesArrayTy Pointers; + MappableExprsHandler::MapValuesArrayTy Sizes; + MappableExprsHandler::MapFlagsArrayTy MapTypes; + MappableExprsHandler MEHandler(*D, MapperCGF); + MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes); + + // Call the runtime API __tgt_mapper_num_components to get the number of + // pre-existing components. 
+ llvm::Value *OffloadingArgs[] = {Handle}; + llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs); + llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( + PreviousSize, + MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); + + // Fill up the runtime mapper handle for all components. + for (unsigned I = 0; I < BasePointers.size(); ++I) { + llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( + *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); + llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( + Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); + llvm::Value *CurSizeArg = Sizes[I]; + + // Extract the MEMBER_OF field from the map type. + llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member"); + MapperCGF.EmitBlock(MemberBB); + llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]); + llvm::Value *Member = MapperCGF.Builder.CreateAnd( + OriMapType, + MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF)); + llvm::BasicBlock *MemberCombineBB = + MapperCGF.createBasicBlock("omp.member.combine"); + llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type"); + llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member); + MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB); + // Add the number of pre-existing components to the MEMBER_OF field if it + // is valid. + MapperCGF.EmitBlock(MemberCombineBB); + llvm::Value *CombinedMember = + MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); + // Do nothing if it is not a member of previous components. 
+ MapperCGF.EmitBlock(TypeBB); + llvm::PHINode *MemberMapType = + MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype"); + MemberMapType->addIncoming(OriMapType, MemberBB); + MemberMapType->addIncoming(CombinedMember, MemberCombineBB); + + // Combine the map type inherited from user-defined mapper with that + // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM + // bits of the \a MapType, which is the input argument of the mapper + // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM + // bits of MemberMapType. + // [OpenMP 5.0], 1.2.6. map-type decay. + // | alloc | to | from | tofrom | release | delete + // ---------------------------------------------------------- + // alloc | alloc | alloc | alloc | alloc | release | delete + // to | alloc | to | alloc | to | release | delete + // from | alloc | alloc | from | from | release | delete + // tofrom | alloc | to | from | tofrom | release | delete + llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( + MapType, + MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | + MappableExprsHandler::OMP_MAP_FROM)); + llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); + llvm::BasicBlock *AllocElseBB = + MapperCGF.createBasicBlock("omp.type.alloc.else"); + llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); + llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); + llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); + llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); + llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); + MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); + // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. 
+ MapperCGF.EmitBlock(AllocBB); + llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( + MemberMapType, + MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | + MappableExprsHandler::OMP_MAP_FROM))); + MapperCGF.Builder.CreateBr(EndBB); + MapperCGF.EmitBlock(AllocElseBB); + llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( + LeftToFrom, + MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); + MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); + // In case of to, clear OMP_MAP_FROM. + MapperCGF.EmitBlock(ToBB); + llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( + MemberMapType, + MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); + MapperCGF.Builder.CreateBr(EndBB); + MapperCGF.EmitBlock(ToElseBB); + llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( + LeftToFrom, + MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); + MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); + // In case of from, clear OMP_MAP_TO. + MapperCGF.EmitBlock(FromBB); + llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( + MemberMapType, + MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); + // In case of tofrom, do nothing. + MapperCGF.EmitBlock(EndBB); + llvm::PHINode *CurMapType = + MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); + CurMapType->addIncoming(AllocMapType, AllocBB); + CurMapType->addIncoming(ToMapType, ToBB); + CurMapType->addIncoming(FromMapType, FromBB); + CurMapType->addIncoming(MemberMapType, ToElseBB); + + // TODO: call the corresponding mapper function if a user-defined mapper is + // associated with this map clause. + // Call the runtime API __tgt_push_mapper_component to fill up the runtime + // data structure. 
+ llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, + CurSizeArg, CurMapType}; + MapperCGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__tgt_push_mapper_component), + OffloadingArgs); + } + + // Update the pointer to point to the next element that needs to be mapped, + // and check whether we have mapped all elements. + llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32( + PtrPHI, /*Idx0=*/1, "omp.arraymap.next"); + // The back edge leaves from the block the builder is currently in: the + // component loop above emits new blocks, so that is BodyBB only when no + // component was generated. + PtrPHI->addIncoming(PtrNext, MapperCGF.Builder.GetInsertBlock()); + llvm::Value *IsDone = + MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone"); + llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit"); + MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB); + + MapperCGF.EmitBlock(ExitBB); + // Emit array deletion if this is an array section and \p MapType indicates + // that deletion is required. + emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, + ElementSize, DoneBB, /*IsInit=*/false); + + // Emit the function exit block. + MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true); + MapperCGF.FinishFunction(); + UDMMap.try_emplace(D, Fn); + if (CGF) { + auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn); + Decls.second.push_back(D); + } +} + +/// Emit the array initialization or deletion portion for user-defined mapper +/// code generation. First, it evaluates whether an array section is mapped +/// (\a Size is greater than 1) and whether the \a MapType instructs to delete +/// this section. If \a IsInit is true, and \a MapType indicates to not delete +/// this array, array initialization code is generated. If \a IsInit is false, +/// and \a MapType indicates to delete this array, array deletion code is +/// generated. In every other case control branches to \a ExitBB. +void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( + CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, + llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, + CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) { + StringRef Prefix = IsInit ? 
".init" : ".del"; + + // Evaluate if this is an array section, i.e. more than one element is + // mapped (size > 1). + llvm::BasicBlock *IsDeleteBB = + MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete"); + llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix); + llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT( + Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); + MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB); + + // Evaluate if we are going to delete this section. + MapperCGF.EmitBlock(IsDeleteBB); + llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( + MapType, + MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); + llvm::Value *DeleteCond; + if (IsInit) { + DeleteCond = MapperCGF.Builder.CreateIsNull( + DeleteBit, "omp.array" + Prefix + ".delete"); + } else { + DeleteCond = MapperCGF.Builder.CreateIsNotNull( + DeleteBit, "omp.array" + Prefix + ".delete"); + } + MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB); + + MapperCGF.EmitBlock(BodyBB); + // Get the array size by multiplying element size and element number (i.e., \p + // Size). + llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( + Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); + // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves + // memory allocation/deletion purpose only. + llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( + MapType, + MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | + MappableExprsHandler::OMP_MAP_FROM))); + // Call the runtime API __tgt_push_mapper_component to fill up the runtime + // data structure. 
+ llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg}; + MapperCGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs); +} + void CGOpenMPRuntime::emitTargetNumIterationsCall( - CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device, - const llvm::function_ref<llvm::Value *( - CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) { + CodeGenFunction &CGF, const OMPExecutableDirective &D, + llvm::Value *DeviceID, + llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + const OMPLoopDirective &D)> + SizeEmitter) { OpenMPDirectiveKind Kind = D.getDirectiveKind(); const OMPExecutableDirective *TD = &D; // Get nested teams distribute kind directive, if any. @@ -8704,30 +9145,24 @@ void CGOpenMPRuntime::emitTargetNumIterationsCall( if (!TD) return; const auto *LD = cast<OMPLoopDirective>(TD); - auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF, + auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF, PrePostActionTy &) { - llvm::Value *NumIterations = SizeEmitter(CGF, *LD); - - // Emit device ID if any. 
- llvm::Value *DeviceID; - if (Device) - DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), - CGF.Int64Ty, /*isSigned=*/true); - else - DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); - - llvm::Value *Args[] = {DeviceID, NumIterations}; - CGF.EmitRuntimeCall( - createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args); + if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { + llvm::Value *Args[] = {DeviceID, NumIterations}; + CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args); + } }; emitInlinedDirective(CGF, OMPD_unknown, CodeGen); } -void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, - const OMPExecutableDirective &D, - llvm::Function *OutlinedFn, - llvm::Value *OutlinedFnID, - const Expr *IfCond, const Expr *Device) { +void CGOpenMPRuntime::emitTargetCall( + CodeGenFunction &CGF, const OMPExecutableDirective &D, + llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, + const Expr *Device, + llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + const OMPLoopDirective &D)> + SizeEmitter) { if (!CGF.HaveInsertPoint()) return; @@ -8746,8 +9181,8 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, llvm::Value *MapTypesArray = nullptr; // Fill up the pointer arrays and transfer execution to the device. auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, - &MapTypesArray, &CS, RequiresOuterTask, - &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) { + &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars, + SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { // On top of the arrays that were filled up, the target offloading call // takes as arguments the device id as well as the host pointer. 
The host // pointer is used by the runtime library to identify the current target @@ -8779,6 +9214,9 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); + // Emit tripcount for the target loop-based directive. + emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); + bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); // The target region is an outlined function launched by the runtime // via calls __tgt_target() or __tgt_target_teams(). @@ -9103,12 +9541,16 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, case OMPD_teams_distribute_parallel_for_simd: case OMPD_target_update: case OMPD_declare_simd: + case OMPD_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: case OMPD_requires: case OMPD_unknown: llvm_unreachable("Unknown target directive for OpenMP device codegen."); @@ -9137,14 +9579,28 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { // If emitting code for the host, we do not process FD here. Instead we do // the normal code generation. - if (!CGM.getLangOpts().OpenMPIsDevice) + if (!CGM.getLangOpts().OpenMPIsDevice) { + if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) { + Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = + OMPDeclareTargetDeclAttr::getDeviceType(FD); + // Do not emit device_type(nohost) functions for the host. + if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) + return true; + } return false; + } const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); StringRef Name = CGM.getMangledName(GD); // Try to detect target regions in the function. 
- if (const auto *FD = dyn_cast<FunctionDecl>(VD)) + if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { scanForTargetRegionsFunctions(FD->getBody(), Name); + Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = + OMPDeclareTargetDeclAttr::getDeviceType(FD); + // Do not emit device_type(host) functions for the device. + if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) + return true; + } // Do not emit function if it is not marked as declare target. return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && @@ -9221,6 +9677,9 @@ CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr) { + if (CGM.getLangOpts().OMPTargetTriples.empty() && + !CGM.getLangOpts().OpenMPIsDevice) + return; llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); if (!Res) { @@ -9433,17 +9892,6 @@ llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { return RequiresRegFn; } -llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { - // If we have offloading in the current module, we need to emit the entries - // now and register the offloading descriptor. - createOffloadEntriesAndInfoMetadata(); - - // Create and register the offloading binary descriptors. This is the main - // entity that captures all the information about offloading in the current - // compilation unit. 
- return createOffloadingBinaryDescriptorRegistration(); -} - void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, @@ -9711,12 +10159,16 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( case OMPD_teams_distribute_parallel_for: case OMPD_teams_distribute_parallel_for_simd: case OMPD_declare_simd: + case OMPD_declare_variant: case OMPD_declare_target: case OMPD_end_declare_target: case OMPD_declare_reduction: case OMPD_declare_mapper: case OMPD_taskloop: case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: case OMPD_target: case OMPD_target_simd: case OMPD_target_teams_distribute: @@ -10377,7 +10829,7 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, } llvm::APInt Size(/*numBits=*/32, NumIterations.size()); QualType ArrayTy = - C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0); + C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); CGF.EmitNullInitialization(DimsAddr, ArrayTy); @@ -10428,7 +10880,7 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); QualType ArrayTy = CGM.getContext().getConstantArrayType( - Int64Ty, Size, ArrayType::Normal, 0); + Int64Ty, Size, nullptr, ArrayType::Normal, 0); Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { const Expr *CounterVal = C->getLoopData(I); @@ -10566,6 +11018,131 @@ Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, return Address(Addr, Align); } +/// Checks current context and returns true if it matches the context selector. 
+template <OMPDeclareVariantAttr::CtxSelectorSetType CtxSet,
+          OMPDeclareVariantAttr::CtxSelectorType Ctx>
+static bool checkContext(const OMPDeclareVariantAttr *A) {
+  assert(CtxSet != OMPDeclareVariantAttr::CtxSetUnknown &&
+         Ctx != OMPDeclareVariantAttr::CtxUnknown &&
+         "Unknown context selector or context selector set.");
+  // Generic fallback: a (selector set, selector) pair without its own
+  // specialization is reported as not matching.
+  return false;
+}
+
+/// Checks for implementation={vendor(<vendor>)} context selector.
+/// \returns true iff every vendor listed in the attribute compares equal to
+/// "llvm" ignoring case (an empty vendor list therefore also yields true),
+/// false otherwise.
+template <>
+bool checkContext<OMPDeclareVariantAttr::CtxSetImplementation,
+                  OMPDeclareVariantAttr::CtxVendor>(
+    const OMPDeclareVariantAttr *A) {
+  // compare_lower() returns 0 on a match, hence the negation.
+  return llvm::all_of(A->implVendors(),
+                      [](StringRef S) { return !S.compare_lower("llvm"); });
+}
+
+/// Decides whether the score \p LHS takes precedence over the score \p RHS.
+/// A null expression stands for an unknown score.
+/// \returns true if both scores are unknown, if only \p LHS is known, or if
+/// both are known and the value of \p LHS is greater than or equal to the
+/// value of \p RHS; false otherwise.
+static bool greaterCtxScore(ASTContext &Ctx, const Expr *LHS, const Expr *RHS) {
+  // If both scores are unknown, choose the very first one.
+  if (!LHS && !RHS)
+    return true;
+  // If only one is known, return this one.
+  if (LHS && !RHS)
+    return true;
+  if (!LHS && RHS)
+    return false;
+  llvm::APSInt LHSVal = LHS->EvaluateKnownConstInt(Ctx);
+  llvm::APSInt RHSVal = RHS->EvaluateKnownConstInt(Ctx);
+  // Ties count as "greater", so LHS also wins when the values are equal.
+  return llvm::APSInt::compareValues(LHSVal, RHSVal) >= 0;
+}
+
+namespace {
+/// Comparator for the priority queue for context selector.
+/// NOTE(review): no use of this class is visible in this hunk;
+/// getDeclareVariantFunction uses a lambda with the same body - confirm
+/// whether the class is still needed.
+class OMPDeclareVariantAttrComparer
+    : public std::greater<const OMPDeclareVariantAttr *> {
+private:
+  ASTContext &Ctx;
+
+public:
+  OMPDeclareVariantAttrComparer(ASTContext &Ctx) : Ctx(Ctx) {}
+  /// Compares two attributes by their context scores. A score expression is
+  /// read only from an attribute that reports ScoreSpecified; otherwise the
+  /// score is passed to greaterCtxScore as unknown (null).
+  bool operator()(const OMPDeclareVariantAttr *LHS,
+                  const OMPDeclareVariantAttr *RHS) const {
+    const Expr *LHSExpr = nullptr;
+    const Expr *RHSExpr = nullptr;
+    if (LHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified)
+      LHSExpr = LHS->getScore();
+    if (RHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified)
+      RHSExpr = RHS->getScore();
+    return greaterCtxScore(Ctx, LHSExpr, RHSExpr);
+  }
+};
+} // anonymous namespace
+
+/// Finds the variant function that matches current context with its context
+/// selector.
+static const FunctionDecl *getDeclareVariantFunction(ASTContext &Ctx,
+                                                     const FunctionDecl *FD) {
+  // Without any declare variant attribute the function is its own result.
+  if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>())
+    return FD;
+  // Iterate through all DeclareVariant attributes and check context selectors.
+  // The comparer reads a score expression only from an attribute that reports
+  // ScoreSpecified; otherwise the score is treated as unknown (null).
+  auto &&Comparer = [&Ctx](const OMPDeclareVariantAttr *LHS,
+                           const OMPDeclareVariantAttr *RHS) {
+    const Expr *LHSExpr = nullptr;
+    const Expr *RHSExpr = nullptr;
+    if (LHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified)
+      LHSExpr = LHS->getScore();
+    if (RHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified)
+      RHSExpr = RHS->getScore();
+    return greaterCtxScore(Ctx, LHSExpr, RHSExpr);
+  };
+  const OMPDeclareVariantAttr *TopMostAttr = nullptr;
+  for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) {
+    // Stays null unless the attribute's selector matches the current context.
+    const OMPDeclareVariantAttr *SelectedAttr = nullptr;
+    switch (A->getCtxSelectorSet()) {
+    case OMPDeclareVariantAttr::CtxSetImplementation:
+      switch (A->getCtxSelector()) {
+      case OMPDeclareVariantAttr::CtxVendor:
+        if (checkContext<OMPDeclareVariantAttr::CtxSetImplementation,
+                         OMPDeclareVariantAttr::CtxVendor>(A))
+          SelectedAttr = A;
+        break;
+      case OMPDeclareVariantAttr::CtxUnknown:
+        llvm_unreachable(
+            "Unknown context selector in implementation selector set.");
+      }
+      break;
+    case OMPDeclareVariantAttr::CtxSetUnknown:
+      llvm_unreachable("Unknown context selector set.");
+    }
+    // If the attribute matches the context, find the attribute with the highest
+    // score.
+    // The current best is replaced only when the comparer does not prefer it;
+    // the comparer prefers its first argument on ties, so the earlier
+    // attribute is kept when scores are equal or both unknown.
+    if (SelectedAttr && (!TopMostAttr || !Comparer(TopMostAttr, SelectedAttr)))
+      TopMostAttr = SelectedAttr;
+  }
+  // No attribute matched the context: fall back to the function itself.
+  if (!TopMostAttr)
+    return FD;
+  // The variant reference is taken to be a DeclRefExpr (after stripping
+  // parentheses and implicit casts) that names a FunctionDecl.
+  return cast<FunctionDecl>(
+      cast<DeclRefExpr>(TopMostAttr->getVariantFuncRef()->IgnoreParenImpCasts())
+          ->getDecl());
+}
+
+/// Tries to emit the variant function selected for \p GD in place of \p GD.
+/// \returns false if the global value found under the mangled name of \p GD is
+/// already a definition, or if getDeclareVariantFunction selects no other
+/// function; true if tryEmitDeclareVariant succeeded or the pair was recorded
+/// in DeferredVariantFunction.
+bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) {
+  const auto *D = cast<FunctionDecl>(GD.getDecl());
+  // If the original function is defined already, use its definition.
+  StringRef MangledName = CGM.getMangledName(GD);
+  llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName);
+  if (Orig && !Orig->isDeclaration())
+    return false;
+  const FunctionDecl *NewFD = getDeclareVariantFunction(CGM.getContext(), D);
+  // Emit original function if it does not have declare variant attribute or the
+  // context does not match.
+  if (NewFD == D)
+    return false;
+  GlobalDecl NewGD = GD.getWithDecl(NewFD);
+  if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) {
+    // Emitted now, so drop any previously deferred entry for this function.
+    DeferredVariantFunction.erase(D);
+    return true;
+  }
+  // Could not emit yet: remember the (variant, original) pair, keyed by the
+  // original declaration.
+  DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD)));
+  return true;
+}
+
 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
@@ -10786,12 +11363,13 @@ void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
   llvm_unreachable("Not supported in SIMD-only mode");
 }

-void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
-                                         const OMPExecutableDirective &D,
-                                         llvm::Function *OutlinedFn,
-                                         llvm::Value *OutlinedFnID,
-                                         const Expr *IfCond,
-                                         const Expr *Device) {
+void CGOpenMPSIMDRuntime::emitTargetCall(
+    CodeGenFunction &CGF, const OMPExecutableDirective &D,
+    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
+    const Expr *Device,
+    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+                                     const OMPLoopDirective &D)>
+        SizeEmitter) {
   llvm_unreachable("Not supported in SIMD-only mode");
 }

@@ -10807,10 +11385,6 @@ bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
   return false;
 }

-llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
-  return nullptr;
-}
-
 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &D,
                                         SourceLocation Loc,
|