diff options
Diffstat (limited to 'contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp')
-rw-r--r-- | contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp | 1843 |
1 files changed, 1296 insertions, 547 deletions
diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 14e0cba62b23..57cc2d60e2af 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -180,7 +180,7 @@ public: UntiedCodeGen(CGF); CodeGenFunction::JumpDest CurPoint = CGF.getJumpDestInCurrentScope(".untied.next."); - CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); + CGF.EmitBranch(CGF.ReturnBlock.getBlock()); CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), CGF.Builder.GetInsertBlock()); @@ -1404,6 +1404,19 @@ void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { } } +static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, + SourceLocation Loc, + SmallString<128> &Buffer) { + llvm::raw_svector_ostream OS(Buffer); + // Build debug location + PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); + OS << ";" << PLoc.getFilename() << ";"; + if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) + OS << FD->getQualifiedNameAsString(); + OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; + return OS.str(); +} + llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags) { @@ -1430,6 +1443,16 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, SourceLocation Loc) { assert(CGF.CurFn && "No function in current CodeGenFunction."); + // If the OpenMPIRBuilder is used we need to use it for all thread id calls as + // the clang invariants used below might be broken. 
+ if (CGM.getLangOpts().OpenMPIRBuilder) { + SmallString<128> Buffer; + OMPBuilder.updateToLocation(CGF.Builder.saveIP()); + auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr( + getIdentStringFromSourceLocation(CGF, Loc, Buffer)); + return OMPBuilder.getOrCreateThreadID( + OMPBuilder.getOrCreateIdent(SrcLocStr)); + } llvm::Value *ThreadID = nullptr; // Check whether we've already cached a load of the thread id in this @@ -1503,6 +1526,7 @@ void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { FunctionUDMMap.erase(I); } LastprivateConditionalToTypes.erase(CGF.CurFn); + FunctionToUntiedTaskStackMap.erase(CGF.CurFn); } llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { @@ -2074,6 +2098,14 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); + + // Ensure we do not inline the function. This is trivially true for the ones + // passed to __kmpc_fork_call but the ones called in serialized regions + // could be inlined. This is not perfect but it is closer to the invariant + // we want, namely, every data environment starts with a new function. + // TODO: We should pass the if condition to the runtime function and do the + // handling there. Much cleaner code. 
+ OutlinedFn->addFnAttr(llvm::Attribute::NoInline); RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); // __kmpc_end_serialized_parallel(&Loc, GTid); @@ -2236,7 +2268,7 @@ void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, if (!CGF.HaveInsertPoint()) return; if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { - OMPBuilder.CreateTaskyield(CGF.Builder); + OMPBuilder.createTaskyield(CGF.Builder); } else { // Build call __kmpc_omp_taskyield(loc, thread_id, 0); llvm::Value *Args[] = { @@ -2491,7 +2523,7 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, auto *OMPRegionInfo = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { - CGF.Builder.restoreIP(OMPBuilder.CreateBarrier( + CGF.Builder.restoreIP(OMPBuilder.createBarrier( CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); return; } @@ -2853,7 +2885,7 @@ void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, SourceLocation Loc, llvm::AtomicOrdering AO) { if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { - OMPBuilder.CreateFlush(CGF.Builder); + OMPBuilder.createFlush(CGF.Builder); } else { if (!CGF.HaveInsertPoint()) return; @@ -2917,20 +2949,23 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: // If we are emitting code for a target, the entry is already initialized, // only has to be registered. if (CGM.getLangOpts().OpenMPIsDevice) { - if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) { - unsigned DiagID = CGM.getDiags().getCustomDiagID( - DiagnosticsEngine::Error, - "Unable to find target region on line '%0' in the device code."); - CGM.getDiags().Report(DiagID) << LineNum; - return; - } + // This could happen if the device compilation is invoked standalone. 
+ if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) + initializeTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, + OffloadingEntriesNum); auto &Entry = OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; - assert(Entry.isValid() && "Entry not initialized!"); Entry.setAddress(Addr); Entry.setID(ID); Entry.setFlags(Flags); } else { + if (Flags == + OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && + hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, + /*IgnoreAddressId*/ true)) + return; + assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && + "Target region entry already registered!"); OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; ++OffloadingEntriesNum; @@ -2938,8 +2973,8 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: } bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( - unsigned DeviceID, unsigned FileID, StringRef ParentName, - unsigned LineNum) const { + unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, + bool IgnoreAddressId) const { auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); if (PerDevice == OffloadEntriesTargetRegion.end()) return false; @@ -2953,7 +2988,8 @@ bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( if (PerLine == PerParentName->second.end()) return false; // Fail if this entry is already registered. - if (PerLine->second.getAddress() || PerLine->second.getID()) + if (!IgnoreAddressId && + (PerLine->second.getAddress() || PerLine->second.getID())) return false; return true; } @@ -2985,9 +3021,10 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: OMPTargetGlobalVarEntryKind Flags, llvm::GlobalValue::LinkageTypes Linkage) { if (CGM.getLangOpts().OpenMPIsDevice) { + // This could happen if the device compilation is invoked standalone. 
+ if (!hasDeviceGlobalVarEntryInfo(VarName)) + initializeDeviceGlobalVarEntryInfo(VarName, Flags, OffloadingEntriesNum); auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; - assert(Entry.isValid() && Entry.getFlags() == Flags && - "Entry not initialized!"); assert((!Entry.getAddress() || Entry.getAddress() == Addr) && "Resetting with the new address."); if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { @@ -3043,11 +3080,12 @@ void CGOpenMPRuntime::createOffloadEntry( llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy), - llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy), - llvm::ConstantInt::get(CGM.SizeTy, Size), - llvm::ConstantInt::get(CGM.Int32Ty, Flags), - llvm::ConstantInt::get(CGM.Int32Ty, 0)}; + llvm::Constant *Data[] = { + llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy), + llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy), + llvm::ConstantInt::get(CGM.SizeTy, Size), + llvm::ConstantInt::get(CGM.Int32Ty, Flags), + llvm::ConstantInt::get(CGM.Int32Ty, 0)}; std::string EntryName = getName({"omp_offloading", "entry", ""}); llvm::GlobalVariable *Entry = createGlobalStruct( CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, @@ -3347,14 +3385,29 @@ struct PrivateHelpersTy { const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), PrivateElemInit(PrivateElemInit) {} + PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} const Expr *OriginalRef = nullptr; const VarDecl *Original = nullptr; const VarDecl *PrivateCopy = nullptr; const VarDecl *PrivateElemInit = nullptr; + bool isLocalPrivate() const { + return !OriginalRef && !PrivateCopy && !PrivateElemInit; + } }; typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; } // anonymous namespace 
+static bool isAllocatableDecl(const VarDecl *VD) { + const VarDecl *CVD = VD->getCanonicalDecl(); + if (!CVD->hasAttr<OMPAllocateDeclAttr>()) + return false; + const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); + // Use the default allocation. + return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || + AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && + !AA->getAllocator()); +} + static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { if (!Privates.empty()) { @@ -3367,6 +3420,14 @@ createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { for (const auto &Pair : Privates) { const VarDecl *VD = Pair.second.Original; QualType Type = VD->getType().getNonReferenceType(); + // If the private variable is a local variable with lvalue ref type, + // allocate the pointer instead of the pointee type. + if (Pair.second.isLocalPrivate()) { + if (VD->getType()->isLValueReferenceType()) + Type = C.getPointerType(Type); + if (isAllocatableDecl(VD)) + Type = C.getPointerType(Type); + } FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); if (VD->hasAttrs()) { for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), @@ -3620,10 +3681,7 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, /// \endcode static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, - ArrayRef<const Expr *> PrivateVars, - ArrayRef<const Expr *> FirstprivateVars, - ArrayRef<const Expr *> LastprivateVars, - QualType PrivatesQTy, + const OMPTaskDataTy &Data, QualType PrivatesQTy, ArrayRef<PrivateDataTy> Privates) { ASTContext &C = CGM.getContext(); FunctionArgList Args; @@ -3632,9 +3690,9 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, C.getPointerType(PrivatesQTy).withConst().withRestrict(), ImplicitParamDecl::Other); Args.push_back(&TaskPrivatesArg); - llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; + 
llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; unsigned Counter = 1; - for (const Expr *E : PrivateVars) { + for (const Expr *E : Data.PrivateVars) { Args.push_back(ImplicitParamDecl::Create( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) @@ -3645,7 +3703,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, PrivateVarsPos[VD] = Counter; ++Counter; } - for (const Expr *E : FirstprivateVars) { + for (const Expr *E : Data.FirstprivateVars) { Args.push_back(ImplicitParamDecl::Create( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) @@ -3656,7 +3714,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, PrivateVarsPos[VD] = Counter; ++Counter; } - for (const Expr *E : LastprivateVars) { + for (const Expr *E : Data.LastprivateVars) { Args.push_back(ImplicitParamDecl::Create( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) @@ -3667,6 +3725,19 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, PrivateVarsPos[VD] = Counter; ++Counter; } + for (const VarDecl *VD : Data.PrivateLocals) { + QualType Ty = VD->getType().getNonReferenceType(); + if (VD->getType()->isLValueReferenceType()) + Ty = C.getPointerType(Ty); + if (isAllocatableDecl(VD)) + Ty = C.getPointerType(Ty); + Args.push_back(ImplicitParamDecl::Create( + C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, + C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), + ImplicitParamDecl::Other)); + PrivateVarsPos[VD] = Counter; + ++Counter; + } const auto &TaskPrivatesMapFnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); llvm::FunctionType *TaskPrivatesMapTy = @@ -3727,9 +3798,9 @@ static void emitPrivatesInit(CodeGenFunction &CGF, bool IsTargetTask = isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || isOpenMPTargetExecutionDirective(D.getDirectiveKind()); - // For 
target-based directives skip 3 firstprivate arrays BasePointersArray, - // PointersArray and SizesArray. The original variables for these arrays are - // not captured and we get their addresses explicitly. + // For target-based directives skip 4 firstprivate arrays BasePointersArray, + // PointersArray, SizesArray, and MappersArray. The original variables for + // these arrays are not captured and we get their addresses explicitly. if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || (IsTargetTask && KmpTaskSharedsPtr.isValid())) { SrcBase = CGF.MakeAddrLValue( @@ -3739,6 +3810,11 @@ static void emitPrivatesInit(CodeGenFunction &CGF, } FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); for (const PrivateDataTy &Pair : Privates) { + // Do not initialize private locals. + if (Pair.second.isLocalPrivate()) { + ++FI; + continue; + } const VarDecl *VD = Pair.second.PrivateCopy; const Expr *Init = VD->getAnyInitializer(); if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && @@ -3747,7 +3823,7 @@ static void emitPrivatesInit(CodeGenFunction &CGF, if (const VarDecl *Elem = Pair.second.PrivateElemInit) { const VarDecl *OriginalVD = Pair.second.Original; // Check if the variable is the target-based BasePointersArray, - // PointersArray or SizesArray. + // PointersArray, SizesArray, or MappersArray. 
LValue SharedRefLValue; QualType Type = PrivateLValue.getType(); const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); @@ -3829,6 +3905,8 @@ static bool checkInitIsRequired(CodeGenFunction &CGF, ArrayRef<PrivateDataTy> Privates) { bool InitRequired = false; for (const PrivateDataTy &Pair : Privates) { + if (Pair.second.isLocalPrivate()) + continue; const VarDecl *VD = Pair.second.PrivateCopy; const Expr *Init = VD->getAnyInitializer(); InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && @@ -3922,16 +4000,16 @@ emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, /// Checks if destructor function is required to be generated. /// \return true if cleanups are required, false otherwise. static bool -checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { - bool NeedsCleanup = false; - auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); - const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); - for (const FieldDecl *FD : PrivateRD->fields()) { - NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); - if (NeedsCleanup) - break; +checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, + ArrayRef<PrivateDataTy> Privates) { + for (const PrivateDataTy &P : Privates) { + if (P.second.isLocalPrivate()) + continue; + QualType Ty = P.second.Original->getType().getNonReferenceType(); + if (Ty.isDestructedType()) + return true; } - return NeedsCleanup; + return false; } namespace { @@ -4101,9 +4179,16 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, /*PrivateElemInit=*/nullptr)); ++I; } - llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) { - return L.first > R.first; - }); + for (const VarDecl *VD : Data.PrivateLocals) { + if (isAllocatableDecl(VD)) + Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD)); + else + Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); + } + llvm::stable_sort(Privates, + 
[](const PrivateDataTy &L, const PrivateDataTy &R) { + return L.first > R.first; + }); QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); // Build type kmp_routine_entry_t (if not built yet). emitKmpRoutineEntryT(KmpInt32Ty); @@ -4145,9 +4230,8 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, std::next(TaskFunction->arg_begin(), 3)->getType(); if (!Privates.empty()) { auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); - TaskPrivatesMap = emitTaskPrivateMappingFunction( - CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, - FI->getType(), Privates); + TaskPrivatesMap = + emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates); TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( TaskPrivatesMap, TaskPrivatesMapTy); } else { @@ -4177,7 +4261,8 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags = Data.Tied ? TiedFlag : 0; bool NeedsCleanup = false; if (!Privates.empty()) { - NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); + NeedsCleanup = + checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates); if (NeedsCleanup) Flags = Flags | DestructorsFlag; } @@ -6108,7 +6193,7 @@ void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, return; if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { - OMPBuilder.CreateTaskwait(CGF.Builder); + OMPBuilder.createTaskwait(CGF.Builder); } else { // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 // global_tid); @@ -6391,6 +6476,8 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); OutlinedFn->setDSOLocal(false); + if (CGM.getTriple().isAMDGCN()) + OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); } else { std::string Name = getName({EntryFnName, "region_id"}); OutlinedFnID = new 
llvm::GlobalVariable( @@ -6954,6 +7041,13 @@ public: /// Close is a hint to the runtime to allocate memory close to /// the target device. OMP_MAP_CLOSE = 0x400, + /// 0x800 is reserved for compatibility with XLC. + /// Produce a runtime error if the data is not already allocated. + OMP_MAP_PRESENT = 0x1000, + /// Signal that the runtime library should use args as an array of + /// descriptor_dim pointers and use args_size as dims. Used when we have + /// non-contiguous list items in target update directive + OMP_MAP_NON_CONTIG = 0x100000000000, /// The 16 MSBs of the flags indicate whether the entry is member of some /// struct/class. OMP_MAP_MEMBER_OF = 0xffff000000000000, @@ -6969,6 +7063,23 @@ public: return Offset; } + /// Class that holds debugging information for a data mapping to be passed to + /// the runtime library. + class MappingExprInfo { + /// The variable declaration used for the data mapping. + const ValueDecl *MapDecl = nullptr; + /// The original expression used in the map clause, or null if there is + /// none. + const Expr *MapExpr = nullptr; + + public: + MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr) + : MapDecl(MapDecl), MapExpr(MapExpr) {} + + const ValueDecl *getMapDecl() const { return MapDecl; } + const Expr *getMapExpr() const { return MapExpr; } + }; + /// Class that associates information with a base pointer to be passed to the /// runtime library. 
class BasePointerInfo { @@ -6986,9 +7097,52 @@ public: void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } }; + using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>; using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; + using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>; + using MapDimArrayTy = SmallVector<uint64_t, 4>; + using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>; + + /// This structure contains combined information generated for mappable + /// clauses, including base pointers, pointers, sizes, map types, user-defined + /// mappers, and non-contiguous information. + struct MapCombinedInfoTy { + struct StructNonContiguousInfo { + bool IsNonContiguous = false; + MapDimArrayTy Dims; + MapNonContiguousArrayTy Offsets; + MapNonContiguousArrayTy Counts; + MapNonContiguousArrayTy Strides; + }; + MapExprsArrayTy Exprs; + MapBaseValuesArrayTy BasePointers; + MapValuesArrayTy Pointers; + MapValuesArrayTy Sizes; + MapFlagsArrayTy Types; + MapMappersArrayTy Mappers; + StructNonContiguousInfo NonContigInfo; + + /// Append arrays in \a CurInfo. 
+ void append(MapCombinedInfoTy &CurInfo) { + Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end()); + BasePointers.append(CurInfo.BasePointers.begin(), + CurInfo.BasePointers.end()); + Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); + Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); + Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); + Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end()); + NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(), + CurInfo.NonContigInfo.Dims.end()); + NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(), + CurInfo.NonContigInfo.Offsets.end()); + NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(), + CurInfo.NonContigInfo.Counts.end()); + NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(), + CurInfo.NonContigInfo.Strides.end()); + } + }; /// Map between a struct and the its lowest & highest elements which have been /// mapped. @@ -7000,6 +7154,7 @@ public: std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 0, Address::invalid()}; Address Base = Address::invalid(); + bool IsArraySection = false; }; private: @@ -7008,19 +7163,26 @@ private: OMPClauseMappableExprCommon::MappableExprComponentListRef Components; OpenMPMapClauseKind MapType = OMPC_MAP_unknown; ArrayRef<OpenMPMapModifierKind> MapModifiers; + ArrayRef<OpenMPMotionModifierKind> MotionModifiers; bool ReturnDevicePointer = false; bool IsImplicit = false; + const ValueDecl *Mapper = nullptr; + const Expr *VarRef = nullptr; bool ForDeviceAddr = false; MapInfo() = default; MapInfo( OMPClauseMappableExprCommon::MappableExprComponentListRef Components, OpenMPMapClauseKind MapType, - ArrayRef<OpenMPMapModifierKind> MapModifiers, bool ReturnDevicePointer, - bool IsImplicit, bool ForDeviceAddr = false) + ArrayRef<OpenMPMapModifierKind> MapModifiers, + ArrayRef<OpenMPMotionModifierKind> MotionModifiers, + bool ReturnDevicePointer, bool IsImplicit, + const ValueDecl 
*Mapper = nullptr, const Expr *VarRef = nullptr, + bool ForDeviceAddr = false) : Components(Components), MapType(MapType), MapModifiers(MapModifiers), + MotionModifiers(MotionModifiers), ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit), - ForDeviceAddr(ForDeviceAddr) {} + Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {} }; /// If use_device_ptr or use_device_addr is used on a decl which is a struct @@ -7138,7 +7300,8 @@ private: /// expression. OpenMPOffloadMappingFlags getMapTypeBits( OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, - bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const { + ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit, + bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const { OpenMPOffloadMappingFlags Bits = IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; switch (MapType) { @@ -7174,6 +7337,14 @@ private: if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) != MapModifiers.end()) Bits |= OMP_MAP_CLOSE; + if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) + != MapModifiers.end()) + Bits |= OMP_MAP_PRESENT; + if (llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) + != MotionModifiers.end()) + Bits |= OMP_MAP_PRESENT; + if (IsNonContiguous) + Bits |= OMP_MAP_NON_CONTIG; return Bits; } @@ -7216,17 +7387,19 @@ private: return ConstLength.getSExtValue() != 1; } - /// Generate the base pointers, section pointers, sizes and map type - /// bits for the provided map type, map modifier, and expression components. + /// Generate the base pointers, section pointers, sizes, map type bits, and + /// user-defined mappers (all included in \a CombinedInfo) for the provided + /// map type, map or motion modifiers, and expression components. /// \a IsFirstComponent should be set to true if the provided set of /// components is the first associated with a capture. 
void generateInfoForComponentList( OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, + ArrayRef<OpenMPMotionModifierKind> MotionModifiers, OMPClauseMappableExprCommon::MappableExprComponentListRef Components, - MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, - MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, - StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, - bool IsImplicit, bool ForDeviceAddr = false, + MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, + bool IsFirstComponentList, bool IsImplicit, + const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, + const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> OverlappedElements = llvm::None) const { // The following summarizes what has to be generated for each map and the @@ -7449,11 +7622,11 @@ private: // can be associated with the combined storage if shared memory mode is // active or the base declaration is not global variable. const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration()); - if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || + if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || !VD || VD->hasLocalStorage()) BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); else - FirstPointerInComplexData = IsCaptureFirstInfo; + FirstPointerInComplexData = true; ++I; } } @@ -7483,6 +7656,12 @@ private: // whether we are dealing with a member of a declared struct. const MemberExpr *EncounteredME = nullptr; + // Track for the total number of dimension. Start from one for the dummy + // dimension. + uint64_t DimSize = 1; + + bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; + for (; I != CE; ++I) { // If the current component is member of a struct (parent struct) mark it. if (!EncounteredME) { @@ -7512,9 +7691,18 @@ private: // becomes the base address for the following components. 
// A final array section, is one whose length can't be proved to be one. + // If the map item is non-contiguous then we don't treat any array section + // as final array section. bool IsFinalArraySection = + !IsNonContiguous && isFinalArraySectionExpression(I->getAssociatedExpression()); + // If we have a declaration for the mapping use that, otherwise use + // the base declaration of the map clause. + const ValueDecl *MapDecl = (I->getAssociatedDeclaration()) + ? I->getAssociatedDeclaration() + : BaseDecl; + // Get information on whether the element is a pointer. Have to do a // special treatment for array sections given that they are built-in // types. @@ -7530,7 +7718,10 @@ private: .getCanonicalType() ->isAnyPointerType()) || I->getAssociatedExpression()->getType()->isAnyPointerType(); - bool IsNonDerefPointer = IsPointer && !UO && !BO; + bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; + + if (OASE) + ++DimSize; if (Next == CE || IsNonDerefPointer || IsFinalArraySection) { // If this is not the last component, we expect the pointer to be @@ -7539,6 +7730,7 @@ private: isa<MemberExpr>(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || + isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || isa<UnaryOperator>(Next->getAssociatedExpression()) || isa<BinaryOperator>(Next->getAssociatedExpression())) && "Unexpected expression"); @@ -7584,9 +7776,9 @@ private: // Emit data for non-overlapped data. OpenMPOffloadMappingFlags Flags = OMP_MAP_MEMBER_OF | - getMapTypeBits(MapType, MapModifiers, IsImplicit, + getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, /*AddPtrFlag=*/false, - /*AddIsTargetParamFlag=*/false); + /*AddIsTargetParamFlag=*/false, IsNonContiguous); LB = BP; llvm::Value *Size = nullptr; // Do bitcopy of all non-overlapped structure elements. 
@@ -7605,40 +7797,57 @@ private: break; } } - BasePointers.push_back(BP.getPointer()); - Pointers.push_back(LB.getPointer()); - Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, - /*isSigned=*/true)); - Types.push_back(Flags); + assert(Size && "Failed to determine structure size"); + CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); + CombinedInfo.BasePointers.push_back(BP.getPointer()); + CombinedInfo.Pointers.push_back(LB.getPointer()); + CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( + Size, CGF.Int64Ty, /*isSigned=*/true)); + CombinedInfo.Types.push_back(Flags); + CombinedInfo.Mappers.push_back(nullptr); + CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize + : 1); LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); } - BasePointers.push_back(BP.getPointer()); - Pointers.push_back(LB.getPointer()); + CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); + CombinedInfo.BasePointers.push_back(BP.getPointer()); + CombinedInfo.Pointers.push_back(LB.getPointer()); Size = CGF.Builder.CreatePtrDiff( CGF.EmitCastToVoidPtr( CGF.Builder.CreateConstGEP(HB, 1).getPointer()), CGF.EmitCastToVoidPtr(LB.getPointer())); - Sizes.push_back( + CombinedInfo.Sizes.push_back( CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); - Types.push_back(Flags); + CombinedInfo.Types.push_back(Flags); + CombinedInfo.Mappers.push_back(nullptr); + CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize + : 1); break; } llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); - if (!IsMemberPointerOrAddr) { - BasePointers.push_back(BP.getPointer()); - Pointers.push_back(LB.getPointer()); - Sizes.push_back( + if (!IsMemberPointerOrAddr || + (Next == CE && MapType != OMPC_MAP_unknown)) { + CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); + CombinedInfo.BasePointers.push_back(BP.getPointer()); + CombinedInfo.Pointers.push_back(LB.getPointer()); + CombinedInfo.Sizes.push_back( CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); + CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize + : 1); + + // If Mapper is valid, the last component inherits the mapper. + bool HasMapper = Mapper && Next == CE; + CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); // We need to add a pointer flag for each map that comes from the // same expression except for the first one. We also need to signal // this map is the first one that relates with the current capture // (there is a set of entries for each capture). - OpenMPOffloadMappingFlags Flags = - getMapTypeBits(MapType, MapModifiers, IsImplicit, - !IsExpressionFirstInfo || RequiresReference || - FirstPointerInComplexData, - IsCaptureFirstInfo && !RequiresReference); + OpenMPOffloadMappingFlags Flags = getMapTypeBits( + MapType, MapModifiers, MotionModifiers, IsImplicit, + !IsExpressionFirstInfo || RequiresReference || + FirstPointerInComplexData, + IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); if (!IsExpressionFirstInfo) { // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, @@ -7657,7 +7866,7 @@ private: } } - Types.push_back(Flags); + CombinedInfo.Types.push_back(Flags); } // If we have encountered a member expression so far, keep track of the @@ -7686,6 +7895,10 @@ private: } } + // Need to emit combined struct for array sections. 
+ if (IsFinalArraySection || IsNonContiguous) + PartialStruct.IsArraySection = true; + // If we have a final array section, we are done with this expression. if (IsFinalArraySection) break; @@ -7697,8 +7910,191 @@ private: IsExpressionFirstInfo = false; IsCaptureFirstInfo = false; FirstPointerInComplexData = false; + } else if (FirstPointerInComplexData) { + QualType Ty = Components.rbegin() + ->getAssociatedDeclaration() + ->getType() + .getNonReferenceType(); + BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); + FirstPointerInComplexData = false; + } + } + + if (!IsNonContiguous) + return; + + const ASTContext &Context = CGF.getContext(); + + // For supporting stride in array section, we need to initialize the first + // dimension size as 1, first offset as 0, and first count as 1 + MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)}; + MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; + MapValuesArrayTy CurStrides; + MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; + uint64_t ElementTypeSize; + + // Collect Size information for each dimension and get the element size as + // the first Stride. For example, for `int arr[10][10]`, the DimSizes + // should be [10, 10] and the first stride is 4 btyes. + for (const OMPClauseMappableExprCommon::MappableComponent &Component : + Components) { + const Expr *AssocExpr = Component.getAssociatedExpression(); + const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); + + if (!OASE) + continue; + + QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); + auto *CAT = Context.getAsConstantArrayType(Ty); + auto *VAT = Context.getAsVariableArrayType(Ty); + + // We need all the dimension size except for the last dimension. + assert((VAT || CAT || &Component == &*Components.begin()) && + "Should be either ConstantArray or VariableArray if not the " + "first Component"); + + // Get element size if CurStrides is empty. 
+ if (CurStrides.empty()) { + const Type *ElementType = nullptr; + if (CAT) + ElementType = CAT->getElementType().getTypePtr(); + else if (VAT) + ElementType = VAT->getElementType().getTypePtr(); + else + assert(&Component == &*Components.begin() && + "Only expect pointer (non CAT or VAT) when this is the " + "first Component"); + // If ElementType is null, then it means the base is a pointer + // (neither CAT nor VAT) and we'll attempt to get ElementType again + // for next iteration. + if (ElementType) { + // For the case that having pointer as base, we need to remove one + // level of indirection. + if (&Component != &*Components.begin()) + ElementType = ElementType->getPointeeOrArrayElementType(); + ElementTypeSize = + Context.getTypeSizeInChars(ElementType).getQuantity(); + CurStrides.push_back( + llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); + } + } + // Get dimension value except for the last dimension since we don't need + // it. + if (DimSizes.size() < Components.size() - 1) { + if (CAT) + DimSizes.push_back(llvm::ConstantInt::get( + CGF.Int64Ty, CAT->getSize().getZExtValue())); + else if (VAT) + DimSizes.push_back(CGF.Builder.CreateIntCast( + CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, + /*IsSigned=*/false)); + } + } + + // Skip the dummy dimension since we have already have its information. + auto DI = DimSizes.begin() + 1; + // Product of dimension. + llvm::Value *DimProd = + llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); + + // Collect info for non-contiguous. Notice that offset, count, and stride + // are only meaningful for array-section, so we insert a null for anything + // other than array-section. + // Also, the size of offset, count, and stride are not the same as + // pointers, base_pointers, sizes, or dims. Instead, the size of offset, + // count, and stride are the same as the number of non-contiguous + // declaration in target update to/from clause. 
+ for (const OMPClauseMappableExprCommon::MappableComponent &Component : + Components) { + const Expr *AssocExpr = Component.getAssociatedExpression(); + + if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) { + llvm::Value *Offset = CGF.Builder.CreateIntCast( + CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty, + /*isSigned=*/false); + CurOffsets.push_back(Offset); + CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1)); + CurStrides.push_back(CurStrides.back()); + continue; + } + + const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); + + if (!OASE) + continue; + + // Offset + const Expr *OffsetExpr = OASE->getLowerBound(); + llvm::Value *Offset = nullptr; + if (!OffsetExpr) { + // If offset is absent, then we just set it to zero. + Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); + } else { + Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), + CGF.Int64Ty, + /*isSigned=*/false); + } + CurOffsets.push_back(Offset); + + // Count + const Expr *CountExpr = OASE->getLength(); + llvm::Value *Count = nullptr; + if (!CountExpr) { + // In Clang, once a high dimension is an array section, we construct all + // the lower dimension as array section, however, for case like + // arr[0:2][2], Clang construct the inner dimension as an array section + // but it actually is not in an array section form according to spec. + if (!OASE->getColonLocFirst().isValid() && + !OASE->getColonLocSecond().isValid()) { + Count = llvm::ConstantInt::get(CGF.Int64Ty, 1); + } else { + // OpenMP 5.0, 2.1.5 Array Sections, Description. + // When the length is absent it defaults to ⌈(size − + // lower-bound)/stride⌉, where size is the size of the array + // dimension. + const Expr *StrideExpr = OASE->getStride(); + llvm::Value *Stride = + StrideExpr + ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), + CGF.Int64Ty, /*isSigned=*/false) + : nullptr; + if (Stride) + Count = CGF.Builder.CreateUDiv( + CGF.Builder.CreateNUWSub(*DI, Offset), Stride); + else + Count = CGF.Builder.CreateNUWSub(*DI, Offset); + } + } else { + Count = CGF.EmitScalarExpr(CountExpr); } + Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); + CurCounts.push_back(Count); + + // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size + // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: + // Offset Count Stride + // D0 0 1 4 (int) <- dummy dimension + // D1 0 2 8 (2 * (1) * 4) + // D2 1 2 20 (1 * (1 * 5) * 4) + // D3 0 2 200 (2 * (1 * 5 * 4) * 4) + const Expr *StrideExpr = OASE->getStride(); + llvm::Value *Stride = + StrideExpr + ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), + CGF.Int64Ty, /*isSigned=*/false) + : nullptr; + DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1)); + if (Stride) + CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride)); + else + CurStrides.push_back(DimProd); + if (DI != DimSizes.end()) + ++DI; } + + CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets); + CombinedInfo.NonContigInfo.Counts.push_back(CurCounts); + CombinedInfo.NonContigInfo.Strides.push_back(CurStrides); } /// Return the adjusted map modifiers if the declaration a capture refers to @@ -7831,7 +8227,7 @@ public: // Extract device pointer clause information. for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) for (auto L : C->component_lists()) - DevPointersMap[L.first].push_back(L.second); + DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); } /// Constructor for the declare mapper directive. @@ -7841,15 +8237,23 @@ public: /// Generate code for the combined entry if we have a partially mapped struct /// and take care of the mapping flags of the arguments corresponding to /// individual struct members. 
- void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers, - MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, - MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes, - const StructRangeInfoTy &PartialStruct) const { + void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo, + MapFlagsArrayTy &CurTypes, + const StructRangeInfoTy &PartialStruct, + const ValueDecl *VD = nullptr, + bool NotTargetParams = true) const { + if (CurTypes.size() == 1 && + ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) && + !PartialStruct.IsArraySection) + return; + CombinedInfo.Exprs.push_back(VD); // Base is the base of the struct - BasePointers.push_back(PartialStruct.Base.getPointer()); + CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer()); // Pointer is the address of the lowest element llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); - Pointers.push_back(LB); + CombinedInfo.Pointers.push_back(LB); + // There should not be a mapper for a combined entry. + CombinedInfo.Mappers.push_back(nullptr); // Size is (addr of {highest+1} element) - (addr of lowest element) llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); @@ -7858,28 +8262,39 @@ public: llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, /*isSigned=*/false); - Sizes.push_back(Size); - // Map type is always TARGET_PARAM - Types.push_back(OMP_MAP_TARGET_PARAM); - // Remove TARGET_PARAM flag from the first element - (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; + CombinedInfo.Sizes.push_back(Size); + // Map type is always TARGET_PARAM, if generate info for captures. + CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE + : OMP_MAP_TARGET_PARAM); + // If any element has the present modifier, then make sure the runtime + // doesn't attempt to allocate the struct. 
+ if (CurTypes.end() != + llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { + return Type & OMP_MAP_PRESENT; + })) + CombinedInfo.Types.back() |= OMP_MAP_PRESENT; + // Remove TARGET_PARAM flag from the first element if any. + if (!CurTypes.empty()) + CurTypes.front() &= ~OMP_MAP_TARGET_PARAM; // All other current entries will be MEMBER_OF the combined entry // (except for PTR_AND_OBJ entries which do not have a placeholder value // 0xFFFF in the MEMBER_OF field). OpenMPOffloadMappingFlags MemberOfFlag = - getMemberOfFlag(BasePointers.size() - 1); + getMemberOfFlag(CombinedInfo.BasePointers.size() - 1); for (auto &M : CurTypes) setCorrectMemberOfFlag(M, MemberOfFlag); } - /// Generate all the base pointers, section pointers, sizes and map - /// types for the extracted mappable expressions. Also, for each item that - /// relates with a device pointer, a pair of the relevant declaration and - /// index where it occurs is appended to the device pointers info array. - void generateAllInfo(MapBaseValuesArrayTy &BasePointers, - MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, - MapFlagsArrayTy &Types) const { + /// Generate all the base pointers, section pointers, sizes, map types, and + /// mappers for the extracted mappable expressions (all included in \a + /// CombinedInfo). Also, for each item that relates with a device pointer, a + /// pair of the relevant declaration and index where it occurs is appended to + /// the device pointers info array. + void generateAllInfo( + MapCombinedInfoTy &CombinedInfo, + const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = + llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { // We have to process the component lists that relate with the same // declaration in a single chunk so that we can generate the map flags // correctly. Therefore, we organize all lists in a map. @@ -7888,36 +8303,56 @@ public: // Helper function to fill the information map for the different supported // clauses. 
auto &&InfoGen = - [&Info](const ValueDecl *D, - OMPClauseMappableExprCommon::MappableExprComponentListRef L, - OpenMPMapClauseKind MapType, - ArrayRef<OpenMPMapModifierKind> MapModifiers, - bool ReturnDevicePointer, bool IsImplicit, - bool ForDeviceAddr = false) { + [&Info, &SkipVarSet]( + const ValueDecl *D, + OMPClauseMappableExprCommon::MappableExprComponentListRef L, + OpenMPMapClauseKind MapType, + ArrayRef<OpenMPMapModifierKind> MapModifiers, + ArrayRef<OpenMPMotionModifierKind> MotionModifiers, + bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper, + const Expr *VarRef = nullptr, bool ForDeviceAddr = false) { const ValueDecl *VD = D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; - Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, - IsImplicit, ForDeviceAddr); + if (SkipVarSet.count(VD)) + return; + Info[VD].emplace_back(L, MapType, MapModifiers, MotionModifiers, + ReturnDevicePointer, IsImplicit, Mapper, VarRef, + ForDeviceAddr); }; assert(CurDir.is<const OMPExecutableDirective *>() && "Expect a executable directive"); const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); - for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) + for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { + const auto *EI = C->getVarRefs().begin(); for (const auto L : C->component_lists()) { - InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(), - /*ReturnDevicePointer=*/false, C->isImplicit()); + // The Expression is not correct if the mapping is implicit + const Expr *E = (C->getMapLoc().isValid()) ? 
*EI : nullptr; + InfoGen(std::get<0>(L), std::get<1>(L), C->getMapType(), + C->getMapTypeModifiers(), llvm::None, + /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), + E); + ++EI; } - for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) + } + for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) { + const auto *EI = C->getVarRefs().begin(); for (const auto L : C->component_lists()) { - InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None, - /*ReturnDevicePointer=*/false, C->isImplicit()); + InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_to, llvm::None, + C->getMotionModifiers(), /*ReturnDevicePointer=*/false, + C->isImplicit(), std::get<2>(L), *EI); + ++EI; } - for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) + } + for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) { + const auto *EI = C->getVarRefs().begin(); for (const auto L : C->component_lists()) { - InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None, - /*ReturnDevicePointer=*/false, C->isImplicit()); + InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_from, llvm::None, + C->getMotionModifiers(), /*ReturnDevicePointer=*/false, + C->isImplicit(), std::get<2>(L), *EI); + ++EI; } + } // Look at the use_device_ptr clause information and mark the existing map // entries as such. If there is no map information for an entry in the @@ -7927,18 +8362,18 @@ public: // emission of that entry until the whole struct has been processed. 
llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>> DeferredInfo; - MapBaseValuesArrayTy UseDevicePtrBasePointers; - MapValuesArrayTy UseDevicePtrPointers; - MapValuesArrayTy UseDevicePtrSizes; - MapFlagsArrayTy UseDevicePtrTypes; + MapCombinedInfoTy UseDevicePtrCombinedInfo; for (const auto *C : CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) { for (const auto L : C->component_lists()) { - assert(!L.second.empty() && "Not expecting empty list of components!"); - const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); + OMPClauseMappableExprCommon::MappableExprComponentListRef Components = + std::get<1>(L); + assert(!Components.empty() && + "Not expecting empty list of components!"); + const ValueDecl *VD = Components.back().getAssociatedDeclaration(); VD = cast<ValueDecl>(VD->getCanonicalDecl()); - const Expr *IE = L.second.back().getAssociatedExpression(); + const Expr *IE = Components.back().getAssociatedExpression(); // If the first component is a member expression, we have to look into // 'this', which maps to null in the map of map information. Otherwise // look directly for the information. @@ -7982,18 +8417,19 @@ public: // Nonetheless, generateInfoForComponentList must be called to take // the pointer into account for the calculation of the range of the // partial struct. 
- InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None, - /*ReturnDevicePointer=*/false, C->isImplicit()); + InfoGen(nullptr, Components, OMPC_MAP_unknown, llvm::None, llvm::None, + /*ReturnDevicePointer=*/false, C->isImplicit(), nullptr); DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false); } else { llvm::Value *Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); - UseDevicePtrBasePointers.emplace_back(Ptr, VD); - UseDevicePtrPointers.push_back(Ptr); - UseDevicePtrSizes.push_back( + UseDevicePtrCombinedInfo.Exprs.push_back(VD); + UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD); + UseDevicePtrCombinedInfo.Pointers.push_back(Ptr); + UseDevicePtrCombinedInfo.Sizes.push_back( llvm::Constant::getNullValue(CGF.Int64Ty)); - UseDevicePtrTypes.push_back(OMP_MAP_RETURN_PARAM | - OMP_MAP_TARGET_PARAM); + UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); + UseDevicePtrCombinedInfo.Mappers.push_back(nullptr); } } } @@ -8008,12 +8444,13 @@ public: for (const auto *C : CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) { for (const auto L : C->component_lists()) { - assert(!L.second.empty() && "Not expecting empty list of components!"); - const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); + assert(!std::get<1>(L).empty() && + "Not expecting empty list of components!"); + const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration(); if (!Processed.insert(VD).second) continue; VD = cast<ValueDecl>(VD->getCanonicalDecl()); - const Expr *IE = L.second.back().getAssociatedExpression(); + const Expr *IE = std::get<1>(L).back().getAssociatedExpression(); // If the first component is a member expression, we have to look into // 'this', which maps to null in the map of map information. Otherwise // look directly for the information. 
@@ -8044,9 +8481,9 @@ public: // Nonetheless, generateInfoForComponentList must be called to take // the pointer into account for the calculation of the range of the // partial struct. - InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None, - /*ReturnDevicePointer=*/false, C->isImplicit(), - /*ForDeviceAddr=*/true); + InfoGen(nullptr, std::get<1>(L), OMPC_MAP_unknown, llvm::None, + llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), + nullptr, nullptr, /*ForDeviceAddr=*/true); DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true); } else { llvm::Value *Ptr; @@ -8054,26 +8491,23 @@ public: Ptr = CGF.EmitLValue(IE).getPointer(CGF); else Ptr = CGF.EmitScalarExpr(IE); - UseDevicePtrBasePointers.emplace_back(Ptr, VD); - UseDevicePtrPointers.push_back(Ptr); - UseDevicePtrSizes.push_back( + CombinedInfo.Exprs.push_back(VD); + CombinedInfo.BasePointers.emplace_back(Ptr, VD); + CombinedInfo.Pointers.push_back(Ptr); + CombinedInfo.Sizes.push_back( llvm::Constant::getNullValue(CGF.Int64Ty)); - UseDevicePtrTypes.push_back(OMP_MAP_RETURN_PARAM | - OMP_MAP_TARGET_PARAM); + CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); + CombinedInfo.Mappers.push_back(nullptr); } } } for (const auto &M : Info) { - // We need to know when we generate information for the first component - // associated with a capture, because the mapping flags depend on it. - bool IsFirstComponentList = true; + // Underlying variable declaration used in the map clause. + const ValueDecl *VD = std::get<0>(M); - // Temporary versions of arrays - MapBaseValuesArrayTy CurBasePointers; - MapValuesArrayTy CurPointers; - MapValuesArrayTy CurSizes; - MapFlagsArrayTy CurTypes; + // Temporary generated information. + MapCombinedInfoTy CurInfo; StructRangeInfoTy PartialStruct; for (const MapInfo &L : M.second) { @@ -8081,16 +8515,18 @@ public: "Not expecting declaration with no component lists."); // Remember the current base pointer index. 
- unsigned CurrentBasePointersIdx = CurBasePointers.size(); + unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); + CurInfo.NonContigInfo.IsNonContiguous = + L.Components.back().isNonContiguous(); generateInfoForComponentList( - L.MapType, L.MapModifiers, L.Components, CurBasePointers, - CurPointers, CurSizes, CurTypes, PartialStruct, - IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr); + L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, CurInfo, + PartialStruct, /*IsFirstComponentList=*/false, L.IsImplicit, + L.Mapper, L.ForDeviceAddr, VD, L.VarRef); // If this entry relates with a device pointer, set the relevant // declaration and add the 'return pointer' flag. if (L.ReturnDevicePointer) { - assert(CurBasePointers.size() > CurrentBasePointersIdx && + assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx && "Unexpected number of mapped base pointers."); const ValueDecl *RelevantVD = @@ -8098,10 +8534,10 @@ public: assert(RelevantVD && "No relevant declaration related with device pointer??"); - CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); - CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; + CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl( + RelevantVD); + CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; } - IsFirstComponentList = false; } // Append any pending zero-length pointers which are struct members and @@ -8120,7 +8556,7 @@ public: // Entry is RETURN_PARAM. Also, set the placeholder value // MEMBER_OF=FFFF so that the entry is later updated with the // correct value of MEMBER_OF. - CurTypes.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); + CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); } else { BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), @@ -8128,41 +8564,34 @@ public: // Entry is PTR_AND_OBJ and RETURN_PARAM. 
Also, set the placeholder // value MEMBER_OF=FFFF so that the entry is later updated with the // correct value of MEMBER_OF. - CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | - OMP_MAP_MEMBER_OF); + CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | + OMP_MAP_MEMBER_OF); } - CurBasePointers.emplace_back(BasePtr, L.VD); - CurPointers.push_back(Ptr); - CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty)); + CurInfo.Exprs.push_back(L.VD); + CurInfo.BasePointers.emplace_back(BasePtr, L.VD); + CurInfo.Pointers.push_back(Ptr); + CurInfo.Sizes.push_back( + llvm::Constant::getNullValue(this->CGF.Int64Ty)); + CurInfo.Mappers.push_back(nullptr); } } // If there is an entry in PartialStruct it means we have a struct with // individual members mapped. Emit an extra combined entry. if (PartialStruct.Base.isValid()) - emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, - PartialStruct); + emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD); // We need to append the results of this capture to what we already have. - BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); - Pointers.append(CurPointers.begin(), CurPointers.end()); - Sizes.append(CurSizes.begin(), CurSizes.end()); - Types.append(CurTypes.begin(), CurTypes.end()); + CombinedInfo.append(CurInfo); } // Append data for use_device_ptr clauses. - BasePointers.append(UseDevicePtrBasePointers.begin(), - UseDevicePtrBasePointers.end()); - Pointers.append(UseDevicePtrPointers.begin(), UseDevicePtrPointers.end()); - Sizes.append(UseDevicePtrSizes.begin(), UseDevicePtrSizes.end()); - Types.append(UseDevicePtrTypes.begin(), UseDevicePtrTypes.end()); - } - - /// Generate all the base pointers, section pointers, sizes and map types for - /// the extracted map clauses of user-defined mapper. 
- void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers, - MapValuesArrayTy &Pointers, - MapValuesArrayTy &Sizes, - MapFlagsArrayTy &Types) const { + CombinedInfo.append(UseDevicePtrCombinedInfo); + } + + /// Generate all the base pointers, section pointers, sizes, map types, and + /// mappers for the extracted map clauses of user-defined mapper (all included + /// in \a CombinedInfo). + void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const { assert(CurDir.is<const OMPDeclareMapperDecl *>() && "Expect a declare mapper directive"); const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); @@ -8171,25 +8600,22 @@ public: // correctly. Therefore, we organize all lists in a map. llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; - // Helper function to fill the information map for the different supported - // clauses. - auto &&InfoGen = [&Info]( - const ValueDecl *D, - OMPClauseMappableExprCommon::MappableExprComponentListRef L, - OpenMPMapClauseKind MapType, - ArrayRef<OpenMPMapModifierKind> MapModifiers, - bool ReturnDevicePointer, bool IsImplicit) { - const ValueDecl *VD = - D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; - Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, - IsImplicit); - }; - + // Fill the information map for map clauses. for (const auto *C : CurMapperDir->clauselists()) { const auto *MC = cast<OMPMapClause>(C); + const auto *EI = MC->getVarRefs().begin(); for (const auto L : MC->component_lists()) { - InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(), - /*ReturnDevicePointer=*/false, MC->isImplicit()); + // The Expression is not correct if the mapping is implicit + const Expr *E = (MC->getMapLoc().isValid()) ? *EI : nullptr; + const ValueDecl *VD = + std::get<0>(L) ? cast<ValueDecl>(std::get<0>(L)->getCanonicalDecl()) + : nullptr; + // Get the corresponding user-defined mapper. 
+ Info[VD].emplace_back(std::get<1>(L), MC->getMapType(), + MC->getMapTypeModifiers(), llvm::None, + /*ReturnDevicePointer=*/false, MC->isImplicit(), + std::get<2>(L), E); + ++EI; } } @@ -8198,42 +8624,38 @@ public: // associated with a capture, because the mapping flags depend on it. bool IsFirstComponentList = true; - // Temporary versions of arrays - MapBaseValuesArrayTy CurBasePointers; - MapValuesArrayTy CurPointers; - MapValuesArrayTy CurSizes; - MapFlagsArrayTy CurTypes; + // Underlying variable declaration used in the map clause. + const ValueDecl *VD = std::get<0>(M); + + // Temporary generated information. + MapCombinedInfoTy CurInfo; StructRangeInfoTy PartialStruct; for (const MapInfo &L : M.second) { assert(!L.Components.empty() && "Not expecting declaration with no component lists."); generateInfoForComponentList( - L.MapType, L.MapModifiers, L.Components, CurBasePointers, - CurPointers, CurSizes, CurTypes, PartialStruct, - IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr); + L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, CurInfo, + PartialStruct, IsFirstComponentList, L.IsImplicit, L.Mapper, + L.ForDeviceAddr, VD, L.VarRef); IsFirstComponentList = false; } // If there is an entry in PartialStruct it means we have a struct with // individual members mapped. Emit an extra combined entry. - if (PartialStruct.Base.isValid()) - emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, - PartialStruct); + if (PartialStruct.Base.isValid()) { + CurInfo.NonContigInfo.Dims.push_back(0); + emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD); + } // We need to append the results of this capture to what we already have. 
- BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); - Pointers.append(CurPointers.begin(), CurPointers.end()); - Sizes.append(CurSizes.begin(), CurSizes.end()); - Types.append(CurTypes.begin(), CurTypes.end()); + CombinedInfo.append(CurInfo); } } /// Emit capture info for lambdas for variables captured by reference. void generateInfoForLambdaCaptures( - const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers, - MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, - MapFlagsArrayTy &Types, + const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo, llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { const auto *RD = VD->getType() .getCanonicalType() @@ -8253,13 +8675,15 @@ public: LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); LambdaPointers.try_emplace(ThisLVal.getPointer(CGF), VDLVal.getPointer(CGF)); - BasePointers.push_back(ThisLVal.getPointer(CGF)); - Pointers.push_back(ThisLValVal.getPointer(CGF)); - Sizes.push_back( + CombinedInfo.Exprs.push_back(VD); + CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF)); + CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF)); + CombinedInfo.Sizes.push_back( CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty, /*isSigned=*/true)); - Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | - OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); + CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | + OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); + CombinedInfo.Mappers.push_back(nullptr); } for (const LambdaCapture &LC : RD->captures()) { if (!LC.capturesVariable()) @@ -8274,9 +8698,10 @@ public: LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); LambdaPointers.try_emplace(VarLVal.getPointer(CGF), VDLVal.getPointer(CGF)); - BasePointers.push_back(VarLVal.getPointer(CGF)); - Pointers.push_back(VarLValVal.getPointer(CGF)); - Sizes.push_back(CGF.Builder.CreateIntCast( + 
CombinedInfo.Exprs.push_back(VD); + CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); + CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF)); + CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( CGF.getTypeSize( VD->getType().getCanonicalType().getNonReferenceType()), CGF.Int64Ty, /*isSigned=*/true)); @@ -8284,12 +8709,14 @@ public: RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); LambdaPointers.try_emplace(VarLVal.getPointer(CGF), VDLVal.getPointer(CGF)); - BasePointers.push_back(VarLVal.getPointer(CGF)); - Pointers.push_back(VarRVal.getScalarVal()); - Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); + CombinedInfo.Exprs.push_back(VD); + CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); + CombinedInfo.Pointers.push_back(VarRVal.getScalarVal()); + CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); } - Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | - OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); + CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | + OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); + CombinedInfo.Mappers.push_back(nullptr); } } @@ -8322,13 +8749,10 @@ public: } } - /// Generate the base pointers, section pointers, sizes and map types - /// associated to a given capture. + /// Generate the base pointers, section pointers, sizes, map types, and + /// mappers associated to a given capture (all included in \a CombinedInfo). void generateInfoForCapture(const CapturedStmt::Capture *Cap, - llvm::Value *Arg, - MapBaseValuesArrayTy &BasePointers, - MapValuesArrayTy &Pointers, - MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, + llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct) const { assert(!Cap->capturesVariableArrayType() && "Not expecting to generate map info for a variable array type!"); @@ -8342,31 +8766,42 @@ public: // pass the pointer by value. If it is a reference to a declaration, we just // pass its value. 
if (DevPointersMap.count(VD)) { - BasePointers.emplace_back(Arg, VD); - Pointers.push_back(Arg); - Sizes.push_back( - CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), - CGF.Int64Ty, /*isSigned=*/true)); - Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM); + CombinedInfo.Exprs.push_back(VD); + CombinedInfo.BasePointers.emplace_back(Arg, VD); + CombinedInfo.Pointers.push_back(Arg); + CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( + CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty, + /*isSigned=*/true)); + CombinedInfo.Types.push_back( + (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) | + OMP_MAP_TARGET_PARAM); + CombinedInfo.Mappers.push_back(nullptr); return; } using MapData = std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, - OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>; + OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool, + const ValueDecl *, const Expr *>; SmallVector<MapData, 4> DeclComponentLists; assert(CurDir.is<const OMPExecutableDirective *>() && "Expect a executable directive"); const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { + const auto *EI = C->getVarRefs().begin(); for (const auto L : C->decl_component_lists(VD)) { - assert(L.first == VD && - "We got information for the wrong declaration??"); - assert(!L.second.empty() && + const ValueDecl *VDecl, *Mapper; + // The Expression is not correct if the mapping is implicit + const Expr *E = (C->getMapLoc().isValid()) ? 
*EI : nullptr; + OMPClauseMappableExprCommon::MappableExprComponentListRef Components; + std::tie(VDecl, Components, Mapper) = L; + assert(VDecl == VD && "We got information for the wrong declaration??"); + assert(!Components.empty() && "Not expecting declaration with no component lists."); - DeclComponentLists.emplace_back(L.second, C->getMapType(), + DeclComponentLists.emplace_back(Components, C->getMapType(), C->getMapTypeModifiers(), - C->isImplicit()); + C->isImplicit(), Mapper, E); + ++EI; } } @@ -8383,11 +8818,15 @@ public: OpenMPMapClauseKind MapType; ArrayRef<OpenMPMapModifierKind> MapModifiers; bool IsImplicit; - std::tie(Components, MapType, MapModifiers, IsImplicit) = L; + const ValueDecl *Mapper; + const Expr *VarRef; + std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = + L; ++Count; for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) { OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; - std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1; + std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper, + VarRef) = L1; auto CI = Components.rbegin(); auto CE = Components.rend(); auto SI = Components1.rbegin(); @@ -8473,14 +8912,17 @@ public: OpenMPMapClauseKind MapType; ArrayRef<OpenMPMapModifierKind> MapModifiers; bool IsImplicit; - std::tie(Components, MapType, MapModifiers, IsImplicit) = L; + const ValueDecl *Mapper; + const Expr *VarRef; + std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = + L; ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> OverlappedComponents = Pair.getSecond(); bool IsFirstComponentList = true; generateInfoForComponentList( - MapType, MapModifiers, Components, BasePointers, Pointers, Sizes, - Types, PartialStruct, IsFirstComponentList, IsImplicit, - /*ForDeviceAddr=*/false, OverlappedComponents); + MapType, MapModifiers, llvm::None, Components, CombinedInfo, + PartialStruct, IsFirstComponentList, IsImplicit, 
Mapper, + /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents); } // Go through other elements without overlapped elements. bool IsFirstComponentList = OverlappedData.empty(); @@ -8489,86 +8931,54 @@ public: OpenMPMapClauseKind MapType; ArrayRef<OpenMPMapModifierKind> MapModifiers; bool IsImplicit; - std::tie(Components, MapType, MapModifiers, IsImplicit) = L; + const ValueDecl *Mapper; + const Expr *VarRef; + std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = + L; auto It = OverlappedData.find(&L); if (It == OverlappedData.end()) - generateInfoForComponentList(MapType, MapModifiers, Components, - BasePointers, Pointers, Sizes, Types, - PartialStruct, IsFirstComponentList, - IsImplicit); + generateInfoForComponentList(MapType, MapModifiers, llvm::None, + Components, CombinedInfo, PartialStruct, + IsFirstComponentList, IsImplicit, Mapper, + /*ForDeviceAddr=*/false, VD, VarRef); IsFirstComponentList = false; } } - /// Generate the base pointers, section pointers, sizes and map types - /// associated with the declare target link variables. - void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers, - MapValuesArrayTy &Pointers, - MapValuesArrayTy &Sizes, - MapFlagsArrayTy &Types) const { - assert(CurDir.is<const OMPExecutableDirective *>() && - "Expect a executable directive"); - const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); - // Map other list items in the map clause which are not captured variables - // but "declare target link" global variables. 
- for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { - for (const auto L : C->component_lists()) { - if (!L.first) - continue; - const auto *VD = dyn_cast<VarDecl>(L.first); - if (!VD) - continue; - llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = - OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); - if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || - !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link) - continue; - StructRangeInfoTy PartialStruct; - generateInfoForComponentList( - C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers, - Pointers, Sizes, Types, PartialStruct, - /*IsFirstComponentList=*/true, C->isImplicit()); - assert(!PartialStruct.Base.isValid() && - "No partial structs for declare target link expected."); - } - } - } - /// Generate the default map information for a given capture \a CI, /// record field declaration \a RI and captured value \a CV. void generateDefaultMapInfo(const CapturedStmt::Capture &CI, const FieldDecl &RI, llvm::Value *CV, - MapBaseValuesArrayTy &CurBasePointers, - MapValuesArrayTy &CurPointers, - MapValuesArrayTy &CurSizes, - MapFlagsArrayTy &CurMapTypes) const { + MapCombinedInfoTy &CombinedInfo) const { bool IsImplicit = true; // Do the default mapping. if (CI.capturesThis()) { - CurBasePointers.push_back(CV); - CurPointers.push_back(CV); + CombinedInfo.Exprs.push_back(nullptr); + CombinedInfo.BasePointers.push_back(CV); + CombinedInfo.Pointers.push_back(CV); const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); - CurSizes.push_back( + CombinedInfo.Sizes.push_back( CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), CGF.Int64Ty, /*isSigned=*/true)); // Default map type. 
- CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); + CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM); } else if (CI.capturesVariableByCopy()) { - CurBasePointers.push_back(CV); - CurPointers.push_back(CV); + const VarDecl *VD = CI.getCapturedVar(); + CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); + CombinedInfo.BasePointers.push_back(CV); + CombinedInfo.Pointers.push_back(CV); if (!RI.getType()->isAnyPointerType()) { // We have to signal to the runtime captures passed by value that are // not pointers. - CurMapTypes.push_back(OMP_MAP_LITERAL); - CurSizes.push_back(CGF.Builder.CreateIntCast( + CombinedInfo.Types.push_back(OMP_MAP_LITERAL); + CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); } else { // Pointers are implicitly mapped with a zero size and no flags // (other than first map that is added for all implicit maps). - CurMapTypes.push_back(OMP_MAP_NONE); - CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); + CombinedInfo.Types.push_back(OMP_MAP_NONE); + CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); } - const VarDecl *VD = CI.getCapturedVar(); auto I = FirstPrivateDecls.find(VD); if (I != FirstPrivateDecls.end()) IsImplicit = I->getSecond(); @@ -8576,12 +8986,12 @@ public: assert(CI.capturesVariable() && "Expected captured reference."); const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); QualType ElementType = PtrTy->getPointeeType(); - CurSizes.push_back(CGF.Builder.CreateIntCast( + CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); // The default map type for a scalar/complex type is 'to' because by // default the value doesn't have to be retrieved. For an aggregate // type, the default is 'tofrom'. 
- CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI)); + CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI)); const VarDecl *VD = CI.getCapturedVar(); auto I = FirstPrivateDecls.find(VD); if (I != FirstPrivateDecls.end() && @@ -8592,56 +9002,157 @@ public: CGF.Builder.CreateMemCpy( CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF), Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), - CurSizes.back(), /*IsVolatile=*/false); + CombinedInfo.Sizes.back(), /*IsVolatile=*/false); // Use new global variable as the base pointers. - CurBasePointers.push_back(Addr); - CurPointers.push_back(Addr); + CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); + CombinedInfo.BasePointers.push_back(Addr); + CombinedInfo.Pointers.push_back(Addr); } else { - CurBasePointers.push_back(CV); + CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); + CombinedInfo.BasePointers.push_back(CV); if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( CV, ElementType, CGF.getContext().getDeclAlign(VD), AlignmentSource::Decl)); - CurPointers.push_back(PtrAddr.getPointer()); + CombinedInfo.Pointers.push_back(PtrAddr.getPointer()); } else { - CurPointers.push_back(CV); + CombinedInfo.Pointers.push_back(CV); } } if (I != FirstPrivateDecls.end()) IsImplicit = I->getSecond(); } // Every default map produces a single argument which is a target parameter. - CurMapTypes.back() |= OMP_MAP_TARGET_PARAM; + CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM; // Add flag stating this is an implicit map. if (IsImplicit) - CurMapTypes.back() |= OMP_MAP_IMPLICIT; + CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT; + + // No user-defined mapper for default mapping. 
+ CombinedInfo.Mappers.push_back(nullptr); } }; } // anonymous namespace +static void emitNonContiguousDescriptor( + CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, + CGOpenMPRuntime::TargetDataInfo &Info) { + CodeGenModule &CGM = CGF.CGM; + MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo + &NonContigInfo = CombinedInfo.NonContigInfo; + + // Build an array of struct descriptor_dim and then assign it to + // offload_args. + // + // struct descriptor_dim { + // uint64_t offset; + // uint64_t count; + // uint64_t stride + // }; + ASTContext &C = CGF.getContext(); + QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); + RecordDecl *RD; + RD = C.buildImplicitRecord("descriptor_dim"); + RD->startDefinition(); + addFieldToRecordDecl(C, RD, Int64Ty); + addFieldToRecordDecl(C, RD, Int64Ty); + addFieldToRecordDecl(C, RD, Int64Ty); + RD->completeDefinition(); + QualType DimTy = C.getRecordType(RD); + + enum { OffsetFD = 0, CountFD, StrideFD }; + // We need two index variable here since the size of "Dims" is the same as the + // size of Components, however, the size of offset, count, and stride is equal + // to the size of base declaration that is non-contiguous. + for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) { + // Skip emitting ir if dimension size is 1 since it cannot be + // non-contiguous. 
+ if (NonContigInfo.Dims[I] == 1) + continue; + llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]); + QualType ArrayTy = + C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0); + Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); + for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) { + unsigned RevIdx = EE - II - 1; + LValue DimsLVal = CGF.MakeAddrLValue( + CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy); + // Offset + LValue OffsetLVal = CGF.EmitLValueForField( + DimsLVal, *std::next(RD->field_begin(), OffsetFD)); + CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal); + // Count + LValue CountLVal = CGF.EmitLValueForField( + DimsLVal, *std::next(RD->field_begin(), CountFD)); + CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal); + // Stride + LValue StrideLVal = CGF.EmitLValueForField( + DimsLVal, *std::next(RD->field_begin(), StrideFD)); + CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal); + } + // args[I] = &dims + Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + DimsAddr, CGM.Int8PtrTy); + llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( + llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), + Info.PointersArray, 0, I); + Address PAddr(P, CGF.getPointerAlign()); + CGF.Builder.CreateStore(DAddr.getPointer(), PAddr); + ++L; + } +} + +/// Emit a string constant containing the names of the values mapped to the +/// offloading runtime library. 
+llvm::Constant * +emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, + MappableExprsHandler::MappingExprInfo &MapExprs) { + llvm::Constant *SrcLocStr; + if (!MapExprs.getMapDecl()) { + SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); + } else { + std::string ExprName = ""; + if (MapExprs.getMapExpr()) { + PrintingPolicy P(CGF.getContext().getLangOpts()); + llvm::raw_string_ostream OS(ExprName); + MapExprs.getMapExpr()->printPretty(OS, nullptr, P); + OS.flush(); + } else { + ExprName = MapExprs.getMapDecl()->getNameAsString(); + } + + SourceLocation Loc = MapExprs.getMapDecl()->getLocation(); + PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); + const char *FileName = PLoc.getFilename(); + unsigned Line = PLoc.getLine(); + unsigned Column = PLoc.getColumn(); + SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(), + Line, Column); + } + + return SrcLocStr; +} + /// Emit the arrays used to pass the captures and map information to the /// offloading runtime library. If there is no map or capture information, /// return nullptr by reference. -static void -emitOffloadingArrays(CodeGenFunction &CGF, - MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, - MappableExprsHandler::MapValuesArrayTy &Pointers, - MappableExprsHandler::MapValuesArrayTy &Sizes, - MappableExprsHandler::MapFlagsArrayTy &MapTypes, - CGOpenMPRuntime::TargetDataInfo &Info) { +static void emitOffloadingArrays( + CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, + CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, + bool IsNonContiguous = false) { CodeGenModule &CGM = CGF.CGM; ASTContext &Ctx = CGF.getContext(); // Reset the array information. 
Info.clearArrayInfo(); - Info.NumberOfPtrs = BasePointers.size(); + Info.NumberOfPtrs = CombinedInfo.BasePointers.size(); if (Info.NumberOfPtrs) { // Detect if we have any capture size requiring runtime evaluation of the // size so that a constant array could be eventually used. bool hasRuntimeEvaluationCaptureSize = false; - for (llvm::Value *S : Sizes) + for (llvm::Value *S : CombinedInfo.Sizes) if (!isa<llvm::Constant>(S)) { hasRuntimeEvaluationCaptureSize = true; break; @@ -8656,6 +9167,9 @@ emitOffloadingArrays(CodeGenFunction &CGF, CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); Info.PointersArray = CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); + Address MappersArray = + CGF.CreateMemTemp(PointerArrayType, ".offload_mappers"); + Info.MappersArray = MappersArray.getPointer(); // If we don't have any VLA types or other types that require runtime // evaluation, we can use a constant array for the map sizes, otherwise we @@ -8672,8 +9186,15 @@ emitOffloadingArrays(CodeGenFunction &CGF, // We expect all the sizes to be constant, so we collect them to create // a constant array. SmallVector<llvm::Constant *, 16> ConstSizes; - for (llvm::Value *S : Sizes) - ConstSizes.push_back(cast<llvm::Constant>(S)); + for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) { + if (IsNonContiguous && + (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) { + ConstSizes.push_back(llvm::ConstantInt::get( + CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I])); + } else { + ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I])); + } + } auto *SizesArrayInit = llvm::ConstantArray::get( llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); @@ -8688,8 +9209,8 @@ emitOffloadingArrays(CodeGenFunction &CGF, // The map types are always constant so we don't need to generate code to // fill arrays. Instead, we create an array constant. 
- SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0); - llvm::copy(MapTypes, Mapping.begin()); + SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0); + llvm::copy(CombinedInfo.Types, Mapping.begin()); llvm::Constant *MapTypesArrayInit = llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); std::string MaptypesName = @@ -8701,8 +9222,57 @@ emitOffloadingArrays(CodeGenFunction &CGF, MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); Info.MapTypesArray = MapTypesArrayGbl; + // The information types are only built if there is debug information + // requested. + if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) { + Info.MapNamesArray = llvm::Constant::getNullValue( + llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo()); + } else { + auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { + return emitMappingInformation(CGF, OMPBuilder, MapExpr); + }; + SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size()); + llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap); + + llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get( + llvm::ArrayType::get( + llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo(), + CombinedInfo.Exprs.size()), + InfoMap); + auto *MapNamesArrayGbl = new llvm::GlobalVariable( + CGM.getModule(), MapNamesArrayInit->getType(), + /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, + MapNamesArrayInit, + CGM.getOpenMPRuntime().getName({"offload_mapnames"})); + Info.MapNamesArray = MapNamesArrayGbl; + } + + // If there's a present map type modifier, it must not be applied to the end + // of a region, so generate a separate map type array in that case. 
+ if (Info.separateBeginEndCalls()) { + bool EndMapTypesDiffer = false; + for (uint64_t &Type : Mapping) { + if (Type & MappableExprsHandler::OMP_MAP_PRESENT) { + Type &= ~MappableExprsHandler::OMP_MAP_PRESENT; + EndMapTypesDiffer = true; + } + } + if (EndMapTypesDiffer) { + MapTypesArrayInit = + llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); + MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"}); + MapTypesArrayGbl = new llvm::GlobalVariable( + CGM.getModule(), MapTypesArrayInit->getType(), + /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, + MapTypesArrayInit, MaptypesName); + MapTypesArrayGbl->setUnnamedAddr( + llvm::GlobalValue::UnnamedAddr::Global); + Info.MapTypesArrayEnd = MapTypesArrayGbl; + } + } + for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { - llvm::Value *BPVal = *BasePointers[I]; + llvm::Value *BPVal = *CombinedInfo.BasePointers[I]; llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), Info.BasePointersArray, 0, I); @@ -8712,10 +9282,11 @@ emitOffloadingArrays(CodeGenFunction &CGF, CGF.Builder.CreateStore(BPVal, BPAddr); if (Info.requiresDevicePointerInfo()) - if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl()) + if (const ValueDecl *DevVD = + CombinedInfo.BasePointers[I].getDevicePtrDecl()) Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); - llvm::Value *PVal = Pointers[I]; + llvm::Value *PVal = CombinedInfo.Pointers[I]; llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), Info.PointersArray, 0, I); @@ -8731,20 +9302,53 @@ emitOffloadingArrays(CodeGenFunction &CGF, /*Idx0=*/0, /*Idx1=*/I); Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty)); - CGF.Builder.CreateStore( - CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true), - SAddr); + CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I], + CGM.Int64Ty, + /*isSigned=*/true), + SAddr); + } + 
+ // Fill up the mapper array. + llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); + if (CombinedInfo.Mappers[I]) { + MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( + cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); + MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy); + Info.HasMapper = true; } + Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I); + CGF.Builder.CreateStore(MFunc, MAddr); } } + + if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() || + Info.NumberOfPtrs == 0) + return; + + emitNonContiguousDescriptor(CGF, CombinedInfo, Info); } +namespace { +/// Additional arguments for emitOffloadingArraysArgument function. +struct ArgumentsOptions { + bool ForEndCall = false; + ArgumentsOptions() = default; + ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {} +}; +} // namespace + /// Emit the arguments to be passed to the runtime library based on the -/// arrays of pointers, sizes and map types. +/// arrays of base pointers, pointers, sizes, map types, and mappers. If +/// ForEndCall, emit map types to be passed for the end of the region instead of +/// the beginning. 
static void emitOffloadingArraysArgument( CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, - llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { + llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg, + llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info, + const ArgumentsOptions &Options = ArgumentsOptions()) { + assert((!Options.ForEndCall || Info.separateBeginEndCalls()) && + "expected region end call to runtime only when end call is separate"); CodeGenModule &CGM = CGF.CGM; if (Info.NumberOfPtrs) { BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( @@ -8761,15 +9365,36 @@ static void emitOffloadingArraysArgument( /*Idx0=*/0, /*Idx1=*/0); MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), - Info.MapTypesArray, + Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd + : Info.MapTypesArray, /*Idx0=*/0, /*Idx1=*/0); + + // Only emit the mapper information arrays if debug information is + // requested. 
+ if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) + MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); + else + MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( + llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), + Info.MapNamesArray, + /*Idx0=*/0, + /*Idx1=*/0); + // If there is no user-defined mapper, set the mapper array to nullptr to + // avoid an unnecessary data privatization + if (!Info.HasMapper) + MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); + else + MappersArrayArg = + CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy); } else { BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); MapTypesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); + MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); + MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); } } @@ -8885,7 +9510,8 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { /// \code /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, /// void *base, void *begin, -/// int64_t size, int64_t type) { +/// int64_t size, int64_t type, +/// void *name = nullptr) { /// // Allocate space for an array section first. 
/// if (size > 1 && !maptype.IsDelete) /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, @@ -8896,10 +9522,11 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { /// for (auto c : all_components) { /// if (c.hasMapper()) /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, -/// c.arg_type); +/// c.arg_type, c.arg_name); /// else /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, -/// c.arg_begin, c.arg_size, c.arg_type); +/// c.arg_begin, c.arg_size, c.arg_type, +/// c.arg_name); /// } /// } /// // Delete the array section. @@ -8932,12 +9559,15 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, ImplicitParamDecl::Other); ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, ImplicitParamDecl::Other); + ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, + ImplicitParamDecl::Other); FunctionArgList Args; Args.push_back(&HandleArg); Args.push_back(&BaseArg); Args.push_back(&BeginArg); Args.push_back(&SizeArg); Args.push_back(&TypeArg); + Args.push_back(&NameArg); const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); @@ -8956,6 +9586,9 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, llvm::Value *Size = MapperCGF.EmitLoadOfScalar( MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, C.getPointerType(Int64Ty), Loc); + // Convert the size in bytes into the number of array elements. + Size = MapperCGF.Builder.CreateExactUDiv( + Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(), CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy))); @@ -8994,6 +9627,7 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, // Emit the loop body block. 
MapperCGF.EmitBlock(BodyBB); + llvm::BasicBlock *LastBB = BodyBB; llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); PtrPHI->addIncoming(PtrBegin, EntryBB); @@ -9011,12 +9645,9 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, (void)Scope.Privatize(); // Get map clause information. Fill up the arrays with all mapped variables. - MappableExprsHandler::MapBaseValuesArrayTy BasePointers; - MappableExprsHandler::MapValuesArrayTy Pointers; - MappableExprsHandler::MapValuesArrayTy Sizes; - MappableExprsHandler::MapFlagsArrayTy MapTypes; + MappableExprsHandler::MapCombinedInfoTy Info; MappableExprsHandler MEHandler(*D, MapperCGF); - MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes); + MEHandler.generateAllInfoForMapper(Info); // Call the runtime API __tgt_mapper_num_components to get the number of // pre-existing components. @@ -9030,17 +9661,21 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); // Fill up the runtime mapper handle for all components. - for (unsigned I = 0; I < BasePointers.size(); ++I) { + for (unsigned I = 0; I < Info.BasePointers.size(); ++I) { llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( - *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); + *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( - Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); - llvm::Value *CurSizeArg = Sizes[I]; + Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); + llvm::Value *CurSizeArg = Info.Sizes[I]; + llvm::Value *CurNameArg = + (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) + ? 
llvm::ConstantPointerNull::get(CGM.VoidPtrTy) + : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]); // Extract the MEMBER_OF field from the map type. llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member"); MapperCGF.EmitBlock(MemberBB); - llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]); + llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]); llvm::Value *Member = MapperCGF.Builder.CreateAnd( OriMapType, MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF)); @@ -9116,6 +9751,7 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); // In case of tofrom, do nothing. MapperCGF.EmitBlock(EndBB); + LastBB = EndBB; llvm::PHINode *CurMapType = MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); CurMapType->addIncoming(AllocMapType, AllocBB); @@ -9123,23 +9759,29 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, CurMapType->addIncoming(FromMapType, FromBB); CurMapType->addIncoming(MemberMapType, ToElseBB); - // TODO: call the corresponding mapper function if a user-defined mapper is - // associated with this map clause. - // Call the runtime API __tgt_push_mapper_component to fill up the runtime - // data structure. - llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, - CurSizeArg, CurMapType}; - MapperCGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___tgt_push_mapper_component), - OffloadingArgs); + llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, + CurSizeArg, CurMapType, CurNameArg}; + if (Info.Mappers[I]) { + // Call the corresponding mapper function. 
+ llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc( + cast<OMPDeclareMapperDecl>(Info.Mappers[I])); + assert(MapperFunc && "Expect a valid mapper function is available."); + MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs); + } else { + // Call the runtime API __tgt_push_mapper_component to fill up the runtime + // data structure. + MapperCGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___tgt_push_mapper_component), + OffloadingArgs); + } } // Update the pointer to point to the next element that needs to be mapped, // and check whether we have mapped all elements. llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32( PtrPHI, /*Idx0=*/1, "omp.arraymap.next"); - PtrPHI->addIncoming(PtrNext, BodyBB); + PtrPHI->addIncoming(PtrNext, LastBB); llvm::Value *IsDone = MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone"); llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit"); @@ -9208,15 +9850,27 @@ void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( MapType, MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | MappableExprsHandler::OMP_MAP_FROM))); + llvm::Value *MapNameArg = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); + // Call the runtime API __tgt_push_mapper_component to fill up the runtime // data structure. 
- llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg}; + llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, + ArraySize, MapTypeArg, MapNameArg}; MapperCGF.EmitRuntimeCall( OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), OMPRTL___tgt_push_mapper_component), OffloadingArgs); } +llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc( + const OMPDeclareMapperDecl *D) { + auto I = UDMMap.find(D); + if (I != UDMMap.end()) + return I->second; + emitUserDefinedMapper(D); + return UDMMap.lookup(D); +} + void CGOpenMPRuntime::emitTargetNumIterationsCall( CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Value *DeviceID, @@ -9231,10 +9885,11 @@ void CGOpenMPRuntime::emitTargetNumIterationsCall( if (!TD) return; const auto *LD = cast<OMPLoopDirective>(TD); - auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF, - PrePostActionTy &) { + auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF, + PrePostActionTy &) { if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { - llvm::Value *Args[] = {DeviceID, NumIterations}; + llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); + llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations}; CGF.EmitRuntimeCall( OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_push_target_tripcount), @@ -9256,7 +9911,8 @@ void CGOpenMPRuntime::emitTargetCall( assert(OutlinedFn && "Invalid outlined function!"); - const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>(); + const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || + D.hasClausesOfKind<OMPNowaitClause>(); llvm::SmallVector<llvm::Value *, 16> CapturedVars; const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, @@ -9267,9 +9923,11 @@ void CGOpenMPRuntime::emitTargetCall( CodeGenFunction::OMPTargetDataInfo InputInfo; llvm::Value *MapTypesArray = nullptr; + llvm::Value 
*MapNamesArray = nullptr; // Fill up the pointer arrays and transfer execution to the device. auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, - &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars, + &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask, + &CapturedVars, SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { if (Device.getInt() == OMPC_DEVICE_ancestor) { // Reverse offloading is not supported, so just execute on the host. @@ -9316,6 +9974,9 @@ void CGOpenMPRuntime::emitTargetCall( llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); + // Source location for the ident struct + llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); + // Emit tripcount for the target loop-based directive. emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); @@ -9355,32 +10016,39 @@ void CGOpenMPRuntime::emitTargetCall( // passed to the runtime library - a 32-bit integer with the value zero. assert(NumThreads && "Thread limit expression should be available along " "with number of teams."); - llvm::Value *OffloadingArgs[] = {DeviceID, + llvm::Value *OffloadingArgs[] = {RTLoc, + DeviceID, OutlinedFnID, PointerNum, InputInfo.BasePointersArray.getPointer(), InputInfo.PointersArray.getPointer(), InputInfo.SizesArray.getPointer(), MapTypesArray, + MapNamesArray, + InputInfo.MappersArray.getPointer(), NumTeams, NumThreads}; Return = CGF.EmitRuntimeCall( OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), HasNowait ? OMPRTL___tgt_target_teams_nowait - : OMPRTL___tgt_target_teams), + CGM.getModule(), HasNowait + ? 
OMPRTL___tgt_target_teams_nowait_mapper + : OMPRTL___tgt_target_teams_mapper), OffloadingArgs); } else { - llvm::Value *OffloadingArgs[] = {DeviceID, + llvm::Value *OffloadingArgs[] = {RTLoc, + DeviceID, OutlinedFnID, PointerNum, InputInfo.BasePointersArray.getPointer(), InputInfo.PointersArray.getPointer(), InputInfo.SizesArray.getPointer(), - MapTypesArray}; + MapTypesArray, + MapNamesArray, + InputInfo.MappersArray.getPointer()}; Return = CGF.EmitRuntimeCall( OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), - HasNowait ? OMPRTL___tgt_target_nowait : OMPRTL___tgt_target), + CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper + : OMPRTL___tgt_target_mapper), OffloadingArgs); } @@ -9415,96 +10083,96 @@ void CGOpenMPRuntime::emitTargetCall( }; auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, - &CapturedVars, RequiresOuterTask, + &MapNamesArray, &CapturedVars, RequiresOuterTask, &CS](CodeGenFunction &CGF, PrePostActionTy &) { // Fill up the arrays with all the captured variables. - MappableExprsHandler::MapBaseValuesArrayTy BasePointers; - MappableExprsHandler::MapValuesArrayTy Pointers; - MappableExprsHandler::MapValuesArrayTy Sizes; - MappableExprsHandler::MapFlagsArrayTy MapTypes; + MappableExprsHandler::MapCombinedInfoTy CombinedInfo; // Get mappable expression information. 
MappableExprsHandler MEHandler(D, CGF); llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; + llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; auto RI = CS.getCapturedRecordDecl()->field_begin(); auto CV = CapturedVars.begin(); for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), CE = CS.capture_end(); CI != CE; ++CI, ++RI, ++CV) { - MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; - MappableExprsHandler::MapValuesArrayTy CurPointers; - MappableExprsHandler::MapValuesArrayTy CurSizes; - MappableExprsHandler::MapFlagsArrayTy CurMapTypes; + MappableExprsHandler::MapCombinedInfoTy CurInfo; MappableExprsHandler::StructRangeInfoTy PartialStruct; // VLA sizes are passed to the outlined region by copy and do not have map // information associated. if (CI->capturesVariableArrayType()) { - CurBasePointers.push_back(*CV); - CurPointers.push_back(*CV); - CurSizes.push_back(CGF.Builder.CreateIntCast( + CurInfo.Exprs.push_back(nullptr); + CurInfo.BasePointers.push_back(*CV); + CurInfo.Pointers.push_back(*CV); + CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast( CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); // Copy to the device as an argument. No need to retrieve it. - CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL | - MappableExprsHandler::OMP_MAP_TARGET_PARAM | - MappableExprsHandler::OMP_MAP_IMPLICIT); + CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL | + MappableExprsHandler::OMP_MAP_TARGET_PARAM | + MappableExprsHandler::OMP_MAP_IMPLICIT); + CurInfo.Mappers.push_back(nullptr); } else { // If we have any information in the map clause, we use it, otherwise we // just do a default mapping. 
- MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, - CurSizes, CurMapTypes, PartialStruct); - if (CurBasePointers.empty()) - MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, - CurPointers, CurSizes, CurMapTypes); + MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); + if (!CI->capturesThis()) + MappedVarSet.insert(CI->getCapturedVar()); + else + MappedVarSet.insert(nullptr); + if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid()) + MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo); // Generate correct mapping for variables captured by reference in // lambdas. if (CI->capturesVariable()) - MEHandler.generateInfoForLambdaCaptures( - CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes, - CurMapTypes, LambdaPointers); + MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV, + CurInfo, LambdaPointers); } // We expect to have at least an element of information for this capture. - assert(!CurBasePointers.empty() && + assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) && "Non-existing map pointer for capture!"); - assert(CurBasePointers.size() == CurPointers.size() && - CurBasePointers.size() == CurSizes.size() && - CurBasePointers.size() == CurMapTypes.size() && + assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() && + CurInfo.BasePointers.size() == CurInfo.Sizes.size() && + CurInfo.BasePointers.size() == CurInfo.Types.size() && + CurInfo.BasePointers.size() == CurInfo.Mappers.size() && "Inconsistent map information sizes!"); // If there is an entry in PartialStruct it means we have a struct with // individual members mapped. Emit an extra combined entry. 
if (PartialStruct.Base.isValid()) - MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes, - CurMapTypes, PartialStruct); + MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, + nullptr, /*NoTargetParam=*/false); // We need to append the results of this capture to what we already have. - BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); - Pointers.append(CurPointers.begin(), CurPointers.end()); - Sizes.append(CurSizes.begin(), CurSizes.end()); - MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); + CombinedInfo.append(CurInfo); } // Adjust MEMBER_OF flags for the lambdas captures. - MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers, - Pointers, MapTypes); - // Map other list items in the map clause which are not captured variables - // but "declare target link" global variables. - MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes, - MapTypes); + MEHandler.adjustMemberOfForLambdaCaptures( + LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers, + CombinedInfo.Types); + // Map any list items in a map clause that were not captures because they + // weren't referenced within the construct. + MEHandler.generateAllInfo(CombinedInfo, MappedVarSet); TargetDataInfo Info; // Fill up the arrays and create the arguments. 
- emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); - emitOffloadingArraysArgument(CGF, Info.BasePointersArray, - Info.PointersArray, Info.SizesArray, - Info.MapTypesArray, Info); + emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder); + emitOffloadingArraysArgument( + CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, + Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, + {/*ForEndTask=*/false}); + InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; InputInfo.BasePointersArray = Address(Info.BasePointersArray, CGM.getPointerAlign()); InputInfo.PointersArray = Address(Info.PointersArray, CGM.getPointerAlign()); InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); + InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); MapTypesArray = Info.MapTypesArray; + MapNamesArray = Info.MapNamesArray; if (RequiresOuterTask) CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); else @@ -9673,8 +10341,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) return; - scanForTargetRegionsFunctions( - E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName); + scanForTargetRegionsFunctions(E->getRawStmt(), ParentName); return; } @@ -10092,24 +10759,25 @@ void CGOpenMPRuntime::emitTargetDataCalls( auto &&BeginThenGen = [this, &D, Device, &Info, &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { // Fill up the arrays with all the mapped variables. - MappableExprsHandler::MapBaseValuesArrayTy BasePointers; - MappableExprsHandler::MapValuesArrayTy Pointers; - MappableExprsHandler::MapValuesArrayTy Sizes; - MappableExprsHandler::MapFlagsArrayTy MapTypes; + MappableExprsHandler::MapCombinedInfoTy CombinedInfo; // Get map clause information. 
- MappableExprsHandler MCHandler(D, CGF); - MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); + MappableExprsHandler MEHandler(D, CGF); + MEHandler.generateAllInfo(CombinedInfo); // Fill up the arrays and create the arguments. - emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); + emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, + /*IsNonContiguous=*/true); llvm::Value *BasePointersArrayArg = nullptr; llvm::Value *PointersArrayArg = nullptr; llvm::Value *SizesArrayArg = nullptr; llvm::Value *MapTypesArrayArg = nullptr; + llvm::Value *MapNamesArrayArg = nullptr; + llvm::Value *MappersArrayArg = nullptr; emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, - SizesArrayArg, MapTypesArrayArg, Info); + SizesArrayArg, MapTypesArrayArg, + MapNamesArrayArg, MappersArrayArg, Info); // Emit device ID if any. llvm::Value *DeviceID = nullptr; @@ -10122,13 +10790,23 @@ void CGOpenMPRuntime::emitTargetDataCalls( // Emit the number of elements in the offloading arrays. llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); + // + // Source location for the ident struct + llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); - llvm::Value *OffloadingArgs[] = { - DeviceID, PointerNum, BasePointersArrayArg, - PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___tgt_target_data_begin), - OffloadingArgs); + llvm::Value *OffloadingArgs[] = {RTLoc, + DeviceID, + PointerNum, + BasePointersArrayArg, + PointersArrayArg, + SizesArrayArg, + MapTypesArrayArg, + MapNamesArrayArg, + MappersArrayArg}; + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper), + OffloadingArgs); // If device pointer privatization is required, emit the body of the region // here. It will have to be duplicated: with and without privatization. 
@@ -10137,16 +10815,20 @@ void CGOpenMPRuntime::emitTargetDataCalls( }; // Generate code for the closing of the data region. - auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, - PrePostActionTy &) { + auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF, + PrePostActionTy &) { assert(Info.isValid() && "Invalid data environment closing arguments."); llvm::Value *BasePointersArrayArg = nullptr; llvm::Value *PointersArrayArg = nullptr; llvm::Value *SizesArrayArg = nullptr; llvm::Value *MapTypesArrayArg = nullptr; + llvm::Value *MapNamesArrayArg = nullptr; + llvm::Value *MappersArrayArg = nullptr; emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, - SizesArrayArg, MapTypesArrayArg, Info); + SizesArrayArg, MapTypesArrayArg, + MapNamesArrayArg, MappersArrayArg, Info, + {/*ForEndCall=*/true}); // Emit device ID if any. llvm::Value *DeviceID = nullptr; @@ -10160,12 +10842,22 @@ void CGOpenMPRuntime::emitTargetDataCalls( // Emit the number of elements in the offloading arrays. 
llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); - llvm::Value *OffloadingArgs[] = { - DeviceID, PointerNum, BasePointersArrayArg, - PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___tgt_target_data_end), - OffloadingArgs); + // Source location for the ident struct + llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); + + llvm::Value *OffloadingArgs[] = {RTLoc, + DeviceID, + PointerNum, + BasePointersArrayArg, + PointersArrayArg, + SizesArrayArg, + MapTypesArrayArg, + MapNamesArrayArg, + MappersArrayArg}; + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), + OffloadingArgs); }; // If we need device pointer privatization, we need to emit the body of the @@ -10218,9 +10910,10 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( CodeGenFunction::OMPTargetDataInfo InputInfo; llvm::Value *MapTypesArray = nullptr; + llvm::Value *MapNamesArray = nullptr; // Generate the code for the opening of the data environment. - auto &&ThenGen = [this, &D, Device, &InputInfo, - &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { + auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray, + &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) { // Emit device ID if any. 
llvm::Value *DeviceID = nullptr; if (Device) { @@ -10234,29 +10927,35 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( llvm::Constant *PointerNum = CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); - llvm::Value *OffloadingArgs[] = {DeviceID, + // Source location for the ident struct + llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); + + llvm::Value *OffloadingArgs[] = {RTLoc, + DeviceID, PointerNum, InputInfo.BasePointersArray.getPointer(), InputInfo.PointersArray.getPointer(), InputInfo.SizesArray.getPointer(), - MapTypesArray}; + MapTypesArray, + MapNamesArray, + InputInfo.MappersArray.getPointer()}; - // Select the right runtime function call for each expected standalone + // Select the right runtime function call for each standalone // directive. const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); RuntimeFunction RTLFn; switch (D.getDirectiveKind()) { case OMPD_target_enter_data: - RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait - : OMPRTL___tgt_target_data_begin; + RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper + : OMPRTL___tgt_target_data_begin_mapper; break; case OMPD_target_exit_data: - RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait - : OMPRTL___tgt_target_data_end; + RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper + : OMPRTL___tgt_target_data_end_mapper; break; case OMPD_target_update: - RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait - : OMPRTL___tgt_target_data_update; + RTLFn = HasNowait ? 
OMPRTL___tgt_target_data_update_nowait_mapper + : OMPRTL___tgt_target_data_update_mapper; break; case OMPD_parallel: case OMPD_for: @@ -10330,24 +11029,26 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( OffloadingArgs); }; - auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( - CodeGenFunction &CGF, PrePostActionTy &) { + auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, + &MapNamesArray](CodeGenFunction &CGF, + PrePostActionTy &) { // Fill up the arrays with all the mapped variables. - MappableExprsHandler::MapBaseValuesArrayTy BasePointers; - MappableExprsHandler::MapValuesArrayTy Pointers; - MappableExprsHandler::MapValuesArrayTy Sizes; - MappableExprsHandler::MapFlagsArrayTy MapTypes; + MappableExprsHandler::MapCombinedInfoTy CombinedInfo; // Get map clause information. MappableExprsHandler MEHandler(D, CGF); - MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); + MEHandler.generateAllInfo(CombinedInfo); TargetDataInfo Info; // Fill up the arrays and create the arguments. 
- emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); - emitOffloadingArraysArgument(CGF, Info.BasePointersArray, - Info.PointersArray, Info.SizesArray, - Info.MapTypesArray, Info); + emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, + /*IsNonContiguous=*/true); + bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || + D.hasClausesOfKind<OMPNowaitClause>(); + emitOffloadingArraysArgument( + CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, + Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, + {/*ForEndTask=*/false}); InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; InputInfo.BasePointersArray = Address(Info.BasePointersArray, CGM.getPointerAlign()); @@ -10355,8 +11056,10 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( Address(Info.PointersArray, CGM.getPointerAlign()); InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); + InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); MapTypesArray = Info.MapTypesArray; - if (D.hasClausesOfKind<OMPDependClause>()) + MapNamesArray = Info.MapNamesArray; + if (RequiresOuterTask) CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); else emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); @@ -11095,87 +11798,115 @@ Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, return CGF.GetAddrOfLocalVar(NativeParam); } -namespace { -/// Cleanup action for allocate support. 
-class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { -public: - static const int CleanupArgs = 3; - -private: - llvm::FunctionCallee RTLFn; - llvm::Value *Args[CleanupArgs]; - -public: - OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, - ArrayRef<llvm::Value *> CallArgs) - : RTLFn(RTLFn) { - assert(CallArgs.size() == CleanupArgs && - "Size of arguments does not match."); - std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); - } - void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { - if (!CGF.HaveInsertPoint()) - return; - CGF.EmitRuntimeCall(RTLFn, Args); - } -}; -} // namespace - Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD) { if (!VD) return Address::invalid(); + Address UntiedAddr = Address::invalid(); + Address UntiedRealAddr = Address::invalid(); + auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); + if (It != FunctionToUntiedTaskStackMap.end()) { + const UntiedLocalVarsAddressesMap &UntiedData = + UntiedLocalVarsStack[It->second]; + auto I = UntiedData.find(VD); + if (I != UntiedData.end()) { + UntiedAddr = I->second.first; + UntiedRealAddr = I->second.second; + } + } const VarDecl *CVD = VD->getCanonicalDecl(); - if (!CVD->hasAttr<OMPAllocateDeclAttr>()) - return Address::invalid(); - const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); - // Use the default allocation. 
- if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || - AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && - !AA->getAllocator()) - return Address::invalid(); - llvm::Value *Size; - CharUnits Align = CGM.getContext().getDeclAlign(CVD); - if (CVD->getType()->isVariablyModifiedType()) { - Size = CGF.getTypeSize(CVD->getType()); - // Align the size: ((size + align - 1) / align) * align - Size = CGF.Builder.CreateNUWAdd( - Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); - Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); - Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); - } else { - CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); - Size = CGM.getSize(Sz.alignTo(Align)); + if (CVD->hasAttr<OMPAllocateDeclAttr>()) { + // Use the default allocation. + if (!isAllocatableDecl(VD)) + return UntiedAddr; + llvm::Value *Size; + CharUnits Align = CGM.getContext().getDeclAlign(CVD); + if (CVD->getType()->isVariablyModifiedType()) { + Size = CGF.getTypeSize(CVD->getType()); + // Align the size: ((size + align - 1) / align) * align + Size = CGF.Builder.CreateNUWAdd( + Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); + Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); + Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); + } else { + CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); + Size = CGM.getSize(Sz.alignTo(Align)); + } + llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); + const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); + assert(AA->getAllocator() && + "Expected allocator expression for non-default allocator."); + llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); + // According to the standard, the original allocator type is a enum + // (integer). Convert to pointer type, if required. 
+ Allocator = CGF.EmitScalarConversion( + Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy, + AA->getAllocator()->getExprLoc()); + llvm::Value *Args[] = {ThreadID, Size, Allocator}; + + llvm::Value *Addr = + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_alloc), + Args, getName({CVD->getName(), ".void.addr"})); + llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_free); + QualType Ty = CGM.getContext().getPointerType(CVD->getType()); + Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"})); + if (UntiedAddr.isValid()) + CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty); + + // Cleanup action for allocate support. + class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { + llvm::FunctionCallee RTLFn; + unsigned LocEncoding; + Address Addr; + const Expr *Allocator; + + public: + OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding, + Address Addr, const Expr *Allocator) + : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr), + Allocator(Allocator) {} + void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { + if (!CGF.HaveInsertPoint()) + return; + llvm::Value *Args[3]; + Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID( + CGF, SourceLocation::getFromRawEncoding(LocEncoding)); + Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + Addr.getPointer(), CGF.VoidPtrTy); + llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator); + // According to the standard, the original allocator type is a enum + // (integer). Convert to pointer type, if required. + AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(), + CGF.getContext().VoidPtrTy, + Allocator->getExprLoc()); + Args[2] = AllocVal; + + CGF.EmitRuntimeCall(RTLFn, Args); + } + }; + Address VDAddr = + UntiedRealAddr.isValid() ? 
UntiedRealAddr : Address(Addr, Align); + CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>( + NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(), + VDAddr, AA->getAllocator()); + if (UntiedRealAddr.isValid()) + if (auto *Region = + dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) + Region->emitUntiedSwitch(CGF); + return VDAddr; } - llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); - assert(AA->getAllocator() && - "Expected allocator expression for non-default allocator."); - llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); - // According to the standard, the original allocator type is a enum (integer). - // Convert to pointer type, if required. - if (Allocator->getType()->isIntegerTy()) - Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); - else if (Allocator->getType()->isPointerTy()) - Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, - CGM.VoidPtrTy); - llvm::Value *Args[] = {ThreadID, Size, Allocator}; - - llvm::Value *Addr = - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_alloc), - Args, getName({CVD->getName(), ".void.addr"})); - llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, - Allocator}; - llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_free); + return UntiedAddr; +} - CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn, - llvm::makeArrayRef(FiniArgs)); - Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - Addr, - CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), - getName({CVD->getName(), ".addr"})); - return Address(Addr, Align); +bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF, + const VarDecl *VD) const { + auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn); + if (It == FunctionToUntiedTaskStackMap.end()) + return false; + return 
UntiedLocalVarsStack[It->second].count(VD) > 0; } CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( @@ -11210,6 +11941,24 @@ CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); } +CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( + CodeGenFunction &CGF, + const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, + std::pair<Address, Address>> &LocalVars) + : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) { + if (!NeedToPush) + return; + CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace( + CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size()); + CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars); +} + +CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() { + if (!NeedToPush) + return; + CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back(); +} + bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); |